Compare commits
69 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| af0f9e80af | |||
| e51377c933 | |||
| 2671639250 | |||
| 6c853b43af | |||
| 85ef453d0d | |||
| 88915c3d9a | |||
| e7b8aa6114 | |||
| 8a1f037d5a | |||
| e328758c53 | |||
| 72cf2f4091 | |||
| 474b7bd0ff | |||
| 437d29f19e | |||
| f0ef7ea0a8 | |||
| 3a8f2bed4e | |||
| b7f29f3048 | |||
| 0702551c25 | |||
| db9c68ca9c | |||
| 95b5b09a67 | |||
| 627c17fae1 | |||
| 34a99c783b | |||
| 52cd0da9f5 | |||
| 8ac9a33d91 | |||
| dd35ae1fe6 | |||
| 4a6a79d02e | |||
| 9eedf9d6a9 | |||
| bed647ca2c | |||
| 8cebe431e1 | |||
| bdb7e1439e | |||
| 8df0479b99 | |||
| 6b5fe6aa82 | |||
| 25d04ec37e | |||
| 8df5ab381a | |||
| 6f21d926d7 | |||
| aa8ae6613b | |||
| 44d676aede | |||
| e2b1a6686a | |||
| 01bdb484de | |||
| 3bb4d5a082 | |||
| 4ab3bd55e5 | |||
| 70d2842c16 | |||
| 4966ef3359 | |||
| 0efa7d8cca | |||
| ea17528767 | |||
| 1cfad83c06 | |||
| 76ffd5c9a3 | |||
| 6030bfa18e | |||
| 82755a3623 | |||
| 121ab7e263 | |||
| ca443b1903 | |||
| 7a2da4d8b6 | |||
| a0b21ca225 | |||
| 3690e4c2ca | |||
| 1d652b24c6 | |||
| ee1423db7a | |||
| 4993057ed5 | |||
| 073252d7a6 | |||
| a894717319 | |||
| 0856cd4f93 | |||
| c446bef64f | |||
| c7a7cd1e5e | |||
| 36ab8d15f1 | |||
| 042f5e3d82 | |||
| db95f8644f | |||
| 85e4334bb7 | |||
| 9beb67c1e9 | |||
| 056bb39a4d | |||
| 9dd97a27f1 | |||
| 281e00b300 | |||
| ac42783e36 |
@@ -73,7 +73,7 @@ powershell -ExecutionPolicy Bypass -File scripts/run-client-e2e-tests.ps1
|
||||
- **Style guides** in `docs/style-guides/` are authoritative. Follow `CSharpStyleGuide.md` for gateway/worker/.NET-client code: file-scoped namespaces, `sealed` by default, `Async` suffix on Task-returning methods, MXAccess-aligned names (`MxStatusProxy`, `ServerHandle`, `ItemHandle`, `HResult`).
|
||||
- **MXAccess parity is the contract.** Don't "fix" surprising MXAccess behavior (e.g., `WriteSecured` failing before a value-bearing NMX body, distinct `OperationComplete` semantics, invalid-handle exceptions) unless the client explicitly opts into a non-parity mode. The installed MXAccess COM component is the baseline.
|
||||
- **Don't synthesize events.** The gateway forwards only events the worker emits; it never invents `OperationComplete` from write completion or command replies.
|
||||
- **One worker per session, one event subscriber per session** (v1). Multi-subscriber fan-out and reconnectable sessions are explicitly out of scope — see `docs/DesignDecisions.md`.
|
||||
- **One worker per session** (invariant). Multi-subscriber event fan-out and reconnect-with-replay have shipped and are config-gated: `AllowMultipleEventSubscribers` (default `false`) enables fan-out up to `MaxEventSubscribersPerSession` (default `8`); `DetachGraceSeconds` (default `30`) retains a session after its last subscriber drops so clients can reconnect; `ReplayBufferCapacity` / `ReplayRetentionSeconds` control how much event history the replay ring keeps. Default config preserves the original single-subscriber, no-retention behavior. See `docs/DesignDecisions.md` and `docs/Sessions.md`.
|
||||
- **Gateway restart does not reattach orphan workers.** The first version terminates orphaned workers on startup; do not design code paths that assume reattachment.
|
||||
- **No Blazor UI component libraries.** Dashboard uses local Bootstrap CSS/JS only — do not introduce MudBlazor, Radzen, FluentUI, etc.
|
||||
- **Don't log secrets or full tag values by default.** API keys, passwords, `WriteSecured` payloads, and `AuthenticateUser` credentials must never reach logs. Value logging is opt-in and redacted.
|
||||
@@ -85,6 +85,8 @@ powershell -ExecutionPolicy Bypass -File scripts/run-client-e2e-tests.ps1
|
||||
|
||||
When source code changes, build and test the affected component before reporting work done. If the change crosses component boundaries, build each affected component — don't rely on a single top-level build:
|
||||
|
||||
**Run targeted tests per task, never the full suite each time.** When executing a plan task-by-task, run only the tests that exercise the code that task touched (`dotnet test --filter "FullyQualifiedName~<TestClass>"`, or the per-task test named in the plan). The full gateway suite is slow and leaves orphaned testhost processes — run it at most once per phase (after a related batch of tasks lands), not after every task.
|
||||
|
||||
| Changed area | Required verification |
|
||||
|---|---|
|
||||
| Contracts or `.proto` files | regenerate generated code, then build gateway, worker, and every generated client touched by the contract |
|
||||
@@ -116,7 +118,7 @@ External analysis sources referenced by design docs:
|
||||
|
||||
Gateway gRPC clients authenticate with an API key in metadata: `authorization: Bearer mxgw_<key-id>_<secret>`. Keys are stored hashed (with a peppered SHA) in a gateway-owned SQLite DB (default `C:\ProgramData\MxGateway\gateway-auth.db`). Scopes (`session`, `invoke`, `event`, `metadata`, `admin`) gate specific RPCs; missing → `Unauthenticated`, insufficient → `PermissionDenied`. The `apikey` subcommand on the server exe manages keys; see `src/ZB.MOM.WW.MxGateway.Server/Security/Authentication/`.
|
||||
|
||||
Dashboard auth is LDAP-backed (separate from the gRPC API-key model). `/login` binds against `MxGateway:Ldap` and maps the user's LDAP groups to `Admin` or `Viewer` via `MxGateway:Dashboard:GroupToRole`, then issues an HTTP-only secure `__Host-MxGatewayDashboard` cookie. SignalR hubs at `/hubs/{snapshot,alarms,events}` accept either the cookie or a 30-minute bearer minted at `/hubs/token`. `Dashboard:AllowAnonymousLocalhost` bypasses auth on loopback when enabled.
|
||||
Dashboard auth is LDAP-backed (separate from the gRPC API-key model). `/login` binds against `MxGateway:Ldap` and maps the user's LDAP groups to `Admin` or `Viewer` via `MxGateway:Dashboard:GroupToRole`, then issues an HTTP-only secure `__Host-MxGatewayDashboard` cookie. SignalR hubs at `/hubs/{snapshot,alarms,events}` accept either the cookie or a 30-minute bearer minted at `/hubs/token`. `Dashboard:AllowAnonymousLocalhost` bypasses auth on loopback when enabled. `Dashboard:DisableLogin` (default `false`) auto-authenticates every dashboard request — including remote browsers — as `Dashboard:AutoLoginUser` (default `multi-role`) with both Admin and Viewer roles; dev/test only, never enable in production.
|
||||
|
||||
## Process / Platform Notes
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
as a packaged license file instead. -->
|
||||
<PackageLicenseFile>LICENSE.txt</PackageLicenseFile>
|
||||
<!-- Versioning: bump per release. Symbols ship as snupkg. -->
|
||||
<Version>0.1.1</Version>
|
||||
<Version>0.1.2</Version>
|
||||
<IncludeSymbols>true</IncludeSymbols>
|
||||
<SymbolPackageFormat>snupkg</SymbolPackageFormat>
|
||||
<!-- Default: do NOT pack. Each project opts in. -->
|
||||
|
||||
@@ -121,6 +121,78 @@ can keep the full `MxCommandReply`, HRESULT, and status array when MXAccess
|
||||
itself rejects a command. `MxAccessException.Reply` contains the raw generated
|
||||
reply.
|
||||
|
||||
## Write Semantics And Common Pitfalls
|
||||
|
||||
These are MXAccess parity behaviors that surprise new callers. The gateway
|
||||
forwards them unchanged — it does not paper over them.
|
||||
|
||||
### Attributing a write to a user without `AuthenticateUser`
|
||||
|
||||
MXAccess only stamps a plain `Write`/`Write2` with a Galaxy user id when the
|
||||
item carries an active *supervisory* advise. If you are **not** using the
|
||||
verified/secured path (`AuthenticateUser` → `WriteSecured`/`WriteSecured2`) but
|
||||
still need the write attributed to a user id, you must first advise the item
|
||||
supervisory and then pass that user id on the write. Without the supervisory
|
||||
advise the `userId` on a plain write is ignored.
|
||||
|
||||
The library exposes `Advise`/`UnAdvise` as named helpers but not supervisory
|
||||
advise, so send it through the generic command channel:
|
||||
|
||||
```csharp
|
||||
await session.InvokeAsync(new MxCommandRequest
|
||||
{
|
||||
SessionId = session.SessionId,
|
||||
Command = new MxCommand
|
||||
{
|
||||
Kind = MxCommandKind.AdviseSupervisory,
|
||||
AdviseSupervisory = new AdviseSupervisoryCommand
|
||||
{
|
||||
ServerHandle = serverHandle,
|
||||
ItemHandle = itemHandle,
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
await session.WriteAsync(serverHandle, itemHandle, value.ToMxValue(), userId);
|
||||
```
|
||||
|
||||
The CLI exposes the same command as `advise-supervisory`, and `write` /
|
||||
`write2` take `--user-id`.
|
||||
|
||||
### Array writes replace the whole array
|
||||
|
||||
A write to an array attribute **replaces the entire array**; it is not an
|
||||
element-wise patch. To change a subset of elements, send the full array with
|
||||
the unchanged elements included. For example, to change 2 elements of a
|
||||
20-element array, build the `MxValue` from all 20 values (the 18 unchanged plus
|
||||
the 2 new ones). Sending only the 2 changed values overwrites the attribute
|
||||
with a 2-element array.
|
||||
|
||||
When only a few indices need changing and the rest should be reset to the
|
||||
element type's default, use `WriteArrayElementsAsync` instead of building the
|
||||
full array manually:
|
||||
|
||||
```csharp
|
||||
await session.WriteArrayElementsAsync(
|
||||
serverHandle, itemHandle,
|
||||
elementDataType: MxDataType.Integer,
|
||||
totalLength: 20,
|
||||
elements: new Dictionary<uint, MxValue>
|
||||
{
|
||||
[2] = 42.ToMxValue(),
|
||||
[7] = 99.ToMxValue(),
|
||||
});
|
||||
```
|
||||
|
||||
The gateway expands the sparse descriptor into a full `totalLength`-element
|
||||
array before forwarding to the worker. Indices not listed in `elements` are
|
||||
written as the element type's default — this is a **reset**, not a preserve;
|
||||
current values at those positions are discarded. `totalLength` is required and
|
||||
must match the declared length of the array attribute. Bare-name array items
|
||||
(`Area001.Pump001.Speed`) are auto-normalized to the `[]` form across the whole
|
||||
add family — `AddItem`, `AddItem2`, `AddItemBulk`, and `AddBufferedItem` — so the
|
||||
array attribute accepts the write.
|
||||
|
||||
## CLI Usage
|
||||
|
||||
The test CLI supports deterministic JSON output for automation:
|
||||
|
||||
@@ -3,6 +3,13 @@ using ZB.MOM.WW.MxGateway.Contracts.Proto.Galaxy;
|
||||
|
||||
namespace ZB.MOM.WW.MxGateway.Client.Cli;
|
||||
|
||||
/// <summary>
|
||||
/// Transport seam used by the CLI to drive gateway and Galaxy Repository
|
||||
/// RPCs, exposing only the operations the command surface needs. The
|
||||
/// production binding is <see cref="MxGatewayCliClientAdapter"/> (wrapping a
|
||||
/// real <c>MxGatewayClient</c>); tests substitute an in-memory fake so the
|
||||
/// command routing can be exercised without a live gateway.
|
||||
/// </summary>
|
||||
public interface IMxGatewayCliClient : IAsyncDisposable
|
||||
{
|
||||
/// <summary>
|
||||
|
||||
@@ -110,6 +110,8 @@ public static class MxGatewayClientCli
|
||||
.ConfigureAwait(false),
|
||||
"advise" => await AdviseAsync(arguments, client, standardOutput, cancellation.Token)
|
||||
.ConfigureAwait(false),
|
||||
"advise-supervisory" => await AdviseSupervisoryAsync(arguments, client, standardOutput, cancellation.Token)
|
||||
.ConfigureAwait(false),
|
||||
"subscribe-bulk" => await SubscribeBulkAsync(arguments, client, standardOutput, cancellation.Token)
|
||||
.ConfigureAwait(false),
|
||||
"unsubscribe-bulk" => await UnsubscribeBulkAsync(arguments, client, standardOutput, cancellation.Token)
|
||||
@@ -153,7 +155,10 @@ public static class MxGatewayClientCli
|
||||
}
|
||||
catch (Exception exception) when (exception is not OperationCanceledException)
|
||||
{
|
||||
string? apiKey = arguments.GetOptional("api-key");
|
||||
// Client.Dotnet-028: redact the *effective* key — from --api-key or the
|
||||
// --api-key-env environment variable — so an env-var-sourced key echoed
|
||||
// in a transport error never reaches stderr unredacted.
|
||||
string? apiKey = TryResolveApiKey(arguments);
|
||||
string message = MxGatewayCliSecretRedactor.Redact(exception.Message, apiKey);
|
||||
|
||||
if (forceJsonErrors || arguments.HasFlag("json"))
|
||||
@@ -278,6 +283,29 @@ public static class MxGatewayClientCli
|
||||
}
|
||||
|
||||
/// <summary>
/// Returns the effective gateway API key, or fails when none is configured.
/// </summary>
/// <param name="arguments">Parsed command-line arguments.</param>
/// <returns>The key produced by <see cref="TryResolveApiKey"/>.</returns>
/// <exception cref="ArgumentException">
/// <see cref="TryResolveApiKey"/> yielded <see langword="null"/>, an empty string, or whitespace.
/// </exception>
private static string ResolveApiKey(CliArguments arguments)
{
    string? resolvedKey = TryResolveApiKey(arguments);
    if (string.IsNullOrWhiteSpace(resolvedKey))
    {
        // Name the environment variable in the message so the caller knows
        // which one to set: the --api-key-env override, else the default.
        string environmentVariable = arguments.GetOptional("api-key-env") ?? "MXGATEWAY_API_KEY";

        throw new ArgumentException(
            $"Gateway API key is required. Pass --api-key or set {environmentVariable}.");
    }

    return resolvedKey;
}
|
||||
|
||||
/// <summary>
|
||||
/// Resolves the effective API key from <c>--api-key</c> or, failing that,
|
||||
/// the <c>--api-key-env</c>-named environment variable (default
|
||||
/// <c>MXGATEWAY_API_KEY</c>), returning <see langword="null"/> when neither
|
||||
/// is set. Unlike <see cref="ResolveApiKey"/> this never throws, so the
|
||||
/// error-redaction catch block can strip the env-var-sourced key from
|
||||
/// output (Client.Dotnet-028) without re-raising on the absent-key path.
|
||||
/// </summary>
|
||||
private static string? TryResolveApiKey(CliArguments arguments)
|
||||
{
|
||||
string? apiKey = arguments.GetOptional("api-key");
|
||||
if (!string.IsNullOrWhiteSpace(apiKey))
|
||||
@@ -288,14 +316,7 @@ public static class MxGatewayClientCli
|
||||
string apiKeyEnvironmentName = arguments.GetOptional("api-key-env")
|
||||
?? "MXGATEWAY_API_KEY";
|
||||
|
||||
apiKey = Environment.GetEnvironmentVariable(apiKeyEnvironmentName);
|
||||
if (!string.IsNullOrWhiteSpace(apiKey))
|
||||
{
|
||||
return apiKey;
|
||||
}
|
||||
|
||||
throw new ArgumentException(
|
||||
$"Gateway API key is required. Pass --api-key or set {apiKeyEnvironmentName}.");
|
||||
return Environment.GetEnvironmentVariable(apiKeyEnvironmentName);
|
||||
}
|
||||
|
||||
private static CancellationTokenSource CreateCancellation(CliArguments arguments, string command)
|
||||
@@ -303,7 +324,7 @@ public static class MxGatewayClientCli
|
||||
var cancellation = new CancellationTokenSource();
|
||||
// Long-running streaming commands run until Ctrl+C / cancellation by default;
|
||||
// a caller-supplied --timeout still applies if present.
|
||||
bool isLongRunning = command is "galaxy-watch";
|
||||
bool isLongRunning = command is "galaxy-watch" or "galaxy-browse";
|
||||
string? rawTimeout = arguments.GetOptional("timeout");
|
||||
if (isLongRunning && string.IsNullOrWhiteSpace(rawTimeout))
|
||||
{
|
||||
@@ -432,6 +453,28 @@ public static class MxGatewayClientCli
|
||||
cancellationToken);
|
||||
}
|
||||
|
||||
/// <summary>
/// Handles the <c>advise-supervisory</c> command: reads <c>--server-handle</c> and
/// <c>--item-handle</c>, wraps them in an <see cref="MxCommandKind.AdviseSupervisory"/>
/// command, and hands that command to <c>InvokeAndWriteAsync</c>.
/// </summary>
/// <param name="arguments">Parsed command-line arguments.</param>
/// <param name="client">Gateway client passed through to <c>InvokeAndWriteAsync</c>.</param>
/// <param name="output">Writer passed through to <c>InvokeAndWriteAsync</c>.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The task returned by <c>InvokeAndWriteAsync</c>.</returns>
private static Task<int> AdviseSupervisoryAsync(
    CliArguments arguments,
    IMxGatewayCliClient client,
    TextWriter output,
    CancellationToken cancellationToken)
{
    // Options are read in the same order as before: server handle first.
    int serverHandle = arguments.GetInt32("server-handle");
    int itemHandle = arguments.GetInt32("item-handle");

    var payload = new AdviseSupervisoryCommand
    {
        ServerHandle = serverHandle,
        ItemHandle = itemHandle,
    };

    var command = new MxCommand
    {
        Kind = MxCommandKind.AdviseSupervisory,
        AdviseSupervisory = payload,
    };

    return InvokeAndWriteAsync(arguments, client, output, command, cancellationToken);
}
|
||||
|
||||
private static Task<int> SubscribeBulkAsync(
|
||||
CliArguments arguments,
|
||||
IMxGatewayCliClient client,
|
||||
@@ -1985,6 +2028,7 @@ public static class MxGatewayClientCli
|
||||
or "register"
|
||||
or "add-item"
|
||||
or "advise"
|
||||
or "advise-supervisory"
|
||||
or "subscribe-bulk"
|
||||
or "unsubscribe-bulk"
|
||||
or "read-bulk"
|
||||
@@ -2047,6 +2091,7 @@ public static class MxGatewayClientCli
|
||||
writer.WriteLine("mxgw-dotnet register --session-id <id> --client-name <name> [--json]");
|
||||
writer.WriteLine("mxgw-dotnet add-item --session-id <id> --server-handle <n> --item <ref> [--json]");
|
||||
writer.WriteLine("mxgw-dotnet advise --session-id <id> --server-handle <n> --item-handle <n> [--json]");
|
||||
writer.WriteLine("mxgw-dotnet advise-supervisory --session-id <id> --server-handle <n> --item-handle <n> [--json]");
|
||||
writer.WriteLine("mxgw-dotnet subscribe-bulk --session-id <id> --server-handle <n> --items <ref,ref> [--json]");
|
||||
writer.WriteLine("mxgw-dotnet unsubscribe-bulk --session-id <id> --server-handle <n> --item-handles <n,n> [--json]");
|
||||
writer.WriteLine("mxgw-dotnet read-bulk --session-id <id> --server-handle <n> --items <ref,ref> [--timeout-ms <n>] [--json]");
|
||||
|
||||
@@ -82,6 +82,53 @@ public sealed class MxGatewayClientCliTests
|
||||
Assert.Equal(string.Empty, error.ToString());
|
||||
}
|
||||
|
||||
/// <summary>
/// Regression test for Client.Dotnet-030. <c>advise-supervisory</c> was wired into
/// the dispatch table but missing from the <c>IsKnownGatewayCommand</c> guard in
/// <see cref="MxGatewayClientCli"/>, so the guard rejected it with exit code 2
/// ("Unknown command") before dispatch ran. The test checks that the command is
/// recognized (no "Unknown command" on stderr) and that the handler runs
/// (exit code 0 and the reply written to stdout).
/// </summary>
[Fact]
public async Task RunAsync_AdviseSupervisory_IsRecognizedAndReachesDispatch()
{
    using var stdout = new StringWriter();
    using var stderr = new StringWriter();
    var gateway = new FakeCliClient();
    var cannedReply = new MxCommandReply
    {
        SessionId = "session-fixture",
        Kind = MxCommandKind.AdviseSupervisory,
        ProtocolStatus = new ProtocolStatus { Code = ProtocolStatusCode.Ok },
    };
    gateway.InvokeReplies.Enqueue(cannedReply);

    int exitCode = await MxGatewayClientCli.RunAsync(
        [
            "advise-supervisory",
            "--endpoint", "http://localhost:5000",
            "--api-key", "test-api-key",
            "--session-id", "session-fixture",
            "--server-handle", "12",
            "--item-handle", "34",
            "--json",
        ],
        stdout,
        stderr,
        _ => gateway);

    string errorText = stderr.ToString();

    // The "Unknown command" check comes first so a guard regression is
    // reported by name rather than as a bare exit-code mismatch.
    Assert.DoesNotContain("Unknown command", errorText);
    Assert.Equal(0, exitCode);
    Assert.Contains("MX_COMMAND_KIND_ADVISE_SUPERVISORY", stdout.ToString());
    Assert.Equal(string.Empty, errorText);
}
|
||||
|
||||
/// <summary>Verifies that error output redacts sensitive API key values.</summary>
|
||||
[Fact]
|
||||
public async Task RunAsync_ErrorOutput_RedactsApiKey()
|
||||
@@ -106,6 +153,48 @@ public sealed class MxGatewayClientCliTests
|
||||
Assert.Contains("[redacted]", error.ToString());
|
||||
}
|
||||
|
||||
/// <summary>
/// Regression test for Client.Dotnet-028. With no <c>--api-key</c> flag and the key
/// supplied through the variable named by <c>--api-key-env</c>, the error path must
/// still resolve the effective key and redact it. Guards against the catch block
/// going back to <c>GetOptional("api-key")</c> alone, which is null on this path
/// and would leave the key unredacted.
/// </summary>
[Fact]
public async Task RunAsync_ErrorOutput_RedactsApiKey_WhenSourcedFromEnvironmentVariable()
{
    const string envName = "MXGATEWAY_TEST_API_KEY_028";
    const string secret = "env-sourced-secret-key";

    // Environment variables are process-wide, so remember the prior value
    // and put it back in the finally block.
    string? originalValue = Environment.GetEnvironmentVariable(envName);
    Environment.SetEnvironmentVariable(envName, secret);

    try
    {
        using var stdout = new StringWriter();
        using var stderr = new StringWriter();

        int exitCode = await MxGatewayClientCli.RunAsync(
            [
                "open-session",
                "--endpoint", "http://localhost:5000",
                "--api-key-env", envName,
            ],
            stdout,
            stderr,
            _ => throw new InvalidOperationException($"boom {secret}"));

        string errorText = stderr.ToString();
        Assert.Equal(1, exitCode);
        Assert.DoesNotContain(secret, errorText);
        Assert.Contains("[redacted]", errorText);
    }
    finally
    {
        Environment.SetEnvironmentVariable(envName, originalValue);
    }
}
|
||||
|
||||
/// <summary>Verifies that stream-events with max-events limit stops output in non-JSON format.</summary>
|
||||
[Fact]
|
||||
public async Task RunAsync_StreamEvents_WithMaxEventsStopsNonJsonOutput()
|
||||
|
||||
@@ -303,6 +303,69 @@ public sealed class MxGatewayClientSessionTests
|
||||
Assert.Equal(cancellation.Token, Assert.Single(transport.InvokeCalls).CallOptions.CancellationToken);
|
||||
}
|
||||
|
||||
/// <summary>
/// Checks that <c>BuildSparseArray</c> returns an <see cref="MxValue"/> in the
/// <c>SparseArrayValue</c> case carrying the requested element type, total length,
/// and the same element instances that were passed in.
/// </summary>
[Fact]
public void BuildSparseArray_ProducesSparseArrayValueWithCorrectTotalLengthAndElements()
{
    MxValue first = 42.ToMxValue();
    MxValue fourth = 99.ToMxValue();
    var sparseInput = new Dictionary<uint, MxValue>
    {
        [0u] = first,
        [3u] = fourth,
    };

    MxValue built = MxGatewaySession.BuildSparseArray(MxDataType.Integer, totalLength: 10, sparseInput);

    Assert.Equal(MxValue.KindOneofCase.SparseArrayValue, built.KindCase);
    MxSparseArray descriptor = built.SparseArrayValue;
    Assert.Equal(10u, descriptor.TotalLength);
    Assert.Equal(MxDataType.Integer, descriptor.ElementDataType);
    Assert.Equal(2, descriptor.Elements.Count);

    // Look elements up by index rather than position.
    MxSparseElement atZero = Assert.Single(descriptor.Elements, e => e.Index == 0u);
    Assert.Same(first, atZero.Value);

    MxSparseElement atThree = Assert.Single(descriptor.Elements, e => e.Index == 3u);
    Assert.Same(fourth, atThree.Value);
}
|
||||
|
||||
/// <summary>
/// Checks that <c>WriteArrayElementsAsync</c> sends a single <c>Write</c> command
/// carrying the given handles and user id, with a <c>SparseArrayValue</c> payload
/// that holds the requested element type, total length, and element.
/// </summary>
[Fact]
public async Task WriteArrayElementsAsync_BuildsWriteCommandWithSparseArrayValue()
{
    FakeGatewayTransport transport = CreateTransport();
    var okReply = new MxCommandReply
    {
        SessionId = "session-fixture",
        Kind = MxCommandKind.Write,
        ProtocolStatus = new ProtocolStatus { Code = ProtocolStatusCode.Ok },
    };
    transport.AddInvokeReply(okReply);
    await using MxGatewayClient client = CreateClient(transport);
    MxGatewaySession session = await client.OpenSessionAsync();
    var sparseInput = new Dictionary<uint, MxValue> { [1u] = 7.ToMxValue() };

    await session.WriteArrayElementsAsync(
        serverHandle: 12,
        itemHandle: 34,
        elementDataType: MxDataType.Integer,
        totalLength: 5,
        elements: sparseInput,
        userId: 56);

    MxCommandRequest sent = Assert.Single(transport.InvokeCalls).Request;
    Assert.Equal(MxCommandKind.Write, sent.Command.Kind);
    Assert.Equal(12, sent.Command.Write.ServerHandle);
    Assert.Equal(34, sent.Command.Write.ItemHandle);
    Assert.Equal(56, sent.Command.Write.UserId);

    MxValue payload = sent.Command.Write.Value;
    Assert.Equal(MxValue.KindOneofCase.SparseArrayValue, payload.KindCase);
    Assert.Equal(5u, payload.SparseArrayValue.TotalLength);
    Assert.Equal(MxDataType.Integer, payload.SparseArrayValue.ElementDataType);

    MxSparseElement only = Assert.Single(payload.SparseArrayValue.Elements);
    Assert.Equal(1u, only.Index);
    Assert.Equal(7, only.Value.Int32Value);
}
|
||||
|
||||
private static MxGatewayClient CreateClient(FakeGatewayTransport transport)
|
||||
{
|
||||
return new MxGatewayClient(transport.Options, transport);
|
||||
|
||||
@@ -12,6 +12,12 @@ public sealed class LazyBrowseNode
|
||||
{
|
||||
private readonly GalaxyRepositoryClient _client;
|
||||
private readonly BrowseChildrenOptions _options;
|
||||
// Client.Dotnet-027 (Won't Fix): this gate is used only via WaitAsync/Release and
|
||||
// never via AvailableWaitHandle, so SemaphoreSlim allocates no kernel wait handle —
|
||||
// it holds no unmanaged/OS handle to leak. It is pure managed memory whose lifetime
|
||||
// is the node's, so the type is intentionally not IDisposable: making it disposable
|
||||
// would push per-node disposal onto every tree consumer (thousands of nodes) for no
|
||||
// resource benefit.
|
||||
private readonly SemaphoreSlim _expandLock = new(1, 1);
|
||||
|
||||
// Published once, under _expandLock, when expansion completes. Lock-free readers
|
||||
|
||||
@@ -687,6 +687,63 @@ public sealed class MxGatewaySession : IAsyncDisposable
|
||||
reply.EnsureProtocolSuccess().EnsureMxAccessSuccess();
|
||||
}
|
||||
|
||||
/// <summary>
/// Writes selected indices of an array item, with default-fill semantics for the rest.
/// </summary>
/// <remarks>
/// The sparse descriptor is expanded by the gateway into a full array of
/// <c>totalLength</c> elements before it reaches the worker. Any index missing from
/// <paramref name="elements"/> is written as the element type's default value. That is a
/// RESET, not a preserve: whatever those positions currently hold is discarded.
/// <paramref name="totalLength"/> is required and must match the declared length of the
/// array attribute.
/// </remarks>
/// <param name="serverHandle">The ServerHandle from register.</param>
/// <param name="itemHandle">The ItemHandle from add-item.</param>
/// <param name="elementDataType">The MXAccess data type of each element.</param>
/// <param name="totalLength">The total declared length of the target array attribute.</param>
/// <param name="elements">Map of zero-based array index to scalar <see cref="MxValue"/>.</param>
/// <param name="userId">User ID context for the write.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The task returned by <c>WriteAsync</c> for the sparse-array value.</returns>
/// <exception cref="ArgumentNullException"><paramref name="elements"/> is <see langword="null"/>.</exception>
public Task WriteArrayElementsAsync(
    int serverHandle,
    int itemHandle,
    MxDataType elementDataType,
    uint totalLength,
    IReadOnlyDictionary<uint, MxValue> elements,
    int userId = 0,
    CancellationToken cancellationToken = default)
{
    // Not an async method, so this guard throws to the caller directly
    // instead of surfacing through the returned task.
    ArgumentNullException.ThrowIfNull(elements);

    return WriteAsync(
        serverHandle,
        itemHandle,
        BuildSparseArray(elementDataType, totalLength, elements),
        userId,
        cancellationToken);
}
|
||||
|
||||
/// <summary>
/// Builds an <see cref="MxValue"/> whose <see cref="MxValue.SparseArrayValue"/> describes a
/// default-fill partial array write.
/// </summary>
/// <remarks>
/// Each entry of <paramref name="elements"/> becomes one <see cref="MxSparseElement"/>, added
/// in the map's enumeration order. Arguments are validated here because this helper is
/// reachable without going through <c>WriteArrayElementsAsync</c>.
/// </remarks>
/// <param name="elementDataType">The MXAccess data type of each element.</param>
/// <param name="totalLength">The total declared length of the target array attribute.</param>
/// <param name="elements">Map of zero-based array index to scalar <see cref="MxValue"/>.</param>
/// <returns>An <see cref="MxValue"/> with <see cref="MxValue.KindOneofCase.SparseArrayValue"/> set.</returns>
/// <exception cref="ArgumentNullException"><paramref name="elements"/> is <see langword="null"/>.</exception>
/// <exception cref="ArgumentException">
/// An entry in <paramref name="elements"/> maps an index to a <see langword="null"/> value.
/// </exception>
internal static MxValue BuildSparseArray(
    MxDataType elementDataType,
    uint totalLength,
    IReadOnlyDictionary<uint, MxValue> elements)
{
    // Without this guard a null map fails inside the foreach with a
    // NullReferenceException that does not name the offending argument.
    ArgumentNullException.ThrowIfNull(elements);

    MxSparseArray sparse = new()
    {
        ElementDataType = elementDataType,
        TotalLength = totalLength,
    };

    foreach ((uint index, MxValue value) in elements)
    {
        // NOTE(review): the generated setter presumably stores a null message
        // as "unset", which would produce an element with an index but no
        // value. Reject it here, where the bad index can be reported.
        if (value is null)
        {
            throw new ArgumentException(
                $"Sparse array element at index {index} has no value.",
                nameof(elements));
        }

        sparse.Elements.Add(new MxSparseElement { Index = index, Value = value });
    }

    return new MxValue { SparseArrayValue = sparse };
}
|
||||
|
||||
/// <summary>
|
||||
/// Writes a value to an item on the MXAccess server without error checking.
|
||||
/// </summary>
|
||||
|
||||
@@ -99,6 +99,79 @@ call returns a `StreamAlarmsClient`; cancel its context to terminate the
|
||||
stream. All three pass straight through to the gateway's central alarm
|
||||
monitor.
|
||||
|
||||
## Write Semantics And Common Pitfalls
|
||||
|
||||
These are MXAccess parity behaviors that surprise new callers. The gateway
|
||||
forwards them unchanged — it does not paper over them.
|
||||
|
||||
### Attributing a write to a user without `AuthenticateUser`
|
||||
|
||||
MXAccess only stamps a plain `Write`/`Write2` with a Galaxy user id when the
|
||||
item carries an active *supervisory* advise. If you are **not** using the
|
||||
verified/secured path (`AuthenticateUser` → `WriteSecured`/`WriteSecured2`) but
|
||||
still need the write attributed to a user id, you must first advise the item
|
||||
supervisory and then pass that user id on the write. Without the supervisory
|
||||
advise the `userID` on a plain write is ignored.
|
||||
|
||||
The session exposes `Advise`/`UnAdvise` but not supervisory advise, so send it
|
||||
through the generic command channel:
|
||||
|
||||
```go
|
||||
_, err := client.Invoke(ctx, &pb.MxCommandRequest{
|
||||
SessionId: session.ID(),
|
||||
Command: &pb.MxCommand{
|
||||
Kind: pb.MxCommandKind_MX_COMMAND_KIND_ADVISE_SUPERVISORY,
|
||||
Payload: &pb.MxCommand_AdviseSupervisory{
|
||||
AdviseSupervisory: &pb.AdviseSupervisoryCommand{
|
||||
ServerHandle: serverHandle,
|
||||
ItemHandle: itemHandle,
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
err = session.Write(ctx, serverHandle, itemHandle, value, userID)
|
||||
```
|
||||
|
||||
The CLI exposes the same command as `advise-supervisory`, and `write`
|
||||
takes `-user-id`.
|
||||
|
||||
### Array writes replace the whole array
|
||||
|
||||
A write to an array attribute **replaces the entire array**; it is not an
|
||||
element-wise patch. To change a subset of elements, send the full array with
|
||||
the unchanged elements included. For example, to change 2 elements of a
|
||||
20-element array, build the `MxValue` from all 20 values (the 18 unchanged plus
|
||||
the 2 new ones). Sending only the 2 changed values overwrites the attribute
|
||||
with a 2-element array.
|
||||
|
||||
`Session.WriteArrayElements` offers a default-fill shorthand: pass only the
|
||||
indices you want to set along with a `totalLength`. The gateway expands the
|
||||
sparse representation into a full array before forwarding to MXAccess — every
|
||||
unmentioned index receives the element type's zero value (boolean `false`,
|
||||
integer `0`, float `0.0`, string `""`, time = Unix epoch). This is a **RESET**
|
||||
of unmentioned indices, not a preserve of existing values. Use the full-array
|
||||
form (read-modify-write) when existing element values must be preserved.
|
||||
|
||||
```go
|
||||
// Set element [3] of a 10-element float array; all other indices reset to 0.0.
|
||||
err = session.WriteArrayElements(
|
||||
ctx,
|
||||
serverHandle, itemHandle,
|
||||
mxgateway.DataTypeFloat,
|
||||
10, // totalLength
|
||||
map[uint32]*mxgateway.MxValue{
|
||||
3: mxgateway.FloatValue(1.5),
|
||||
},
|
||||
userID,
|
||||
)
|
||||
```
|
||||
|
||||
`AddItem`, `AddItem2`, `AddItemBulk`, and `AddBufferedItem` auto-normalize a
|
||||
bare array attribute name to the `[]` array address form expected by MXAccess,
|
||||
so callers do not need to append `[]` themselves. Both forms are accepted;
|
||||
the gateway collapses any resulting duplicates.
|
||||
|
||||
## Galaxy Repository browse
|
||||
|
||||
The `GalaxyRepository` service (proto package `galaxy_repository.v1`) is a
|
||||
@@ -247,24 +320,79 @@ one line per event in text mode or one JSON object per event with `-json`.
|
||||
The `mxgw-go` CLI emits JSON with redacted API keys for commands that connect to
|
||||
the gateway. Example invocations follow the subcommand reference below.
|
||||
|
||||
### Subcommand reference
|
||||
|
||||
Every subcommand wired into the CLI. All accept the common flags
|
||||
(`-endpoint`, `-plaintext`, `-api-key` / `-api-key-env`, `-ca-cert`,
|
||||
`-server-name-override`, `-call-timeout`) and most accept `-json`.
|
||||
|
||||
| Command | Purpose |
|
||||
|---|---|
|
||||
| `version` | Print client/contract versions. |
|
||||
| `open-session` | Open a gateway session and print its id. |
|
||||
| `close-session` | Close a session by id. |
|
||||
| `ping` | Round-trip a `PING` command (`-session-id`, `-message`). |
|
||||
| `register` | Register a client name on a session (`-session-id`, `-client-name`). |
|
||||
| `add-item` | Add an item handle (`-session-id`, `-server-handle`, `-item`). |
|
||||
| `advise` | Advise (subscribe) one item (`-session-id`, `-server-handle`, `-item-handle`). |
|
||||
| `advise-supervisory` | Advise one item supervisory — required before a user-id-attributed plain `write`. |
|
||||
| `subscribe-bulk` | Advise many items in one call. |
|
||||
| `unsubscribe-bulk` | Unadvise many item handles in one call. |
|
||||
| `read-bulk` | Read snapshots for many item handles in one call. |
|
||||
| `write` | Write one value (`-type`, `-value`). |
|
||||
| `write-bulk` | Write many values (`-item-handles`, `-values`, counts must match). |
|
||||
| `write2-bulk` | `write-bulk` with a shared `-timestamp-value` (RFC 3339). |
|
||||
| `write-secured-bulk` | Secured bulk write (`-current-user-id`, `-verifier-user-id`). |
|
||||
| `write-secured2-bulk` | Secured bulk write with a shared timestamp. |
|
||||
| `bench-read-bulk` | Throughput benchmark (`-duration-seconds`, `-warmup-seconds`, `-bulk-size`). |
|
||||
| `stream-events` | Stream item-value events for a session (`-session-id`, `-limit`). |
|
||||
| `stream-alarms` | Stream the alarm feed (`-filter-prefix`, `-limit`). |
|
||||
| `acknowledge-alarm` | Acknowledge an alarm reference. |
|
||||
| `smoke` | End-to-end smoke workflow against one item. |
|
||||
| `galaxy-test-connection` | Probe the Galaxy Repository RPC connection. |
|
||||
| `galaxy-last-deploy` | Print the most recent deploy event. |
|
||||
| `galaxy-discover` | Discover deployed objects. |
|
||||
| `galaxy-watch` | Stream deploy events until Ctrl+C or `-limit`. |
|
||||
| `galaxy-browse` | Lazy/eager browse of the Galaxy object tree. |
|
||||
| `batch` | Read commands from stdin (see below). |
|
||||
|
||||
```powershell
|
||||
go run ./cmd/mxgw-go version -json
|
||||
go run ./cmd/mxgw-go open-session -endpoint localhost:5000 -plaintext -json
|
||||
go run ./cmd/mxgw-go ping -session-id <id> -plaintext -json
|
||||
go run ./cmd/mxgw-go register -session-id <id> -client-name mxgw-go -plaintext -json
|
||||
go run ./cmd/mxgw-go add-item -session-id <id> -server-handle 1 -item Area001.Tag.Value -plaintext -json
|
||||
go run ./cmd/mxgw-go advise -session-id <id> -server-handle 1 -item-handle 1 -plaintext -json
|
||||
go run ./cmd/mxgw-go write -session-id <id> -server-handle 1 -item-handle 1 -type int32 -value 123 -plaintext -json
|
||||
go run ./cmd/mxgw-go write-bulk -session-id <id> -server-handle 1 -item-handles 1,2 -values 10,20 -type int32 -plaintext -json
|
||||
go run ./cmd/mxgw-go read-bulk -session-id <id> -item-handles 1,2 -plaintext -json
|
||||
go run ./cmd/mxgw-go stream-events -session-id <id> -plaintext -json
|
||||
go run ./cmd/mxgw-go stream-alarms -plaintext -json
|
||||
go run ./cmd/mxgw-go smoke -item Area001.Tag.Value -plaintext -json
|
||||
go run ./cmd/mxgw-go galaxy-test-connection -plaintext -json
|
||||
go run ./cmd/mxgw-go galaxy-last-deploy -plaintext -json
|
||||
go run ./cmd/mxgw-go galaxy-discover -plaintext -json
|
||||
go run ./cmd/mxgw-go galaxy-watch -plaintext -json
|
||||
go run ./cmd/mxgw-go galaxy-browse -plaintext -json
|
||||
```
|
||||
|
||||
Use `-api-key-env MXGATEWAY_API_KEY` or `-api-key <key>` when authentication is
|
||||
enabled. CLI output redacts the key value and never writes the raw secret.
|
||||
|
||||
### `batch` mode
|
||||
|
||||
`batch` reads one command line at a time from stdin and dispatches each through
|
||||
the same routing as the standalone subcommands; it is the interface the
|
||||
cross-language E2E harness drives. After every command's output it writes the
|
||||
end-of-result sentinel line `__MXGW_BATCH_EOR__` to stdout and flushes, so the
|
||||
harness can frame each result. Blank/whitespace-only lines are skipped; the
|
||||
session ends at stdin EOF (or on an over-long line, see below). Command errors are serialised as a JSON object
|
||||
(`{"error":...,"type":"error"}`) to stdout (not stderr) and still followed by the
|
||||
sentinel, so a failing command does not abort the batch. The input scanner
|
||||
buffer is widened to 16 MiB so a single long command line (e.g. a bulk write with
|
||||
thousands of handles) does not trip bufio's default 64 KiB token-too-long limit;
|
||||
a line that still exceeds 16 MiB surfaces as a framed error and ends the session.
|
||||
|
||||
Use TLS options for a secured gateway:
|
||||
|
||||
```powershell
|
||||
|
||||
@@ -21,6 +21,7 @@ import (
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
pb "gitea.dohertylan.com/dohertj2/mxaccessgw/clients/go/internal/generated"
|
||||
"gitea.dohertylan.com/dohertj2/mxaccessgw/clients/go/mxgateway"
|
||||
"google.golang.org/protobuf/encoding/protojson"
|
||||
"google.golang.org/protobuf/reflect/protoreflect"
|
||||
@@ -87,6 +88,8 @@ func runWithIO(ctx context.Context, args []string, stdout, stderr io.Writer) err
|
||||
return runAddItem(ctx, args[1:], stdout, stderr)
|
||||
case "advise":
|
||||
return runAdvise(ctx, args[1:], stdout, stderr)
|
||||
case "advise-supervisory":
|
||||
return runAdviseSupervisory(ctx, args[1:], stdout, stderr)
|
||||
case "subscribe-bulk":
|
||||
return runSubscribeBulk(ctx, args[1:], stdout, stderr)
|
||||
case "unsubscribe-bulk":
|
||||
@@ -358,6 +361,43 @@ func runAdvise(ctx context.Context, args []string, stdout, stderr io.Writer) err
|
||||
return writeCommandOutput(stdout, *jsonOutput, "advise", options, reply, err)
|
||||
}
|
||||
|
||||
func runAdviseSupervisory(ctx context.Context, args []string, stdout, stderr io.Writer) error {
|
||||
flags := flag.NewFlagSet("advise-supervisory", flag.ContinueOnError)
|
||||
flags.SetOutput(stderr)
|
||||
common := bindCommonFlags(flags)
|
||||
jsonOutput := flags.Bool("json", false, "write JSON output")
|
||||
sessionID := flags.String("session-id", "", "gateway session id")
|
||||
serverHandle := flags.Int("server-handle", 0, "MXAccess server handle")
|
||||
itemHandle := flags.Int("item-handle", 0, "MXAccess item handle")
|
||||
|
||||
if err := flags.Parse(args); err != nil {
|
||||
return err
|
||||
}
|
||||
if *sessionID == "" {
|
||||
return errors.New("session-id is required")
|
||||
}
|
||||
|
||||
client, options, err := dialForCommand(ctx, common)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer client.Close()
|
||||
|
||||
reply, err := client.Invoke(ctx, &pb.MxCommandRequest{
|
||||
SessionId: *sessionID,
|
||||
Command: &pb.MxCommand{
|
||||
Kind: pb.MxCommandKind_MX_COMMAND_KIND_ADVISE_SUPERVISORY,
|
||||
Payload: &pb.MxCommand_AdviseSupervisory{
|
||||
AdviseSupervisory: &pb.AdviseSupervisoryCommand{
|
||||
ServerHandle: int32(*serverHandle),
|
||||
ItemHandle: int32(*itemHandle),
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
return writeCommandOutput(stdout, *jsonOutput, "advise-supervisory", options, reply, err)
|
||||
}
|
||||
|
||||
func runSubscribeBulk(ctx context.Context, args []string, stdout, stderr io.Writer) error {
|
||||
flags := flag.NewFlagSet("subscribe-bulk", flag.ContinueOnError)
|
||||
flags.SetOutput(stderr)
|
||||
@@ -837,7 +877,14 @@ func runStreamEvents(ctx context.Context, args []string, stdout, stderr io.Write
|
||||
defer client.Close()
|
||||
|
||||
session := mxgateway.NewSessionForID(client, *sessionID)
|
||||
streamCtx, cancelStream := context.WithCancel(ctx)
|
||||
|
||||
// Ctrl+C on a long-running stream-events command cancels the gRPC stream
|
||||
// cleanly (the gateway sees codes.Canceled rather than a torn TCP
|
||||
// connection) and the deferred subscription.Close()/client.Close() run.
|
||||
signalCtx, stopSignals := signal.NotifyContext(ctx, os.Interrupt, syscall.SIGTERM)
|
||||
defer stopSignals()
|
||||
|
||||
streamCtx, cancelStream := context.WithCancel(signalCtx)
|
||||
defer cancelStream()
|
||||
subscription, err := session.SubscribeEventsAfter(streamCtx, *after)
|
||||
if err != nil {
|
||||
@@ -1035,15 +1082,17 @@ func runSmoke(ctx context.Context, args []string, stdout, stderr io.Writer) erro
|
||||
}
|
||||
|
||||
func closeSmokeSession(ctx context.Context, session *mxgateway.Session, primaryErr error) error {
|
||||
closeCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
// Compute the close timeout once so a single context (and a single
|
||||
// deferred cancel) is allocated: default 5s, shortened to the caller's
|
||||
// remaining deadline when that is sooner.
|
||||
closeTimeout := 5 * time.Second
|
||||
if deadline, ok := ctx.Deadline(); ok {
|
||||
if until := time.Until(deadline); until > 0 && until < 5*time.Second {
|
||||
cancel()
|
||||
closeCtx, cancel = context.WithTimeout(context.Background(), until)
|
||||
defer cancel()
|
||||
if until := time.Until(deadline); until > 0 && until < closeTimeout {
|
||||
closeTimeout = until
|
||||
}
|
||||
}
|
||||
closeCtx, cancel := context.WithTimeout(context.Background(), closeTimeout)
|
||||
defer cancel()
|
||||
|
||||
_, closeErr := session.Close(closeCtx)
|
||||
if primaryErr != nil {
|
||||
@@ -1246,7 +1295,7 @@ type protojsonMessage interface {
|
||||
}
|
||||
|
||||
func writeUsage(writer io.Writer) {
|
||||
fmt.Fprintln(writer, "usage: mxgw-go <version|open-session|close-session|ping|register|add-item|advise|subscribe-bulk|unsubscribe-bulk|read-bulk|write-bulk|write2-bulk|write-secured-bulk|write-secured2-bulk|bench-read-bulk|write|stream-events|stream-alarms|acknowledge-alarm|smoke|galaxy-test-connection|galaxy-last-deploy|galaxy-discover|galaxy-watch|galaxy-browse|batch>")
|
||||
fmt.Fprintln(writer, "usage: mxgw-go <version|open-session|close-session|ping|register|add-item|advise|advise-supervisory|subscribe-bulk|unsubscribe-bulk|read-bulk|write-bulk|write2-bulk|write-secured-bulk|write-secured2-bulk|bench-read-bulk|write|stream-events|stream-alarms|acknowledge-alarm|smoke|galaxy-test-connection|galaxy-last-deploy|galaxy-discover|galaxy-watch|galaxy-browse|batch>")
|
||||
}
|
||||
|
||||
// batchEOR is the end-of-result sentinel emitted to stdout after every command
|
||||
@@ -1490,6 +1539,12 @@ func runGalaxyWatch(ctx context.Context, args []string, stdout, stderr io.Writer
|
||||
count++
|
||||
if *limit > 0 && count >= *limit {
|
||||
cancelStream()
|
||||
// Drain so the WatchDeployEvents goroutine can exit instead
|
||||
// of blocking on a send into the buffered events channel
|
||||
// while the deferred client.Close() tears the stream down
|
||||
// underneath it (mirrors the signal-cancel branch below).
|
||||
for range events {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
case streamErr, ok := <-errs:
|
||||
|
||||
@@ -537,3 +537,53 @@ func TestRunBatchHandlesLongCommandLine(t *testing.T) {
|
||||
t.Fatalf("EOR sentinel count = %d, want 2 (one per command, even when first is too long); out length = %d", count, len(out))
|
||||
}
|
||||
}
|
||||
|
||||
// TestRunBenchReadBulkRejectsNonPositiveDuration pins the -duration-seconds
|
||||
// positivity guard so the bench window cannot be configured to zero/negative.
|
||||
func TestRunBenchReadBulkRejectsNonPositiveDuration(t *testing.T) {
|
||||
var stdout, stderr bytes.Buffer
|
||||
err := runWithIO(t.Context(), []string{"bench-read-bulk", "-duration-seconds", "0"}, &stdout, &stderr)
|
||||
if err == nil || !strings.Contains(err.Error(), "duration-seconds must be positive") {
|
||||
t.Fatalf("bench-read-bulk -duration-seconds 0 error = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRunStreamEventsRequiresSessionID pins the session-id guard so stream-events
|
||||
// fails fast before dialing when no session id is supplied.
|
||||
func TestRunStreamEventsRequiresSessionID(t *testing.T) {
|
||||
var stdout, stderr bytes.Buffer
|
||||
err := runWithIO(t.Context(), []string{"stream-events", "-plaintext", "-api-key", "test"}, &stdout, &stderr)
|
||||
if err == nil || !strings.Contains(err.Error(), "session-id is required") {
|
||||
t.Fatalf("stream-events without -session-id error = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRunAdviseSupervisoryRequiresSessionID pins the session-id guard so
|
||||
// advise-supervisory fails fast before dialing when no session id is supplied.
|
||||
func TestRunAdviseSupervisoryRequiresSessionID(t *testing.T) {
|
||||
var stdout, stderr bytes.Buffer
|
||||
err := runWithIO(t.Context(), []string{"advise-supervisory", "-plaintext", "-api-key", "test"}, &stdout, &stderr)
|
||||
if err == nil || !strings.Contains(err.Error(), "session-id is required") {
|
||||
t.Fatalf("advise-supervisory without -session-id error = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRunWriteBulkVariantRejectsMismatchedHandlesAndValues pins the len-mismatch
|
||||
// guard so a write-bulk with unequal item-handles / values counts fails fast
|
||||
// before any dial.
|
||||
func TestRunWriteBulkVariantRejectsMismatchedHandlesAndValues(t *testing.T) {
|
||||
var stdout, stderr bytes.Buffer
|
||||
err := runWithIO(t.Context(), []string{
|
||||
"write-bulk",
|
||||
"-session-id", "s1",
|
||||
"-server-handle", "1",
|
||||
"-item-handles", "1,2",
|
||||
"-values", "10",
|
||||
"-type", "int32",
|
||||
"-plaintext",
|
||||
"-api-key", "test",
|
||||
}, &stdout, &stderr)
|
||||
if err == nil || !strings.Contains(err.Error(), "does not match values count") {
|
||||
t.Fatalf("write-bulk mismatched handles/values error = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -666,3 +666,124 @@ func authorizationFromContext(ctx context.Context) string {
|
||||
}
|
||||
return values[0]
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// WriteArrayElements / buildSparseArrayValue unit tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func TestBuildSparseArrayValueSetsSparseOneof(t *testing.T) {
|
||||
elements := map[uint32]*MxValue{
|
||||
2: Int32Value(99),
|
||||
0: Int32Value(10),
|
||||
}
|
||||
v := buildSparseArrayValue(DataTypeInteger, 5, elements)
|
||||
|
||||
sa, ok := v.Kind.(*pb.MxValue_SparseArrayValue)
|
||||
if !ok {
|
||||
t.Fatalf("Kind is %T, want *pb.MxValue_SparseArrayValue", v.Kind)
|
||||
}
|
||||
got := sa.SparseArrayValue
|
||||
if got.GetElementDataType() != DataTypeInteger {
|
||||
t.Errorf("ElementDataType = %v, want DataTypeInteger", got.GetElementDataType())
|
||||
}
|
||||
if got.GetTotalLength() != 5 {
|
||||
t.Errorf("TotalLength = %d, want 5", got.GetTotalLength())
|
||||
}
|
||||
if len(got.GetElements()) != 2 {
|
||||
t.Fatalf("len(Elements) = %d, want 2", len(got.GetElements()))
|
||||
}
|
||||
// Elements must be sorted by index (ascending).
|
||||
if got.GetElements()[0].GetIndex() != 0 {
|
||||
t.Errorf("Elements[0].Index = %d, want 0", got.GetElements()[0].GetIndex())
|
||||
}
|
||||
if got.GetElements()[0].GetValue().GetInt32Value() != 10 {
|
||||
t.Errorf("Elements[0].Value = %v, want 10", got.GetElements()[0].GetValue())
|
||||
}
|
||||
if got.GetElements()[1].GetIndex() != 2 {
|
||||
t.Errorf("Elements[1].Index = %d, want 2", got.GetElements()[1].GetIndex())
|
||||
}
|
||||
if got.GetElements()[1].GetValue().GetInt32Value() != 99 {
|
||||
t.Errorf("Elements[1].Value = %v, want 99", got.GetElements()[1].GetValue())
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildSparseArrayValueEmptyMapProducesEmptyElements(t *testing.T) {
|
||||
v := buildSparseArrayValue(DataTypeBoolean, 4, map[uint32]*MxValue{})
|
||||
|
||||
sa, ok := v.Kind.(*pb.MxValue_SparseArrayValue)
|
||||
if !ok {
|
||||
t.Fatalf("Kind is %T, want *pb.MxValue_SparseArrayValue", v.Kind)
|
||||
}
|
||||
if len(sa.SparseArrayValue.GetElements()) != 0 {
|
||||
t.Errorf("len(Elements) = %d, want 0", len(sa.SparseArrayValue.GetElements()))
|
||||
}
|
||||
if sa.SparseArrayValue.GetTotalLength() != 4 {
|
||||
t.Errorf("TotalLength = %d, want 4", sa.SparseArrayValue.GetTotalLength())
|
||||
}
|
||||
}
|
||||
|
||||
func TestWriteArrayElementsSendsWriteCommandWithSparseOneof(t *testing.T) {
|
||||
fake := &fakeGatewayServer{
|
||||
invokeReply: &pb.MxCommandReply{
|
||||
SessionId: "session-1",
|
||||
Kind: pb.MxCommandKind_MX_COMMAND_KIND_WRITE,
|
||||
ProtocolStatus: &pb.ProtocolStatus{
|
||||
Code: pb.ProtocolStatusCode_PROTOCOL_STATUS_CODE_OK,
|
||||
},
|
||||
},
|
||||
}
|
||||
client, cleanup := newBufconnClient(t, fake)
|
||||
defer cleanup()
|
||||
session := NewSessionForID(client, "session-1")
|
||||
|
||||
err := session.WriteArrayElements(
|
||||
context.Background(),
|
||||
1, 2,
|
||||
DataTypeFloat,
|
||||
10,
|
||||
map[uint32]*MxValue{3: FloatValue(1.5)},
|
||||
42,
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("WriteArrayElements() error = %v", err)
|
||||
}
|
||||
|
||||
cmd := fake.invokeRequest.GetCommand()
|
||||
if cmd.GetKind() != pb.MxCommandKind_MX_COMMAND_KIND_WRITE {
|
||||
t.Fatalf("command kind = %s, want WRITE", cmd.GetKind())
|
||||
}
|
||||
w := cmd.GetWrite()
|
||||
if w.GetServerHandle() != 1 {
|
||||
t.Fatalf("server handle = %d, want 1", w.GetServerHandle())
|
||||
}
|
||||
if w.GetItemHandle() != 2 {
|
||||
t.Fatalf("item handle = %d, want 2", w.GetItemHandle())
|
||||
}
|
||||
if w.GetUserId() == 0 {
|
||||
t.Fatal("user id = 0, want non-zero")
|
||||
}
|
||||
if w.GetUserId() != 42 {
|
||||
t.Fatalf("user id = %d, want 42", w.GetUserId())
|
||||
}
|
||||
val := w.GetValue()
|
||||
sa, ok := val.Kind.(*pb.MxValue_SparseArrayValue)
|
||||
if !ok {
|
||||
t.Fatalf("value kind is %T, want *pb.MxValue_SparseArrayValue", val.Kind)
|
||||
}
|
||||
if sa.SparseArrayValue.GetTotalLength() != 10 {
|
||||
t.Errorf("TotalLength = %d, want 10", sa.SparseArrayValue.GetTotalLength())
|
||||
}
|
||||
if sa.SparseArrayValue.GetElementDataType() != DataTypeFloat {
|
||||
t.Errorf("ElementDataType = %v, want DataTypeFloat", sa.SparseArrayValue.GetElementDataType())
|
||||
}
|
||||
if len(sa.SparseArrayValue.GetElements()) != 1 {
|
||||
t.Fatalf("len(Elements) = %d, want 1", len(sa.SparseArrayValue.GetElements()))
|
||||
}
|
||||
elem := sa.SparseArrayValue.GetElements()[0]
|
||||
if elem.GetIndex() != 3 {
|
||||
t.Errorf("element index = %d, want 3", elem.GetIndex())
|
||||
}
|
||||
if elem.GetValue().GetFloatValue() != 1.5 {
|
||||
t.Errorf("element float value = %v, want 1.5", elem.GetValue().GetFloatValue())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -580,6 +581,57 @@ func (s *Session) WriteRaw(ctx context.Context, serverHandle, itemHandle int32,
|
||||
})
|
||||
}
|
||||
|
||||
// WriteArrayElements writes a sparse, default-filled array: only the given
|
||||
// elements (index → scalar value) are set; every unmentioned index up to
|
||||
// totalLength is written as the element type's default (false / 0 / "" / Unix
|
||||
// epoch for time). The gateway expands the sparse representation into a full
|
||||
// array write before forwarding to MXAccess — this is a RESET of unmentioned
|
||||
// indices, not a preserve: the values previously stored at those indices are
|
||||
|
|
||||
// discarded. Use a full-array Write when existing values must be kept.
|
||||
//
|
||||
// elementDataType must be a scalar MXAccess type (Boolean, Integer, Float,
|
||||
// Double, String, or Time). totalLength must be at least as large as the
|
||||
// highest index in elements plus one (not checked here before calling Write).
|
||||
func (s *Session) WriteArrayElements(
|
||||
ctx context.Context,
|
||||
serverHandle, itemHandle int32,
|
||||
elementDataType MxDataType,
|
||||
totalLength uint32,
|
||||
elements map[uint32]*MxValue,
|
||||
userID int32,
|
||||
) error {
|
||||
return s.Write(ctx, serverHandle, itemHandle, buildSparseArrayValue(elementDataType, totalLength, elements), userID)
|
||||
}
|
||||
|
||||
// buildSparseArrayValue constructs the MxValue carrying an MxSparseArray oneof
|
||||
// arm from a map of index → scalar MxValue. Keys are visited in ascending
|
||||
// order so the produced slice is deterministic (important for test assertions).
|
||||
func buildSparseArrayValue(elementDataType MxDataType, totalLength uint32, elements map[uint32]*MxValue) *MxValue {
|
||||
indices := make([]uint32, 0, len(elements))
|
||||
for idx := range elements {
|
||||
indices = append(indices, idx)
|
||||
}
|
||||
sort.Slice(indices, func(i, j int) bool { return indices[i] < indices[j] })
|
||||
|
||||
sparseElements := make([]*MxSparseElement, 0, len(elements))
|
||||
for _, idx := range indices {
|
||||
sparseElements = append(sparseElements, &MxSparseElement{
|
||||
Index: idx,
|
||||
Value: elements[idx],
|
||||
})
|
||||
}
|
||||
|
||||
return &MxValue{
|
||||
Kind: &pb.MxValue_SparseArrayValue{
|
||||
SparseArrayValue: &MxSparseArray{
|
||||
ElementDataType: elementDataType,
|
||||
TotalLength: totalLength,
|
||||
Elements: sparseElements,
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// PingRaw sends a diagnostic PING command and returns the raw reply.
|
||||
// The message is echoed back by the gateway in the reply's DiagnosticMessage field.
|
||||
func (s *Session) PingRaw(ctx context.Context, message string) (*MxCommandReply, error) {
|
||||
|
||||
@@ -36,6 +36,13 @@ type (
|
||||
Value = pb.MxValue
|
||||
// MxArray is the protobuf representation of an MXAccess array value.
|
||||
MxArray = pb.MxArray
|
||||
// MxSparseArray is the write-only protobuf type for default-fill partial
|
||||
// array writes. The gateway expands it to a full array before forwarding
|
||||
// to MXAccess: unmentioned indices receive the element type's default value
|
||||
// (boolean false, integer 0, float 0.0, string "", time = Unix epoch).
|
||||
MxSparseArray = pb.MxSparseArray
|
||||
// MxSparseElement is one index/value pair inside an MxSparseArray.
|
||||
MxSparseElement = pb.MxSparseElement
|
||||
// MxStatusProxy mirrors the MXAccess MXSTATUS_PROXY structure.
|
||||
MxStatusProxy = pb.MxStatusProxy
|
||||
// ProtocolStatus is the gateway-level status carried on every reply.
|
||||
|
||||
+65
-1
@@ -84,6 +84,70 @@ yields alarm-feed messages from the gateway's central monitor), and
|
||||
`acknowledgeAlarm` (ack by full alarm reference with an optional comment and
|
||||
ack target). Close the subscription to cancel the underlying gRPC stream.
|
||||
|
||||
## Write Semantics And Common Pitfalls
|
||||
|
||||
These are MXAccess parity behaviors that surprise new callers. The gateway
|
||||
forwards them unchanged — it does not paper over them.
|
||||
|
||||
### Attributing a write to a user without `authenticateUser`
|
||||
|
||||
MXAccess only stamps a plain `write`/`write2` with a Galaxy user id when the
|
||||
item carries an active *supervisory* advise. If you are **not** using the
|
||||
verified/secured path (`authenticateUser` → `writeSecured`/`writeSecured2`) but
|
||||
still need the write attributed to a user id, you must first advise the item
|
||||
supervisory and then pass that user id on the write. Without the supervisory
|
||||
advise the `userId` on a plain write is ignored.
|
||||
|
||||
The session exposes `advise`/`unAdvise` but not supervisory advise, so send it
|
||||
through the generic command channel:
|
||||
|
||||
```java
|
||||
session.invokeCommand(MxCommand.newBuilder()
|
||||
.setKind(MxCommandKind.MX_COMMAND_KIND_ADVISE_SUPERVISORY)
|
||||
.setAdviseSupervisory(AdviseSupervisoryCommand.newBuilder()
|
||||
.setServerHandle(serverHandle)
|
||||
.setItemHandle(itemHandle))
|
||||
.build());
|
||||
|
||||
session.write(serverHandle, itemHandle, value, userId);
|
||||
```
|
||||
|
||||
The CLI exposes the same command as `advise-supervisory`, and `write` /
|
||||
`write2` take `--user-id`.
|
||||
|
||||
### Array writes replace the whole array
|
||||
|
||||
A write to an array attribute **replaces the entire array**; it is not an
|
||||
element-wise patch. To change a subset of elements, send the full array with
|
||||
the unchanged elements included. For example, to change 2 elements of a
|
||||
20-element array, build the `MxValue` from all 20 values (the 18 unchanged plus
|
||||
the 2 new ones). Sending only the 2 changed values overwrites the attribute
|
||||
with a 2-element array.
|
||||
|
||||
When only a few indices need changing and the rest should be reset to the
|
||||
element type's default, use `writeArrayElements` instead of building the full
|
||||
array manually:
|
||||
|
||||
```java
|
||||
session.writeArrayElements(
|
||||
serverHandle, itemHandle,
|
||||
MxDataType.MX_DATA_TYPE_INTEGER,
|
||||
20, // totalLength
|
||||
Map.of(
|
||||
2, MxValues.int32Value(42),
|
||||
7, MxValues.int32Value(99)),
|
||||
userId);
|
||||
```
|
||||
|
||||
The gateway expands the sparse descriptor into a full `totalLength`-element
|
||||
array before forwarding to the worker. Indices not listed in the map are
|
||||
written as the element type's default — this is a **reset**, not a preserve;
|
||||
current values at those positions are discarded. `totalLength` is required and
|
||||
must match the declared length of the array attribute. Bare-name array items
|
||||
(`Area001.Pump001.Speed`) are auto-normalized to the `[]` form across the whole
|
||||
add family — `AddItem`, `AddItem2`, `AddItemBulk`, and `AddBufferedItem` — so the
|
||||
array attribute accepts the write.
|
||||
|
||||
## Galaxy Repository Browse
|
||||
|
||||
The Galaxy Repository service is a separate metadata-only gRPC service exposed
|
||||
@@ -333,7 +397,7 @@ repositories {
|
||||
}
|
||||
|
||||
dependencies {
|
||||
implementation 'com.zb.mom.ww.mxgateway:zb-mom-ww-mxgateway-client:0.1.1'
|
||||
implementation 'com.zb.mom.ww.mxgateway:zb-mom-ww-mxgateway-client:0.1.2'
|
||||
}
|
||||
````
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ ext {
|
||||
|
||||
subprojects {
|
||||
group = 'com.zb.mom.ww.mxgateway'
|
||||
version = '0.1.1'
|
||||
version = '0.1.2'
|
||||
|
||||
pluginManager.withPlugin('java') {
|
||||
java {
|
||||
|
||||
+9506
-436
File diff suppressed because it is too large
Load Diff
@@ -6,6 +6,9 @@ dependencies {
|
||||
implementation project(':zb-mom-ww-mxgateway-client')
|
||||
implementation "com.google.protobuf:protobuf-java-util:${protobufVersion}"
|
||||
implementation "info.picocli:picocli:${picocliVersion}"
|
||||
|
||||
testImplementation "io.grpc:grpc-inprocess:${grpcVersion}"
|
||||
testImplementation "io.grpc:grpc-testing:${grpcVersion}"
|
||||
}
|
||||
|
||||
application {
|
||||
|
||||
+179
-37
@@ -39,7 +39,10 @@ import java.util.Optional;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
import java.util.function.Consumer;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.AcknowledgeAlarmReply;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.AcknowledgeAlarmRequest;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.ActiveAlarmSnapshot;
|
||||
@@ -105,8 +108,14 @@ public final class MxGatewayCli implements Callable<Integer> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Test-friendly entry point that runs the CLI against the supplied
|
||||
* {@link PrintWriter} pair instead of the system streams.
|
||||
* Entry point that runs the CLI against the supplied {@link PrintWriter}
|
||||
* pair instead of the system streams. This overload wires the production
|
||||
* {@link GrpcMxGatewayCliClientFactory} (a real gRPC channel), so it is
|
||||
* suitable for embedding the CLI but not for unit tests that need to stub
|
||||
* the gateway. Tests should use the package-private
|
||||
* {@link #execute(MxGatewayCliClientFactory, PrintWriter, PrintWriter, String...)}
|
||||
* / {@link #commandLine(MxGatewayCliClientFactory)} overloads, which accept
|
||||
* an injectable client factory.
|
||||
*
|
||||
* @param out writer that receives standard output
|
||||
* @param err writer that receives standard error
|
||||
@@ -131,6 +140,11 @@ public final class MxGatewayCli implements Callable<Integer> {
|
||||
}
|
||||
|
||||
static CommandLine commandLine(MxGatewayCliClientFactory clientFactory) {
|
||||
return commandLine(clientFactory, new GrpcGalaxyClientFactory());
|
||||
}
|
||||
|
||||
static CommandLine commandLine(
|
||||
MxGatewayCliClientFactory clientFactory, GalaxyClientFactory galaxyClientFactory) {
|
||||
CommandLine commandLine = new CommandLine(new MxGatewayCli(clientFactory));
|
||||
commandLine.addSubcommand("version", new VersionCommand());
|
||||
commandLine.addSubcommand("open-session", new OpenSessionCommand(clientFactory));
|
||||
@@ -139,6 +153,8 @@ public final class MxGatewayCli implements Callable<Integer> {
|
||||
commandLine.addSubcommand("register", new RegisterCommand(clientFactory));
|
||||
commandLine.addSubcommand("add-item", new AddItemCommand(clientFactory));
|
||||
commandLine.addSubcommand("advise", new AdviseCommand(clientFactory));
|
||||
commandLine.addSubcommand(
|
||||
"advise-supervisory", new AdviseSupervisoryCommand(clientFactory));
|
||||
commandLine.addSubcommand("subscribe-bulk", new SubscribeBulkCommand(clientFactory));
|
||||
commandLine.addSubcommand("unsubscribe-bulk", new UnsubscribeBulkCommand(clientFactory));
|
||||
commandLine.addSubcommand("read-bulk", new ReadBulkCommand(clientFactory));
|
||||
@@ -152,11 +168,11 @@ public final class MxGatewayCli implements Callable<Integer> {
|
||||
commandLine.addSubcommand("stream-alarms", new StreamAlarmsCommand(clientFactory));
|
||||
commandLine.addSubcommand("acknowledge-alarm", new AcknowledgeAlarmCommand(clientFactory));
|
||||
commandLine.addSubcommand("smoke", new SmokeCommand(clientFactory));
|
||||
commandLine.addSubcommand("galaxy-test-connection", new GalaxyTestConnectionCommand());
|
||||
commandLine.addSubcommand("galaxy-last-deploy", new GalaxyDeployTimeCommand());
|
||||
commandLine.addSubcommand("galaxy-discover", new GalaxyDiscoverCommand());
|
||||
commandLine.addSubcommand("galaxy-browse", new GalaxyBrowseCommand());
|
||||
commandLine.addSubcommand("galaxy-watch", new GalaxyWatchCommand());
|
||||
commandLine.addSubcommand("galaxy-test-connection", new GalaxyTestConnectionCommand(galaxyClientFactory));
|
||||
commandLine.addSubcommand("galaxy-last-deploy", new GalaxyDeployTimeCommand(galaxyClientFactory));
|
||||
commandLine.addSubcommand("galaxy-discover", new GalaxyDiscoverCommand(galaxyClientFactory));
|
||||
commandLine.addSubcommand("galaxy-browse", new GalaxyBrowseCommand(galaxyClientFactory));
|
||||
commandLine.addSubcommand("galaxy-watch", new GalaxyWatchCommand(galaxyClientFactory));
|
||||
commandLine.addSubcommand("batch", new BatchCommand(clientFactory));
|
||||
return commandLine;
|
||||
}
|
||||
@@ -359,14 +375,20 @@ public final class MxGatewayCli implements Callable<Integer> {
|
||||
}
|
||||
|
||||
abstract static class GalaxyCommand implements Callable<Integer> {
|
||||
final GalaxyClientFactory galaxyClientFactory;
|
||||
|
||||
@Mixin
|
||||
CommonOptions common = new CommonOptions();
|
||||
|
||||
@Option(names = "--json", description = "Write JSON output.")
|
||||
boolean json;
|
||||
|
||||
GalaxyCommand(GalaxyClientFactory galaxyClientFactory) {
|
||||
this.galaxyClientFactory = galaxyClientFactory;
|
||||
}
|
||||
|
||||
GalaxyRepositoryClient connect() {
|
||||
return GalaxyRepositoryClient.connect(common.resolved().toClientOptions());
|
||||
return galaxyClientFactory.connect(common.resolved().toClientOptions());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -375,6 +397,10 @@ public final class MxGatewayCli implements Callable<Integer> {
|
||||
aliases = {"galaxy-test"},
|
||||
description = "Calls GalaxyRepository.TestConnection.")
|
||||
static final class GalaxyTestConnectionCommand extends GalaxyCommand {
|
||||
GalaxyTestConnectionCommand(GalaxyClientFactory galaxyClientFactory) {
|
||||
super(galaxyClientFactory);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer call() {
|
||||
try (GalaxyRepositoryClient client = connect()) {
|
||||
@@ -399,6 +425,10 @@ public final class MxGatewayCli implements Callable<Integer> {
|
||||
aliases = {"galaxy-deploy-time"},
|
||||
description = "Calls GalaxyRepository.GetLastDeployTime.")
|
||||
static final class GalaxyDeployTimeCommand extends GalaxyCommand {
|
||||
GalaxyDeployTimeCommand(GalaxyClientFactory galaxyClientFactory) {
|
||||
super(galaxyClientFactory);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer call() {
|
||||
try (GalaxyRepositoryClient client = connect()) {
|
||||
@@ -423,6 +453,10 @@ public final class MxGatewayCli implements Callable<Integer> {
|
||||
|
||||
@Command(name = "galaxy-discover", description = "Calls GalaxyRepository.DiscoverHierarchy.")
|
||||
static final class GalaxyDiscoverCommand extends GalaxyCommand {
|
||||
GalaxyDiscoverCommand(GalaxyClientFactory galaxyClientFactory) {
|
||||
super(galaxyClientFactory);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer call() {
|
||||
try (GalaxyRepositoryClient client = connect()) {
|
||||
@@ -458,6 +492,10 @@ public final class MxGatewayCli implements Callable<Integer> {
|
||||
name = "galaxy-browse",
|
||||
description = "Browses the Galaxy hierarchy via GalaxyRepository.BrowseChildren.")
|
||||
static final class GalaxyBrowseCommand extends GalaxyCommand {
|
||||
GalaxyBrowseCommand(GalaxyClientFactory galaxyClientFactory) {
|
||||
super(galaxyClientFactory);
|
||||
}
|
||||
|
||||
@Spec
|
||||
private CommandSpec spec;
|
||||
|
||||
@@ -718,6 +756,10 @@ public final class MxGatewayCli implements Callable<Integer> {
|
||||
name = "galaxy-watch",
|
||||
description = "Streams GalaxyRepository.WatchDeployEvents until cancelled.")
|
||||
static final class GalaxyWatchCommand extends GalaxyCommand {
|
||||
GalaxyWatchCommand(GalaxyClientFactory galaxyClientFactory) {
|
||||
super(galaxyClientFactory);
|
||||
}
|
||||
|
||||
@Option(
|
||||
names = "--last-seen-deploy-time",
|
||||
description =
|
||||
@@ -1004,6 +1046,34 @@ public final class MxGatewayCli implements Callable<Integer> {
|
||||
}
|
||||
}
|
||||
|
||||
@Command(
|
||||
name = "advise-supervisory",
|
||||
description = "Invokes MXAccess AdviseSupervisory.")
|
||||
static final class AdviseSupervisoryCommand extends GatewayCommand {
|
||||
@Option(names = "--session-id", required = true, description = "Gateway session id.")
|
||||
String sessionId;
|
||||
|
||||
@Option(names = "--server-handle", required = true, description = "MXAccess server handle.")
|
||||
int serverHandle;
|
||||
|
||||
@Option(names = "--item-handle", required = true, description = "MXAccess item handle.")
|
||||
int itemHandle;
|
||||
|
||||
AdviseSupervisoryCommand(MxGatewayCliClientFactory clientFactory) {
|
||||
super(clientFactory);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer call() {
|
||||
try (MxGatewayCliClient client = clientFactory.connect(common.resolved())) {
|
||||
MxCommandReply reply =
|
||||
client.session(sessionId).adviseSupervisoryRaw(serverHandle, itemHandle);
|
||||
writeOutput("advise-supervisory", common, json, reply, () -> reply.getKind().name());
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
@Command(name = "subscribe-bulk", description = "Invokes MXAccess SubscribeBulk.")
|
||||
static final class SubscribeBulkCommand extends GatewayCommand {
|
||||
@Option(names = "--session-id", required = true, description = "Gateway session id.")
|
||||
@@ -1505,50 +1575,74 @@ public final class MxGatewayCli implements Callable<Integer> {
|
||||
StreamAlarmsRequest request = StreamAlarmsRequest.newBuilder()
|
||||
.setAlarmFilterPrefix(filterPrefix)
|
||||
.build();
|
||||
// Client.Java-033 — fail-fast on overflow. A bare
|
||||
// queue.offer(value) silently drops messages past capacity,
|
||||
// which violates the JavaStyleGuide "do not drop events"
|
||||
// contract and lets the CLI exit 0 on a truncated feed.
|
||||
// Mirrors MxEventStream's overflow branch: detect a failed
|
||||
// offer, cancel the subscription, drain the buffer, then
|
||||
// queue an explicit overflow exception followed by the END
|
||||
// sentinel so the drain loop surfaces a non-zero exit.
|
||||
// Client.Java-033/040/042 — fail-fast on overflow and on
|
||||
// transport errors. A bare queue.offer(value) silently drops
|
||||
// messages past capacity (violating the JavaStyleGuide "do not
|
||||
// drop events" contract and letting the CLI exit 0 on a
|
||||
// truncated feed), and a bare queue.offer(error) on a full
|
||||
// queue would drop the terminal item and deadlock the drain on
|
||||
// queue.take().
|
||||
//
|
||||
// Terminal transitions (overflow, transport error, clean
|
||||
// completion) are now serialised through a single AtomicBoolean
|
||||
// guard plus a dedicated `terminal` slot rather than
|
||||
// re-clearing the shared queue. The first terminal condition
|
||||
// wins; a concurrent onNext on the gRPC I/O thread can no
|
||||
// longer displace it (Client.Java-040). The drain reads the
|
||||
// terminal slot independently of the bounded queue, so a full
|
||||
// queue can never strand the terminal item (Client.Java-042).
|
||||
AtomicReference<MxGatewayAlarmFeedSubscription> subscriptionRef = new AtomicReference<>();
|
||||
AtomicBoolean terminated = new AtomicBoolean();
|
||||
AtomicReference<Object> terminal = new AtomicReference<>();
|
||||
Consumer<Object> terminate = item -> {
|
||||
if (terminated.compareAndSet(false, true)) {
|
||||
terminal.set(item);
|
||||
MxGatewayAlarmFeedSubscription sub = subscriptionRef.get();
|
||||
if (sub != null) {
|
||||
sub.cancel();
|
||||
}
|
||||
}
|
||||
};
|
||||
MxGatewayAlarmFeedSubscription subscription =
|
||||
client.streamAlarms(request, new StreamObserver<>() {
|
||||
@Override
|
||||
public void onNext(AlarmFeedMessage value) {
|
||||
if (terminated.get()) {
|
||||
return;
|
||||
}
|
||||
if (!queue.offer(value)) {
|
||||
MxGatewayAlarmFeedSubscription sub = subscriptionRef.get();
|
||||
if (sub != null) {
|
||||
sub.cancel();
|
||||
}
|
||||
queue.clear();
|
||||
queue.offer(new IllegalStateException(
|
||||
terminate.accept(new IllegalStateException(
|
||||
"stream-alarms queue overflowed (capacity 1024); consumer too slow"));
|
||||
queue.offer(ALARM_FEED_END);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onError(Throwable error) {
|
||||
queue.offer(error);
|
||||
terminate.accept(error);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onCompleted() {
|
||||
queue.offer(ALARM_FEED_END);
|
||||
terminate.accept(ALARM_FEED_END);
|
||||
}
|
||||
});
|
||||
subscriptionRef.set(subscription);
|
||||
try {
|
||||
int count = 0;
|
||||
while (true) {
|
||||
Object item = queue.take();
|
||||
if (item == ALARM_FEED_END) {
|
||||
break;
|
||||
}
|
||||
if (item instanceof Throwable error) {
|
||||
// Poll with a short timeout so the dedicated terminal
|
||||
// slot is observed even when the bounded queue is full
|
||||
// of normal messages the consumer has not yet drained.
|
||||
Object item = queue.poll(50, TimeUnit.MILLISECONDS);
|
||||
if (item == null) {
|
||||
Object end = terminal.get();
|
||||
if (end == null) {
|
||||
continue;
|
||||
}
|
||||
if (end == ALARM_FEED_END) {
|
||||
break;
|
||||
}
|
||||
Throwable error = (Throwable) end;
|
||||
throw new IllegalStateException(
|
||||
"gateway stream alarms failed: " + error.getMessage(), error);
|
||||
}
|
||||
@@ -1725,6 +1819,10 @@ public final class MxGatewayCli implements Callable<Integer> {
|
||||
}
|
||||
}
|
||||
|
||||
interface GalaxyClientFactory {
|
||||
GalaxyRepositoryClient connect(MxGatewayClientOptions options);
|
||||
}
|
||||
|
||||
interface MxGatewayCliClientFactory {
|
||||
MxGatewayCliClient connect(CommonOptions options);
|
||||
}
|
||||
@@ -1762,6 +1860,8 @@ public final class MxGatewayCli implements Callable<Integer> {
|
||||
|
||||
MxCommandReply adviseRaw(int serverHandle, int itemHandle);
|
||||
|
||||
MxCommandReply adviseSupervisoryRaw(int serverHandle, int itemHandle);
|
||||
|
||||
MxCommandReply writeRaw(int serverHandle, int itemHandle, MxValue value, int userId);
|
||||
|
||||
List<SubscribeResult> subscribeBulk(int serverHandle, List<String> items);
|
||||
@@ -1781,6 +1881,13 @@ public final class MxGatewayCli implements Callable<Integer> {
|
||||
MxEventStream streamEventsAfter(long afterWorkerSequence);
|
||||
}
|
||||
|
||||
static final class GrpcGalaxyClientFactory implements GalaxyClientFactory {
|
||||
@Override
|
||||
public GalaxyRepositoryClient connect(MxGatewayClientOptions options) {
|
||||
return GalaxyRepositoryClient.connect(options);
|
||||
}
|
||||
}
|
||||
|
||||
static final class GrpcMxGatewayCliClientFactory implements MxGatewayCliClientFactory {
|
||||
@Override
|
||||
public MxGatewayCliClient connect(CommonOptions options) {
|
||||
@@ -1873,6 +1980,17 @@ public final class MxGatewayCli implements Callable<Integer> {
|
||||
return session.adviseRaw(serverHandle, itemHandle);
|
||||
}
|
||||
|
||||
@Override
|
||||
public MxCommandReply adviseSupervisoryRaw(int serverHandle, int itemHandle) {
|
||||
return session.invokeCommand(MxCommand.newBuilder()
|
||||
.setKind(MxCommandKind.MX_COMMAND_KIND_ADVISE_SUPERVISORY)
|
||||
.setAdviseSupervisory(
|
||||
mxaccess_gateway.v1.MxaccessGateway.AdviseSupervisoryCommand.newBuilder()
|
||||
.setServerHandle(serverHandle)
|
||||
.setItemHandle(itemHandle))
|
||||
.build());
|
||||
}
|
||||
|
||||
@Override
|
||||
public MxCommandReply writeRaw(int serverHandle, int itemHandle, MxValue value, int userId) {
|
||||
return session.writeRaw(serverHandle, itemHandle, value, userId);
|
||||
@@ -2142,13 +2260,37 @@ public final class MxGatewayCli implements Callable<Integer> {
|
||||
return jsonString(value.toString());
|
||||
}
|
||||
|
||||
private static String jsonString(String value) {
|
||||
return '"'
|
||||
+ value.replace("\\", "\\\\")
|
||||
.replace("\"", "\\\"")
|
||||
.replace("\r", "\\r")
|
||||
.replace("\n", "\\n")
|
||||
+ '"';
|
||||
// Package-private for the Client.Java-041 escaping regression test.
|
||||
static String jsonString(String value) {
|
||||
// RFC 8259 requires the two-character escapes for the named control
|
||||
// characters and six-character uXXXX escapes for the remaining
|
||||
// U+0000-U+001F (and U+007F) range. The old implementation escaped only
|
||||
// backslash, quote, CR, and LF, so a
|
||||
// value containing a tab, backspace, form-feed, or any other control
|
||||
// character produced malformed JSON (Client.Java-041).
|
||||
StringBuilder builder = new StringBuilder(value.length() + 2);
|
||||
builder.append('"');
|
||||
for (int i = 0; i < value.length(); i++) {
|
||||
char c = value.charAt(i);
|
||||
switch (c) {
|
||||
case '\\' -> builder.append("\\\\");
|
||||
case '"' -> builder.append("\\\"");
|
||||
case '\r' -> builder.append("\\r");
|
||||
case '\n' -> builder.append("\\n");
|
||||
case '\t' -> builder.append("\\t");
|
||||
case '\b' -> builder.append("\\b");
|
||||
case '\f' -> builder.append("\\f");
|
||||
default -> {
|
||||
if (c < 0x20 || c == 0x7f) {
|
||||
builder.append(String.format("\\u%04x", (int) c));
|
||||
} else {
|
||||
builder.append(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
builder.append('"');
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
private record RawJson(String value) {
|
||||
|
||||
+232
@@ -0,0 +1,232 @@
|
||||
package com.zb.mom.ww.mxgateway.cli;
|
||||
|
||||
import com.zb.mom.ww.mxgateway.client.GalaxyRepositoryClient;
|
||||
import com.zb.mom.ww.mxgateway.client.MxGatewayClient;
|
||||
import com.zb.mom.ww.mxgateway.client.MxGatewayClientOptions;
|
||||
import galaxy_repository.v1.GalaxyRepositoryGrpc;
|
||||
import galaxy_repository.v1.GalaxyRepositoryOuterClass.DeployEvent;
|
||||
import galaxy_repository.v1.GalaxyRepositoryOuterClass.DiscoverHierarchyReply;
|
||||
import galaxy_repository.v1.GalaxyRepositoryOuterClass.DiscoverHierarchyRequest;
|
||||
import galaxy_repository.v1.GalaxyRepositoryOuterClass.GalaxyObject;
|
||||
import galaxy_repository.v1.GalaxyRepositoryOuterClass.WatchDeployEventsRequest;
|
||||
import io.grpc.ManagedChannel;
|
||||
import io.grpc.Server;
|
||||
import io.grpc.inprocess.InProcessChannelBuilder;
|
||||
import io.grpc.inprocess.InProcessServerBuilder;
|
||||
import io.grpc.stub.StreamObserver;
|
||||
import java.io.IOException;
|
||||
import java.time.Duration;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.CopyOnWriteArrayList;
|
||||
import mxaccess_gateway.v1.MxAccessGatewayGrpc;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.CloseSessionReply;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.CloseSessionRequest;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.MxEvent;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.ProtocolStatus;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.ProtocolStatusCode;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.SessionState;
|
||||
|
||||
/**
|
||||
* Test fixture that stands up an in-process gRPC server hosting scripted fake
|
||||
* {@code MxAccessGateway} and {@code GalaxyRepository} service implementations,
|
||||
* so the real Java client types ({@link MxGatewayClient} /
|
||||
* {@link GalaxyRepositoryClient}) can be driven over a real channel.
|
||||
*
|
||||
* <p>The real streaming wrappers ({@code MxEventStream} /
|
||||
* {@code DeployEventStream}) have package-private constructors and
|
||||
* {@link GalaxyRepositoryClient} is {@code final}, so the streaming and galaxy
|
||||
* CLI commands cannot be exercised through the lightweight {@code FakeSession}
|
||||
* seam. Driving the real client over an in-process channel against scripted
|
||||
* services is the clean alternative; Tasks 5 and 6 add the CLI assertions on
|
||||
* top of this fixture.
|
||||
*
|
||||
* <p>Scripted payloads are settable via constructor args or setters. Each
|
||||
* instance uses a unique server name so harnesses do not collide. The
|
||||
* {@code directExecutor()} wiring keeps all dispatch on the calling thread, so
|
||||
* no background threads are leaked.
|
||||
*
|
||||
* <p><strong>Implemented RPCs.</strong> The scripted services override only the
|
||||
* RPCs the CLI tests currently exercise:
|
||||
*
|
||||
* <ul>
|
||||
* <li>{@code MxAccessGateway}: {@code streamEvents}, {@code closeSession}.</li>
|
||||
* <li>{@code GalaxyRepository}: {@code discoverHierarchy},
|
||||
* {@code watchDeployEvents}.</li>
|
||||
* </ul>
|
||||
*
|
||||
* Every other RPC (e.g. {@code openSession}, {@code invoke}, {@code register},
|
||||
* {@code streamAlarms}, {@code queryActiveAlarms}, {@code browseChildren}) is
|
||||
* left at the generated {@code *ImplBase} default and therefore returns gRPC
|
||||
* {@code UNIMPLEMENTED} by design. A future test that needs one of those paths
|
||||
* must add the corresponding scripted override here first — otherwise the call
|
||||
* fails with {@code UNIMPLEMENTED} rather than the behaviour under test.
|
||||
*/
|
||||
final class InProcessGatewayHarness implements AutoCloseable {
|
||||
private final String serverName;
|
||||
private final Server server;
|
||||
private final ManagedChannel channel;
|
||||
private final FakeGatewayService fakeGateway;
|
||||
private final FakeGalaxyService fakeGalaxy;
|
||||
|
||||
/** Starts a harness with empty scripted payloads; populate via setters. */
|
||||
InProcessGatewayHarness() {
|
||||
this(List.of(), List.of(), List.of());
|
||||
}
|
||||
|
||||
/**
|
||||
* Starts a harness with the supplied scripted payloads.
|
||||
*
|
||||
* @param scriptedEvents events {@code streamEvents} pushes before completing
|
||||
* @param scriptedObjects objects {@code discoverHierarchy} returns (single page)
|
||||
* @param scriptedDeployEvents events {@code watchDeployEvents} streams before completing
|
||||
*/
|
||||
InProcessGatewayHarness(
|
||||
List<MxEvent> scriptedEvents,
|
||||
List<GalaxyObject> scriptedObjects,
|
||||
List<DeployEvent> scriptedDeployEvents) {
|
||||
this.serverName = "mxgw-cli-harness-" + UUID.randomUUID();
|
||||
this.fakeGateway = new FakeGatewayService(scriptedEvents);
|
||||
this.fakeGalaxy = new FakeGalaxyService(scriptedObjects, scriptedDeployEvents);
|
||||
try {
|
||||
this.server = InProcessServerBuilder.forName(serverName)
|
||||
.directExecutor()
|
||||
.addService(fakeGateway)
|
||||
.addService(fakeGalaxy)
|
||||
.build()
|
||||
.start();
|
||||
} catch (IOException error) {
|
||||
throw new IllegalStateException("failed to start in-process gateway harness", error);
|
||||
}
|
||||
this.channel = InProcessChannelBuilder.forName(serverName).directExecutor().build();
|
||||
}
|
||||
|
||||
/** Replaces the scripted {@code streamEvents} payload. */
|
||||
void setScriptedEvents(List<MxEvent> events) {
|
||||
fakeGateway.scriptedEvents.clear();
|
||||
fakeGateway.scriptedEvents.addAll(events);
|
||||
}
|
||||
|
||||
/** Replaces the scripted {@code discoverHierarchy} payload. */
|
||||
void setScriptedObjects(List<GalaxyObject> objects) {
|
||||
fakeGalaxy.scriptedObjects.clear();
|
||||
fakeGalaxy.scriptedObjects.addAll(objects);
|
||||
}
|
||||
|
||||
/** Replaces the scripted {@code watchDeployEvents} payload. */
|
||||
void setScriptedDeployEvents(List<DeployEvent> deployEvents) {
|
||||
fakeGalaxy.scriptedDeployEvents.clear();
|
||||
fakeGalaxy.scriptedDeployEvents.addAll(deployEvents);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the in-process channel into the scripted services.
|
||||
*
|
||||
* @return the managed channel; lifecycle owned by the harness
|
||||
*/
|
||||
ManagedChannel channel() {
|
||||
return channel;
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds a real {@link MxGatewayClient} over the in-process channel.
|
||||
*
|
||||
* @return a client borrowing the harness channel
|
||||
*/
|
||||
MxGatewayClient gatewayClient() {
|
||||
return new MxGatewayClient(channel, testOptions());
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds a real {@link GalaxyRepositoryClient} over the in-process channel.
|
||||
*
|
||||
* @return a client borrowing the harness channel
|
||||
*/
|
||||
GalaxyRepositoryClient galaxyClient() {
|
||||
return new GalaxyRepositoryClient(channel, testOptions());
|
||||
}
|
||||
|
||||
private static MxGatewayClientOptions testOptions() {
|
||||
return MxGatewayClientOptions.builder()
|
||||
.endpoint("in-process")
|
||||
.apiKey("mxgw_test_secret")
|
||||
.plaintext(true)
|
||||
.callTimeout(Duration.ofSeconds(5))
|
||||
.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
channel.shutdownNow();
|
||||
server.shutdownNow();
|
||||
}
|
||||
|
||||
private static ProtocolStatus ok() {
|
||||
return ProtocolStatus.newBuilder()
|
||||
.setCode(ProtocolStatusCode.PROTOCOL_STATUS_CODE_OK)
|
||||
.build();
|
||||
}
|
||||
|
||||
/** Scripted fake of the {@code MxAccessGateway} service. */
|
||||
private static final class FakeGatewayService extends MxAccessGatewayGrpc.MxAccessGatewayImplBase {
|
||||
private final List<MxEvent> scriptedEvents = new CopyOnWriteArrayList<>();
|
||||
|
||||
FakeGatewayService(List<MxEvent> scriptedEvents) {
|
||||
this.scriptedEvents.addAll(scriptedEvents);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void streamEvents(
|
||||
mxaccess_gateway.v1.MxaccessGateway.StreamEventsRequest request,
|
||||
StreamObserver<MxEvent> responseObserver) {
|
||||
for (MxEvent event : scriptedEvents) {
|
||||
responseObserver.onNext(event);
|
||||
}
|
||||
responseObserver.onCompleted();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void closeSession(
|
||||
CloseSessionRequest request, StreamObserver<CloseSessionReply> responseObserver) {
|
||||
responseObserver.onNext(CloseSessionReply.newBuilder()
|
||||
.setSessionId(request.getSessionId())
|
||||
.setFinalState(SessionState.SESSION_STATE_CLOSED)
|
||||
.setProtocolStatus(ok())
|
||||
.build());
|
||||
responseObserver.onCompleted();
|
||||
}
|
||||
}
|
||||
|
||||
/** Scripted fake of the {@code GalaxyRepository} service. */
|
||||
private static final class FakeGalaxyService extends GalaxyRepositoryGrpc.GalaxyRepositoryImplBase {
|
||||
private final List<GalaxyObject> scriptedObjects = new CopyOnWriteArrayList<>();
|
||||
private final List<DeployEvent> scriptedDeployEvents = new CopyOnWriteArrayList<>();
|
||||
|
||||
FakeGalaxyService(List<GalaxyObject> scriptedObjects, List<DeployEvent> scriptedDeployEvents) {
|
||||
this.scriptedObjects.addAll(scriptedObjects);
|
||||
this.scriptedDeployEvents.addAll(scriptedDeployEvents);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void discoverHierarchy(
|
||||
DiscoverHierarchyRequest request, StreamObserver<DiscoverHierarchyReply> responseObserver) {
|
||||
List<GalaxyObject> snapshot = new ArrayList<>(scriptedObjects);
|
||||
responseObserver.onNext(DiscoverHierarchyReply.newBuilder()
|
||||
.setTotalObjectCount(snapshot.size())
|
||||
.addAllObjects(snapshot)
|
||||
.setNextPageToken("")
|
||||
.build());
|
||||
responseObserver.onCompleted();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void watchDeployEvents(
|
||||
WatchDeployEventsRequest request, StreamObserver<DeployEvent> responseObserver) {
|
||||
for (DeployEvent event : scriptedDeployEvents) {
|
||||
responseObserver.onNext(event);
|
||||
}
|
||||
responseObserver.onCompleted();
|
||||
}
|
||||
}
|
||||
}
|
||||
+504
-40
@@ -6,6 +6,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import com.zb.mom.ww.mxgateway.client.MxGatewayAlarmFeedSubscription;
|
||||
import com.zb.mom.ww.mxgateway.client.MxGatewayClientOptions;
|
||||
import galaxy_repository.v1.GalaxyRepositoryOuterClass.DeployEvent;
|
||||
import galaxy_repository.v1.GalaxyRepositoryOuterClass.GalaxyObject;
|
||||
import io.grpc.stub.StreamObserver;
|
||||
import java.io.ByteArrayInputStream;
|
||||
@@ -31,6 +32,7 @@ import mxaccess_gateway.v1.MxaccessGateway.CloseSessionRequest;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.MxCommandKind;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.MxCommandReply;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.MxEvent;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.MxEventFamily;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.MxValue;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.OnAlarmTransitionEvent;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.OpenSessionReply;
|
||||
@@ -54,17 +56,37 @@ final class MxGatewayCliTests {
|
||||
|
||||
assertEquals(0, run.exitCode());
|
||||
assertEquals("", run.errors());
|
||||
assertTrue(run.output().contains("mxgateway-java 0.1.0"));
|
||||
assertTrue(run.output().contains("mxgateway-java 0.1.2"));
|
||||
assertTrue(run.output().contains("gatewayProtocolVersion=3"));
|
||||
assertTrue(run.output().contains("workerProtocolVersion=1"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void jsonStringEscapesControlCharacters() {
|
||||
// Client.Java-041 — the hand-rolled jsonString escaped only backslash,
|
||||
// quote, CR, and LF, so a tab/backspace/form-feed or any other control
|
||||
// char produced malformed JSON (RFC 8259). After the fix the named control
|
||||
// chars use their two-character escapes and the rest use six-char uXXXX.
|
||||
assertEquals("\"a\\tb\"", MxGatewayCli.jsonString("a\tb"));
|
||||
assertEquals("\"a\\bb\"", MxGatewayCli.jsonString("a\bb"));
|
||||
assertEquals("\"a\\fb\"", MxGatewayCli.jsonString("a\fb"));
|
||||
assertEquals("\"a\\rb\"", MxGatewayCli.jsonString("a\rb"));
|
||||
assertEquals("\"a\\nb\"", MxGatewayCli.jsonString("a\nb"));
|
||||
// A non-named control character (U+0001) must become .
|
||||
assertEquals("\"a\\u0001b\"", MxGatewayCli.jsonString("ab"));
|
||||
// DEL (U+007F) is also escaped.
|
||||
assertEquals("\"a\\u007fb\"", MxGatewayCli.jsonString("ab"));
|
||||
// Quote and backslash still escape; ordinary printable text is verbatim.
|
||||
assertEquals("\"a\\\"\\\\b\"", MxGatewayCli.jsonString("a\"\\b"));
|
||||
assertEquals("\"plain\"", MxGatewayCli.jsonString("plain"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void versionCommandPrintsJson() {
|
||||
CliRun run = execute(new FakeClientFactory(), "version", "--json");
|
||||
|
||||
assertEquals(0, run.exitCode());
|
||||
assertTrue(run.output().contains("\"clientVersion\":\"0.1.0\""));
|
||||
assertTrue(run.output().contains("\"clientVersion\":\"0.1.2\""));
|
||||
assertTrue(run.output().contains("\"gatewayProtocolVersion\":3"));
|
||||
}
|
||||
|
||||
@@ -126,6 +148,26 @@ final class MxGatewayCliTests {
|
||||
assertTrue(run.output().contains("\"itemHandle\":7"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void adviseSupervisoryCommandCallsAdviseSupervisoryRaw() {
|
||||
// Client.Java-050: dedicated test for advise-supervisory, using a
|
||||
// separate adviseSupervisoryCalled flag so it cannot be masked by the
|
||||
// plain advise path that shares adviseCalled.
|
||||
FakeClientFactory factory = new FakeClientFactory();
|
||||
CliRun run = execute(
|
||||
factory,
|
||||
"advise-supervisory",
|
||||
"--session-id", "session-cli",
|
||||
"--server-handle", "12",
|
||||
"--item-handle", "34",
|
||||
"--json");
|
||||
|
||||
assertEquals(0, run.exitCode());
|
||||
assertTrue(factory.client.session.adviseSupervisoryCalled);
|
||||
assertFalse(factory.client.session.adviseCalled, "plain advise must not be called");
|
||||
assertTrue(run.output().contains("\"kind\":\"MX_COMMAND_KIND_ADVISE_SUPERVISORY\""));
|
||||
}
|
||||
|
||||
// ---- ping subcommand (D4) ----
|
||||
|
||||
@Test
|
||||
@@ -239,27 +281,20 @@ final class MxGatewayCliTests {
|
||||
void galaxyBrowseParentZeroEmitsWarningToStderr() {
|
||||
// --parent 0 is the server sentinel for roots; passing it explicitly is
|
||||
// almost certainly a mistake. The CLI must print a warning to stderr
|
||||
// (matching Go/Rust client behaviour) but must still attempt the call
|
||||
// (exit behaviour depends on gateway reachability, not tested here;
|
||||
// we only assert the warning path is triggered by checking the error
|
||||
// writer before any gRPC connection is attempted).
|
||||
// (matching Go/Rust client behaviour) but must still attempt the call.
|
||||
//
|
||||
// GalaxyBrowseCommand connects to a real GalaxyRepositoryClient, so the
|
||||
// call() body will throw after printing the warning when no gateway is
|
||||
// reachable. We only assert the warning appears on stderr.
|
||||
StringWriter output = new StringWriter();
|
||||
StringWriter errors = new StringWriter();
|
||||
// Non-zero exit is expected (no live gateway), but the warning must
|
||||
// appear on stderr regardless of what happens next.
|
||||
MxGatewayCli.execute(
|
||||
new FakeClientFactory(),
|
||||
new PrintWriter(output, true),
|
||||
new PrintWriter(errors, true),
|
||||
// GalaxyBrowseCommand prints the warning, then calls connect() on the
|
||||
// GalaxyClientFactory. We inject a stub factory whose connect() throws,
|
||||
// so only the warning path runs — no live Netty channel to localhost is
|
||||
// constructed (Client.Java-043). The warning is emitted before
|
||||
// connect() is reached, so it appears on stderr regardless.
|
||||
CliRun run = executeGalaxy(
|
||||
new ThrowingGalaxyClientFactory(),
|
||||
"galaxy-browse", "--parent", "0", "--depth", "1");
|
||||
|
||||
assertTrue(
|
||||
errors.toString().contains("--parent 0"),
|
||||
"expected '--parent 0' warning on stderr; got: " + errors);
|
||||
run.errors().contains("--parent 0"),
|
||||
"expected '--parent 0' warning on stderr; got: " + run.errors());
|
||||
}
|
||||
|
||||
// ---- galaxy command-name aliases (D9-java) ----
|
||||
@@ -305,6 +340,183 @@ final class MxGatewayCliTests {
|
||||
assertTrue(run.output().contains("\"tagAddress\":\"TestMachine_002.TestChangingInt\""));
|
||||
}
|
||||
|
||||
// ---- read-bulk / write-bulk / write2-bulk subcommands ----
|
||||
|
||||
@Test
|
||||
void readBulkCommandForwardsTimeoutAndPrintsResults() {
|
||||
FakeClientFactory factory = new FakeClientFactory();
|
||||
CliRun run = execute(
|
||||
factory,
|
||||
"read-bulk",
|
||||
"--session-id", "session-cli",
|
||||
"--server-handle", "42",
|
||||
"--items", "TestMachine_001.TestInt,TestMachine_002.TestInt",
|
||||
"--timeout-ms", "750",
|
||||
"--json");
|
||||
|
||||
assertEquals(0, run.exitCode());
|
||||
assertEquals(750, factory.client.session.lastReadBulkTimeoutMs);
|
||||
String out = run.output();
|
||||
assertTrue(out.contains("\"command\":\"read-bulk\""), out);
|
||||
assertTrue(out.contains("\"tagAddress\":\"TestMachine_001.TestInt\""), out);
|
||||
assertTrue(out.contains("\"tagAddress\":\"TestMachine_002.TestInt\""), out);
|
||||
assertTrue(out.contains("\"itemHandle\""), out);
|
||||
assertTrue(out.contains("\"wasCached\""), out);
|
||||
assertTrue(out.contains("\"quality\""), out);
|
||||
}
|
||||
|
||||
@Test
|
||||
void writeBulkCommandParsesTypedValuesAndPrintsResults() {
|
||||
FakeClientFactory factory = new FakeClientFactory();
|
||||
CliRun run = execute(
|
||||
factory,
|
||||
"write-bulk",
|
||||
"--session-id", "session-cli",
|
||||
"--server-handle", "42",
|
||||
"--item-handles", "100,101",
|
||||
"--type", "int32",
|
||||
"--values", "111,222",
|
||||
"--user-id", "5",
|
||||
"--json");
|
||||
|
||||
assertEquals(0, run.exitCode());
|
||||
List<WriteBulkEntry> entries = factory.client.session.lastWriteBulkEntries;
|
||||
assertEquals(2, entries.size());
|
||||
assertEquals(111, entries.get(0).getValue().getInt32Value());
|
||||
assertEquals(222, entries.get(1).getValue().getInt32Value());
|
||||
assertEquals(5, entries.get(0).getUserId());
|
||||
assertEquals(5, entries.get(1).getUserId());
|
||||
String out = run.output();
|
||||
assertTrue(out.contains("\"command\":\"write-bulk\""), out);
|
||||
assertTrue(out.contains("\"itemHandle\":100"), out);
|
||||
assertTrue(out.contains("\"wasSuccessful\":true"), out);
|
||||
}
|
||||
|
||||
@Test
|
||||
void write2BulkCommandForwardsTimestampAndPrintsResults() {
|
||||
FakeClientFactory factory = new FakeClientFactory();
|
||||
CliRun run = execute(
|
||||
factory,
|
||||
"write2-bulk",
|
||||
"--session-id", "session-cli",
|
||||
"--server-handle", "42",
|
||||
"--item-handles", "100",
|
||||
"--type", "string",
|
||||
"--values", "hello",
|
||||
"--timestamp", "2026-05-20T00:00:00Z",
|
||||
"--json");
|
||||
|
||||
assertEquals(0, run.exitCode());
|
||||
List<Write2BulkEntry> entries = factory.client.session.lastWrite2BulkEntries;
|
||||
assertEquals(1, entries.size());
|
||||
assertTrue(entries.get(0).hasTimestampValue(), "expected timestampValue to be set");
|
||||
String out = run.output();
|
||||
assertTrue(out.contains("\"command\":\"write2-bulk\""), out);
|
||||
assertTrue(out.contains("\"itemHandle\":100"), out);
|
||||
assertTrue(out.contains("\"wasSuccessful\":true"), out);
|
||||
}
|
||||
|
||||
@Test
|
||||
void writeSecuredBulkCommandForwardsUserIdsAndPrintsResults() {
|
||||
FakeClientFactory factory = new FakeClientFactory();
|
||||
CliRun run = execute(
|
||||
factory,
|
||||
"write-secured-bulk",
|
||||
"--session-id", "session-cli",
|
||||
"--server-handle", "42",
|
||||
"--item-handles", "100,101",
|
||||
"--type", "int32",
|
||||
"--values", "10,20",
|
||||
"--current-user-id", "7",
|
||||
"--verifier-user-id", "8",
|
||||
"--json");
|
||||
|
||||
assertEquals(0, run.exitCode());
|
||||
List<WriteSecuredBulkEntry> entries = factory.client.session.lastWriteSecuredBulkEntries;
|
||||
assertEquals(2, entries.size());
|
||||
assertEquals(7, entries.get(0).getCurrentUserId());
|
||||
assertEquals(8, entries.get(0).getVerifierUserId());
|
||||
assertEquals(7, entries.get(1).getCurrentUserId());
|
||||
assertEquals(8, entries.get(1).getVerifierUserId());
|
||||
assertEquals(10, entries.get(0).getValue().getInt32Value());
|
||||
assertEquals(20, entries.get(1).getValue().getInt32Value());
|
||||
String out = run.output();
|
||||
assertTrue(out.contains("\"command\":\"write-secured-bulk\""), out);
|
||||
assertTrue(out.contains("\"itemHandle\":100"), out);
|
||||
assertTrue(out.contains("\"wasSuccessful\":true"), out);
|
||||
}
|
||||
|
||||
@Test
|
||||
void writeSecured2BulkCommandForwardsTimestampAndUserIdsAndPrintsResults() {
|
||||
FakeClientFactory factory = new FakeClientFactory();
|
||||
CliRun run = execute(
|
||||
factory,
|
||||
"write-secured2-bulk",
|
||||
"--session-id", "session-cli",
|
||||
"--server-handle", "42",
|
||||
"--item-handles", "100",
|
||||
"--type", "string",
|
||||
"--values", "hello",
|
||||
"--timestamp", "2026-05-20T00:00:00Z",
|
||||
"--current-user-id", "7",
|
||||
"--verifier-user-id", "8",
|
||||
"--json");
|
||||
|
||||
assertEquals(0, run.exitCode());
|
||||
List<WriteSecured2BulkEntry> entries = factory.client.session.lastWriteSecured2BulkEntries;
|
||||
assertEquals(1, entries.size());
|
||||
assertEquals(7, entries.get(0).getCurrentUserId());
|
||||
assertEquals(8, entries.get(0).getVerifierUserId());
|
||||
assertTrue(entries.get(0).hasTimestampValue(), "expected timestampValue to be set");
|
||||
String out = run.output();
|
||||
assertTrue(out.contains("\"command\":\"write-secured2-bulk\""), out);
|
||||
assertTrue(out.contains("\"itemHandle\":100"), out);
|
||||
assertTrue(out.contains("\"wasSuccessful\":true"), out);
|
||||
}
|
||||
|
||||
@Test
|
||||
void benchReadBulkCommandEmitsJsonSchemaKeys() {
|
||||
FakeClientFactory factory = new FakeClientFactory();
|
||||
CliRun run = execute(
|
||||
factory,
|
||||
"bench-read-bulk",
|
||||
"--session-id", "session-cli",
|
||||
"--server-handle", "42",
|
||||
"--items", "TestMachine_001.TestInt",
|
||||
"--iterations", "3",
|
||||
"--warmup", "0",
|
||||
"--json");
|
||||
|
||||
assertEquals(0, run.exitCode());
|
||||
String out = run.output();
|
||||
assertTrue(out.contains("\"command\":\"bench-read-bulk\""), out);
|
||||
assertTrue(out.contains("\"iterations\""), out);
|
||||
assertTrue(out.contains("\"warmup\""), out);
|
||||
assertTrue(out.contains("\"tagCount\""), out);
|
||||
assertTrue(out.contains("\"resultCount\""), out);
|
||||
assertTrue(out.contains("\"successCount\""), out);
|
||||
assertTrue(out.contains("\"cachedCount\""), out);
|
||||
assertTrue(out.contains("\"avgMs\""), out);
|
||||
assertTrue(out.contains("\"minMs\""), out);
|
||||
assertTrue(out.contains("\"maxMs\""), out);
|
||||
}
|
||||
|
||||
@Test
|
||||
void closeSessionCommandPrintsReply() {
|
||||
FakeClientFactory factory = new FakeClientFactory();
|
||||
CliRun run = execute(
|
||||
factory,
|
||||
"close-session",
|
||||
"--session-id", "session-cli",
|
||||
"--json");
|
||||
|
||||
assertEquals(0, run.exitCode());
|
||||
assertTrue(factory.client.closeCalled);
|
||||
String out = run.output();
|
||||
assertTrue(out.contains("\"command\":\"close-session\""), out);
|
||||
assertTrue(out.contains("SESSION_STATE_CLOSED"), out);
|
||||
}
|
||||
|
||||
@Test
|
||||
void unsubscribeBulkCommandPrintsResults() {
|
||||
CliRun run = execute(
|
||||
@@ -499,21 +711,165 @@ final class MxGatewayCliTests {
|
||||
|
||||
@Test
|
||||
void streamAlarmsCommandFailsFastOnQueueOverflow() {
|
||||
// Client.Java-033 regression — the CLI's stream-alarms bounded queue
|
||||
// used queue.offer(value) which silently dropped messages past
|
||||
// capacity (1024). After the fix the CLI must surface the overflow
|
||||
// as a non-zero exit (mirroring MxEventStream's fail-fast contract).
|
||||
// Client.Java-033/040/046 regression — the CLI's stream-alarms bounded
|
||||
// queue used queue.offer(value) which silently dropped messages past
|
||||
// capacity (1024). After the fix the CLI must surface the overflow as a
|
||||
// non-zero exit (mirroring MxEventStream's fail-fast contract).
|
||||
//
|
||||
// The OverflowingFakeClient floods the gRPC observer with 2000
|
||||
// messages synchronously, which exceeds the bounded 1024-element
|
||||
// queue. The fix detects the failed offer, cancels the subscription,
|
||||
// queues an overflow exception, and the drain loop surfaces it.
|
||||
// The OverflowingFakeClient floods the gRPC observer on a BACKGROUND
|
||||
// thread so the subscription is already published when the overflow
|
||||
// fires — exercising the terminate() cancel path with a non-null
|
||||
// subscription (Client.Java-046), not just the synchronous-flood path
|
||||
// where subscriptionRef is still null. The fix records the overflow in
|
||||
// a dedicated terminal slot (no queue.clear, Client.Java-040) and the
|
||||
// drain loop surfaces it with the overflow message text.
|
||||
OverflowingFakeClientFactory factory = new OverflowingFakeClientFactory();
|
||||
CliRun run = execute(factory, "stream-alarms", "--filter-prefix", "Flood");
|
||||
|
||||
assertFalse(run.exitCode() == 0,
|
||||
"expected non-zero exit when the alarm queue overflows; got exit=" + run.exitCode()
|
||||
+ " out=\n" + run.output() + "\nerr=\n" + run.errors());
|
||||
assertTrue(
|
||||
run.errors().contains("queue overflowed") || run.output().contains("queue overflowed"),
|
||||
"expected the overflow message text to surface; out=\n" + run.output()
|
||||
+ "\nerr=\n" + run.errors());
|
||||
}
|
||||
|
||||
@Test
|
||||
void streamEventsRendersScriptedEventsIncludingHighUint64Sequence() {
|
||||
// Drive the REAL MxGatewayClient / MxGatewaySession / MxEventStream
|
||||
// path over the in-process harness (Task 4), so the production
|
||||
// stream-events command exercises the real streaming wrapper instead
|
||||
// of a hand-written FakeSession seam.
|
||||
//
|
||||
// The high worker-sequence (-1L == 18446744073709551615 unsigned)
|
||||
// covers the unsigned-rendering regression: worker_sequence is a
|
||||
// proto uint64 carried as a Java long with the top bit set. The CLI's
|
||||
// --json path renders it through protobuf's JsonFormat, which prints
|
||||
// uint64 as an unsigned decimal STRING; a naive %d render would print
|
||||
// a negative number instead.
|
||||
MxEvent dataChange = MxEvent.newBuilder()
|
||||
.setFamily(MxEventFamily.MX_EVENT_FAMILY_ON_DATA_CHANGE)
|
||||
.setSessionId("session-cli")
|
||||
.setServerHandle(7)
|
||||
.setItemHandle(42)
|
||||
.setWorkerSequence(5L)
|
||||
.build();
|
||||
MxEvent highSequence = MxEvent.newBuilder()
|
||||
.setFamily(MxEventFamily.MX_EVENT_FAMILY_OPERATION_COMPLETE)
|
||||
.setSessionId("session-cli")
|
||||
.setServerHandle(9)
|
||||
.setItemHandle(99)
|
||||
// -1L unsigned == 18446744073709551615 (top bit set).
|
||||
.setWorkerSequence(-1L)
|
||||
.build();
|
||||
|
||||
try (InProcessGatewayHarness harness = new InProcessGatewayHarness()) {
|
||||
harness.setScriptedEvents(List.of(dataChange, highSequence));
|
||||
CliRun run = execute(
|
||||
new HarnessClientFactory(harness),
|
||||
"stream-events",
|
||||
"--session-id",
|
||||
"session-cli",
|
||||
"--json");
|
||||
|
||||
assertEquals(0, run.exitCode(), "errors:\n" + run.errors());
|
||||
String out = run.output();
|
||||
// Scripted event fields surface in the JSON render.
|
||||
assertTrue(out.contains("\"family\":\"MX_EVENT_FAMILY_ON_DATA_CHANGE\""), out);
|
||||
assertTrue(out.contains("\"family\":\"MX_EVENT_FAMILY_OPERATION_COMPLETE\""), out);
|
||||
assertTrue(out.contains("\"serverHandle\":7"), out);
|
||||
assertTrue(out.contains("\"itemHandle\":42"), out);
|
||||
// The low sequence renders as the unsigned decimal string "5".
|
||||
assertTrue(out.contains("\"workerSequence\":\"5\""), out);
|
||||
// The high sequence renders as the FULL unsigned decimal, not -1.
|
||||
assertTrue(out.contains("\"workerSequence\":\"18446744073709551615\""), out);
|
||||
assertFalse(out.contains("\"workerSequence\":\"-1\""), out);
|
||||
assertFalse(out.contains("\"workerSequence\":-1"), out);
|
||||
}
|
||||
}
|
||||
|
||||
// ---- galaxy-discover / galaxy-watch over the in-process harness (Task 6) ----
|
||||
|
||||
@Test
|
||||
void galaxyDiscoverPrintsPagedHierarchyJson() {
|
||||
// Drive the REAL GalaxyRepositoryClient.discoverHierarchy path over the
|
||||
// in-process harness (Task 6), so the production galaxy-discover command
|
||||
// exercises the real client paging loop against scripted objects instead
|
||||
// of a hand-written seam. The harness's fake discoverHierarchy returns a
|
||||
// single page carrying the scripted objects.
|
||||
GalaxyObject area = GalaxyObject.newBuilder()
|
||||
.setGobjectId(101)
|
||||
.setTagName("Area001")
|
||||
.setContainedName("Area001")
|
||||
.setBrowseName("Area001")
|
||||
.setIsArea(true)
|
||||
.build();
|
||||
GalaxyObject pump = GalaxyObject.newBuilder()
|
||||
.setGobjectId(202)
|
||||
.setTagName("Pump001")
|
||||
.setContainedName("Pump001")
|
||||
.setBrowseName("Pump001")
|
||||
.setParentGobjectId(101)
|
||||
.build();
|
||||
|
||||
try (InProcessGatewayHarness harness = new InProcessGatewayHarness()) {
|
||||
harness.setScriptedObjects(List.of(area, pump));
|
||||
CliRun run = executeGalaxy(new HarnessGalaxyClientFactory(harness), "galaxy-discover", "--json");
|
||||
|
||||
assertEquals(0, run.exitCode(), "errors:\n" + run.errors());
|
||||
String out = run.output();
|
||||
// galaxy-discover --json renders {"command":..,"options":..,"objects":[{galaxyObjectMap}]}.
|
||||
assertTrue(out.contains("\"command\":\"galaxy-discover\""), out);
|
||||
// Both scripted objects render with the flattened object fields.
|
||||
assertTrue(out.contains("\"tagName\":\"Area001\""), out);
|
||||
assertTrue(out.contains("\"tagName\":\"Pump001\""), out);
|
||||
assertTrue(out.contains("\"gobjectId\":101"), out);
|
||||
assertTrue(out.contains("\"gobjectId\":202"), out);
|
||||
assertTrue(out.contains("\"parentGobjectId\":101"), out);
|
||||
assertTrue(out.contains("\"isArea\":true"), out);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void galaxyWatchRendersScriptedDeployEvents() {
|
||||
// Drive the REAL GalaxyRepositoryClient.watchDeployEvents / DeployEventStream
|
||||
// path over the in-process harness. The harness's fake watchDeployEvents
|
||||
// streams the scripted deploy events then completes; the CLI's --limit
|
||||
// option caps how many it prints before closing the stream.
|
||||
DeployEvent first = DeployEvent.newBuilder()
|
||||
.setSequence(7L)
|
||||
.setObjectCount(12)
|
||||
.setAttributeCount(34)
|
||||
.build();
|
||||
DeployEvent second = DeployEvent.newBuilder()
|
||||
.setSequence(8L)
|
||||
.setObjectCount(13)
|
||||
.setAttributeCount(35)
|
||||
.build();
|
||||
DeployEvent third = DeployEvent.newBuilder()
|
||||
.setSequence(9L)
|
||||
.setObjectCount(14)
|
||||
.setAttributeCount(36)
|
||||
.build();
|
||||
|
||||
try (InProcessGatewayHarness harness = new InProcessGatewayHarness()) {
|
||||
harness.setScriptedDeployEvents(List.of(first, second, third));
|
||||
// --limit 2 caps the feed at the first two scripted events.
|
||||
CliRun run = executeGalaxy(
|
||||
new HarnessGalaxyClientFactory(harness), "galaxy-watch", "--limit", "2", "--json");
|
||||
|
||||
assertEquals(0, run.exitCode(), "errors:\n" + run.errors());
|
||||
String out = run.output();
|
||||
// galaxy-watch --json prints one proto-JSON object per event; proto3
|
||||
// JSON renders uint64 sequence as a decimal string.
|
||||
assertTrue(out.contains("\"sequence\":\"7\""), out);
|
||||
assertTrue(out.contains("\"objectCount\":12"), out);
|
||||
assertTrue(out.contains("\"attributeCount\":34"), out);
|
||||
assertTrue(out.contains("\"sequence\":\"8\""), out);
|
||||
// --limit 2 must stop before printing the third scripted event.
|
||||
assertFalse(out.contains("\"sequence\":\"9\""), out);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -630,6 +986,24 @@ final class MxGatewayCliTests {
|
||||
return new CliRun(exitCode, output.toString(), errors.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs a galaxy subcommand against the supplied {@link
|
||||
* MxGatewayCli.GalaxyClientFactory}, wiring it through the production
|
||||
* {@code commandLine(gatewayFactory, galaxyFactory)} two-arg overload (Task 3
|
||||
* seam). The gateway factory slot is unused by galaxy commands, so a plain
|
||||
* {@link FakeClientFactory} fills it. Mirrors {@link #execute} for the
|
||||
* gateway commands.
|
||||
*/
|
||||
private static CliRun executeGalaxy(MxGatewayCli.GalaxyClientFactory galaxyFactory, String... args) {
|
||||
StringWriter output = new StringWriter();
|
||||
StringWriter errors = new StringWriter();
|
||||
picocli.CommandLine commandLine = MxGatewayCli.commandLine(new FakeClientFactory(), galaxyFactory);
|
||||
commandLine.setOut(new PrintWriter(output, true));
|
||||
commandLine.setErr(new PrintWriter(errors, true));
|
||||
int exitCode = commandLine.execute(args);
|
||||
return new CliRun(exitCode, output.toString(), errors.toString());
|
||||
}
|
||||
|
||||
private record CliRun(int exitCode, String output, String errors) {
|
||||
}
|
||||
|
||||
@@ -672,6 +1046,62 @@ final class MxGatewayCliTests {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Factory that wires the production {@link MxGatewayCli.GrpcMxGatewayCliClient}
|
||||
* adapter around the harness's REAL
|
||||
* {@link com.zb.mom.ww.mxgateway.client.MxGatewayClient}, so the
|
||||
* stream-events command runs against the in-process scripted gateway over
|
||||
* a real channel (exercising the real {@code MxEventStream}). Mirrors the
|
||||
* production {@code GrpcMxGatewayCliClientFactory}, swapping only the
|
||||
* client construction for the harness-backed client.
|
||||
*/
|
||||
private static final class HarnessClientFactory implements MxGatewayCli.MxGatewayCliClientFactory {
|
||||
private final InProcessGatewayHarness harness;
|
||||
|
||||
private HarnessClientFactory(InProcessGatewayHarness harness) {
|
||||
this.harness = harness;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MxGatewayCli.MxGatewayCliClient connect(MxGatewayCli.CommonOptions options) {
|
||||
return new MxGatewayCli.GrpcMxGatewayCliClient(
|
||||
harness.gatewayClient(), options.spec.commandLine().getOut());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Galaxy factory that returns the harness's REAL {@link
|
||||
* com.zb.mom.ww.mxgateway.client.GalaxyRepositoryClient} over the in-process
|
||||
* scripted {@code GalaxyRepository} service, so galaxy-discover / galaxy-watch
|
||||
* exercise the real client (paging loop, deploy-event stream wrapper) against
|
||||
* scripted payloads. Mirrors the production {@code GrpcGalaxyClientFactory},
|
||||
* swapping only client construction for the harness-backed client.
|
||||
*/
|
||||
private static final class HarnessGalaxyClientFactory implements MxGatewayCli.GalaxyClientFactory {
|
||||
private final InProcessGatewayHarness harness;
|
||||
|
||||
private HarnessGalaxyClientFactory(InProcessGatewayHarness harness) {
|
||||
this.harness = harness;
|
||||
}
|
||||
|
||||
@Override
|
||||
public com.zb.mom.ww.mxgateway.client.GalaxyRepositoryClient connect(MxGatewayClientOptions options) {
|
||||
return harness.galaxyClient();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Galaxy client factory whose {@code connect} throws, so a test can exercise
|
||||
* a command's pre-connect path (e.g. the {@code --parent 0} warning) without
|
||||
* constructing a live Netty channel to localhost (Client.Java-043).
|
||||
*/
|
||||
private static final class ThrowingGalaxyClientFactory implements MxGatewayCli.GalaxyClientFactory {
|
||||
@Override
|
||||
public com.zb.mom.ww.mxgateway.client.GalaxyRepositoryClient connect(MxGatewayClientOptions options) {
|
||||
throw new IllegalStateException("galaxy connect not available in this test");
|
||||
}
|
||||
}
|
||||
|
||||
private static final class OverflowingFakeClient implements MxGatewayCli.MxGatewayCliClient {
|
||||
private final PrintWriter out;
|
||||
|
||||
@@ -711,19 +1141,31 @@ final class MxGatewayCliTests {
|
||||
@Override
|
||||
public MxGatewayAlarmFeedSubscription streamAlarms(
|
||||
StreamAlarmsRequest request, StreamObserver<AlarmFeedMessage> observer) {
|
||||
// Synchronously push 2000 messages to overflow the CLI's bounded
|
||||
// 1024-element queue. The CLI must surface the overflow rather
|
||||
// than silently dropping the trailing ~976 messages.
|
||||
for (int i = 0; i < 2000; i++) {
|
||||
observer.onNext(AlarmFeedMessage.newBuilder()
|
||||
.setActiveAlarm(ActiveAlarmSnapshot.newBuilder()
|
||||
.setAlarmFullReference("Flood." + i)
|
||||
.setCurrentState(AlarmConditionState.ALARM_CONDITION_STATE_ACTIVE)
|
||||
.setSeverity(700))
|
||||
.build());
|
||||
}
|
||||
observer.onCompleted();
|
||||
return new MxGatewayAlarmFeedSubscription();
|
||||
// Push messages on a BACKGROUND thread (mirroring real gRPC, which
|
||||
// delivers onNext on a netty I/O thread) so the CLI's
|
||||
// subscriptionRef is already published when the overflow fires —
|
||||
// this exercises the terminate() cancel path with a non-null
|
||||
// subscription (Client.Java-046), unlike a synchronous flood that
|
||||
// overflows before streamAlarms even returns. Keeps pushing until
|
||||
// it observes the CLI cancelling the subscription on overflow, so
|
||||
// no fixed message count is needed and the thread always exits.
|
||||
MxGatewayAlarmFeedSubscription subscription = new MxGatewayAlarmFeedSubscription();
|
||||
Thread flood = new Thread(() -> {
|
||||
int i = 0;
|
||||
while (!Thread.currentThread().isInterrupted() && i < 100_000) {
|
||||
observer.onNext(AlarmFeedMessage.newBuilder()
|
||||
.setActiveAlarm(ActiveAlarmSnapshot.newBuilder()
|
||||
.setAlarmFullReference("Flood." + i)
|
||||
.setCurrentState(AlarmConditionState.ALARM_CONDITION_STATE_ACTIVE)
|
||||
.setSeverity(700))
|
||||
.build());
|
||||
i++;
|
||||
}
|
||||
observer.onCompleted();
|
||||
}, "overflowing-fake-alarm-feed");
|
||||
flood.setDaemon(true);
|
||||
flood.start();
|
||||
return subscription;
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -813,8 +1255,15 @@ final class MxGatewayCliTests {
|
||||
private boolean registerCalled;
|
||||
private boolean addItemCalled;
|
||||
private boolean adviseCalled;
|
||||
private boolean adviseSupervisoryCalled;
|
||||
private MxValue lastWriteValue;
|
||||
private String lastPingMessage;
|
||||
private long lastReadBulkTimeoutMs;
|
||||
private List<String> lastReadBulkItems;
|
||||
private List<WriteBulkEntry> lastWriteBulkEntries;
|
||||
private List<Write2BulkEntry> lastWrite2BulkEntries;
|
||||
private List<WriteSecuredBulkEntry> lastWriteSecuredBulkEntries;
|
||||
private List<WriteSecured2BulkEntry> lastWriteSecured2BulkEntries;
|
||||
|
||||
@Override
|
||||
public MxCommandReply pingRaw(String message) {
|
||||
@@ -874,6 +1323,15 @@ final class MxGatewayCliTests {
|
||||
.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
public MxCommandReply adviseSupervisoryRaw(int serverHandle, int itemHandle) {
|
||||
adviseSupervisoryCalled = true;
|
||||
return MxCommandReply.newBuilder()
|
||||
.setKind(MxCommandKind.MX_COMMAND_KIND_ADVISE_SUPERVISORY)
|
||||
.setProtocolStatus(ok())
|
||||
.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
public MxCommandReply writeRaw(int serverHandle, int itemHandle, MxValue value, int userId) {
|
||||
lastWriteValue = value;
|
||||
@@ -912,6 +1370,8 @@ final class MxGatewayCliTests {
|
||||
|
||||
@Override
|
||||
public List<BulkReadResult> readBulk(int serverHandle, List<String> items, Duration timeout) {
|
||||
lastReadBulkTimeoutMs = timeout.toMillis();
|
||||
lastReadBulkItems = new ArrayList<>(items);
|
||||
List<BulkReadResult> results = new ArrayList<>();
|
||||
for (int index = 0; index < items.size(); index++) {
|
||||
results.add(BulkReadResult.newBuilder()
|
||||
@@ -927,6 +1387,7 @@ final class MxGatewayCliTests {
|
||||
|
||||
@Override
|
||||
public List<BulkWriteResult> writeBulk(int serverHandle, List<WriteBulkEntry> entries) {
|
||||
lastWriteBulkEntries = new ArrayList<>(entries);
|
||||
List<BulkWriteResult> results = new ArrayList<>();
|
||||
for (WriteBulkEntry entry : entries) {
|
||||
results.add(BulkWriteResult.newBuilder()
|
||||
@@ -940,6 +1401,7 @@ final class MxGatewayCliTests {
|
||||
|
||||
@Override
|
||||
public List<BulkWriteResult> write2Bulk(int serverHandle, List<Write2BulkEntry> entries) {
|
||||
lastWrite2BulkEntries = new ArrayList<>(entries);
|
||||
List<BulkWriteResult> results = new ArrayList<>();
|
||||
for (Write2BulkEntry entry : entries) {
|
||||
results.add(BulkWriteResult.newBuilder()
|
||||
@@ -953,6 +1415,7 @@ final class MxGatewayCliTests {
|
||||
|
||||
@Override
|
||||
public List<BulkWriteResult> writeSecuredBulk(int serverHandle, List<WriteSecuredBulkEntry> entries) {
|
||||
lastWriteSecuredBulkEntries = new ArrayList<>(entries);
|
||||
List<BulkWriteResult> results = new ArrayList<>();
|
||||
for (WriteSecuredBulkEntry entry : entries) {
|
||||
results.add(BulkWriteResult.newBuilder()
|
||||
@@ -966,6 +1429,7 @@ final class MxGatewayCliTests {
|
||||
|
||||
@Override
|
||||
public List<BulkWriteResult> writeSecured2Bulk(int serverHandle, List<WriteSecured2BulkEntry> entries) {
|
||||
lastWriteSecured2BulkEntries = new ArrayList<>(entries);
|
||||
List<BulkWriteResult> results = new ArrayList<>();
|
||||
for (WriteSecured2BulkEntry entry : entries) {
|
||||
results.add(BulkWriteResult.newBuilder()
|
||||
|
||||
+1
-1
@@ -9,7 +9,7 @@ package com.zb.mom.ww.mxgateway.client;
|
||||
public final class MxGatewayClientVersion {
|
||||
private static final int GATEWAY_PROTOCOL_VERSION = 3;
|
||||
private static final int WORKER_PROTOCOL_VERSION = 1;
|
||||
private static final String CLIENT_VERSION = "0.1.0";
|
||||
private static final String CLIENT_VERSION = "0.1.2";
|
||||
|
||||
private MxGatewayClientVersion() {
|
||||
}
|
||||
|
||||
+72
@@ -4,7 +4,9 @@ import java.security.SecureRandom;
|
||||
import java.time.Duration;
|
||||
import java.util.HexFormat;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.TreeMap;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.AddItem2Command;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.AddItemBulkCommand;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.AddItemCommand;
|
||||
@@ -18,6 +20,9 @@ import mxaccess_gateway.v1.MxaccessGateway.MxCommand;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.MxCommandKind;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.MxCommandReply;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.MxCommandRequest;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.MxDataType;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.MxSparseArray;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.MxSparseElement;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.MxValue;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.OpenSessionReply;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.ReadBulkCommand;
|
||||
@@ -603,6 +608,73 @@ public final class MxGatewaySession implements AutoCloseable {
|
||||
.build());
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes a subset of an array's elements using MXAccess {@code Write}, building a
|
||||
* write-only {@link MxSparseArray} value that the gateway expands into a full,
|
||||
* default-filled array before forwarding to the worker.
|
||||
*
|
||||
* <p><strong>Default-fill semantics:</strong> only the indices supplied in
|
||||
* {@code elements} are written; every unmentioned index is <em>reset</em> to the
|
||||
* element type's default (for example {@code 0}, {@code false}, or an empty string),
|
||||
* <em>not</em> preserved from the array's current contents. Use a full
|
||||
* {@link MxValue} array write when you need to keep existing element values.
|
||||
*
|
||||
* <p>{@code totalLength} is required and defines the length of the expanded array;
|
||||
* supplied indices must be within {@code [0, totalLength)}. Elements are iterated in
|
||||
* ascending index order so the produced command is deterministic.
|
||||
*
|
||||
* <p>Because the proto fields {@code MxSparseArray.total_length} and
|
||||
* {@code MxSparseElement.index} are {@code uint32}, passing a negative Java {@code int}
|
||||
* would silently sign-extend to a large unsigned value on the wire. This method
|
||||
* therefore rejects negative {@code totalLength} and negative element indices with
|
||||
* {@link IllegalArgumentException} rather than allowing a hard-to-diagnose gateway error.
|
||||
*
|
||||
* @param serverHandle the {@code ServerHandle} owning the item
|
||||
* @param itemHandle the {@code ItemHandle} to write
|
||||
* @param elementDataType the {@link MxDataType} of the array's elements
|
||||
* @param totalLength the total length of the expanded array; must be > 0
|
||||
* @param elements the indices to write mapped to their scalar values; each index must
|
||||
* be in {@code [0, totalLength)}; unmentioned indices are reset to the element
|
||||
* type default
|
||||
* @param userId the MXAccess user id used for security checks
|
||||
* @throws IllegalArgumentException if {@code totalLength} is not positive, or if any
|
||||
* element index is negative or ≥ {@code totalLength}
|
||||
* @throws MxGatewayException on transport or protocol failure
|
||||
*/
|
||||
public void writeArrayElements(
|
||||
int serverHandle,
|
||||
int itemHandle,
|
||||
MxDataType elementDataType,
|
||||
int totalLength,
|
||||
Map<Integer, MxValue> elements,
|
||||
int userId) {
|
||||
Objects.requireNonNull(elementDataType, "elementDataType");
|
||||
Objects.requireNonNull(elements, "elements");
|
||||
if (totalLength <= 0) {
|
||||
throw new IllegalArgumentException("totalLength must be > 0, got " + totalLength);
|
||||
}
|
||||
for (Map.Entry<Integer, MxValue> entry : elements.entrySet()) {
|
||||
int idx = entry.getKey();
|
||||
if (idx < 0 || idx >= totalLength) {
|
||||
throw new IllegalArgumentException(
|
||||
"element index " + idx + " is out of range [0, " + totalLength + ")");
|
||||
}
|
||||
}
|
||||
MxSparseArray.Builder sparse = MxSparseArray.newBuilder()
|
||||
.setElementDataType(elementDataType)
|
||||
.setTotalLength(totalLength);
|
||||
// Iterate in ascending index order so the built command is deterministic.
|
||||
for (Map.Entry<Integer, MxValue> entry : new TreeMap<>(elements).entrySet()) {
|
||||
sparse.addElements(MxSparseElement.newBuilder()
|
||||
.setIndex(entry.getKey())
|
||||
.setValue(Objects.requireNonNull(entry.getValue(), "elements value")));
|
||||
}
|
||||
MxValue value = MxValue.newBuilder()
|
||||
.setSparseArrayValue(sparse)
|
||||
.build();
|
||||
writeRaw(serverHandle, itemHandle, value, userId);
|
||||
}
|
||||
|
||||
/**
|
||||
* Invokes MXAccess {@code Write2}, which carries an explicit timestamp.
|
||||
*
|
||||
|
||||
+3
@@ -153,6 +153,9 @@ public final class MxValues {
|
||||
case TIMESTAMP_VALUE -> instant(value.getTimestampValue());
|
||||
case ARRAY_VALUE -> nativeArray(value.getArrayValue());
|
||||
case RAW_VALUE -> value.getRawValue().toByteArray();
|
||||
// Write-only sparse descriptor: never produced by a read/decoded
|
||||
// value, so it has no native representation.
|
||||
case SPARSE_ARRAY_VALUE -> null;
|
||||
case KIND_NOT_SET -> null;
|
||||
};
|
||||
}
|
||||
|
||||
+113
@@ -19,6 +19,7 @@ import io.grpc.stub.ServerCallStreamObserver;
|
||||
import io.grpc.stub.StreamObserver;
|
||||
import java.time.Duration;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
@@ -36,7 +37,10 @@ import mxaccess_gateway.v1.MxaccessGateway.CloseSessionRequest;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.MxCommandKind;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.MxCommandReply;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.MxCommandRequest;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.MxDataType;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.MxEvent;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.MxSparseElement;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.MxValue;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.OpenSessionReply;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.OpenSessionRequest;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.ProtocolStatus;
|
||||
@@ -396,6 +400,115 @@ final class MxGatewayClientSessionTests {
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void writeArrayElementsBuildsSparseArrayWriteCommand() throws Exception {
|
||||
AtomicReference<MxCommandRequest> commandRequest = new AtomicReference<>();
|
||||
TestGatewayService service = new TestGatewayService() {
|
||||
@Override
|
||||
public void invoke(MxCommandRequest request, StreamObserver<MxCommandReply> responseObserver) {
|
||||
commandRequest.set(request);
|
||||
responseObserver.onNext(MxCommandReply.newBuilder()
|
||||
.setSessionId(request.getSessionId())
|
||||
.setKind(request.getCommand().getKind())
|
||||
.setProtocolStatus(ok())
|
||||
.build());
|
||||
responseObserver.onCompleted();
|
||||
}
|
||||
};
|
||||
|
||||
try (InProcessGateway gateway = InProcessGateway.start(service, new AtomicReference<>());
|
||||
MxGatewayClient client = gateway.client("", Duration.ofSeconds(5))) {
|
||||
MxGatewaySession session = MxGatewaySession.forSessionId(client, "sparse-session");
|
||||
|
||||
// Supply indices out of order to prove deterministic ascending iteration.
|
||||
Map<Integer, MxValue> elements = Map.of(
|
||||
3, MxValues.int32Value(99),
|
||||
1, MxValues.int32Value(7));
|
||||
|
||||
session.writeArrayElements(12, 34, MxDataType.MX_DATA_TYPE_INTEGER, 5, elements, 56);
|
||||
|
||||
MxCommandRequest request = commandRequest.get();
|
||||
assertNotNull(request);
|
||||
assertEquals(MxCommandKind.MX_COMMAND_KIND_WRITE, request.getCommand().getKind());
|
||||
assertEquals(12, request.getCommand().getWrite().getServerHandle());
|
||||
assertEquals(34, request.getCommand().getWrite().getItemHandle());
|
||||
assertEquals(56, request.getCommand().getWrite().getUserId());
|
||||
|
||||
MxValue written = request.getCommand().getWrite().getValue();
|
||||
assertEquals(MxValue.KindCase.SPARSE_ARRAY_VALUE, written.getKindCase());
|
||||
assertEquals(5, written.getSparseArrayValue().getTotalLength());
|
||||
assertEquals(
|
||||
MxDataType.MX_DATA_TYPE_INTEGER,
|
||||
written.getSparseArrayValue().getElementDataType());
|
||||
|
||||
List<MxSparseElement> sparse = written.getSparseArrayValue().getElementsList();
|
||||
assertEquals(2, sparse.size());
|
||||
// Ascending index order is guaranteed by the helper.
|
||||
assertEquals(1, sparse.get(0).getIndex());
|
||||
assertEquals(7, sparse.get(0).getValue().getInt32Value());
|
||||
assertEquals(3, sparse.get(1).getIndex());
|
||||
assertEquals(99, sparse.get(1).getValue().getInt32Value());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void writeArrayElementsRejectsNonPositiveTotalLength() throws Exception {
|
||||
// Client.Java-051: negative/zero totalLength silently sign-extends to a
|
||||
// large uint32 on the wire; the client must reject it with
|
||||
// IllegalArgumentException before building the proto message (before any
|
||||
// network call is issued).
|
||||
try (InProcessGateway gateway = InProcessGateway.start(
|
||||
new TestGatewayService() {}, new AtomicReference<>());
|
||||
MxGatewayClient client = gateway.client("", Duration.ofSeconds(5))) {
|
||||
MxGatewaySession session = MxGatewaySession.forSessionId(client, "guard-session");
|
||||
|
||||
assertThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> session.writeArrayElements(
|
||||
1, 2, MxDataType.MX_DATA_TYPE_INTEGER, -1, Map.of(), 0),
|
||||
"negative totalLength must throw");
|
||||
|
||||
assertThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> session.writeArrayElements(
|
||||
1, 2, MxDataType.MX_DATA_TYPE_INTEGER, 0, Map.of(), 0),
|
||||
"zero totalLength must throw");
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void writeArrayElementsRejectsOutOfRangeIndex() throws Exception {
|
||||
// Client.Java-051: a negative index silently sign-extends to a large
|
||||
// uint32 on the wire; an index >= totalLength exceeds the declared
|
||||
// array bounds. Both must be caught before the network call.
|
||||
try (InProcessGateway gateway = InProcessGateway.start(
|
||||
new TestGatewayService() {}, new AtomicReference<>());
|
||||
MxGatewayClient client = gateway.client("", Duration.ofSeconds(5))) {
|
||||
MxGatewaySession session = MxGatewaySession.forSessionId(client, "guard-session");
|
||||
|
||||
assertThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> session.writeArrayElements(
|
||||
1, 2, MxDataType.MX_DATA_TYPE_INTEGER, 5,
|
||||
Map.of(-1, MxValues.int32Value(7)), 0),
|
||||
"negative index must throw");
|
||||
|
||||
assertThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> session.writeArrayElements(
|
||||
1, 2, MxDataType.MX_DATA_TYPE_INTEGER, 5,
|
||||
Map.of(5, MxValues.int32Value(7)), 0),
|
||||
"index equal to totalLength must throw");
|
||||
|
||||
assertThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> session.writeArrayElements(
|
||||
1, 2, MxDataType.MX_DATA_TYPE_INTEGER, 5,
|
||||
Map.of(10, MxValues.int32Value(7)), 0),
|
||||
"index above totalLength must throw");
|
||||
}
|
||||
}
|
||||
|
||||
private static ProtocolStatus ok() {
|
||||
return ProtocolStatus.newBuilder()
|
||||
.setCode(ProtocolStatusCode.PROTOCOL_STATUS_CODE_OK)
|
||||
|
||||
@@ -105,6 +105,77 @@ terminate the stream.
|
||||
Canceling a Python task cancels the client-side gRPC call or stream wait. It
|
||||
does not abort an in-flight MXAccess COM call inside the worker process.
|
||||
|
||||
## Write Semantics And Common Pitfalls
|
||||
|
||||
These are MXAccess parity behaviors that surprise new callers. The gateway
|
||||
forwards them unchanged — it does not paper over them.
|
||||
|
||||
### Attributing a write to a user without `authenticate_user`
|
||||
|
||||
MXAccess only stamps a plain `write`/`write2` with a Galaxy user id when the
|
||||
item carries an active *supervisory* advise. If you are **not** using the
|
||||
verified/secured path (`authenticate_user` → `write_secured`/`write_secured2`)
|
||||
but still need the write attributed to a user id, you must first place a
supervisory advise on the item and then pass that user id on the write. Without
the supervisory advise, the `user_id` on a plain write is ignored.
|
||||
|
||||
The session exposes `advise`/`unadvise` but not supervisory advise, so send it
|
||||
through the generic command channel:
|
||||
|
||||
```python
|
||||
await session.invoke(
|
||||
pb.MxCommand(
|
||||
kind=pb.MX_COMMAND_KIND_ADVISE_SUPERVISORY,
|
||||
advise_supervisory=pb.AdviseSupervisoryCommand(
|
||||
server_handle=server_handle,
|
||||
item_handle=item_handle,
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
await session.write(server_handle, item_handle, value, user_id=user_id)
|
||||
```
|
||||
|
||||
The CLI exposes the same command as `advise-supervisory`, and `write` /
|
||||
`write2` take `--user-id`.
|
||||
|
||||
### Array writes replace the whole array
|
||||
|
||||
A write to an array attribute **replaces the entire array**; it is not an
|
||||
element-wise patch. To change a subset of elements, send the full array with
|
||||
the unchanged elements included. For example, to change 2 elements of a
|
||||
20-element array, build the `MxValue` from all 20 values (the 18 unchanged plus
|
||||
the 2 new ones). Sending only the 2 changed values overwrites the attribute
|
||||
with a 2-element array.
|
||||
|
||||
### Default-fill partial array writes
|
||||
|
||||
`Session.write_array_elements` lets you write only the indices you care about.
|
||||
The gateway fills every unmentioned position with the type default for the
|
||||
declared `element_data_type` (0, `False`, `""`, Unix epoch for timestamps).
|
||||
The previous value at those positions is **not** preserved — the gateway expands
|
||||
the sparse map to a full array before forwarding the write to MXAccess, so this
|
||||
is still a full replacement:
|
||||
|
||||
```python
|
||||
# Write indices 0 and 5 of a 10-element integer array.
|
||||
# Positions 1-4 and 6-9 become 0, not their previous values.
|
||||
await session.write_array_elements(
|
||||
server_handle=server_handle,
|
||||
item_handle=item_handle,
|
||||
element_data_type=pb.MX_DATA_TYPE_INTEGER,
|
||||
total_length=10,
|
||||
elements={0: 100, 5: 500},
|
||||
)
|
||||
```
|
||||
|
||||
Bare-name array items (e.g. `Object.ArrayAttr` without an index suffix) added
|
||||
via `add_item`, `add_item2`, `add_item_bulk`, or `add_buffered_item`
|
||||
auto-normalize to `[]` — they refer to the whole array, not a single element.
|
||||
Writes through such handles must cover the full array or use
|
||||
`write_array_elements` to supply `total_length` and let the gateway fill
|
||||
defaults for the rest.
|
||||
|
||||
## Galaxy Repository Browse
|
||||
|
||||
The `GalaxyRepositoryClient` wraps the read-only `GalaxyRepository` gRPC
|
||||
@@ -140,19 +211,21 @@ service requires the `metadata:read` scope on the API key.
|
||||
|
||||
### Browsing lazily
|
||||
|
||||
For UI trees or OPC UA bridges, use `browse_children` to walk one level at a
|
||||
For UI trees or OPC UA bridges, use `browse_children_raw` to walk one level at a
|
||||
time instead of loading the full hierarchy with `discover_hierarchy`. Pass an
|
||||
empty request for root objects; subsequent calls set `parent_gobject_id`,
|
||||
`parent_tag_name`, or `parent_contained_path`. Filter fields match
|
||||
`DiscoverHierarchy`. Each response pairs `children` with `child_has_children` so
|
||||
you know which nodes to expand. See
|
||||
you know which nodes to expand. Most callers should prefer the higher-level
|
||||
`browse()` / `LazyBrowseNode` walker below; `browse_children_raw` is the
|
||||
low-level escape hatch for direct page-token control. See
|
||||
[Galaxy Repository](../../docs/GalaxyRepository.md#browsechildren) for full
|
||||
request and filter semantics.
|
||||
|
||||
```python
|
||||
from zb_mom_ww_mxgateway.generated import galaxy_repository_pb2 as galaxy_pb2
|
||||
|
||||
reply = await galaxy.browse_children(galaxy_pb2.BrowseChildrenRequest())
|
||||
reply = await galaxy.browse_children_raw(galaxy_pb2.BrowseChildrenRequest())
|
||||
for child, has_children in zip(reply.children, reply.child_has_children):
|
||||
print(child.tag_name, "expand=" + str(has_children))
|
||||
```
|
||||
|
||||
@@ -6,8 +6,8 @@ build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "zb-mom-ww-mxaccess-gateway-client"
|
||||
version = "0.1.1"
|
||||
description = "Async Python client scaffold for MXAccess Gateway."
|
||||
version = "0.1.2"
|
||||
description = "Async Python client for MXAccess Gateway."
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.12"
|
||||
dependencies = [
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from .auth import ApiKey, auth_metadata
|
||||
from .client import GatewayClient
|
||||
from .galaxy import GalaxyRepositoryClient
|
||||
from .galaxy import GalaxyRepositoryClient, LazyBrowseNode
|
||||
from .generated.galaxy_repository_pb2 import (
|
||||
DeployEvent,
|
||||
GalaxyAttribute,
|
||||
@@ -19,19 +19,21 @@ from .errors import (
|
||||
MxGatewayTransportError,
|
||||
MxGatewayWorkerError,
|
||||
)
|
||||
from .options import ClientOptions
|
||||
from .options import BrowseChildrenOptions, ClientOptions
|
||||
from .session import Session
|
||||
from .values import MxValueView, from_mx_value, to_mx_value
|
||||
from .version import __version__
|
||||
|
||||
__all__ = [
|
||||
"ApiKey",
|
||||
"BrowseChildrenOptions",
|
||||
"ClientOptions",
|
||||
"DeployEvent",
|
||||
"GalaxyAttribute",
|
||||
"GalaxyObject",
|
||||
"GalaxyRepositoryClient",
|
||||
"GatewayClient",
|
||||
"LazyBrowseNode",
|
||||
"MxAccessError",
|
||||
"MxGatewayAuthenticationError",
|
||||
"MxGatewayAuthorizationError",
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -489,6 +489,60 @@ class Session:
|
||||
correlation_id=correlation_id,
|
||||
)
|
||||
|
||||
async def write_array_elements(
    self,
    server_handle: int,
    item_handle: int,
    element_data_type: "pb.MxDataType.ValueType",
    total_length: int,
    elements: dict[int, MxValueInput],
    *,
    user_id: int = 0,
    correlation_id: str = "",
) -> None:
    """Write a partial array by specifying only the indices you want to set.

    The gateway expands the sparse representation into a full ``total_length``
    array before forwarding the write to MXAccess. Indices not listed in
    *elements* are filled with the type default for *element_data_type* (0,
    False, empty string, Unix epoch for timestamps, etc.). The previous
    value at those positions is **not** preserved — this is a full array
    replacement, not a patch.

    Sparse elements are emitted in ascending index order, so the wire payload
    is deterministic and does not depend on the insertion order of *elements*.

    Args:
        server_handle: Handle returned by :meth:`register`.
        item_handle: Handle returned by :meth:`add_item`.
        element_data_type: ``pb.MX_DATA_TYPE_*`` enum value for the scalar
            element type of the target array attribute.
        total_length: Total number of elements in the written array. Must
            be > 0 and large enough to contain every index in *elements*.
            Both *total_length* and all keys in *elements* must be
            non-negative; the gateway rejects negative or out-of-range
            values with ``InvalidArgument`` (the proto fields are
            ``uint32``).
        elements: Mapping of zero-based element index to scalar value.
            Values are converted with :func:`~zb_mom_ww_mxgateway.values.to_mx_value`.
        user_id: Galaxy user id to stamp on the write (requires a prior
            supervisory advise to take effect — see README).
        correlation_id: Optional client-supplied correlation token echoed
            in the command reply.
    """
    sparse = pb.MxSparseArray(
        element_data_type=element_data_type,
        total_length=total_length,
        elements=[
            pb.MxSparseElement(index=idx, value=to_mx_value(elements[idx]))
            # Sort the keys so callers get the same payload regardless of how
            # the mapping was populated.
            for idx in sorted(elements)
        ],
    )
    # The sparse array travels as an ordinary WRITE; the gateway does the
    # default-fill expansion.
    await self.write(
        server_handle,
        item_handle,
        pb.MxValue(sparse_array_value=sparse),
        user_id=user_id,
        correlation_id=correlation_id,
    )
|
||||
|
||||
async def write2(
|
||||
self,
|
||||
server_handle: int,
|
||||
|
||||
@@ -277,6 +277,23 @@ def advise(**kwargs: Any) -> None:
|
||||
_run(_advise(**kwargs), output_json=kwargs["output_json"], secrets=_secrets(kwargs))
|
||||
|
||||
|
||||
@main.command("advise-supervisory")
@gateway_options
@click.option("--session-id", required=True, help="Gateway session id.")
@click.option("--server-handle", required=True, type=int, help="MXAccess server handle.")
@click.option("--item-handle", required=True, type=int, help="MXAccess item handle.")
@click.option("--correlation-id", default="", help="Client correlation id.")
@click.option("--json", "output_json", is_flag=True, help="Emit JSON output.")
def advise_supervisory(**kwargs: Any) -> None:
    """Invoke MXAccess AdviseSupervisory."""

    # Create the coroutine first, then gather the rendering options, and hand
    # all three to the shared runner.
    pending = _advise_supervisory(**kwargs)
    as_json = kwargs["output_json"]
    redactions = _secrets(kwargs)
    _run(pending, output_json=as_json, secrets=redactions)
|
||||
|
||||
|
||||
@main.command("subscribe-bulk")
|
||||
@gateway_options
|
||||
@click.option("--session-id", required=True, help="Gateway session id.")
|
||||
@@ -725,6 +742,22 @@ async def _advise(**kwargs: Any) -> dict[str, Any]:
|
||||
return {"ok": True}
|
||||
|
||||
|
||||
async def _advise_supervisory(**kwargs: Any) -> dict[str, Any]:
    """Send an ADVISE_SUPERVISORY command for one item on an existing session.

    Reads ``session_id``, ``server_handle``, ``item_handle`` and
    ``correlation_id`` from *kwargs*, issues the command through the session's
    generic ``invoke`` channel, and returns ``{"ok": True}``.
    """
    async with await _connect(kwargs) as client:
        session = _session(client, kwargs["session_id"])
        handles = pb.AdviseSupervisoryCommand(
            server_handle=kwargs["server_handle"],
            item_handle=kwargs["item_handle"],
        )
        command = pb.MxCommand(
            kind=pb.MX_COMMAND_KIND_ADVISE_SUPERVISORY,
            advise_supervisory=handles,
        )
        await session.invoke(command, correlation_id=kwargs["correlation_id"])
        return {"ok": True}
|
||||
|
||||
|
||||
async def _subscribe_bulk(**kwargs: Any) -> dict[str, Any]:
|
||||
async with await _connect(kwargs) as client:
|
||||
session = _session(client, kwargs["session_id"])
|
||||
@@ -769,7 +802,7 @@ def _build_write_bulk_entries(kwargs: dict[str, Any]):
|
||||
"""
|
||||
|
||||
handles = _parse_int_list(kwargs["item_handles"])
|
||||
value_texts = _parse_string_list(kwargs["values"])
|
||||
value_texts = _parse_string_list(kwargs["values"], param_hint="--values")
|
||||
if len(handles) != len(value_texts):
|
||||
raise click.UsageError(
|
||||
f"item-handles count ({len(handles)}) does not match values count ({len(value_texts)})",
|
||||
@@ -1045,8 +1078,7 @@ async def _write2(**kwargs: Any) -> dict[str, Any]:
|
||||
async def _smoke(**kwargs: Any) -> dict[str, Any]:
|
||||
async with await _connect(kwargs) as client:
|
||||
session = await client.open_session(client_session_name=kwargs["client_name"])
|
||||
closed = False
|
||||
try:
|
||||
async with session:
|
||||
server_handle = await session.register(kwargs["client_name"])
|
||||
item_handle = await session.add_item(server_handle, kwargs["item"])
|
||||
await session.advise(server_handle, item_handle)
|
||||
@@ -1061,9 +1093,6 @@ async def _smoke(**kwargs: Any) -> dict[str, Any]:
|
||||
"itemHandle": item_handle,
|
||||
"events": [_message_dict(event) for event in events],
|
||||
}
|
||||
finally:
|
||||
if not closed:
|
||||
await session.close()
|
||||
|
||||
|
||||
async def _galaxy_test_connection(**kwargs: Any) -> dict[str, Any]:
|
||||
@@ -1487,10 +1516,10 @@ def _parse_datetime(raw_value: str) -> datetime:
|
||||
return parsed
|
||||
|
||||
|
||||
def _parse_string_list(raw_value: str) -> list[str]:
|
||||
def _parse_string_list(raw_value: str, param_hint: str = "--items") -> list[str]:
|
||||
values = [item.strip() for item in raw_value.split(",") if item.strip()]
|
||||
if not values:
|
||||
raise click.BadParameter("at least one item is required", param_hint="--items")
|
||||
raise click.BadParameter("at least one item is required", param_hint=param_hint)
|
||||
return values
|
||||
|
||||
|
||||
@@ -1498,7 +1527,12 @@ def _parse_int_list(raw_value: str) -> list[int]:
|
||||
values = [item.strip() for item in raw_value.split(",") if item.strip()]
|
||||
if not values:
|
||||
raise click.BadParameter("at least one item handle is required", param_hint="--item-handles")
|
||||
return [int(item) for item in values]
|
||||
try:
|
||||
return [int(item) for item in values]
|
||||
except ValueError as exc:
|
||||
raise click.BadParameter(
|
||||
f"item handles must be integers: {exc}", param_hint="--item-handles"
|
||||
) from exc
|
||||
|
||||
|
||||
def _message_dict(message: Any) -> dict[str, Any]:
|
||||
|
||||
@@ -0,0 +1,131 @@
|
||||
"""Regression tests for Client.Python-032..036.
|
||||
|
||||
Each test corresponds to a finding from the 2026-06-16 re-review. Tests are
|
||||
TDD-first — they fail against the pre-fix source and pass against the fixed
|
||||
source.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import inspect
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import click
|
||||
import pytest
|
||||
|
||||
from zb_mom_ww_mxgateway_cli import commands as cli_commands
|
||||
from zb_mom_ww_mxgateway_cli.commands import _parse_int_list, _parse_string_list
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Client.Python-032 — `_smoke` must not carry the dead `closed` guard variable.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_smoke_does_not_carry_dead_closed_guard() -> None:
    """Guard against the dead ``closed`` flag creeping back into ``_smoke``.

    Client.Python-004 removed the ``closed = False`` / ``if not closed`` pair.
    The flag is never reassigned, so the guard only suggests an early-close
    path that does not exist.
    """

    smoke_text = inspect.getsource(cli_commands._smoke)

    flag_declared = "closed = False" in smoke_text
    assert not flag_declared, (
        "_smoke must not reintroduce the dead `closed = False` variable"
    )

    flag_checked = "if not closed:" in smoke_text
    assert not flag_checked, (
        "_smoke must not reintroduce the dead `if not closed:` guard"
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Client.Python-033 — `_parse_string_list` param_hint must reflect the caller.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_parse_string_list_default_param_hint_is_items() -> None:
    """With no explicit hint, an empty list is reported against ``--items``."""
    with pytest.raises(click.BadParameter) as raised:
        _parse_string_list("")

    reported_hint = raised.value.param_hint
    assert reported_hint == "--items"
|
||||
|
||||
|
||||
def test_parse_string_list_accepts_caller_supplied_param_hint() -> None:
    """A caller-provided hint replaces the ``--items`` default.

    The write-bulk family passes ``--values``, so an empty value has to be
    reported against ``--values`` rather than the unrelated default.
    """

    caller_hint = "--values"
    with pytest.raises(click.BadParameter) as raised:
        _parse_string_list("", param_hint=caller_hint)

    assert raised.value.param_hint == "--values"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Client.Python-034 — `_parse_int_list` must re-raise non-numeric tokens as
|
||||
# click.BadParameter, not a raw ValueError traceback.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_parse_int_list_non_numeric_raises_bad_parameter() -> None:
    """A non-numeric token surfaces as ``click.BadParameter`` on ``--item-handles``."""
    malformed = "10,abc"
    with pytest.raises(click.BadParameter) as raised:
        _parse_int_list(malformed)

    assert raised.value.param_hint == "--item-handles"
|
||||
|
||||
|
||||
def test_parse_int_list_happy_path() -> None:
    """Comma-separated integers parse, with surrounding whitespace ignored."""
    parsed = _parse_int_list("10, 20 ,30")
    assert parsed == [10, 20, 30]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Client.Python-035 — public browse types must be re-exported from the package
|
||||
# root.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_browse_children_options_is_exported_from_package_root() -> None:
    """``BrowseChildrenOptions`` is importable from, and listed by, the package root."""
    import zb_mom_ww_mxgateway as pkg

    exported_name = "BrowseChildrenOptions"
    assert hasattr(pkg, exported_name)
    assert exported_name in pkg.__all__
|
||||
|
||||
|
||||
def test_lazy_browse_node_is_exported_from_package_root() -> None:
    """``LazyBrowseNode`` is importable from, and listed by, the package root."""
    import zb_mom_ww_mxgateway as pkg

    exported_name = "LazyBrowseNode"
    assert hasattr(pkg, exported_name)
    assert exported_name in pkg.__all__
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Client.Python-036 — README "Browsing lazily" example must reference a method
|
||||
# that actually exists on GalaxyRepositoryClient.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _readme_path() -> Path:
|
||||
return Path(__file__).resolve().parent.parent / "README.md"
|
||||
|
||||
|
||||
def test_galaxy_client_exposes_browse_children_raw() -> None:
    """Pin the method name used by the README example.

    A future rename then fails here instead of only at runtime in user code.
    """

    from zb_mom_ww_mxgateway import GalaxyRepositoryClient

    method_name = "browse_children_raw"
    assert hasattr(GalaxyRepositoryClient, method_name)
|
||||
|
||||
|
||||
def test_readme_browse_example_uses_existing_method() -> None:
    """Every ``galaxy.<method>(`` call in the README must name an attribute
    that exists on GalaxyRepositoryClient.
    """

    from zb_mom_ww_mxgateway import GalaxyRepositoryClient

    readme_text = _readme_path().read_text(encoding="utf-8")
    call_pattern = re.compile(r"galaxy\.([A-Za-z_][A-Za-z0-9_]*)\s*\(")
    # Collapse repeated calls to one entry per method name.
    referenced = {hit.group(1) for hit in call_pattern.finditer(readme_text)}
    assert referenced, "README must contain at least one galaxy.<method>(...) example"
    for method in referenced:
        assert hasattr(GalaxyRepositoryClient, method), (
            f"README references galaxy.{method}() but no such method exists"
        )
|
||||
@@ -0,0 +1,163 @@
|
||||
"""Regression tests for Client.Python-037 and Client.Python-038.
|
||||
|
||||
Client.Python-037: ``pyproject.toml`` description must not contain "scaffold".
|
||||
Client.Python-038: ``advise-supervisory`` CLI subcommand must have coverage
|
||||
(registration smoke test + happy-path command-shape test).
|
||||
|
||||
Tests are TDD-first — written before the fix and expected to pass once the
|
||||
source change lands.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from click.testing import CliRunner
|
||||
|
||||
from zb_mom_ww_mxgateway import ClientOptions, GatewayClient
|
||||
from zb_mom_ww_mxgateway.generated import mxaccess_gateway_pb2 as pb
|
||||
from zb_mom_ww_mxgateway_cli.commands import main
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Client.Python-037 — pyproject.toml description must not contain "scaffold".
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_pyproject_description_does_not_contain_scaffold() -> None:
    """The first ``description`` line of ``pyproject.toml`` must not mention
    "scaffold" (case-insensitive) -- a regression of Client.Python-001 that
    re-entered the file at the package-rename commit.
    """

    manifest = Path(__file__).resolve().parent.parent / "pyproject.toml"
    manifest_text = manifest.read_text(encoding="utf-8")

    # Only the first line starting with "description" is inspected.
    trimmed_lines = (raw.strip() for raw in manifest_text.splitlines())
    description = next(
        (entry for entry in trimmed_lines if entry.startswith("description")),
        None,
    )

    if description is None:
        raise AssertionError("pyproject.toml has no 'description' line")

    assert "scaffold" not in description.lower(), (
        f"pyproject.toml description must not contain 'scaffold': {description!r}"
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Client.Python-038 — advise-supervisory must be registered + have a happy path.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_advise_supervisory_is_registered() -> None:
    """``advise-supervisory`` is reachable as a subcommand of ``main``.

    Its ``--help`` output must exit with status 0 and mention both required
    handle options.
    """

    help_result = CliRunner().invoke(main, ["advise-supervisory", "--help"])
    help_text = help_result.output

    assert help_result.exit_code == 0, help_text
    assert "--server-handle" in help_text
    assert "--item-handle" in help_text
|
||||
|
||||
|
||||
# --------------- fake-stub infrastructure (mirrors test_review_findings_022_to_026) ----
|
||||
|
||||
|
||||
class _FakeUnary:
|
||||
def __init__(self, replies: list[Any]) -> None:
|
||||
self.replies = replies
|
||||
self.requests: list[Any] = []
|
||||
self.metadata: tuple[tuple[str, str], ...] | None = None
|
||||
|
||||
async def __call__(self, request: Any, *, metadata: tuple[tuple[str, str], ...]) -> Any:
|
||||
self.requests.append(request)
|
||||
self.metadata = metadata
|
||||
return self.replies.pop(0)
|
||||
|
||||
|
||||
class _FakeStub:
    """Fake gateway stub exposing ``OpenSession`` and ``Invoke`` callables.

    ``OpenSession`` is pre-loaded with one OK reply for ``session-1``;
    ``Invoke`` starts empty and is filled through :meth:`set_invoke_replies`.
    """

    def __init__(self) -> None:
        ok_status = pb.ProtocolStatus(code=pb.PROTOCOL_STATUS_CODE_OK)
        session_reply = pb.OpenSessionReply(
            session_id="session-1",
            protocol_status=ok_status,
        )
        self.open_session = _FakeUnary([session_reply])
        self.invoke = _FakeUnary([])
        # Second set of names, capitalised like the RPC methods.
        self.OpenSession = self.open_session
        self.Invoke = self.invoke

    def set_invoke_replies(self, replies: list[Any]) -> None:
        """Replace the queue of replies returned by ``Invoke``."""
        self.invoke.replies = replies
|
||||
|
||||
|
||||
def _install_fake_connect(monkeypatch: Any, stub: _FakeStub) -> None:
    """Patch ``GatewayClient.connect`` so the CLI uses the supplied fake stub."""

    original_connect = GatewayClient.connect

    async def _connect_with_stub(cls: Any, options: ClientOptions, **kwargs: Any) -> GatewayClient:
        # Caller keyword arguments are dropped on purpose; the fake stub is
        # always injected.
        return await original_connect(options, stub=stub)

    monkeypatch.setattr(GatewayClient, "connect", classmethod(_connect_with_stub))
|
||||
|
||||
|
||||
def test_cli_advise_supervisory_happy_path(monkeypatch: Any) -> None:
    """``advise-supervisory`` forwards both handles inside an
    ``MX_COMMAND_KIND_ADVISE_SUPERVISORY`` ``MxCommand``.

    Follows the same pattern as ``test_cli_acknowledge_alarm_happy_path`` in
    ``test_review_findings_022_to_026.py``.
    """

    # Arrange: one OK reply queued on the fake Invoke RPC.
    ok_reply = pb.MxCommandReply(
        session_id="session-1",
        kind=pb.MX_COMMAND_KIND_ADVISE_SUPERVISORY,
        protocol_status=pb.ProtocolStatus(code=pb.PROTOCOL_STATUS_CODE_OK),
    )
    stub = _FakeStub()
    stub.set_invoke_replies([ok_reply])
    _install_fake_connect(monkeypatch, stub)

    # Act: run the subcommand through Click's test runner.
    cli_args = [
        "advise-supervisory",
        "--endpoint", "localhost:5000",
        "--plaintext",
        "--session-id", "session-1",
        "--server-handle", "7",
        "--item-handle", "42",
        "--json",
    ]
    outcome = CliRunner().invoke(main, cli_args)

    # Assert: clean exit and JSON acknowledgement.
    assert outcome.exit_code == 0, outcome.output
    assert json.loads(outcome.output)["ok"] is True

    # Assert: exactly one command reached the gateway, carrying both handles.
    sent = stub.invoke.requests
    assert len(sent) == 1
    forwarded = sent[0].command
    assert forwarded.kind == pb.MX_COMMAND_KIND_ADVISE_SUPERVISORY
    assert forwarded.advise_supervisory.server_handle == 7
    assert forwarded.advise_supervisory.item_handle == 42
|
||||
@@ -0,0 +1,209 @@
|
||||
"""Tests for Session.write_array_elements default-fill sparse-array helper."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
from zb_mom_ww_mxgateway import ClientOptions, GatewayClient
|
||||
from zb_mom_ww_mxgateway.generated import mxaccess_gateway_pb2 as pb
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_sparse_mx_value(
    element_data_type: "pb.MxDataType.ValueType",
    total_length: int,
    elements: dict[int, Any],
) -> pb.MxValue:
    """Wrap Python primitives in an ``MxValue`` holding an ``MxSparseArray``.

    Lets the tests check the sparse wire shape (data type, total length, one
    ``MxSparseElement`` per mapping entry) without going through the full gRPC
    stack. Elements are emitted in the mapping's iteration order.
    """
    from zb_mom_ww_mxgateway.values import to_mx_value

    entries = []
    for position, raw in elements.items():
        entries.append(pb.MxSparseElement(index=position, value=to_mx_value(raw)))

    payload = pb.MxSparseArray(
        element_data_type=element_data_type,
        total_length=total_length,
        elements=entries,
    )
    return pb.MxValue(sparse_array_value=payload)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fake stub (minimal — only needs Invoke / OpenSession)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _FakeUnary:
|
||||
def __init__(self, replies: list[Any]) -> None:
|
||||
self.replies = list(replies)
|
||||
self.requests: list[Any] = []
|
||||
self.metadata: tuple[tuple[str, str], ...] | None = None
|
||||
|
||||
async def __call__(
|
||||
self,
|
||||
request: Any,
|
||||
*,
|
||||
metadata: tuple[tuple[str, str], ...],
|
||||
) -> Any:
|
||||
self.requests.append(request)
|
||||
self.metadata = metadata
|
||||
return self.replies.pop(0)
|
||||
|
||||
|
||||
class _FakeStub:
    """Minimal stub that satisfies GatewayClient for a single invoke round-trip."""

    def __init__(self) -> None:
        ok = pb.ProtocolStatus(code=pb.PROTOCOL_STATUS_CODE_OK)

        # One canned reply per RPC: open the session, then acknowledge a WRITE.
        session_reply = pb.OpenSessionReply(session_id="s1", protocol_status=ok)
        write_reply = pb.MxCommandReply(
            session_id="s1",
            kind=pb.MX_COMMAND_KIND_WRITE,
            protocol_status=ok,
        )

        self.open_session = _FakeUnary([session_reply])
        self.invoke = _FakeUnary([write_reply])
        # Second set of names, capitalised like the RPC methods.
        self.OpenSession = self.open_session
        self.Invoke = self.invoke
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Unit tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_sparse_mx_value_builder_sets_correct_oneof() -> None:
    """The builder populates the ``sparse_array_value`` arm of the ``kind`` oneof."""
    built = _make_sparse_mx_value(pb.MX_DATA_TYPE_INTEGER, 5, {0: 10, 3: 30})
    active_arm = built.WhichOneof("kind")
    assert active_arm == "sparse_array_value"
|
||||
|
||||
|
||||
def test_sparse_mx_value_builder_total_length() -> None:
    """``total_length`` on the sparse array equals the value given to the builder."""
    built = _make_sparse_mx_value(pb.MX_DATA_TYPE_INTEGER, 20, {1: 7})
    sparse = built.sparse_array_value
    assert sparse.total_length == 20
|
||||
|
||||
|
||||
def test_sparse_mx_value_builder_element_count_and_values() -> None:
    """The sparse array carries one element per input entry, with matching values."""
    wanted = {0: 11, 4: 55, 9: 99}
    sparse = _make_sparse_mx_value(pb.MX_DATA_TYPE_INTEGER, 10, wanted).sparse_array_value
    assert len(sparse.elements) == 3

    # Re-key by index so the checks do not depend on element order.
    found = {}
    for element in sparse.elements:
        found[element.index] = element.value

    for position, number in wanted.items():
        assert found[position].int32_value == number
|
||||
|
||||
|
||||
def test_sparse_mx_value_builder_element_data_type() -> None:
    """The declared element data type is passed through unchanged."""
    built = _make_sparse_mx_value(pb.MX_DATA_TYPE_FLOAT, 3, {})
    declared_type = built.sparse_array_value.element_data_type
    assert declared_type == pb.MX_DATA_TYPE_FLOAT
|
||||
|
||||
|
||||
def test_sparse_mx_value_builder_empty_elements() -> None:
    """With no elements, the sparse array is empty but keeps its total length."""
    sparse = _make_sparse_mx_value(pb.MX_DATA_TYPE_BOOLEAN, 8, {}).sparse_array_value
    element_count = len(sparse.elements)
    assert element_count == 0
    assert sparse.total_length == 8
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Integration-level: write_array_elements routes through Session.write
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_write_array_elements_sends_sparse_array_write_command() -> None:
|
||||
"""write_array_elements must send a WRITE command whose value is sparse_array_value."""
|
||||
stub = _FakeStub()
|
||||
client = await GatewayClient.connect(
|
||||
ClientOptions(endpoint="fake", api_key="mxgw_test_secret", plaintext=True),
|
||||
stub=stub,
|
||||
)
|
||||
session = await client.open_session()
|
||||
|
||||
await session.write_array_elements(
|
||||
server_handle=1,
|
||||
item_handle=2,
|
||||
element_data_type=pb.MX_DATA_TYPE_INTEGER,
|
||||
total_length=10,
|
||||
elements={0: 100, 5: 500},
|
||||
)
|
||||
|
||||
assert len(stub.invoke.requests) == 1
|
||||
cmd_req: pb.MxCommandRequest = stub.invoke.requests[0]
|
||||
cmd = cmd_req.command
|
||||
assert cmd.kind == pb.MX_COMMAND_KIND_WRITE
|
||||
mv = cmd.write.value
|
||||
assert mv.WhichOneof("kind") == "sparse_array_value"
|
||||
|
||||
sa = mv.sparse_array_value
|
||||
assert sa.element_data_type == pb.MX_DATA_TYPE_INTEGER
|
||||
assert sa.total_length == 10
|
||||
assert len(sa.elements) == 2
|
||||
by_index = {e.index: e.value for e in sa.elements}
|
||||
assert by_index[0].int32_value == 100
|
||||
assert by_index[5].int32_value == 500
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_write_array_elements_forwards_user_id() -> None:
|
||||
"""user_id must reach the WriteCommand."""
|
||||
stub = _FakeStub()
|
||||
client = await GatewayClient.connect(
|
||||
ClientOptions(endpoint="fake", api_key="mxgw_test_secret", plaintext=True),
|
||||
stub=stub,
|
||||
)
|
||||
session = await client.open_session()
|
||||
|
||||
await session.write_array_elements(
|
||||
server_handle=1,
|
||||
item_handle=2,
|
||||
element_data_type=pb.MX_DATA_TYPE_BOOLEAN,
|
||||
total_length=4,
|
||||
elements={},
|
||||
user_id=42,
|
||||
)
|
||||
|
||||
cmd = stub.invoke.requests[0].command
|
||||
assert cmd.write.user_id == 42
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_write_array_elements_string_elements() -> None:
|
||||
"""String element values must be encoded as string_value scalars."""
|
||||
stub = _FakeStub()
|
||||
client = await GatewayClient.connect(
|
||||
ClientOptions(endpoint="fake", api_key="mxgw_test_secret", plaintext=True),
|
||||
stub=stub,
|
||||
)
|
||||
session = await client.open_session()
|
||||
|
||||
await session.write_array_elements(
|
||||
server_handle=1,
|
||||
item_handle=2,
|
||||
element_data_type=pb.MX_DATA_TYPE_STRING,
|
||||
total_length=3,
|
||||
elements={1: "hello", 2: "world"},
|
||||
)
|
||||
|
||||
sa = stub.invoke.requests[0].command.write.value.sparse_array_value
|
||||
by_index = {e.index: e.value for e in sa.elements}
|
||||
assert by_index[1].string_value == "hello"
|
||||
assert by_index[2].string_value == "world"
|
||||
Generated
+2
-2
@@ -590,7 +590,7 @@ checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084"
|
||||
|
||||
[[package]]
|
||||
name = "mxgw-cli"
|
||||
version = "0.1.1"
|
||||
version = "0.1.2"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"futures-util",
|
||||
@@ -1490,7 +1490,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "zb-mom-ww-mxgateway-client"
|
||||
version = "0.1.1"
|
||||
version = "0.1.2"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "zb-mom-ww-mxgateway-client"
|
||||
version = "0.1.1"
|
||||
version = "0.1.2"
|
||||
edition = "2021"
|
||||
authors = ["Joseph Doherty"]
|
||||
description = "Async Rust client for the MxAccessGateway gRPC service, including a lazy-browse walker over the Galaxy Repository hierarchy."
|
||||
@@ -20,7 +20,7 @@ resolver = "2"
|
||||
|
||||
[workspace.package]
|
||||
edition = "2021"
|
||||
version = "0.1.1"
|
||||
version = "0.1.2"
|
||||
authors = ["Joseph Doherty"]
|
||||
license = "Proprietary"
|
||||
repository = "https://gitea.dohertylan.com/dohertj2/mxaccessgw"
|
||||
|
||||
+78
-2
@@ -125,6 +125,82 @@ preserving the raw message for parity diagnostics. Command replies whose
|
||||
protocol status is not `PROTOCOL_STATUS_CODE_OK` become `Error::Command` and
|
||||
retain the raw `MxCommandReply`.
|
||||
|
||||
## Write Semantics And Common Pitfalls
|
||||
|
||||
These are MXAccess parity behaviors that surprise new callers. The gateway
|
||||
forwards them unchanged — it does not paper over them.
|
||||
|
||||
### Attributing a write to a user without `authenticate_user`
|
||||
|
||||
MXAccess only stamps a plain `write`/`write2` with a Galaxy user id when the
|
||||
item carries an active *supervisory* advise. If you are **not** using the
|
||||
verified/secured path (`authenticate_user` → `write_secured`/`write_secured2`)
|
||||
but still need the write attributed to a user id, you must first advise the
|
||||
item supervisory and then pass that user id on the write. Without the
|
||||
supervisory advise the `user_id` on a plain write is ignored.
|
||||
|
||||
The session exposes `advise`/`un_advise` but not supervisory advise, so send it
|
||||
through the generic command channel:
|
||||
|
||||
```rust
|
||||
session
|
||||
.invoke(
|
||||
MxCommandKind::AdviseSupervisory,
|
||||
Payload::AdviseSupervisory(AdviseSupervisoryCommand {
|
||||
server_handle,
|
||||
item_handle,
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
|
||||
session.write(server_handle, item_handle, value, user_id).await?;
|
||||
```
|
||||
|
||||
The CLI exposes the same command as `advise-supervisory`, and `write` /
|
||||
`write2` take `--user-id`.
|
||||
|
||||
### Array writes replace the whole array
|
||||
|
||||
A write to an array attribute **replaces the entire array**; it is not an
|
||||
element-wise patch. To change a subset of elements, send the full array with
|
||||
the unchanged elements included. For example, to change 2 elements of a
|
||||
20-element array, build the `MxValue` from all 20 values (the 18 unchanged plus
|
||||
the 2 new ones). Sending only the 2 changed values overwrites the attribute
|
||||
with a 2-element array.
|
||||
|
||||
#### Default-fill partial array writes
|
||||
|
||||
When you only need to set a handful of indices and want every other position to
|
||||
take the element type's default (zero / `false` / empty string / Unix epoch for
|
||||
timestamps), use `Session::write_array_elements` instead:
|
||||
|
||||
```rust
|
||||
// Write a 10-element integer array; index 0 = 42, index 7 = 99,
|
||||
// all other indices default to 0 (not preserved from the previous value).
|
||||
session
|
||||
.write_array_elements(
|
||||
server_handle,
|
||||
item_handle,
|
||||
MxDataType::Integer,
|
||||
10,
|
||||
[(0, MxValue::int32(42)), (7, MxValue::int32(99))],
|
||||
user_id,
|
||||
)
|
||||
.await?;
|
||||
```
|
||||
|
||||
The gateway expands the sparse representation into a full `MxArray` before
|
||||
forwarding to the worker — the worker and MXAccess COM never see the sparse
|
||||
form. Unmentioned indices are reset to the type default, **not** preserved from
|
||||
the existing attribute value.
|
||||
|
||||
#### Bare-name array AddItem normalisation
|
||||
|
||||
Adding a bare array attribute name (e.g. `Tank01.Temperature`) via `AddItem`,
|
||||
`AddItem2`, `AddItemBulk`, or `AddBufferedItem` is automatically normalised to
|
||||
`Tank01.Temperature[]` by the gateway so the worker can resolve the full array.
|
||||
You do not need to append `[]` in client code; the gateway handles it.
|
||||
|
||||
## Galaxy Repository browse
|
||||
|
||||
The Galaxy Repository service exposes a read-only browse over the AVEVA System
|
||||
@@ -161,7 +237,7 @@ cargo run -p mxgw-cli -- galaxy discover-hierarchy --endpoint http://localhost:5
|
||||
|
||||
### Browsing lazily
|
||||
|
||||
For UI trees or OPC UA bridges, use `browse_children` to walk one level at a
|
||||
For UI trees or OPC UA bridges, use `browse_children_raw` to walk one level at a
|
||||
time instead of paging the full hierarchy. Pass a default request for root
|
||||
objects; subsequent calls set `parent_gobject_id`, `parent_tag_name`, or
|
||||
`parent_contained_path`. Filter fields match `discover_hierarchy`. Each response
|
||||
@@ -172,7 +248,7 @@ request and filter semantics.
|
||||
```rust
|
||||
use zb_mom_ww_mxgateway_client::generated::galaxy_repository::v1::BrowseChildrenRequest;
|
||||
|
||||
let reply = galaxy.browse_children(BrowseChildrenRequest::default()).await?.into_inner();
|
||||
let reply = galaxy.browse_children_raw(BrowseChildrenRequest::default()).await?;
|
||||
for (child, has_children) in reply.children.iter().zip(reply.child_has_children.iter()) {
|
||||
println!("{} expand={}", child.tag_name, has_children);
|
||||
}
|
||||
|
||||
@@ -121,6 +121,7 @@ impl Session {
|
||||
pub async fn read_bulk<S: AsRef<str>>(&self, server_handle: i32, tag_addresses: &[S], timeout_ms: u32) -> Result<Vec<BulkReadResult>, Error>;
|
||||
pub async fn write(&self, server_handle: i32, item_handle: i32, value: MxValue, user_id: i32) -> Result<(), Error>;
|
||||
pub async fn write2(&self, server_handle: i32, item_handle: i32, value: MxValue, timestamp_value: MxValue, user_id: i32) -> Result<(), Error>;
|
||||
pub async fn write_array_elements(&self, server_handle: i32, item_handle: i32, element_data_type: MxDataType, total_length: u32, elements: impl IntoIterator<Item = (u32, MxValue)>, user_id: i32) -> Result<(), Error>;
|
||||
pub async fn write_bulk(&self, server_handle: i32, entries: Vec<WriteBulkEntry>) -> Result<Vec<BulkWriteResult>, Error>;
|
||||
pub async fn write2_bulk(&self, server_handle: i32, entries: Vec<Write2BulkEntry>) -> Result<Vec<BulkWriteResult>, Error>;
|
||||
pub async fn write_secured_bulk(&self, server_handle: i32, entries: Vec<WriteSecuredBulkEntry>) -> Result<Vec<BulkWriteResult>, Error>;
|
||||
@@ -333,6 +334,7 @@ mxgw close-session --session-id <id>
|
||||
mxgw register --session-id <id>
|
||||
mxgw add-item --session-id <id> --server-handle <h> --item <tag>
|
||||
mxgw advise --session-id <id> --server-handle <h> --item-handle <h>
|
||||
mxgw advise-supervisory --session-id <id> --server-handle <h> --item-handle <h>
|
||||
mxgw subscribe-bulk --session-id <id> --server-handle <h> --items <csv>
|
||||
mxgw unsubscribe-bulk --session-id <id> --server-handle <h> --item-handles <csv>
|
||||
mxgw read-bulk --session-id <id> --server-handle <h> --items <csv> [--timeout-ms <ms>]
|
||||
@@ -349,8 +351,16 @@ mxgw bench-read-bulk [--duration-seconds <n>] [--warmup-seconds <n>] [--bulk-siz
|
||||
mxgw smoke --endpoint http://localhost:5000 --api-key-env MXGATEWAY_API_KEY --item TestChildObject.TestInt
|
||||
mxgw batch
|
||||
mxgw galaxy {test-connection,last-deploy-time,discover-hierarchy,watch}
|
||||
mxgw galaxy browse [--parent-gobject-id <id>] [--category-id <id>...] [--template-contains <s>...] [--tag-name-glob <glob>] [--include-attributes] [--alarm-bearing-only] [--historized-only] [--depth <n>] [--json]
|
||||
```
|
||||
|
||||
`galaxy browse` walks the hierarchy one level at a time over the raw
|
||||
`BrowseChildren` paging path. `--depth 0` (the default) prints only the
|
||||
requested level; `--depth N` eagerly expands N additional levels beneath each
|
||||
returned node. `--parent-gobject-id` makes `--depth` a no-op (the parent's
|
||||
children are returned as a single level). Omit `--parent-gobject-id` to browse
|
||||
root objects.
|
||||
|
||||
`batch` reads commands from stdin one per line and dispatches each through
|
||||
the normal subcommand path; the loop terminates only on stdin EOF (blank
|
||||
lines log an empty-EOR-bracketed result and continue) so accidental empty
|
||||
|
||||
@@ -21,10 +21,11 @@ use serde_json::Value;
|
||||
use zb_mom_ww_mxgateway_client::galaxy::{BrowseChildrenOptions, LazyBrowseNode};
|
||||
use zb_mom_ww_mxgateway_client::generated::galaxy_repository::v1::DeployEvent;
|
||||
use zb_mom_ww_mxgateway_client::generated::mxaccess_gateway::v1::{
|
||||
alarm_feed_message, AcknowledgeAlarmRequest, AlarmFeedMessage, CloseSessionRequest, MxCommand,
|
||||
MxCommandKind, MxCommandRequest, MxEvent, MxEventFamily, MxValue as ProtoMxValue,
|
||||
OpenSessionRequest, PingCommand, StreamAlarmsRequest, StreamEventsRequest, Write2BulkEntry,
|
||||
WriteBulkEntry, WriteSecured2BulkEntry, WriteSecuredBulkEntry,
|
||||
alarm_feed_message, AcknowledgeAlarmRequest, AdviseSupervisoryCommand, AlarmFeedMessage,
|
||||
CloseSessionRequest, MxCommand, MxCommandKind, MxCommandRequest, MxEvent, MxEventFamily,
|
||||
MxValue as ProtoMxValue, OpenSessionRequest, PingCommand, StreamAlarmsRequest,
|
||||
StreamEventsRequest, Write2BulkEntry, WriteBulkEntry, WriteSecured2BulkEntry,
|
||||
WriteSecuredBulkEntry,
|
||||
};
|
||||
use zb_mom_ww_mxgateway_client::{
|
||||
next_correlation_id, ApiKey, ClientOptions, Error, GalaxyClient, GatewayClient, MxValue,
|
||||
@@ -46,8 +47,6 @@ enum Command {
|
||||
Version {
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
#[arg(long)]
|
||||
jsonl: bool,
|
||||
},
|
||||
Ping {
|
||||
#[command(flatten)]
|
||||
@@ -107,6 +106,18 @@ enum Command {
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
},
|
||||
AdviseSupervisory {
|
||||
#[command(flatten)]
|
||||
connection: ConnectionArgs,
|
||||
#[arg(long)]
|
||||
session_id: String,
|
||||
#[arg(long)]
|
||||
server_handle: i32,
|
||||
#[arg(long)]
|
||||
item_handle: i32,
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
},
|
||||
SubscribeBulk {
|
||||
#[command(flatten)]
|
||||
connection: ConnectionArgs,
|
||||
@@ -458,9 +469,16 @@ struct ConnectionArgs {
|
||||
endpoint: String,
|
||||
#[arg(long)]
|
||||
api_key: Option<String>,
|
||||
/// Name of the environment variable holding the gateway API key. The
|
||||
/// variable's value must be a full gateway key of the form
|
||||
/// `mxgw_<key-id>_<secret>`; it is forwarded verbatim as the Bearer
|
||||
/// token, so do not point this at an unrelated credential.
|
||||
#[arg(long, default_value = "MXGATEWAY_API_KEY")]
|
||||
api_key_env: String,
|
||||
#[arg(long)]
|
||||
/// Use an unencrypted (plaintext h2c) channel. Mutually exclusive with
|
||||
/// `--tls`; supplying both is rejected so an explicit `--tls` cannot be
|
||||
/// silently downgraded.
|
||||
#[arg(long, conflicts_with = "tls")]
|
||||
plaintext: bool,
|
||||
#[arg(long)]
|
||||
tls: bool,
|
||||
@@ -545,7 +563,7 @@ async fn dispatch(command: Command) -> Result<(), Error> {
|
||||
detail: "batch cannot be nested inside another batch session".to_owned(),
|
||||
});
|
||||
}
|
||||
Command::Version { json, .. } => print_version(json),
|
||||
Command::Version { json } => print_version(json),
|
||||
Command::Ping {
|
||||
connection,
|
||||
message,
|
||||
@@ -642,6 +660,27 @@ async fn dispatch(command: Command) -> Result<(), Error> {
|
||||
session.advise(server_handle, item_handle).await?;
|
||||
print_ok("advise", json);
|
||||
}
|
||||
Command::AdviseSupervisory {
|
||||
connection,
|
||||
session_id,
|
||||
server_handle,
|
||||
item_handle,
|
||||
json,
|
||||
} => {
|
||||
let session = session_for(connection, session_id).await?;
|
||||
session
|
||||
.invoke(
|
||||
MxCommandKind::AdviseSupervisory,
|
||||
zb_mom_ww_mxgateway_client::generated::mxaccess_gateway::v1::mx_command::Payload::AdviseSupervisory(
|
||||
AdviseSupervisoryCommand {
|
||||
server_handle,
|
||||
item_handle,
|
||||
},
|
||||
),
|
||||
)
|
||||
.await?;
|
||||
print_ok("advise-supervisory", json);
|
||||
}
|
||||
Command::SubscribeBulk {
|
||||
connection,
|
||||
session_id,
|
||||
@@ -1214,6 +1253,24 @@ const BROWSE_PAGE_SIZE: i32 = 500;
|
||||
/// Record `token` in `seen` and reject a repeat. A server that returns the
/// same continuation token twice would loop forever, so the second sighting is
/// converted to an `InvalidArgument` error. The caller is expected to have
/// already handled the empty (final-page) token before calling this. Extracted
/// from [`browse_children_one_level`] so the guard can be unit-tested without a
/// network client.
///
/// # Errors
///
/// Returns [`Error::InvalidArgument`] with `name == "page_token"` when `token`
/// is already present in `seen`; the detail message quotes the repeated token.
fn register_page_token(
    seen: &mut std::collections::HashSet<String>,
    token: &str,
) -> Result<(), Error> {
    // `HashSet::insert` returns `false` when the value was already present.
    if !seen.insert(token.to_owned()) {
        return Err(Error::InvalidArgument {
            name: "page_token".to_owned(),
            detail: format!("galaxy browse children returned repeated page token `{token}`"),
        });
    }
    Ok(())
}

/// Drive `BrowseChildren` paging by hand for a single parent and return the
/// flattened child list. Used by the `browse --parent-gobject-id` path, which
/// surfaces one level of children rather than the lazy root-tree walk.
async fn browse_children_one_level(
|
||||
client: &mut GalaxyClient,
|
||||
parent_gobject_id: i32,
|
||||
@@ -1254,14 +1311,7 @@ async fn browse_children_one_level(
|
||||
if page_token.is_empty() {
|
||||
return Ok(children);
|
||||
}
|
||||
if !seen.insert(page_token.clone()) {
|
||||
return Err(Error::InvalidArgument {
|
||||
name: "page_token".to_owned(),
|
||||
detail: format!(
|
||||
"galaxy browse children returned repeated page token `{page_token}`"
|
||||
),
|
||||
});
|
||||
}
|
||||
register_page_token(&mut seen, &page_token)?;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2337,7 +2387,18 @@ where
|
||||
mod tests {
|
||||
use clap::Parser;
|
||||
|
||||
use super::Cli;
|
||||
use super::{Cli, Command};
|
||||
|
||||
/// Pull the flattened `ConnectionArgs` out of a parsed `ping` command so
|
||||
/// `ConnectionArgs::options()` can be exercised directly.
|
||||
fn connection_from_ping(args: &[&str]) -> super::ConnectionArgs {
|
||||
let mut full = vec!["mxgw", "ping"];
|
||||
full.extend_from_slice(args);
|
||||
match Cli::try_parse_from(full).expect("ping parse").command {
|
||||
Command::Ping { connection, .. } => connection,
|
||||
other => panic!("expected ping command, got {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parses_version_json_command() {
|
||||
@@ -2345,6 +2406,36 @@ mod tests {
|
||||
assert!(parsed.is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn connection_defaults_to_plaintext() {
|
||||
let options = connection_from_ping(&[]).options();
|
||||
assert!(options.plaintext(), "default channel should be plaintext");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn connection_tls_flag_disables_plaintext() {
|
||||
let options = connection_from_ping(&["--tls"]).options();
|
||||
assert!(
|
||||
!options.plaintext(),
|
||||
"--tls must select an encrypted channel"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn connection_plaintext_flag_selects_plaintext() {
|
||||
let options = connection_from_ping(&["--plaintext"]).options();
|
||||
assert!(options.plaintext());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn connection_rejects_tls_and_plaintext_together() {
|
||||
let parsed = Cli::try_parse_from(["mxgw", "ping", "--tls", "--plaintext"]);
|
||||
assert!(
|
||||
parsed.is_err(),
|
||||
"--tls and --plaintext must conflict so TLS cannot be silently downgraded"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parses_write_command() {
|
||||
let parsed = Cli::try_parse_from([
|
||||
@@ -2513,6 +2604,50 @@ mod tests {
|
||||
assert_eq!(summary.mean, 42.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn register_page_token_accepts_distinct_tokens_and_rejects_repeats() {
|
||||
let mut seen = std::collections::HashSet::new();
|
||||
assert!(super::register_page_token(&mut seen, "tok-1").is_ok());
|
||||
assert!(super::register_page_token(&mut seen, "tok-2").is_ok());
|
||||
|
||||
let repeated = super::register_page_token(&mut seen, "tok-1");
|
||||
match repeated {
|
||||
Err(super::Error::InvalidArgument { name, detail }) => {
|
||||
assert_eq!(name, "page_token");
|
||||
assert!(detail.contains("tok-1"), "detail: {detail}");
|
||||
}
|
||||
other => panic!("expected InvalidArgument on repeated token, got {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rfc3339_parser_rejects_trailing_characters() {
|
||||
let err = super::parse_rfc3339_timestamp("2026-04-28T15:30:00Zextra");
|
||||
assert!(err.is_err(), "trailing chars after Z must be rejected");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rfc3339_parser_rejects_day_zero() {
|
||||
let err = super::parse_rfc3339_timestamp("2026-04-00T15:30:00Z");
|
||||
assert!(err.is_err(), "day 0 must be rejected");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rfc3339_parser_rejects_month_thirteen() {
|
||||
let err = super::parse_rfc3339_timestamp("2026-13-01T15:30:00Z");
|
||||
assert!(err.is_err(), "month 13 must be rejected");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rfc3339_parser_rejects_day_out_of_range_for_month() {
|
||||
// April has 30 days.
|
||||
let err = super::parse_rfc3339_timestamp("2026-04-31T15:30:00Z");
|
||||
assert!(err.is_err(), "April 31 must be rejected");
|
||||
// February 29 in a non-leap year.
|
||||
let feb = super::parse_rfc3339_timestamp("2025-02-29T00:00:00Z");
|
||||
assert!(feb.is_err(), "Feb 29 in a non-leap year must be rejected");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rfc3339_parser_round_trips_z_and_offset_inputs() {
|
||||
// 2026-04-28T15:30:00Z = 1_777_390_200 (sanity-checked once below)
|
||||
|
||||
@@ -17,12 +17,12 @@ use crate::generated::mxaccess_gateway::v1::mx_command_reply;
|
||||
use crate::generated::mxaccess_gateway::v1::{
|
||||
AddItem2Command, AddItemBulkCommand, AddItemCommand, AdviseCommand, AdviseItemBulkCommand,
|
||||
BulkReadResult, BulkWriteResult, CloseSessionRequest, MxCommand, MxCommandKind, MxCommandReply,
|
||||
MxCommandRequest, MxValue as ProtoMxValue, OpenSessionRequest, ReadBulkCommand,
|
||||
RegisterCommand, RemoveItemBulkCommand, RemoveItemCommand, StreamEventsRequest,
|
||||
SubscribeBulkCommand, SubscribeResult, UnAdviseCommand, UnAdviseItemBulkCommand,
|
||||
UnsubscribeBulkCommand, Write2BulkCommand, Write2BulkEntry, Write2Command, WriteBulkCommand,
|
||||
WriteBulkEntry, WriteCommand, WriteSecured2BulkCommand, WriteSecured2BulkEntry,
|
||||
WriteSecuredBulkCommand, WriteSecuredBulkEntry,
|
||||
MxCommandRequest, MxDataType, MxSparseArray, MxSparseElement, MxValue as ProtoMxValue,
|
||||
OpenSessionRequest, ReadBulkCommand, RegisterCommand, RemoveItemBulkCommand, RemoveItemCommand,
|
||||
StreamEventsRequest, SubscribeBulkCommand, SubscribeResult, UnAdviseCommand,
|
||||
UnAdviseItemBulkCommand, UnsubscribeBulkCommand, Write2BulkCommand, Write2BulkEntry,
|
||||
Write2Command, WriteBulkCommand, WriteBulkEntry, WriteCommand, WriteSecured2BulkCommand,
|
||||
WriteSecured2BulkEntry, WriteSecuredBulkCommand, WriteSecuredBulkEntry,
|
||||
};
|
||||
use crate::value::MxValue;
|
||||
|
||||
@@ -547,6 +547,60 @@ impl Session {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Write a sparse, default-filled array: only the given elements
|
||||
/// (index → scalar value) are set; every unmentioned index up to
|
||||
/// `total_length` is written as the element type's default (a reset,
|
||||
/// **not** a preserve). The gateway expands the sparse representation into
|
||||
/// a whole-array write before forwarding to the worker.
|
||||
///
|
||||
/// This is a convenience wrapper around [`Session::write`] that builds the
|
||||
/// `MxSparseArray` wire value for you. Call [`Session::write`] directly
|
||||
/// if you need to pass a pre-built [`MxValue`] carrying a full
|
||||
/// `MxArray`.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns [`Error::InvalidArgument`] (propagated from the gateway) if
|
||||
/// `total_length` is zero, exceeds the gateway's maximum array length, or
|
||||
/// any element index is out of range. Returns [`Error::Command`] for
|
||||
/// non-OK worker statuses, plus the usual transport/status errors.
|
||||
pub async fn write_array_elements(
|
||||
&self,
|
||||
server_handle: i32,
|
||||
item_handle: i32,
|
||||
element_data_type: MxDataType,
|
||||
total_length: u32,
|
||||
elements: impl IntoIterator<Item = (u32, MxValue)>,
|
||||
user_id: i32,
|
||||
) -> Result<(), Error> {
|
||||
use crate::generated::mxaccess_gateway::v1::mx_value::Kind;
|
||||
|
||||
let sparse_elements: Vec<MxSparseElement> = elements
|
||||
.into_iter()
|
||||
.map(|(index, value)| MxSparseElement {
|
||||
index,
|
||||
value: Some(value.into_proto()),
|
||||
})
|
||||
.collect();
|
||||
|
||||
let sparse_value = ProtoMxValue {
|
||||
kind: Some(Kind::SparseArrayValue(MxSparseArray {
|
||||
element_data_type: element_data_type as i32,
|
||||
total_length,
|
||||
elements: sparse_elements,
|
||||
})),
|
||||
..ProtoMxValue::default()
|
||||
};
|
||||
|
||||
self.write(
|
||||
server_handle,
|
||||
item_handle,
|
||||
MxValue::from_proto(sparse_value),
|
||||
user_id,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Run MXAccess `Write2` (single-value with caller-supplied timestamp).
|
||||
///
|
||||
/// # Errors
|
||||
|
||||
@@ -173,7 +173,11 @@ impl MxValueProjection {
|
||||
Some(Kind::TimestampValue(value)) => Self::Timestamp(*value),
|
||||
Some(Kind::ArrayValue(value)) => Self::Array(MxArrayValue::from_proto(value.clone())),
|
||||
Some(Kind::RawValue(value)) => Self::Raw(value.clone()),
|
||||
None => Self::Unset,
|
||||
// SparseArrayValue is write-only: the gateway expands it before forwarding
|
||||
// to the worker and never emits it in events or read replies. Map it to
|
||||
// Unset so any read-side code that encounters a stale or mis-routed
|
||||
// sparse value degrades gracefully rather than panicking.
|
||||
Some(Kind::SparseArrayValue(_)) | None => Self::Unset,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@ use tonic::{Request, Response, Status};
|
||||
use zb_mom_ww_mxgateway_client::generated::mxaccess_gateway::v1::mx_access_gateway_server::{
|
||||
MxAccessGateway, MxAccessGatewayServer,
|
||||
};
|
||||
use zb_mom_ww_mxgateway_client::generated::mxaccess_gateway::v1::mx_command;
|
||||
use zb_mom_ww_mxgateway_client::generated::mxaccess_gateway::v1::mx_command_reply;
|
||||
use zb_mom_ww_mxgateway_client::generated::mxaccess_gateway::v1::mx_value::Kind;
|
||||
use zb_mom_ww_mxgateway_client::generated::mxaccess_gateway::v1::{
|
||||
@@ -24,11 +25,11 @@ use zb_mom_ww_mxgateway_client::generated::mxaccess_gateway::v1::{
|
||||
AddItem2Reply, AddItemReply, AlarmConditionState, AlarmFeedMessage, AlarmTransitionKind,
|
||||
BulkReadReply, BulkReadResult, BulkSubscribeReply, BulkWriteReply, BulkWriteResult,
|
||||
CloseSessionReply, CloseSessionRequest, MxCommandKind, MxCommandReply, MxDataType, MxEvent,
|
||||
MxEventFamily, MxStatusCategory, MxStatusProxy, MxStatusSource, MxValue,
|
||||
OnAlarmTransitionEvent, OpenSessionReply, OpenSessionRequest, ProtocolStatus,
|
||||
MxEventFamily, MxSparseArray, MxSparseElement, MxStatusCategory, MxStatusProxy, MxStatusSource,
|
||||
MxValue, OnAlarmTransitionEvent, OpenSessionReply, OpenSessionRequest, ProtocolStatus,
|
||||
ProtocolStatusCode, QueryActiveAlarmsRequest, RegisterReply, SessionState, StreamAlarmsRequest,
|
||||
StreamEventsRequest, SubscribeResult, Write2BulkEntry, WriteBulkEntry, WriteSecured2BulkEntry,
|
||||
WriteSecuredBulkEntry,
|
||||
StreamEventsRequest, SubscribeResult, Write2BulkEntry, WriteBulkEntry, WriteCommand,
|
||||
WriteSecured2BulkEntry, WriteSecuredBulkEntry,
|
||||
};
|
||||
use zb_mom_ww_mxgateway_client::{
|
||||
next_correlation_id, ApiKey, ClientOptions, CommandError, Error, GatewayClient, MxStatus,
|
||||
@@ -659,6 +660,9 @@ struct FakeState {
|
||||
authorization: Mutex<Option<String>>,
|
||||
last_command_kind: Mutex<Option<i32>>,
|
||||
last_correlation_id: Mutex<Option<String>>,
|
||||
/// Captures the last `WriteCommand` payload received, populated when the
|
||||
/// `WriteOk` override is active. Used by `write_array_elements` e2e test.
|
||||
last_write_command: Mutex<Option<WriteCommand>>,
|
||||
stream_dropped: Arc<AtomicBool>,
|
||||
/// Optional per-test override that pins the fake's `Invoke` handler to
|
||||
/// a specific reply shape (or `Err(Status)`). The default of `None`
|
||||
@@ -683,6 +687,10 @@ enum InvokeOverride {
|
||||
/// Fail the unary call with `Status::unavailable(...)` so the client's
|
||||
/// `Code::Unavailable` -> `Error::Unavailable` mapping is exercised.
|
||||
Unavailable(String),
|
||||
/// Accept a `Write` command (return `protocol_status = Ok`, no payload)
|
||||
/// and capture the decoded `WriteCommand` in
|
||||
/// `FakeState::last_write_command` for inspection.
|
||||
WriteOk,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
@@ -764,6 +772,23 @@ impl MxAccessGateway for FakeGateway {
|
||||
..MxCommandReply::default()
|
||||
})),
|
||||
InvokeOverride::Unavailable(message) => Err(Status::unavailable(message)),
|
||||
InvokeOverride::WriteOk => {
|
||||
// Extract and capture the WriteCommand payload so the test
|
||||
// can assert on server_handle, item_handle, user_id, and value.
|
||||
if let Some(mx_command::Payload::Write(write_cmd)) =
|
||||
request.command.and_then(|c| c.payload)
|
||||
{
|
||||
*self.state.last_write_command.lock().await = Some(write_cmd);
|
||||
}
|
||||
Ok(Response::new(MxCommandReply {
|
||||
session_id: request.session_id,
|
||||
correlation_id: "fake-correlation".to_owned(),
|
||||
kind,
|
||||
protocol_status: Some(ok_status("write ok")),
|
||||
payload: None,
|
||||
..MxCommandReply::default()
|
||||
}))
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1091,3 +1116,178 @@ fn case_by_id<'a>(cases: &'a [Value], id: &str) -> &'a Value {
|
||||
.find(|case| case["id"].as_str() == Some(id))
|
||||
.unwrap_or_else(|| panic!("missing fixture case {id}"))
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// write_array_elements — end-to-end fake-server test
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[tokio::test]
|
||||
async fn write_array_elements_routes_sparse_array_write_through_fake_gateway() {
|
||||
// Arrange: stand up the fake gateway with WriteOk so the Write command
|
||||
// succeeds and the sent WriteCommand is captured for inspection.
|
||||
let state = Arc::new(FakeState::default());
|
||||
*state.invoke_override.lock().await = Some(InvokeOverride::WriteOk);
|
||||
let endpoint = spawn_fake_gateway(state.clone()).await;
|
||||
let client = GatewayClient::connect(ClientOptions::new(endpoint))
|
||||
.await
|
||||
.unwrap();
|
||||
let session = client.session("session-fixture");
|
||||
|
||||
// Act: call the public write_array_elements helper.
|
||||
session
|
||||
.write_array_elements(
|
||||
12,
|
||||
34,
|
||||
MxDataType::Integer,
|
||||
10,
|
||||
[(2u32, ClientMxValue::int32(42))],
|
||||
7,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Assert: the fake captured a Write command with the expected handles and
|
||||
// a SparseArrayValue whose total_length and element index/value are correct.
|
||||
let captured = state
|
||||
.last_write_command
|
||||
.lock()
|
||||
.await
|
||||
.take()
|
||||
.expect("fake should have captured a WriteCommand");
|
||||
|
||||
assert_eq!(captured.server_handle, 12, "server_handle must round-trip");
|
||||
assert_eq!(captured.item_handle, 34, "item_handle must round-trip");
|
||||
assert_eq!(captured.user_id, 7, "user_id must round-trip");
|
||||
|
||||
let value = captured.value.expect("WriteCommand must carry a value");
|
||||
assert_eq!(
|
||||
value.data_type, 0,
|
||||
"outer MxValue.data_type must be Unspecified (0), not the element type"
|
||||
);
|
||||
|
||||
let Kind::SparseArrayValue(ref sparse) = value.kind.as_ref().unwrap() else {
|
||||
panic!(
|
||||
"expected SparseArrayValue kind on the outer MxValue, got {:?}",
|
||||
value.kind
|
||||
);
|
||||
};
|
||||
assert_eq!(
|
||||
sparse.element_data_type,
|
||||
MxDataType::Integer as i32,
|
||||
"element_data_type must carry the element type"
|
||||
);
|
||||
assert_eq!(sparse.total_length, 10, "total_length must round-trip");
|
||||
assert_eq!(sparse.elements.len(), 1, "one element supplied");
|
||||
|
||||
let elem = &sparse.elements[0];
|
||||
assert_eq!(elem.index, 2, "element index must round-trip");
|
||||
assert_eq!(
|
||||
elem.value.as_ref().unwrap().kind,
|
||||
Some(Kind::Int32Value(42)),
|
||||
"element value must round-trip"
|
||||
);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// write_array_elements — proto shape unit tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Build the proto `MxValue` that `write_array_elements` would send, so that tests
|
||||
/// can assert the sparse oneof variant has the correct `total_length` and elements.
|
||||
fn sparse_int32_value(
|
||||
total_length: u32,
|
||||
elements: impl IntoIterator<Item = (u32, i32)>,
|
||||
) -> MxValue {
|
||||
let sparse_elements: Vec<MxSparseElement> = elements
|
||||
.into_iter()
|
||||
.map(|(index, v)| MxSparseElement {
|
||||
index,
|
||||
value: Some(MxValue {
|
||||
data_type: MxDataType::Integer as i32,
|
||||
variant_type: "VT_I4".to_owned(),
|
||||
kind: Some(Kind::Int32Value(v)),
|
||||
..MxValue::default()
|
||||
}),
|
||||
})
|
||||
.collect();
|
||||
|
||||
MxValue {
|
||||
// outer data_type must be 0 (Unspecified); the element type lives only
|
||||
// inside MxSparseArray.element_data_type, matching the
|
||||
// `..ProtoMxValue::default()` used in Session::write_array_elements.
|
||||
variant_type: String::new(),
|
||||
kind: Some(Kind::SparseArrayValue(MxSparseArray {
|
||||
element_data_type: MxDataType::Integer as i32,
|
||||
total_length,
|
||||
elements: sparse_elements,
|
||||
})),
|
||||
..MxValue::default()
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn write_array_elements_proto_shape_has_sparse_oneof_kind() {
|
||||
let proto = sparse_int32_value(5, [(0, 10), (3, 30)]);
|
||||
|
||||
assert_eq!(
|
||||
proto.data_type, 0,
|
||||
"outer MxValue.data_type must be 0 (Unspecified); element type lives in element_data_type"
|
||||
);
|
||||
|
||||
let Kind::SparseArrayValue(ref sparse) = proto.kind.as_ref().unwrap() else {
|
||||
panic!("expected SparseArrayValue kind, got {:?}", proto.kind);
|
||||
};
|
||||
|
||||
assert_eq!(sparse.total_length, 5, "total_length must round-trip");
|
||||
assert_eq!(sparse.elements.len(), 2, "two elements supplied");
|
||||
assert_eq!(sparse.element_data_type, MxDataType::Integer as i32);
|
||||
|
||||
let elem0 = &sparse.elements[0];
|
||||
assert_eq!(elem0.index, 0);
|
||||
assert_eq!(
|
||||
elem0.value.as_ref().unwrap().kind,
|
||||
Some(Kind::Int32Value(10))
|
||||
);
|
||||
|
||||
let elem3 = &sparse.elements[1];
|
||||
assert_eq!(elem3.index, 3);
|
||||
assert_eq!(
|
||||
elem3.value.as_ref().unwrap().kind,
|
||||
Some(Kind::Int32Value(30))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn write_array_elements_empty_elements_is_valid_all_defaults() {
|
||||
let proto = sparse_int32_value(8, []);
|
||||
assert_eq!(
|
||||
proto.data_type, 0,
|
||||
"outer MxValue.data_type must be 0 (Unspecified) even with no elements"
|
||||
);
|
||||
let Kind::SparseArrayValue(ref sparse) = proto.kind.as_ref().unwrap() else {
|
||||
panic!("expected SparseArrayValue kind");
|
||||
};
|
||||
assert_eq!(sparse.total_length, 8);
|
||||
assert!(
|
||||
sparse.elements.is_empty(),
|
||||
"no elements means every index defaults"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sparse_array_value_round_trips_through_client_mx_value_projection_as_unset() {
|
||||
// SparseArrayValue is write-only. If it ever arrives on the read path
|
||||
// (e.g. a future version bug), the projection should degrade to Unset
|
||||
// rather than panic, because the enum variant is not readable.
|
||||
let proto = sparse_int32_value(4, [(1, 99)]);
|
||||
assert_eq!(
|
||||
proto.data_type, 0,
|
||||
"outer MxValue.data_type must be 0 (Unspecified)"
|
||||
);
|
||||
let client_value = ClientMxValue::from_proto(proto);
|
||||
assert_eq!(
|
||||
client_value.projection(),
|
||||
&MxValueProjection::Unset,
|
||||
"write-only SparseArrayValue must project to Unset, not panic"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -4,8 +4,8 @@
|
||||
|---|---|
|
||||
| Module | `clients/dotnet` |
|
||||
| Reviewer | Claude Code |
|
||||
| Review date | 2026-06-15 |
|
||||
| Commit reviewed | `410acc9` |
|
||||
| Review date | 2026-06-18 |
|
||||
| Commit reviewed | `88915c3` |
|
||||
| Status | Re-reviewed |
|
||||
| Open findings | 0 |
|
||||
|
||||
@@ -603,3 +603,125 @@ Net effect at HEAD: `dotnet build clients/dotnet/ZB.MOM.WW.MxGateway.Client.slnx
|
||||
**Recommendation:** Either (a) tighten the documented contract to "ExpandAsync is safe to call concurrently, but Children/IsExpanded must only be read after the awaited ExpandAsync completes (no concurrent reader/expander)", or (b) make the publication safe: write `_isExpanded` via `Volatile.Write` and read via `Volatile.Read`, and return an immutable snapshot from `Children` (e.g. assign a completed `IReadOnlyList` under the lock and expose that field) so lock-free readers never observe a partially-populated list. Option (a) is the smallest change and matches the realistic usage (UI thread expands then renders).
|
||||
|
||||
**Resolution:** 2026-06-15 — Confirmed against source: `Children => _children` returned the live mutable backing `List<LazyBrowseNode>` and `IsExpanded => _isExpanded` read a plain `bool`, while `ExpandAsync` appended to that same list under `_expandLock` with no release/acquire barrier to lock-free readers — so a concurrent reader could enumerate a mid-append list and throw `InvalidOperationException` ("collection was modified"). Applied option (b) (safe publication): `ExpandAsync` now accumulates children into a method-local `List<LazyBrowseNode>` and, only when fully drained across all pages, publishes it via `Volatile.Write(ref _children, children)` (release) immediately before setting the now-`volatile bool _isExpanded = true`. The `_children` field is an `IReadOnlyList<LazyBrowseNode>` read via `Volatile.Read` from the `Children` getter (acquire), so a reader that observes `IsExpanded == true` always sees the fully-populated snapshot and never enumerates a partially-built list. Updated the `ExpandAsync` `<remarks>` to document the strengthened concurrent-read guarantee. Regression test `LazyBrowseNodeTests.Expand_ConcurrentReadOfChildren_NeverTearsAndPublishesAtomically` gates the child-page RPCs (via a new `FakeGalaxyRepositoryTransport.BrowseChildrenGate` hook) to hold the expand mid-flight while a background reader spins enumerating `Children` and reading `IsExpanded`, asserting no exception escapes and that once `IsExpanded` is true the published snapshot has all five children. Verified red against the pre-fix code (the reader threw `InvalidOperationException: Collection was modified` deterministically across three runs) and green after the fix.
|
||||
|
||||
#### 2026-06-18 re-review (commit 88915c3)
|
||||
|
||||
Re-review of changes since `8df5ab3`. The diff adds `WriteArrayElementsAsync` /
|
||||
`BuildSparseArray` to `MxGatewaySession`, an `advise-supervisory` CLI subcommand,
|
||||
the Client.Dotnet-028 (`TryResolveApiKey`) and Client.Dotnet-029 (`IMxGatewayCliClient`
|
||||
summary) in-source fixes, two tests covering the new sparse-array helper, a "Write
|
||||
Semantics And Common Pitfalls" section in README.md, the `LazyBrowseNode` Client.Dotnet-027
|
||||
rationale comment, and a version bump (`0.1.1` → `0.1.2`). The 029 and 028 fixes are
|
||||
correctly applied. The `isLongRunning` / `galaxy-browse` fix from Client.Dotnet-026 is
|
||||
correctly present. One Medium correctness bug found: `advise-supervisory` is in the
|
||||
dispatch table but missing from `IsKnownGatewayCommand`, making the command
|
||||
unreachable (exit 2 "Unknown command").
|
||||
|
||||
| # | Category | Result |
|
||||
|---|---|---|
|
||||
| 1 | Correctness & logic bugs | Issue found (Client.Dotnet-030): `advise-supervisory` is present in the `command switch` dispatch table but absent from `IsKnownGatewayCommand`; the guard at line 91 intercepts it first and returns exit code 2 "Unknown command", making the command completely non-functional. `WriteArrayElementsAsync` / `BuildSparseArray` logic is correct: `elementDataType` and `totalLength` are threaded through faithfully, `MxValue.SparseArrayValue` is set and the outer `MxValue.DataType` (unused by the expander) is left at the proto3 default — consistent with all other language clients. Index validation (out-of-range, duplicate, zero total_length) is correctly deferred to `SparseArrayExpander` gateway-side, consistent with Go/Rust/Python/Java. |
|
||||
| 2 | mxaccessgw conventions | No issues found — no forked proto, `authorization: Bearer` metadata unchanged, MXAccess parity preserved (sparse array is a write-only helper, reset-not-preserve semantics documented). `Async` suffix on `WriteArrayElementsAsync` correct. `BuildSparseArray` is `internal static` — appropriate since it is used by both the method and tests. |
|
||||
| 3 | Concurrency & thread safety | No issues found — `BuildSparseArray` is a pure static factory with no shared state; `WriteArrayElementsAsync` delegates to the existing `WriteAsync`. |
|
||||
| 4 | Error handling & resilience | No issues found — `ArgumentNullException.ThrowIfNull(elements)` covers the null-dict case; invalid indices / unsupported element types surface as `InvalidArgument` from `SparseArrayExpander`, which the existing `RpcExceptionMapper` maps to `MxGatewayException` with `StatusCode`. |
|
||||
| 5 | Security | No issues found — `TryResolveApiKey` correctly wired; regression test covers the env-var-sourced key path. |
|
||||
| 6 | Performance & resource management | No issues found — `BuildSparseArray` is O(n) allocation with no unnecessary copies; the protobuf `repeated` list is built in one pass. |
|
||||
| 7 | Design-document adherence | No issues found — sparse array semantics match the proto comment on `MxSparseArray` ("reset, NOT preserved") and `SparseArrayExpander`'s design; the README "bare-name auto-normalized to `[]` form at AddItem" claim is confirmed by `GatewaySession.cs:973` and `SessionManager.cs:52`. |
|
||||
| 8 | Code organization & conventions | No issues found beyond the correctness finding above (missing `IsKnownGatewayCommand` entry is the same defect). |
|
||||
| 9 | Testing coverage | No issues found — `BuildSparseArray_ProducesSparseArrayValueWithCorrectTotalLengthAndElements` and `WriteArrayElementsAsync_BuildsWriteCommandWithSparseArrayValue` cover the happy path; `RunAsync_ErrorOutput_RedactsApiKey_WhenSourcedFromEnvironmentVariable` covers the Client.Dotnet-028 path. No test for `advise-supervisory` (the new command is dead, so there is nothing to test until the `IsKnownGatewayCommand` gap is fixed). |
|
||||
| 10 | Documentation & comments | No issues found — the "Write Semantics And Common Pitfalls" README section accurately describes default-fill / reset semantics, the supervisory-advise prerequisite for user attribution, and the auto-`[]` normalization. XML docs on `WriteArrayElementsAsync` and `BuildSparseArray` are accurate and complete; the `<remarks>` block on `WriteArrayElementsAsync` correctly emphasises "RESET, not preserve". |
|
||||
|
||||
#### 2026-06-16 re-review (commit 8df5ab3)
|
||||
|
||||
Re-review of the .NET client delta: `LazyBrowseNode` lazy paging + tests, the new `MxGatewayClientCli` galaxy-browse surface + tests, `GalaxyClientFactory`/adapter seam. Client.Dotnet-025 (LazyBrowseNode publish ordering) confirmed resolved. One Medium security regression.
|
||||
|
||||
| # | Category | Result |
|
||||
|---|---|---|
|
||||
| 1 | Correctness & logic bugs | Client.Dotnet-026 |
|
||||
| 2 | mxaccessgw conventions | No issues found |
|
||||
| 3 | Concurrency & thread safety | No issues found |
|
||||
| 4 | Error handling & resilience | No issues found |
|
||||
| 5 | Security | Client.Dotnet-028 |
|
||||
| 6 | Performance & resource management | Client.Dotnet-027 |
|
||||
| 7 | Design-document adherence | No issues found |
|
||||
| 8 | Code organization & conventions | Client.Dotnet-029 |
|
||||
| 9 | Testing coverage | No issues found |
|
||||
| 10 | Documentation & comments | No issues found |
|
||||
|
||||
### Client.Dotnet-026
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `clients/dotnet/.../MxGatewayClientCli.cs:306` (isLongRunning) |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** Client.Dotnet-015 extended `isLongRunning` to include the bench commands so they aren't silently cancelled by the default 30s `CancellationTokenSource` (CTS) timeout. The new `galaxy-browse` command is NOT in `isLongRunning`. A `galaxy-browse --depth N` tree walk on a large Galaxy can exceed 30s (sequential paginated RPCs per node); the CTS fires and the resulting `OperationCanceledException` (OCE) escapes as a non-zero exit with no output — the same silent failure the bench commands were exempted from.
|
||||
|
||||
**Recommendation:** Add `"galaxy-browse"` to the `isLongRunning` set alongside `galaxy-watch`/bench, so it defaults to unlimited wall-clock and only applies `CancelAfter` with an explicit `--timeout`.
|
||||
|
||||
**Resolution:** 2026-06-16 — Confirmed against source: `CreateCancellation`'s `isLongRunning` expression at line 306 read `command is "galaxy-watch"` only — `galaxy-browse` was absent, so the default 30 s `CancelAfter` budget applied and a deep paginated tree walk that overran it would have the OCE escape as a non-zero exit with no output. (Note: at HEAD the bench commands the finding cites are also not in this set despite Client.Dotnet-015's recorded resolution, but per the task scope only `galaxy-browse` is added here.) Changed the expression to `command is "galaxy-watch" or "galaxy-browse"`, so `galaxy-browse` now runs to completion by default and only applies `CancelAfter` when the caller supplies an explicit `--timeout`. Pure correctness fix matching the existing `galaxy-watch` precedent.
|
||||
|
||||
### Client.Dotnet-027
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Performance & resource management |
|
||||
| Location | `clients/dotnet/ZB.MOM.WW.MxGateway.Client/LazyBrowseNode.cs:15` |
|
||||
| Status | Won't Fix |
|
||||
|
||||
**Description:** `LazyBrowseNode` allocates one `SemaphoreSlim _expandLock = new(1,1)` per node and never disposes it (the type is not IDisposable). For a large Galaxy browse tree (thousands of nodes), live SemaphoreSlim instances accumulate; OS handles are released only on finalization. Negligible for small trees, meaningful for long-lived large trees.
|
||||
|
||||
**Recommendation:** Replace the once-only async gate with a non-disposable primitive (e.g. `Lazy<Task>`-based dedup) or make `LazyBrowseNode` IDisposable and dispose the semaphore. Document the chosen lifetime contract.
|
||||
|
||||
**Resolution:** 2026-06-16 — **Won't Fix.** The finding's premise — that the undisposed semaphore leaks an OS handle until finalization — does not hold for this usage. `SemaphoreSlim` only allocates a kernel wait handle (`ManualResetEvent`) lazily, the first time its `AvailableWaitHandle` property is accessed; `LazyBrowseNode` uses the gate exclusively via `WaitAsync`/`Release` and never touches `AvailableWaitHandle` (verified by grep), so no unmanaged/OS handle is ever created. The semaphore is therefore pure managed memory whose lifetime is the node's and which is reclaimed by the GC with the node — `SemaphoreSlim.Dispose()` would have nothing to release. Making the type `IDisposable` (or restructuring to a `Lazy<Task>` gate) would change the public surface and push per-node disposal onto every tree consumer (thousands of nodes) for zero resource benefit, so it is not worth the over-engineering. Added an inline code comment at `LazyBrowseNode.cs:15` documenting this lifetime contract and the no-handle rationale so the design intent is explicit. No test added (no behavior change).
|
||||
|
||||
### Client.Dotnet-028
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Medium |
|
||||
| Category | Security |
|
||||
| Location | `clients/dotnet/.../MxGatewayClientCli.cs:156` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** Client.Dotnet-008 was recorded resolved by adding a `TryResolveApiKey` helper resolving both `--api-key` and the `--api-key-env` env-var path, wired into the error-redaction catch block. At HEAD the catch block reads `arguments.GetOptional("api-key")` only — the pre-008 code. When the key is sourced from the env var, `GetOptional("api-key")` returns null, `Redact(message, null)` is a no-op, and an exception message echoing the bearer key would print it raw to stderr. The existing regression test only covers the `--api-key` direct path, so it passes against the broken code. (Claimed regression — verify root cause before fixing.)
|
||||
|
||||
**Recommendation:** Restore the `TryResolveApiKey` pattern (resolve `--api-key` then the `--api-key-env`-named env var, default `MXGATEWAY_API_KEY`) in the catch block, and add a regression test that sources the key from the env var and asserts it is redacted in stderr.
|
||||
|
||||
**Resolution:** 2026-06-16 — **Confirmed: real regression.** The `RunCoreAsync` catch block at line 156 resolved the redaction key via `arguments.GetOptional("api-key")` only, and no `TryResolveApiKey` helper existed anywhere in the CLI project (verified by grep) — the Client.Dotnet-008 helper had been lost from the history reaching HEAD, same as the 012/013/022/023 props/doc regressions. On the `--api-key-env` path `GetOptional("api-key")` is null, so `Redact(message, null)` was a no-op and a transport error echoing the bearer token would have reached stderr unredacted. Restored a non-throwing `TryResolveApiKey(CliArguments)` helper that resolves `--api-key` then the `--api-key-env`-named env var (default `MXGATEWAY_API_KEY`) and returns null when neither is set; refactored `ResolveApiKey` to call it (so the resolution order stays single-sourced) and changed the catch block to redact `TryResolveApiKey(arguments)` instead of `GetOptional("api-key")`. Regression test `MxGatewayClientCliTests.RunAsync_ErrorOutput_RedactsApiKey_WhenSourcedFromEnvironmentVariable` sets a dedicated env var (`MXGATEWAY_TEST_API_KEY_028`), runs `open-session --api-key-env <name>` (no `--api-key` flag) against a client factory that throws an `InvalidOperationException` whose message embeds the secret, and asserts exit 1, that the secret is absent from stderr, and that `[redacted]` is present. The pre-existing `--api-key`-path test (`RunAsync_ErrorOutput_RedactsApiKey`) is retained; the new test fails against the `GetOptional("api-key")`-only catch block (key printed raw) and passes after the fix.
|
||||
|
||||
### Client.Dotnet-029
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Code organization & conventions |
|
||||
| Location | `clients/dotnet/.../IMxGatewayCliClient.cs:6` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `IMxGatewayCliClient` is a public interface with no type-level `<summary>` XML doc. The Client.Dotnet-013 resolution recorded adding one; at HEAD it is absent. No CS1591 fires (GenerateDocumentationFile now scoped to the packable library only), but the public extension point should follow the public-surface doc convention.
|
||||
|
||||
**Recommendation:** Add a one-line `<summary>` describing the interface and noting `MxGatewayCliClientAdapter` is the production binding.
|
||||
|
||||
**Resolution:** 2026-06-16 — Confirmed against source: the interface declaration at `IMxGatewayCliClient.cs:6` had no type-level `<summary>` (only the members were documented). Added a type-level `<summary>` describing the interface as the CLI's transport seam over the gateway and Galaxy Repository RPCs, naming `MxGatewayCliClientAdapter` (over a real `MxGatewayClient`) as the production binding and the in-memory fake as the test substitute. Pure documentation change — no test needed.
|
||||
|
||||
### Client.Dotnet-030
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Medium |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `clients/dotnet/ZB.MOM.WW.MxGateway.Client.Cli/MxGatewayClientCli.cs:91-93,113,2023-2050` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `advise-supervisory` was added to the `command switch` dispatch table at line 113 but was not added to `IsKnownGatewayCommand` (the exhaustive list at lines 2023–2050). The guard at line 91 evaluates `IsKnownGatewayCommand(command)` before the dispatch table is reached; because `"advise-supervisory"` is absent from that list, `WriteUnknownCommand` is called and the method returns exit code 2 with "Unknown command 'advise-supervisory'." printed to stderr. The handler at line 113 is dead code — it can never execute.
|
||||
|
||||
The README documents `advise-supervisory` (`clients/dotnet/README.md:159` "The CLI exposes the same command as `advise-supervisory`") and `WriteUsage` lists it (line 2093), so callers following the docs will receive a confusing failure with no obvious remedy.
|
||||
|
||||
Note: `"advise"` is correctly present in `IsKnownGatewayCommand` (line 2030); the omission of `"advise-supervisory"` is an oversight introduced when the command was added in this diff.
|
||||
|
||||
**Recommendation:** Add `or "advise-supervisory"` to the `IsKnownGatewayCommand` expression (e.g. after `"advise"` at line 2030). Add a test (`MxGatewayClientCliTests`) that invokes `advise-supervisory` through `RunAsync` with a fake client and asserts exit code 0 (not 2) and that the reply is written to stdout — this would have caught the regression immediately.
|
||||
|
||||
**Resolution:** 2026-06-18 — Confirmed root cause: `"advise-supervisory"` was absent from the `IsKnownGatewayCommand` expression in `MxGatewayClientCli.cs`, so the guard at line 91 intercepted every invocation and returned exit 2 "Unknown command 'advise-supervisory'." before the dispatch table was reached. Added `or "advise-supervisory"` after `or "advise"` at line 2031 in `IsKnownGatewayCommand`. Regression test `MxGatewayClientCliTests.RunAsync_AdviseSupervisory_IsRecognizedAndReachesDispatch` verified red (stderr contained "Unknown command 'advise-supervisory'.") against the pre-fix code and green after; full 86-test suite passes.
|
||||
|
||||
@@ -4,8 +4,8 @@
|
||||
|---|---|
|
||||
| Module | `clients/go` |
|
||||
| Reviewer | Claude Code |
|
||||
| Review date | 2026-06-15 |
|
||||
| Commit reviewed | `410acc9` |
|
||||
| Review date | 2026-06-18 |
|
||||
| Commit reviewed | `88915c3` |
|
||||
| Status | Re-reviewed |
|
||||
| Open findings | 0 |
|
||||
|
||||
@@ -116,6 +116,23 @@ justified — not a finding. The `LazyBrowseNode` concurrency model
|
||||
| 9 | Testing coverage | No issues found — new walker, pagination, dup-token, filter-forwarding, and TLS-posture paths are all covered. |
|
||||
| 10 | Documentation & comments | New issue: README "Installing the Go client" recommends the `GONOSUMCHECK` env var, which was removed from the Go toolchain in 1.13 and is a no-op on Go 1.26 (Client.Go-029). |
|
||||
|
||||
#### 2026-06-16 re-review (commit 8df5ab3)
|
||||
|
||||
Re-review of the Go client delta: new `ping`/`galaxy-browse` CLI commands, `Write2`/bulk additions, session.go. gofmt/vet/build clean. Two claimed regressions of prior resolutions (Go-013 drain, Go-020 signal handler) — verify root cause before fixing.
|
||||
|
||||
| # | Category | Result |
|
||||
|---|---|---|
|
||||
| 1 | Correctness & logic bugs | Client.Go-031 |
|
||||
| 2 | mxaccessgw conventions | No issues found |
|
||||
| 3 | Concurrency & thread safety | Client.Go-030 |
|
||||
| 4 | Error handling & resilience | No issues found |
|
||||
| 5 | Security | No issues found |
|
||||
| 6 | Performance & resource management | No issues found |
|
||||
| 7 | Design-document adherence | No issues found |
|
||||
| 8 | Code organization & conventions | Client.Go-032 |
|
||||
| 9 | Testing coverage | Client.Go-033 |
|
||||
| 10 | Documentation & comments | Client.Go-034 |
|
||||
|
||||
## Findings
|
||||
|
||||
### Client.Go-001
|
||||
@@ -706,3 +723,151 @@ if ($dirty) {
|
||||
**Recommendation:** Drop `GONOSUMCHECK` and document the current knobs: set `GOPRIVATE=gitea.dohertylan.com/*` (covers both the checksum-database bypass and direct VCS fetch); `GONOSUMDB` is the narrower setting when only checksum-database verification needs to be skipped, and `GOINSECURE` is needed only for hosts served over plain HTTP. Concretely: "set `GOPRIVATE=gitea.dohertylan.com/*` (this disables both the checksum database and the public module proxy for that path); add `GOINSECURE=gitea.dohertylan.com/*` if the host serves the module over plain HTTP."
|
||||
|
||||
**Resolution:** 2026-06-15 — Dropped the dead `GONOSUMCHECK` advice from the "Installing the Go client" section of `clients/go/README.md`; it now documents `GOPRIVATE=gitea.dohertylan.com/*` (which bypasses both the public module proxy and checksum-database verification for that path) plus `GOINSECURE=gitea.dohertylan.com/*` for plain-HTTP hosts.
|
||||
|
||||
### Client.Go-030
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Medium |
|
||||
| Category | Concurrency & thread safety |
|
||||
| Location | `clients/go/cmd/mxgw-go/main.go:1491-1494` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `runGalaxyWatch`'s limit-reached branch calls `cancelStream()` and returns WITHOUT draining the buffered `events` channel, unlike the signal-cancel branch which drains. This is the shape Client.Go-013's resolution claimed to have fixed ("now drains via for range events"). The WatchDeployEvents goroutine may still be blocked sending into the 16-deep channel; it exits via ctx cancellation (not a permanent leak) but remains alive until that propagates, racing `defer client.Close()`. (Claimed regression — verify root cause.)
|
||||
|
||||
**Recommendation:** After `cancelStream()` in the limit-reached branch, drain: `for range events {}`, mirroring the signal-cancel branch.
|
||||
|
||||
**Resolution:** 2026-06-16 — Confirmed real: the limit-reached branch returned right after `cancelStream()` while the signal-cancel branch drained `events`, so the buffered (16-deep) `WatchDeployEvents` producer could remain blocked on a send while `defer client.Close()` tore the stream down. Added the `for range events {}` drain to the limit-reached branch, mirroring the signal-cancel branch. Behaviour exercised by the existing `runGalaxyWatch` flow; verified via `go vet`/`go build`/`go test ./...`.
|
||||
|
||||
### Client.Go-031
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `clients/go/cmd/mxgw-go/main.go:1037-1046` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `closeSmokeSession` registers `defer cancel()` twice on the same `cancel` variable across two `context.WithTimeout` calls when the deadline-shortening branch fires. Because `cancel` is reassigned, both defers end up calling the second context's cancel (idempotent, harmless today), while the first context is released by an explicit `cancel()`. The double-defer-on-reassigned-variable is fragile: removing the explicit `cancel()` in a future refactor would leak the first context's timer goroutine.
|
||||
|
||||
**Recommendation:** Use a distinct variable for the second cancel, or compute the close timeout once before allocating a single context.
|
||||
|
||||
**Resolution:** 2026-06-16 — Confirmed real. Rewrote `closeSmokeSession` to compute the close timeout once (default 5s, shortened to the caller's remaining deadline when sooner) and then allocate a single `context.WithTimeout` with a single `defer cancel()`, removing the reassigned-variable double-defer entirely.
|
||||
|
||||
### Client.Go-032
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Code organization & conventions |
|
||||
| Location | `clients/go/cmd/mxgw-go/main.go:839-841` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `runStreamEvents` does not install a `signal.NotifyContext` handler, while `runStreamAlarms` and `runGalaxyWatch` do. Client.Go-020's resolution claimed this was added. Without a signal-aware parent context, Ctrl+C kills the process without running `defer subscription.Close()`/`client.Close()`, so the gateway sees a torn connection rather than a clean `codes.Canceled`. (Claimed regression — verify root cause.)
|
||||
|
||||
**Recommendation:** Wrap `ctx` with `signal.NotifyContext(ctx, os.Interrupt, syscall.SIGTERM)` (defer the stop) before deriving `streamCtx`, matching the other two stream commands.
|
||||
|
||||
**Resolution:** 2026-06-16 — Confirmed real: `runStreamEvents` derived `streamCtx` directly from `ctx` with no signal handler (and `runStreamAlarms` even carried a "Mirror runStreamEvents" comment that no longer matched). Added `signal.NotifyContext(ctx, os.Interrupt, syscall.SIGTERM)` (with `defer stopSignals()`) before deriving `streamCtx`, so Ctrl+C/SIGTERM cancels the stream cleanly (gateway sees `codes.Canceled`) and the deferred `subscription.Close()`/`client.Close()` run. Imports already present. CLI guard covered by `TestRunStreamEventsRequiresSessionID`.
|
||||
|
||||
### Client.Go-033
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `clients/go/cmd/mxgw-go/main_test.go` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** Gaps vs prior coverage: (1) `TestRunBenchReadBulkRejectsNonPositiveDuration` (named in Client.Go-021's resolution) is absent — the `-duration-seconds`-positive guard at main.go:619 is untested; (2) `runStreamEvents` has no CLI-level test (session-id-required and limit paths untested); (3) `TestRunWriteBulkVariantRejectsMismatchedHandlesAndValues` (Client.Go-021 deliverable) is absent — the len-mismatch guard at main.go:508-510 is untested.
|
||||
|
||||
**Recommendation:** Add the three missing tests; all run through `runWithIO` without a fake server (except the stream-events one which can reuse the ping test's fake-server pattern).
|
||||
|
||||
**Resolution:** 2026-06-16 — Confirmed all three tests absent. Added them to `cmd/mxgw-go/main_test.go`, each driving `runWithIO` and asserting the guard error before any dial: `TestRunBenchReadBulkRejectsNonPositiveDuration` (`-duration-seconds 0` → "duration-seconds must be positive"), `TestRunStreamEventsRequiresSessionID` (no `-session-id` → "session-id is required"), and `TestRunWriteBulkVariantRejectsMismatchedHandlesAndValues` (2 handles / 1 value → "does not match values count"). All three pass under `go test ./...`.
|
||||
|
||||
### Client.Go-034
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Location | `clients/go/README.md:245-263` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The README CLI example table lists ~12 commands but the binary now exposes ~27 subcommands (per `writeUsage`). Absent: `ping`, `galaxy-browse`, `batch`, `read-bulk`, `write-bulk`, `write2-bulk`, `write-secured-bulk`, `write-secured2-bulk`, `bench-read-bulk`, `stream-alarms`, `acknowledge-alarm`, and more. `batch` (the cross-language harness interface with an EOR sentinel + 16 MiB line cap) is undocumented entirely.
|
||||
|
||||
**Recommendation:** Add a complete subcommand reference, and document the `batch` EOR-sentinel protocol and line cap.
|
||||
|
||||
**Resolution:** 2026-06-16 — Expanded the README CLI section with a "Subcommand reference" table covering all 27 subcommands wired into `run` (incl. `ping`, `galaxy-browse`, `read-bulk`, the four bulk-write variants, `bench-read-bulk`, `stream-alarms`, `acknowledge-alarm`, `batch`), refreshed the example block, and added a "`batch` mode" subsection documenting the `__MXGW_BATCH_EOR__` end-of-result sentinel, the JSON error framing, blank-line skipping, and the 16 MiB scanner line cap.
|
||||
|
||||
#### 2026-06-18 re-review (commit 88915c3)
|
||||
|
||||
Re-review of `clients/go/` changes since `8df5ab3`: `WriteArrayElements` default-fill helper (`mxgateway/session.go`), `MxSparseArray`/`MxSparseElement` type aliases (`types.go`), `advise-supervisory` CLI subcommand (`cmd/mxgw-go/main.go`), prior-finding fixes (`runStreamEvents` signal handler, `closeSmokeSession` double-defer, `runGalaxyWatch` limit-drain, test gaps from Client.Go-033), and README write-semantics documentation. `gofmt -l .`, `go vet ./...`, `go build ./...`, and `go test ./... -count=1` are all clean.
|
||||
|
||||
`WriteArrayElements` is structurally correct: `sort.Slice` on unique `uint32` keys produces a deterministic slice (no stable-sort needed); ranging over a nil map produces an empty slice without panic (Go idiom); the `SparseArrayValue` oneof arm is set correctly. Three new findings: `advise-supervisory` missing from `writeUsage` and the README subcommand table; no CLI-level test for `advise-supervisory`; README claims `write2` as a CLI command when Go exposes only `write2-bulk`.
|
||||
|
||||
| # | Category | Result |
|
||||
|---|---|---|
|
||||
| 1 | Correctness & logic bugs | No issues found. `buildSparseArrayValue` handles nil map (empty-range, no panic), duplicate-free `uint32` keys, and deterministic sort correctly. |
|
||||
| 2 | mxaccessgw conventions | No issues found. `gofmt -l .` / `go vet ./...` clean; the direct `pb` import in `runAdviseSupervisory` matches the README-documented escape-hatch pattern. |
|
||||
| 3 | Concurrency & thread safety | No issues found. `WriteArrayElements` is a pure helper with no shared state. |
|
||||
| 4 | Error handling & resilience | No issues found. `WriteArrayElements` delegates to `Write`; error propagation is consistent with sibling methods. |
|
||||
| 5 | Security | No issues found. |
|
||||
| 6 | Performance & resource management | No issues found. `sort.Slice` on a small caller-sized slice is appropriate. |
|
||||
| 7 | Design-document adherence | No issues found. `SparseArrayValue` oneof arm matches the proto contract; RESET semantics documented in code and README. |
|
||||
| 8 | Code organization & conventions | `advise-supervisory` is wired into `run()` but absent from `writeUsage()` and the README subcommand table (Client.Go-035). |
|
||||
| 9 | Testing coverage | `advise-supervisory` has no CLI-level test, not even the session-id-required guard (Client.Go-036). |
|
||||
| 10 | Documentation & comments | README "Write Semantics" section claims "`write` / `write2` take `--user-id`" but Go CLI has no standalone `write2` command (Client.Go-037). |
|
||||
|
||||
### Client.Go-035
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Code organization & conventions |
|
||||
| Location | `clients/go/cmd/mxgw-go/main.go:1298`, `clients/go/README.md:328-355` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `advise-supervisory` is wired into the `run()` dispatch switch at `main.go:91-92` but is absent from two surfaces a user consults to discover CLI commands:
|
||||
|
||||
1. `writeUsage()` at `main.go:1298` does not list `advise-supervisory` in its pipe-separated command enumeration, so `mxgw-go` invoked with no arguments or an unknown command prints a usage banner that omits the command.
|
||||
2. The README "Subcommand reference" table at `README.md:328-355` — added in commit `9eedf9d` to be a complete canonical list — also omits `advise-supervisory`.
|
||||
|
||||
This is exactly the shape Client.Go-012 (resolved 2026-05-20) and Client.Go-034 (resolved 2026-06-16) documented for previously-missing commands.
|
||||
|
||||
**Recommendation:** Add `advise-supervisory` to the `writeUsage` string and to the README subcommand table (e.g., `| advise-supervisory | Advise one item supervisory — required before a userID-attributed plain Write. |`).
|
||||
|
||||
**Resolution:** 2026-06-18 — Confirmed both omissions. Added `advise-supervisory` to the pipe-separated command list in `writeUsage()` (after `advise`) and added a row for it in the README "Subcommand reference" table: `| advise-supervisory | Advise one item supervisory — required before a user-id-attributed plain write. |`. `go build ./...` and `go test ./...` green.
|
||||
|
||||
### Client.Go-036
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `clients/go/cmd/mxgw-go/main_test.go`, `clients/go/cmd/mxgw-go/main.go:364-399` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `runAdviseSupervisory` has no CLI-level test in `main_test.go`. In particular the session-id-required guard at `main.go:376-378` is untested, unlike every other guard for session-id-required commands (e.g. `TestRunStreamEventsRequiresSessionID`, added in the same commit range by Client.Go-033). A future refactor that removes the guard, or makes it conditional, would not be caught by any regression test. The pattern for adding such a test is already established in the test file and requires no bufconn fake.
|
||||
|
||||
**Recommendation:** Add `TestRunAdviseSupervisoryRequiresSessionID` to `cmd/mxgw-go/main_test.go`, driving `runWithIO` with `[]string{"advise-supervisory", "-plaintext", "-api-key", "test"}` (no `-session-id`) and asserting `err.Error()` contains `"session-id is required"`. Mirrors `TestRunStreamEventsRequiresSessionID`.
|
||||
|
||||
**Resolution:** 2026-06-18 — Confirmed guard exists at `main.go:376-378` but had no test. Added `TestRunAdviseSupervisoryRequiresSessionID` to `cmd/mxgw-go/main_test.go` (mirrors `TestRunStreamEventsRequiresSessionID`): drives `runWithIO` with `["advise-supervisory", "-plaintext", "-api-key", "test"]` and asserts `err` contains `"session-id is required"`. Test passes immediately (guard was already present); pinned against future removal. `go test ./...` green.
|
||||
|
||||
### Client.Go-037
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Location | `clients/go/README.md:136-137` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The README "Write Semantics" section states:
|
||||
|
||||
> The CLI exposes the same command as `advise-supervisory`, and `write` / `write2` take `--user-id`.
|
||||
|
||||
The Go CLI has no standalone `write2` command — only `write2-bulk`. The analogous statement in the Python, Rust, and .NET READMEs is accurate because those CLIs do expose `write2` as a standalone subcommand. A Go caller following this doc and attempting `mxgw-go write2 -session-id ... -type int32 -value 42 -timestamp 2026-01-01T00:00:00Z` receives `unknown command "write2"` (routed to the default branch of `run()`), not the expected MXAccess Write2 call.
|
||||
|
||||
**Recommendation:** Change the sentence to accurately reflect the Go CLI surface, e.g.: "The CLI exposes the same command as `advise-supervisory`, and `write` takes `-user-id`." If a standalone `write2` command is intended for cross-client parity, add it (mirroring `runWrite` with the addition of a `-timestamp` flag and a `Write2Raw`/`Write2` SDK call).
|
||||
|
||||
**Resolution:** 2026-06-18 — Confirmed root cause: Go CLI has no `write2` case in the `runWithIO` switch and no `runWrite2` function; only `write2-bulk` exists. Changed the sentence in `clients/go/README.md` from "The CLI exposes the same command as `advise-supervisory`, and `write` / `write2` take `--user-id`." to "The CLI exposes the same command as `advise-supervisory`, and `write` takes `-user-id`." No standalone `write2` command added (cross-client parity decision deferred to a future change). `go build ./...` and `go test ./...` green.
|
||||
|
||||
@@ -4,8 +4,8 @@
|
||||
|---|---|
|
||||
| Module | `clients/java` |
|
||||
| Reviewer | Claude Code |
|
||||
| Review date | 2026-06-15 |
|
||||
| Commit reviewed | `410acc9` |
|
||||
| Review date | 2026-06-18 |
|
||||
| Commit reviewed | `88915c3` |
|
||||
| Status | Re-reviewed |
|
||||
| Open findings | 0 |
|
||||
|
||||
@@ -106,6 +106,48 @@ Client.Java-001..036 are unchanged.
|
||||
| 9 | Testing coverage | No issues found. The browse surface has thorough library tests in `GalaxyRepositoryClientTests` (roots, expand-populates, idempotent-single-RPC, unknown-parent not-found, multi-page gather, concurrent-callers-one-RPC, filter forwarding, repeated-page-token rejection); TLS lenient/strict paths are covered by `MxGatewayClientTlsTests` against a real in-process TLS server. |
|
||||
| 10 | Documentation & comments | Issue found: the README "Browsing lazily" first code snippet calls `galaxy.browseChildren(BrowseChildrenRequest…)`, but no such method exists on `GalaxyRepositoryClient` — the raw single-RPC method is `browseChildrenRaw(BrowseChildrenRequest)`; the documented snippet does not compile (Client.Java-037). |
|
||||
|
||||
#### 2026-06-16 re-review (commit 8df5ab3)
|
||||
|
||||
Re-review of the Java client delta: the §8 `GalaxyClientFactory` seam, `InProcessGatewayHarness`, and the §8 CLI test coverage. Seam is behavior-preserving; harness channel lifecycle correct. One Medium concurrency item in the pre-existing stream-alarms overflow handler.
|
||||
|
||||
| # | Category | Result |
|
||||
|---|---|---|
|
||||
| 1 | Correctness & logic bugs | Client.Java-040, Client.Java-041 |
|
||||
| 2 | mxaccessgw conventions | No issues found |
|
||||
| 3 | Concurrency & thread safety | Client.Java-040 |
|
||||
| 4 | Error handling & resilience | Client.Java-042 |
|
||||
| 5 | Security | No issues found |
|
||||
| 6 | Performance & resource management | No issues found |
|
||||
| 7 | Design-document adherence | No issues found |
|
||||
| 8 | Code organization & conventions | Client.Java-043, Client.Java-044 |
|
||||
| 9 | Testing coverage | Client.Java-045, Client.Java-046 |
|
||||
| 10 | Documentation & comments | Client.Java-047, Client.Java-048 |
|
||||
|
||||
#### 2026-06-18 re-review (commit 88915c3)
|
||||
|
||||
Re-review pass at `88915c3`. Diff against `8df5ab3` is seven commits touching
|
||||
`clients/java`: `8df0479` / `bdb7e14` / `8cebe43` / `bed647c` (Client.Java-040..048
|
||||
fixes — control-character JSON escaping, stream-alarms terminal-slot fix, async
|
||||
overflow flood test, `InProcessGatewayHarness` Javadoc, Javadoc corrections,
|
||||
and `MxGatewayClientVersion` bump to 0.1.1); `9eedf9d` (parity-gotchas docs +
|
||||
`advise-supervisory` CLI subcommand across all language clients);
|
||||
`e7b8aa6` (Java `writeArrayElements` default-fill SDK helper + session test);
|
||||
`88915c3` (version bump `0.1.1 → 0.1.2` in `build.gradle`). Generated protobuf
|
||||
Java (`src/main/generated/`) excluded from review — build churn only.
|
||||
|
||||
| # | Category | Result |
|
||||
|---|---|---|
|
||||
| 1 | Correctness & logic bugs | No issues found. `writeArrayElements` builds the `MxSparseArray` correctly: `elementDataType`, `totalLength`, and elements iterated via `TreeMap` for deterministic ascending order match the Go/Rust/Python/dotnet reference implementations. The `MxValues.decode` `SPARSE_ARRAY_VALUE -> null` arm is sound — the type is write-only and never returned by the gateway; returning `null` is the correct sentinel (matches `KIND_NOT_SET`). |
|
||||
| 2 | mxaccessgw conventions | No issues found. `advise-supervisory` routes through `invokeCommand` using `MX_COMMAND_KIND_ADVISE_SUPERVISORY` — no MXAccess COM touched in the client, generated code untouched. |
|
||||
| 3 | Concurrency & thread safety | No issues found. The `stream-alarms` terminal-slot rework (`AtomicBoolean terminated` + `AtomicReference<Object> terminal`) is a sound first-terminal-wins design. The poll-then-check-terminal drain loop is correct for the `terminal.set` publish ordering (`terminated=true` is set before `terminal.set(item)`, but the drain only reads `terminal` when `poll` returns null, so a retry on the next 50ms poll sees it). |
|
||||
| 4 | Error handling & resilience | No issues found. `writeArrayElements` propagates transport/protocol errors via the existing `writeRaw` / `invokeCommand` path and its `MxGatewayException` contract. |
|
||||
| 5 | Security | No issues found. No new auth surface, no logging of values or credentials. |
|
||||
| 6 | Performance & resource management | No issues found. `new TreeMap<>(elements)` makes a defensive copy for deterministic iteration — correct and cheap for practical element counts. |
|
||||
| 7 | Design-document adherence | No issues found. `writeArrayElements` delegates to `writeRaw`, which ultimately routes through the normal `MX_COMMAND_KIND_WRITE` path — MXAccess parity is preserved; the gateway expands the sparse descriptor, not the client. |
|
||||
| 8 | Code organization & conventions | Issue found: `build.gradle` bumped to `0.1.2` but `MxGatewayClientVersion.CLIENT_VERSION` remains `"0.1.1"` and the tests assert `0.1.1` — same version-split as resolved Client.Java-044 (Client.Java-049). |
|
||||
| 9 | Testing coverage | Issue found: the new `advise-supervisory` CLI subcommand has a `FakeSession` stub but no dedicated CLI-level test (Client.Java-050). |
|
||||
| 10 | Documentation & comments | Issue found: the `writeArrayElements` Javadoc documents the `[0, totalLength)` index contract and `totalLength > 0` as required, but no client-side `IllegalArgumentException` is thrown for violations — only the Javadoc describes the constraint; a negative Java `int` is silently reinterpreted as a large `uint32` on the wire (Client.Java-051). The README dependency example still shows `0.1.1` (cross-ref Client.Java-049). |
|
||||
|
||||
## Findings
|
||||
|
||||
### Client.Java-001
|
||||
@@ -728,6 +770,141 @@ BrowseChildrenReply reply = galaxy.browseChildren(
|
||||
|
||||
**Resolution:** 2026-06-15 — Confirmed against source: `MxGatewayClientOptions` (`zb-mom-ww-mxgateway-client/.../MxGatewayClientOptions.java:108,260`) exposes `requireCertificateValidation()` and a `Builder.requireCertificateValidation(boolean)`, but the CLI `CommonOptions` in `MxGatewayCli.java` declared no flag and `toClientOptions()` never set it, forcing the lenient default on every non-pinned TLS CLI connection. Added a bare-boolean `@Option(names = "--require-certificate-validation")` field to `CommonOptions` (defaults to `false`, preserving the lenient default; mirrors the existing `--plaintext` flag-style option), propagated it through `toClientOptions()` via `.requireCertificateValidation(requireCertificateValidation)`, and added it to `redactedJsonMap()` so `--json` output reflects the effective trust posture. Documented the new flag and the lenient-by-default trust posture in `clients/java/README.md`. Note: the Client.Java-025 precedent (`shutdownTimeout`) was applied to the pre-rename `mxgateway-cli` module and is not present in this renamed `zb-mom-ww-mxgateway-cli` `toClientOptions()`; I mirrored the live `--ca-file`/`--server-name-override` TLS-option plumbing pattern instead, which is the correct precedent here. Regression tests in `MxGatewayCliTests`: `requireCertificateValidationFlagPropagatesThroughToClientOptions` (drives `acknowledge-alarm --require-certificate-validation` through a new `CapturingClientFactory` that records `options.toClientOptions()` and asserts `MxGatewayClientOptions.requireCertificateValidation()` is `true`) and `requireCertificateValidationDefaultsToLenientWhenFlagAbsent` (asserts the flag defaults to `false`). The capturing factory exercises the real `toClientOptions()` propagation, stronger than a parse-only check.
|
||||
|
||||
### Client.Java-040
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Medium |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `clients/java/zb-mom-ww-mxgateway-cli/src/main/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCli.java:1552-1561` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The `stream-alarms` overflow handler does `queue.clear()` then `offer(exception)` + `offer(ALARM_FEED_END)` non-atomically on an `ArrayBlockingQueue` shared with the gRPC delivery thread. In production gRPC (netty I/O thread), a concurrent `onNext` between the clear and the offers can re-enqueue a normal message, displacing the overflow exception so the drain loop hits the normal message and may exit before reaching the exception — exiting 0 on a truncated feed. Same race class as Client.Java-002/033.
|
||||
|
||||
**Recommendation:** Guard the overflow transition with an `AtomicBoolean` (mirror `MxGatewayStreamSubscription.terminate()`'s terminated-flag + lock) instead of re-clearing the queue.
|
||||
|
||||
**Resolution:** 2026-06-16 — Confirmed root cause in `StreamAlarmsCommand.call()`: the overflow branch did `queue.clear()` then `offer(exception)` + `offer(ALARM_FEED_END)`, so a concurrent `onNext` between the clear and the offers could re-enqueue a normal message and displace the overflow signal. (Note: `MxGatewayStreamSubscription` has no `terminate()` method; the terminal-guard model lives in `MxEventStream`, which itself still uses the clear+offer shape — I implemented the atomic guard the finding asks for rather than copying the older pattern.) Replaced the clear+offer with a single `AtomicBoolean terminated` guard (`compareAndSet(false,true)` — first terminal wins) plus a dedicated `AtomicReference<Object> terminal` slot that holds the terminal item (overflow exception / transport error / `ALARM_FEED_END`) independently of the bounded queue. `onNext` no longer re-clears the queue; it just stops enqueueing once terminated. The drain loop now `poll(50ms)`s and, when the queue is empty, reads the terminal slot. No re-clear, and a concurrent `onNext` can no longer displace the terminal. Fix applied 2026-06-16, verified on windev 2026-06-17 (gradle :zb-mom-ww-mxgateway-cli:test --tests *MxGatewayCliTests: BUILD SUCCESSFUL). Regression test: `MxGatewayCliTests.streamAlarmsCommandFailsFastOnQueueOverflow` (strengthened under Client.Java-046 to drive async delivery and assert the overflow text).
|
||||
|
||||
### Client.Java-041
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `clients/java/zb-mom-ww-mxgateway-cli/src/main/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCli.java:2187-2194` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `jsonString` escapes only `\`, `"`, `\r`, `\n` — not `\t`, `\b`, `\f`, or U+0000–U+001F/U+007F. A tag address/message/reference containing a tab produces malformed JSON (RFC 8259). Affects the hand-rolled `jsonObject`/`jsonString`/`jsonValue` output paths (the protobuf `JsonFormat` path is spec-correct).
|
||||
|
||||
**Recommendation:** Add `\t`/`\b`/`\f` escapes and `\u00XX` for control chars, or route all JSON through a real JSON library.
|
||||
|
||||
**Resolution:** 2026-06-16 — Confirmed: `jsonString` escaped only `\\ \" \r \n`, so a tab/backspace/form-feed or any other U+0000–U+001F (or U+007F) char produced malformed JSON. Rewrote `jsonString` as a per-character builder that emits the two-character escapes for `\t \b \f \r \n \" \\` and `\u00XX` for the remaining `< 0x20` range plus DEL (`0x7f`), keeping ordinary printable characters verbatim. Widened `jsonString` from `private` to package-private (matching the Client.Java-032 `commandLine(...)` precedent) so the escaping can be unit-tested directly. Fix applied 2026-06-16, verified on windev 2026-06-17 (gradle :zb-mom-ww-mxgateway-cli:test --tests *MxGatewayCliTests: BUILD SUCCESSFUL). Regression test: `MxGatewayCliTests.jsonStringEscapesControlCharacters`.
|
||||
|
||||
### Client.Java-042
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Error handling & resilience |
|
||||
| Location | `clients/java/zb-mom-ww-mxgateway-cli/src/main/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCli.java:1565-1567` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `StreamAlarmsCommand.onError` calls `queue.offer(error)` without checking the return value. If the queue is full when a transport error arrives, the error is dropped and the drain loop blocks forever on `queue.take()`. Same class as Client.Java-033 on the error path.
|
||||
|
||||
**Recommendation:** Reserve a sentinel slot or use the `terminate(Throwable)` guard from `MxEventStream`; ensure the drain always sees a terminal item.
|
||||
|
||||
**Resolution:** 2026-06-16 — Confirmed: `onError` did a bare `queue.offer(error)` that, on a full queue, dropped the error and stranded the drain on `queue.take()` forever. Fixed together with Client.Java-040: `onError` now routes through the shared `terminate(error)` consumer, which records the throwable in the dedicated `terminal` slot (guarded by the `AtomicBoolean`, never enqueued into the bounded `queue`). The drain loop reads that slot via the `poll(50ms)` + terminal-check path, so a transport error is always observed even when the queue is full, and the `take()`-forever deadlock is gone. Fix applied 2026-06-16, verified on windev 2026-06-17 (gradle :zb-mom-ww-mxgateway-cli:test --tests *MxGatewayCliTests: BUILD SUCCESSFUL). Covered by the same `streamAlarmsCommandFailsFastOnQueueOverflow` terminal-slot plumbing; the error path shares the slot with the overflow path.
|
||||
|
||||
### Client.Java-043
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Code organization & conventions |
|
||||
| Location | `clients/java/zb-mom-ww-mxgateway-cli/src/test/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCliTests.java:241-264` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `galaxyBrowseParentZeroEmitsWarningToStderr` calls `MxGatewayCli.execute(new FakeClientFactory(), ...)` for a galaxy-browse command, which wires the real `GrpcGalaxyClientFactory` and constructs a live Netty channel to localhost:5000 as a side effect (asserting only the warning). Wasteful and non-deterministic if port 5000 is reachable.
|
||||
|
||||
**Recommendation:** Use `executeGalaxy(...)` with a `GalaxyClientFactory` stub that throws, so only the warning path runs.
|
||||
|
||||
**Resolution:** 2026-06-16 — Confirmed: the test called `MxGatewayCli.execute(new FakeClientFactory(), ...)`, which routes galaxy commands through the production `GrpcGalaxyClientFactory`; `GalaxyBrowseCommand.call()` prints the `--parent 0` warning then `connect()`s a live `GalaxyRepositoryClient` (Netty channel to localhost:5000) before failing — wasteful and non-deterministic. Rewrote the test to use the existing `executeGalaxy(...)` seam with a new `ThrowingGalaxyClientFactory` stub whose `connect()` throws; the warning is emitted before `connect()` is reached, so only the warning path runs and no live channel is constructed. Fix applied 2026-06-16, verified on windev 2026-06-17 (gradle :zb-mom-ww-mxgateway-cli:test --tests *MxGatewayCliTests: BUILD SUCCESSFUL). Test: `MxGatewayCliTests.galaxyBrowseParentZeroEmitsWarningToStderr` (updated).
|
||||
|
||||
### Client.Java-044
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Code organization & conventions |
|
||||
| Location | `clients/java/zb-mom-ww-mxgateway-client/src/main/java/com/zb/mom/ww/mxgateway/client/MxGatewayClientVersion.java:12` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `CLIENT_VERSION = "0.1.0"` is out of sync with Gradle `version = '0.1.1'` (cross-ref `clients/java/build.gradle:16`). The `version` command advertises 0.1.0 while the published artifact is 0.1.1; consumers can't use the version string as a reliable artifact check.
|
||||
|
||||
**Recommendation:** Bump `CLIENT_VERSION` to `0.1.1` (and the two test assertions), or source it from a Gradle-generated properties file.
|
||||
|
||||
**Resolution:** 2026-06-16 — Confirmed: `MxGatewayClientVersion.CLIENT_VERSION = "0.1.0"` while `clients/java/build.gradle:16` sets `version = '0.1.1'` and the README Maven coordinate is `:0.1.1`. Bumped `CLIENT_VERSION` to `"0.1.1"` and updated the two test assertions (`MxGatewayCliTests.versionCommandPrintsProtocolVersions` line asserting `"mxgateway-java 0.1.0"` and `versionCommandPrintsJson` asserting `"clientVersion":"0.1.0"`) to `0.1.1`. Left as a hardcoded constant (sourcing from a Gradle-generated properties file was the optional alternative, not required). Fix applied 2026-06-16, verified on windev 2026-06-17 (gradle :zb-mom-ww-mxgateway-cli:test --tests *MxGatewayCliTests: BUILD SUCCESSFUL). Tests: `MxGatewayCliTests.versionCommandPrintsProtocolVersions`, `versionCommandPrintsJson`.
|
||||
|
||||
### Client.Java-045
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `clients/java/zb-mom-ww-mxgateway-cli/src/main/java/com/zb/mom/ww/mxgateway/cli/InProcessGatewayHarness.java` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The harness implements only `streamEvents`/`closeSession` (gateway) and `discoverHierarchy`/`watchDeployEvents` (galaxy); all other RPCs return gRPC UNIMPLEMENTED. This is undocumented, so a future test exercising invoke/register through the harness would silently get UNIMPLEMENTED.
|
||||
|
||||
**Recommendation:** Add a Javadoc note enumerating implemented RPCs and warning that others return UNIMPLEMENTED by design.
|
||||
|
||||
**Resolution:** 2026-06-16 — Confirmed against source (the file lives under `src/test/...`, not `src/main/...` as the finding location states): the scripted fakes override only `streamEvents`/`closeSession` (gateway) and `discoverHierarchy`/`watchDeployEvents` (galaxy); every other RPC inherits the generated `*ImplBase` default and returns gRPC `UNIMPLEMENTED`. Added an "Implemented RPCs" section to the `InProcessGatewayHarness` class Javadoc enumerating the four overridden RPCs and warning that all others (openSession, invoke, register, streamAlarms, queryActiveAlarms, browseChildren, …) return `UNIMPLEMENTED` by design, so a future test must add a scripted override first. Doc-only change. Fix applied 2026-06-16, verified on windev 2026-06-17 (gradle :zb-mom-ww-mxgateway-cli:test --tests *MxGatewayCliTests: BUILD SUCCESSFUL). No test needed.
|
||||
|
||||
### Client.Java-046
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `clients/java/zb-mom-ww-mxgateway-cli/src/test/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCliTests.java:680-696` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `streamAlarmsCommandFailsFastOnQueueOverflow` delivers all 2000 onNext synchronously from within `streamAlarms`, so `subscriptionRef` is still null when the overflow fires — the `sub.cancel()` branch is never exercised. The test also doesn't assert the overflow message text. It passes for a reason that doesn't generalize to async gRPC delivery.
|
||||
|
||||
**Recommendation:** Deliver messages asynchronously so the cancel path runs, and assert the overflow error text appears in output.
|
||||
|
||||
**Resolution:** 2026-06-16 — Confirmed: `OverflowingFakeClient.streamAlarms` pushed all 2000 `onNext` synchronously and returned the subscription only afterward, so `subscriptionRef` was still null when the overflow fired and the `sub.cancel()` branch never ran; the test also asserted only the exit code, not the overflow text. Reworked `OverflowingFakeClient.streamAlarms` to flood on a background daemon thread (mirroring a real netty I/O thread) and return the subscription first, so the overflow fires with a non-null published subscription and exercises the `terminate()` cancel path. Strengthened `streamAlarmsCommandFailsFastOnQueueOverflow` to additionally assert the overflow message text ("queue overflowed") surfaces in stderr/stdout. Fix applied 2026-06-16, verified on windev 2026-06-17 (gradle :zb-mom-ww-mxgateway-cli:test --tests *MxGatewayCliTests: BUILD SUCCESSFUL). Test: `MxGatewayCliTests.streamAlarmsCommandFailsFastOnQueueOverflow` (updated; also validates the Client.Java-040 terminal-slot fix).
|
||||
|
||||
### Client.Java-047
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Location | `clients/java/README.md` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** README advertises the `0.1.1` artifact coordinate (Gitea Maven section) while the `version` command reports `0.1.0` — the user-visible symptom of Client.Java-044. Cross-ref `MxGatewayClientVersion.java:12`.
|
||||
|
||||
**Recommendation:** Resolved by fixing Client.Java-044 (sync the compiled-in version).
|
||||
|
||||
**Resolution:** 2026-06-16 — Symptom of Client.Java-044, resolved together. The README's `0.1.1` Maven coordinate (`clients/java/README.md:336`) was already correct; the divergence was the compiled-in `CLIENT_VERSION = "0.1.0"`. Bumping `CLIENT_VERSION` to `0.1.1` (Client.Java-044) makes the `version` command report `0.1.1`, matching the README. No README edit needed. Fix applied 2026-06-16, verified on windev 2026-06-17 (gradle :zb-mom-ww-mxgateway-cli:test --tests *MxGatewayCliTests: BUILD SUCCESSFUL).
|
||||
|
||||
### Client.Java-048
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Location | `clients/java/zb-mom-ww-mxgateway-cli/src/main/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCli.java:88-105` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The public `execute(PrintWriter, PrintWriter, String...)` Javadoc calls it "Test-friendly entry point", but it wires `GrpcMxGatewayCliClientFactory` with no injection — the actual test seam is the package-private `execute(MxGatewayCliClientFactory, ...)` / `commandLine(...)` overload. Misleading.
|
||||
|
||||
**Recommendation:** Clarify the Javadoc to direct readers to the injectable overload for testing.
|
||||
|
||||
**Resolution:** 2026-06-16 — Confirmed: the public `execute(PrintWriter, PrintWriter, String...)` Javadoc called it the "Test-friendly entry point", but it wires the production `GrpcMxGatewayCliClientFactory` with no injection seam — unit tests actually use the package-private `execute(MxGatewayCliClientFactory, ...)` / `commandLine(...)` overloads. Rewrote the Javadoc to drop "test-friendly", explain it wires a real gRPC channel, and direct test authors to the injectable package-private overloads. Doc-only change. Fix applied 2026-06-16, verified on windev 2026-06-17 (gradle :zb-mom-ww-mxgateway-cli:test --tests *MxGatewayCliTests: BUILD SUCCESSFUL). No test needed.
|
||||
|
||||
|
||||
|
||||
### Client.Java-039
|
||||
@@ -744,3 +921,63 @@ BrowseChildrenReply reply = galaxy.browseChildren(
|
||||
**Recommendation:** Add a `PROVIDER_STATUS` arm to `formatAlarmFeedMessage` that renders the provider status (mode / degraded / reason) consistently with the other alarm-feed arms — do not add a `default ->` that silently drops it, since the provider status is meaningful and the exhaustive switch is the compiler-enforced guard that catches exactly this kind of future contract drift.
|
||||
|
||||
**Resolution:** 2026-06-15 — Confirmed via `gradle :zb-mom-ww-mxgateway-cli:compileJava` failing with "the switch expression does not cover all possible input values" at `MxGatewayCli.java:1699` on the Windows host. Added a `case PROVIDER_STATUS ->` arm to `formatAlarmFeedMessage` yielding `provider-status mode=%s degraded=%b reason=%s` (from `AlarmProviderStatus.getMode().name()` / `getDegraded()` / `getReason()`), plus the `import mxaccess_gateway.v1.MxaccessGateway.AlarmProviderStatus;`. No `default` arm — the exhaustive switch expression remains the compile-time guard against future `payload` oneof additions. Verified `gradle test` builds and passes on the Windows host (Java 21).
|
||||
|
||||
### Client.Java-049
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Code organization & conventions |
|
||||
| Location | `clients/java/build.gradle:16`, `clients/java/zb-mom-ww-mxgateway-client/src/main/java/com/zb/mom/ww/mxgateway/client/MxGatewayClientVersion.java:12`, `clients/java/zb-mom-ww-mxgateway-cli/src/test/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCliTests.java:59,89`, `clients/java/README.md:399` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** Commit `88915c3` (`chore(clients): bump all five clients 0.1.1 -> 0.1.2`) incremented `build.gradle` `version = '0.1.2'` but left `MxGatewayClientVersion.CLIENT_VERSION = "0.1.1"` unchanged. The two CLI test assertions that check the version string also still assert `0.1.1` (lines 59 and 89 of `MxGatewayCliTests.java`), and the `README.md` Maven dependency example at line 399 shows `:0.1.1`. The published Gradle artifact carries version `0.1.2` (from `build.gradle`) while the `version` CLI command reports `mxgateway-java 0.1.1` and the README tells a consumer to depend on `:0.1.1`. Same class of version drift as the resolved Client.Java-044 (where `0.1.0` vs `0.1.1` was the split) — the fix for Client.Java-044 bumped `CLIENT_VERSION` to `"0.1.1"` but the `build.gradle` bump to `0.1.2` was not accompanied by a matching `MxGatewayClientVersion` update.
|
||||
|
||||
**Recommendation:** Bump `CLIENT_VERSION` to `"0.1.2"` in `MxGatewayClientVersion.java`, update the two `MxGatewayCliTests` assertions from `0.1.1` to `0.1.2`, and update the `README.md` dependency example coordinate to `:0.1.2`. Consider sourcing `CLIENT_VERSION` from a Gradle-generated resource file (e.g. via `processResources` task writing `version.properties`) so the two version strings cannot drift again.
|
||||
|
||||
**Resolution:** 2026-06-18 — Confirmed: `build.gradle` already at `0.1.2` while `CLIENT_VERSION` was still `"0.1.1"` and test assertions/README matched the old value. Bumped `CLIENT_VERSION` to `"0.1.2"` in `MxGatewayClientVersion.java`, updated both version assertions in `MxGatewayCliTests.java` (plain-text and JSON paths), and updated the Maven dependency coordinate in `README.md` to `:0.1.2`. No new test needed — the two existing assertions (`versionCommandPrintsProtocolVersions`, `versionCommandPrintsJson`) now exercise the corrected value. (windev-verified 2026-06-18: gradle test BUILD SUCCESSFUL, JDK 21)
|
||||
|
||||
### Client.Java-050
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `clients/java/zb-mom-ww-mxgateway-cli/src/main/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCli.java:1046-1068` (new `AdviseSupervisoryCommand`), `clients/java/zb-mom-ww-mxgateway-cli/src/test/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCliTests.java:1306-1313` (stub) |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** Commit `9eedf9d` added the `advise-supervisory` CLI subcommand (`AdviseSupervisoryCommand`) to all language client CLIs. The Java `FakeSession.adviseSupervisoryRaw` stub was added to `MxGatewayCliTests` but no test exercises the new subcommand path. There is no test that calls `execute(factory, "advise-supervisory", "--session-id", "s", "--server-handle", "1", "--item-handle", "2")` and asserts the command routes through `session.adviseSupervisoryRaw`, produces a non-zero exit code on failure, or emits the correct JSON / text output. The `adviseCalled` field shared with `adviseRaw` means even an indirect smoke path that calls `advise` could mask a missing `adviseSupervisory` wire. Every other new CLI subcommand in this diff has a dedicated CLI-level test (the `writeArrayElements` helper has a session-level test in `MxGatewayClientSessionTests`).
|
||||
|
||||
**Recommendation:** Add a `@Test void adviseSupervisoryCommandCallsAdviseSupervisoryRaw()` to `MxGatewayCliTests` that exercises the subcommand via `execute(factory, "advise-supervisory", "--session-id", "s", "--server-handle", "12", "--item-handle", "34")` and asserts exit code 0, that `factory.client.session.adviseCalled` (or a dedicated `adviseSupervisoryCalled` boolean) is true, and that the output contains the reply kind string `MX_COMMAND_KIND_ADVISE_SUPERVISORY`. Consider giving the `adviseSupervisoryRaw` stub its own `adviseSupervisoryCalled` flag (leaving `adviseCalled` to the `adviseRaw` stub alone) so that future tests cannot mask each other.
|
||||
|
||||
**Resolution:** 2026-06-18 — Confirmed: `adviseSupervisoryRaw` stub existed in `FakeSession` but shared `adviseCalled` with the plain `adviseRaw` stub, and no test exercised the `advise-supervisory` subcommand path. Added a dedicated `adviseSupervisoryCalled` boolean field to `FakeSession` and wired it to the `adviseSupervisoryRaw` stub (severing the shared flag that masked routing). Added `adviseSupervisoryCommandCallsAdviseSupervisoryRaw` test in `MxGatewayCliTests.java` that invokes `execute(factory, "advise-supervisory", "--session-id", "session-cli", "--server-handle", "12", "--item-handle", "34", "--json")` and asserts exit code 0, `adviseSupervisoryCalled` is true, `adviseCalled` is false (verifying routing isolation), and output contains `"kind":"MX_COMMAND_KIND_ADVISE_SUPERVISORY"`. (windev-verified 2026-06-18: gradle test BUILD SUCCESSFUL, JDK 21)
|
||||
|
||||
### Client.Java-051
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Location | `clients/java/zb-mom-ww-mxgateway-client/src/main/java/com/zb/mom/ww/mxgateway/client/MxGatewaySession.java:622-657` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `writeArrayElements` accepts `int totalLength` and `Map<Integer, MxValue> elements` whose keys are plain Java `int`. The proto fields `MxSparseArray.total_length` and `MxSparseElement.index` are both `uint32`. Java's protobuf runtime maps `uint32` to `int` (Java has no unsigned primitive), so passing a negative value to `setTotalLength(int)` or `setIndex(int)` silently sets the wire field to the two's-complement reinterpretation (e.g. `-1` → `4294967295`). The gateway will likely reject the resulting request with `INVALID_ARGUMENT`, but the error message will reference a large `uint32` value rather than the caller's negative `int`, making the failure hard to diagnose. The Javadoc states "supplied indices must be within `[0, totalLength)`" and "`totalLength` is required" but does not state what happens with negative inputs, and no `IllegalArgumentException` is thrown. All other language clients use unsigned types (`uint`, `uint32`, `u32`) that prevent negatives at the type level; Java cannot replicate that, so explicit validation is the correct substitute. The Python client is similarly unvalidated and its docstring explicitly defers to the gateway for rejection — but Python's `protobuf` runtime raises a `ValueError` when a negative value is assigned to a `uint32` field, before the network call, so it fails more obviously than Java's silent wire wrap.
|
||||
|
||||
**Recommendation:** Add client-side guards before the `MxSparseArray.Builder` population:
|
||||
|
||||
```java
|
||||
if (totalLength <= 0) {
|
||||
throw new IllegalArgumentException("totalLength must be > 0, got " + totalLength);
|
||||
}
|
||||
for (Map.Entry<Integer, MxValue> entry : elements.entrySet()) {
|
||||
int idx = entry.getKey();
|
||||
if (idx < 0 || idx >= totalLength) {
|
||||
throw new IllegalArgumentException(
|
||||
"element index " + idx + " is out of range [0, " + totalLength + ")");
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Add a test in `MxGatewayClientSessionTests` asserting both `IllegalArgumentException` paths (non-positive `totalLength`, negative/out-of-range index). Duplicate-index detection can be left to the gateway (the proto `repeated` field allows duplicates, and the gateway can sort out semantics).
|
||||
|
||||
**Resolution:** 2026-06-18 — Confirmed: `writeArrayElements` passed negative `int` values straight to `setTotalLength`/`setIndex` with no guard, silently producing large `uint32` wire values. Added `if (totalLength <= 0) throw new IllegalArgumentException(...)` and a per-entry `if (idx < 0 || idx >= totalLength) throw new IllegalArgumentException(...)` loop before the proto builder in `MxGatewaySession.writeArrayElements`. Updated Javadoc to document the new `@throws IllegalArgumentException` contract and the uint32 unsigned-type rationale. Added two tests in `MxGatewayClientSessionTests`: `writeArrayElementsRejectsNonPositiveTotalLength` (covers negative and zero `totalLength`) and `writeArrayElementsRejectsOutOfRangeIndex` (covers negative index, index equal to `totalLength`, and index above `totalLength`). (windev-verified 2026-06-18: gradle test BUILD SUCCESSFUL, JDK 21)
|
||||
|
||||
@@ -4,13 +4,58 @@
|
||||
|---|---|
|
||||
| Module | `clients/python` |
|
||||
| Reviewer | Claude Code |
|
||||
| Review date | 2026-06-15 |
|
||||
| Commit reviewed | `410acc9` |
|
||||
| Review date | 2026-06-18 |
|
||||
| Commit reviewed | `88915c3` |
|
||||
| Status | Re-reviewed |
|
||||
| Open findings | 0 |
|
||||
|
||||
## Checklist coverage
|
||||
|
||||
### 2026-06-18 re-review (commit 88915c3)
|
||||
|
||||
Re-review of the Python client delta at `88915c3` over base `8df5ab3`. Feature
|
||||
scope: `Session.write_array_elements` default-fill sparse-array helper, the new
|
||||
`advise-supervisory` CLI subcommand, prior 032–036 fixes carried, export
|
||||
additions for `BrowseChildrenOptions` / `LazyBrowseNode`, version bump 0.1.1 →
|
||||
0.1.2, README "Write Semantics" doc section, and the corresponding generated
|
||||
`mxaccess_gateway_pb2.py` descriptor update.
|
||||
|
||||
Generated-file churn check (memory `project_python_client_regen_pin`): only
|
||||
`mxaccess_gateway_pb2.py` changed, exactly one DESCRIPTOR line was replaced
|
||||
(adding the `MxSparseArray` / `MxSparseElement` encoding), and the
|
||||
`Protobuf Python Version` header remained `6.31.1` at both commits. No
|
||||
spurious grpcio version churn was introduced.
|
||||
|
||||
| # | Category | Result |
|
||||
|---|---|---|
|
||||
| 1 | Correctness & logic bugs | Client.Python-037 |
|
||||
| 2 | mxaccessgw conventions | No issues found |
|
||||
| 3 | Concurrency & thread safety | No issues found |
|
||||
| 4 | Error handling & resilience | No issues found |
|
||||
| 5 | Security | No issues found |
|
||||
| 6 | Performance & resource management | No issues found |
|
||||
| 7 | Design-document adherence | No issues found |
|
||||
| 8 | Code organization & conventions | No issues found |
|
||||
| 9 | Testing coverage | Client.Python-038 |
|
||||
| 10 | Documentation & comments | No issues found |
|
||||
|
||||
### 2026-06-16 re-review (commit 8df5ab3)
|
||||
|
||||
Re-review of the Python client delta: new galaxy CLI commands, options.py TLS/auth, large test additions. Prior Client.Python-027..031 confirmed resolved. One claimed regression (Python-004 dead variable) and one Medium README/API mismatch.
|
||||
|
||||
| # | Category | Result |
|
||||
|---|---|---|
|
||||
| 1 | Correctness & logic bugs | Client.Python-032, Client.Python-033, Client.Python-034 |
|
||||
| 2 | mxaccessgw conventions | No issues found |
|
||||
| 3 | Concurrency & thread safety | No issues found |
|
||||
| 4 | Error handling & resilience | No issues found |
|
||||
| 5 | Security | No issues found |
|
||||
| 6 | Performance & resource management | No issues found |
|
||||
| 7 | Design-document adherence | No issues found |
|
||||
| 8 | Code organization & conventions | Client.Python-035 |
|
||||
| 9 | Testing coverage | Client.Python-036 |
|
||||
| 10 | Documentation & comments | Client.Python-036 |
|
||||
|
||||
### 2026-06-15 re-review (commit 410acc9)
|
||||
|
||||
Re-review pass at `410acc9`. The diff against the previous review base
|
||||
@@ -1438,3 +1483,120 @@ under `[tool.pytest.ini_options]` in `clients/python/pyproject.toml`.
|
||||
`python -m pytest` now reports no `PytestUnknownMarkWarning` (full run: 91
|
||||
passed, 1 skipped, 0 warnings; previously 1 warning). The `tls`-marked
|
||||
`tests/test_tls.py` module is the guard — its run is now warning-free.
|
||||
|
||||
### Client.Python-032
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `clients/python/src/zb_mom_ww_mxgateway_cli/commands.py:1048,1065-1066` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `_smoke` reintroduces the dead `closed = False` / `if not closed:` guard that Client.Python-004's resolution claimed to have removed via `async with session:`. `closed` is never reassigned, so the guard is always true. Behavior is correct (session always closed) but the dead variable misleads readers into expecting an early-close path. (Claimed regression — verify root cause.)
|
||||
|
||||
**Recommendation:** Use `async with session:` or drop the `closed` variable and close unconditionally.
|
||||
|
||||
**Resolution:** 2026-06-16 — Confirmed regression: the dead `closed = False` / `if not closed:` guard had returned. Replaced the `try/finally` with `async with session:` (Session implements the async context-manager protocol). Test: `test_smoke_does_not_carry_dead_closed_guard` in `tests/test_review_findings_032_to_036.py`.
|
||||
|
||||
### Client.Python-033
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `clients/python/src/zb_mom_ww_mxgateway_cli/commands.py:772,1490-1494` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `_parse_string_list` always emits `param_hint="--items"`, but it is also called from `_build_write_bulk_entries` with `kwargs["values"]`. An empty `--values ""` on the write-bulk commands yields `Error: Invalid value for '--items': ...`, pointing at a flag that doesn't exist on those commands.
|
||||
|
||||
**Recommendation:** Add an optional `param_hint` parameter (default `--items`) and pass `--values` from the write-bulk caller.
|
||||
|
||||
**Resolution:** 2026-06-16 — Added `param_hint="--items"` default param to `_parse_string_list`; `_build_write_bulk_entries` now passes `param_hint="--values"`. Tests: `test_parse_string_list_default_param_hint_is_items`, `test_parse_string_list_accepts_caller_supplied_param_hint` in `tests/test_review_findings_032_to_036.py`.
|
||||
|
||||
### Client.Python-034
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `clients/python/src/zb_mom_ww_mxgateway_cli/commands.py:1497-1501` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `_parse_int_list` does `int(item)` with no error handling. A non-numeric token (e.g. `--item-handles "10,abc"`) raises a raw `ValueError`, surfacing as an unformatted traceback interactively (other input errors raise `click.BadParameter`).
|
||||
|
||||
**Recommendation:** Wrap the conversion and re-raise as `click.BadParameter(..., param_hint="--item-handles")`.
|
||||
|
||||
**Resolution:** 2026-06-16 — Wrapped the `int()` comprehension in `try/except ValueError` and re-raise as `click.BadParameter(..., param_hint="--item-handles")`. Tests: `test_parse_int_list_non_numeric_raises_bad_parameter`, `test_parse_int_list_happy_path` in `tests/test_review_findings_032_to_036.py`.
|
||||
|
||||
### Client.Python-035
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Code organization & conventions |
|
||||
| Location | `clients/python/src/zb_mom_ww_mxgateway/__init__.py`, `.../options.py:63-77`, `.../galaxy.py:293` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** Two new public types — `BrowseChildrenOptions` (options.py) and `LazyBrowseNode` (galaxy.py) — are absent from `__init__.py`/`__all__`, so callers can't `from zb_mom_ww_mxgateway import BrowseChildrenOptions`, breaking the package-root import contract that `ClientOptions`/`GatewayClient`/etc. follow.
|
||||
|
||||
**Recommendation:** Re-export both from `__init__.py` and add them to `__all__`.
|
||||
|
||||
**Resolution:** 2026-06-16 — Re-exported `BrowseChildrenOptions` (from `.options`) and `LazyBrowseNode` (from `.galaxy`) in `__init__.py` and added both to `__all__`. Tests: `test_browse_children_options_is_exported_from_package_root`, `test_lazy_browse_node_is_exported_from_package_root` in `tests/test_review_findings_032_to_036.py`.
|
||||
|
||||
### Client.Python-036
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Medium |
|
||||
| Category | Documentation & comments |
|
||||
| Location | `clients/python/README.md:143-158` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The README "Browsing lazily" section's code example calls `galaxy.browse_children(...)`, a method that does not exist — the actual public low-level method is `browse_children_raw`. The example raises `AttributeError` at runtime. The README-parse test only covers shell CLI invocations, not Python code fragments, so it doesn't catch this.
|
||||
|
||||
**Recommendation:** Update the example/prose to `browse_children_raw(...)` (and promote the high-level `browse()`/`LazyBrowseNode` path), or add a `browse_children` alias. Add a `hasattr` test to catch future renames.
|
||||
|
||||
**Resolution:** 2026-06-16 — Updated the README "Browsing lazily" prose and example to `browse_children_raw(...)` and added a pointer to the higher-level `browse()`/`LazyBrowseNode` walker. Tests: `test_galaxy_client_exposes_browse_children_raw` (hasattr guard) and `test_readme_browse_example_uses_existing_method` (parses every `galaxy.<method>()` call in README against the client class) in `tests/test_review_findings_032_to_036.py`.
|
||||
|
||||
### Client.Python-037
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `clients/python/pyproject.toml:10` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The `description` field in `pyproject.toml` reads `"Async Python client scaffold for MXAccess Gateway."` at commit `88915c3`. Client.Python-001 resolved this on 2026-05-18 by removing the word "scaffold". The fix was lost when commit `397d3c5` (the package directory rename, `src/mxgateway` → `src/zb_mom_ww_mxgateway`) re-created `pyproject.toml` from scratch, re-introducing the stale wording. The version bump commit `88915c3` carried the regression forward without correcting it.
|
||||
|
||||
The issue is purely cosmetic and does not affect the wheel build or runtime behaviour, but the "scaffold" label misrepresents the maturity of a fully-implemented, versioned package to anyone reading PyPI metadata. It is also a direct regression of a previously-resolved finding.
|
||||
|
||||
**Recommendation:** Change the `description` in `clients/python/pyproject.toml` from `"Async Python client scaffold for MXAccess Gateway."` to `"Async Python client for MXAccess Gateway."` (drop "scaffold"), matching the fix applied under Client.Python-001. The `test_pip_wheel_build_succeeds` test will confirm the wheel still builds; no additional test is needed for a pure metadata word change.
|
||||
|
||||
**Resolution:** 2026-06-18 — Root cause confirmed: `pyproject.toml` line 10 still contained "scaffold" at commit `88915c3`. Removed "scaffold" from the `description` field so it now reads `"Async Python client for MXAccess Gateway."`, matching the Client.Python-001 fix and the sibling client descriptions. Added `test_pyproject_description_does_not_contain_scaffold` in `tests/test_review_findings_037_038.py` to prevent future regressions; the test failed before the fix and passes after. Full suite: 127 passed, 1 skipped. Generated directory unchanged.
|
||||
|
||||
### Client.Python-038
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `clients/python/tests/`, `clients/python/src/zb_mom_ww_mxgateway_cli/commands.py:280-299,742-758` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The new `advise-supervisory` CLI subcommand (commit `88915c3`) has no test coverage — not even a `--help` smoke registration test of the kind added for `stream-alarms` (`test_stream_alarms_is_registered`) or the earlier `advise` command. There is no test that:
|
||||
|
||||
1. Asserts `advise-supervisory` is registered as a subcommand on `main` (i.e. a `--help` round-trip through `CliRunner` that confirms the subcommand name exists and Click does not report `no such command`).
|
||||
2. Drives `_advise_supervisory` through `CliRunner` with a fake stub injected via monkeypatched `GatewayClient.connect` and asserts (a) the captured `MxCommand` has `kind == MX_COMMAND_KIND_ADVISE_SUPERVISORY` and (b) `server_handle`/`item_handle` are forwarded correctly.
|
||||
|
||||
The README mentions `advise-supervisory` in prose (`"The CLI exposes the same command as advise-supervisory"`) but provides no `mxgw-py advise-supervisory …` example line, so the existing `test_readme_alarm_examples_parse_against_cli` scanner does not exercise it. A silent renaming or option drift would go undetected.
|
||||
|
||||
The pattern to follow is `test_cli_acknowledge_alarm_happy_path` in `tests/test_review_findings_022_to_026.py`, extended with a `MX_COMMAND_KIND_ADVISE_SUPERVISORY` assertion.
|
||||
|
||||
**Recommendation:** Add to `tests/test_review_findings_032_to_036.py` (or a new `tests/test_review_findings_037_038.py`):
|
||||
|
||||
1. `test_advise_supervisory_is_registered` — `CliRunner().invoke(main, ["advise-supervisory", "--help"])` asserts exit code 0 and "AdviseSupervisory" (or the help text) is present.
|
||||
2. `test_cli_advise_supervisory_happy_path` — injects a fake stub via `monkeypatch`, drives `advise-supervisory --session-id s1 --server-handle 1 --item-handle 2 --json`, and asserts the captured `MxCommand.kind == MX_COMMAND_KIND_ADVISE_SUPERVISORY`, `advise_supervisory.server_handle == 1`, `advise_supervisory.item_handle == 2`.
|
||||
|
||||
**Resolution:** 2026-06-18 — Root cause confirmed: no test existed for `advise-supervisory` despite it being registered and implemented at commit `88915c3`. Added `tests/test_review_findings_037_038.py` with three tests, two of which cover this finding (the third, `test_pyproject_description_does_not_contain_scaffold`, belongs to Client.Python-037): `test_advise_supervisory_is_registered` (CliRunner `--help` round-trip asserting exit 0 and `--server-handle`/`--item-handle` in output) and `test_cli_advise_supervisory_happy_path` (monkeypatched `GatewayClient.connect` with a fake stub, drives the CLI end-to-end, asserts `MxCommand.kind == MX_COMMAND_KIND_ADVISE_SUPERVISORY` and `advise_supervisory.server_handle == 7`, `advise_supervisory.item_handle == 42`). No source change was required — the command implementation was correct. Full suite: 127 passed, 1 skipped. Generated directory unchanged.
|
||||
|
||||
@@ -4,8 +4,8 @@
|
||||
|---|---|
|
||||
| Module | `clients/rust` |
|
||||
| Reviewer | Claude Code |
|
||||
| Review date | 2026-06-15 |
|
||||
| Commit reviewed | `410acc9` |
|
||||
| Review date | 2026-06-18 |
|
||||
| Commit reviewed | `88915c3` |
|
||||
| Status | Re-reviewed |
|
||||
| Open findings | 0 |
|
||||
|
||||
@@ -115,6 +115,44 @@ Re-review pass at `410acc9`. The diff against `42b0037` (`git diff 42b0037..HEAD
|
||||
| 9 | Testing coverage | No issues found in the new surface — the walker has six unit tests (roots, expand, idempotency, NotFound, multi-page, filter-forwarding) and TLS has four. Gap noted: `tls_with_require_certificate_validation_does_not_short_circuit` connects to a dead address, so it only asserts the guard does not fire and never exercises a real handshake — which is why the no-trust-roots defect in Client.Rust-031 is not caught by a test. |
|
||||
| 10 | Documentation & comments | Issue found: the `alarm_feed_message_summary` / `alarm_feed_message_to_json` doc comments still say "three `payload` oneof cases" (`main.rs:1729,1755`) although the proto now has four; folded into Client.Rust-030's fix. The TLS doc inaccuracy is Client.Rust-031. |
|
||||
|
||||
#### 2026-06-16 re-review (commit 8df5ab3)
|
||||
|
||||
Re-review of the Rust client delta: options.rs TLS trust decision, mxgw-cli galaxy browse, Cargo metadata. Prior Client.Rust-030/031/032 confirmed resolved. fmt/clippy/test clean. One Medium TLS-downgrade correctness item.
|
||||
|
||||
| # | Category | Result |
|
||||
|---|---|---|
|
||||
| 1 | Correctness & logic bugs | Client.Rust-033, Client.Rust-034 |
|
||||
| 2 | mxaccessgw conventions | No issues found |
|
||||
| 3 | Concurrency & thread safety | No issues found |
|
||||
| 4 | Error handling & resilience | No issues found |
|
||||
| 5 | Security | Client.Rust-035 |
|
||||
| 6 | Performance & resource management | No issues found |
|
||||
| 7 | Design-document adherence | Client.Rust-036, Client.Rust-037 |
|
||||
| 8 | Code organization & conventions | No issues found |
|
||||
| 9 | Testing coverage | Client.Rust-038 |
|
||||
| 10 | Documentation & comments | No issues found |
|
||||
|
||||
### 2026-06-18 re-review (commit 88915c3)
|
||||
|
||||
Re-review of `git diff 8df5ab3..88915c3 -- clients/rust/`. The diff introduces: `Session::write_array_elements` sparse-array default-fill helper (`src/session.rs`); `SparseArrayValue` → `Unset` decode mapping in `MxValueProjection` (`src/value.rs`); `advise-supervisory` CLI subcommand (`crates/mxgw-cli/src/main.rs`); README and `RustClientDesign.md` docs additions; version bump 0.1.1 → 0.1.2; and a suite of tests in `tests/client_behavior.rs`.
|
||||
|
||||
Known prior bug (commit `72cf2f4`): `write_array_elements` set the outer `ProtoMxValue.data_type` to the element type — confirmed fixed via `..ProtoMxValue::default()` (outer `data_type = 0`). The e2e test `write_array_elements_routes_sparse_array_write_through_fake_gateway` asserts `value.data_type == 0` and `sparse.element_data_type == Integer`, correctly locking the fix in. The `MxValue` roundtrip is sound: `MxValue::from_proto(sparse_value).into_proto()` returns the original raw proto unchanged, so the sparse payload reaches the wire unmodified.
|
||||
|
||||
All prior findings 033–038 confirmed Resolved at `8df5ab3`. `cargo fmt --check`, `cargo clippy --workspace --all-targets -- -D warnings`, and `cargo test --workspace` are assumed clean at HEAD (source review only; toolchain is Windows-only and not available on this macOS host).
|
||||
|
||||
| # | Category | Result |
|
||||
|---|---|---|
|
||||
| 1 | Correctness & logic bugs | Issue found (Client.Rust-040): the `sparse_int32_value` unit-test helper sets `data_type: MxDataType::Integer as i32` on the outer `MxValue` but `write_array_elements` uses `..ProtoMxValue::default()` (outer `data_type = 0`); the helper comment claims it builds "the proto MxValue that `write_array_elements` would send" so the unit tests using it test a subtly incorrect shape. The e2e test correctly covers the fix, but the unit test helper should match the implementation. |
|
||||
| 2 | mxaccessgw conventions | No issues found — `advise-supervisory` goes through `session.invoke` which calls `command_request`, which calls `next_correlation_id` internally; unique per-call correlation ids are preserved. `cargo fmt --check` and `cargo clippy --workspace --all-targets -- -D warnings` are expected clean (no new lint-tripping patterns introduced). |
|
||||
| 3 | Concurrency & thread safety | No issues found — `write_array_elements` is a thin synchronous builder that delegates to the existing `Session::write` async path; `last_write_command` in `FakeState` is behind a `Mutex<Option<_>>` and accessed correctly. No new `unsafe`, no new shared mutable state. |
|
||||
| 4 | Error handling & resilience | No issues found — `total_length = 0`, out-of-range indices, duplicate indices, and element-kind mismatches are all validated by the gateway's `SparseArrayExpander` and surface as `Error::InvalidArgument` (propagated, per the doc comment). No client-side guard is needed; the gateway is the single validation point. |
|
||||
| 5 | Security | No issues found — `write_array_elements` passes `user_id` through to `Session::write` which is already covered by the existing API-key + scope enforcement path; no credentials or secrets in the new surface. |
|
||||
| 6 | Performance & resource management | No issues found — `impl IntoIterator<Item = (u32, MxValue)>` avoids requiring an intermediate `Vec`; elements are collected once into `Vec<MxSparseElement>` and immediately handed to the proto. No unnecessary clones on the hot path. |
|
||||
| 7 | Design-document adherence | Issue found (Client.Rust-039): `Session::write_array_elements` is a new public SDK method and `advise-supervisory` is a new CLI subcommand; neither appears in `RustClientDesign.md` (Session API block or CLI commands table). CLAUDE.md requires docs to change in the same commit as the source. README was correctly updated. |
|
||||
| 8 | Code organization & conventions | No issues found — `register_page_token` was cleanly extracted from `browse_children_one_level` and covered with a unit test. `write_array_elements` is placed adjacent to `write` in `session.rs`. The `WriteOk` `InvokeOverride` variant and `last_write_command` capture are well-scoped to the test infrastructure. |
|
||||
| 9 | Testing coverage | Cross-referenced with Client.Rust-040: the `sparse_int32_value` test helper tests a proto shape with incorrect outer `data_type`; the unit tests using it do not verify `data_type` and would not catch a regression of the outer-`data_type` fix. The e2e test `write_array_elements_routes_sparse_array_write_through_fake_gateway` does assert `data_type == 0` and provides the real regression guard. |
|
||||
| 10 | Documentation & comments | No issues found — README `write_array_elements` and `advise-supervisory` sections are accurate. The `SparseArrayValue` → `Unset` comment in `value.rs` explains the write-only rationale clearly. The `write_array_elements` doc comment correctly describes the "not a preserve, a reset" semantics. The README Rust code example for `advise-supervisory` omits `use` imports for `Payload`/`MxCommandKind`/`AdviseSupervisoryCommand` but this is consistent with other README code-snippet conventions across all five clients. |
|
||||
|
||||
## Findings
|
||||
|
||||
### Client.Rust-001
|
||||
@@ -762,3 +800,147 @@ This is masked by the tests: `tls_with_require_certificate_validation_does_not_s
|
||||
**Recommendation:** Add a "Lazy browse" subsection to the Galaxy section of `RustClientDesign.md` enumerating `browse`, `browse_children_raw`, `BrowseChildrenOptions` (its filter fields and AND semantics), and `LazyBrowseNode` (the `Arc`-shared clone semantics, the idempotent single-RPC `expand`, the `has_children_hint`, and the internal paged `BrowseChildren` loop with its repeated-page-token guard). Cross-reference `docs/GalaxyRepository.md#browsechildren` for the wire-level request/filter semantics the README already links.
|
||||
|
||||
**Resolution:** 2026-06-15 — Confirmed by inspection that `RustClientDesign.md` had no Galaxy library-API coverage at all. Added a new "Galaxy Repository" section documenting `browse`, `browse_children_raw`, the `BrowseChildrenOptions` filter struct (all six fields, AND combination semantics, `include_attributes` tri-state), and `LazyBrowseNode` (`Arc`-shared clone semantics, `has_children_hint`, the idempotent single-RPC `expand` under an async mutex with page size 500, and the repeated-page-token `Error::InvalidArgument` guard), cross-referencing `docs/GalaxyRepository.md#browsechildren`. Also noted the fourth alarm `provider_status` oneof case in the Alarms section while resolving Client.Rust-030. Doc-only change verified by inspection; design-doc anchor target confirmed present.
|
||||
|
||||
### Client.Rust-033
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Medium |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `clients/rust/crates/mxgw-cli/src/main.rs:485` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `ConnectionArgs::options()` computes plaintext as `!self.tls || self.plaintext`. With both `--tls` and `--plaintext` supplied, this is `true`, silently degrading to an unencrypted channel despite the explicit `--tls`. This is a security-sensitive footgun (e.g. when a script auto-appends `--plaintext`).
|
||||
|
||||
**Recommendation:** Add clap `conflicts_with = "tls"` on `--plaintext` (reject the combo), or prefer `--tls` and warn.
|
||||
|
||||
**Resolution:** 2026-06-16 — Added `conflicts_with = "tls"` to the `--plaintext` arg so supplying both is rejected at parse time, removing the silent downgrade. Tests: `connection_rejects_tls_and_plaintext_together`, `connection_tls_flag_disables_plaintext`, `connection_defaults_to_plaintext`, `connection_plaintext_flag_selects_plaintext`.
|
||||
|
||||
### Client.Rust-034
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `clients/rust/crates/mxgw-cli/src/main.rs:48-51,548` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `Command::Version` carries a `jsonl: bool` field that is never read; the dispatch arm matches `{ json, .. }` and discards `jsonl`. `mxgw version --jsonl` silently behaves as plain text.
|
||||
|
||||
**Recommendation:** Handle `jsonl` in the Version arm (treat like `--json`) or remove the unused field.
|
||||
|
||||
**Resolution:** 2026-06-16 — Removed the unused `jsonl` field from `Command::Version` (version output is a single record, not a stream); the dispatch arm now matches `{ json }` exhaustively, so `mxgw version --jsonl` errors as an unknown flag instead of silently being ignored. No test (CLI surface change verified by build).
|
||||
|
||||
### Client.Rust-035
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Security |
|
||||
| Location | `clients/rust/crates/mxgw-cli/src/main.rs:489-495` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `--api-key-env` (default `MXGATEWAY_API_KEY`) names an env var read into an `ApiKey` Bearer token, but its clap help has no description of the expected value format. A user pointing it at another credential's env var would silently forward that credential to the gateway as a Bearer token. Low risk (redacted Debug; bounded to user's own shell) but an implicit-trust gap.
|
||||
|
||||
**Recommendation:** Add help text stating the variable must hold a value of the form `mxgw_<key-id>_<secret>`.
|
||||
|
||||
**Resolution:** 2026-06-16 — Added clap doc-comment help to `--api-key-env` stating the variable's value must be a full gateway key of the form `mxgw_<key-id>_<secret>` and is forwarded verbatim as the Bearer token. Doc/help-only change, no test.
|
||||
|
||||
### Client.Rust-036
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Design-document adherence |
|
||||
| Location | `clients/rust/RustClientDesign.md:351` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The new `galaxy browse` subcommand (with its filter/depth/json flags) is not listed in the "Test CLI" command table in RustClientDesign.md, which still reads `galaxy {test-connection,last-deploy-time,discover-hierarchy,watch}`.
|
||||
|
||||
**Recommendation:** Add `mxgw galaxy browse [...flags]` and note `--depth 0` = requested level only, `--depth N` eagerly expands, and `--parent-gobject-id` makes `--depth` a no-op.
|
||||
|
||||
**Resolution:** 2026-06-16 — Added the `mxgw galaxy browse` line (with all flags) to the CLI table and a paragraph documenting that `--depth 0` prints only the requested level, `--depth N` eagerly expands N further levels, and `--parent-gobject-id` makes `--depth` a no-op. Doc-only change, no test.
|
||||
|
||||
### Client.Rust-037
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Design-document adherence |
|
||||
| Location | `clients/rust/README.md:164-179` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The README "Browsing lazily" example calls `galaxy.browse_children(...).await?.into_inner()`, but the public API is `GalaxyClient::browse_children_raw` (the bare `browse_children` is the generated proto-client method, not public; and `browse_children_raw` returns the reply struct directly, no `.into_inner()`). The example would not compile.
|
||||
|
||||
**Recommendation:** Replace with `galaxy.browse_children_raw(BrowseChildrenRequest::default()).await?` (drop `.into_inner()`).
|
||||
|
||||
**Resolution:** 2026-06-16 — Verified `browse_children_raw` is the public method (galaxy.rs:302) and returns `BrowseChildrenReply` directly. Updated the README prose and example to call `browse_children_raw(...).await?` without `.into_inner()`. Doc-only change, no test.
|
||||
|
||||
### Client.Rust-038
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `clients/rust/crates/mxgw-cli/src/main.rs:2336-2564` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** Three CLI test gaps: (1) `ConnectionArgs::options()` `--tls`/`--plaintext` resolution (incl. the both-set path of Client.Rust-033) is untested; (2) `browse_children_one_level`'s repeated-page-token guard is untested; (3) `parse_rfc3339_timestamp` has no error-path tests (trailing chars, day=0, month 13, out-of-range day).
|
||||
|
||||
**Recommendation:** Add unit tests for all three (none need a network connection).
|
||||
|
||||
**Resolution:** 2026-06-16 — Added all three test groups. (1) `--tls`/`--plaintext` resolution: `connection_defaults_to_plaintext`, `connection_tls_flag_disables_plaintext`, `connection_plaintext_flag_selects_plaintext`, `connection_rejects_tls_and_plaintext_together`. (2) Extracted the page-token dedup guard into pure `register_page_token` and covered it with `register_page_token_accepts_distinct_tokens_and_rejects_repeats`. (3) RFC3339 error paths: `rfc3339_parser_rejects_trailing_characters`, `rfc3339_parser_rejects_day_zero`, `rfc3339_parser_rejects_month_thirteen`, `rfc3339_parser_rejects_day_out_of_range_for_month`.
|
||||
|
||||
### Client.Rust-039
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Design-document adherence |
|
||||
| Location | `clients/rust/RustClientDesign.md:101-131` (Session API block); `clients/rust/RustClientDesign.md:326-353` (CLI commands table) |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The diff adds two pieces of new public surface that are not reflected in `RustClientDesign.md`:
|
||||
|
||||
1. `Session::write_array_elements` — a new public async method in `clients/rust/src/session.rs:567-601`. It accepts `element_data_type: MxDataType`, `total_length: u32`, and `elements: impl IntoIterator<Item = (u32, MxValue)>` alongside the standard `server_handle`/`item_handle`/`user_id`. The Session API block in `RustClientDesign.md` (lines 101-131) lists every other `Session` method but omits `write_array_elements`.
|
||||
|
||||
2. `Command::AdviseSupervisory` — a new CLI subcommand (`clients/rust/crates/mxgw-cli/src/main.rs:203-214`, dispatch at lines 663-683). The CLI commands table in `RustClientDesign.md` (lines 326-353) lists every other subcommand but does not include `advise-supervisory`.
|
||||
|
||||
CLAUDE.md requires "When public APIs … change, the affected docs … must change in the same commit."
|
||||
|
||||
**Recommendation:** Add `write_array_elements` to the Session block:
|
||||
|
||||
```rust
|
||||
pub async fn write_array_elements(
|
||||
&self,
|
||||
server_handle: i32,
|
||||
item_handle: i32,
|
||||
element_data_type: MxDataType,
|
||||
total_length: u32,
|
||||
elements: impl IntoIterator<Item = (u32, MxValue)>,
|
||||
user_id: i32,
|
||||
) -> Result<(), Error>;
|
||||
```
|
||||
|
||||
Add a sentence noting that the `elements` iterator accepts `(index, value)` pairs (not a `HashMap`, so duplicate indices are forwarded to the gateway, which rejects them with `InvalidArgument`). Add `mxgw advise-supervisory --session-id <id> --server-handle <h> --item-handle <h>` to the CLI table.
|
||||
|
||||
**Resolution:** 2026-06-18 — Added `write_array_elements` (with its exact `session.rs` signature) to the Session API block in `RustClientDesign.md` between `write2` and `write_bulk`. Added `mxgw advise-supervisory --session-id <id> --server-handle <h> --item-handle <h>` to the CLI commands table after `mxgw advise`. Both signatures verified against `clients/rust/src/session.rs:567` and `clients/rust/crates/mxgw-cli/src/main.rs:109-120`. `cargo fmt --check`, `cargo clippy --workspace --all-targets -- -D warnings`, and `cargo test --workspace` all pass.
|
||||
|
||||
### Client.Rust-040
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `clients/rust/tests/client_behavior.rs:1195-1224` (`sparse_int32_value` helper); `clients/rust/tests/client_behavior.rs:1226-1264` (unit tests using the helper) |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The `sparse_int32_value` test helper (lines 1195-1224) carries this comment: "Build the proto `MxValue` that `write_array_elements` would send." It then constructs the outer `MxValue` with `data_type: MxDataType::Integer as i32` (line 1215). However, `write_array_elements` in `session.rs` uses `..ProtoMxValue::default()` for the outer value, which sets `data_type` to `0` (= `MxDataType::Unspecified`). The helper builds the old, incorrect shape that the known bug fix (`72cf2f4`) explicitly corrected — the outer `data_type` should carry the element type only inside `SparseArray.element_data_type`, not on the enclosing `MxValue`.
|
||||
|
||||
The two unit tests that use this helper (`write_array_elements_proto_shape_has_sparse_oneof_kind` at line 1226 and `write_array_elements_empty_elements_is_valid_all_defaults` at line 1253) do not assert `data_type` on the outer `MxValue`, so they pass and do not catch the discrepancy. The only test that asserts `value.data_type == 0` is the e2e test `write_array_elements_routes_sparse_array_write_through_fake_gateway`, which correctly locks in the fix. The unit tests therefore give a false sense of coverage: they document and confirm a shape that does not match the implementation's actual output.
|
||||
|
||||
If the `..ProtoMxValue::default()` line were ever accidentally changed back to set `data_type` from `element_data_type`, the unit tests would continue to pass while the e2e test would catch the regression — but the test comment explicitly says the helper represents "what `write_array_elements` would send," making the incorrect `data_type` in the helper actively misleading for future maintainers.
|
||||
|
||||
**Recommendation:** Fix the `sparse_int32_value` helper to use `..MxValue::default()` (which zeros `data_type`) instead of `data_type: MxDataType::Integer as i32`, so the helper accurately represents the wire shape `write_array_elements` actually emits. Then add an explicit `assert_eq!(proto.data_type, 0, "outer MxValue.data_type must be Unspecified")` assertion to `write_array_elements_proto_shape_has_sparse_oneof_kind` so the unit test also locks in the outer-`data_type` fix — providing a second, faster regression guard that does not require spinning up a fake gRPC server.
|
||||
|
||||
**Resolution:** 2026-06-18 — Root cause confirmed: `sparse_int32_value` set `data_type: MxDataType::Integer as i32` on the outer `MxValue`, contradicting the `..ProtoMxValue::default()` in `Session::write_array_elements` which leaves `data_type = 0`. Fixed the helper to use `..MxValue::default()` (removing the explicit `data_type` field), so the outer `MxValue.data_type` is now `0` (Unspecified), matching the actual wire shape. Added `assert_eq!(proto.data_type, 0, …)` assertions to all three unit tests that call the helper: `write_array_elements_proto_shape_has_sparse_oneof_kind`, `write_array_elements_empty_elements_is_valid_all_defaults`, and `sparse_array_value_round_trips_through_client_mx_value_projection_as_unset`. All 36 tests pass (`cargo test --workspace`); `cargo fmt --check` and `cargo clippy --workspace --all-targets -- -D warnings` are clean.
|
||||
|
||||
@@ -4,8 +4,8 @@
|
||||
|---|---|
|
||||
| Module | `src/ZB.MOM.WW.MxGateway.Contracts` |
|
||||
| Reviewer | Claude Code |
|
||||
| Review date | 2026-06-15 |
|
||||
| Commit reviewed | `410acc9` |
|
||||
| Review date | 2026-06-18 |
|
||||
| Commit reviewed | `88915c3` |
|
||||
| Status | Re-reviewed |
|
||||
| Open findings | 0 |
|
||||
|
||||
@@ -379,6 +379,23 @@ Re-review: no new findings. Open finding count remains 0. All seventeen
|
||||
recorded Contracts findings (Contracts-001..017) remain closed
|
||||
(Resolved / Won't Fix).
|
||||
|
||||
#### 2026-06-16 re-review (commit 8df5ab3)
|
||||
|
||||
Re-review of the proto delta (`git diff 410acc9..8df5ab3 -- .../Protos/`): the new `optional ReplayGap replay_gap = 14` on `MxEvent` plus the `ReplayGap` message for reconnect replay. Additive-only confirmed (field 14 is new; oneof body arms 20-25 and fields 1-13 unchanged); `Generated/MxaccessGateway.cs` is consistent (contains `ReplayGapFieldNumber = 14`).
|
||||
|
||||
| # | Category | Result |
|
||||
|---|---|---|
|
||||
| 1 | Correctness & logic bugs | No issues found |
|
||||
| 2 | mxaccessgw conventions | No issues found (additive-only honoured) |
|
||||
| 3 | Concurrency & thread safety | N/A — pure contract |
|
||||
| 4 | Error handling & resilience | No issues found |
|
||||
| 5 | Security | No issues found |
|
||||
| 6 | Performance & resource management | No issues found |
|
||||
| 7 | Design-document adherence | Contracts-020 |
|
||||
| 8 | Code organization & conventions | No issues found |
|
||||
| 9 | Testing coverage | Contracts-022 |
|
||||
| 10 | Documentation & comments | Contracts-021 |
|
||||
|
||||
### Contracts-018
|
||||
|
||||
| Field | Value |
|
||||
@@ -408,3 +425,127 @@ recorded Contracts findings (Contracts-001..017) remain closed
|
||||
**Recommendation:** (1) Add comments to `ActiveAlarmSnapshot.degraded` / `source_provider` mirroring the wording already on `OnAlarmTransitionEvent` (or a one-line cross-reference). (2) Extend the `AlarmProviderMode` enum comment to note that as a `source_provider` / `mode` provenance value the field is always `ALARMMGR` or `SUBTAG` on the wire and `UNSPECIFIED` should be treated as "unknown / not yet determined", so the zero value is unambiguous at every use site. Comment-only changes; no wire-format impact.
|
||||
|
||||
**Resolution:** _(2026-06-15)_ Confirmed both gaps in `mxaccess_gateway.proto`: `ActiveAlarmSnapshot.degraded`/`source_provider` (14/15) were bare while the byte-identical `OnAlarmTransitionEvent` fields were documented, and the `AlarmProviderMode` enum comment only explained `UNSPECIFIED` for the `forced_mode` use. (1) Added comments to `ActiveAlarmSnapshot.degraded`/`source_provider` mirroring the `OnAlarmTransitionEvent` wording (subtag-fallback / reduced-fidelity, always ALARMMGR or SUBTAG, never UNSPECIFIED). (2) Extended the `AlarmProviderMode` enum comment to distinguish its two use sites: as `forced_mode`, `UNSPECIFIED` = auto; as a provenance value (`OnAlarmTransitionEvent.source_provider`, `ActiveAlarmSnapshot.source_provider`, `OnAlarmProviderModeChangedEvent.mode`, `AlarmProviderStatus.mode`) the worker always emits ALARMMGR/SUBTAG and `UNSPECIFIED` should be read as "unknown / not yet determined". Comment-only changes; no wire-format impact. NOTE: on this dev box the `csharp` protoc generator DOES emit proto leading comments into `Generated/MxaccessGateway.cs` `<summary>` XML doc (contrary to the brief's assumption), so the build regenerated `Generated/MxaccessGateway.cs` with the new doc comments only — diff is `///`-comment lines exclusively, zero code/wire/type changes. `dotnet build -f net10.0` succeeds with 0 warnings / 0 errors.
|
||||
|
||||
### Contracts-020
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Design-document adherence |
|
||||
| Location | `gateway.md:1087,1101-1102` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** gateway.md still lists "no reconnectable sessions" under "Resolved for v1" and lists "reconnectable sessions" / "multi-subscriber event fan-out" as post-v1 revisit items. The shipped `ReplayGap` reconnect-replay contract and multi-subscriber fan-out (documented in docs/Sessions.md) contradict this. docs/Sessions.md was updated; gateway.md's scope summary was left stale.
|
||||
|
||||
**Recommendation:** Update the gateway.md Resolved/Post-v1 lists to reflect that reconnectable sessions (via `after_worker_sequence` + `ReplayGap`) and multi-subscriber fan-out have shipped, cross-referencing docs/Sessions.md.
|
||||
|
||||
**Resolution:** _(2026-06-16)_ Updated `gateway.md` "Resolved for v1" list: replaced "no reconnectable sessions" / "one active event subscriber" with bullet points describing the shipped reconnect-replay (`after_worker_sequence` + `ReplayGap` sentinel, cross-referencing `docs/Sessions.md`) and multi-subscriber fan-out (single-subscriber fail-fast vs. multi-subscriber per-consumer disconnect, cross-referencing `docs/Sessions.md`). Removed "reconnectable sessions" and "multi-subscriber event fan-out" from the Post-v1 revisit list. Updated the backpressure bullet to mention both modes.
|
||||
|
||||
### Contracts-021
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Location | `src/ZB.MOM.WW.MxGateway.Contracts/Protos/mxaccess_gateway.proto:731-733` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The `replay_gap` field comment ends with "(Reconnect/replay logic is Task 12; this is the contract surface only.)". That parenthetical is now stale — the reconnect/replay logic has shipped and is exercised by EventStreamServiceTests/SessionEventDistributorTests. A reader is misled into thinking only the contract exists.
|
||||
|
||||
**Recommendation:** Drop the "Task 12 / contract surface only" parenthetical; the rest of the comment is accurate.
|
||||
|
||||
**Resolution:** _(2026-06-16)_ Removed the stale "(Reconnect/replay logic is Task 12; this is the contract surface only.)" parenthetical from the `replay_gap` field comment in `mxaccess_gateway.proto`. The "Additive (proto3):" sentence before it is retained. Comment-only change; no wire-format or generated-type impact.
|
||||
|
||||
### Contracts-022
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `src/ZB.MOM.WW.MxGateway.Tests/Contracts/ProtobufContractRoundTripTests.cs` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** No round-trip / descriptor pin exists for the new `ReplayGap` message or `MxEvent.replay_gap` (field 14). The field is exercised functionally end-to-end, but there is no contract-level pin to catch a future renumber/type-narrowing of `replay_gap = 14` or the two `ReplayGap` sequence-field numbers — the same gap class as Contracts-007/010/018.
|
||||
|
||||
**Recommendation:** Add a round-trip test setting `MxEvent.ReplayGap` with both sequence fields, asserting `BodyCase == None`, plus a descriptor assertion pinning `ReplayGapFieldNumber == 14` and the `ReplayGap` field numbers (1, 2).
|
||||
|
||||
**Resolution:** _(2026-06-16)_ Added `ProtobufContractRoundTripTests.MxEvent_RoundTripsReplayGapSentinelAndPinsFieldNumbers` to `ProtobufContractRoundTripTests.cs`. The test pins `MxEvent.ReplayGapFieldNumber == 14` via the generated constant, pins `ReplayGap.RequestedAfterSequenceFieldNumber == 1` and `ReplayGap.OldestAvailableSequenceFieldNumber == 2` via `ReplayGap.Descriptor.Fields` (asserting both the number and the field name), builds a sentinel `MxEvent` with both sequence fields populated and no body oneof set, serializes and parses it, then asserts both sequence values survive and `BodyCase == None` (confirming `replay_gap` is orthogonal to the body oneof).
|
||||
|
||||
#### 2026-06-18 re-review (commit 88915c3)
|
||||
|
||||
Re-review pass at `88915c3` scoped to the contract changes since `8df5ab3`
|
||||
(`git diff 8df5ab3..88915c3 -- src/ZB.MOM.WW.MxGateway.Contracts/`). The
|
||||
window contains exactly one contract feature commit: the array-write-ergonomics
|
||||
addition (`MxSparseArray`, `MxSparseElement`, `sparse_array_value = 19` on
|
||||
`MxValue`). Also included: a minor version bump (`0.1.1` → `0.1.2` in
|
||||
`ZB.MOM.WW.MxGateway.Contracts.csproj`), regenerated `Generated/MxaccessGateway.cs`
|
||||
(build output — confirmed consistent with the proto; `SparseArrayValueFieldNumber =
|
||||
19` and the new generated class registrations match the proto declaration), and
|
||||
the removal of the stale "Task 12 / contract surface only" parenthetical from the
|
||||
`replay_gap` field comment (Contracts-021 resolution already covered in the
|
||||
`8df5ab3` pass; the diff shows the cleaned comment). `mxaccess_worker.proto` and
|
||||
`galaxy_repository.proto` are unchanged.
|
||||
|
||||
Verified against `docs/plans/2026-06-18-array-write-ergonomics-design.md`,
|
||||
`gateway.md` (section "MxSparseArray — default-fill partial array writes"),
|
||||
and `docs/WorkerConversion.md` (section "Sparse array expansion").
|
||||
|
||||
| # | Category | Result |
|
||||
|---|---|---|
|
||||
| 1 | Correctness & logic bugs | No issues found. Field number `sparse_array_value = 19` is new and does not collide with any existing arm (18 = `raw_value`, 17 = `array_value`). `MxSparseArray` field numbers 1/2/3 and `MxSparseElement` field numbers 1/2 are all fresh. The `MxValue` oneof arms 10-18 are unchanged. The additive-only invariant is honoured. The generated `SparseArrayValueFieldNumber = 19` constant matches the proto. The `MxSparseElement.value` field reuses `MxValue`, which allows a client to nest another `sparse_array_value` inside an element — a recursive structure the gateway will reject at validation time but the proto level cannot prevent. This is a documentation gap (see Contracts-023) rather than a correctness bug in the contract itself, since gateway validation is the documented enforcement point. |
|
||||
| 2 | mxaccessgw conventions | No issues found. Wire-compatibility policy comment block at the top of `mxaccess_gateway.proto` (Contracts-005) remains intact and this change honours it. Naming follows conventions: `snake_case` fields, `PascalCase` messages, no enum-prefix needed (no new enums). The `MxSparseArray` message-level comment clearly states write-only semantics and that the worker never receives it. Generated code regenerated, not hand-edited. |
|
||||
| 3 | Concurrency & thread safety | N/A — pure contract definitions plus a static constants class. |
|
||||
| 4 | Error handling & resilience | No issues found. The validation rules (`total_length == 0`, `index >= total_length`, duplicate indices, unsupported `element_data_type`, element-kind mismatch) are documented in `gateway.md` and `docs/plans/2026-06-18-array-write-ergonomics-design.md`; they are not expressed in the proto itself, which is correct for proto3. The write-only guard (reject on read/event paths) is documented at the proto message level. |
|
||||
| 5 | Security | No issues found. No new credential-bearing fields. `MxSparseElement.value` carries the same write-value surface as any other `MxValue` write field; credential-sensitivity comments already on `WriteSecuredCommand.value` / `WriteSecured2Command.value` / the corresponding bulk entry fields apply to any write value, including sparse. No new redaction gap. |
|
||||
| 6 | Performance & resource management | No issues found. `repeated MxSparseElement elements` is sent once per write; the gateway's expansion to a full `MxArray` is gateway-side only and the worker receives a normal whole-array value. No proto-level bloat or unbounded repeated field beyond what already exists on `MxArray`. |
|
||||
| 7 | Design-document adherence | No drift. The shipped proto matches the design document (`docs/plans/2026-06-18-array-write-ergonomics-design.md`) field-for-field: `element_data_type = 1`, `total_length = 2`, `elements = 3` on `MxSparseArray`; `index = 1`, `value = 2` on `MxSparseElement`; `sparse_array_value = 19` on `MxValue`. `gateway.md` section "MxSparseArray" and `docs/WorkerConversion.md` are both updated and consistent. `docs/Contracts.md` has no mention of the new value arm — see Contracts-024. |
|
||||
| 8 | Code organization & conventions | No issues found. The `csharp_namespace` option and protobuf `package` are unchanged. The new messages are placed after `MxArray` and its typed-array sub-messages, which is the correct locality. No message is inserted between existing numeric-series messages. Version bump `0.1.1` → `0.1.2` is appropriate for a minor additive change. |
|
||||
| 9 | Testing coverage | Issues found: Contracts-023 — no `ProtobufContractRoundTripTests` coverage exists for `MxSparseArray`, `MxSparseElement`, or `MxValue.KindOneofCase.SparseArrayValue` (field number 19). This is the same gap class as Contracts-007/010/018/022. |
|
||||
| 10 | Documentation & comments | Issues found: Contracts-024 (`docs/Contracts.md` has no mention of `MxSparseArray` — the canonical contracts document undercounts the public value surface); Contracts-025 (`GatewayContractInfoTests.GatewayProtocolVersion_IsVersionThree` summary enumerates alarm and bulk write/read extensions under version 3 but not the sparse array addition, leaving future readers without guidance on whether the new arm also ships under version 3 without a bump). |
|
||||
|
||||
### Contracts-023
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `src/ZB.MOM.WW.MxGateway.Tests/Contracts/ProtobufContractRoundTripTests.cs` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** No round-trip test or descriptor pin exists for the new `MxSparseArray` message, `MxSparseElement` message, or `MxValue.KindOneofCase.SparseArrayValue` (field number 19). A future renumber or type-narrowing of `sparse_array_value = 19`, or of `MxSparseArray`'s field numbers (1/2/3) or `MxSparseElement`'s field numbers (1/2), would not be caught at the contract level. This is the same gap class as Contracts-007 (`MxValue.raw_value`), Contracts-010 (bulk write/read), Contracts-018 (alarm-provider fallback), and Contracts-022 (`ReplayGap`) — all of which were resolved by adding focused round-trip tests.
|
||||
|
||||
Additionally, the `MxSparseElement.value` field is typed `MxValue` (the full value union), which means a client could legally set it to another `sparse_array_value = 19` arm, creating a recursive sparse structure. The `// scalar` comment documents the intent, but no test pins that a well-formed sparse element carries only a scalar kind (not `array_value`, `raw_value`, or another `sparse_array_value`). The gateway rejects recursive nesting at validation time, but the contract-level test would document the constraint explicitly.
|
||||
|
||||
**Recommendation:** Add round-trip / descriptor-pin tests to `ProtobufContractRoundTripTests`: (a) pin `MxValue.SparseArrayValueFieldNumber == 19` via the generated constant; (b) round-trip an `MxSparseArray` with `element_data_type`, `total_length`, and at least one `MxSparseElement` (covering `index` and a scalar `value`), embedded in an `MxValue` with `KindCase == SparseArrayValue`; (c) assert the `MxSparseArray` field numbers by name via `MxSparseArray.Descriptor.Fields` (1 = `element_data_type`, 2 = `total_length`, 3 = `elements`) and `MxSparseElement.Descriptor.Fields` (1 = `index`, 2 = `value`). Optionally add a second test with an empty `elements` list (valid all-defaults case) to pin that zero elements is not a proto-level error.
|
||||
|
||||
**Resolution:** _(2026-06-18)_ Confirmed all three gaps against the proto and generated constants. Added `ProtobufContractRoundTripTests.MxValue_RoundTripsSparseArrayValueAndPinsFieldNumbers` to `ProtobufContractRoundTripTests.cs`. The test: (a) pins `MxValue.SparseArrayValueFieldNumber == 19` via the generated constant; (b) pins all five field numbers by name + number via the descriptor (`MxSparseArray` fields 1/2/3 and `MxSparseElement` fields 1/2); (c) round-trips an `MxValue` with `KindCase == SparseArrayValue` carrying a populated `MxSparseArray` (one `MxSparseElement` with a scalar float value at index 2); (d) verifies an all-defaults `MxSparseArray` with no elements is not a proto-level error. The full `ProtobufContractRoundTrip|GatewayContractInfo` filter is 54/54 green.
|
||||
|
||||
### Contracts-024
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Location | `docs/Contracts.md:9-11` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `docs/Contracts.md` lists `MxValue`, `MxArray`, and `MxStatusProxy` as the types defined in `mxaccess_gateway.proto`, and documents both bulk subscription and bulk write/read command families in detail. The new `MxSparseArray` value arm (`sparse_array_value = 19`) — a public-facing addition to the `MxValue` oneof that changes the write API available to every command variant — is not mentioned anywhere in `docs/Contracts.md`. The CLAUDE.md rule "Update docs in the same change as the source. When public APIs, contracts, configuration … change, the affected docs … must change in the same commit" was not satisfied for this addition; `docs/Contracts.md` now undercounts the public `MxValue` surface. `gateway.md` and `docs/WorkerConversion.md` were updated, but `docs/Contracts.md` — the canonical contracts document linked from the client generation doc — was not.
|
||||
|
||||
**Recommendation:** Extend `docs/Contracts.md` to describe `MxSparseArray`: the write-only `sparse_array_value = 19` arm on `MxValue`, the two messages (`MxSparseArray` with `element_data_type`, `total_length`, `elements`; `MxSparseElement` with `index`, `value`), the default-fill-not-preserve semantics for unmentioned indices, and the fact that it is accepted by every write variant (`Write`, `Write2`, `WriteSecured`, `WriteSecured2`, and each `*BulkEntry` entry) but rejected on read/event paths. Cross-reference `gateway.md` for the validation rules and expansion details rather than restating them.
|
||||
|
||||
**Resolution:** _(2026-06-18)_ Confirmed `docs/Contracts.md` had no mention of `MxSparseArray` / `MxSparseElement` / `sparse_array_value = 19`. Added a new paragraph in the "Files" section immediately after the `mxaccess_gateway.proto` intro sentence (before the bulk-subscription commands section): names `MxSparseArray` alongside `MxValue`, `MxArray`, and `MxStatusProxy` in the intro line; explains that `sparse_array_value = 19` is the `MxValue.kind` oneof arm for write-only partial-array writes; documents both messages with their fields and field numbers; states the default-fill-not-preserve semantics; and enumerates every write variant that accepts it plus the read/event rejection. Cross-references `gateway.md` for expansion rules and validation constraints.
|
||||
|
||||
### Contracts-025
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Location | `src/ZB.MOM.WW.MxGateway.Tests/Contracts/GatewayContractInfoTests.cs:14-25` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The XML summary on `GatewayContractInfoTests.GatewayProtocolVersion_IsVersionThree` (updated under Contracts-013 resolution to enumerate the alarm and bulk write/read extensions shipped under version 3) does not mention the new `MxSparseArray` / `sparse_array_value = 19` addition, which is also a strictly additive contract change shipped under version 3 without a bump. A reader checking whether a new additive contract feature requires a `GatewayProtocolVersion` bump will look at this test for precedent; finding only the alarm and bulk write/read examples, they cannot tell whether the sparse array addition was also additive-under-3 or was simply omitted by mistake. This is the same class of stale-summary issue as Contracts-013 (which noted the bulk write/read extension was not mentioned after the alarm-only summary).
|
||||
|
||||
**Recommendation:** Extend the XML summary to list the `MxSparseArray` write ergonomics extension (`MxSparseArray` / `MxSparseElement` + `sparse_array_value = 19` on `MxValue`, plus the suffix-normalization behavior) alongside the alarm and bulk write/read extensions as a third example of a strictly additive change that shipped under version 3 without a bump. Comment-only change; no test logic or version constant changes.
|
||||
|
||||
**Resolution:** _(2026-06-18)_ Confirmed the XML summary on `GatewayProtocolVersion_IsVersionThree` enumerated only the alarm and bulk write/read extensions; the sparse-array addition was missing. Extended the summary to list all three additive-under-version-3 extensions as an ordered enumeration: (1) alarm proto extension; (2) bulk write/read command family; (3) sparse-array write ergonomics (`MxSparseArray` / `MxSparseElement` messages plus `sparse_array_value = 19` on the `MxValue` oneof). Comment-only change; test logic and version constant are unchanged. The full `ProtobufContractRoundTrip|GatewayContractInfo` filter is 54/54 green.
|
||||
|
||||
@@ -4,8 +4,8 @@
|
||||
|---|---|
|
||||
| Module | `src/ZB.MOM.WW.MxGateway.IntegrationTests` |
|
||||
| Reviewer | Claude Code |
|
||||
| Review date | 2026-06-15 |
|
||||
| Commit reviewed | `410acc9` |
|
||||
| Review date | 2026-06-18 |
|
||||
| Commit reviewed | `88915c3` |
|
||||
| Status | Re-reviewed |
|
||||
| Open findings | 0 |
|
||||
|
||||
@@ -135,6 +135,49 @@ parameter (`d692232`).
|
||||
| 9 | Testing coverage | Issues found: IntegrationTests-023 (`DashboardLdapLiveTests.AuthenticateAsync_AdminInGwAdminGroup_Succeeds` asserts the `ldap_group` claim but does not assert the emitted `Role: Admin` claim, leaving the role-mapping path untested). |
|
||||
| 10 | Documentation & comments | No issues found. |
|
||||
|
||||
### 2026-06-16 re-review (commit `8df5ab3`)
|
||||
|
||||
Re-review of the live-test delta: two new `[LiveMxAccessFact]` smoke tests (B8 new COM commands; buffered-item path) plus `EmptyAlarmWatchListResolver`. The tests are correctly gated and serialized, and credential-redaction coverage is present. Only Low-severity documentation and coverage findings were raised.
|
||||
|
||||
| # | Category | Result |
|
||||
|---|---|---|
|
||||
| 1 | Correctness & logic bugs | No issues found |
|
||||
| 2 | mxaccessgw conventions | No issues found |
|
||||
| 3 | Concurrency & thread safety | No issues found |
|
||||
| 4 | Error handling & resilience | No issues found |
|
||||
| 5 | Security | No issues found |
|
||||
| 6 | Performance & resource management | No issues found |
|
||||
| 7 | Design-document adherence | IntegrationTests-030 |
|
||||
| 8 | Code organization & conventions | No issues found |
|
||||
| 9 | Testing coverage | IntegrationTests-032, IntegrationTests-033 |
|
||||
| 10 | Documentation & comments | IntegrationTests-030, IntegrationTests-031 |
|
||||
|
||||
### 2026-06-18 re-review (commit `88915c3`)
|
||||
|
||||
Scope: `git diff 8df5ab3..88915c3 -- src/ZB.MOM.WW.MxGateway.IntegrationTests/`. One
|
||||
commit touched the module: `6b5fe6a` ("fix: resolve code-review findings (locally
|
||||
verified)"). The IntegrationTests delta is exactly two hunks in
|
||||
`WorkerLiveMxAccessSmokeTests.cs`: (1) a comment reword on the Suspend/Activate block
|
||||
(IntegrationTests-031 resolution: "against the advised item" → "against the
|
||||
added-but-not-advised item (no Advise was issued between AddItem and this call)"); (2)
|
||||
`catch (TimeoutException)` widened to `catch (TimeoutException ex)` and
|
||||
`output.WriteLine($"B8: sample-bearing batch predicate timed out: {ex.Message}")` added
|
||||
before nulling `bufferedBatch` (IntegrationTests-032 resolution). Both are pure
|
||||
comment/diagnostic fixes with no assertion or logic changes. All ten categories clean.
|
||||
|
||||
| # | Category | Result |
|
||||
|---|---|---|
|
||||
| 1 | Correctness & logic bugs | No issues found. Both hunks are comment/diagnostic-only; no assertions, conditions, or API calls changed. |
|
||||
| 2 | mxaccessgw conventions | No issues found. Tests remain correctly gated behind `[LiveMxAccessFact]`; no secrets logged; no synthesized events; no direct COM instantiation introduced. |
|
||||
| 3 | Concurrency & thread safety | No issues found. No concurrency-relevant code changed. |
|
||||
| 4 | Error handling & resilience | No issues found. The `TimeoutException` catch remains non-rethrowing; adding `ex` capture and a `WriteLine` does not affect the error path. |
|
||||
| 5 | Security | No issues found. `ex.Message` from `WaitForMessageAsync`'s `TimeoutException` carries only a scan count and timeout duration — no credential data. |
|
||||
| 6 | Performance & resource management | No issues found. No new allocations on non-timeout paths; the exception reference capture on the timeout path is negligible. |
|
||||
| 7 | Design-document adherence | No issues found. `docs/GatewayTesting.md` was already updated in the same `6b5fe6a` wave (IntegrationTests-030); these two comment fixes require no further doc change. |
|
||||
| 8 | Code organization & conventions | No issues found. Comment style is consistent with the surrounding block; no namespace or layout changes. |
|
||||
| 9 | Testing coverage | No issues found. No assertions changed; the `output.WriteLine` adds residual diagnostics only. IntegrationTests-033 remains Deferred (AddItem2/Write2 live parity requires the live rig). |
|
||||
| 10 | Documentation & comments | No issues found. Both hunks are themselves documentation fixes: the reworded comment now accurately reflects that no `Advise` precedes Suspend/Activate, and the timeout log now surfaces the `WaitForMessageAsync` detail needed to distinguish the two residual failure modes. |
|
||||
|
||||
## Findings
|
||||
|
||||
### IntegrationTests-001
|
||||
@@ -608,3 +651,63 @@ The prior `DashboardAuthenticator` ctor took `IOptions<GatewayOptions>`, so the
|
||||
**Recommendation:** Reword the `docs/GatewayTesting.md` "Live LDAP" failure-branch sentences to describe observable behavior without referencing the now-internal "candidate bind" mechanics (e.g. "a wrong password is rejected without leaking the password", "an unknown username fails authentication"), and note that bind/search is delegated to the shared `ZB.MOM.WW.Auth.Ldap` provider so the prose stays accurate after the cutover.
|
||||
|
||||
**Resolution:** Resolved 2026-06-15: Reworded the "Live LDAP" failure-branch prose to describe observable behavior ("fails authentication without leaking the password", "an unknown username fails authentication") instead of the now-internal "candidate bind" / "no candidate" mechanics, and added a sentence noting `DashboardAuthenticator` delegates the bind/search to the shared `ZB.MOM.WW.Auth.Ldap` provider (`LdapAuthService`) and only maps groups to roles — matching the in-source test-comment cutover. Verified by inspection.
|
||||
|
||||
### IntegrationTests-030
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Location | `docs/GatewayTesting.md:76`, `src/ZB.MOM.WW.MxGateway.IntegrationTests/WorkerLiveMxAccessSmokeTests.cs:576,728` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `docs/GatewayTesting.md` says "All six tests are gated by MXGATEWAY_RUN_LIVE_MXACCESS_TESTS=1" and enumerates five parity paths. This diff adds two new `[LiveMxAccessFact]` tests (B8 new COM commands: AuthenticateUser/ArchestrAUserToId/Suspend/Activate; and the buffered-data path: AddBufferedItem/SetBufferedUpdateInterval), bringing the total to eight. The doc still says "six" and omits the two new parity surfaces.
|
||||
|
||||
**Recommendation:** Update GatewayTesting.md to "eight" and add bullets for the B8 new-COM-commands and buffered-data parity surfaces.
|
||||
|
||||
**Resolution:** 2026-06-16: Updated `docs/GatewayTesting.md` — changed "five parity paths" to "seven", "All six tests" to "All eight tests", and added bullets for the B8 new-COM-commands surface (AuthenticateUser/ArchestrAUserToId/Suspend/Activate against an added-but-not-advised item) and the buffered-data surface (AddBufferedItem/SetBufferedUpdateInterval/Advise round-trip with at least one OnBufferedDataChange event, residual noted for multi-sample conversion).
|
||||
|
||||
### IntegrationTests-031
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Location | `src/ZB.MOM.WW.MxGateway.IntegrationTests/WorkerLiveMxAccessSmokeTests.cs:672` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The inline comment at line 672 says "Suspend / Activate against the advised item", but no `Advise` call is made between `AddItem` (line 616) and `CreateSuspendRequest` (line 677) — the item is added but not advised. The comment mislabels the COM subscription state under test (the parity assertion only requires a real reply, not a successful one).
|
||||
|
||||
**Recommendation:** Change "against the advised item" to "against the added-but-not-advised item" (or remove "advised"), and note that Suspend/Activate is exercised without a prior Advise.
|
||||
|
||||
**Resolution:** 2026-06-16: Rewrote the comment to "Suspend / Activate against the added-but-not-advised item (no Advise was issued between AddItem and this call)," making the COM subscription state explicit and noting that parity requires only a real reply, not a successful one.
|
||||
|
||||
### IntegrationTests-032
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `src/ZB.MOM.WW.MxGateway.IntegrationTests/WorkerLiveMxAccessSmokeTests.cs:823-865` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** In the buffered-item test, when no sample-bearing `OnBufferedDataChange` batch arrives, the sample-predicate `TimeoutException` is caught and discarded (line 831) before asserting `bootstrapBufferedEvents > 0`. The final failure message ("No OnBufferedDataChange event arrived at all") therefore conflates two failure modes (NoData bootstrap not delivered vs. delivered but no sample), so the residual log cannot show which of the two occurred.
|
||||
|
||||
**Recommendation:** Before nulling the batch, log the caught timeout message (e.g. `output.WriteLine($"B8: sample-bearing batch predicate timed out: {ex.Message}")`) so the residual log distinguishes the two cases.
|
||||
|
||||
**Resolution:** 2026-06-16: Added `output.WriteLine($"B8: sample-bearing batch predicate timed out: {ex.Message}")` inside the `catch (TimeoutException ex)` block before nulling `bufferedBatch`, so the residual log clearly records the timeout detail and distinguishes "predicate timed out" from "no OnBufferedDataChange arrived at all".
|
||||
|
||||
### IntegrationTests-033
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `src/ZB.MOM.WW.MxGateway.IntegrationTests/WorkerLiveMxAccessSmokeTests.cs:577-709` |
|
||||
| Status | Deferred |
|
||||
|
||||
**Description:** The new-COM-commands live test covers AuthenticateUser/ArchestrAUserToId/Suspend/Activate but not `AddItem2`/`Write2` — the B8 extended commands with a second context parameter introduced in the same bundle. Only live COM tests can verify the COM call succeeds with the correct argument split; a parity regression short-circuiting AddItem2/Write2 to InvalidRequest would not be caught.
|
||||
|
||||
**Recommendation:** Add AddItem2/Write2 to the parity test (or a dedicated test) asserting each produces a real reply (not InvalidRequest) against a valid handle and item-definition split.
|
||||
|
||||
**Resolution:** Deferred 2026-06-16: verifying this requires a live MXAccess rig and provider state that are not available on this dev box; add the AddItem2/Write2 parity assertions when running on the MXAccess host.
|
||||
|
||||
+74
-11
@@ -10,17 +10,17 @@ Each module's `findings.md` is the source of truth; this file is generated from
|
||||
|
||||
| Module | Reviewer | Date | Commit | Status | Open | Total |
|
||||
|---|---|---|---|---|---|---|
|
||||
| [Client.Dotnet](Client.Dotnet/findings.md) | Claude Code | 2026-06-15 | `410acc9` | Re-reviewed | 0 | 25 |
|
||||
| [Client.Go](Client.Go/findings.md) | Claude Code | 2026-06-15 | `410acc9` | Re-reviewed | 0 | 29 |
|
||||
| [Client.Java](Client.Java/findings.md) | Claude Code | 2026-06-15 | `410acc9` | Re-reviewed | 0 | 39 |
|
||||
| [Client.Python](Client.Python/findings.md) | Claude Code | 2026-06-15 | `410acc9` | Re-reviewed | 0 | 31 |
|
||||
| [Client.Rust](Client.Rust/findings.md) | Claude Code | 2026-06-15 | `410acc9` | Re-reviewed | 0 | 32 |
|
||||
| [Contracts](Contracts/findings.md) | Claude Code | 2026-06-15 | `410acc9` | Re-reviewed | 0 | 19 |
|
||||
| [IntegrationTests](IntegrationTests/findings.md) | Claude Code | 2026-06-15 | `410acc9` | Re-reviewed | 0 | 29 |
|
||||
| [Server](Server/findings.md) | Claude Code | 2026-06-15 | `410acc9` | Re-reviewed | 0 | 53 |
|
||||
| [Tests](Tests/findings.md) | Claude Code | 2026-06-15 | `410acc9` | Re-reviewed | 0 | 35 |
|
||||
| [Worker](Worker/findings.md) | Claude Code | 2026-06-15 | `410acc9` | Re-reviewed | 0 | 28 |
|
||||
| [Worker.Tests](Worker.Tests/findings.md) | Claude Code | 2026-06-15 | `410acc9` | Re-reviewed | 0 | 33 |
|
||||
| [Client.Dotnet](Client.Dotnet/findings.md) | Claude Code | 2026-06-18 | `88915c3` | Re-reviewed | 0 | 30 |
|
||||
| [Client.Go](Client.Go/findings.md) | Claude Code | 2026-06-18 | `88915c3` | Re-reviewed | 0 | 37 |
|
||||
| [Client.Java](Client.Java/findings.md) | Claude Code | 2026-06-18 | `88915c3` | Re-reviewed | 0 | 51 |
|
||||
| [Client.Python](Client.Python/findings.md) | Claude Code | 2026-06-18 | `88915c3` | Re-reviewed | 0 | 38 |
|
||||
| [Client.Rust](Client.Rust/findings.md) | Claude Code | 2026-06-18 | `88915c3` | Re-reviewed | 0 | 40 |
|
||||
| [Contracts](Contracts/findings.md) | Claude Code | 2026-06-18 | `88915c3` | Re-reviewed | 0 | 25 |
|
||||
| [IntegrationTests](IntegrationTests/findings.md) | Claude Code | 2026-06-18 | `88915c3` | Re-reviewed | 0 | 33 |
|
||||
| [Server](Server/findings.md) | Claude Code | 2026-06-18 | `88915c3` | Re-reviewed | 0 | 58 |
|
||||
| [Tests](Tests/findings.md) | Claude Code | 2026-06-18 | `88915c3` | Re-reviewed | 0 | 40 |
|
||||
| [Worker](Worker/findings.md) | Claude Code | 2026-06-16 | `8df5ab3` | Re-reviewed | 0 | 28 |
|
||||
| [Worker.Tests](Worker.Tests/findings.md) | Claude Code | 2026-06-18 | `88915c3` | Re-reviewed | 0 | 36 |
|
||||
|
||||
## Pending findings
|
||||
|
||||
@@ -66,11 +66,14 @@ Findings with status `Resolved`, `Won't Fix`, or `Deferred`.
|
||||
| Client.Dotnet-003 | Medium | Resolved | Concurrency & thread safety | `clients/dotnet/MxGateway.Client/MxGatewaySession.cs:659-663`, `clients/dotnet/MxGateway.Client/MxGatewayClient.cs:230-240` |
|
||||
| Client.Dotnet-018 | Medium | Resolved | Documentation & comments | `clients/dotnet/README.md:137-138` |
|
||||
| Client.Dotnet-022 | Medium | Resolved | mxaccessgw conventions | `clients/dotnet/Directory.Build.props:1-21` |
|
||||
| Client.Dotnet-028 | Medium | Resolved | Security | `clients/dotnet/.../MxGatewayClientCli.cs:156` |
|
||||
| Client.Dotnet-030 | Medium | Resolved | Correctness & logic bugs | `clients/dotnet/ZB.MOM.WW.MxGateway.Client.Cli/MxGatewayClientCli.cs:91-93,113,2023-2050` |
|
||||
| Client.Go-002 | Medium | Resolved | Error handling & resilience | `clients/go/mxgateway/session.go:440-516` |
|
||||
| Client.Go-003 | Medium | Resolved | Correctness & logic bugs | `clients/go/cmd/mxgw-go/main.go:517-532` |
|
||||
| Client.Go-022 | Medium | Resolved | Code organization & conventions | `clients/go/cmd/mxgw-go/main.go:398-412,417-519` |
|
||||
| Client.Go-023 | Medium | Resolved | Concurrency & thread safety | `clients/go/cmd/mxgw-go/main.go:604-606,616-632` |
|
||||
| Client.Go-028 | Medium | Resolved | Correctness & logic bugs | `scripts/tag-go-module.ps1:42-46` |
|
||||
| Client.Go-030 | Medium | Resolved | Concurrency & thread safety | `clients/go/cmd/mxgw-go/main.go:1491-1494` |
|
||||
| Client.Java-001 | Medium | Resolved | Security | `clients/java/mxgateway-client/src/main/java/com/dohertylan/mxgateway/client/MxGatewaySecrets.java:30-32` |
|
||||
| Client.Java-002 | Medium | Resolved | Concurrency & thread safety | `clients/java/mxgateway-client/src/main/java/com/dohertylan/mxgateway/client/MxEventStream.java:31,66-92` |
|
||||
| Client.Java-003 | Medium | Resolved | mxaccessgw conventions | `clients/java/mxgateway-client/src/main/java/com/dohertylan/mxgateway/client/MxGatewayClient.java:119-140` |
|
||||
@@ -84,6 +87,7 @@ Findings with status `Resolved`, `Won't Fix`, or `Deferred`.
|
||||
| Client.Java-033 | Medium | Resolved | Correctness & logic bugs | `clients/java/zb-mom-ww-mxgateway-cli/src/main/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCli.java:1078-1098` |
|
||||
| Client.Java-034 | Medium | Resolved | Correctness & logic bugs | `clients/java/zb-mom-ww-mxgateway-cli/src/main/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCli.java:182-198` |
|
||||
| Client.Java-037 | Medium | Resolved | Documentation & comments | `clients/java/README.md:138-149` |
|
||||
| Client.Java-040 | Medium | Resolved | Correctness & logic bugs | `clients/java/zb-mom-ww-mxgateway-cli/src/main/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCli.java:1552-1561` |
|
||||
| Client.Python-003 | Medium | Resolved | Error handling & resilience | `clients/python/src/mxgateway/client.py:125-137,155-173` |
|
||||
| Client.Python-005 | Medium | Resolved | Performance & resource management | `clients/python/src/mxgateway/galaxy.py:117-140` |
|
||||
| Client.Python-009 | Medium | Resolved | Testing coverage | `clients/python/tests/` |
|
||||
@@ -92,6 +96,7 @@ Findings with status `Resolved`, `Won't Fix`, or `Deferred`.
|
||||
| Client.Python-024 | Medium | Resolved | Code organization & conventions | `clients/python/src/zb_mom_ww_mxgateway_cli/commands.py:13,48-119` |
|
||||
| Client.Python-027 | Medium | Resolved | Security | `clients/python/src/zb_mom_ww_mxgateway/client.py:36-54`, `clients/python/src/zb_mom_ww_mxgateway/galaxy.py:47-66`, `clients/python/src/zb_mom_ww_mxgateway_cli/commands.py:165-172,918-930` |
|
||||
| Client.Python-028 | Medium | Resolved | Error handling & resilience | `clients/python/src/zb_mom_ww_mxgateway/options.py:120-130`, `clients/python/src/zb_mom_ww_mxgateway/client.py:59`, `clients/python/src/zb_mom_ww_mxgateway/galaxy.py:71` |
|
||||
| Client.Python-036 | Medium | Resolved | Documentation & comments | `clients/python/README.md:143-158` |
|
||||
| Client.Rust-005 | Medium | Resolved | Correctness & logic bugs | `clients/rust/src/session.rs:489-520` |
|
||||
| Client.Rust-006 | Medium | Resolved | Error handling & resilience | `clients/rust/src/session.rs:531-555` |
|
||||
| Client.Rust-015 | Medium | Resolved | Error handling & resilience | `clients/rust/crates/mxgw-cli/src/main.rs:1053-1070` |
|
||||
@@ -100,6 +105,7 @@ Findings with status `Resolved`, `Won't Fix`, or `Deferred`.
|
||||
| Client.Rust-022 | Medium | Resolved | Correctness & logic bugs | `clients/rust/src/session.rs:369-391,403-420,427-444,452-469,476-493,631-696,706-724` |
|
||||
| Client.Rust-024 | Medium | Resolved | Testing coverage | `clients/rust/tests/client_behavior.rs:405-415`; `clients/rust/src/session.rs:369-493`; `clients/rust/src/client.rs:265-291`; `clients/rust/crates/mxgw-cli/src/main.rs:1310-1505` |
|
||||
| Client.Rust-031 | Medium | Resolved | Error handling & resilience | `clients/rust/src/options.rs:196-240` (`build_tls_config`); `clients/rust/Cargo.toml:40` (tonic features); docs: `clients/rust/src/options.rs:76-101`, `clients/rust/README.md` (TLS trust section), `clients/rust/crates/mxgw-cli/src/main.rs:429-431`, `clients/rust/RustClientDesign.md:202` |
|
||||
| Client.Rust-033 | Medium | Resolved | Correctness & logic bugs | `clients/rust/crates/mxgw-cli/src/main.rs:485` |
|
||||
| Contracts-002 | Medium | Resolved | Error handling & resilience | `src/MxGateway.Contracts/Protos/mxaccess_gateway.proto:384-385`, `:95` |
|
||||
| Contracts-009 | Medium | Resolved | Design-document adherence | `docs/Contracts.md:13-24` |
|
||||
| IntegrationTests-003 | Medium | Resolved | Correctness & logic bugs | `src/MxGateway.IntegrationTests/WorkerLiveMxAccessSmokeTests.cs:89-97` |
|
||||
@@ -124,6 +130,9 @@ Findings with status `Resolved`, `Won't Fix`, or `Deferred`.
|
||||
| Server-038 | Medium | Resolved | Security | `src/ZB.MOM.WW.MxGateway.Server/Dashboard/Hubs/EventsHub.cs:23-44` |
|
||||
| Server-044 | Medium | Resolved | Correctness & logic bugs | `src/ZB.MOM.WW.MxGateway.Server/Sessions/SessionManager.cs:216-254` |
|
||||
| Server-051 | Medium | Resolved | Error handling & resilience | `src/ZB.MOM.WW.MxGateway.Server/Alarms/AlarmWatchListResolver.cs:64-78` |
|
||||
| Server-054 | Medium | Resolved | Design-document adherence | `docs/DesignDecisions.md` (Session Reconnect / Event Subscribers / Later Revisit Items §470-471), `CLAUDE.md` (Repository-Specific Conventions) |
|
||||
| Server-056 | Medium | Resolved | Concurrency & thread safety | `src/ZB.MOM.WW.MxGateway.Server/Sessions/SessionEventDistributor.cs:296-310,449-453,629-635` |
|
||||
| Server-057 | Medium | Resolved | Correctness & logic bugs | `src/ZB.MOM.WW.MxGateway.Server/Sessions/GatewaySession.cs:976-1000` (`NormalizeOutboundCommand`), `:1085-1095` (`MapCommand` tracking), `gateway.md` (array-write ergonomics section), `clients/*/README.md` |
|
||||
| Tests-003 | Medium | Resolved | Performance & resource management | `src/MxGateway.Tests/Security/Authentication/SqliteAuthStoreTests.cs:170-176`, `src/MxGateway.Tests/Security/Authentication/ApiKeyAdminCliRunnerTests.cs:252-258` |
|
||||
| Tests-004 | Medium | Resolved | Testing coverage | `src/MxGateway.Tests/Security/Authorization/GatewayGrpcAuthorizationInterceptorTests.cs` |
|
||||
| Tests-005 | Medium | Resolved | Testing coverage | `src/MxGateway.Tests/Gateway/Grpc/EventStreamServiceTests.cs:239-261`, `src/MxGateway.Tests/Gateway/Sessions/SessionManagerTests.cs` |
|
||||
@@ -172,6 +181,9 @@ Findings with status `Resolved`, `Won't Fix`, or `Deferred`.
|
||||
| Client.Dotnet-023 | Low | Resolved | Code organization & conventions | `clients/dotnet/Directory.Build.props:17`, `clients/dotnet/ZB.MOM.WW.MxGateway.Client.Cli/IMxGatewayCliClient.cs:6`, `clients/dotnet/ZB.MOM.WW.MxGateway.Client.Tests/*.cs` |
|
||||
| Client.Dotnet-024 | Low | Resolved | Code organization & conventions | `clients/dotnet/Directory.Build.props:12`, `clients/dotnet/ZB.MOM.WW.MxGateway.Client/ZB.MOM.WW.MxGateway.Client.csproj:19-24` |
|
||||
| Client.Dotnet-025 | Low | Resolved | Concurrency & thread safety | `clients/dotnet/ZB.MOM.WW.MxGateway.Client/LazyBrowseNode.cs:38,41,54,82,94` |
|
||||
| Client.Dotnet-026 | Low | Resolved | Correctness & logic bugs | `clients/dotnet/.../MxGatewayClientCli.cs:306` (isLongRunning) |
|
||||
| Client.Dotnet-027 | Low | Won't Fix | Performance & resource management | `clients/dotnet/ZB.MOM.WW.MxGateway.Client/LazyBrowseNode.cs:15` |
|
||||
| Client.Dotnet-029 | Low | Resolved | Code organization & conventions | `clients/dotnet/.../IMxGatewayCliClient.cs:6` |
|
||||
| Client.Go-004 | Low | Resolved | mxaccessgw conventions | `clients/go/mxgateway/alarms_test.go:153-154`, `clients/go/mxgateway/galaxy_test.go:58-59` |
|
||||
| Client.Go-005 | Low | Resolved | Design-document adherence | `clients/go/mxgateway/client.go:64,68`, `clients/go/mxgateway/galaxy.go:83,87` |
|
||||
| Client.Go-006 | Low | Resolved | Error handling & resilience | `clients/go/mxgateway/errors.go:9-130` |
|
||||
@@ -195,6 +207,13 @@ Findings with status `Resolved`, `Won't Fix`, or `Deferred`.
|
||||
| Client.Go-026 | Low | Resolved | Error handling & resilience | `clients/go/cmd/mxgw-go/main.go:1196-1222` |
|
||||
| Client.Go-027 | Low | Resolved | Code organization & conventions | `clients/go/cmd/mxgw-go/main.go:1195-1206` |
|
||||
| Client.Go-029 | Low | Resolved | Documentation & comments | `clients/go/README.md:300-303` |
|
||||
| Client.Go-031 | Low | Resolved | Correctness & logic bugs | `clients/go/cmd/mxgw-go/main.go:1037-1046` |
|
||||
| Client.Go-032 | Low | Resolved | Code organization & conventions | `clients/go/cmd/mxgw-go/main.go:839-841` |
|
||||
| Client.Go-033 | Low | Resolved | Testing coverage | `clients/go/cmd/mxgw-go/main_test.go` |
|
||||
| Client.Go-034 | Low | Resolved | Documentation & comments | `clients/go/README.md:245-263` |
|
||||
| Client.Go-035 | Low | Resolved | Code organization & conventions | `clients/go/cmd/mxgw-go/main.go:1298`, `clients/go/README.md:328-355` |
|
||||
| Client.Go-036 | Low | Resolved | Testing coverage | `clients/go/cmd/mxgw-go/main_test.go`, `clients/go/cmd/mxgw-go/main.go:364-399` |
|
||||
| Client.Go-037 | Low | Resolved | Documentation & comments | `clients/go/README.md:136-137` |
|
||||
| Client.Java-006 | Low | Resolved | Performance & resource management | `clients/java/mxgateway-client/src/main/java/com/dohertylan/mxgateway/client/MxGatewayClient.java:323-328`, `clients/java/mxgateway-client/src/main/java/com/dohertylan/mxgateway/client/GalaxyRepositoryClient.java:279-284` |
|
||||
| Client.Java-007 | Low | Resolved | Testing coverage | `clients/java/mxgateway-client/src/test/java/com/dohertylan/mxgateway/client/` |
|
||||
| Client.Java-008 | Low | Resolved | Error handling & resilience | `clients/java/mxgateway-client/src/main/java/com/dohertylan/mxgateway/client/MxGatewayClient.java:298-304` |
|
||||
@@ -218,6 +237,17 @@ Findings with status `Resolved`, `Won't Fix`, or `Deferred`.
|
||||
| Client.Java-035 | Low | Resolved | Testing coverage | `clients/java/zb-mom-ww-mxgateway-client/src/test/java/com/zb/mom/ww/mxgateway/client/MxGatewayClientSessionTests.java` |
|
||||
| Client.Java-036 | Low | Resolved | Code organization & conventions | `clients/java/zb-mom-ww-mxgateway-client/src/main/java/com/zb/mom/ww/mxgateway/client/MxGatewayAlarmFeedSubscription.java`, `MxGatewayEventSubscription.java`, `MxGatewayActiveAlarmsSubscription.java`, `DeployEventSubscription.java` |
|
||||
| Client.Java-038 | Low | Resolved | Code organization & conventions | `clients/java/zb-mom-ww-mxgateway-cli/src/main/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCli.java:1347-1393` |
|
||||
| Client.Java-041 | Low | Resolved | Correctness & logic bugs | `clients/java/zb-mom-ww-mxgateway-cli/src/main/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCli.java:2187-2194` |
|
||||
| Client.Java-042 | Low | Resolved | Error handling & resilience | `clients/java/zb-mom-ww-mxgateway-cli/src/main/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCli.java:1565-1567` |
|
||||
| Client.Java-043 | Low | Resolved | Code organization & conventions | `clients/java/zb-mom-ww-mxgateway-cli/src/test/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCliTests.java:241-264` |
|
||||
| Client.Java-044 | Low | Resolved | Code organization & conventions | `clients/java/zb-mom-ww-mxgateway-client/src/main/java/com/zb/mom/ww/mxgateway/client/MxGatewayClientVersion.java:12` |
|
||||
| Client.Java-045 | Low | Resolved | Testing coverage | `clients/java/zb-mom-ww-mxgateway-cli/src/main/java/com/zb/mom/ww/mxgateway/cli/InProcessGatewayHarness.java` |
|
||||
| Client.Java-046 | Low | Resolved | Testing coverage | `clients/java/zb-mom-ww-mxgateway-cli/src/test/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCliTests.java:680-696` |
|
||||
| Client.Java-047 | Low | Resolved | Documentation & comments | `clients/java/README.md` |
|
||||
| Client.Java-048 | Low | Resolved | Documentation & comments | `clients/java/zb-mom-ww-mxgateway-cli/src/main/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCli.java:88-105` |
|
||||
| Client.Java-049 | Low | Resolved | Code organization & conventions | `clients/java/build.gradle:16`, `clients/java/zb-mom-ww-mxgateway-client/src/main/java/com/zb/mom/ww/mxgateway/client/MxGatewayClientVersion.java:12`, `clients/java/zb-mom-ww-mxgateway-cli/src/test/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCliTests.java:59,89`, `clients/java/README.md:399` |
|
||||
| Client.Java-050 | Low | Resolved | Testing coverage | `clients/java/zb-mom-ww-mxgateway-cli/src/main/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCli.java:1046-1068` (new `AdviseSupervisoryCommand`), `clients/java/zb-mom-ww-mxgateway-cli/src/test/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCliTests.java:1306-1313` (stub) |
|
||||
| Client.Java-051 | Low | Resolved | Documentation & comments | `clients/java/zb-mom-ww-mxgateway-client/src/main/java/com/zb/mom/ww/mxgateway/client/MxGatewaySession.java:622-657` |
|
||||
| Client.Python-001 | Low | Resolved | Documentation & comments | `clients/python/pyproject.toml:8,25`, `clients/python/src/mxgateway_cli/commands.py:25` |
|
||||
| Client.Python-002 | Low | Resolved | Code organization & conventions | `clients/python/src/mxgateway/__init__.py:27` |
|
||||
| Client.Python-004 | Low | Resolved | Correctness & logic bugs | `clients/python/src/mxgateway_cli/commands.py:386,402-404` |
|
||||
@@ -239,6 +269,12 @@ Findings with status `Resolved`, `Won't Fix`, or `Deferred`.
|
||||
| Client.Python-029 | Low | Resolved | Correctness & logic bugs | `clients/python/src/zb_mom_ww_mxgateway/options.py:78-90` |
|
||||
| Client.Python-030 | Low | Resolved | Code organization & conventions | `clients/python/pyproject.toml:17` |
|
||||
| Client.Python-031 | Low | Resolved | Testing coverage | `clients/python/tests/test_tls.py:34`, `clients/python/pyproject.toml:53-56` |
|
||||
| Client.Python-032 | Low | Resolved | Correctness & logic bugs | `clients/python/src/zb_mom_ww_mxgateway_cli/commands.py:1048,1065-1066` |
|
||||
| Client.Python-033 | Low | Resolved | Correctness & logic bugs | `clients/python/src/zb_mom_ww_mxgateway_cli/commands.py:772,1490-1494` |
|
||||
| Client.Python-034 | Low | Resolved | Correctness & logic bugs | `clients/python/src/zb_mom_ww_mxgateway_cli/commands.py:1497-1501` |
|
||||
| Client.Python-035 | Low | Resolved | Code organization & conventions | `clients/python/src/zb_mom_ww_mxgateway/__init__.py`, `.../options.py:63-77`, `.../galaxy.py:293` |
|
||||
| Client.Python-037 | Low | Resolved | Correctness & logic bugs | `clients/python/pyproject.toml:10` |
|
||||
| Client.Python-038 | Low | Resolved | Testing coverage | `clients/python/tests/`, `clients/python/src/zb_mom_ww_mxgateway_cli/commands.py:280-299,742-758` |
|
||||
| Client.Rust-004 | Low | Resolved | Documentation & comments | `clients/rust/src/version.rs:7` |
|
||||
| Client.Rust-007 | Low | Resolved | Design-document adherence | `clients/rust/RustClientDesign.md:14-55` |
|
||||
| Client.Rust-008 | Low | Resolved | Performance & resource management | `clients/rust/src/value.rs:161-261` |
|
||||
@@ -256,6 +292,13 @@ Findings with status `Resolved`, `Won't Fix`, or `Deferred`.
|
||||
| Client.Rust-027 | Low | Resolved | Documentation & comments | `clients/rust/.cargo/config.toml:1-9` |
|
||||
| Client.Rust-028 | Low | Resolved | mxaccessgw conventions | `clients/rust/crates/mxgw-cli/src/main.rs:1126-1166` |
|
||||
| Client.Rust-032 | Low | Resolved | Design-document adherence | `clients/rust/RustClientDesign.md`; surface in `clients/rust/src/galaxy.rs:281-379` |
|
||||
| Client.Rust-034 | Low | Resolved | Correctness & logic bugs | `clients/rust/crates/mxgw-cli/src/main.rs:48-51,548` |
|
||||
| Client.Rust-035 | Low | Resolved | Security | `clients/rust/crates/mxgw-cli/src/main.rs:489-495` |
|
||||
| Client.Rust-036 | Low | Resolved | Design-document adherence | `clients/rust/RustClientDesign.md:351` |
|
||||
| Client.Rust-037 | Low | Resolved | Design-document adherence | `clients/rust/README.md:164-179` |
|
||||
| Client.Rust-038 | Low | Resolved | Testing coverage | `clients/rust/crates/mxgw-cli/src/main.rs:2336-2564` |
|
||||
| Client.Rust-039 | Low | Resolved | Design-document adherence | `clients/rust/RustClientDesign.md:101-131` (Session API block); `clients/rust/RustClientDesign.md:326-353` (CLI commands table) |
|
||||
| Client.Rust-040 | Low | Resolved | Testing coverage | `clients/rust/tests/client_behavior.rs:1195-1224` (`sparse_int32_value` helper); `clients/rust/tests/client_behavior.rs:1226-1264` (unit tests using the helper) |
|
||||
| Contracts-001 | Low | Resolved | Design-document adherence | `docs/Grpc.md:13` (and `:3`, `:32`, `:39`) |
|
||||
| Contracts-003 | Low | Won't Fix | Code organization & conventions | `src/MxGateway.Contracts/MxGateway.Contracts.csproj:10` |
|
||||
| Contracts-004 | Low | Resolved | Documentation & comments | `src/MxGateway.Contracts/GatewayContractInfo.cs:3-6` |
|
||||
@@ -273,6 +316,12 @@ Findings with status `Resolved`, `Won't Fix`, or `Deferred`.
|
||||
| Contracts-017 | Low | Resolved | Documentation & comments | `src/ZB.MOM.WW.MxGateway.Contracts/Protos/mxaccess_gateway.proto:23-29` (the `rpc QueryActiveAlarms` block) |
|
||||
| Contracts-018 | Low | Resolved | Testing coverage | `src/ZB.MOM.WW.MxGateway.Tests/Contracts/ProtobufContractRoundTripTests.cs:396` (`ActiveAlarmSnapshot_RoundTripsAllFields`) |
|
||||
| Contracts-019 | Low | Resolved | Documentation & comments | `src/ZB.MOM.WW.MxGateway.Contracts/Protos/mxaccess_gateway.proto:850-851` (`ActiveAlarmSnapshot`), `:318-324` (`AlarmProviderMode`) |
|
||||
| Contracts-020 | Low | Resolved | Design-document adherence | `gateway.md:1087,1101-1102` |
|
||||
| Contracts-021 | Low | Resolved | Documentation & comments | `src/ZB.MOM.WW.MxGateway.Contracts/Protos/mxaccess_gateway.proto:731-733` |
|
||||
| Contracts-022 | Low | Resolved | Testing coverage | `src/ZB.MOM.WW.MxGateway.Tests/Contracts/ProtobufContractRoundTripTests.cs` |
|
||||
| Contracts-023 | Low | Resolved | Testing coverage | `src/ZB.MOM.WW.MxGateway.Tests/Contracts/ProtobufContractRoundTripTests.cs` |
|
||||
| Contracts-024 | Low | Resolved | Documentation & comments | `docs/Contracts.md:9-11` |
|
||||
| Contracts-025 | Low | Resolved | Documentation & comments | `src/ZB.MOM.WW.MxGateway.Tests/Contracts/GatewayContractInfoTests.cs:14-25` |
|
||||
| IntegrationTests-007 | Low | Resolved | Concurrency & thread safety | `src/MxGateway.IntegrationTests/WorkerLiveMxAccessSmokeTests.cs:20`, `src/MxGateway.IntegrationTests/Galaxy/GalaxyRepositoryLiveTests.cs:5`, `src/MxGateway.IntegrationTests/DashboardLdapLiveTests.cs:9` |
|
||||
| IntegrationTests-008 | Low | Resolved | Code organization & conventions | `src/MxGateway.IntegrationTests/LiveLdapFactAttribute.cs`, `src/MxGateway.IntegrationTests/Galaxy/LiveGalaxyRepositoryFactAttribute.cs`, `src/MxGateway.IntegrationTests/LiveMxAccessFactAttribute.cs` |
|
||||
| IntegrationTests-009 | Low | Resolved | Documentation & comments | `src/MxGateway.IntegrationTests/WorkerLiveMxAccessSmokeTests.cs:372-375` |
|
||||
@@ -291,6 +340,10 @@ Findings with status `Resolved`, `Won't Fix`, or `Deferred`.
|
||||
| IntegrationTests-027 | Low | Resolved | Code organization & conventions | `src/ZB.MOM.WW.MxGateway.IntegrationTests/ZB.MOM.WW.MxGateway.IntegrationTests.csproj`, `src/ZB.MOM.WW.MxGateway.IntegrationTests/DashboardLdapLiveTests.cs:4-5,134` |
|
||||
| IntegrationTests-028 | Low | Resolved | Design-document adherence | `src/ZB.MOM.WW.MxGateway.IntegrationTests/DashboardLdapLiveTests.cs:120-161`, `src/ZB.MOM.WW.MxGateway.Server/Dashboard/DashboardServiceCollectionExtensions.cs:35` |
|
||||
| IntegrationTests-029 | Low | Resolved | Documentation & comments | `docs/GatewayTesting.md:218-224` |
|
||||
| IntegrationTests-030 | Low | Resolved | Documentation & comments | `docs/GatewayTesting.md:76`, `src/ZB.MOM.WW.MxGateway.IntegrationTests/WorkerLiveMxAccessSmokeTests.cs:576,728` |
|
||||
| IntegrationTests-031 | Low | Resolved | Documentation & comments | `src/ZB.MOM.WW.MxGateway.IntegrationTests/WorkerLiveMxAccessSmokeTests.cs:672` |
|
||||
| IntegrationTests-032 | Low | Resolved | Testing coverage | `src/ZB.MOM.WW.MxGateway.IntegrationTests/WorkerLiveMxAccessSmokeTests.cs:823-865` |
|
||||
| IntegrationTests-033 | Low | Deferred | Testing coverage | `src/ZB.MOM.WW.MxGateway.IntegrationTests/WorkerLiveMxAccessSmokeTests.cs:577-709` |
|
||||
| Server-007 | Low | Resolved | Performance & resource management | `src/MxGateway.Server/Galaxy/GalaxyHierarchyProjector.cs:55-70` |
|
||||
| Server-008 | Low | Resolved | Performance & resource management | `src/MxGateway.Server/Grpc/GalaxyRepositoryGrpcService.cs:111-134,160-189` |
|
||||
| Server-009 | Low | Resolved | Error handling & resilience | `src/MxGateway.Server/Security/Authentication/AuthSqliteConnectionFactory.cs:15-32` |
|
||||
@@ -327,6 +380,8 @@ Findings with status `Resolved`, `Won't Fix`, or `Deferred`.
|
||||
| Server-050 | Low | Resolved | Error handling & resilience | `src/ZB.MOM.WW.MxGateway.Server/Dashboard/DashboardSessionAdminService.cs:42-75,92-125` |
|
||||
| Server-052 | Low | Resolved | Documentation & comments | `src/ZB.MOM.WW.MxGateway.Server/Alarms/IAlarmWatchListResolver.cs:24-30`, `src/ZB.MOM.WW.MxGateway.Server/Alarms/AlarmWatchListResolver.cs:101-114`, `docs/GatewayConfiguration.md:247` |
|
||||
| Server-053 | Low | Resolved | Testing coverage | `src/ZB.MOM.WW.MxGateway.Tests/Alarms/AlarmWatchListResolverTests.cs`, `src/ZB.MOM.WW.MxGateway.Tests/Alarms/GatewayAlarmMonitorProviderModeTests.cs` |
|
||||
| Server-055 | Low | Resolved | Correctness & logic bugs | `src/ZB.MOM.WW.MxGateway.Server/Sessions/GatewaySession.cs:842-851,1841-1871` |
|
||||
| Server-058 | Low | Resolved | Testing coverage | `src/ZB.MOM.WW.MxGateway.Tests/Security/Authorization/ConstraintEnforcerTests.cs` |
|
||||
| Tests-007 | Low | Resolved | Code organization & conventions | `src/MxGateway.Tests/Gateway/Grpc/MxAccessGatewayServiceTests.cs:682`, `src/MxGateway.Tests/Gateway/Grpc/GalaxyRepositoryGrpcServiceTests.cs:324`, `src/MxGateway.Tests/Gateway/GatewayEndToEndFakeWorkerSmokeTests.cs:460`, `src/MxGateway.Tests/Security/Authorization/GatewayGrpcAuthorizationInterceptorTests.cs:233` |
|
||||
| Tests-008 | Low | Resolved | mxaccessgw conventions | `src/MxGateway.Tests/Gateway/Sessions/WorkerAlarmRpcDispatcherTests.cs:1-9`, `src/MxGateway.Tests/Gateway/Sessions/NotWiredAlarmRpcDispatcherTests.cs:1-3`, `src/MxGateway.Tests/Gateway/Sessions/SessionManagerAlarmAutoSubscribeTests.cs:1` |
|
||||
| Tests-009 | Low | Resolved | Documentation & comments | `src/MxGateway.Tests/Gateway/Sessions/SessionManagerTests.cs:36-37,99,365` |
|
||||
@@ -350,6 +405,11 @@ Findings with status `Resolved`, `Won't Fix`, or `Deferred`.
|
||||
| Tests-033 | Low | Resolved | Testing coverage | `src/ZB.MOM.WW.MxGateway.Server/Dashboard/DashboardAlarmProviderStatus.cs`, `src/ZB.MOM.WW.MxGateway.Tests/Gateway/Dashboard/DashboardBrowseAndAlarmModelTests.cs:140-195` |
|
||||
| Tests-034 | Low | Resolved | mxaccessgw conventions | `src/ZB.MOM.WW.MxGateway.Tests/Diagnostics/GatewayLogRedactorSeamTests.cs:1-15` |
|
||||
| Tests-035 | Low | Resolved | Concurrency & thread safety | `src/ZB.MOM.WW.MxGateway.Tests/Alarms/AlarmFailoverEndToEndTests.cs:315-329` |
|
||||
| Tests-036 | Low | Resolved | Testing coverage | `src/ZB.MOM.WW.MxGateway.Tests/Configuration/GatewayOptionsValidatorTests.cs` |
|
||||
| Tests-037 | Low | Won't Fix | Testing coverage | `src/ZB.MOM.WW.MxGateway.Tests/Contracts/ProtobufContractRoundTripTests.cs` |
|
||||
| Tests-038 | Low | Resolved | Performance & resource management | `src/ZB.MOM.WW.MxGateway.Tests/Gateway/Sessions/SessionEventDistributorTests.cs:702-713` |
|
||||
| Tests-039 | Low | Resolved | Concurrency & thread safety | `src/ZB.MOM.WW.MxGateway.Tests/Gateway/Sessions/GatewaySessionDashboardMirrorTests.cs` (`DashboardMirror_AndGrpcSubscriber_BothReceiveEvents`) |
|
||||
| Tests-040 | Low | Resolved | Testing coverage | `src/ZB.MOM.WW.MxGateway.Tests/Gateway/Sessions/GatewayArrayWriteWiringTests.cs` |
|
||||
| Worker-009 | Low | Resolved | Performance & resource management | `src/MxGateway.Worker/Ipc/WorkerFrameReader.cs:31,49`, `src/MxGateway.Worker/Ipc/WorkerFrameWriter.cs:57-58` |
|
||||
| Worker-010 | Low | Resolved | Correctness & logic bugs | `src/MxGateway.Worker/Conversion/VariantConverter.cs:204-226` |
|
||||
| Worker-011 | Low | Resolved | Correctness & logic bugs | `src/MxGateway.Worker/Ipc/WorkerPipeClient.cs:169-171` |
|
||||
@@ -387,3 +447,6 @@ Findings with status `Resolved`, `Won't Fix`, or `Deferred`.
|
||||
| Worker.Tests-030 | Low | Resolved | Documentation & comments | `src/MxGateway.Worker.Tests/Ipc/WorkerPipeSessionTests.cs:862-890` |
|
||||
| Worker.Tests-032 | Low | Resolved | Error handling & resilience | `src/ZB.MOM.WW.MxGateway.Worker.Tests/MxAccess/FailoverAlarmConsumerTests.cs` |
|
||||
| Worker.Tests-033 | Low | Resolved | Testing coverage | `src/ZB.MOM.WW.MxGateway.Worker.Tests/MxAccess/SubtagAlarmStateMachineTests.cs` |
|
||||
| Worker.Tests-034 | Low | Resolved | Code organization & conventions | `src/ZB.MOM.WW.MxGateway.Worker.Tests/MxAccess/MxAccessCommandExecutorTests.cs:2233`, `src/ZB.MOM.WW.MxGateway.Worker.Tests/TestSupport/NoopMxAccessServer.cs:97` |
|
||||
| Worker.Tests-035 | Low | Resolved | Testing coverage | `src/ZB.MOM.WW.MxGateway.Worker.Tests/MxAccess/MxAccessCommandExecutorTests.cs`, `src/ZB.MOM.WW.MxGateway.Worker/MxAccess/MxAccessCommandExecutor.cs:99-136` |
|
||||
| Worker.Tests-036 | Low | Resolved | Concurrency & thread safety | `src/ZB.MOM.WW.MxGateway.Worker.Tests/Ipc/WorkerPipeSessionTests.cs:983-996` |
|
||||
|
||||
@@ -4,8 +4,8 @@
|
||||
|---|---|
|
||||
| Module | `src/ZB.MOM.WW.MxGateway.Server` |
|
||||
| Reviewer | Claude Code |
|
||||
| Review date | 2026-06-15 |
|
||||
| Commit reviewed | `410acc9` |
|
||||
| Review date | 2026-06-18 |
|
||||
| Commit reviewed | `88915c3` |
|
||||
| Status | Re-reviewed |
|
||||
| Open findings | 0 |
|
||||
|
||||
@@ -69,6 +69,40 @@ findings (Server-001 through Server-032) are unchanged by this pass.
|
||||
| 9 | Testing coverage | Issues found: Server-037 (no test for the corrupt-snapshot restore path or for `PersistSnapshot = false` at the cache level). |
|
||||
| 10 | Documentation & comments | No issues found — XML docs match behavior; the `GalaxyRepository.md` "On-disk snapshot" section documents the Stale-on-restore lifecycle. |
|
||||
|
||||
### 2026-06-18 re-review (commit 88915c3)
|
||||
|
||||
Re-review of the array-write-ergonomics feature (`git diff 8df5ab3..88915c3 -- src/ZB.MOM.WW.MxGateway.Server/`): the new `SparseArrayExpander` and `ArrayAddressNormalizer`, the `NormalizeOutboundCommand` choke point in `GatewaySession.InvokeAsync`, the re-normalize at the `TrackCommandReply`/`MapCommand` tracking call sites, and the `ConstraintEnforcer.ResolveTarget` `[]`-suffix fallback. The range also lands the already-filed Server-055 (`_everHadEventSubscriber` detach-grace gate) and Server-056 (`SessionEventDistributor._completed` late-registrant guard) resolutions; both were re-verified sound here and remain closed. Security focus — the authorization `[]` fallback: it changes only *which* Galaxy metadata record resolves (turning a spurious `tag_metadata` deny into a real scope/classification decision), stays `IsArray`-gated, and the scope check (`MatchesPathOrTag` on `ContainedPath` + the original bare `tagAddress`) still runs unchanged, so a scoped key cannot reach a non-array tag or an out-of-scope array. The authz check (bare address) and the worker bind (suffixed via the same `IsArray`-gated probe) resolve the identical attribute — no check-vs-bind mismatch. Worker untouched; parity preserved (single whole-array COM write). Two new findings.
|
||||
|
||||
| # | Category | Result |
|
||||
|---|---|---|
|
||||
| 1 | Correctness & logic bugs | Issues found: Server-057 (`NormalizeOutboundCommand`/`MapCommand` normalize only single `AddItem`/`AddItem2`; bare array addresses added via `AddItemBulk`/`AddBufferedItem` register non-write-capable handles). SparseArrayExpander verified sound: total_length=0 / oversize / out-of-range / duplicate / kind-mismatch all → `InvalidArgument`; `Array.MaxLength` guard precedes the `(int)` cast; Int32-vs-Int64 selection mirrors the worker converter; expansion targets `Value` not `timestamp_value` across all 8 write variants. |
|
||||
| 2 | mxaccessgw conventions | No issues found — file-scoped namespaces, `sealed`/`internal static`, `Async` suffixes, primary constructors, MXAccess-aligned naming; worker still does an honest whole-array COM write; no UI libraries; no value/secret logging added (write payloads never logged). |
|
||||
| 3 | Concurrency & thread safety | No issues found — `NormalizeOutboundCommand` mutates the `WorkerCommand.Command` deep-clone produced by `MapCommand` (`request.Command.Clone()`), never the client's `MxCommand`, and the tracking path re-normalizes its own separate copy; `ArrayAddressNormalizer`/`SparseArrayExpander` are stateless (read the cache snapshot per call); `_everHadEventSubscriber` and the `SessionEventDistributor._completed` guard serialize correctly under their locks. |
|
||||
| 4 | Error handling & resilience | No issues found — invalid sparse payloads surface as `RpcException(InvalidArgument)` and propagate uncaught (correct client-facing error); the normalizer never throws and passes through when metadata is cold. |
|
||||
| 5 | Security | No issues found — the `ConstraintEnforcer` `[]` fallback is `IsArray`-gated, does not widen scope (scope/classification checks unchanged), and resolves the same attribute the worker binds; covered by `CheckReadTagAsync_WithBareArrayName_*` and the missing-non-array negative test. |
|
||||
| 6 | Performance & resource management | No issues found — expander allocates one array of `total_length` slots (inherent to a whole-array write) and the work is O(length + elements); the normalizer is a single dictionary probe per AddItem. |
|
||||
| 7 | Design-document adherence | No issues found — worker untouched, single whole-array COM write preserved; `gateway.md`, `docs/WorkerConversion.md`, `docs/DesignDecisions.md`, and all five client READMEs were updated in the same commit and match the code (sparse semantics, default-fill reset, `[]` normalization). |
|
||||
| 8 | Code organization & conventions | No issues found — new types live under `Sessions/`, registered as a singleton in `AddGatewaySessions` consistent with `IGalaxyHierarchyCache`; the optional `addressNormalizer` ctor parameter keeps legacy unit-construction paths working. |
|
||||
| 9 | Testing coverage | Issues found: Server-058 (no test asserts a bare-array name that resolves via the `[]` fallback is still *denied* when out of scope, and `CheckWriteHandleAsync` array-via-suffixed-registration / classification is untested). Otherwise strong: `SparseArrayExpanderTests` (210 lines), `ArrayAddressNormalizerTests` (105), and the two `ConstraintEnforcerTests` cases. |
|
||||
| 10 | Documentation & comments | No issues found — XML docs and inline comments on the new types are accurate (the GatewaySession comment correctly notes `MapCommand` deep-clones, contradicting the plan's stale "same instance" note); the `AddItemBulk` gap is documentation-adjacent and is captured under Server-057. |
|
||||
|
||||
### 2026-06-16 re-review (commit 8df5ab3)
|
||||
|
||||
Re-review of the session-resilience epic + §8 delta (`git diff 410acc9..8df5ab3`): `SessionEventDistributor` multi-subscriber fan-out, replay-on-reconnect, detach-grace retention, bounded worker-ready wait, dashboard auto-login.
|
||||
|
||||
| # | Category | Result |
|
||||
|---|---|---|
|
||||
| 1 | Correctness & logic bugs | Server-055 |
|
||||
| 2 | mxaccessgw conventions | No issues found |
|
||||
| 3 | Concurrency & thread safety | No issues found (replay/handoff atomicity, reconnect-vs-sweep, single-clock ready-wait all verified sound) |
|
||||
| 4 | Error handling & resilience | No issues found |
|
||||
| 5 | Security | No issues found (DisableLogin auto-login is intentional/config-gated/documented) |
|
||||
| 6 | Performance & resource management | No issues found |
|
||||
| 7 | Design-document adherence | Server-054 |
|
||||
| 8 | Code organization & conventions | No issues found |
|
||||
| 9 | Testing coverage | No issues found |
|
||||
| 10 | Documentation & comments | No issues found |
|
||||
|
||||
### 2026-05-24 re-review (commit 42b0037)
|
||||
|
||||
Re-review pass at `42b0037` scoped to the dashboard destructive-action wave on
|
||||
@@ -1022,3 +1056,78 @@ Additionally, `GatewayAlarmMonitor.ApplyProviderModeChangeAsync` increments the
|
||||
**Recommendation:** Add resolver tests for (a) cancellation propagation and (b) an include that is also excluded; and a `GatewayAlarmMonitorProviderMode` test pinning the provider-switch counter behaviour for a same-mode repeat event (whichever semantics the team intends). These lock down the contracts the Server-051/052 findings expose.
|
||||
|
||||
**Resolution:** Resolved 2026-06-15. Added all three missing tests: (a) `AlarmWatchListResolverTests.ResolveAsync_RepositoryCancelled_PropagatesOperationCanceled` (cancellation propagation, also covers Server-051); (b) `AlarmWatchListResolverTests.ResolveAsync_ExcludeAlsoSuppressesMatchingExplicitInclude` (exclude-vs-include precedence, also Server-052 item 2); and (c) `GatewayAlarmMonitorProviderModeTests.ProviderModeChange_RepeatedSameMode_RecordsASwitchForEachEvent`, which pins the existing semantics — each worker-reported `OnAlarmProviderModeChanged` event records a `provider_switches` increment (and resets `_providerSince`) even when `toMode` equals the current mode, since the worker is the authority on when a mode change occurred and the gateway does not synthesize or suppress it.
|
||||
|
||||
### Server-054
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Medium |
|
||||
| Category | Design-document adherence |
|
||||
| Location | `docs/DesignDecisions.md` (Session Reconnect / Event Subscribers / Later Revisit Items §470-471), `CLAUDE.md` (Repository-Specific Conventions) |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The session-resilience epic shipped multi-subscriber fan-out (`SessionEventDistributor`), reconnectable sessions with replay (`AttachEventSubscriberWithReplay`/`ReplayGap`), and detach-grace retention — but `docs/DesignDecisions.md` still states "no reconnectable sessions for v1" and "one active StreamEvents subscriber per session for v1", and still files both as post-v1 "Later Revisit Items". `CLAUDE.md` likewise still says these are "explicitly out of scope". This is the stale-prose-vs-shipped-behavior drift the "update docs in the same change as the source" rule prohibits.
|
||||
|
||||
**Recommendation:** Update both `DesignDecisions.md` sections and the revisit list to describe the shipped behavior (gated by `AllowMultipleEventSubscribers`, `DetachGraceSeconds`, replay options), and amend the CLAUDE.md convention bullet.
|
||||
|
||||
**Resolution:** 2026-06-16: updated `docs/DesignDecisions.md` (Session Reconnect section rewritten to describe the shipped detach-grace + replay-on-reconnect behavior with config references; Event Subscribers section rewritten to describe the config-gated multi-subscriber fan-out, mode-dependent `FailFast` semantics, and internal vs external subscriber distinction; Later Revisit Items list removes the two shipped items and records them as shipped with config cross-references) and the `CLAUDE.md` conventions bullet to describe the shipped config-gated multi-subscriber + reconnect-replay behavior while preserving the one-worker-per-session invariant.
|
||||
|
||||
### Server-055
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `src/ZB.MOM.WW.MxGateway.Server/Sessions/GatewaySession.cs:842-851,1841-1871` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** When `AttachEventSubscriber`/`AttachEventSubscriberWithReplay` fails inside `StartDistributorAndRegister`, the catch calls `DetachEventSubscriber()`, which decrements the active count back to 0 and — because the session is still `Ready` and detach-grace is enabled — stamps `_detachedAtUtc = now`. A freshly-`Ready` session that never had a successful subscriber thus enters the detach-grace window on a failed first attach, making it sweep-eligible after `DetachGraceSeconds` even though no client ever streamed. Impact is minor (the lease still protects it; a later successful attach clears the stamp) but the "last subscriber dropped" semantics are violated.
|
||||
|
||||
**Recommendation:** Only stamp `_detachedAtUtc` on a detach that mirrors a prior successful attach — roll the failure path back without entering grace, or guard the stamp on "a subscriber had previously been registered."
|
||||
|
||||
**Resolution:** 2026-06-16: `GatewaySession` now tracks `_everHadEventSubscriber` (a `bool` field, set to `true` inside `MarkEventSubscriberAttached()`, which is called only after `StartDistributorAndRegister` succeeds). `DetachEventSubscriber` gates the `_detachedAtUtc` stamp on `_everHadEventSubscriber`, so the catch-path rollback decrements the reserved slot but does not enter detach-grace. A regression `[Fact]` (`DetachGrace_FailedFirstAttach_DoesNotEnterGrace`) in `GatewaySessionTests.cs` verifies that after a failed first attach the session has `DetachedAtUtc == null`, `ActiveEventSubscriberCount == 0`, and `IsDetachGraceExpired` returns `false` regardless of clock advance.
|
||||
|
||||
### Server-056
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Medium |
|
||||
| Category | Concurrency & thread safety |
|
||||
| Location | `src/ZB.MOM.WW.MxGateway.Server/Sessions/SessionEventDistributor.cs:296-310,449-453,629-635` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `SessionEventDistributor` orphaned any subscriber that registered in the window AFTER the pump ran its final `CompleteAllSubscribers` sweep (the event source completed or faulted and the pump exited) but BEFORE `DisposeAsync`. `RegisterSubscriber`/`RegisterWithReplay` guarded only against `_disposed`, not against the pump having already completed, so a subscriber added in that window got a channel the now-exited pump would never complete — its reader (`ReadAllAsync`) waited forever. In production this is the edge case of a client calling `StreamEvents` after the worker's event stream has ended but before the session is torn down. Discovered while diagnosing an order-dependent hang in `GatewaySessionDashboardMirrorTests`, where a gRPC subscriber attached after a fast-completing worker stream had already drained (its `await foreach` has no timeout, so the orphaned channel surfaced as an infinite hang rather than a clean failure).
|
||||
|
||||
**Recommendation:** Record terminal completion (a `_completed` flag plus the terminal error) under `_lifecycleLock` and have both register paths complete a late registrant's channel immediately with the same terminal state.
|
||||
|
||||
**Resolution:** 2026-06-17: added `_completed` + `_completionError`, set inside `CompleteAllSubscribers` under `_lifecycleLock` — the same lock the register paths take, so completion and registration serialize (a subscriber added before the sweep is completed by the loop; one racing in after sees `_completed` and self-completes). `Register` and `RegisterWithReplay` now `TryComplete` a late registrant's channel with `_completionError` when `_completed`; a late resume still receives its retained replay batch, then a cleanly-completed empty live channel. No lock-ordering risk — `CompleteAllSubscribers` takes only `_lifecycleLock`, and the subscriber channels use `AllowSynchronousContinuations=false` so `TryComplete` under the lock runs no continuation inline. New regression `[Fact]` `Register_AfterSourceCompletes_CompletesLateSubscriberInsteadOfHanging` (`SessionEventDistributorTests.cs`) registers a subscriber after the pump completes and asserts its channel completes (bounded read); verified it fails without the fix (5 s timeout) and passes with it (12 ms). The racy `GatewaySessionDashboardMirrorTests.DashboardMirror_AndGrpcSubscriber_BothReceiveEvents` that exposed it was also made deterministic — see Tests-039.
|
||||
|
||||
### Server-057
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Medium |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `src/ZB.MOM.WW.MxGateway.Server/Sessions/GatewaySession.cs:976-1000` (`NormalizeOutboundCommand`), `:1085-1095` (`MapCommand` tracking), `gateway.md` (array-write ergonomics section), `clients/*/README.md` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The array-suffix `[]` normalization runs only for the single-add commands `AddItem` and `AddItem2` — `NormalizeOutboundCommand` has no case for `AddItemBulk` (or `AddBufferedItem`), and the `MapCommand` tracking switch likewise normalizes only the `AddItem`/`AddItem2` arms (`AddItemBulk` flows through `TrackBulkItems`/`AddBufferedItem` with the raw address). MXAccess requires the `[]` suffix on the AddItem address for an array attribute to register a *write-capable* handle. A client that registers a bare array address via `AddItemBulk` therefore binds a non-write-capable handle, and a later `Write`/`WriteSecured`/sparse write silently targets that non-write-capable handle — the exact failure mode this feature fixes for the single-add path. The behavior is inconsistent across the add family, and `gateway.md` / the client READMEs describe normalization as happening "at AddItem time" and explicitly carve out only `ReadBulk`, giving no signal that `AddItemBulk` is excluded. `AddBufferedItem` is lower-risk (buffered/historical read items are not normally written) but is the same gap.
|
||||
|
||||
**Recommendation:** Either (a) extend `NormalizeOutboundCommand` and the `MapCommand` tracking path to normalize each `AddItemBulk.TagAddresses` entry (and `AddBufferedItem.ItemDefinition`) the same `IsArray`-gated way, keeping the constraint check, the worker bind, and the stored `SessionItemRegistration.TagAddress` consistent; or (b) if bulk-add normalization is intentionally out of scope for this feature, state that explicitly in `gateway.md` and the client READMEs (alongside the existing `ReadBulk` carve-out) so clients know bulk-added array handles must carry the `[]` suffix themselves to be writable.
|
||||
|
||||
**Resolution:** 2026-06-18 — Took option (a). Root cause confirmed: `NormalizeOutboundCommand` had no `AddItemBulk`/`AddBufferedItem` case, so the worker bound bare (non-write-capable) array handles for those paths while single-add was already fixed. Added `AddItemBulk` (normalizes each `TagAddresses` entry in place) and `AddBufferedItem` (normalizes `ItemDefinition`) cases to `NormalizeOutboundCommand`; added the matching `AddBufferedItem` normalization to the `TrackCommandReply`/`MapCommand` tracking path (its registration keys off the command's `ItemDefinition`). `AddItemBulk` tracking needs no change — the worker echoes the already-suffixed address back in each `SubscribeResult.TagAddress`, which `TrackBulkItems` stores. Authz is unchanged and consistent: `FilterTagBulkAsync` checks the bare address through `ConstraintEnforcer.ResolveTarget`'s `[]` fallback, mirroring single-add. Updated `gateway.md` and all five client READMEs (dotnet/go/python/rust/java) so the add-family normalization no longer reads as AddItem-only; the `ReadBulk` carve-out stays. Regression tests: `GatewayArrayWriteWiringTests.AddItemBulk_BareArrayAddress_NormalizedOnWireAndInRegistration`, `.AddBufferedItem_BareArrayAddress_NormalizedOnWireAndInRegistration`.
|
||||
|
||||
### Server-058
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `src/ZB.MOM.WW.MxGateway.Tests/Security/Authorization/ConstraintEnforcerTests.cs` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The new `ConstraintEnforcer.ResolveTarget` `[]`-suffix fallback is security-sensitive: it turns a bare-array address that previously missed the Galaxy index (→ spurious `tag_metadata` deny) into a real scope/classification decision. The added tests cover the in-scope-allow case (`CheckReadTagAsync_WithBareArrayName_ResolvesViaArraySuffixFallback`) and the missing/non-array negative case (`CheckReadTagAsync_WithMissingNonArrayName_StillFailsToResolve`), but **no test asserts that a bare-array name which now resolves via the fallback is still denied when it is out of the key's read/write scope** — i.e. that the fallback widened resolution but not authorization. There is also no `CheckWriteHandleAsync` test exercising an array handle whose `SessionItemRegistration.TagAddress` is the suffixed form (`Obj.Arr[]`) resolving directly through `ResolveTarget` for the write-scope and `MaxWriteClassification` checks. These are the precise paths a regression could silently widen.
|
||||
|
||||
**Recommendation:** Add a `CheckReadTagAsync` (and a `CheckWriteHandleAsync`) case where the bare/suffixed array attribute resolves but the configured `ReadTagGlobs`/`WriteSubtrees` exclude it, asserting a `read_scope`/`write_scope` `ConstraintFailure` is still returned; and a `CheckWriteHandleAsync` case asserting `MaxWriteClassification` is enforced against the array attribute's `SecurityClassification` via the suffixed registration address.
|
||||
|
||||
**Resolution:** 2026-06-18 — Added three `ConstraintEnforcerTests` cases (the test fixture gained a second array attribute `Pump_001.Setpoints[]` with `SecurityClassification = 2` to exercise the classification path): `CheckReadTagAsync_WithBareArrayName_OutOfScope_StillDeniedReadScope` (bare array resolves via the `[]` fallback but is denied `read_scope` when out of `ReadTagGlobs` — guards that the fallback widened resolution, not authorization), `CheckWriteHandleAsync_WithSuffixedArrayRegistration_OutOfScope_StillDeniedWriteScope` (an array handle whose registration `TagAddress` is the suffixed `Pump_001.Levels[]` resolves through `ResolveTarget` and is denied `write_scope`), and `CheckWriteHandleAsync_WithSuffixedArrayRegistration_ClassificationTooHigh_StillDenied` (in-scope suffixed array handle denied `max_write_classification` when the attribute's `SecurityClassification` exceeds `MaxWriteClassification`). Tests-only change; no production code touched.
|
||||
|
||||
@@ -4,8 +4,8 @@
|
||||
|---|---|
|
||||
| Module | `src/ZB.MOM.WW.MxGateway.Tests` |
|
||||
| Reviewer | Claude Code |
|
||||
| Review date | 2026-06-15 |
|
||||
| Commit reviewed | `410acc9` |
|
||||
| Review date | 2026-06-18 |
|
||||
| Commit reviewed | `88915c3` |
|
||||
| Status | Re-reviewed |
|
||||
| Open findings | 0 |
|
||||
|
||||
@@ -111,6 +111,40 @@ fakes in two test files.
|
||||
| 9 | Testing coverage | Issues found: Tests-026 (no test proves `EventStreamService` actually calls `IDashboardEventBroadcaster.Publish` for each event — the only consumers in tests are `Null` fakes). |
|
||||
| 10 | Documentation & comments | No issues found in this diff. |
|
||||
|
||||
### 2026-06-18 re-review (commit `88915c3`)
|
||||
|
||||
Re-review of the `8df5ab3..88915c3` diff. New files: `SparseArrayExpanderTests.cs`, `ArrayAddressNormalizerTests.cs`, `GatewayArrayWriteWiringTests.cs` (array-write feature tests); additions to `ConstraintEnforcerTests.cs` (bare-array-name authz fallback), `ProtobufContractRoundTripTests.cs` (ReplayGap round-trip), `GatewaySessionTests.cs` (failed-first-attach regression), `SessionEventDistributorTests.cs` (register-after-completion + DrainUntilFaultAsync fix), `GatewaySessionDashboardMirrorTests.cs` (Tests-039 release-gate fix), `GatewayOptionsValidatorTests.cs` (DetachGraceSeconds / ReplayBuffer bounds).
|
||||
|
||||
| # | Category | Result |
|
||||
|---|---|---|
|
||||
| 1 | Correctness & logic bugs | No issues found. All new tests assert the correct post-condition; `AddItem_BareArrayAddress_NormalizedOnWireAndInRegistration` correctly verifies re-normalization in `TrackCommandReply` via a bare-address `trackingCopy`; `DrainUntilFaultAsync` graceful-completion guard is correct. |
|
||||
| 2 | mxaccessgw conventions | No issues found. All new files use file-scoped namespaces, sealed classes, target-typed `new()`, PascalCase `Method_Condition_Result` names, and explicit types. |
|
||||
| 3 | Concurrency & thread safety | No issues found. `Register_AfterSourceCompletes_CompletesLateSubscriberInsteadOfHanging` correctly proves pump-completed ordering via early-subscriber drain before registering the late subscriber. The Tests-039 release-gate uses `TaskCompletionSource(RunContinuationsAsynchronously)` and `ActiveEventSubscriberCount == 1` gating. |
|
||||
| 4 | Error handling & resilience | No issues found. `SparseArrayExpanderTests` covers all six `RpcException(InvalidArgument)` paths (zero length, oversize length, out-of-range index, duplicate index, unsupported type, element-kind mismatch). |
|
||||
| 5 | Security | No issues found. The two new `ConstraintEnforcerTests` correctly pin both directions of the `[]` suffix fallback: bare array name resolves (no spurious denial); bare non-array/missing name still fails to resolve (no false positive). |
|
||||
| 6 | Performance & resource management | No issues found. `GatewayArrayWriteWiringTests.CapturingWorkerClient` implements `IAsyncDisposable` as `ValueTask.CompletedTask`; no unowned resource. `GatewaySessionTests.DetachGrace_FailedFirstAttach_DoesNotEnterGrace` uses `await using`. |
|
||||
| 7 | Design-document adherence | No issues found. Sparse-expansion and array-suffix normalization tests reflect the CLAUDE.md parity contract ("MXAccess has no partial-array write primitive"); the `ConstraintEnforcer` bare-name fallback tests match the `ArrayAddressNormalizer` contract. |
|
||||
| 8 | Code organization & conventions | No issues found. The `StubGalaxyHierarchyCache` is duplicated between `ArrayAddressNormalizerTests` and `GatewayArrayWriteWiringTests` (both in the same namespace), but both are `private sealed class` — duplication is confined and the existing Tests-007 / Tests-021 consolidation pattern applies only to test-support types shared across multiple test classes, not private fakes within a single class. Acceptable. |
|
||||
| 9 | Testing coverage | Issue found: Tests-040 (`GatewayArrayWriteWiringTests` covers `Write` and `WriteBulk` sparse expansion wiring through `GatewaySession.InvokeAsync` but omits the other six write variants — `WriteSecured`, `Write2`, `WriteSecured2`, `Write2Bulk`, `WriteSecuredBulk`, `WriteSecured2Bulk` — each of which has its own `case` arm in `NormalizeOutboundCommand` and could silently regress). |
|
||||
| 10 | Documentation & comments | No issues found. New files carry accurate class-level `<summary>` docs; test method summaries correctly describe pre/post conditions; the `DrainUntilFaultAsync` comment update precisely describes the new escape hatch. |
|
||||
|
||||
#### 2026-06-16 re-review (commit 8df5ab3)
|
||||
|
||||
Re-review of the gateway-test delta (session-resilience epic + §8). New tests are high quality (bounded async waits, FakeTimeProvider, deterministic gating, meaningful assertions). Verified the §8 FakeWorkerProcess consolidation did NOT drop the `entireProcessTree` kill assertion. The only findings are Low severity: two coverage gaps (Tests-036, Tests-037) and one latent helper footgun (Tests-038).
|
||||
|
||||
| # | Category | Result |
|
||||
|---|---|---|
|
||||
| 1 | Correctness & logic bugs | No issues found |
|
||||
| 2 | mxaccessgw conventions | No issues found |
|
||||
| 3 | Concurrency & thread safety | No issues found |
|
||||
| 4 | Error handling & resilience | No issues found |
|
||||
| 5 | Security | No issues found |
|
||||
| 6 | Performance & resource management | Tests-038 |
|
||||
| 7 | Design-document adherence | No issues found |
|
||||
| 8 | Code organization & conventions | No issues found |
|
||||
| 9 | Testing coverage | Tests-036, Tests-037 |
|
||||
| 10 | Documentation & comments | No issues found |
|
||||
|
||||
## Findings
|
||||
|
||||
### Tests-001
|
||||
@@ -647,3 +681,78 @@ The cancellation tests for `WorkerClient` in `WorkerClientTests` *do* exercise t
|
||||
**Recommendation:** Bound the second-subscriber drain with the same `WaitTimeout` used elsewhere — e.g. link `newStreamCts` to a `CancellationTokenSource.CreateLinkedTokenSource` plus `CancelAfter(WaitTimeout)`, or wrap the drain in a `Task` awaited via `WaitAsync(WaitTimeout)` — so a missing `SnapshotComplete` surfaces as a deterministic failure rather than a hang.
|
||||
|
||||
**Resolution:** 2026-06-15 — Confirmed the unbounded `await foreach` in `DegradedTransition_CachedThenReplayed_CarriesDegradedAndSourceProviderToNewSubscriber`. Bounded the second-subscriber drain with a `CancellationTokenSource.CreateLinkedTokenSource(newStreamCts.Token, drainTimeoutCts.Token)` where `drainTimeoutCts.CancelAfter(WaitTimeout)`, and wrapped the loop in a `try/catch (OperationCanceledException) when (drainTimeoutCts.IsCancellationRequested)` that rethrows a `TimeoutException`. A regression that never emits `SnapshotComplete` now fails cleanly instead of hanging. Test still passes.
|
||||
|
||||
### Tests-036
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `src/ZB.MOM.WW.MxGateway.Tests/Configuration/GatewayOptionsValidatorTests.cs` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** Three new validator rules — `DetachGraceSeconds >= 0` (GatewayOptionsValidator.cs:185-186), `ReplayBufferCapacity >= 0` (:215-216), `ReplayRetentionSeconds >= 0` (:219-220) — have no tests, while the sibling new options (`MaxEventSubscribersPerSession`, `WorkerReadyWaitTimeoutMs`) do. A regression dropping/inverting any of the three guards would pass with no failing test.
|
||||
|
||||
**Recommendation:** Add boundary theories mirroring the `MaxEventSubscribersPerSession` pattern: a failing case (`-1`) asserting the message contains each config path, and a succeeding boundary case (`0`).
|
||||
|
||||
**Resolution:** 2026-06-16 — Added six tests to `GatewayOptionsValidatorTests.cs` covering all three guards: `Validate_Fails_WhenDetachGraceSecondsIsNegative` / `Validate_Succeeds_WhenDetachGraceSecondsIsZero` (via `CloneWithSessions`); `Validate_Fails_WhenReplayBufferCapacityIsNegative` / `Validate_Succeeds_WhenReplayBufferCapacityIsZero` and `Validate_Fails_WhenReplayRetentionSecondsIsNegative` / `Validate_Succeeds_WhenReplayRetentionSecondsIsZero` (via a new `CloneWithEvents` helper). Each failing case asserts the failure message contains the config path; each boundary case asserts `Succeeded`. Mirrors the `MaxEventSubscribersPerSession` / `WorkerReadyWaitTimeoutMs` pattern.
|
||||
|
||||
### Tests-037
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `src/ZB.MOM.WW.MxGateway.Tests/Contracts/ProtobufContractRoundTripTests.cs` |
|
||||
| Status | Won't Fix |
|
||||
|
||||
**Description:** The reconnect/replay contract surface (`ReplayGap` message, `MxEvent.replay_gap = 14`, `StreamEventsRequest.after_worker_sequence`) has no protobuf serialize/parse round-trip test pinning the wire shape and the documented sentinel invariant (family UNSPECIFIED, body oneof and per-item fields unset). Behavior is exercised in EventStreamServiceTests; this is a wire-contract gap.
|
||||
|
||||
**Recommendation:** Add a round-trip test building an `MxEvent` with `ReplayGap` populated, asserting the two sequence fields survive and the sentinel invariants hold (field 14, `Family == Unspecified`, `BodyCase` unset).
|
||||
|
||||
**Resolution:** 2026-06-16: covered by the ReplayGap round-trip + descriptor-pin test added under Contracts-022 in ProtobufContractRoundTripTests.cs; a duplicate here would be redundant.
|
||||
|
||||
### Tests-038
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Performance & resource management |
|
||||
| Location | `src/ZB.MOM.WW.MxGateway.Tests/Gateway/Sessions/SessionEventDistributorTests.cs:702-713` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `DrainUntilFaultAsync` relies on the channel completing WITH a fault so `WaitToReadAsync` re-throws. Correct for current callers, but if reused on a channel that completes gracefully, `WaitToReadAsync` returns false without throwing and the helper spins in a tight CPU loop with no escape (ReadTimeout bounds only the individual wait). A maintenance hazard, not a current bug.
|
||||
|
||||
**Recommendation:** When `WaitToReadAsync` returns false, await `reader.Completion` (surfaces the fault or completes cleanly) and `Assert.Fail` on graceful completion, so the helper fails fast instead of spinning.
|
||||
|
||||
**Resolution:** 2026-06-16 — When `WaitToReadAsync` returns `false` (graceful completion), the helper now awaits `reader.Completion` (propagating any stored fault) and then calls `Assert.Fail` so the helper fails fast rather than spinning; the fault-path behavior (re-throw from `WaitToReadAsync`) is preserved unchanged.
|
||||
|
||||
### Tests-039
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Concurrency & thread safety |
|
||||
| Location | `src/ZB.MOM.WW.MxGateway.Tests/Gateway/Sessions/GatewaySessionDashboardMirrorTests.cs` (`DashboardMirror_AndGrpcSubscriber_BothReceiveEvents`) |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `DashboardMirror_AndGrpcSubscriber_BothReceiveEvents` attached its gRPC subscriber via `StreamEventsAsync` AFTER `MarkReady()` had already started the pump draining a fast-completing 3-event fake worker — a register-vs-pump race. It passed alone but hung the whole `GatewaySessionDashboardMirrorTests` class when another test ran first (warm JIT let the pump drain and complete before the gRPC subscriber registered). Its `await foreach` over the gRPC stream uses `CancellationToken.None` with no timeout, so the race surfaced as an indefinite hang rather than a clean failure (unlike the sibling tests' `WaitUntilAsync`, which self-times-out at 5s). This exposed the production race fixed under Server-056.
|
||||
|
||||
**Recommendation:** Make the test deterministic — hold the worker stream until both the dashboard mirror and the gRPC subscriber have attached, then release, so neither subscriber can miss an event regardless of scheduling.
|
||||
|
||||
**Resolution:** 2026-06-17 — added a release-gate to the test's `FakeWorkerClient` (`HoldEventsUntilReleased()` / `ReleaseEvents()`; `ReadEventsAsync` awaits the gate before yielding, ungated by default so other tests are unaffected). The test now holds the stream, starts the gRPC reader on a background task, waits for `session.ActiveEventSubscriberCount == 1` (the internal dashboard mirror is excluded from the count, so this confirms the gRPC subscriber attached), then releases — both subscribers deterministically receive all three events. With the Server-056 production fix in place, the full `GatewaySessionDashboardMirrorTests` class now passes (5/5) instead of hanging.
|
||||
|
||||
### Tests-040
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `src/ZB.MOM.WW.MxGateway.Tests/Gateway/Sessions/GatewayArrayWriteWiringTests.cs` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `GatewayArrayWriteWiringTests` covers two of eight sparse-write arms in `GatewaySession.NormalizeOutboundCommand`: `Write` (single) and `WriteBulk`. The remaining six arms — `WriteSecured`, `Write2`, `WriteSecured2`, `Write2Bulk`, `WriteSecuredBulk`, `WriteSecured2Bulk` — all call `ExpandValue(entry.Value)` through the same switch, but no wiring test exercises them through `GatewaySession.InvokeAsync`. The class summary says it covers "the single outbound choke point" and that "no sparse value is ever forwarded"; the claim is accurate for the two covered variants but unverified for the other six. A regression accidentally dropping one of the six remaining `case` arms, or shifting a brace so a case falls through to the default (no-op), would pass the entire `GatewayArrayWriteWiringTests` suite.
|
||||
|
||||
**Recommendation:** Add one wiring test per uncovered variant (or a single `[Theory]` over the six command kinds), constructing the matching command type with a `SparseArrayValue` and asserting `worker.LastCommand!.Command.<Variant>.Value.KindCase == MxValue.KindOneofCase.ArrayValue` after `session.InvokeAsync`. The `SparseArrayExpanderTests` already pin the expander logic exhaustively; the wiring tests need only check that the choke point invokes expansion for each variant, not the expansion semantics themselves. The four secured variants (`WriteSecured`, `WriteSecured2`, `WriteSecuredBulk`, `WriteSecured2Bulk`) can reuse the same `CapturingWorkerClient` stub.
|
||||
|
||||
**Resolution:** 2026-06-18 — root cause confirmed: the six arms (`WriteSecured`, `Write2`, `WriteSecured2`, `Write2Bulk`, `WriteSecuredBulk`, `WriteSecured2Bulk`) each had a `case` in `NormalizeOutboundCommand` calling `ExpandValue` but no wiring test. Server-057's additions (`AddItemBulk`, `AddBufferedItem`) covered address-normalization tests only, not the missing write-expansion variants. Added six tests to `GatewayArrayWriteWiringTests.cs` — one per uncovered arm — each constructing the matching command with a 4-element `SparseArrayValue` (Integer, single element set), driving it through `GatewaySession.InvokeAsync`, and asserting `worker.LastCommand.Command.<Variant>.Value.KindCase == ArrayValue` and the expected element positions. Tests: `WriteSecured_SparseArrayValue_ExpandedBeforeReachingWorker`, `Write2_SparseArrayValue_ExpandedBeforeReachingWorker`, `WriteSecured2_SparseArrayValue_ExpandedBeforeReachingWorker`, `Write2Bulk_SparseArrayEntryValue_ExpandedBeforeReachingWorker`, `WriteSecuredBulk_SparseArrayEntryValue_ExpandedBeforeReachingWorker`, `WriteSecured2Bulk_SparseArrayEntryValue_ExpandedBeforeReachingWorker`. All 13 `GatewayArrayWriteWiring` tests pass (7 pre-existing + 6 new).
|
||||
|
||||
@@ -4,11 +4,38 @@
|
||||
|---|---|
|
||||
| Module | `src/ZB.MOM.WW.MxGateway.Worker.Tests` |
|
||||
| Reviewer | Claude Code |
|
||||
| Review date | 2026-06-15 |
|
||||
| Commit reviewed | `410acc9` |
|
||||
| Review date | 2026-06-18 |
|
||||
| Commit reviewed | `88915c3` |
|
||||
| Status | Re-reviewed |
|
||||
| Open findings | 0 |
|
||||
|
||||
## 2026-06-18 re-review (commit `88915c3`)
|
||||
|
||||
Re-review of `git diff 8df5ab3..88915c3 -- src/ZB.MOM.WW.MxGateway.Worker.Tests/`. Three files changed, all applying fixes for previously recorded findings rather than introducing new feature logic:
|
||||
|
||||
- `WorkerPipeSessionTests.cs` — Worker.Tests-036 fix: removes the redundant wall-clock `elapsed < 5s` assertion from `RunAsync_SendsFirstHeartbeatImmediatelyOnEnteringLoop` and replaces it with an inline comment explaining the omission.
|
||||
- `MxAccessCommandExecutorTests.cs` — Worker.Tests-035 fix: adds `DispatchAsync_WithUnknownCommandKind_ReturnsInvalidRequestWithUnsupportedDiagnostic`; removes the nested `FakeMxStatus` class (Worker.Tests-034 consolidation).
|
||||
- `TestSupport/NoopMxAccessServer.cs` — Worker.Tests-034 fix: adds the consolidation remarks block to `FakeMxStatus` explaining it was previously duplicated in `MxAccessCommandExecutorTests`.
|
||||
|
||||
The array-write-ergonomics feature (the bulk of commits between `8df5ab3` and `88915c3`) deliberately left the Worker untouched — sparse arrays are expanded gateway-side before reaching the worker IPC boundary — which is consistent with the task brief. No new test code was introduced as part of that feature in this module.
|
||||
|
||||
Net48 compatibility check: no `init`-only properties, no positional records, no `IsExternalInit`, no other net48-hostile constructs in the additions.
|
||||
|
||||
| # | Category | Result |
|
||||
|---|---|---|
|
||||
| 1 | Correctness & logic bugs | No issues found — the new executor test correctly casts `int.MaxValue` to `MxCommandKind`, dispatches through the real `MxAccessStaSession.DispatchAsync`, and asserts both the status code and the diagnostic substring; the assertion is neither too weak nor tautological. |
|
||||
| 2 | mxaccessgw conventions | No issues found — test method name follows `Method_Scenario_Expectation`; `NoopMxAccessServer.cs` remarks block cites the correct finding ID; no style drift. |
|
||||
| 3 | Concurrency & thread safety | No issues found — no new shared mutable state or async patterns introduced; the wall-clock assertion removal in `WorkerPipeSessionTests` is strictly a deletion. |
|
||||
| 4 | Error handling & resilience | No issues found — the unknown-kind test pins the discard arm returns `InvalidRequest` rather than throwing; no new error paths added. |
|
||||
| 5 | Security | No issues found — no credentials, secrets, or sensitive data involved. |
|
||||
| 6 | Performance & resource management | No issues found — no new disposable objects introduced without `using`; `StaRuntime` and `MxAccessStaSession` remain `using`-declared in the new test. |
|
||||
| 7 | Design-document adherence | No issues found — the executor test correctly exercises the gateway↔worker boundary at the `MxAccessStaSession` level (not raw COM), consistent with `docs/MxAccessWorkerInstanceDesign.md`. |
|
||||
| 8 | Code organization & conventions | No issues found — `FakeMxStatus` deduplication is complete; the nested class is gone from `MxAccessCommandExecutorTests` and the shared copy's XML doc now explains the consolidation. |
|
||||
| 9 | Testing coverage | No issues found — Worker.Tests-034/035/036 are the last open items from the prior pass; all three are now closed. No new coverage gaps introduced. |
|
||||
| 10 | Documentation & comments | No issues found — the inline comment in `WorkerPipeSessionTests` correctly attributes the omission to the Worker.Tests-003/004/013/020 pattern; the `FakeMxStatus` remarks block is accurate. |
|
||||
|
||||
**No new findings. All prior open items (Worker.Tests-034, -035, -036) resolved.**
|
||||
|
||||
## 2026-06-15 re-review (commit `410acc9`)
|
||||
|
||||
Re-review of the alarm-fallback test additions in `git diff 42b0037..HEAD --
|
||||
@@ -119,6 +146,23 @@ findings (Worker.Tests-001 through -030) are unaffected.
|
||||
| 9 | Testing coverage | No issues found in this diff. |
|
||||
| 10 | Documentation & comments | No issues found in this diff. |
|
||||
|
||||
#### 2026-06-16 re-review (commit 8df5ab3)
|
||||
|
||||
Re-review of the worker-test delta covering the new COM seam (`MxAccessCommandExecutorTests`, `MxAccessComServerTests`) and alarm work. Tests genuinely exercise STA dispatch and parity; the only findings are Low-severity organization, coverage, and flakiness items (Worker.Tests-034, -035, -036).
|
||||
|
||||
| # | Category | Result |
|
||||
|---|---|---|
|
||||
| 1 | Correctness & logic bugs | No issues found |
|
||||
| 2 | mxaccessgw conventions | No issues found |
|
||||
| 3 | Concurrency & thread safety | Worker.Tests-036 |
|
||||
| 4 | Error handling & resilience | No issues found |
|
||||
| 5 | Security | No issues found (password-no-leak test present) |
|
||||
| 6 | Performance & resource management | No issues found |
|
||||
| 7 | Design-document adherence | No issues found |
|
||||
| 8 | Code organization & conventions | Worker.Tests-034 |
|
||||
| 9 | Testing coverage | Worker.Tests-035 |
|
||||
| 10 | Documentation & comments | No issues found |
|
||||
|
||||
## Findings
|
||||
|
||||
### Worker.Tests-001
|
||||
@@ -615,3 +659,48 @@ findings (Worker.Tests-001 through -030) are unaffected.
|
||||
**Recommendation:** Add (a) `AckedTrueWhileInactive_EmitsNothingAndDoesNotLatch` — apply `.acked=true` with no prior active raise, assert `Apply` returns empty, then raise active and clear and assert the clear emits `UnackRtn` (proving the stale ack did not latch); and (b) `PriorityChange_FlowsIntoEmittedRecord` — apply a priority value then an active raise and assert the emitted record's `Priority` equals the supplied value (and a `CoerceInt` string/garbage case falls back).
|
||||
|
||||
**Resolution:** 2026-06-15 — Added both tests to `SubtagAlarmStateMachineTests`. `AckedTrueWhileInactive_EmitsNothingAndDoesNotLatch` applies `.acked=true` with no preceding active raise (asserts `Apply` returns empty), then drives a fresh raise→clear episode and asserts the clear emits `UnackRtn` — proving the stale inactive ack did not latch `AckedDuringEpisode`. `PriorityChange_FlowsIntoEmittedRecord` (the target now includes a `PrioritySubtag`) applies an `int` priority `750` (asserts the priority change emits nothing), raises active and asserts the emitted record's `Priority == 750` (exercising `CoerceInt`'s `int` path and the priority assignment), then applies a non-numeric `"not-a-number"` priority and asserts the snapshot `Priority` is still `750` (the `CoerceInt` string fallback keeps the prior value, not zero).
|
||||
|
||||
### Worker.Tests-034
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Code organization & conventions |
|
||||
| Location | `src/ZB.MOM.WW.MxGateway.Worker.Tests/MxAccess/MxAccessCommandExecutorTests.cs:2233`, `src/ZB.MOM.WW.MxGateway.Worker.Tests/TestSupport/NoopMxAccessServer.cs:97` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `FakeMxStatus` is defined twice — file-scope in `TestSupport/NoopMxAccessServer.cs:97` and nested in `MxAccessCommandExecutorTests.FakeMxAccessComObject:2233` — both exposing the same four public fields that `MxStatusProxyConverter` reflects over. The two copies must stay structurally identical; a future field change to the real COM struct requires updating two places, and the duplication is invisible to a reader consulting only one file.
|
||||
|
||||
**Recommendation:** Extract `FakeMxStatus` into its own `TestSupport/FakeMxStatus.cs` (or colocate both doubles) and have `MxAccessCommandExecutorTests` use the shared type instead of its nested copy.
|
||||
|
||||
**Resolution:** 2026-06-16 — Removed the nested `FakeMxStatus` class from `MxAccessCommandExecutorTests.FakeMxAccessComObject`; the two `new FakeMxStatus { ... }` usages in `Suspend`/`Activate` now resolve to the shared `TestSupport.FakeMxStatus` via the pre-existing `using ZB.MOM.WW.MxGateway.Worker.Tests.TestSupport;` import. Updated the XML doc on `TestSupport/NoopMxAccessServer.cs:FakeMxStatus` to note the consolidation. Fix applied 2026-06-16, verified on windev 2026-06-17 (dotnet test -p:Platform=x86: 344 passed, 0 failed).
|
||||
|
||||
### Worker.Tests-035
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `src/ZB.MOM.WW.MxGateway.Worker.Tests/MxAccess/MxAccessCommandExecutorTests.cs`, `src/ZB.MOM.WW.MxGateway.Worker/MxAccess/MxAccessCommandExecutor.cs:99-136` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `MxAccessCommandExecutor.Execute` has a `_` discard arm returning `CreateInvalidRequestReply(... "Unsupported MXAccess command kind ...")` — the safety net for an unknown `MxCommandKind` (e.g. a future gateway enum value before the worker is updated). No test passes an unknown kind and asserts `InvalidRequest`. A regression changing the arm to `throw` would propagate an unhandled exception through `WorkerPipeSession` and no test would catch it.
|
||||
|
||||
**Recommendation:** Add a `[Fact]` constructing a `StaCommand` with an undefined `MxCommandKind` value and asserting the reply is `ProtocolStatusCode.InvalidRequest` with "Unsupported" in the diagnostic.
|
||||
|
||||
**Resolution:** 2026-06-16 — Added `DispatchAsync_WithUnknownCommandKind_ReturnsInvalidRequestWithUnsupportedDiagnostic` to `MxAccessCommandExecutorTests`. Casts `int.MaxValue` to `MxCommandKind` (an undefined value not present in the proto-generated enum), dispatches it through `MxAccessStaSession.DispatchAsync`, asserts `ProtocolStatusCode.InvalidRequest`, and asserts `reply.DiagnosticMessage` contains "Unsupported" (case-insensitive — matching `CreateInvalidRequestReply`'s `"Unsupported MXAccess command kind ..."` message). Fix applied 2026-06-16, verified on windev 2026-06-17 (dotnet test -p:Platform=x86: 344 passed, 0 failed).
|
||||
|
||||
### Worker.Tests-036
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Concurrency & thread safety |
|
||||
| Location | `src/ZB.MOM.WW.MxGateway.Worker.Tests/Ipc/WorkerPipeSessionTests.cs:983-996` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `RunAsync_SendsFirstHeartbeatImmediatelyOnEnteringLoop` carries a redundant wall-clock assertion `Assert.True(elapsed < TimeSpan.FromSeconds(5), ...)`. The existing `heartbeatWait` CTS (cancel-after 5s) already enforces the same bound — the extra wall-clock check can only fire if the heartbeat arrived but took >5s to be received, which the CTS already prevents. It is the same coarse wall-clock pattern prior findings (Worker.Tests-003/004/013/020) corrected.
|
||||
|
||||
**Recommendation:** Remove the `start`/`elapsed`/`Assert.True(elapsed < ...)` check; the CTS timeout already pins the timing contract.
|
||||
|
||||
**Resolution:** 2026-06-16 — Removed the `DateTimeOffset start` and `TimeSpan elapsed` locals and the `Assert.True(elapsed < TimeSpan.FromSeconds(5), ...)` wall-clock assertion from `RunAsync_SendsFirstHeartbeatImmediatelyOnEnteringLoop`. The `heartbeatWait` CTS (cancel-after 5s) already enforces the same timing bound. Added an inline comment explaining why the wall-clock upper-bound check is omitted, consistent with the Worker.Tests-003/004/013/020 pattern. Fix applied 2026-06-16, verified on windev 2026-06-17 (dotnet test -p:Platform=x86: 344 passed, 0 failed).
|
||||
|
||||
@@ -4,8 +4,8 @@
|
||||
|---|---|
|
||||
| Module | `src/ZB.MOM.WW.MxGateway.Worker` |
|
||||
| Reviewer | Claude Code |
|
||||
| Review date | 2026-06-15 |
|
||||
| Commit reviewed | `410acc9` |
|
||||
| Review date | 2026-06-16 |
|
||||
| Commit reviewed | `8df5ab3` |
|
||||
| Status | Re-reviewed |
|
||||
| Open findings | 0 |
|
||||
|
||||
@@ -87,6 +87,23 @@ contention with the gateway-side watchdog (Server-031) is unchanged.
|
||||
| 9 | Testing coverage | No issues found in this diff. |
|
||||
| 10 | Documentation & comments | No issues found in this diff. |
|
||||
|
||||
#### 2026-06-16 re-review (commit 8df5ab3)
|
||||
|
||||
Re-review of the worker delta (`git diff 410acc9..8df5ab3`): the `IMxAccessServer`/`MxAccessComServer`/`MxAccessSession`/`MxAccessCommandExecutor` seam-extraction refactor plus alarm failover/subtag work. **No new findings.** Prior findings Worker-026/027/028 confirmed resolved at this commit. Every MXAccess COM call in the new seam is reachable only via `StaCommandDispatcher`→`staRuntime.InvokeAsync` (STA affinity preserved); MXAccess parity preserved (no synthesized events, HRESULTs surfaced); the single COM RCW is released exactly once; net48 idioms respected.
|
||||
|
||||
| # | Category | Result |
|
||||
|---|---|---|
|
||||
| 1 | Correctness & logic bugs | No issues found |
|
||||
| 2 | mxaccessgw conventions | No issues found |
|
||||
| 3 | Concurrency & thread safety | No issues found (STA affinity preserved across the new seam) |
|
||||
| 4 | Error handling & resilience | No issues found |
|
||||
| 5 | Security | No issues found (no secret/WriteSecured-payload logging) |
|
||||
| 6 | Performance & resource management | No issues found (single FinalReleaseComObject) |
|
||||
| 7 | Design-document adherence | No issues found |
|
||||
| 8 | Code organization & conventions | No issues found |
|
||||
| 9 | Testing coverage | No issues found |
|
||||
| 10 | Documentation & comments | No issues found |
|
||||
|
||||
## Findings
|
||||
|
||||
### Worker-001
|
||||
|
||||
+18
-1
@@ -8,7 +8,24 @@ recreated by the contracts project build.
|
||||
|
||||
`src/ZB.MOM.WW.MxGateway.Contracts/Protos/mxaccess_gateway.proto` defines the public
|
||||
`MxAccessGateway` gRPC service, command payloads, command replies, event DTOs,
|
||||
`MxValue`, `MxArray`, and `MxStatusProxy`.
|
||||
`MxValue`, `MxArray`, `MxSparseArray`, and `MxStatusProxy`.
|
||||
|
||||
`MxValue` carries a `kind` oneof with arms for all scalar and array types. One
|
||||
arm is `sparse_array_value = 19` (field number 19), which carries an
|
||||
`MxSparseArray`. `MxSparseArray` is a write-only value type: the gateway accepts
|
||||
it on every write variant (`Write`, `Write2`, `WriteSecured`, `WriteSecured2`,
|
||||
and the corresponding `*BulkEntry` shapes), expands it into a full,
|
||||
default-filled `MxArray` before forwarding to the worker, and rejects it on
|
||||
read or event paths. The worker never receives or produces it.
|
||||
|
||||
`MxSparseArray` has three fields: `element_data_type` (1, the `MxDataType` of
|
||||
every element), `total_length` (2, the length of the expanded full array), and
|
||||
`elements` (3, `repeated MxSparseElement`). Each `MxSparseElement` has `index`
|
||||
(1, zero-based position in the expanded array) and `value` (2, a scalar
|
||||
`MxValue`). Indices not mentioned in `elements` take the element type's default
|
||||
value — they are reset, not preserved. See `gateway.md` section
|
||||
"MxSparseArray — default-fill partial array writes" for the expansion rules,
|
||||
validation constraints, and the scope requirements per write variant.
|
||||
|
||||
The public command model includes bulk subscription command kinds for
|
||||
`AddItemBulk`, `AdviseItemBulk`, `RemoveItemBulk`, `UnAdviseItemBulk`,
|
||||
|
||||
+60
-21
@@ -62,37 +62,67 @@ Implementation guidance:
|
||||
|
||||
## Session Reconnect
|
||||
|
||||
Decision: no reconnectable sessions for v1.
|
||||
Reconnectable sessions with event replay are shipped and config-gated. The
|
||||
original "no reconnectable sessions" constraint is superseded.
|
||||
|
||||
One `OpenSession` creates one gateway session and one worker process. The
|
||||
session ends on `CloseSession`, client disconnect policy, lease expiry, worker
|
||||
fault, or gateway shutdown.
|
||||
fault, gateway shutdown, or — when `DetachGraceSeconds > 0` — detach-grace
|
||||
expiry after the last external event subscriber drops.
|
||||
|
||||
Rationale: reconnectable sessions require event replay, orphan ownership,
|
||||
security checks, and more complicated worker lifetime rules. They are not needed
|
||||
for the first parity slice.
|
||||
`MxGateway:Sessions:DetachGraceSeconds` (default `30`) controls the retention
|
||||
window. When positive, a session whose last external gRPC event-stream
|
||||
subscriber drops stays `Ready` for that many seconds so a client can reconnect
|
||||
to the same session instead of triggering a new `OpenSession` → worker spawn.
|
||||
Setting it to `0` reverts to closing only on normal lease expiry.
|
||||
|
||||
A reconnecting client issues `StreamEvents` with `after_worker_sequence` set to
|
||||
the last sequence it observed; the gateway replays retained events newer than
|
||||
that watermark (capped by `MxGateway:Events:ReplayBufferCapacity` and
|
||||
`MxGateway:Events:ReplayRetentionSeconds`) then transitions seamlessly to live
|
||||
delivery. If the requested position precedes the oldest retained event, a
|
||||
`ReplayGap` sentinel signals the client to re-snapshot. The replay→live handoff
|
||||
is atomic (no gap, no duplicate). See [Sessions](./Sessions.md) for the full
|
||||
reconnect and replay protocol.
|
||||
|
||||
## Event Subscribers
|
||||
|
||||
Decision: one active `StreamEvents` subscriber per session for v1.
|
||||
Multi-subscriber fan-out for data-side `StreamEvents` is shipped and
|
||||
config-gated. The original "one active subscriber per session" constraint is
|
||||
superseded for deployments that opt in.
|
||||
|
||||
A second subscriber should be rejected with a clear session error. Multi-client
|
||||
fan-out may be added later with explicit backpressure semantics.
|
||||
`MxGateway:Sessions:AllowMultipleEventSubscribers` (default `false`) controls
|
||||
the mode. When `false` the session still rejects a second `StreamEvents`
|
||||
subscriber with `EventSubscriberAlreadyActive`, preserving the original
|
||||
single-subscriber behavior. When `true`, up to
|
||||
`MxGateway:Sessions:MaxEventSubscribersPerSession` (default `8`) concurrent
|
||||
external subscribers may attach; a new attach that would exceed the cap is
|
||||
rejected with `EventSubscriberLimitReached`. The count-check-and-increment is
|
||||
atomic under the session lock.
|
||||
|
||||
Rationale: one subscriber preserves simple event ordering and failure behavior
|
||||
while parity is being proven.
|
||||
Failure semantics differ by mode: in single-subscriber mode a slow consumer's
|
||||
channel overflow faults the whole session (`FailFast` backpressure); in
|
||||
multi-subscriber mode the same condition disconnects only that subscriber so one
|
||||
slow consumer never faults a session shared by others. The mode is fixed at
|
||||
session construction and does not depend on a live subscriber-count snapshot.
|
||||
|
||||
### Alarms — superseded for the alarm subsystem
|
||||
The gateway-owned internal dashboard mirror subscribes directly on the
|
||||
distributor with `isInternal: true` and is not counted toward the cap or the
|
||||
detach-grace subscriber-count in either mode.
|
||||
|
||||
The single-subscriber rule above no longer applies to alarms. The gateway runs
|
||||
an always-on central alarm monitor (`GatewayAlarmMonitor`) that owns one
|
||||
See [Sessions](./Sessions.md) for the full event-distributor and backpressure
|
||||
design.
|
||||
|
||||
### Alarms — separate fan-out architecture
|
||||
|
||||
The single-subscriber rule never applied to alarms. The gateway runs an
|
||||
always-on central alarm monitor (`GatewayAlarmMonitor`) that owns one
|
||||
gateway-managed worker session, caches the active-alarm set, and fans it out to
|
||||
any number of clients through the session-less `StreamAlarms` RPC. Per-session
|
||||
alarm auto-subscribe is removed; `AcknowledgeAlarm` is session-less and routes
|
||||
through the monitor. Data-side `StreamEvents` remains one subscriber per
|
||||
session. Rationale: alarm state is gateway-wide, not session-scoped — every
|
||||
client wants the same current set plus updates, and forcing each to own a
|
||||
worker would multiply AVEVA polling load for no benefit.
|
||||
any number of clients through the session-less `StreamAlarms` RPC.
|
||||
`AcknowledgeAlarm` is session-less and routes through the monitor. Rationale:
|
||||
alarm state is gateway-wide, not session-scoped — every client wants the same
|
||||
current set plus updates, and forcing each to own a worker would multiply AVEVA
|
||||
polling load for no benefit.
|
||||
|
||||
## Authentication
|
||||
|
||||
@@ -467,12 +497,21 @@ against the live MXAccess attribute set.
|
||||
|
||||
These are explicit post-v1 revisit items, not open blockers:
|
||||
|
||||
- reconnectable sessions,
|
||||
- multiple event subscribers per session,
|
||||
- restricted worker service account,
|
||||
- production coalescing by item handle,
|
||||
- command batching for high-volume tag setup.
|
||||
|
||||
The following items were previously listed here and have since shipped:
|
||||
|
||||
- **Reconnectable sessions with replay** — shipped, config-gated via
|
||||
`MxGateway:Sessions:DetachGraceSeconds` and
|
||||
`MxGateway:Events:ReplayBufferCapacity` / `ReplayRetentionSeconds`.
|
||||
See [Session Reconnect](#session-reconnect) above and [Sessions](./Sessions.md).
|
||||
- **Multiple event subscribers per session** — shipped, config-gated via
|
||||
`MxGateway:Sessions:AllowMultipleEventSubscribers` and
|
||||
`MxGateway:Sessions:MaxEventSubscribersPerSession`.
|
||||
See [Event Subscribers](#event-subscribers) above and [Sessions](./Sessions.md).
|
||||
|
||||
## Related Documentation
|
||||
|
||||
- [Gateway Process Detailed Design](./GatewayProcessDesign.md)
|
||||
|
||||
@@ -37,8 +37,10 @@ paths, timeouts, queue sizes, enum values, or protocol values are invalid.
|
||||
"MaxPendingCommandsPerSession": 128,
|
||||
"DefaultLeaseSeconds": 1800,
|
||||
"LeaseSweepIntervalSeconds": 30,
|
||||
"DetachGraceSeconds": 30,
|
||||
"AllowMultipleEventSubscribers": false,
|
||||
"MaxEventSubscribersPerSession": 8
|
||||
"MaxEventSubscribersPerSession": 8,
|
||||
"WorkerReadyWaitTimeoutMs": 0
|
||||
},
|
||||
"Events": {
|
||||
"QueueCapacity": 10000,
|
||||
@@ -126,8 +128,10 @@ to avoid accidental large allocations from malformed or oversized frames.
|
||||
| `MxGateway:Sessions:MaxPendingCommandsPerSession` | `128` | Maximum number of pending worker commands for one session. Excess commands fail fast instead of queueing indefinitely. |
|
||||
| `MxGateway:Sessions:DefaultLeaseSeconds` | `1800` | Initial session lease and refresh duration. Unary client activity extends the lease by this duration. |
|
||||
| `MxGateway:Sessions:LeaseSweepIntervalSeconds` | `30` | Hosted monitor interval for closing expired leases. Active event-stream subscribers keep a session from expiring while the stream remains attached. |
|
||||
| `MxGateway:Sessions:DetachGraceSeconds` | `30` | Detach-grace retention window. When positive, a session whose last external (gRPC) event-stream subscriber drops is retained in `Ready` for this many seconds so a client can reconnect; if no external subscriber re-attaches within the window, the lease monitor closes it with `detach-grace-expired`. The internal dashboard mirror does not count as an external subscriber, so a dashboard-only session still enters detach-grace. `0` disables retention and reverts to closing only on normal lease expiry. Must be zero or greater. Reconnect/replay itself is implemented separately (Task 12); this option controls retention and expiry only. The effective close happens within the next sweep cycle after the window elapses — up to `LeaseSweepIntervalSeconds` after expiry. Operators wanting a firm minimum retention bound should set `DetachGraceSeconds` greater than `LeaseSweepIntervalSeconds`. |
|
||||
| `MxGateway:Sessions:AllowMultipleEventSubscribers` | `false` | Controls whether multiple `StreamEvents` subscribers may attach to one session. When `false` the session refuses a second subscriber with `AlreadyExists`. Set to `true` to enable fan-out via the `SessionEventDistributor`. |
|
||||
| `MxGateway:Sessions:MaxEventSubscribersPerSession` | `8` | Maximum number of concurrent `StreamEvents` subscribers per session when `AllowMultipleEventSubscribers` is `true`. Effectively 1 when `AllowMultipleEventSubscribers` is `false`. Must be greater than zero. |
|
||||
| `MxGateway:Sessions:WorkerReadyWaitTimeoutMs` | `0` | Bounded time, in milliseconds, the gateway will wait for a worker to reach `Ready` when the session is already `Ready` but the worker state has transiently diverged (e.g. `Handshaking` after a heartbeat blip). Applies only to transient worker states; terminal states (`Faulted`/`Closing`/`Closed`/no worker) fail fast immediately regardless of this setting. `0` (the default) disables the wait and preserves the original fail-fast behavior. Must be greater than or equal to zero. |
|
||||
|
||||
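All numeric session options must be greater than zero, except `DetachGraceSeconds` and `WorkerReadyWaitTimeoutMs`, which must be greater than or equal to zero.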
|
||||
|
||||
@@ -169,6 +173,8 @@ events (a "gap") and must re-snapshot; whatever is still retained is replayed.
|
||||
| `MxGateway:Dashboard:RecentSessionLimit` | `200` | Maximum number of session summaries projected into each dashboard snapshot. |
|
||||
| `MxGateway:Dashboard:ShowTagValues` | `false` | Reserved display control for tag values. The dashboard does not show full tag values by default. |
|
||||
| `MxGateway:Dashboard:GroupToRole` | _(empty)_ | LDAP group → dashboard role mapping. Keys are LDAP group names (short CN or full DN — leading-RDN match). Values must be `Admin` (read/write, API-key CRUD) or `Viewer` (read-only). A user whose LDAP groups don't intersect this map cannot sign in; with no mapping at all, only the loopback bypass admits anyone. |
|
||||
| `MxGateway:Dashboard:DisableLogin` | `false` | Dev/test only. When `true`, replaces the cookie authentication handler with `DashboardAutoLoginAuthenticationHandler`, which auto-authenticates every dashboard request — including requests from remote browsers, not just loopback — as `AutoLoginUser` holding both `Administrator` and `Viewer` roles. No login form, LDAP bind, or cookie is involved. A loud one-time startup warning is logged. Differs from `AllowAnonymousLocalhost`: `DisableLogin` mints a real authenticated principal (so role-gated write affordances appear), whereas `AllowAnonymousLocalhost` satisfies the authorization requirement on loopback only without minting a principal (write affordances stay hidden). Never enable in production. |
|
||||
| `MxGateway:Dashboard:AutoLoginUser` | `(null)` | Username stamped on the synthetic principal when `DisableLogin` is `true`. Default `(null)` — a null or blank value falls back to `multi-role`. Has no effect when `DisableLogin` is `false`. |
|
||||
|
||||
`SnapshotIntervalMilliseconds` must be greater than zero. `RecentFaultLimit`
|
||||
and `RecentSessionLimit` must be greater than or equal to zero.
|
||||
|
||||
@@ -442,6 +442,37 @@ authorizes every request, and `MxGateway:Dashboard:AllowAnonymousLocalhost`
|
||||
requests always require an authenticated principal carrying at least the
|
||||
Viewer role.
|
||||
|
||||
### DisableLogin dev bypass
|
||||
|
||||
`MxGateway:Dashboard:DisableLogin` (default `false`) is a third bypass for
|
||||
dev and test environments where LDAP is unavailable or irrelevant.
|
||||
|
||||
When the flag is `true`, the `DashboardAuthenticator`-backed cookie handler is
|
||||
replaced by `DashboardAutoLoginAuthenticationHandler`, registered under the
|
||||
same scheme name (`MxGateway.Dashboard`). The handler auto-authenticates every
|
||||
incoming request — including requests from remote browsers, not just loopback —
|
||||
as a principal for `MxGateway:Dashboard:AutoLoginUser` (unset by default; a null or blank value falls back to `multi-role`)
|
||||
holding both the `Administrator` and `Viewer` role claims.
|
||||
|
||||
The same-scheme-name swap is intentional: every authorization policy
|
||||
(`MxGateway.Dashboard.Viewer`, `MxGateway.Dashboard.Admin`,
|
||||
`MxGateway.Dashboard.HubClients`) resolves the `MxGateway.Dashboard` scheme,
|
||||
so the handler replacement requires zero changes to policies, Razor page
|
||||
attributes, or hub authorization attributes. `UseAuthentication()` stamps the
|
||||
principal on `HttpContext.User` for the full HTTP pipeline, the Blazor circuit,
|
||||
and the SignalR hubs uniformly — there is no separate path for each surface.
|
||||
|
||||
This differs from `AllowAnonymousLocalhost`: that flag satisfies the Viewer
|
||||
authorization requirement on loopback without minting an authenticated
|
||||
principal, so role-gated write affordances (Admin-only API-key CRUD, Close/Kill
|
||||
controls) stay hidden. `DisableLogin` mints a real multi-role principal, so
|
||||
those affordances appear — which is the point for dev scenarios where a
|
||||
developer needs the full Admin surface without standing up LDAP.
|
||||
|
||||
A loud one-time startup warning is logged when `DisableLogin` is `true`. The
|
||||
gRPC API-key authentication path is untouched; only the dashboard cookie
|
||||
surface is affected. Never enable in production.
|
||||
|
||||
### Hub bearer flow
|
||||
|
||||
SignalR connections cannot reuse the `__Host-` cookie when the JS client
|
||||
|
||||
+12
-4
@@ -51,7 +51,7 @@ shutdown request even when a command or event assertion fails. Cleanup failures
|
||||
in that `finally` block are logged rather than thrown, so a real assertion
|
||||
failure is never masked by a shutdown timeout.
|
||||
|
||||
`WorkerLiveMxAccessSmokeTests` additionally covers five MXAccess parity paths the
|
||||
`WorkerLiveMxAccessSmokeTests` additionally covers seven MXAccess parity paths the
|
||||
fake-worker tests cannot validate:
|
||||
|
||||
- a `Write` round-trip against an advised item, asserting both that the reply is
|
||||
@@ -67,13 +67,21 @@ fake-worker tests cannot validate:
|
||||
- a `WriteSecured` round-trip after `AuthenticateUser`, asserting the reply
|
||||
carries `MxCommandKind.WriteSecured` and the credential password never
|
||||
appears in the diagnostic message (parity for both the secured-write
|
||||
ordering rule and the "do not log secrets" contract), and
|
||||
ordering rule and the "do not log secrets" contract),
|
||||
- an abnormal worker exit (the worker process is killed mid-session) where the
|
||||
gateway must transition the session to `SessionState.Faulted` with a
|
||||
non-empty fault description carrying a known worker-client classification
|
||||
(pipe disconnected / worker faulted / end-of-stream / heartbeat expired).
|
||||
(pipe disconnected / worker faulted / end-of-stream / heartbeat expired),
|
||||
- the B8 new COM commands — `AuthenticateUser`, `ArchestrAUserToId`, `Suspend`,
|
||||
and `Activate` — each asserting a real MXAccess reply (not `InvalidRequest`)
|
||||
is returned against an added-but-not-advised item, and
|
||||
- the buffered-data path — `AddBufferedItem` and `SetBufferedUpdateInterval` —
|
||||
asserting the commands round-trip and that the worker delivers at least one
|
||||
`OnBufferedDataChange` event (the empty NoData bootstrap) without crashing
|
||||
or dropping frames; live §3.2 multi-sample conversion is noted as a residual
|
||||
when the rig does not drive sample-bearing buffered batches on demand.
|
||||
|
||||
All six tests are gated by the same `MXGATEWAY_RUN_LIVE_MXACCESS_TESTS=1`
|
||||
All eight tests are gated by the same `MXGATEWAY_RUN_LIVE_MXACCESS_TESTS=1`
|
||||
opt-in variable.
|
||||
|
||||
Build the worker before running the smoke:
|
||||
|
||||
+32
-3
@@ -72,7 +72,7 @@ private void EnsureSessionCapacity()
|
||||
}
|
||||
```
|
||||
|
||||
`SessionManager` also defines three close-reason constants — `DefaultCloseReason` (`"client-close"`), `GatewayShutdownReason` (`"gateway-shutdown"`), and `LeaseExpiredReason` (`"lease-expired"`) — so that the metrics and worker shutdown paths agree on a fixed vocabulary.
|
||||
`SessionManager` also defines four close-reason constants — `DefaultCloseReason` (`"client-close"`), `GatewayShutdownReason` (`"gateway-shutdown"`), `LeaseExpiredReason` (`"lease-expired"`), and `DetachGraceExpiredReason` (`"detach-grace-expired"`) — so that the metrics and worker shutdown paths agree on a fixed vocabulary.
|
||||
|
||||
### SessionRegistry (ISessionRegistry)
|
||||
|
||||
@@ -150,7 +150,8 @@ public sealed record SessionCloseResult(
|
||||
|------|---------|
|
||||
| `SessionNotFound` | The session id is not in the registry. |
|
||||
| `SessionNotReady` | The session or its `IWorkerClient` is not in `Ready` state. |
|
||||
| `EventSubscriberAlreadyActive` | A second event subscriber attached when only one is allowed. |
|
||||
| `EventSubscriberAlreadyActive` | A second event subscriber attached in single-subscriber mode (`AllowMultipleEventSubscribers` is `false`). |
|
||||
| `EventSubscriberLimitReached` | In multi-subscriber mode, an attach exceeded `MaxEventSubscribersPerSession` concurrent external subscribers. |
|
||||
| `EventQueueOverflow` | Reserved for the worker event channel overflow path. |
|
||||
| `SessionLimitExceeded` | `MaxSessions` is in use. |
|
||||
| `OpenFailed` | `OpenSessionAsync` failed; the inner exception carries the cause. |
|
||||
@@ -192,10 +193,38 @@ The order — fault, deregister, dispose, release slot, record metric, log, reth
|
||||
|
||||
While `Ready`, callers reach the worker through `SessionManager.InvokeAsync` or `ReadEventsAsync`. Both delegate to `GatewaySession`, which checks the state under lock and updates `LastClientActivityAt` on every invocation. `GatewaySession` also exposes typed bulk helpers (`AddItemBulkAsync`, `SubscribeBulkAsync`, etc.) that wrap `WorkerCommand` round-trips and translate non-`Ok` `ProtocolStatus` replies into `SessionManagerException` with `SessionNotReady`.
|
||||
|
||||
Event streaming uses `AttachEventSubscriber` which returns a disposable lease. When `allowMultipleSubscribers` is false the second attach throws `EventSubscriberAlreadyActive`; this prevents two gRPC streams from racing on the same worker event channel. Active event subscribers keep the session lease from expiring until the stream is disposed.
|
||||
Event streaming uses `AttachEventSubscriber` which returns a disposable lease. When `allowMultipleSubscribers` is false (single-subscriber mode) a second attach throws `EventSubscriberAlreadyActive`; this prevents two gRPC streams from racing on the same worker event channel. When it is true, up to `MaxEventSubscribersPerSession` concurrent external subscribers are allowed, and an attach that would exceed that cap throws `EventSubscriberLimitReached`. The count-check-and-increment is atomic under the session lock, so concurrent attaches can never exceed the cap. The gateway-owned internal dashboard mirror subscriber is registered directly on the distributor and does not count toward the cap. Active event subscribers keep the session lease from expiring until the stream is disposed.
|
||||
|
||||
`FailFast` event backpressure faults the whole session only in single-subscriber mode; in multi-subscriber mode it degrades to a per-subscriber disconnect so one slow consumer never faults a session shared by others. The session passes its mode to the `SessionEventDistributor` at construction, so this decision is made on the fixed mode rather than a live subscriber-count snapshot.
|
||||
|
||||
Sessions open with `MxGateway:Sessions:DefaultLeaseSeconds` (default 1800) added to the open timestamp. Unary client activity refreshes the lease by the same duration. `ExtendLease` and `IsLeaseExpired` cooperate with `SessionManager.CloseExpiredLeasesAsync`, which iterates a registry snapshot and closes any session whose lease has expired with `LeaseExpiredReason`. `SessionLeaseMonitorHostedService` runs that sweep every `MxGateway:Sessions:LeaseSweepIntervalSeconds` seconds (default 30).
|
||||
|
||||
#### Detach-grace retention
|
||||
|
||||
`MxGateway:Sessions:DetachGraceSeconds` (default 30) is a bounded retention window kept after a session's *last external (gRPC) event-stream subscriber* drops, so a client can reconnect to the same session instead of having it torn down on the first stream disconnect. While the window is open the session stays `Ready` and fully usable — worker commands continue to work and a reconnecting subscriber re-attaches normally. Because retention is keyed on the *external* subscriber count (`_activeEventSubscriberCount`), and the gateway-owned internal dashboard mirror registers directly on the distributor with `isInternal: true` and is therefore *not* counted, a session whose only remaining subscriber is the dashboard mirror still enters detach-grace.
|
||||
|
||||
Mechanically: when the last external subscriber detaches and `DetachGraceSeconds > 0`, `DetachEventSubscriber` stamps `DetachedAtUtc` from the session's `TimeProvider` under `_syncRoot` (the detach→grace-start transition). `AttachEventSubscriber` clears `DetachedAtUtc` under the same lock when a subscriber re-attaches (the reattach→grace-cancel transition), so both transitions and the sweeper's read all serialize on `_syncRoot`. `SessionManager.CloseExpiredLeasesAsync` checks `IsDetachGraceExpired(now)` alongside `IsLeaseExpired(now)`: a session detached for at least `DetachGraceSeconds` with no active external subscriber is closed by the same lease sweep, with the distinct `DetachGraceExpiredReason` (`"detach-grace-expired"`) so operators can tell a short reconnect-window expiry from a long idle-lease expiry. Setting `DetachGraceSeconds` to `0` disables retention and reverts to the original behavior: a detached session is retained only until its normal lease expires.
|
||||
|
||||
`DetachGraceSeconds` controls retention and expiry only; the reconnect/replay path that re-attaches a dropped client to a retained session is described in [Reconnect and replay](#reconnect-and-replay).
|
||||
|
||||
#### Reconnect and replay
|
||||
|
||||
A client that drops mid-stream reconnects by re-issuing `StreamEvents` with `StreamEventsRequest.after_worker_sequence` set to the last `worker_sequence` it observed. A non-zero `after_worker_sequence` means *resume*; `0` means *fresh stream* and behaves exactly as a first-time subscribe — no replay, no sentinel.
|
||||
|
||||
On a resume, `EventStreamService.StreamEventsAsync` attaches through `GatewaySession.AttachEventSubscriberWithReplay`, which calls `SessionEventDistributor.RegisterWithReplay`. That method snapshots the session's replay ring for events newer than `after_worker_sequence` **and** registers the live subscriber inside a single `_replayLock` critical section. This atomicity is what makes the replay→live handoff free of gaps and duplicates: the pump appends each event to the replay ring (under `_replayLock`) before fanning it to subscriber channels, so relative to that one critical section every event is either in the replay snapshot or fanned into the freshly-registered live channel — never both observably, never neither.
|
||||
|
||||
The handoff is sealed by a watermark. `RegisterWithReplay` returns `LiveResumeSequence` (the highest replayed sequence, or `after_worker_sequence` when nothing was replayed); `EventStreamService` then filters the live channel to events strictly greater than that watermark. An event that was both included in the replay snapshot and — racing the registration — also written to the live channel has `worker_sequence <= LiveResumeSequence`, so the live filter drops it exactly once (no duplicate), while every newer event is delivered (no gap). The same per-item filter governs replayed and live events identically, so a constrained or resuming caller never sees a replayed event it could not have seen live.
|
||||
|
||||
Emit order on a resumed stream:
|
||||
|
||||
1. **ReplayGap sentinel (only when events were evicted).** If the requested `after_worker_sequence` predates the oldest event still retained — i.e. events in the open interval between the requested watermark and the oldest retained sequence were dropped by capacity or age eviction and are unrecoverable — the gateway first yields a single sentinel `MxEvent` with `replay_gap` populated (`requested_after_sequence` = the requested watermark, `oldest_available_sequence` = the oldest still-retained sequence). The sentinel carries the session id; its `family` is `UNSPECIFIED`, its `body` oneof is unset, and no per-item fields are populated. It is an explicit, documented control signal — *not* a synthesized MXAccess event — telling the client to discard local state and re-snapshot. A client that wants to resume without another gap should set `after_worker_sequence = oldest_available_sequence - 1` on its next request.
|
||||
2. **Retained replay batch.** The still-retained events newer than the requested watermark, in ascending `worker_sequence` order.
|
||||
3. **Live events**, resuming strictly after `LiveResumeSequence`.
|
||||
|
||||
When `after_worker_sequence` is inside the retained window (nothing was evicted), step 1 is skipped: the stream replays the retained tail then resumes live with no sentinel.
|
||||
|
||||
The ReplayGap sentinel is emitted **only** on the `StreamEvents` server stream and only to the resuming subscriber — it is never fanned to other subscribers and never appears in `DrainEventsReply` (the diagnostic drain path is untouched). Replay retention itself is bounded by `MxGateway:Events:ReplayBufferCapacity` (count) and `ReplayRetentionSeconds` (age); see [Configuration](GatewayConfiguration.md).
|
||||
|
||||
### Close
|
||||
|
||||
`GatewaySession.CloseAsync` is serialized by a per-session `SemaphoreSlim` (`_closeLock`) so only one close runs at a time, but every read/write of `_state` still passes through `_syncRoot` (via `TryBeginClose` and `MarkClosed`). The close path therefore obeys the same lock discipline as `TransitionTo` / `MarkFaulted`: it transitions to `Closing`, asks the worker client to shut down within `ShutdownTimeout`, and on success transitions to `Closed`. `DisposeAsync` waits on `_closeLock` once before disposing the semaphore so an in-flight close's `Release()` cannot race against the dispose. If `WorkerClient.ShutdownAsync` throws, the session falls back to `IWorkerClient.Kill` (forced close):
|
||||
|
||||
@@ -75,6 +75,20 @@ private static MxValue CreateNullValue(
|
||||
}
|
||||
```
|
||||
|
||||
### Sparse array expansion (write path, gateway only)
|
||||
|
||||
`MxSparseArray` — the `sparse_array_value` arm on `MxValue` — is a write-only
|
||||
shorthand. The worker never produces or receives it; the gateway expands it into
|
||||
a full `MxArray` before the command reaches the named pipe. Expansion allocates
|
||||
a complete array of `total_length` slots, initializes every slot to the element
|
||||
type's default (bool → `false`; numeric → `0`; string → `""`; time/timestamp →
|
||||
Unix epoch), then writes each `MxSparseElement` at its declared index. The
|
||||
resulting `array_value` is an ordinary `MxArray` that passes through the
|
||||
conversion layer unchanged. The worker therefore still performs a single
|
||||
whole-array COM write, preserving MXAccess parity. Unmentioned indices are
|
||||
**reset** to their type default, not preserved from prior state — there is no
|
||||
read-modify-write merge.
|
||||
|
||||
### Array projection
|
||||
|
||||
`ConvertArray` records the rank and per-dimension lengths so multi-dimensional `SAFEARRAY` shapes survive the round trip. The element type is resolved from the caller-supplied hint or the CLR element type via `ResolveArrayElementDataType`, then dispatched to the matching typed builder (`ConvertBoolArray`, `ConvertInt64Array`, `ConvertTimestampArray`, and so on).
|
||||
|
||||
@@ -1,18 +1,18 @@
|
||||
{
|
||||
"planPath": "docs/plans/2026-06-15-session-resilience.md",
|
||||
"tasks": [
|
||||
{"id": 108, "subject": "Task 1: Add OwnerKeyId to the session", "status": "pending"},
|
||||
{"id": 109, "subject": "Task 2: SessionEventDistributor skeleton", "status": "pending", "blockedBy": [108]},
|
||||
{"id": 110, "subject": "Task 3: Bounded replay ring buffer", "status": "pending", "blockedBy": [109]},
|
||||
{"id": 111, "subject": "Task 4: Rewire AttachEventSubscriber + EventStreamService onto distributor", "status": "pending", "blockedBy": [110]},
|
||||
{"id": 112, "subject": "Task 5: Per-subscriber backpressure isolation", "status": "pending", "blockedBy": [111]},
|
||||
{"id": 113, "subject": "Task 6: Dashboard broadcaster becomes a distributor subscriber", "status": "pending", "blockedBy": [111]},
|
||||
{"id": 114, "subject": "Task 7: Remove validator block + add subscriber cap option", "status": "pending", "blockedBy": [112]},
|
||||
{"id": 115, "subject": "Task 8: Subscriber-lease collection + cap enforcement", "status": "pending", "blockedBy": [114]},
|
||||
{"id": 116, "subject": "Task 9: Multi-subscriber end-to-end test (FakeWorkerHarness)", "status": "pending", "blockedBy": [115]},
|
||||
{"id": 117, "subject": "Task 10: Proto - ReplayGap signal", "status": "pending", "blockedBy": [116]},
|
||||
{"id": 118, "subject": "Task 11: Detach-grace session retention", "status": "pending", "blockedBy": [117]},
|
||||
{"id": 119, "subject": "Task 12: Replay-on-reconnect + emit ReplayGap", "status": "pending", "blockedBy": [118, 110]},
|
||||
{"id": 108, "subject": "Task 1: Add OwnerKeyId to the session", "status": "completed"},
|
||||
{"id": 109, "subject": "Task 2: SessionEventDistributor skeleton", "status": "completed", "blockedBy": [108]},
|
||||
{"id": 110, "subject": "Task 3: Bounded replay ring buffer", "status": "completed", "blockedBy": [109]},
|
||||
{"id": 111, "subject": "Task 4: Rewire AttachEventSubscriber + EventStreamService onto distributor", "status": "completed", "blockedBy": [110]},
|
||||
{"id": 112, "subject": "Task 5: Per-subscriber backpressure isolation", "status": "completed", "blockedBy": [111]},
|
||||
{"id": 113, "subject": "Task 6: Dashboard broadcaster becomes a distributor subscriber", "status": "completed", "blockedBy": [111]},
|
||||
{"id": 114, "subject": "Task 7: Remove validator block + add subscriber cap option", "status": "completed", "blockedBy": [112]},
|
||||
{"id": 115, "subject": "Task 8: Subscriber-lease collection + cap enforcement", "status": "completed", "blockedBy": [114]},
|
||||
{"id": 116, "subject": "Task 9: Multi-subscriber end-to-end test (FakeWorkerHarness)", "status": "completed", "blockedBy": [115]},
|
||||
{"id": 117, "subject": "Task 10: Proto - ReplayGap signal", "status": "completed", "blockedBy": [116]},
|
||||
{"id": 118, "subject": "Task 11: Detach-grace session retention", "status": "completed", "blockedBy": [117]},
|
||||
{"id": 119, "subject": "Task 12: Replay-on-reconnect + emit ReplayGap", "status": "completed", "blockedBy": [118, 110]},
|
||||
{"id": 120, "subject": "Task 13: Owner re-validation on reconnect", "status": "pending", "blockedBy": [119, 108]},
|
||||
{"id": 121, "subject": "Task 14: Client ReplayGap handling - all 5 clients", "status": "pending", "blockedBy": [117]},
|
||||
{"id": 122, "subject": "Task 15: Reconnect integration test (fake worker)", "status": "pending", "blockedBy": [119]},
|
||||
|
||||
@@ -0,0 +1,141 @@
|
||||
# Dashboard "Disable Login" Dev Flag — Design
|
||||
|
||||
**Date:** 2026-06-16
|
||||
**Status:** Approved (brainstorming) — ready for implementation plan.
|
||||
|
||||
## Goal
|
||||
|
||||
A config flag that **disables login in the gateway dashboard**. When enabled, every
|
||||
request is auto-authenticated as a fixed dev user (default **`multi-role`**) holding
|
||||
**both** dashboard roles (`Administrator` + `Viewer`), so no login form, cookie, or LDAP
|
||||
bind is involved and the whole UI behaves as a signed-in multi-role admin. Default
|
||||
**off**. Mirrors the sister project OtOpcUa's `Security:Auth:DisableLogin` feature.
|
||||
|
||||
## Why / scope
|
||||
|
||||
Speeds up dashboard testing against the remote dev boxes (10.100.0.48, wonder) with no
|
||||
sign-in round-trip and no GLAuth dependency. Scope is the **dashboard cookie web surface
|
||||
only** — the gRPC API-key auth path (`authorization: Bearer mxgw_…`) and its scopes are a
|
||||
separate auth model and are **untouched**.
|
||||
|
||||
## Background (current dashboard auth, verified)
|
||||
|
||||
- Dashboard auth is a **single cookie scheme** `MxGateway.Dashboard` registered in
|
||||
`Dashboard/DashboardServiceCollectionExtensions.cs::AddGatewayDashboard`
|
||||
(`AddAuthentication("MxGateway.Dashboard").AddCookie(...)`), plus a bearer scheme
|
||||
`MxGateway.Dashboard.HubToken` (`HubTokenAuthenticationHandler`) for SignalR hubs.
|
||||
- Real login: `/login` → `DashboardAuthenticator.AuthenticateAsync` → shared
|
||||
`ILdapAuthService` bind/search → `IGroupRoleMapper<string>` → `CreatePrincipal` builds a
|
||||
`ClaimsPrincipal` (`ZbClaimTypes.Name`/`Username`/`DisplayName` + one `ZbClaimTypes.Role`
|
||||
per role + `LdapGroupClaimType` group claims; identity authType = the cookie scheme,
|
||||
nameType = `ZbClaimTypes.Name`, roleType = `ZbClaimTypes.Role`) → cookie sign-in.
|
||||
- Authorization: a custom `DashboardAuthorizationHandler` evaluates
|
||||
`DashboardAuthorizationRequirement`. Policies: `ViewerPolicy` (AnyDashboardRole),
|
||||
`AdminPolicy` (AdminOnly), `HubClientsPolicy` (cookie **or** hub-token scheme,
|
||||
AnyDashboardRole).
|
||||
- Roles: exactly two — `DashboardRoles.Admin` (`"Administrator"`) and
|
||||
`DashboardRoles.Viewer` (`"Viewer"`).
|
||||
- **Existing escapes (important):** `DashboardAuthorizationHandler` already short-circuits
|
||||
when `Authentication.Mode == Disabled` or when `Dashboard.AllowAnonymousLocalhost`
|
||||
(default **true**) and the request is loopback. **But both only `context.Succeed(...)`
|
||||
the authorization requirement — they do not mint an authenticated principal.** So
|
||||
`HttpContext.User.Identity.IsAuthenticated` stays false, `Identity.Name` is null, and
|
||||
role-gated `AuthorizeView` write affordances stay **hidden**. That is precisely why they
|
||||
do not deliver the "logged-in multi-role admin" experience this feature needs.
|
||||
|
||||
## Approach (chosen: always-authenticating handler under the cookie scheme name)
|
||||
|
||||
When the flag is **on**, **replace the `.AddCookie(...)` registration with a custom
|
||||
`AuthenticationHandler` registered under the *same* scheme name**
|
||||
(`DashboardAuthenticationDefaults.AuthenticationScheme` = `"MxGateway.Dashboard"`). Its
|
||||
`HandleAuthenticateAsync` **always returns `AuthenticateResult.Success`** with the fixed
|
||||
dev principal (configured username, both roles), shaped identically to what
|
||||
`DashboardAuthenticator.CreatePrincipal` produces. `UseAuthentication()` stamps that
|
||||
principal on `HttpContext.User` for **every** request.
|
||||
|
||||
Registering under the cookie scheme name (not a new name) is the load-bearing detail: the
|
||||
`ViewerPolicy`, `AdminPolicy`, and `HubClientsPolicy` all resolve through that scheme via
|
||||
`DashboardAuthorizationHandler`'s role check, so they pass with **no policy or page
|
||||
changes**. The HTTP pipeline (Razor pages, admin endpoints), the Blazor circuit
|
||||
(`AuthorizeView`, `[CascadingParameter] AuthenticationState`), and the SignalR hubs are
|
||||
all covered by the single `HttpContext.User` seam. Because the handler authenticates every
|
||||
request, the feature is inherently **global** (all clients, including remote browsers) —
|
||||
the agreed scope.
|
||||
|
||||
`SignInAsync`/`SignOutAsync` are no-ops (no cookie to write or clear; the next request
|
||||
re-authenticates through the handler).
|
||||
|
||||
**Alternatives rejected:** (2) mint the principal inside the existing
|
||||
`DashboardAuthorizationHandler` bypass branches — authorization runs after authentication,
|
||||
so `HttpContext.User` is set too late for the Blazor auth state, and two seams must agree
|
||||
(this is essentially today's half-feature); (3) a pipeline middleware plus a stubbed
|
||||
`AuthenticationStateProvider` — two components to keep in sync, and a page request still
|
||||
302s to `/login` unless `HttpContext.User` is also set.
|
||||
|
||||
## Components
|
||||
|
||||
### 1. Config surface — two new fields on `DashboardOptions` (`MxGateway:Dashboard:*`)
|
||||
- `DisableLogin` (bool, default **false**).
|
||||
- `AutoLoginUser` (string, default **`"multi-role"`** — this project's GLAuth
|
||||
Administrator test user). Used as `Name`/`Username`/`DisplayName` of the minted
|
||||
principal; blank falls back to `"multi-role"`.
|
||||
|
||||
"All permissions" = principal minted with **both** `DashboardRoles.Admin` and
|
||||
`DashboardRoles.Viewer`.
|
||||
|
||||
### 2. `DashboardAutoLoginAuthenticationHandler`
|
||||
`AuthenticationHandler<AuthenticationSchemeOptions>` implementing
|
||||
`IAuthenticationSignInHandler`. Mirrors OtOpcUa's `AutoLoginAuthenticationHandler`, adapted
|
||||
to this project's claim shape (`ZbClaimTypes.*`, `DashboardRoles.*`). Always `Success`;
|
||||
SignIn/SignOut no-ops.
|
||||
|
||||
### 3. Wiring in `AddGatewayDashboard`
|
||||
Read `MxGateway:Dashboard:DisableLogin` directly from `IConfiguration` at registration
|
||||
time (the same idiom OtOpcUa uses, since scheme registration precedes options binding).
|
||||
- On → `AddScheme<AuthenticationSchemeOptions, DashboardAutoLoginAuthenticationHandler>(
|
||||
"MxGateway.Dashboard", _ => {})` in place of `AddCookie`; the `HubToken` scheme stays
|
||||
registered unchanged.
|
||||
- Off → existing `AddCookie(...)` path unchanged.
|
||||
|
||||
### 4. Safety
|
||||
- Default **off**.
|
||||
- A **loud one-time startup `LogWarning`** ("DASHBOARD LOGIN DISABLED
|
||||
(MxGateway:Dashboard:DisableLogin=true) — every request authenticated as '{user}' with
|
||||
full permissions (Administrator, Viewer). Dev/test only; never enable in production.")
|
||||
via the same options `PostConfigure<ILoggerFactory>` idiom OtOpcUa uses.
|
||||
- The existing `AllowAnonymousLocalhost` / `Authentication.Mode == Disabled` escapes are
|
||||
left untouched — `DisableLogin` is orthogonal (it changes *authentication*, minting a
|
||||
principal, not authorization bypass); when it is on the authorization handler's normal
|
||||
role-check branch succeeds, so the bypass branches simply do not matter.
|
||||
|
||||
## Error handling / edge cases
|
||||
|
||||
- Blank `AutoLoginUser` → falls back to `"multi-role"` (handler never mints a nameless
|
||||
principal).
|
||||
- `/login` still renders when the flag is on but is pointless (the user is already
|
||||
authenticated); `POST /login`'s `SignInAsync` is a no-op. `/logout` is likewise a no-op.
|
||||
No redirect added (YAGNI).
|
||||
- No interaction with the gRPC API-key path — that auth is entirely separate.
|
||||
|
||||
## Testing
|
||||
|
||||
- **Handler unit test:** `HandleAuthenticateAsync` returns `Success`;
|
||||
`principal.Identity.IsAuthenticated`, `Identity.Name == AutoLoginUser`,
|
||||
`IsInRole("Administrator")` && `IsInRole("Viewer")`; blank-user fallback.
|
||||
- **Wiring / integration (`WebApplicationFactory`):** with `DisableLogin=true`, an
|
||||
`AdminPolicy`-gated endpoint returns 200 with **no** cookie, and a `/hubs/*` negotiate
|
||||
authorizes; the startup warning is emitted.
|
||||
- **Regression:** with the flag off (default), the real cookie handler is still registered
|
||||
and existing dashboard auth tests pass.
|
||||
|
||||
## Docs to update in the same change
|
||||
|
||||
- `docs/GatewayConfiguration.md` — new `MxGateway:Dashboard:DisableLogin` /
|
||||
`AutoLoginUser` options.
|
||||
- The dashboard design doc (`docs/GatewayDashboardDesign.md`).
|
||||
- The CLAUDE.md dashboard-auth note (alongside the `AllowAnonymousLocalhost` mention).
|
||||
|
||||
## Scope / verification
|
||||
|
||||
Gateway-server-side only (.NET 10, x64) — builds and tests entirely on macOS. No worker,
|
||||
no `.proto`, no client, no gRPC changes.
|
||||
@@ -0,0 +1,447 @@
|
||||
# Dashboard "Disable Login" Dev Flag — Implementation Plan
|
||||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers-extended-cc:executing-plans to implement this plan task-by-task.
|
||||
|
||||
**Goal:** Add a `MxGateway:Dashboard:DisableLogin` config flag that, when on, auto-authenticates every dashboard request as a fixed dev user (default `multi-role`) holding both dashboard roles — no login form, cookie, or LDAP bind.
|
||||
|
||||
**Architecture:** When the flag is on, the dashboard's `AddCookie(...)` registration is replaced by a custom `AuthenticationHandler` registered **under the same scheme name** (`MxGateway.Dashboard`) whose `HandleAuthenticateAsync` always succeeds with a multi-role principal. `UseAuthentication()` stamps that principal on `HttpContext.User` for every request, so every policy (Viewer/Admin/HubClients), the Blazor circuit, and the SignalR hubs see a signed-in admin with **zero policy or page changes**. Mirrors the sister project OtOpcUa's `Security:Auth:DisableLogin`.
|
||||
|
||||
**Tech Stack:** .NET 10 (x64) gateway server; ASP.NET Core authentication/authorization; xUnit. Server-side only — no worker, no `.proto`, no clients, no gRPC API-key changes. Builds and tests entirely on macOS.
|
||||
|
||||
**Design doc:** `docs/plans/2026-06-16-dashboard-disable-login-design.md`
|
||||
|
||||
**Key existing files (verified):**
|
||||
- `src/ZB.MOM.WW.MxGateway.Server/Configuration/DashboardOptions.cs` — options bound from `MxGateway:Dashboard`.
|
||||
- `src/ZB.MOM.WW.MxGateway.Server/Dashboard/DashboardServiceCollectionExtensions.cs::AddGatewayDashboard` — auth scheme + policy wiring.
|
||||
- `src/ZB.MOM.WW.MxGateway.Server/Dashboard/DashboardAuthenticationDefaults.cs` — scheme/policy name constants (`AuthenticationScheme = "MxGateway.Dashboard"`, `AdminPolicy`, `ViewerPolicy`).
|
||||
- `src/ZB.MOM.WW.MxGateway.Server/Dashboard/DashboardRoles.cs` — `Admin = "Administrator"`, `Viewer = "Viewer"`.
|
||||
- `src/ZB.MOM.WW.MxGateway.Server/Dashboard/DashboardAuthenticator.cs::CreatePrincipal` — the claim shape to mirror (`ZbClaimTypes.Name/Username/DisplayName` + `ZbClaimTypes.Role` per role; identity authType = scheme, nameType = `ZbClaimTypes.Name`, roleType = `ZbClaimTypes.Role`).
|
||||
- `ZbClaimTypes` (from `ZB.MOM.WW.Auth.AspNetCore`): `Name` (= `ClaimTypes.Name`), `Role` (= `ClaimTypes.Role`), `Username` (`"zb:username"`), `DisplayName` (`"zb:displayname"`).
|
||||
- `src/ZB.MOM.WW.MxGateway.Server/Properties/AssemblyInfo.cs` — `InternalsVisibleTo("ZB.MOM.WW.MxGateway.Tests")` (so `internal` members are test-visible).
|
||||
|
||||
**Test conventions (verified):** no Moq/NSubstitute — hand-written stubs only. Integration-style tests build the real app with `GatewayApplication.Build(["--MxGateway:Dashboard:Key=value"])` and resolve services from `app.Services` (see `DashboardCookieOptionsTests`, `DashboardHubsRegistrationTests`). Run filtered tests only (per standing guidance), with `MSBUILDDISABLENODEREUSE=1`.
|
||||
|
||||
---
|
||||
|
||||
### Task 1: Config fields on `DashboardOptions`
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~3 min
|
||||
**Parallelizable with:** none (Tasks 2/3 depend on these fields)
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/ZB.MOM.WW.MxGateway.Server/Configuration/DashboardOptions.cs`
|
||||
- Test: `src/ZB.MOM.WW.MxGateway.Tests/Configuration/GatewayOptionsTests.cs`
|
||||
|
||||
**Step 1: Write the failing test** — add to `GatewayOptionsTests.cs`:
|
||||
|
||||
```csharp
|
||||
[Fact]
|
||||
public void DashboardOptions_DisableLogin_DefaultsToFalse()
|
||||
{
|
||||
Assert.False(new DashboardOptions().DisableLogin);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void DashboardOptions_AutoLoginUser_DefaultsToNull()
|
||||
{
|
||||
Assert.Null(new DashboardOptions().AutoLoginUser);
|
||||
}
|
||||
```
|
||||
|
||||
(If `GatewayOptionsTests` lacks `using ZB.MOM.WW.MxGateway.Server.Configuration;`, add it.)
|
||||
|
||||
**Step 2: Run it, expect FAIL** (compile error: no such members)
|
||||
|
||||
Run: `MSBUILDDISABLENODEREUSE=1 dotnet test src/ZB.MOM.WW.MxGateway.Tests/ZB.MOM.WW.MxGateway.Tests.csproj --filter "FullyQualifiedName~GatewayOptionsTests.DashboardOptions"`
|
||||
|
||||
**Step 3: Add the two `init` properties** to `DashboardOptions.cs` (place near `AllowAnonymousLocalhost`):
|
||||
|
||||
```csharp
|
||||
/// <summary>
|
||||
/// DEV/TEST ONLY. When true, the dashboard bypasses the login form entirely and
|
||||
/// auto-authenticates EVERY request as <see cref="AutoLoginUser"/> holding both
|
||||
/// dashboard roles (Administrator + Viewer). No cookie, no LDAP bind. Default false.
|
||||
/// Unlike <see cref="AllowAnonymousLocalhost"/> (which only succeeds the authorization
|
||||
/// requirement without authenticating), this mints a real principal, so the UI behaves
|
||||
/// as a signed-in admin and applies to all clients (not just loopback). Never enable in
|
||||
/// production. See docs/plans/2026-06-16-dashboard-disable-login-design.md.
|
||||
/// </summary>
|
||||
public bool DisableLogin { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Username minted for the auto-login principal when <see cref="DisableLogin"/> is true.
|
||||
/// Null/blank falls back to the GLAuth Administrator test user <c>multi-role</c>.
|
||||
/// </summary>
|
||||
public string? AutoLoginUser { get; init; }
|
||||
```
|
||||
|
||||
**Step 4: Run the test, expect PASS.**
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add src/ZB.MOM.WW.MxGateway.Server/Configuration/DashboardOptions.cs src/ZB.MOM.WW.MxGateway.Tests/Configuration/GatewayOptionsTests.cs
|
||||
git commit -m "feat(dashboard): add DisableLogin + AutoLoginUser options (default off)"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 2: `DashboardAutoLoginAuthenticationHandler` + unit tests
|
||||
|
||||
**Classification:** high-risk (security/auth code)
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 4
|
||||
|
||||
**Files:**
|
||||
- Create: `src/ZB.MOM.WW.MxGateway.Server/Dashboard/DashboardAutoLoginAuthenticationHandler.cs`
|
||||
- Test: `src/ZB.MOM.WW.MxGateway.Tests/Gateway/Dashboard/DashboardAutoLoginAuthenticationHandlerTests.cs`
|
||||
|
||||
**Step 1: Write the failing test** (`DashboardAutoLoginAuthenticationHandlerTests.cs`):
|
||||
|
||||
```csharp
|
||||
using System.Security.Claims;
|
||||
using ZB.MOM.WW.MxGateway.Server.Dashboard;
|
||||
|
||||
namespace ZB.MOM.WW.MxGateway.Tests.Gateway.Dashboard;
|
||||
|
||||
public sealed class DashboardAutoLoginAuthenticationHandlerTests
|
||||
{
|
||||
[Fact]
|
||||
public void CreatePrincipal_MintsAuthenticatedMultiRoleUser()
|
||||
{
|
||||
ClaimsPrincipal principal = DashboardAutoLoginAuthenticationHandler.CreatePrincipal("multi-role");
|
||||
|
||||
Assert.True(principal.Identity!.IsAuthenticated);
|
||||
Assert.Equal("multi-role", principal.Identity!.Name);
|
||||
Assert.True(principal.IsInRole(DashboardRoles.Admin));
|
||||
Assert.True(principal.IsInRole(DashboardRoles.Viewer));
|
||||
}
|
||||
|
||||
[Theory]
|
||||
[InlineData(null)]
|
||||
[InlineData("")]
|
||||
[InlineData(" ")]
|
||||
public void CreatePrincipal_BlankUser_FallsBackToDefault(string? user)
|
||||
{
|
||||
ClaimsPrincipal principal = DashboardAutoLoginAuthenticationHandler.CreatePrincipal(user);
|
||||
|
||||
Assert.Equal(DashboardAutoLoginAuthenticationHandler.DefaultUser, principal.Identity!.Name);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void CreatePrincipal_TrimsUser()
|
||||
{
|
||||
ClaimsPrincipal principal = DashboardAutoLoginAuthenticationHandler.CreatePrincipal(" multi-role ");
|
||||
|
||||
Assert.Equal("multi-role", principal.Identity!.Name);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Step 2: Run it, expect FAIL** (type does not exist).
|
||||
|
||||
Run: `MSBUILDDISABLENODEREUSE=1 dotnet test src/ZB.MOM.WW.MxGateway.Tests/ZB.MOM.WW.MxGateway.Tests.csproj --filter "FullyQualifiedName~DashboardAutoLoginAuthenticationHandlerTests"`
|
||||
|
||||
**Step 3: Implement** `DashboardAutoLoginAuthenticationHandler.cs`:
|
||||
|
||||
```csharp
|
||||
using System.Security.Claims;
|
||||
using System.Text.Encodings.Web;
|
||||
using Microsoft.AspNetCore.Authentication;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ZB.MOM.WW.Auth.AspNetCore;
|
||||
using ZB.MOM.WW.MxGateway.Server.Configuration;
|
||||
|
||||
namespace ZB.MOM.WW.MxGateway.Server.Dashboard;
|
||||
|
||||
/// <summary>
|
||||
/// Authentication handler used ONLY when <c>MxGateway:Dashboard:DisableLogin</c> is true.
|
||||
/// Registered under the dashboard cookie scheme name
|
||||
/// (<see cref="DashboardAuthenticationDefaults.AuthenticationScheme"/>), it authenticates
|
||||
/// EVERY request as the configured dev user with both dashboard roles — no credential check,
|
||||
/// no cookie, no LDAP bind. The minted principal mirrors the shape the real login
|
||||
/// (<see cref="DashboardAuthenticator"/>) produces, so policies and the UI cannot tell it
|
||||
/// apart. DEV/TEST ONLY; never enable in production.
|
||||
/// </summary>
|
||||
public sealed class DashboardAutoLoginAuthenticationHandler
|
||||
: AuthenticationHandler<AuthenticationSchemeOptions>, IAuthenticationSignInHandler
|
||||
{
|
||||
/// <summary>Username used when <c>AutoLoginUser</c> is null or blank.</summary>
|
||||
public const string DefaultUser = "multi-role";
|
||||
|
||||
private readonly string _user;
|
||||
|
||||
/// <summary>Initializes the handler with scheme plumbing and the dashboard options.</summary>
|
||||
/// <param name="options">The per-scheme authentication options monitor.</param>
|
||||
/// <param name="logger">The logger factory the base handler uses.</param>
|
||||
/// <param name="encoder">The URL encoder the base handler uses.</param>
|
||||
/// <param name="gatewayOptions">Gateway options carrying the dashboard auto-login user.</param>
|
||||
public DashboardAutoLoginAuthenticationHandler(
|
||||
IOptionsMonitor<AuthenticationSchemeOptions> options,
|
||||
ILoggerFactory logger,
|
||||
UrlEncoder encoder,
|
||||
IOptions<GatewayOptions> gatewayOptions)
|
||||
: base(options, logger, encoder)
|
||||
=> _user = gatewayOptions.Value.Dashboard.AutoLoginUser ?? DefaultUser;
|
||||
|
||||
/// <summary>No-op: auto-login writes no cookie, so a sign-in has nothing to persist.</summary>
|
||||
/// <param name="user">Ignored.</param>
|
||||
/// <param name="properties">Ignored.</param>
|
||||
/// <returns>A completed task.</returns>
|
||||
public Task SignInAsync(ClaimsPrincipal user, AuthenticationProperties? properties) => Task.CompletedTask;
|
||||
|
||||
/// <summary>No-op: there is no auth cookie to clear; the next request re-authenticates.</summary>
|
||||
/// <param name="properties">Ignored.</param>
|
||||
/// <returns>A completed task.</returns>
|
||||
public Task SignOutAsync(AuthenticationProperties? properties) => Task.CompletedTask;
|
||||
|
||||
/// <inheritdoc />
|
||||
protected override Task<AuthenticateResult> HandleAuthenticateAsync()
|
||||
{
|
||||
ClaimsPrincipal principal = CreatePrincipal(_user);
|
||||
AuthenticationTicket ticket = new(principal, Scheme.Name);
|
||||
|
||||
return Task.FromResult(AuthenticateResult.Success(ticket));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Builds the multi-role dev principal. Null/blank <paramref name="user"/> falls back to
|
||||
/// <see cref="DefaultUser"/>. Claim shape mirrors <see cref="DashboardAuthenticator"/>.
|
||||
/// </summary>
|
||||
/// <param name="user">The configured auto-login username (may be null/blank).</param>
|
||||
/// <returns>An authenticated principal holding both dashboard roles.</returns>
|
||||
internal static ClaimsPrincipal CreatePrincipal(string? user)
|
||||
{
|
||||
string name = string.IsNullOrWhiteSpace(user) ? DefaultUser : user.Trim();
|
||||
|
||||
Claim[] claims =
|
||||
[
|
||||
new Claim(ClaimTypes.NameIdentifier, name),
|
||||
new Claim(ZbClaimTypes.Username, name),
|
||||
new Claim(ZbClaimTypes.Name, name),
|
||||
new Claim(ZbClaimTypes.DisplayName, name),
|
||||
new Claim(ZbClaimTypes.Role, DashboardRoles.Admin),
|
||||
new Claim(ZbClaimTypes.Role, DashboardRoles.Viewer),
|
||||
];
|
||||
|
||||
ClaimsIdentity identity = new(
|
||||
claims,
|
||||
DashboardAuthenticationDefaults.AuthenticationScheme,
|
||||
ZbClaimTypes.Name,
|
||||
ZbClaimTypes.Role);
|
||||
|
||||
return new ClaimsPrincipal(identity);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Step 4: Run the test, expect PASS.**
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add src/ZB.MOM.WW.MxGateway.Server/Dashboard/DashboardAutoLoginAuthenticationHandler.cs src/ZB.MOM.WW.MxGateway.Tests/Gateway/Dashboard/DashboardAutoLoginAuthenticationHandlerTests.cs
|
||||
git commit -m "feat(dashboard): add auto-login auth handler for DisableLogin mode"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 3: Wire the scheme swap + startup warning + wiring/authorization tests
|
||||
|
||||
**Classification:** high-risk (security wiring)
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** none (depends on Task 2's handler)
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/ZB.MOM.WW.MxGateway.Server/Dashboard/DashboardServiceCollectionExtensions.cs`
|
||||
- Test: `src/ZB.MOM.WW.MxGateway.Tests/Gateway/Dashboard/DashboardDisableLoginTests.cs` (create)
|
||||
|
||||
**Step 1: Write the failing tests** (`DashboardDisableLoginTests.cs`):
|
||||
|
||||
```csharp
|
||||
using System.Security.Claims;
|
||||
using Microsoft.AspNetCore.Authentication;
|
||||
using Microsoft.AspNetCore.Authentication.Cookies;
|
||||
using Microsoft.AspNetCore.Authorization;
|
||||
using Microsoft.AspNetCore.Builder;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using ZB.MOM.WW.MxGateway.Server;
|
||||
using ZB.MOM.WW.MxGateway.Server.Dashboard;
|
||||
|
||||
namespace ZB.MOM.WW.MxGateway.Tests.Gateway.Dashboard;
|
||||
|
||||
public sealed class DashboardDisableLoginTests
|
||||
{
|
||||
[Fact]
|
||||
public async Task DisableLoginOff_CookieSchemeUsesCookieHandler()
|
||||
{
|
||||
await using WebApplication app = GatewayApplication.Build([]);
|
||||
IAuthenticationSchemeProvider provider =
|
||||
app.Services.GetRequiredService<IAuthenticationSchemeProvider>();
|
||||
|
||||
AuthenticationScheme? scheme = await provider.GetSchemeAsync(
|
||||
DashboardAuthenticationDefaults.AuthenticationScheme);
|
||||
|
||||
Assert.NotNull(scheme);
|
||||
Assert.Equal(typeof(CookieAuthenticationHandler), scheme!.HandlerType);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task DisableLoginOn_CookieSchemeUsesAutoLoginHandler()
|
||||
{
|
||||
await using WebApplication app = GatewayApplication.Build(
|
||||
["--MxGateway:Dashboard:DisableLogin=true"]);
|
||||
IAuthenticationSchemeProvider provider =
|
||||
app.Services.GetRequiredService<IAuthenticationSchemeProvider>();
|
||||
|
||||
AuthenticationScheme? scheme = await provider.GetSchemeAsync(
|
||||
DashboardAuthenticationDefaults.AuthenticationScheme);
|
||||
|
||||
Assert.NotNull(scheme);
|
||||
Assert.Equal(typeof(DashboardAutoLoginAuthenticationHandler), scheme!.HandlerType);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task DisableLoginOn_AutoLoginPrincipalSatisfiesAdminAndViewerPolicies()
|
||||
{
|
||||
await using WebApplication app = GatewayApplication.Build(
|
||||
["--MxGateway:Dashboard:DisableLogin=true"]);
|
||||
IAuthorizationService authorization =
|
||||
app.Services.GetRequiredService<IAuthorizationService>();
|
||||
ClaimsPrincipal user = DashboardAutoLoginAuthenticationHandler.CreatePrincipal("multi-role");
|
||||
|
||||
Assert.True((await authorization.AuthorizeAsync(
|
||||
user, resource: null, DashboardAuthenticationDefaults.AdminPolicy)).Succeeded);
|
||||
Assert.True((await authorization.AuthorizeAsync(
|
||||
user, resource: null, DashboardAuthenticationDefaults.ViewerPolicy)).Succeeded);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
> Note: `AuthorizeAsync` invokes the real `DashboardAuthorizationHandler` against the minted
|
||||
> principal — its role-check branch succeeds independent of `HttpContext` (loopback check
|
||||
> returns false with no request, and `Authentication.Mode` defaults to `ApiKey`), so this
|
||||
> proves the policies pass purely on the minted roles.
|
||||
|
||||
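**Step 2: Run them, expect FAIL** (only `DisableLoginOn_CookieSchemeUsesAutoLoginHandler` fails — handler not yet wired; cookie handler still registered. `DisableLoginOff_CookieSchemeUsesCookieHandler` and `DisableLoginOn_AutoLoginPrincipalSatisfiesAdminAndViewerPolicies` already pass at this point: the policy test mints its principal via `CreatePrincipal` from Task 2 and does not depend on the scheme swap).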
|
||||
|
||||
Run: `MSBUILDDISABLENODEREUSE=1 dotnet test src/ZB.MOM.WW.MxGateway.Tests/ZB.MOM.WW.MxGateway.Tests.csproj --filter "FullyQualifiedName~DashboardDisableLoginTests"`
|
||||
|
||||
**Step 3: Rewire `AddGatewayDashboard`.** In `DashboardServiceCollectionExtensions.cs`, replace the current authentication-builder block:
|
||||
|
||||
```csharp
|
||||
services
|
||||
.AddAuthentication(DashboardAuthenticationDefaults.AuthenticationScheme)
|
||||
.AddCookie(DashboardAuthenticationDefaults.AuthenticationScheme, cookieOptions =>
|
||||
{
|
||||
// ... existing cookie config ...
|
||||
})
|
||||
.AddScheme<AuthenticationSchemeOptions, HubTokenAuthenticationHandler>(
|
||||
DashboardAuthenticationDefaults.HubAuthenticationScheme,
|
||||
_ => { });
|
||||
```
|
||||
|
||||
with:
|
||||
|
||||
```csharp
|
||||
// DEV/TEST ONLY. Read directly from configuration here because authentication scheme
|
||||
// registration runs before options binding. Key mirrors DashboardOptions.DisableLogin.
|
||||
bool disableLogin = configuration.GetValue<bool>("MxGateway:Dashboard:DisableLogin");
|
||||
|
||||
AuthenticationBuilder authentication =
|
||||
services.AddAuthentication(DashboardAuthenticationDefaults.AuthenticationScheme);
|
||||
|
||||
if (disableLogin)
|
||||
{
|
||||
// Register an always-authenticating handler UNDER the cookie scheme name, so the
|
||||
// Viewer/Admin/HubClients policies (which all resolve this scheme) authenticate
|
||||
// through it as the multi-role dev user — zero policy or page changes.
|
||||
authentication.AddScheme<AuthenticationSchemeOptions, DashboardAutoLoginAuthenticationHandler>(
|
||||
DashboardAuthenticationDefaults.AuthenticationScheme,
|
||||
_ => { });
|
||||
|
||||
// Loud, once-at-startup warning (emitted when GatewayOptions is first resolved).
|
||||
services.AddOptions<GatewayOptions>().PostConfigure<ILoggerFactory>((gatewayOptions, loggerFactory) =>
|
||||
loggerFactory
|
||||
.CreateLogger("ZB.MOM.WW.MxGateway.Server.Dashboard.DisableLogin")
|
||||
.LogWarning(
|
||||
"DASHBOARD LOGIN DISABLED (MxGateway:Dashboard:DisableLogin=true) — every request is "
|
||||
+ "authenticated as '{User}' with full permissions ({Roles}). Dev/test only; never "
|
||||
+ "enable in production.",
|
||||
gatewayOptions.Dashboard.AutoLoginUser ?? DashboardAutoLoginAuthenticationHandler.DefaultUser,
|
||||
$"{DashboardRoles.Admin}, {DashboardRoles.Viewer}"));
|
||||
}
|
||||
else
|
||||
{
|
||||
authentication.AddCookie(DashboardAuthenticationDefaults.AuthenticationScheme, cookieOptions =>
|
||||
{
|
||||
// ... MOVE the existing cookie config body here unchanged ...
|
||||
});
|
||||
}
|
||||
|
||||
authentication.AddScheme<AuthenticationSchemeOptions, HubTokenAuthenticationHandler>(
|
||||
DashboardAuthenticationDefaults.HubAuthenticationScheme,
|
||||
_ => { });
|
||||
```
|
||||
|
||||
Notes for the implementer:
|
||||
- Keep the existing `services.AddOptions<CookieAuthenticationOptions>(scheme).Configure(...)` block (RequireHttpsCookie / cookie-name) as-is. When `disableLogin` is on it configures an options object no handler reads — harmless dead config; not worth guarding.
|
||||
- Required usings should already be present (`Microsoft.AspNetCore.Authentication`, `Microsoft.Extensions.Configuration`, `Microsoft.Extensions.Logging`, the `Configuration` namespace for `GatewayOptions`). Add any that are missing.
|
||||
- `configuration.GetValue<bool>` defaults to `false` when the key is absent — preserves default-off.
|
||||
|
||||
**Step 4: Run the tests, expect PASS** (all three).
|
||||
|
||||
**Step 5: Run the broader dashboard auth tests to confirm no regression:**
|
||||
|
||||
Run: `MSBUILDDISABLENODEREUSE=1 dotnet test src/ZB.MOM.WW.MxGateway.Tests/ZB.MOM.WW.MxGateway.Tests.csproj --filter "FullyQualifiedName~Dashboard"`
|
||||
Expected: all pass (existing `DashboardCookieOptionsTests`, `DashboardHubsRegistrationTests`, etc., still green — they build with the flag off).
|
||||
|
||||
> The startup warning is verified by inspection / manual run (`dotnet run … --MxGateway:Dashboard:DisableLogin=true` logs the warning once). It is not asserted automatically — capturing a startup log line would require injecting a log provider the `Build` harness does not expose, and the warning is a safety nicety, not core behavior.
|
||||
|
||||
**Step 6: Commit**
|
||||
|
||||
```bash
|
||||
git add src/ZB.MOM.WW.MxGateway.Server/Dashboard/DashboardServiceCollectionExtensions.cs src/ZB.MOM.WW.MxGateway.Tests/Gateway/Dashboard/DashboardDisableLoginTests.cs
|
||||
git commit -m "feat(dashboard): swap to auto-login handler when DisableLogin is set"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 4: Documentation
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~3 min
|
||||
**Parallelizable with:** Task 2 (disjoint files — docs vs src/test)
|
||||
|
||||
**Files:**
|
||||
- Modify: `docs/GatewayConfiguration.md`
|
||||
- Modify: `docs/GatewayDashboardDesign.md`
|
||||
- Modify: `CLAUDE.md`
|
||||
|
||||
**Step 1:** In `docs/GatewayConfiguration.md`, add `MxGateway:Dashboard:DisableLogin` (bool, default `false`) and `MxGateway:Dashboard:AutoLoginUser` (string, unset by default; a null/blank value falls back to `multi-role`) to the dashboard options section. Describe: dev/test only; auto-authenticates every request as `AutoLoginUser` with both roles; applies to all clients (not just loopback); never enable in production. Note it differs from `AllowAnonymousLocalhost` (which only bypasses authorization without minting a principal).
|
||||
|
||||
**Step 2:** In `docs/GatewayDashboardDesign.md`, document the auth-scheme swap: when the flag is on, the cookie handler is replaced by `DashboardAutoLoginAuthenticationHandler` under the same scheme name; explain *why* (every policy resolves that scheme, so no policy/page changes), and that it is dev/test only with a loud startup warning.
|
||||
|
||||
**Step 3:** In `CLAUDE.md`, in the Authentication section near the `Dashboard:AllowAnonymousLocalhost` sentence, add one sentence: `MxGateway:Dashboard:DisableLogin` (default off) auto-authenticates every dashboard request as `AutoLoginUser` (default `multi-role`) with all roles — dev/test only.
|
||||
|
||||
**Step 4: Commit**
|
||||
|
||||
```bash
|
||||
git add docs/GatewayConfiguration.md docs/GatewayDashboardDesign.md CLAUDE.md
|
||||
git commit -m "docs: document dashboard DisableLogin / AutoLoginUser dev flag"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Verification (after all tasks)
|
||||
|
||||
```bash
|
||||
MSBUILDDISABLENODEREUSE=1 dotnet test src/ZB.MOM.WW.MxGateway.Tests/ZB.MOM.WW.MxGateway.Tests.csproj \
|
||||
--filter "FullyQualifiedName~Dashboard|FullyQualifiedName~GatewayOptions"
|
||||
```
|
||||
|
||||
Expected: all dashboard + options tests pass. (Known macOS-only failures `OrphanWorkerTerminatorTests` ×2 and the parallel-load `SqliteAuthStoreTests` TLS temp-file test are unrelated and out of this filter.)
|
||||
|
||||
Then `superpowers-extended-cc:finishing-a-development-branch` to merge/push.
|
||||
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"planPath": "docs/plans/2026-06-16-dashboard-disable-login.md",
|
||||
"tasks": [
|
||||
{"id": 136, "subject": "Task 1: Config fields on DashboardOptions", "status": "completed"},
|
||||
{"id": 137, "subject": "Task 2: DashboardAutoLoginAuthenticationHandler + unit tests", "status": "completed", "blockedBy": [136]},
|
||||
{"id": 138, "subject": "Task 3: Wire scheme swap + startup warning + wiring/authorization tests", "status": "completed", "blockedBy": [137]},
|
||||
{"id": 139, "subject": "Task 4: Documentation", "status": "completed", "blockedBy": [136]}
|
||||
],
|
||||
"lastUpdated": "2026-06-16"
|
||||
}
|
||||
@@ -0,0 +1,171 @@
|
||||
# Still-Pending §8 Completion — Design
|
||||
|
||||
> **Status:** Approved 2026-06-16. Next step: `superpowers-extended-cc:writing-plans`.
|
||||
|
||||
**Goal:** Close the actionable items in `stillpending.md` §8 ("Deferred test-coverage
|
||||
follow-ups, never filed as findings") — the only Bucket-A work that is neither
|
||||
vendor-gated nor live-rig-gated and is not already covered by the session-resilience
|
||||
epic plan.
|
||||
|
||||
**Scope decision:** Bucket A only (actionable code/test work). The session-resilience
|
||||
epic (Tasks 13–28) is already planned in `docs/plans/2026-06-15-session-resilience.md`
|
||||
and is explicitly **out of scope** here — resume it separately. Vendor-gated
|
||||
(§1.4/§3.4/§3.5) and live-rig/capture-gated (§1.3/§3.x/§5/§6.1) items cannot be
|
||||
completed from this dev box and are out of scope.
|
||||
|
||||
**Approach:** "C" — the complete option, including new in-process gRPC test
|
||||
infrastructure for the Java streaming/galaxy CLI commands and a full bounded
|
||||
ready-wait in the gateway session hot path.
|
||||
|
||||
---
|
||||
|
||||
## Important correction (verified 2026-06-16)
|
||||
|
||||
The three §8 items cite findings marked **Resolved** in the review backlog, but those
|
||||
resolutions either did **not** survive into the current tree or left a follow-up unbuilt:
|
||||
|
||||
- The Java bulk-family CLI tests that `Client.Java-026` (resolved 2026-05-20) describes
|
||||
were written against the old `com.dohertylan.mxgateway` package. After the rename to
|
||||
`com.zb.mom.ww`, the current
|
||||
`clients/java/zb-mom-ww-mxgateway-cli/.../MxGatewayCliTests.java` has **zero** coverage
|
||||
for `read-bulk`, `write-bulk`, `write2-bulk`, `write-secured-bulk`,
|
||||
`write-secured2-bulk`, `bench-read-bulk`, `stream-events`, `close-session`,
|
||||
`galaxy-discover`, `galaxy-watch`. (`galaxy-test-connection`/`galaxy-last-deploy`/
|
||||
`galaxy-browse`/`stream-alarms` **do** have tests now.)
|
||||
- `Server-030` (both states in the not-ready diagnostic) **is** done — confirmed at
|
||||
`src/ZB.MOM.WW.MxGateway.Server/Sessions/GatewaySession.cs:1676`. The *deferred
|
||||
follow-up* — should the gateway briefly wait for worker-Ready before failing fast? —
|
||||
is genuinely unbuilt.
|
||||
- `Tests-023` extracted a canonical `TestSupport/FakeWorkerProcess(int)`, yet three test
|
||||
files still define private nested copies.
|
||||
|
||||
So §8's gap is real and current.
|
||||
|
||||
---
|
||||
|
||||
## Workstreams
|
||||
|
||||
Four independently landable workstreams.
|
||||
|
||||
| WS | Title | Files (language) | Classification | Depends on |
|
||||
|----|-------|------------------|----------------|------------|
|
||||
| A | Synchronous Java CLI tests (7 commands) | Java CLI test | small | — |
|
||||
| B | In-process gRPC harness + streaming/galaxy CLI tests (3 commands) | Java CLI test + small CLI seam | standard | A (shares test file) |
|
||||
| C | Worker-Ready bounded ready-wait | C# server session hot path | high-risk | — |
|
||||
| D | `FakeWorkerProcess` consolidation | C# tests | small | — |
|
||||
|
||||
A, C, D are mutually independent (disjoint files/languages) and may be dispatched in
|
||||
parallel. B follows A because both edit `MxGatewayCliTests.java`.
|
||||
|
||||
---
|
||||
|
||||
## WS-A — Synchronous Java CLI tests
|
||||
|
||||
**What:** Round-trip CLI tests for the 7 commands testable through the existing
|
||||
`FakeSession`/`FakeClient` seam (the same seam `subscribe-bulk`/`write` already use):
|
||||
`read-bulk`, `write-bulk`, `write2-bulk`, `write-secured-bulk`, `write-secured2-bulk`,
|
||||
`bench-read-bulk`, `close-session`.
|
||||
|
||||
**How:** Upgrade `FakeSession` (currently returns empty lists) to per-call recorders
|
||||
that capture the parsed entries (timeout, typed values via the shared `parseValue(type,
|
||||
text)` switch, user-ids, timestamp) and synthesize one `BulkReadResult`/`BulkWriteResult`
|
||||
per requested handle, so JSON-shape assertions exercise the
|
||||
`bulkReadResultMap`/`bulkWriteResultMap` serializers. One `@Test` per command:
|
||||
|
||||
- `read-bulk`: `--timeout-ms` reaches session; JSON carries `tagAddress`/`itemHandle`/
|
||||
`wasCached`/`quality`.
|
||||
- `write-bulk`: `--type int32 --values 111,222 --user-id 5` parses through `parseValue`;
|
||||
entries built with the expected typed `MxValue` + `userId`.
|
||||
- `write2-bulk`: `--timestamp …Z` reaches the entry as `timestampValue`
|
||||
(`hasTimestampValue()` true).
|
||||
- `write-secured-bulk`: `--current-user-id`/`--verifier-user-id` both propagate.
|
||||
- `write-secured2-bulk`: timestamp + both user-ids.
|
||||
- `bench-read-bulk`: 1s steady / 0s warmup; assert cross-language schema keys
|
||||
(`language=java`, `command=bench-read-bulk`, `totalCalls`, `successfulCalls`,
|
||||
`failedCalls`, `callsPerSecond`, `latencyMs.p50/p95/p99`).
|
||||
- `close-session`: `CloseSessionReply` round-trips through `FakeClient`.
|
||||
|
||||
**Verify:** `gradle :zb-mom-ww-mxgateway-cli:test --tests '*MxGatewayCliTests'`.
|
||||
|
||||
---
|
||||
|
||||
## WS-B — In-process gRPC harness + streaming/galaxy CLI tests
|
||||
|
||||
**Why infra is required:** `MxEventStream` and `DeployEventStream` have package-private
|
||||
constructors; `GalaxyRepositoryClient` is `final` with a static `connect()` and
|
||||
`GalaxyCommand` has **no** injectable factory. None of `stream-events`/`galaxy-watch`/
|
||||
`galaxy-discover` can be faked through the `FakeSession` seam.
|
||||
|
||||
**What:** A JUnit fixture that starts a gRPC **`InProcessServer`** hosting scripted
|
||||
`MxAccessGateway` + `GalaxyRepository` service implementations and exposes an in-process
|
||||
`Channel`. The **real** `MxGatewayClient`/`GalaxyRepositoryClient` connect to it, so the
|
||||
real `MxEventStream`/`DeployEventStream` queue-draining and `GalaxyRepositoryClient`
|
||||
paging are exercised end-to-end (highest fidelity; no reflection, no package hacks).
|
||||
|
||||
- **Production change (CLI module only, not the library):** add a `GalaxyClientFactory`
|
||||
seam to `GalaxyCommand` mirroring the existing `MxGatewayCliClientFactory`, so galaxy
|
||||
commands can target the in-process channel.
|
||||
- `stream-events`: server streams a scripted `MxEvent` sequence → assert CLI render,
|
||||
including the unsigned-uint64 worker-sequence regression.
|
||||
- `galaxy-watch`: server streams scripted deploy events → assert CLI feed output.
|
||||
- `galaxy-discover`: server returns a paged `GalaxyObject` hierarchy → assert CLI JSON.
|
||||
|
||||
The 7 synchronous commands stay on the lightweight `FakeSession` seam (YAGNI — no reason
|
||||
to route them through a server).
|
||||
|
||||
**Verify:** `gradle :zb-mom-ww-mxgateway-cli:test --tests '*MxGatewayCliTests'`.
|
||||
|
||||
---
|
||||
|
||||
## WS-C — Worker-Ready bounded ready-wait
|
||||
|
||||
**Problem:** `GetReadyWorkerClient` (`GatewaySession.cs:1665`) fails fast when the session
|
||||
is `Ready` but the worker client's `WorkerClientState` has diverged (`Handshaking` after a
|
||||
heartbeat blip, etc.). The both-states diagnostic exists; a brief wait does not.
|
||||
|
||||
**Constraint:** the check runs inside the `_syncRoot` lock — we cannot sleep/poll there.
|
||||
|
||||
**Design (pinned decisions):**
|
||||
|
||||
- New `GetReadyWorkerClientAsync`: read state under `_syncRoot`; **if** session is `Ready`
|
||||
but worker is **transient** (`Handshaking`/`Created`), release the lock, poll at a short
|
||||
interval (e.g. 25 ms) until the worker reaches `Ready` or a bounded timeout elapses, then
|
||||
re-check under the lock.
|
||||
- **Terminal worker states (`Faulted`/`Closing`/`Closed`/null) fail fast immediately** —
|
||||
never wait; retrying a faulted worker is pointless and would mask the fault.
|
||||
- New config `MxGateway:Sessions:WorkerReadyWaitTimeoutMs` (milliseconds) on `SessionOptions`,
|
||||
**default `0` = disabled** (preserves today's exact fail-fast behavior unless opted in),
|
||||
validated `>= 0` by the options validator. Document in `docs/GatewayConfiguration.md`.
|
||||
- The both-states diagnostic is preserved for the final failure. Callers at
|
||||
`GatewaySession.cs:918` and `:1263` become `await`.
|
||||
|
||||
**Tests:**
|
||||
- Handshaking→Ready within the timeout succeeds (worker invoked once).
|
||||
- Faulted fails fast with both states in the message, zero waiting.
|
||||
- Timeout elapses → fails with both states.
|
||||
- Default `0` → unchanged fail-fast (no wait, no behavior change).
|
||||
|
||||
**Verify:** `dotnet test src/ZB.MOM.WW.MxGateway.Tests --filter "FullyQualifiedName~SessionManager"`
|
||||
(plus the options-validator test class).
|
||||
|
||||
---
|
||||
|
||||
## WS-D — `FakeWorkerProcess` consolidation
|
||||
|
||||
**What:** Replace the private nested `FakeWorkerProcess` in
|
||||
`SessionWorkerClientFactoryFakeWorkerTests`, `WorkerProcessLauncherTests`, and
|
||||
`WorkerClientTests` with the canonical `TestSupport/FakeWorkerProcess(int)` (which already
|
||||
has `MarkExited`/`Kill`/TCS-backed `WaitForExitAsync`). Where a nested copy carries extra
|
||||
behavior the canonical lacks, fold that into the canonical first, then delete the copies.
|
||||
|
||||
**Verify:** `dotnet test src/ZB.MOM.WW.MxGateway.Tests --filter "FullyQualifiedName~WorkerClient | FullyQualifiedName~WorkerProcessLauncher | FullyQualifiedName~SessionWorkerClientFactory"`.
|
||||
|
||||
---
|
||||
|
||||
## Testing & sequencing
|
||||
|
||||
Per the targeted-test rule in `CLAUDE.md` (Source Update Workflow): each task runs only
|
||||
its own filtered tests. Run the full gateway suite at most once, after WS-C + WS-D land.
|
||||
|
||||
Out-of-scope items remain recorded in `stillpending.md` (vendor/rig-gated) and the
|
||||
session-resilience epic (`oldtasks.md`).
|
||||
@@ -0,0 +1,327 @@
|
||||
# Still-Pending §8 Completion Implementation Plan
|
||||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers-extended-cc:executing-plans (or subagent-driven-development) to implement this plan task-by-task.
|
||||
|
||||
**Goal:** Close the three actionable `stillpending.md` §8 test-coverage follow-ups — Java CLI coverage for the 10 untested subcommands, the gateway Worker-Ready bounded ready-wait, and the `FakeWorkerProcess` de-duplication.
|
||||
|
||||
**Architecture:** Four independent workstreams. Java CLI tests split into a synchronous tier (existing `FakeSession` seam) and a streaming/galaxy tier (new in-process gRPC harness over the *real* client, using the public `Channel` constructors that already exist). C# work adds an opt-in bounded ready-wait in the session hot path (default off = no behavior change) and consolidates three duplicate test fakes onto the canonical `TestSupport/FakeWorkerProcess`.
|
||||
|
||||
**Tech Stack:** Java 21 + Gradle + picocli + grpc-java (`grpc-inprocess`, `grpc-testing`); .NET 10 + xUnit.
|
||||
|
||||
**Design doc:** `docs/plans/2026-06-16-stillpending-section8-design.md`. Branch: `feat/stillpending-section8`.
|
||||
|
||||
**Key facts verified during planning:**
|
||||
- `clients/java/.../cli/MxGatewayCli.java`: `GatewayCommand` has a `clientFactory` (`MxGatewayCliClientFactory`) seam tests already override; `GalaxyCommand.connect()` (line ~368) calls the *static* `GalaxyRepositoryClient.connect(...)` with **no** injectable seam. The `FakeSession`/`FakeClient`/`FakeClientFactory` test doubles live in `MxGatewayCliTests.java` (~lines 636–984).
|
||||
- `MxGatewayClient(Channel, MxGatewayClientOptions)` and `GalaxyRepositoryClient(Channel, MxGatewayClientOptions)` are **public** constructors (line 67 of each) — point them at an in-process channel, no library change needed.
|
||||
- `grpc-inprocess` + `grpc-testing` are test deps in the **client** module only; the **cli** module's `build.gradle` needs them added.
|
||||
- C#: option class is `SessionOptions` (`src/ZB.MOM.WW.MxGateway.Server/Configuration/SessionOptions.cs`), config section `MxGateway:Sessions`, `{ get; init; }` style. `GetReadyWorkerClient()` is at `GatewaySession.cs:1665`; callers are `InvokeAsync` (`:918`, already async) and `ReadEventsAsync` (`:1263`, returns `IAsyncEnumerable` non-async — must become an async iterator).
|
||||
|
||||
**Out of scope:** Session-resilience epic (Tasks 13–28, see `docs/plans/2026-06-15-session-resilience.md`); vendor/rig-gated §1.3/§1.4/§3.x/§5/§6.1 items.
|
||||
|
||||
**Testing rule (CLAUDE.md):** Each task runs ONLY its own filtered tests. Full gateway suite at most once, after Tasks 8 + 9 land.
|
||||
|
||||
---
|
||||
|
||||
## Workstream / dependency overview
|
||||
|
||||
| Task | WS | Title | Class | Files | blockedBy | ∥ with |
|
||||
|------|----|-------|-------|-------|-----------|--------|
|
||||
| 1 | A | FakeSession recorders + read/write/write2-bulk tests | small | cli test | — | 3,4,7,9 |
|
||||
| 2 | A | secured/secured2/bench-bulk + close-session tests | small | cli test | 1 | 3,4,7,9 |
|
||||
| 3 | B | GalaxyClientFactory seam + cli grpc test deps | small | cli main + build.gradle | — | 1,2,4,7,8,9 |
|
||||
| 4 | B | In-process gRPC harness fixture | standard | new cli test file | — | 1,2,3,7,8,9 |
|
||||
| 5 | B | stream-events test via harness | standard | cli test | 2,4 | 7,8,9 |
|
||||
| 6 | B | galaxy-watch + galaxy-discover tests via harness | standard | cli test | 3,5 | 7,8,9 |
|
||||
| 7 | C | WorkerReadyWaitTimeoutMs option + validator + doc | small | C# config + doc | — | all Java, 9 |
|
||||
| 8 | C | Bounded ready-wait in GatewaySession + tests | high-risk | C# server + test | 7 | all Java, 9 |
|
||||
| 9 | D | FakeWorkerProcess consolidation | standard | C# tests | — | all |
|
||||
|
||||
---
|
||||
|
||||
## Task 1: FakeSession recorders + read-bulk / write-bulk / write2-bulk CLI tests
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 3, 4, 7, 9
|
||||
|
||||
**Files:**
|
||||
- Test: `clients/java/zb-mom-ww-mxgateway-cli/src/test/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCliTests.java`
|
||||
|
||||
**Context:** `FakeSession` (a `MxGatewayCli.MxGatewayCliSession` impl, ~line 812) currently returns empty lists from `readBulk`/`writeBulk`/`write2Bulk`. Empty returns make CLI JSON-shape assertions vacuous. Mirror the existing `subscribeBulkCommandPrintsResults` test style (uses `FakeClientFactory` → `execute(...)` → asserts on captured stdout JSON).
|
||||
|
||||
**Step 1: Upgrade `FakeSession` to record + synthesize.** Add fields capturing the last call args (e.g. `lastReadBulkTimeoutMs`, `lastReadBulkItems`, `lastWriteBulkEntries`, `lastWrite2BulkEntries`) and change `readBulk`/`writeBulk`/`write2Bulk` to synthesize **one** result per requested handle: a `BulkReadResult` carrying `tagAddress`, `itemHandle`, `wasCached`, `quality`; a `BulkWriteResult` carrying the handle + an `Ok` status. Keep empty-list default only when no handles requested.
|
||||
|
||||
**Step 2: Write the three failing tests:**
|
||||
- `readBulkCommandForwardsTimeoutAndPrintsResults` — run `read-bulk` with `--timeout-ms 750` + two tags; assert `lastReadBulkTimeoutMs == 750` and the stdout JSON carries per-tag `tagAddress`/`itemHandle`/`wasCached`/`quality`.
|
||||
- `writeBulkCommandParsesTypedValuesAndPrintsResults` — `--type int32 --values 111,222 --user-id 5`; assert entries parsed through the shared `parseValue` switch into typed `MxValue`s with `userId==5`, and JSON shows the `bulkWriteResultMap`.
|
||||
- `write2BulkCommandForwardsTimestampAndPrintsResults` — `--timestamp 2026-05-20T00:00:00Z`; assert the entry's `hasTimestampValue()` is true.
|
||||
|
||||
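**Step 3: Run them and confirm they fail** — with the Step-1 recorder fields declared (so the tests compile) but `readBulk`/`writeBulk`/`write2Bulk` still returning empty lists and recording nothing: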
|
||||
`gradle :zb-mom-ww-mxgateway-cli:test --tests '*MxGatewayCliTests'` (from `clients/java`).
|
||||
|
||||
**Step 4: With the Step-1 recorder in place, run again — expect PASS.**
|
||||
|
||||
**Step 5: Commit.**
|
||||
```bash
|
||||
git add clients/java/zb-mom-ww-mxgateway-cli/src/test/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCliTests.java
|
||||
git commit -m "test(java-cli): cover read-bulk/write-bulk/write2-bulk round trips"
|
||||
```
|
||||
|
||||
**Acceptance:** 3 new green tests; `FakeSession` records args and returns one row per handle.
|
||||
|
||||
---
|
||||
|
||||
## Task 2: secured / secured2 / bench-read-bulk + close-session CLI tests
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 3, 4, 7, 9
|
||||
**blockedBy:** Task 1 (same test file + shared `FakeSession` recorders)
|
||||
|
||||
**Files:**
|
||||
- Test: `clients/java/zb-mom-ww-mxgateway-cli/src/test/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCliTests.java`
|
||||
|
||||
**Step 1: Extend `FakeSession`/`FakeClient` recorders** for `writeSecuredBulk`/`writeSecured2Bulk` (capture `currentUserId`/`verifierUserId`) and add a `CloseSessionReply` recorder to `FakeClient` for `closeSession`.
|
||||
|
||||
**Step 2: Write the four failing tests:**
|
||||
- `writeSecuredBulkCommandForwardsUserIdsAndPrintsResults` — `--current-user-id 7 --verifier-user-id 8`; assert both propagate.
|
||||
- `writeSecured2BulkCommandForwardsTimestampAndUserIdsAndPrintsResults` — timestamp + both user-ids.
|
||||
- `benchReadBulkCommandEmitsJsonSchemaKeys` — `--duration-seconds 1 --warmup-seconds 0`; assert the JSON contains `language=java`, `command=bench-read-bulk`, `bulkSize`, `totalCalls`, `successfulCalls`, `failedCalls`, `callsPerSecond`, `latencyMs.p50/p95/p99`, and the synthesized `tags`. Assert schema keys, NOT numeric values.
|
||||
- `closeSessionCommandPrintsReply` — assert the `CloseSessionReply` round-trips to stdout.
|
||||
|
||||
**Step 3–4: Run failing → implement recorders → run PASS** (same gradle command as Task 1, narrowed `--tests '*MxGatewayCliTests'`).
|
||||
|
||||
**Step 5: Commit** `test(java-cli): cover secured/secured2/bench bulk + close-session`.
|
||||
|
||||
**Acceptance:** 4 new green tests; bench test pins schema keys only.
|
||||
|
||||
---
|
||||
|
||||
## Task 3: GalaxyClientFactory seam + cli grpc test deps
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** Task 1, 2, 4, 7, 8, 9
|
||||
|
||||
**Files:**
|
||||
- Modify: `clients/java/zb-mom-ww-mxgateway-cli/src/main/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCli.java` (`GalaxyCommand`, ~lines 361–371; `connect()` ~line 368)
|
||||
- Modify: `clients/java/zb-mom-ww-mxgateway-cli/build.gradle`
|
||||
|
||||
**Context:** `GalaxyCommand.connect()` hard-calls `GalaxyRepositoryClient.connect(...)`. Mirror the existing `MxGatewayCliClientFactory` seam so tests can supply an in-process-backed client.
|
||||
|
||||
**Step 1: Add the seam.** Introduce an interface `GalaxyClientFactory { GalaxyRepositoryClient connect(MxGatewayClientOptions options); }`, give `GalaxyCommand` a `final GalaxyClientFactory galaxyClientFactory` field (constructor-injected, defaulting to `GalaxyRepositoryClient::connect` for production wiring), and change `connect()` to delegate to it. Thread the factory through the picocli command construction the same way `clientFactory` is threaded for gateway commands. Keep the default production path identical (no behavior change).
|
||||
|
||||
**Step 2: Add test deps** to `clients/java/zb-mom-ww-mxgateway-cli/build.gradle`:
|
||||
```gradle
|
||||
testImplementation "io.grpc:grpc-inprocess:${grpcVersion}"
|
||||
testImplementation "io.grpc:grpc-testing:${grpcVersion}"
|
||||
```
|
||||
|
||||
**Step 3: Build to confirm wiring compiles + production galaxy commands still resolve:**
|
||||
`gradle :zb-mom-ww-mxgateway-cli:compileJava :zb-mom-ww-mxgateway-cli:compileTestJava` (from `clients/java`).
|
||||
|
||||
**Step 4: Run the existing galaxy CLI tests to confirm no regression:**
|
||||
`gradle :zb-mom-ww-mxgateway-cli:test --tests '*MxGatewayCliTests'`.
|
||||
|
||||
**Step 5: Commit** `feat(java-cli): inject GalaxyClientFactory seam; add grpc inprocess test deps`.
|
||||
|
||||
**Acceptance:** Seam present, default wiring unchanged, existing galaxy tests green, in-process deps available to the test source set.
|
||||
|
||||
---
|
||||
|
||||
## Task 4: In-process gRPC harness fixture
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 1, 2, 3, 7, 8, 9
|
||||
|
||||
**Files:**
|
||||
- Create: `clients/java/zb-mom-ww-mxgateway-cli/src/test/java/com/zb/mom/ww/mxgateway/cli/InProcessGatewayHarness.java`
|
||||
|
||||
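**Context:** Streaming/galaxy commands can't use `FakeSession` (real `MxEventStream`/`DeployEventStream` package-private ctors; `GalaxyRepositoryClient` final). Drive the *real* client over an in-process channel against scripted fake services. The public `MxGatewayClient(Channel, options)` / `GalaxyRepositoryClient(Channel, options)` ctors make this clean. Note: the fixture needs the `grpc-inprocess`/`grpc-testing` test dependencies that Task 3 Step 2 adds to the cli module's `build.gradle`; if Task 3 has not landed yet, add those two `testImplementation` lines first or `compileTestJava` will fail.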
|
||||
|
||||
**Step 1: Build the fixture** — `AutoCloseable`, unique server name per instance:
|
||||
- Start `InProcessServerBuilder.forName(name).directExecutor().addService(fakeGateway).addService(fakeGalaxy).build().start()`.
|
||||
- Expose `ManagedChannel channel()` via `InProcessChannelBuilder.forName(name).directExecutor().build()`.
|
||||
- `fakeGateway` extends `MxAccessGatewayGrpc.MxAccessGatewayImplBase`, overriding `streamEvents` (push a scripted `List<MxEvent>` to the `StreamObserver`, then `onCompleted`) and `closeSession`. `fakeGalaxy` extends `GalaxyRepositoryGrpc.GalaxyRepositoryImplBase`, overriding `discoverHierarchy` (return a small paged `GalaxyObject` set) and `watchDeployEvents` (stream scripted deploy events). Make the scripted payloads settable on the harness (constructor args or setters).
|
||||
- Provide helpers: `MxGatewayClient gatewayClient()` → `new MxGatewayClient(channel(), testOptions())`; `GalaxyRepositoryClient galaxyClient()` → `new GalaxyRepositoryClient(channel(), testOptions())`, where `testOptions()` builds an `MxGatewayClientOptions` with a dummy api-key.
|
||||
- `close()` shuts down channel + server.
|
||||
|
||||
**Step 2: Smoke-verify the harness in isolation** — add a temporary `@Test` (or a tiny self-test) that opens the harness, calls `gatewayClient()` and streams one scripted event, then delete it before commit. Build:
|
||||
`gradle :zb-mom-ww-mxgateway-cli:compileTestJava`.
|
||||
|
||||
**Step 3: Run the cli test set to confirm nothing breaks:**
|
||||
`gradle :zb-mom-ww-mxgateway-cli:test --tests '*MxGatewayCliTests'`.
|
||||
|
||||
**Step 4: Commit** `test(java-cli): add in-process gRPC harness fixture`.
|
||||
|
||||
**Acceptance:** Compiles; harness starts/stops cleanly; scripted services reachable through the real client types. (No assertions on CLI yet — that's Tasks 5–6.)
|
||||
|
||||
---
|
||||
|
||||
## Task 5: stream-events CLI test via harness
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** Task 7, 8, 9
|
||||
**blockedBy:** Task 2 (same test file ordering), Task 4 (harness)
|
||||
|
||||
**Files:**
|
||||
- Test: `clients/java/zb-mom-ww-mxgateway-cli/src/test/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCliTests.java`
|
||||
|
||||
**Step 1: Wire a harness-backed `MxGatewayCliClientFactory`** in the test that builds the CLI client over `harness.gatewayClient()` (reuse the production adapter that wraps `MxGatewayClient` as `MxGatewayCliClient`; it is package-visible from the test's package). Script ≥2 `MxEvent`s including one with a **high uint64 worker sequence** to cover the unsigned-format regression.
|
||||
|
||||
**Step 2: Write the failing test** `streamEventsRendersScriptedEventsIncludingHighUint64Sequence` — run `stream-events`, assert stdout contains the scripted event fields and the high sequence renders unsigned (not negative).
|
||||
|
||||
**Step 3: Run failing → (harness already supplies behavior) → PASS:**
|
||||
`gradle :zb-mom-ww-mxgateway-cli:test --tests '*MxGatewayCliTests'`.
|
||||
|
||||
**Step 4: Commit** `test(java-cli): cover stream-events over in-process harness`.
|
||||
|
||||
**Acceptance:** stream-events exercised through the real `MxEventStream`; unsigned-sequence rendering asserted.
|
||||
|
||||
---
|
||||
|
||||
## Task 6: galaxy-watch + galaxy-discover CLI tests via harness
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 7, 8, 9
|
||||
**blockedBy:** Task 3 (GalaxyClientFactory seam), Task 5 (same test file ordering)
|
||||
|
||||
**Files:**
|
||||
- Test: `clients/java/zb-mom-ww-mxgateway-cli/src/test/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCliTests.java`
|
||||
|
||||
**Step 1: Wire a harness-backed `GalaxyClientFactory`** (from Task 3) that returns `harness.galaxyClient()`.
|
||||
|
||||
**Step 2: Write the failing tests:**
|
||||
- `galaxyDiscoverPrintsPagedHierarchyJson` — assert the scripted `GalaxyObject` hierarchy renders in CLI JSON (object fields + counts).
|
||||
- `galaxyWatchRendersScriptedDeployEvents` — assert the scripted deploy events render in the CLI feed; honor `--limit` if the command supports it.
|
||||
|
||||
**Step 3: Run failing → PASS:** `gradle :zb-mom-ww-mxgateway-cli:test --tests '*MxGatewayCliTests'`.
|
||||
|
||||
**Step 4: Full cli module verification** (this closes the §8 Java item): `gradle :zb-mom-ww-mxgateway-cli:test`.
|
||||
|
||||
**Step 5: Commit** `test(java-cli): cover galaxy-discover/galaxy-watch over in-process harness`.
|
||||
|
||||
**Acceptance:** All 10 previously-untested subcommands now have CLI coverage; `MxGatewayCliTests` green.
|
||||
|
||||
---
|
||||
|
||||
## Task 7: WorkerReadyWaitTimeoutMs option + validator + doc
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** all Java tasks (1–6), Task 9
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/ZB.MOM.WW.MxGateway.Server/Configuration/SessionOptions.cs`
|
||||
- Modify: `src/ZB.MOM.WW.MxGateway.Server/Configuration/GatewayOptionsValidator.cs`
|
||||
- Modify: `docs/GatewayConfiguration.md`
|
||||
- Test: `src/ZB.MOM.WW.MxGateway.Tests/Gateway/Configuration/GatewayOptionsTests.cs` (or the existing options-validator test class)
|
||||
|
||||
**Step 1: Add the option** to `SessionOptions` (match the `{ get; init; }` + XML-doc style):
|
||||
```csharp
|
||||
/// <summary>
|
||||
/// Gets the bounded time, in milliseconds, the gateway will wait for a worker client
|
||||
/// to reach <c>Ready</c> when the session itself is already <c>Ready</c> but the worker
|
||||
/// state has transiently diverged (e.g. <c>Handshaking</c> after a heartbeat blip).
|
||||
/// The wait applies only to transient worker states; terminal states
|
||||
/// (<c>Faulted</c>/<c>Closing</c>/<c>Closed</c>/no worker) fail fast immediately.
|
||||
/// A value of <c>0</c> (the default) disables the wait — the gateway keeps the original
|
||||
/// fail-fast behavior. Must be greater than or equal to zero.
|
||||
/// </summary>
|
||||
public int WorkerReadyWaitTimeoutMs { get; init; }
|
||||
```
|
||||
|
||||
**Step 2: Validate `>= 0`** in `GatewayOptionsValidator` (mirror an existing numeric check; message e.g. `MxGateway:Sessions:WorkerReadyWaitTimeoutMs must be greater than or equal to zero.`).
|
||||
|
||||
**Step 3: Document** the new key in `docs/GatewayConfiguration.md` under the `MxGateway:Sessions` section (default 0 = disabled; transient-only; terminal fails fast).
|
||||
|
||||
**Step 4: Write + run the failing test** asserting default is `0` and a negative value fails validation:
|
||||
`dotnet test src/ZB.MOM.WW.MxGateway.Tests --filter "FullyQualifiedName~GatewayOptions"` → expect FAIL pre-impl, PASS post.
|
||||
|
||||
**Step 5: Commit** `feat(server): add MxGateway:Sessions:WorkerReadyWaitTimeoutMs (default off)`.
|
||||
|
||||
**Acceptance:** Option binds, default 0, negative rejected, doc updated.
|
||||
|
||||
---
|
||||
|
||||
## Task 8: Bounded ready-wait in GatewaySession + tests
|
||||
|
||||
**Classification:** high-risk
|
||||
**Estimated implement time:** ~5 min (split if it grows)
|
||||
**Parallelizable with:** all Java tasks, Task 9
|
||||
**blockedBy:** Task 7
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/ZB.MOM.WW.MxGateway.Server/Sessions/GatewaySession.cs` (`GetReadyWorkerClient` `:1665`; `InvokeAsync` `:918`; `ReadEventsAsync` `:1263`)
|
||||
- Test: `src/ZB.MOM.WW.MxGateway.Tests/Gateway/Sessions/SessionManagerTests.cs` (or the test class covering `GetReadyWorkerClient` diagnostics)
|
||||
|
||||
**Context & constraints:** The not-ready check runs inside the `_syncRoot` lock — **never sleep/poll inside the lock**. Read state under the lock, release, await, re-check. The both-states diagnostic (`Session state is {_state}; worker state is {workerState}.`) MUST be preserved for the final failure. Default timeout 0 ⇒ behavior identical to today.
|
||||
|
||||
**Step 1: Write failing tests first (TDD)** in the session-manager test class, using a `FakeWorkerClient` whose `State` is settable:
|
||||
- `InvokeAsync_WhenWorkerHandshakingThenReadyWithinTimeout_Succeeds` — option `WorkerReadyWaitTimeoutMs=500`; worker starts `Handshaking`, flips to `Ready` after ~50 ms (e.g. via a background `Task` or a `TimeProvider`-driven advance); assert the invoke succeeds and the worker is invoked once.
|
||||
- `InvokeAsync_WhenWorkerFaulted_FailsFastWithBothStates` — worker `Faulted`, timeout 500; assert it throws *immediately* (no meaningful delay) and the message contains both `Session state is Ready` and `worker state is Faulted`.
|
||||
- `InvokeAsync_WhenTimeoutElapsesStillNotReady_FailsWithBothStates` — worker stays `Handshaking`, timeout small (e.g. 100 ms); assert throw after ~timeout with both states.
|
||||
- `InvokeAsync_WhenTimeoutZero_FailsFastUnchanged` — worker `Handshaking`, timeout 0; assert immediate fail-fast (pins the no-behavior-change default).
|
||||
|
||||
**Step 2: Run tests → expect FAIL/compile-error** (`GetReadyWorkerClientAsync` not present):
|
||||
`dotnet test src/ZB.MOM.WW.MxGateway.Tests --filter "FullyQualifiedName~SessionManager"`.
|
||||
|
||||
**Step 3: Implement `GetReadyWorkerClientAsync(CancellationToken)`:**
|
||||
- Under `_syncRoot`: capture `_state` and `_workerClient?.State`. If session is `Ready` and worker is `Ready` → return it (fast path, no await). If worker is terminal (`Faulted`/`Closing`/`Closed`) or null, or session not `Ready` → throw the both-states `SessionManagerException` now (fail fast). If worker is transient (`Handshaking`/`Created`) AND `WorkerReadyWaitTimeoutMs > 0` → fall through to the wait. If worker is transient but `WorkerReadyWaitTimeoutMs == 0` → throw the same both-states exception immediately (the unchanged default behavior).
|
||||
- Wait loop OUTSIDE the lock: until a deadline (`now + WorkerReadyWaitTimeoutMs`), `await Task.Delay(pollIntervalMs, ct)` (const `pollIntervalMs = 25`), then re-acquire `_syncRoot` and re-evaluate: Ready → return; terminal/null/session-not-Ready → fail fast with both states; still transient → keep waiting. On deadline → throw both-states.
|
||||
- Keep the existing synchronous `GetReadyWorkerClient()` for any non-async caller, or have it delegate to a zero-wait evaluation to avoid duplicated message logic (extract a private `EvaluateReadyUnderLock(out string failureMessage)` helper used by both).
|
||||
|
||||
**Step 4: Update callers:**
|
||||
- `InvokeAsync` (`:918`): `IWorkerClient workerClient = await GetReadyWorkerClientAsync(cancellationToken).ConfigureAwait(false);`.
|
||||
- `ReadEventsAsync` (`:1263`): convert to an async iterator — `public async IAsyncEnumerable<WorkerEvent> ReadEventsAsync([EnumeratorCancellation] CancellationToken cancellationToken)`, `await GetReadyWorkerClientAsync(...)`, `TouchClientActivity(...)`, then `await foreach (var e in workerClient.ReadEventsAsync(cancellationToken)) yield return e;`. Verify no caller relied on eager (pre-enumeration) throw semantics — if one does, note it for the reviewer.
|
||||
|
||||
**Step 5: Run the targeted tests → PASS** (same filter). Confirm the 4 new tests + pre-existing `GetReadyWorkerClient` diagnostic test all pass.
|
||||
|
||||
**Step 6: Commit** `feat(server): bounded worker-ready wait in GatewaySession (default off)`.
|
||||
|
||||
**Acceptance:** Transient states wait up to the timeout; terminal states fail fast with both states; default 0 is byte-for-byte the old behavior; no sleeping under `_syncRoot`.
|
||||
|
||||
---
|
||||
|
||||
## Task 9: FakeWorkerProcess consolidation
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** all tasks
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/ZB.MOM.WW.MxGateway.Tests/TestSupport/FakeWorkerProcess.cs` (canonical — extend if needed)
|
||||
- Modify: `src/ZB.MOM.WW.MxGateway.Tests/Gateway/Sessions/SessionWorkerClientFactoryFakeWorkerTests.cs` (nested copy ~line 343)
|
||||
- Modify: `src/ZB.MOM.WW.MxGateway.Tests/Gateway/Workers/WorkerProcessLauncherTests.cs` (nested copy ~line 244)
|
||||
- Modify: `src/ZB.MOM.WW.MxGateway.Tests/Gateway/Workers/WorkerClientTests.cs` (nested copy ~line 767; already `using ...TestSupport`)
|
||||
|
||||
**Context:** Canonical `TestSupport/FakeWorkerProcess(int)` has `MarkExited`/`Kill`/TCS-backed `WaitForExitAsync`. Three test files still declare private nested `FakeWorkerProcess`. Consolidate.
|
||||
|
||||
**Step 1: Diff each nested copy against the canonical.** For each, list any members/behavior the canonical lacks (e.g. extra counters, scripted exit codes). Fold those into the canonical `TestSupport/FakeWorkerProcess` **first** (additively, so existing canonical users keep compiling).
|
||||
|
||||
**Step 2: Delete each nested class** and update references to the canonical type; add/confirm `using ZB.MOM.WW.MxGateway.Tests.TestSupport;`.
|
||||
|
||||
**Step 3: Run the three affected test classes:**
|
||||
```
|
||||
dotnet test src/ZB.MOM.WW.MxGateway.Tests --filter "FullyQualifiedName~WorkerClientTests|FullyQualifiedName~WorkerProcessLauncherTests|FullyQualifiedName~SessionWorkerClientFactoryFakeWorkerTests"
|
||||
```
|
||||
Expected: all pass (behavior preserved; `KillCount`/`HasExited`/`ExitCode` semantics intact).
|
||||
|
||||
**Step 4: Commit** `refactor(tests): consolidate FakeWorkerProcess onto TestSupport canonical`.
|
||||
|
||||
**Acceptance:** Exactly one `FakeWorkerProcess` definition (the canonical); three files import it; affected tests green.
|
||||
|
||||
---
|
||||
|
||||
## Final verification (after Tasks 8 + 9)
|
||||
|
||||
Run the full gateway suite once to confirm no cross-cutting regression:
|
||||
```
|
||||
dotnet test src/ZB.MOM.WW.MxGateway.Tests/ZB.MOM.WW.MxGateway.Tests.csproj
|
||||
```
|
||||
Expected baseline: prior green count + the new Task 7/8 tests (Task 9 is a refactor and adds no tests); the 3 known macOS-environmental failures (TLS temp-file, OrphanWorkerTerminator ×2) may persist — confirm no *new* failures.
|
||||
|
||||
Then finish via `superpowers-extended-cc:finishing-a-development-branch`.
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"planPath": "docs/plans/2026-06-16-stillpending-section8.md",
|
||||
"tasks": [
|
||||
{"id": 140, "subject": "Task 1: FakeSession recorders + read/write/write2-bulk CLI tests", "status": "pending"},
|
||||
{"id": 141, "subject": "Task 2: secured/secured2/bench-bulk + close-session CLI tests", "status": "pending", "blockedBy": [140]},
|
||||
{"id": 142, "subject": "Task 3: GalaxyClientFactory seam + cli grpc test deps", "status": "pending"},
|
||||
{"id": 143, "subject": "Task 4: In-process gRPC harness fixture", "status": "pending"},
|
||||
{"id": 144, "subject": "Task 5: stream-events CLI test via harness", "status": "pending", "blockedBy": [141, 143]},
|
||||
{"id": 145, "subject": "Task 6: galaxy-watch + galaxy-discover CLI tests via harness", "status": "pending", "blockedBy": [142, 144]},
|
||||
{"id": 146, "subject": "Task 7: WorkerReadyWaitTimeoutMs option + validator + doc", "status": "pending"},
|
||||
{"id": 147, "subject": "Task 8: Bounded ready-wait in GatewaySession + tests", "status": "pending", "blockedBy": [146]},
|
||||
{"id": 148, "subject": "Task 9: FakeWorkerProcess consolidation", "status": "pending"}
|
||||
],
|
||||
"lastUpdated": "2026-06-16"
|
||||
}
|
||||
@@ -0,0 +1,193 @@
|
||||
# Array Write Ergonomics & Default-Fill Partial Writes — Design
|
||||
|
||||
Date: 2026-06-18
|
||||
|
||||
## Problem
|
||||
|
||||
Writing array-typed MXAccess attributes through the gateway has two ergonomic
|
||||
shortfalls:
|
||||
|
||||
1. **Asymmetric addressing.** An array attribute reads fine by its bare name
|
||||
(`Obj.Arr`), but writes require the `[]` body suffix (`Obj.Arr[]`). The
|
||||
handle registered from the bare name is read-capable but not cleanly
|
||||
write-capable.
|
||||
2. **Whole-array writes only.** Every write replaces the entire array; to change
|
||||
a few elements the client must marshal and send the full array. This is a
|
||||
native MXAccess COM constraint (there is no element-wise write API), but it
|
||||
pushes avoidable cost onto clients for large arrays.
|
||||
|
||||
This design removes both frictions without breaking MXAccess parity. The worker
|
||||
is not modified — it continues to perform an honest whole-array COM write. All
|
||||
new behavior lives in the gateway and the contract.
|
||||
|
||||
## Why MXAccess forces whole-array writes
|
||||
|
||||
The native MXAccess COM `Write` takes a complete VARIANT (`SAFEARRAY` for
|
||||
arrays). There is no `WriteArrayElement(index, value)`. Confirmed in the worker:
|
||||
`VariantConverter.ConvertToComArray` marshals the entire CLR array in one shot,
|
||||
and `MxAccessSession.Write` forwards it verbatim to the COM proxy. Any "partial
|
||||
write" feature must therefore reconstruct a full array before the COM call.
|
||||
|
||||
We deliberately do **not** reconstruct it from current state (no
|
||||
read-modify-write merge): that would add latency, cache-staleness, and a race
|
||||
window against other writers, and would paper over MXAccess semantics. Instead
|
||||
partial writes are **stateless default-fill** (see below).
|
||||
|
||||
## Goals
|
||||
|
||||
- Writing an array attribute by its bare name works like reading does — the
|
||||
gateway appends `[]` automatically when it knows the attribute is an array.
|
||||
- A client can send only the indices it wants plus a total length, instead of
|
||||
the full array.
|
||||
|
||||
## Non-goals
|
||||
|
||||
- **No preserve-unchanged merge.** Unmentioned indices are written as the
|
||||
element type's default, **not** kept at their current value.
|
||||
- No element-wise COM write — MXAccess has no such API; every write is
|
||||
whole-array and we keep it that way.
|
||||
- No change to `ReadBulk` string addressing.
|
||||
- The gateway does **not** infer total length; the client supplies it.
|
||||
|
||||
## Decisions (resolved during brainstorming)
|
||||
|
||||
| Question | Decision |
|
||||
|---|---|
|
||||
| Scope | Both: suffix ergonomics **and** partial writes |
|
||||
| Partial-write semantics | Stateless **default-fill**: unmentioned indices = type default (reset, not preserved) |
|
||||
| Total length | **Client specifies** `total_length` explicitly |
|
||||
| Time/timestamp default | **Unix epoch** |
|
||||
| Suffix fix location/actor | **Gateway**, using in-memory Galaxy `is_array` metadata, at **AddItem** time |
|
||||
| Suffix fallback when metadata unavailable | **Pass through unchanged** (no regression) |
|
||||
| Partial-write contract shape | New `MxSparseArray` as a `oneof` arm on `MxValue` |
|
||||
| Per-client helpers | **Included** in this change |
|
||||
|
||||
## Contract changes (`mxaccess_gateway.proto`)
|
||||
|
||||
A write-only sparse representation, added as a `oneof kind` arm on `MxValue` so
|
||||
every write command (`Write`, `Write2`, `WriteSecured`, `WriteSecured2`,
|
||||
`WriteBulkEntry`) accepts it without new RPCs:
|
||||
|
||||
```proto
|
||||
message MxSparseArray {
|
||||
MxDataType element_data_type = 1;
|
||||
uint32 total_length = 2;
|
||||
repeated MxSparseElement elements = 3;
|
||||
}
|
||||
|
||||
message MxSparseElement {
|
||||
uint32 index = 1;
|
||||
MxValue value = 2; // scalar
|
||||
}
|
||||
|
||||
// added to MxValue oneof kind:
|
||||
// MxSparseArray sparse_array_value = 19;
|
||||
```
|
||||
|
||||
`sparse_array_value` is **write-only**: the worker never produces it, and the
|
||||
gateway rejects it on any read/event path. Regenerate `Generated/` and commit
|
||||
the generated `.cs` (the net48 worker build needs the checked-in types — see the
|
||||
proto-codegen-regen rule).
|
||||
|
||||
## Suffix normalization — at `AddItem`, in the gateway
|
||||
|
||||
The item handle binds to the literal address string at `AddItem` and is reused
|
||||
for both reads and writes; at write time only the integer handle is available,
|
||||
which is too late to change the address. So normalization happens at
|
||||
registration.
|
||||
|
||||
In the gateway's `AddItemCommand` / `AddItem2Command` handling
|
||||
(`GatewaySession`), before forwarding to the worker:
|
||||
|
||||
1. If `item_definition` already ends with `[]` → leave unchanged.
|
||||
2. Else look up `item_definition + "[]"` in the in-memory Galaxy hierarchy cache
|
||||
(`IGalaxyHierarchyCache` → `GalaxyTagLookup.Attribute.IsArray`). The index is
|
||||
keyed by `FullTagReference`, which already carries the `[]` suffix for
|
||||
arrays, so the lookup key must include `[]`. If found and `is_array` →
|
||||
rewrite `item_definition` to the `[]` form.
|
||||
3. **Fallback:** metadata unavailable or address not found as an array →
|
||||
forward verbatim (current behavior).
|
||||
|
||||
Store the **normalized** address in `SessionItemRegistration.TagAddress` so
|
||||
write-time constraint checks (`ConstraintEnforcer`) and readback resolve
|
||||
consistently against the `[]`-keyed index.
|
||||
|
||||
This is safe for reads: both the bare and `[]` forms return the array on read,
|
||||
so promoting a registration to the `[]` form does not change read behavior — it
|
||||
only makes the handle write-capable.
|
||||
|
||||
`AddItem2Command` (with `item_context`) normalizes `item_definition` the same
|
||||
way. `ReadBulk` is unaffected — it uses raw address strings with its own
|
||||
ephemeral registration, so bare-name reads continue to work unchanged.
|
||||
|
||||
## Partial-write expansion — at the gateway, worker untouched
|
||||
|
||||
In the gateway write path, before forwarding any write command to the worker, if
|
||||
`MxValue.KindCase == SparseArrayValue`:
|
||||
|
||||
1. Allocate a full array of `total_length`, element type `element_data_type`.
|
||||
2. Initialize every slot to the type default:
|
||||
- `bool` → `false`
|
||||
- `int32` / `int64` → `0`
|
||||
- `float` / `double` → `0`
|
||||
- `string` → `""`
|
||||
- `time` / `timestamp` → Unix epoch
|
||||
3. For each `MxSparseElement`, set `array[index]` from the scalar `value`.
|
||||
4. Replace the `MxValue` with a normal `array_value` (full `MxArray`).
|
||||
|
||||
The worker then receives an ordinary whole-array `MxValue`;
|
||||
`VariantConverter.ConvertToComArray` and the COM `Write` are unchanged. Parity
|
||||
preserved — it really is a whole-array write.
|
||||
|
||||
Expansion is applied uniformly to every write variant by normalizing the
|
||||
`MxValue` of each command (`Write`, `Write2`, `WriteSecured`, `WriteSecured2`,
|
||||
and each `WriteBulkEntry`) before it leaves the gateway.
|
||||
|
||||
## Validation & errors (gateway, `InvalidArgument`)
|
||||
|
||||
- `total_length == 0`, or any `index >= total_length` → reject.
|
||||
- Duplicate indices → reject (no silent last-wins).
|
||||
- `element_data_type` must be a supported scalar element type (not `Raw` /
|
||||
`Unspecified`); each element `value` must match it.
|
||||
- Empty `elements` with `total_length = N` (N > 0) → valid: writes an all-defaults array
|
||||
of length N (explicit reset).
|
||||
- A sparse value arriving on a read/event path → reject (guard; the worker never
|
||||
produces one).
|
||||
|
||||
## Clients (all five) & docs — same change
|
||||
|
||||
Per the repo rule that docs change with the source:
|
||||
|
||||
- Regenerate proto types for dotnet, go, python, rust, java. Watch the Java
|
||||
generated-file churn — revert spurious protobuf-version diffs when no `.proto`
|
||||
semantics changed beyond the new messages; commit the net48-relevant regen.
|
||||
- Add a thin per-client helper to build a sparse write, e.g.
|
||||
`WriteArrayElements(handle, totalLength, {index → value})`.
|
||||
- Update the **"Array writes replace the whole array"** section in all five
|
||||
client READMEs: document default-fill semantics (unmentioned = reset to
|
||||
default, not preserved), the `total_length` requirement, and that bare-name
|
||||
array writes now auto-normalize to the `[]` form.
|
||||
- Update `gateway.md` (command/value surface) and the value-conversion doc.
|
||||
|
||||
## Testing
|
||||
|
||||
- **Gateway (FakeWorkerHarness):**
|
||||
- Sparse → full expansion per element type; default-fill sizing; correct
|
||||
placement of specified indices.
|
||||
- `total_length == 0`, index-out-of-range, and duplicate-index rejection.
|
||||
- Empty-elements all-defaults case.
|
||||
- Suffix normalization: bare array → `[]`; bare scalar → unchanged;
|
||||
already-`[]` → unchanged; metadata-cold → pass-through.
|
||||
- **Clients:** helper + round-trip serialization per language.
|
||||
- **Live MXAccess (opt-in, windev):** one default-fill write and one bare-name
|
||||
array write against real COM.
|
||||
|
||||
## Affected components
|
||||
|
||||
- Contracts: `mxaccess_gateway.proto` + regenerated `Generated/`.
|
||||
- Gateway: `GatewaySession` (AddItem normalization, write expansion),
|
||||
`SessionItemRegistration` (store normalized address), interaction with
|
||||
`IGalaxyHierarchyCache` / `GalaxyTagLookup`.
|
||||
- Worker: **unchanged.**
|
||||
- Clients: dotnet, go, python, rust, java (regenerated types + helper + README).
|
||||
- Docs: `gateway.md`, value-conversion doc, five client READMEs.
|
||||
@@ -0,0 +1,329 @@
|
||||
# Array Write Ergonomics & Default-Fill Partial Writes — Implementation Plan
|
||||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers-extended-cc:executing-plans to implement this plan task-by-task.
|
||||
|
||||
**Goal:** Let clients write array attributes by their bare name (gateway auto-appends `[]` at AddItem), and write a sparse, default-filled array (only the indices they care about + a total length) instead of marshalling the whole array.
|
||||
|
||||
**Architecture:** All new behavior lives in the **contract** and the **gateway**; the worker is untouched and keeps doing an honest whole-array COM write. The gateway intercepts outbound commands at the single choke point `GatewaySession.InvokeAsync(WorkerCommand)`: it (a) normalizes `AddItem`/`AddItem2` `item_definition` to the `[]` form when Galaxy metadata says the attribute is an array, and (b) expands an `MxSparseArray` write value into a full default-filled `MxArray` before it leaves the gateway. Partial writes are **stateless default-fill** — unmentioned indices are the type default (reset), never preserved.
|
||||
|
||||
**Tech Stack:** .NET 10 (gateway) / .NET Framework 4.8 x86 (worker, unchanged), protobuf + Grpc.Tools, xUnit + FakeWorkerHarness; clients in C#, Go, Python, Rust, Java.
|
||||
|
||||
**Design doc:** `docs/plans/2026-06-18-array-write-ergonomics-design.md`
|
||||
|
||||
**Key references for the implementer:**
|
||||
- Choke point for all outbound commands: `src/ZB.MOM.WW.MxGateway.Server/Sessions/GatewaySession.cs:947-955` (`InvokeAsync(WorkerCommand command, ...)`). `command.Command` is the `MxCommand`.
|
||||
- Handle→address tracking: `GatewaySession.TrackCommandReply` (lines 975-1014, AddItem at 989, AddItem2 at 992) → `TrackItem` (1826-1837) → `SessionItemRegistration` record (`Sessions/SessionItemRegistration.cs`). Tracking reads the **same** `MxCommand` instance that passed through `InvokeAsync`, so mutating `item_definition` there flows through automatically.
|
||||
- Galaxy metadata lookup: `IGalaxyHierarchyCache.Current.Index.TagsByAddress.TryGetValue(addr, out GalaxyTagLookup)`, then `lookup.Attribute?.IsArray`. The index is keyed by `FullTagReference`, which **already contains** `[]` for arrays — look up `addr + "[]"`. See `Security/Authorization/ConstraintEnforcer.cs:15-17,197-204` for the injection + lookup pattern.
|
||||
- Proto: `src/ZB.MOM.WW.MxGateway.Contracts/Protos/mxaccess_gateway.proto` — `MxValue` (1026-1044), `MxArray` (1046-1063), `WriteCommand` (244-249), `Write2Command` (251-257), `WriteSecuredCommand`/`WriteSecured2Command`, `WriteBulk*`, `AddItemCommand` (192-195), `AddItem2Command` (197-201). Generated into `Contracts/Generated/MxaccessGateway.cs` (Compile-Removed + regenerated by Grpc.Tools).
|
||||
- Gateway proto regen + commit rule (memory `project_proto_codegen_regen`): after a `.proto` edit, delete `Generated/*.cs`, rebuild contracts to regenerate, and **commit** `Generated/` or the net48 worker build fails CS0246.
|
||||
- Java client gotcha (memory `project_java_generated_churn`): gradle regenerates a tracked 64k-line file with spurious protobuf-version churn — revert that churn; build/test Java on **windev** (memory `project_java_build_host`), Mac has no JRE.
|
||||
|
||||
---
|
||||
|
||||
## Task 0: Contract — add `MxSparseArray` and regenerate
|
||||
|
||||
**Classification:** high-risk
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** none (blocks all other tasks)
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/ZB.MOM.WW.MxGateway.Contracts/Protos/mxaccess_gateway.proto` (MxValue oneof ~line 1043; new messages after MxArray ~line 1063)
|
||||
- Regenerate + commit: `src/ZB.MOM.WW.MxGateway.Contracts/Generated/MxaccessGateway.cs`
|
||||
|
||||
**Step 1: Add the messages to the proto.** After the `MxArray` message (line 1063), add:
|
||||
|
||||
```proto
|
||||
// Write-only sparse array value. The gateway expands this into a full,
|
||||
// default-filled MxArray before forwarding to the worker; the worker never
|
||||
// receives or produces it. Unmentioned indices take the element type's
|
||||
// default (reset, NOT preserved).
|
||||
message MxSparseArray {
|
||||
MxDataType element_data_type = 1;
|
||||
uint32 total_length = 2;
|
||||
repeated MxSparseElement elements = 3;
|
||||
}
|
||||
|
||||
message MxSparseElement {
|
||||
uint32 index = 1;
|
||||
MxValue value = 2; // scalar
|
||||
}
|
||||
```
|
||||
|
||||
**Step 2: Add the oneof arm to `MxValue`.** Inside the `oneof kind { ... }` block, after `bytes raw_value = 18;`:
|
||||
|
||||
```proto
|
||||
MxSparseArray sparse_array_value = 19;
|
||||
```
|
||||
|
||||
**Step 3: Regenerate generated code.**
|
||||
|
||||
Run (PowerShell on windev, or locally on Mac — .NET builds fine; in bash/zsh use `rm` in place of `del`):
|
||||
```
|
||||
del src/ZB.MOM.WW.MxGateway.Contracts/Generated/*.cs
|
||||
dotnet build src/ZB.MOM.WW.MxGateway.Contracts/ZB.MOM.WW.MxGateway.Contracts.csproj
|
||||
```
|
||||
Expected: build succeeds; `Generated/MxaccessGateway.cs` now contains `MxSparseArray`, `MxSparseElement`, and `MxValue.SparseArrayValue`.
|
||||
|
||||
**Step 4: Verify net10 + net48 both compile** (the worker consumes these types via net48):
|
||||
```
|
||||
dotnet build src/ZB.MOM.WW.MxGateway.slnx
|
||||
```
|
||||
Expected: PASS (no CS0246 on the new types).
|
||||
|
||||
**Step 5: Commit** (include regenerated `Generated/`):
|
||||
```bash
|
||||
git add src/ZB.MOM.WW.MxGateway.Contracts/Protos/mxaccess_gateway.proto \
|
||||
src/ZB.MOM.WW.MxGateway.Contracts/Generated/MxaccessGateway.cs
|
||||
git commit -m "feat(contracts): add MxSparseArray write-only value for default-fill partial writes"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 1: Gateway — `SparseArrayExpander` (pure expansion + validation)
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 2, Tasks 4-9
|
||||
|
||||
**Files:**
|
||||
- Create: `src/ZB.MOM.WW.MxGateway.Server/Sessions/SparseArrayExpander.cs`
|
||||
- Test: `src/ZB.MOM.WW.MxGateway.Tests/Gateway/Sessions/SparseArrayExpanderTests.cs`
|
||||
|
||||
**Step 1: Write failing tests.** Cover: default-fill sizing + placement (two element types are enough here; the remaining types are added in Step 4); `total_length == 0` → `InvalidArgument`; index `>= total_length` → `InvalidArgument`; duplicate index → `InvalidArgument`; `Raw`/`Unspecified` element type → `InvalidArgument`; empty `elements` → all-defaults array of length N; timestamp default == Unix epoch.
|
||||
|
||||
```csharp
|
||||
using Grpc.Core;
|
||||
using Mxaccess.Gateway.V1; // adjust to the generated namespace
|
||||
using ZB.MOM.WW.MxGateway.Server.Sessions;
|
||||
using Xunit;
|
||||
|
||||
public sealed class SparseArrayExpanderTests
|
||||
{
|
||||
private static MxValue Sparse(MxDataType type, uint length, params (uint Index, MxValue Value)[] els)
|
||||
{
|
||||
MxSparseArray sparse = new() { ElementDataType = type, TotalLength = length };
|
||||
foreach ((uint index, MxValue value) in els)
|
||||
sparse.Elements.Add(new MxSparseElement { Index = index, Value = value });
|
||||
return new MxValue { SparseArrayValue = sparse };
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Expand_Int32_FillsDefaultsAndPlacesValues()
|
||||
{
|
||||
MxValue v = Sparse(MxDataType.Integer, 4, (1, new MxValue { Int32Value = 7 }));
|
||||
SparseArrayExpander.Expand(v);
|
||||
Assert.Equal(MxValue.KindOneofCase.ArrayValue, v.KindCase);
|
||||
Assert.Equal(new[] { 0, 7, 0, 0 }, v.ArrayValue.Int32Values.Values);
|
||||
Assert.Equal((uint)4, v.ArrayValue.Dimensions[0]);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Expand_EmptyElements_ProducesAllDefaults()
|
||||
{
|
||||
MxValue v = Sparse(MxDataType.Boolean, 3);
|
||||
SparseArrayExpander.Expand(v);
|
||||
Assert.Equal(new[] { false, false, false }, v.ArrayValue.BoolValues.Values);
|
||||
}
|
||||
|
||||
[Theory]
|
||||
[InlineData(0u, 0u)] // total_length == 0
|
||||
[InlineData(2u, 5u)] // index >= total_length
|
||||
public void Expand_InvalidShape_Throws(uint length, uint badIndex)
|
||||
{
|
||||
MxValue v = Sparse(MxDataType.Integer, length, (badIndex, new MxValue { Int32Value = 1 }));
|
||||
RpcException ex = Assert.Throws<RpcException>(() => SparseArrayExpander.Expand(v));
|
||||
Assert.Equal(StatusCode.InvalidArgument, ex.StatusCode);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Expand_DuplicateIndex_Throws()
|
||||
{
|
||||
MxValue v = Sparse(MxDataType.Integer, 4,
|
||||
(1, new MxValue { Int32Value = 1 }), (1, new MxValue { Int32Value = 2 }));
|
||||
Assert.Throws<RpcException>(() => SparseArrayExpander.Expand(v));
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Step 2: Run, confirm fail.** `dotnet test src/ZB.MOM.WW.MxGateway.Tests --filter "FullyQualifiedName~SparseArrayExpanderTests"` → FAIL (type not defined).
|
||||
|
||||
**Step 3: Implement `SparseArrayExpander`.** Mutates the passed `MxValue` in place, replacing `SparseArrayValue` with `ArrayValue`. Throw `RpcException(new Status(StatusCode.InvalidArgument, msg))` on any validation failure. Element-type switch must cover the supported scalar element types (`Boolean`, `Integer` → int32 or int64, `Float`, `Double`, `String`, `Time`); the `Time`/timestamp default is the Unix epoch (`new Timestamp { Seconds = 0, Nanos = 0 }`); reject `Raw`/`Unknown`/`Unspecified`. Set `MxArray.Dimensions = { total_length }` and `ElementDataType`. Validate: `total_length > 0`, every index `< total_length`, no duplicate indices, each element `value` scalar kind matches `element_data_type`.
|
||||
|
||||
(Mirror the typed sub-array shapes from `VariantConverter.ConvertToComArray` in the worker so the worker's existing read path is satisfied: `Int32Values`/`Int64Values`/`BoolValues`/`FloatValues`/`DoubleValues`/`StringValues`/`TimestampValues` with their `Values` repeated fields.)
|
||||
|
||||
**Step 4: Add remaining element-type tests** (int64, float, double, string, time/epoch, type-mismatch element → throws). Run filter → PASS.
|
||||
|
||||
**Step 5: Commit.**
|
||||
```bash
|
||||
git add src/ZB.MOM.WW.MxGateway.Server/Sessions/SparseArrayExpander.cs \
|
||||
src/ZB.MOM.WW.MxGateway.Tests/Gateway/Sessions/SparseArrayExpanderTests.cs
|
||||
git commit -m "feat(gateway): add SparseArrayExpander for default-fill partial array writes"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 2: Gateway — `ArrayAddressNormalizer` (suffix normalization)
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 1, Tasks 4-9
|
||||
|
||||
**Files:**
|
||||
- Create: `src/ZB.MOM.WW.MxGateway.Server/Sessions/ArrayAddressNormalizer.cs`
|
||||
- Test: `src/ZB.MOM.WW.MxGateway.Tests/Gateway/Sessions/ArrayAddressNormalizerTests.cs`
|
||||
|
||||
**Step 1: Write failing tests** using a fake/in-memory `IGalaxyHierarchyCache` whose `Current.Index.TagsByAddress` contains `"Obj.Arr[]"` (array) and `"Obj.Scalar"` (non-array). Cases:
|
||||
- `"Obj.Arr"` (bare, is array) → `"Obj.Arr[]"`.
|
||||
- `"Obj.Arr[]"` (already suffixed) → unchanged.
|
||||
- `"Obj.Scalar"` (non-array) → unchanged.
|
||||
- `"Obj.Unknown"` (not in cache / metadata cold) → unchanged (pass-through fallback).
|
||||
|
||||
```csharp
|
||||
[Fact]
|
||||
public void Normalize_BareArrayName_AppendsSuffix()
|
||||
{
|
||||
ArrayAddressNormalizer normalizer = new(FakeCacheWith("Obj.Arr[]", isArray: true));
|
||||
Assert.Equal("Obj.Arr[]", normalizer.Normalize("Obj.Arr"));
|
||||
}
|
||||
|
||||
[Theory]
|
||||
[InlineData("Obj.Arr[]")] // already suffixed
|
||||
[InlineData("Obj.Scalar")] // non-array
|
||||
[InlineData("Obj.Unknown")] // not in cache → fallback pass-through
|
||||
public void Normalize_LeavesUnchanged(string address) =>
|
||||
Assert.Equal(address, new ArrayAddressNormalizer(FakeCacheWith("Obj.Arr[]", true)).Normalize(address));
|
||||
```
|
||||
|
||||
**Step 2: Run, confirm fail.**
|
||||
|
||||
**Step 3: Implement.** Constructor injects `IGalaxyHierarchyCache cache`. `Normalize(string)`:
|
||||
1. If `string.IsNullOrWhiteSpace(address)` or `address.EndsWith("[]", StringComparison.Ordinal)` → return unchanged.
|
||||
2. Look up `address + "[]"` in `cache.Current.Index.TagsByAddress`. If found and `lookup.Attribute?.IsArray == true` → return `address + "[]"`.
|
||||
3. Otherwise return `address` unchanged. Never throw (best-effort convenience).
|
||||
|
||||
**Step 4: Run filter → PASS.**
|
||||
|
||||
**Step 5: Commit.**
|
||||
```bash
|
||||
git add src/ZB.MOM.WW.MxGateway.Server/Sessions/ArrayAddressNormalizer.cs \
|
||||
src/ZB.MOM.WW.MxGateway.Tests/Gateway/Sessions/ArrayAddressNormalizerTests.cs
|
||||
git commit -m "feat(gateway): add ArrayAddressNormalizer for bare-name array AddItem"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 3: Gateway — wire normalization + expansion into the outbound path
|
||||
|
||||
**Classification:** high-risk
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** none (depends on Tasks 1, 2)
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/ZB.MOM.WW.MxGateway.Server/Sessions/GatewaySession.cs` (constructor — inject `ArrayAddressNormalizer`; `InvokeAsync` at 947-955)
|
||||
- Modify: DI registration wherever `GatewaySession` dependencies are registered (search the `Security/Authorization/ConstraintEnforcer` registration for the pattern; register `ArrayAddressNormalizer` with the same lifetime — scoped or singleton — as `IGalaxyHierarchyCache`)
|
||||
- Test: `src/ZB.MOM.WW.MxGateway.Tests/Gateway/Sessions/GatewayArrayWriteWiringTests.cs`
|
||||
|
||||
**Step 1: Write a failing integration test** with `FakeWorkerHarness` (pattern: `src/ZB.MOM.WW.MxGateway.Tests/Gateway/Workers/FakeWorkerHarnessTests.cs:51-69` — `CreateConnectedPairAsync`, `ReadCommandAsync`, `ReplyToCommandAsync`). Two assertions:
|
||||
1. Client sends `AddItemCommand{ item_definition = "Obj.Arr" }` (array per the test's Galaxy cache) → the `WorkerEnvelope` the fake worker reads has `item_definition == "Obj.Arr[]"`.
|
||||
2. Client sends `WriteCommand{ value = MxSparseArray(Integer, 4, {1:7}) }` → the worker receives `value.ArrayValue.Int32Values.Values == [0,7,0,0]` (no `SparseArrayValue` reaches the worker).
|
||||
|
||||
**Step 2: Run, confirm fail.**
|
||||
|
||||
**Step 3: Implement.** At the top of `InvokeAsync(WorkerCommand command, ...)`, before forwarding, transform `command.Command` (the `MxCommand`) by `PayloadCase`:
|
||||
- `AddItem` → `command.Command.AddItem.ItemDefinition = _addressNormalizer.Normalize(command.Command.AddItem.ItemDefinition);`
|
||||
- `AddItem2` → same on `AddItem2.ItemDefinition`.
|
||||
- `Write`/`WriteSecured` → if `cmd.Value?.KindCase == SparseArrayValue` call `SparseArrayExpander.Expand(cmd.Value)`.
|
||||
- `Write2`/`WriteSecured2` → expand `Value` only (not `TimestampValue`).
|
||||
- `WriteBulk`/`Write2Bulk`/`WriteSecuredBulk`/`WriteSecured2Bulk` → expand each `entry.Value`.
|
||||
|
||||
Keep it a single private helper `NormalizeOutbound(MxCommand)` called once at the choke point. Because `TrackCommandReply` later reads the **same** `MxCommand` instance, the normalized `item_definition` flows into `SessionItemRegistration` with no extra change — add an assertion in the test that `TryGetItemRegistration(...).TagAddress == "Obj.Arr[]"` to lock that in.
|
||||
|
||||
**Step 4: Run the wiring test + Tasks 1/2 filters → PASS.** Then run the AddItem/Write fake-worker regression group once:
|
||||
```
|
||||
dotnet test src/ZB.MOM.WW.MxGateway.Tests/ZB.MOM.WW.MxGateway.Tests.csproj --filter "FullyQualifiedName~ArrayWrite|FullyQualifiedName~SparseArray|FullyQualifiedName~ArrayAddressNormalizer"
|
||||
```
|
||||
|
||||
**Step 5: Commit.**
|
||||
```bash
|
||||
git add src/ZB.MOM.WW.MxGateway.Server/Sessions/GatewaySession.cs \
|
||||
src/ZB.MOM.WW.MxGateway.Tests/Gateway/Sessions/GatewayArrayWriteWiringTests.cs
|
||||
git commit -m "feat(gateway): normalize array AddItem suffix and expand sparse writes at the worker boundary"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Tasks 4-8: Client helpers + READMEs (one task per client, parallelizable)
|
||||
|
||||
Each client task does the same four things; only paths/idioms differ. **Depends on Task 0** (needs regenerated proto types). All five are parallelizable with each other and with Tasks 1-3, 9.
|
||||
|
||||
Common helper contract: `WriteArrayElements(serverHandle, itemHandle, elementDataType, totalLength, elements /* index→scalar MxValue */, userId)` builds an `MxValue { SparseArrayValue = MxSparseArray{...} }` and calls the existing raw write. Add a unit test that the built command carries `sparse_array_value` with the right `total_length`/indices (serialization round-trip; no live gateway). Update the **"Array writes replace the whole array"** README section to document: default-fill semantics (unmentioned = reset to default, not preserved), the required `total_length`, and that bare-name array writes now auto-normalize.
|
||||
|
||||
### Task 4: .NET client
|
||||
**Classification:** standard · **~4 min** · **Parallelizable with:** Tasks 5-9, 1-3
|
||||
- Regenerate types: `dotnet build clients/dotnet/ZB.MOM.WW.MxGateway.Client.slnx`.
|
||||
- Add helper next to `WriteAsync` in `clients/dotnet/ZB.MOM.WW.MxGateway.Client/MxGatewaySession.cs:678-688`.
|
||||
- Test alongside existing client tests; README `clients/dotnet/README.md:162-170`.
|
||||
- Verify: build slnx + `dotnet test` the client test project.
|
||||
|
||||
### Task 5: Go client
|
||||
**Classification:** standard · **~4 min** · **Parallelizable with:** Tasks 4,6-9, 1-3
|
||||
- Regenerate per `clients/go` README; helper next to `Write`/`WriteRaw` in `clients/go/mxgateway/session.go:559-581`.
|
||||
- README `clients/go/README.md:139-147`.
|
||||
- Verify: `gofmt`, `go build ./...`, `go test ./...` from `clients/go`.
|
||||
|
||||
### Task 6: Python client
|
||||
**Classification:** standard · **~4 min** · **Parallelizable with:** Tasks 4-5,7-9, 1-3
|
||||
- Regenerate per `clients/python` README; helper next to `write` in `clients/python/src/zb_mom_ww_mxgateway/session.py:469-490`.
|
||||
- README `clients/python/README.md:142-150`.
|
||||
- Verify: `python -m pytest` from `clients/python`.
|
||||
|
||||
### Task 7: Rust client
|
||||
**Classification:** standard · **~4 min** · **Parallelizable with:** Tasks 4-6,8-9, 1-3
|
||||
- Helper next to `write` in `clients/rust/src/session.rs:530-548`; conversion helpers in `clients/rust/src/value.rs`.
|
||||
- README `clients/rust/README.md:162-170`.
|
||||
- Verify: `cargo fmt`, `cargo check --workspace`, `cargo test --workspace`, `cargo clippy --all-targets -- -D warnings` from `clients/rust`.
|
||||
|
||||
### Task 8: Java client
|
||||
**Classification:** standard · **~5 min** · **Parallelizable with:** Tasks 4-7,9, 1-3
|
||||
- Helper next to `write`/`writeRaw` in `clients/java/.../client/MxGatewaySession.java:581-604`.
|
||||
- README `clients/java/README.md:118-126`.
|
||||
- **Build/test on windev (JDK 21) via an isolated `origin/<branch>` worktree — Mac has no JRE** (memory `project_java_build_host`). After gradle regen, **revert the spurious protobuf-version churn** in `clients/java/src/main/generated/.../MxaccessGateway.java` if no proto semantics beyond the new messages changed (memory `project_java_generated_churn`); keep only the real `MxSparseArray` additions.
|
||||
- Verify: `gradle test` on windev.
|
||||
|
||||
(Each client task ends with its own commit, e.g. `feat(client-<lang>): add WriteArrayElements default-fill helper and document semantics`.)
|
||||
|
||||
---
|
||||
|
||||
## Task 9: Docs — gateway.md + value conversion
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~3 min
|
||||
**Parallelizable with:** Tasks 1-8 (depends on Task 0 only)
|
||||
|
||||
**Files:**
|
||||
- Modify: `gateway.md` (command/value surface — document `MxSparseArray` as a write-only value and bare-name AddItem normalization)
|
||||
- Modify: the value-conversion doc under `docs/` (search for where `MxArray`/value conversion is described) — add the default-fill + epoch-default note and the parity statement (worker still does a whole-array COM write)
|
||||
|
||||
**Step 1:** Add a subsection describing: `sparse_array_value` is write-only and gateway-expanded; default-fill semantics (unmentioned indices reset to the element type's default; Unix epoch for `Time`); `total_length` is required; bare-name array addresses are auto-normalized to the `[]` suffix at AddItem, falling back to pass-through when Galaxy metadata is unavailable; non-goals: no preserve-unchanged merge, no element-wise COM write.
|
||||
|
||||
**Step 2: Commit.**
|
||||
```bash
|
||||
git add gateway.md docs/
|
||||
git commit -m "docs: document MxSparseArray default-fill writes and bare-name array AddItem"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Dependency summary
|
||||
|
||||
- **Task 0** blocks everything.
|
||||
- **Tasks 1, 2** depend on 0; parallel with each other.
|
||||
- **Task 3** depends on 1 and 2.
|
||||
- **Tasks 4-8** depend on 0; parallel with each other and with 1-3, 9.
|
||||
- **Task 9** depends on 0; parallel with all.
|
||||
|
||||
## Verification gates (per CLAUDE.md targeted-tests rule)
|
||||
|
||||
- Run only the `--filter` for the task you touched; run the array-write fake-worker group once after Task 3.
|
||||
- Java verified on windev only. .NET/Go/Rust/Python verified locally.
|
||||
- Live MXAccess (opt-in, windev): after merge, one default-fill write and one bare-name array write against real COM (`MXGATEWAY_RUN_LIVE_MXACCESS_TESTS=1`).
|
||||
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"planPath": "docs/plans/2026-06-18-array-write-ergonomics.md",
|
||||
"tasks": [
|
||||
{"id": 0, "subject": "Task 0: Contract — add MxSparseArray + regenerate", "status": "pending"},
|
||||
{"id": 1, "subject": "Task 1: Gateway SparseArrayExpander", "status": "pending", "blockedBy": [0]},
|
||||
{"id": 2, "subject": "Task 2: Gateway ArrayAddressNormalizer", "status": "pending", "blockedBy": [0]},
|
||||
{"id": 3, "subject": "Task 3: Wire normalization + expansion into GatewaySession", "status": "pending", "blockedBy": [1, 2]},
|
||||
{"id": 4, "subject": "Task 4: .NET client WriteArrayElements + README", "status": "pending", "blockedBy": [0]},
|
||||
{"id": 5, "subject": "Task 5: Go client WriteArrayElements + README", "status": "pending", "blockedBy": [0]},
|
||||
{"id": 6, "subject": "Task 6: Python client write_array_elements + README", "status": "pending", "blockedBy": [0]},
|
||||
{"id": 7, "subject": "Task 7: Rust client write_array_elements + README", "status": "pending", "blockedBy": [0]},
|
||||
{"id": 8, "subject": "Task 8: Java client writeArrayElements + README (windev)", "status": "pending", "blockedBy": [0]},
|
||||
{"id": 9, "subject": "Task 9: Docs — gateway.md + value conversion", "status": "pending", "blockedBy": [0]}
|
||||
],
|
||||
"lastUpdated": "2026-06-18"
|
||||
}
|
||||
+96
-5
@@ -481,6 +481,68 @@ metadata rather than dropped. If a value cannot be losslessly converted, the
|
||||
worker should return both the best typed projection and enough diagnostic
|
||||
metadata to reproduce the case.
|
||||
|
||||
### MxSparseArray — default-fill partial array writes (write-only)
|
||||
|
||||
`MxSparseArray` is a write-only `oneof kind` arm on `MxValue` that lets clients
|
||||
send only the indices they want to change plus a total length, rather than
|
||||
marshalling the entire array every write. The worker never produces or receives
|
||||
it; expansion happens entirely in the gateway before the command reaches the pipe.
|
||||
|
||||
```protobuf
|
||||
message MxSparseArray {
|
||||
MxDataType element_data_type = 1;
|
||||
uint32 total_length = 2;
|
||||
repeated MxSparseElement elements = 3;
|
||||
}
|
||||
|
||||
message MxSparseElement {
|
||||
uint32 index = 1;
|
||||
MxValue value = 2; // scalar
|
||||
}
|
||||
```
|
||||
|
||||
**Expansion.** Before forwarding any write command to the worker the gateway
|
||||
allocates a full array of `total_length` slots, initializes every slot to the
|
||||
element type's default, places each `MxSparseElement` at its index, then
|
||||
replaces the `MxValue` with a normal `array_value` (`MxArray`). The worker
|
||||
receives an ordinary whole-array write — parity is preserved.
|
||||
|
||||
Default values by element type:
|
||||
|
||||
| Element type | Default |
|
||||
|---|---|
|
||||
| `Boolean` | `false` |
|
||||
| `Integer` | `0` (int32, or int64 when an element value is 64-bit) |
|
||||
| `Float` / `Double` | `0` |
|
||||
| `String` | `""` |
|
||||
| `Time` | Unix epoch (1970-01-01T00:00:00Z) |
|
||||
|
||||
Unmentioned indices take the element type's default — this is a **reset**, not a
|
||||
preserve. There is no read-modify-write merge: adding that would introduce cache
|
||||
staleness, a race window against other writers, and the latency of a round-trip
|
||||
read, all of which contradict MXAccess semantics.
|
||||
|
||||
**Validation.** The gateway rejects the following with `InvalidArgument`:
|
||||
|
||||
- `total_length == 0`
|
||||
- any `index >= total_length`
|
||||
- duplicate indices
|
||||
- `element_data_type` that is `Raw` or `Unspecified`
|
||||
- an element `value` whose kind does not match `element_data_type`
|
||||
- a `total_length` that exceeds the gateway-configured maximum array length
|
||||
|
||||
An empty `elements` list with a non-zero `total_length` is valid — it writes an
|
||||
all-defaults array of length `total_length` (explicit reset). A `sparse_array_value`
|
||||
arriving on any read or event path is rejected as a guard; the worker never
|
||||
produces one.
|
||||
|
||||
**Non-goals.** There is no preserve-unchanged read-modify-write merge, no
|
||||
element-wise COM write (MXAccess has no such API), and no change to `ReadBulk`
|
||||
string addressing.
|
||||
|
||||
`sparse_array_value` is accepted by every write variant: `Write`, `Write2`,
|
||||
`WriteSecured`, `WriteSecured2`, and each `*BulkEntry` entry.
|
||||
|
||||
## Status Model
|
||||
|
||||
Represent `MXSTATUS_PROXY` explicitly:
|
||||
@@ -1049,6 +1111,30 @@ Known important parity areas from existing captures:
|
||||
- Invalid handles and cross-server handles have specific exception/status
|
||||
behavior.
|
||||
- STA message pumping is required for event delivery.
|
||||
- A plain `Write`/`Write2` only honors its `user_id` when the item has an active
|
||||
supervisory advise. Callers that do not go through the
|
||||
`AuthenticateUser` → `WriteSecured`/`WriteSecured2` path must send
|
||||
`AdviseSupervisory` for the item before a user id on a plain write is
|
||||
recorded; otherwise the user id is ignored.
|
||||
- Writing an array attribute replaces the whole array — it is not an
|
||||
element-wise patch. To change a subset of elements the caller must send the
|
||||
full array (unchanged elements included); sending only the changed elements
|
||||
resizes the attribute. `MxSparseArray` provides a default-fill shorthand for
|
||||
this: the gateway reconstructs the full array from the supplied sparse
|
||||
representation (unmentioned indices → type default) before sending the
|
||||
whole-array write to the worker.
|
||||
- Array attribute addresses require the `[]` body suffix to be write-capable.
|
||||
The gateway normalizes bare-name addresses at add-item time across the whole
|
||||
add family — single `AddItem`/`AddItem2`, the batched `AddItemBulk`, and
|
||||
`AddBufferedItem`: when Galaxy metadata confirms `is_array`, the gateway
|
||||
appends `[]` before registering the handle with the worker. When metadata is
|
||||
unavailable or the address is not recognized as an array, the address is
|
||||
forwarded unchanged so existing behavior is not regressed. The normalized
|
||||
address is stored in `SessionItemRegistration.TagAddress` (for `AddItemBulk`
|
||||
the worker echoes the suffixed address it bound back in each
|
||||
`SubscribeResult.TagAddress`) and applies consistently to all subsequent
|
||||
writes on that handle. `ReadBulk` is unaffected — it uses raw address strings
|
||||
with its own ephemeral registration.
|
||||
|
||||
The gateway should not "fix" these behaviors unless the client explicitly opts
|
||||
into a non-parity mode.
|
||||
@@ -1084,12 +1170,19 @@ Resolved for v1:
|
||||
|
||||
- MXAccess COM target is `ArchestrA.MxAccess.LMXProxyServerClass` /
|
||||
`LMXProxy.LMXProxyServer.1` from the installed 32-bit `LmxProxy.dll`.
|
||||
- One `OpenSession` maps to one worker process; no reconnectable sessions.
|
||||
- One active event subscriber per session.
|
||||
- One `OpenSession` maps to one worker process.
|
||||
- Reconnectable sessions: clients reconnect by re-issuing `StreamEvents` with
|
||||
`after_worker_sequence`; the gateway replays the retained ring tail and emits
|
||||
a `ReplayGap` sentinel when events were evicted. See `docs/Sessions.md`.
|
||||
- Multi-subscriber event fan-out: multiple concurrent `StreamEvents` callers on
|
||||
the same session are supported; single-subscriber mode uses fail-fast
|
||||
backpressure, multi-subscriber mode disconnects only the slow consumer. See
|
||||
`docs/Sessions.md`.
|
||||
- API key authentication with hashed keys in gateway-owned SQLite.
|
||||
- Basic Blazor Server dashboard with Bootstrap CSS/JS and real-time updates.
|
||||
- Workers run as the gateway service identity.
|
||||
- Event backpressure is fail-fast with bounded queues.
|
||||
- Event backpressure is fail-fast with bounded queues (single-subscriber) or
|
||||
per-subscriber disconnect (multi-subscriber).
|
||||
- No public command batching.
|
||||
- `OperationComplete` is forwarded only when native MXAccess raises it.
|
||||
- `OnBufferedDataChange` is modeled now; multi-sample payload conversion remains
|
||||
@@ -1098,8 +1191,6 @@ Resolved for v1:
|
||||
Post-v1 revisit items:
|
||||
|
||||
- production event-rate target and optional coalescing,
|
||||
- reconnectable sessions,
|
||||
- multi-subscriber event fan-out,
|
||||
- restricted worker process identity,
|
||||
- command batching for high-volume setup.
|
||||
|
||||
|
||||
+68
@@ -0,0 +1,68 @@
|
||||
# Saved Task List — Session Resilience Epic
|
||||
|
||||
> Snapshot taken 2026-06-16, before switching to the dashboard disable-login feature.
|
||||
> This is the in-flight epic from `docs/plans/2026-06-15-session-resilience.md`.
|
||||
|
||||
## How to resume
|
||||
|
||||
```
|
||||
/superpowers-extended-cc:executing-plans docs/plans/2026-06-15-session-resilience.md
|
||||
```
|
||||
|
||||
The authoritative resume state lives in
|
||||
`docs/plans/2026-06-15-session-resilience.md.tasks.json` (tasks 1–12 completed,
|
||||
13–28 pending). This file is just a human-readable mirror.
|
||||
|
||||
## Status
|
||||
|
||||
**12 of 28 tasks complete** (Phases 1–2 + reconnect core of Phase 3). All completed
|
||||
work is merged to `main` (commit `c446bef`, pushed to origin).
|
||||
|
||||
### Completed — Phase 1 (Foundation)
|
||||
- ✅ Task 1 (#108): Add OwnerKeyId to the session
|
||||
- ✅ Task 2 (#109): SessionEventDistributor skeleton
|
||||
- ✅ Task 3 (#110): Bounded replay ring buffer
|
||||
- ✅ Task 4 (#111): Rewire AttachEventSubscriber + EventStreamService onto distributor
|
||||
- ✅ Task 5 (#112): Per-subscriber backpressure isolation
|
||||
- ✅ Task 6 (#113): Dashboard broadcaster becomes a distributor subscriber
|
||||
|
||||
### Completed — Phase 2 (Multi-subscriber fan-out)
|
||||
- ✅ Task 7 (#114): Remove validator block + add subscriber cap option
|
||||
- ✅ Task 8 (#115): Subscriber-lease collection + cap enforcement
|
||||
- ✅ Task 9 (#116): Multi-subscriber end-to-end test (FakeWorkerHarness)
|
||||
|
||||
### Completed — Phase 3 (Reconnect core)
|
||||
- ✅ Task 10 (#117): Proto — ReplayGap signal
|
||||
- ✅ Task 11 (#118): Detach-grace session retention
|
||||
- ✅ Task 12 (#119): Replay-on-reconnect + emit ReplayGap
|
||||
|
||||
### Pending — Phase 3 finish
|
||||
- ⏳ Task 13 (#120): Owner re-validation on reconnect — blockedBy 12, 1
|
||||
- ⏳ Task 14 (#121): Client ReplayGap handling — all 5 clients — blockedBy 10
|
||||
- Carry the per-language presence-check idiom note for `optional` message fields.
|
||||
- ⏳ Task 15 (#122): Reconnect integration test (fake worker) — blockedBy 12
|
||||
|
||||
### Pending — Phase 4 (Per-session dashboard ACL)
|
||||
- ⏳ Task 16 (#123): gRPC session-owner gate + all-sessions admin scope — blockedBy 9, 1
|
||||
- ⏳ Task 17 (#124): Session Tag + dashboard group-to-tag config — blockedBy 9
|
||||
- ⏳ Task 18 (#125): EventsHub per-session ACL + hub-token tag claim — blockedBy 17
|
||||
- Open decision: Viewer default (admin-sees-all vs strict per-session).
|
||||
- ⏳ Task 19 (#126): ACL tests incl. live LDAP users — blockedBy 18
|
||||
|
||||
### Pending — Phase 5 (Orphan-worker reattach)
|
||||
- ⏳ Task 20 (#127): Stable gateway-instance id + stable pipe naming — blockedBy 19
|
||||
- ⏳ Task 21 (#128): Adoption manifest store (SQLite) — blockedBy 20
|
||||
- ⏳ Task 22 (#129): Proto — worker adopt/reconnect frame — blockedBy 21
|
||||
- ⏳ Task 23 (#130): Worker phone-home reconnect loop + self-terminate — blockedBy 22 (net48/x86, windev)
|
||||
- ⏳ Task 24 (#131): Gateway adoption — re-open pipes, nonce-validate, reject impostors — blockedBy 23
|
||||
- ⏳ Task 25 (#132): Resync adopted worker + ReplayGap to subscribers — blockedBy 24, 12
|
||||
- ⏳ Task 26 (#133): EnableOrphanReattach flag (default off) + terminator fallback — blockedBy 24
|
||||
- ⏳ Task 27 (#134): Gateway-restart reattach round-trip (WINDEV + live worker) — blockedBy 25, 26
|
||||
- ⏳ Task 28 (#135): Documented-rule reversals + stillpending refresh — blockedBy 27
|
||||
|
||||
## Notes
|
||||
- Phase 5 reverses the documented "Gateway restart does not reattach orphan workers"
|
||||
rule (CLAUDE.md) — this was explicitly approved during design.
|
||||
- Two deferred follow-ups were noted earlier, but only one is recorded here: dashboard visibility of `DetachedAtUtc` on
|
||||
`DashboardSessionSummary`.
|
||||
- Worker (net48/x86) tasks build/test on windev; everything else builds on macOS.
|
||||
File diff suppressed because it is too large
Load Diff
@@ -715,6 +715,22 @@ message MxEvent {
|
||||
google.protobuf.Timestamp gateway_receive_timestamp = 11;
|
||||
optional int32 hresult = 12;
|
||||
string raw_status = 13;
|
||||
// Gateway-synthesized reconnect-replay gap signal. Set ONLY on the single
|
||||
// sentinel MxEvent the gateway emits at the head of a StreamEvents stream
|
||||
// that was resumed via StreamEventsRequest.after_worker_sequence when the
|
||||
// requested sequence is older than the oldest event still retained in the
|
||||
// session replay ring (i.e. events were evicted and cannot be replayed).
|
||||
// On that sentinel, `family` is UNSPECIFIED, the `body` oneof is unset, and
|
||||
// no per-item fields (server_handle/item_handle/value/...) are populated;
|
||||
// clients MUST treat a present `replay_gap` as "you missed events — discard
|
||||
// local state and re-snapshot" and read `requested_after_sequence` /
|
||||
// `oldest_available_sequence` from it. Unset on every normal MXAccess event.
|
||||
// This field is ONLY ever set on events returned from the StreamEvents server
|
||||
// stream; it is ALWAYS unset on events in DrainEventsReply (the diagnostic
|
||||
// drain path never emits the sentinel).
|
||||
// Additive (proto3): existing clients that ignore this field continue to
|
||||
// deserialize the stream unchanged.
|
||||
optional ReplayGap replay_gap = 14;
|
||||
|
||||
oneof body {
|
||||
OnDataChangeEvent on_data_change = 20;
|
||||
@@ -726,6 +742,27 @@ message MxEvent {
|
||||
}
|
||||
}
|
||||
|
||||
// Reconnect-replay gap signal carried by a sentinel MxEvent (MxEvent.replay_gap)
|
||||
// when a client resumes StreamEvents via after_worker_sequence but the requested
|
||||
// sequence predates the oldest event still held in the session replay ring.
|
||||
// The events in the open interval (requested_after_sequence, oldest_available_sequence)
|
||||
// were evicted from the ring and cannot be replayed, so the client must
|
||||
// re-snapshot rather than assume a contiguous event history.
|
||||
message ReplayGap {
|
||||
// The worker_sequence the client asked to resume after
|
||||
// (StreamEventsRequest.after_worker_sequence).
|
||||
uint64 requested_after_sequence = 1;
|
||||
// The oldest worker_sequence still retained in the replay ring and available
|
||||
// for replay. Events with worker_sequence in the open interval
|
||||
// (requested_after_sequence, oldest_available_sequence) were evicted and are
|
||||
// unrecoverable. oldest_available_sequence itself IS still retained: a client
|
||||
// that wishes to resume without incurring another gap MUST set
|
||||
// after_worker_sequence = oldest_available_sequence - 1 in the next
|
||||
// StreamEventsRequest, which will cause the server to replay starting at
|
||||
// oldest_available_sequence (the first retained event).
|
||||
uint64 oldest_available_sequence = 2;
|
||||
}
|
||||
|
||||
enum MxEventFamily {
|
||||
MX_EVENT_FAMILY_UNSPECIFIED = 0;
|
||||
MX_EVENT_FAMILY_ON_DATA_CHANGE = 1;
|
||||
@@ -1003,6 +1040,7 @@ message MxValue {
|
||||
google.protobuf.Timestamp timestamp_value = 16;
|
||||
MxArray array_value = 17;
|
||||
bytes raw_value = 18;
|
||||
MxSparseArray sparse_array_value = 19;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1025,6 +1063,21 @@ message MxArray {
|
||||
}
|
||||
}
|
||||
|
||||
// Write-only sparse array value. The gateway expands this into a full,
|
||||
// default-filled MxArray before forwarding to the worker; the worker never
|
||||
// receives or produces it. Unmentioned indices take the element type's
|
||||
// default (reset, NOT preserved).
|
||||
message MxSparseArray {
|
||||
MxDataType element_data_type = 1;
|
||||
uint32 total_length = 2;
|
||||
repeated MxSparseElement elements = 3;
|
||||
}
|
||||
|
||||
message MxSparseElement {
|
||||
uint32 index = 1;
|
||||
MxValue value = 2; // scalar
|
||||
}
|
||||
|
||||
message BoolArray {
|
||||
repeated bool values = 1;
|
||||
}
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
<PropertyGroup>
|
||||
<IsPackable>true</IsPackable>
|
||||
<PackageId>ZB.MOM.WW.MxGateway.Contracts</PackageId>
|
||||
<Version>0.1.1</Version>
|
||||
<Version>0.1.2</Version>
|
||||
<Authors>Joseph Doherty</Authors>
|
||||
<Company>ZB MOM WW</Company>
|
||||
<Copyright>Copyright (c) ZB MOM WW. All rights reserved.</Copyright>
|
||||
|
||||
@@ -669,11 +669,12 @@ public sealed class WorkerLiveMxAccessSmokeTests(ITestOutputHelper output)
|
||||
Assert.NotEqual(0, userToIdReply.ArchestraUserToId.UserId);
|
||||
}
|
||||
|
||||
// Suspend / Activate against the advised item. The dev-rig TestInt item class
|
||||
// may not be suspendable (MXAccess returns 0x80070057 / E_INVALIDARG for a
|
||||
// wrong item class — see B8 notes). That is MXAccess parity: assert the reply
|
||||
// kind and a non-INVALID_REQUEST status, surface the HResult and MxStatusProxy
|
||||
// for the record, and do NOT treat a provider-side rejection as a test failure.
|
||||
// Suspend / Activate against the added-but-not-advised item (no Advise was issued
|
||||
// between AddItem and this call). The dev-rig TestInt item class may not be
|
||||
// suspendable (MXAccess returns 0x80070057 / E_INVALIDARG for a wrong item class
|
||||
// — see B8 notes). That is MXAccess parity: assert the reply kind and a
|
||||
// non-INVALID_REQUEST status, surface the HResult and MxStatusProxy for the
|
||||
// record, and do NOT treat a provider-side rejection as a test failure.
|
||||
MxCommandReply suspendReply = await fixture.Service.Invoke(
|
||||
CreateSuspendRequest(sessionId, serverHandle, itemHandle),
|
||||
new TestServerCallContext()).ConfigureAwait(false);
|
||||
@@ -827,8 +828,9 @@ public sealed class WorkerLiveMxAccessSmokeTests(ITestOutputHelper output)
|
||||
streamCancellation.Token)
|
||||
.ConfigureAwait(false);
|
||||
}
|
||||
catch (TimeoutException)
|
||||
catch (TimeoutException ex)
|
||||
{
|
||||
output.WriteLine($"B8: sample-bearing batch predicate timed out: {ex.Message}");
|
||||
bufferedBatch = null;
|
||||
}
|
||||
|
||||
|
||||
@@ -8,6 +8,23 @@ public sealed class DashboardOptions
|
||||
/// <summary>Gets whether anonymous localhost access to the dashboard is allowed.</summary>
|
||||
public bool AllowAnonymousLocalhost { get; init; } = true;
|
||||
|
||||
/// <summary>
|
||||
/// DEV/TEST ONLY. When true, the dashboard bypasses the login form entirely and
|
||||
/// auto-authenticates EVERY request as <see cref="AutoLoginUser"/> holding both
|
||||
/// dashboard roles (Administrator + Viewer). No cookie, no LDAP bind. Default false.
|
||||
/// Unlike <see cref="AllowAnonymousLocalhost"/> (which only succeeds the authorization
|
||||
/// requirement without authenticating), this mints a real principal, so the UI behaves
|
||||
/// as a signed-in admin and applies to all clients (not just loopback). Never enable in
|
||||
/// production. See docs/plans/2026-06-16-dashboard-disable-login-design.md.
|
||||
/// </summary>
|
||||
public bool DisableLogin { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Username minted for the auto-login principal when <see cref="DisableLogin"/> is true.
|
||||
/// Null/blank falls back to the GLAuth Administrator test user <c>multi-role</c>.
|
||||
/// </summary>
|
||||
public string? AutoLoginUser { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// When true (default), the dashboard auth cookie is restricted to HTTPS
|
||||
/// requests via <see cref="Microsoft.AspNetCore.Http.CookieSecurePolicy.Always"/>.
|
||||
|
||||
@@ -6,5 +6,6 @@ public sealed record EffectiveSessionConfiguration(
|
||||
int MaxPendingCommandsPerSession,
|
||||
int DefaultLeaseSeconds,
|
||||
int LeaseSweepIntervalSeconds,
|
||||
int DetachGraceSeconds,
|
||||
bool AllowMultipleEventSubscribers,
|
||||
int MaxEventSubscribersPerSession);
|
||||
|
||||
@@ -46,6 +46,7 @@ public sealed class GatewayConfigurationProvider(IOptions<GatewayOptions> option
|
||||
MaxPendingCommandsPerSession: value.Sessions.MaxPendingCommandsPerSession,
|
||||
DefaultLeaseSeconds: value.Sessions.DefaultLeaseSeconds,
|
||||
LeaseSweepIntervalSeconds: value.Sessions.LeaseSweepIntervalSeconds,
|
||||
DetachGraceSeconds: value.Sessions.DetachGraceSeconds,
|
||||
AllowMultipleEventSubscribers: value.Sessions.AllowMultipleEventSubscribers,
|
||||
MaxEventSubscribersPerSession: value.Sessions.MaxEventSubscribersPerSession),
|
||||
Events: new EffectiveEventConfiguration(
|
||||
|
||||
@@ -181,6 +181,23 @@ public sealed class GatewayOptionsValidator : OptionsValidatorBase<GatewayOption
|
||||
options.MaxEventSubscribersPerSession,
|
||||
"MxGateway:Sessions:MaxEventSubscribersPerSession must be greater than zero.",
|
||||
builder);
|
||||
AddIfNegative(
|
||||
options.DetachGraceSeconds,
|
||||
"MxGateway:Sessions:DetachGraceSeconds must be zero or greater (0 disables detach-grace retention).",
|
||||
builder);
|
||||
AddIfNegative(
|
||||
options.WorkerReadyWaitTimeoutMs,
|
||||
"MxGateway:Sessions:WorkerReadyWaitTimeoutMs must be greater than or equal to zero.",
|
||||
builder);
|
||||
|
||||
// NOTE: We intentionally do NOT reject !AllowMultipleEventSubscribers &&
|
||||
// MaxEventSubscribersPerSession > 1 as a hard validation error here. The default
|
||||
// SessionOptions ships with AllowMultipleEventSubscribers=false and
|
||||
// MaxEventSubscribersPerSession=8; making those defaults a validation failure would
|
||||
// break every deployment that has not explicitly set the cap. The cap is simply
|
||||
// ignored in single-subscriber mode (AttachEventSubscriber derives effectiveCap=1),
|
||||
// so the only practical consequence of the apparent inconsistency is a dead config
|
||||
// knob, not incorrect behavior.
|
||||
}
|
||||
|
||||
private static void ValidateEvents(EventOptions options, ValidationBuilder builder)
|
||||
|
||||
@@ -23,6 +23,28 @@ public sealed class SessionOptions
|
||||
/// <summary>Gets the interval for sweeping expired session leases in seconds.</summary>
|
||||
public int LeaseSweepIntervalSeconds { get; init; } = 30;
|
||||
|
||||
/// <summary>
|
||||
/// Gets the detach-grace retention window, in seconds, that a session is kept alive
|
||||
/// after its last external (gRPC) event-stream subscriber drops, so a client can
|
||||
/// reconnect to it. While within the window the session stays in
|
||||
/// <c>Ready</c> and remains usable; if no new external subscriber attaches before the
|
||||
/// window elapses, the lease monitor closes the session exactly as it closes an
|
||||
/// expired lease. The gateway-owned internal dashboard subscriber does not count as an
|
||||
/// external subscriber, so a session whose only remaining subscriber is the dashboard
|
||||
/// mirror still enters detach-grace. A value of <c>0</c> disables retention: the
|
||||
/// session reverts to the original behavior of lingering only until its normal lease
|
||||
/// expires. The reconnect/replay itself is implemented separately (Task 12); this
|
||||
/// option controls retention and expiry only.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// The effective close happens within the next sweep cycle after the window elapses —
|
||||
/// up to <see cref="LeaseSweepIntervalSeconds"/> after expiry. Operators who want a
|
||||
/// firm minimum bound should set <c>DetachGraceSeconds</c> greater than
|
||||
/// <see cref="LeaseSweepIntervalSeconds"/>; otherwise a session whose window expires
|
||||
/// just before a sweep run may be closed within seconds of detach.
|
||||
/// </remarks>
|
||||
public int DetachGraceSeconds { get; init; } = 30;
|
||||
|
||||
/// <summary>
|
||||
/// Gets a value indicating whether multiple event subscribers are allowed per session.
|
||||
/// </summary>
|
||||
@@ -34,4 +56,15 @@ public sealed class SessionOptions
|
||||
/// effectively 1 when it is <see langword="false"/>. Must be greater than zero.
|
||||
/// </summary>
|
||||
public int MaxEventSubscribersPerSession { get; init; } = 8;
|
||||
|
||||
/// <summary>
|
||||
/// Gets the bounded time, in milliseconds, the gateway will wait for a worker client
|
||||
/// to reach <c>Ready</c> when the session itself is already <c>Ready</c> but the worker
|
||||
/// state has transiently diverged (e.g. <c>Handshaking</c> after a heartbeat blip).
|
||||
/// The wait applies only to transient worker states; terminal states
|
||||
/// (<c>Faulted</c>/<c>Closing</c>/<c>Closed</c>/no worker) fail fast immediately.
|
||||
/// A value of <c>0</c> (the default) disables the wait — the gateway keeps the original
|
||||
/// fail-fast behavior. Must be greater than or equal to zero.
|
||||
/// </summary>
|
||||
public int WorkerReadyWaitTimeoutMs { get; init; }
|
||||
}
|
||||
|
||||
@@ -0,0 +1,95 @@
|
||||
using System.Security.Claims;
|
||||
using System.Text.Encodings.Web;
|
||||
using Microsoft.AspNetCore.Authentication;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ZB.MOM.WW.Auth.AspNetCore;
|
||||
using ZB.MOM.WW.MxGateway.Server.Configuration;
|
||||
|
||||
namespace ZB.MOM.WW.MxGateway.Server.Dashboard;
|
||||
|
||||
/// <summary>
/// Always-succeeding authentication handler for the dashboard, intended to be registered
/// ONLY when <c>MxGateway:Dashboard:DisableLogin</c> is true. It takes the place of the
/// cookie handler under <see cref="DashboardAuthenticationDefaults.AuthenticationScheme"/>
/// and authenticates every request as the configured dev user holding both dashboard roles.
/// No credential, cookie, or LDAP bind is involved. The principal it mints follows the
/// claim shape produced by <see cref="DashboardAuthenticator"/>. DEV/TEST ONLY; never
/// enable in production.
/// </summary>
public sealed class DashboardAutoLoginAuthenticationHandler
    : AuthenticationHandler<AuthenticationSchemeOptions>, IAuthenticationSignInHandler
{
    /// <summary>Fallback username applied when <c>AutoLoginUser</c> is null or blank.</summary>
    public const string DefaultUser = "multi-role";

    // Resolved once per handler instance: the trimmed configured user, or DefaultUser.
    private readonly string _user;

    /// <summary>Creates the handler and resolves the effective auto-login username.</summary>
    /// <param name="options">The per-scheme authentication options monitor.</param>
    /// <param name="logger">The logger factory forwarded to the base handler.</param>
    /// <param name="encoder">The URL encoder forwarded to the base handler.</param>
    /// <param name="gatewayOptions">Gateway options that carry <c>Dashboard.AutoLoginUser</c>.</param>
    public DashboardAutoLoginAuthenticationHandler(
        IOptionsMonitor<AuthenticationSchemeOptions> options,
        ILoggerFactory logger,
        UrlEncoder encoder,
        IOptions<GatewayOptions> gatewayOptions)
        : base(options, logger, encoder)
    {
        _user = ResolveUserName(gatewayOptions.Value.Dashboard.AutoLoginUser);
    }

    /// <summary>Does nothing: auto-login persists no cookie, so there is nothing to sign in.</summary>
    /// <param name="user">Ignored.</param>
    /// <param name="properties">Ignored.</param>
    /// <returns>An already-completed task.</returns>
    public Task SignInAsync(ClaimsPrincipal user, AuthenticationProperties? properties)
    {
        return Task.CompletedTask;
    }

    /// <summary>Does nothing: no auth cookie exists to clear; the next request authenticates again.</summary>
    /// <param name="properties">Ignored.</param>
    /// <returns>An already-completed task.</returns>
    public Task SignOutAsync(AuthenticationProperties? properties)
    {
        return Task.CompletedTask;
    }

    /// <inheritdoc />
    protected override Task<AuthenticateResult> HandleAuthenticateAsync()
    {
        // A fresh principal and ticket are minted on every call; nothing is read from the request.
        AuthenticationTicket ticket = new(CreatePrincipal(_user), Scheme.Name);
        AuthenticateResult success = AuthenticateResult.Success(ticket);

        return Task.FromResult(success);
    }

    /// <summary>
    /// Creates the multi-role dev principal. A null or blank <paramref name="user"/> is
    /// replaced by <see cref="DefaultUser"/>; any other value is trimmed. The claim set
    /// follows <see cref="DashboardAuthenticator"/> except that LDAP group claims
    /// (<c>LdapGroupClaimType</c>) are deliberately left out, since auto-login has no LDAP
    /// context to take them from.
    /// </summary>
    /// <param name="user">The configured auto-login username (may be null/blank).</param>
    /// <returns>An authenticated principal carrying both dashboard roles.</returns>
    internal static ClaimsPrincipal CreatePrincipal(string? user)
    {
        string effectiveName = ResolveUserName(user);

        // Identity claims first, then the two role claims. No LdapGroupClaimType claims.
        Claim[] claims = new Claim[]
        {
            new(ClaimTypes.NameIdentifier, effectiveName),
            new(ZbClaimTypes.Username, effectiveName),
            new(ZbClaimTypes.Name, effectiveName),
            new(ZbClaimTypes.DisplayName, effectiveName),
            new(ZbClaimTypes.Role, DashboardRoles.Admin),
            new(ZbClaimTypes.Role, DashboardRoles.Viewer),
        };

        // A non-empty authentication type marks the identity as authenticated.
        ClaimsIdentity identity = new(
            claims,
            authenticationType: DashboardAuthenticationDefaults.AuthenticationScheme,
            nameType: ZbClaimTypes.Name,
            roleType: ZbClaimTypes.Role);

        return new ClaimsPrincipal(identity);
    }

    // Shared normalization: blank/null -> DefaultUser, otherwise the trimmed value.
    private static string ResolveUserName(string? configured)
    {
        if (string.IsNullOrWhiteSpace(configured))
        {
            return DefaultUser;
        }

        return configured.Trim();
    }
}
|
||||
@@ -2,6 +2,7 @@ using Microsoft.AspNetCore.Authentication;
|
||||
using Microsoft.AspNetCore.Authentication.Cookies;
|
||||
using Microsoft.AspNetCore.Authorization;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ZB.MOM.WW.Auth.Abstractions.Roles;
|
||||
using ZB.MOM.WW.Auth.AspNetCore;
|
||||
@@ -21,7 +22,8 @@ public static class DashboardServiceCollectionExtensions
|
||||
/// <param name="services">Service collection to register services.</param>
|
||||
/// <param name="configuration">
|
||||
/// Application configuration, used to bind the shared LDAP provider's options
|
||||
/// from the <c>MxGateway:Ldap</c> section.
|
||||
/// from the <c>MxGateway:Ldap</c> section. Also read to select the dashboard
|
||||
/// authentication scheme via the <c>MxGateway:Dashboard:DisableLogin</c> dev flag.
|
||||
/// </param>
|
||||
public static IServiceCollection AddGatewayDashboard(
|
||||
this IServiceCollection services,
|
||||
@@ -55,9 +57,39 @@ public static class DashboardServiceCollectionExtensions
|
||||
.AddInteractiveServerComponents();
|
||||
services.AddSignalR();
|
||||
|
||||
services
|
||||
.AddAuthentication(DashboardAuthenticationDefaults.AuthenticationScheme)
|
||||
.AddCookie(DashboardAuthenticationDefaults.AuthenticationScheme, cookieOptions =>
|
||||
// DEV/TEST ONLY. Read directly from configuration here because authentication scheme
|
||||
// registration runs before options binding. Key mirrors DashboardOptions.DisableLogin.
|
||||
bool disableLogin = configuration.GetValue<bool>("MxGateway:Dashboard:DisableLogin");
|
||||
|
||||
AuthenticationBuilder authentication =
|
||||
services.AddAuthentication(DashboardAuthenticationDefaults.AuthenticationScheme);
|
||||
|
||||
if (disableLogin)
|
||||
{
|
||||
// Register an always-authenticating handler UNDER the cookie scheme name, so the
|
||||
// Viewer/Admin/HubClients policies (which all resolve this scheme) authenticate
|
||||
// through it as the multi-role dev user — zero policy or page changes.
|
||||
authentication.AddScheme<AuthenticationSchemeOptions, DashboardAutoLoginAuthenticationHandler>(
|
||||
DashboardAuthenticationDefaults.AuthenticationScheme,
|
||||
_ => { });
|
||||
|
||||
// Loud warning, emitted on first resolution of GatewayOptions (i.e. on the first
|
||||
// request/options access, not guaranteed at process start). Dev-only safety notice.
|
||||
services.AddOptions<GatewayOptions>().PostConfigure<ILoggerFactory>((gatewayOptions, loggerFactory) =>
|
||||
loggerFactory
|
||||
.CreateLogger("ZB.MOM.WW.MxGateway.Server.Dashboard.DisableLogin")
|
||||
.LogWarning(
|
||||
"DASHBOARD LOGIN DISABLED (MxGateway:Dashboard:DisableLogin=true) — every request is "
|
||||
+ "authenticated as '{User}' with full permissions ({Roles}). Dev/test only; never "
|
||||
+ "enable in production.",
|
||||
string.IsNullOrWhiteSpace(gatewayOptions.Dashboard.AutoLoginUser)
|
||||
? DashboardAutoLoginAuthenticationHandler.DefaultUser
|
||||
: gatewayOptions.Dashboard.AutoLoginUser!.Trim(),
|
||||
$"{DashboardRoles.Admin}, {DashboardRoles.Viewer}"));
|
||||
}
|
||||
else
|
||||
{
|
||||
authentication.AddCookie(DashboardAuthenticationDefaults.AuthenticationScheme, cookieOptions =>
|
||||
{
|
||||
// Hardened defaults (HttpOnly, SameSite=Strict, SecurePolicy, SlidingExpiration,
|
||||
// ExpireTimeSpan) via the shared ZbCookieDefaults.Apply. requireHttps is set to
|
||||
@@ -73,10 +105,12 @@ public static class DashboardServiceCollectionExtensions
|
||||
cookieOptions.LoginPath = "/login";
|
||||
cookieOptions.LogoutPath = "/logout";
|
||||
cookieOptions.AccessDeniedPath = "/denied";
|
||||
})
|
||||
.AddScheme<AuthenticationSchemeOptions, HubTokenAuthenticationHandler>(
|
||||
DashboardAuthenticationDefaults.HubAuthenticationScheme,
|
||||
_ => { });
|
||||
});
|
||||
}
|
||||
|
||||
authentication.AddScheme<AuthenticationSchemeOptions, HubTokenAuthenticationHandler>(
|
||||
DashboardAuthenticationDefaults.HubAuthenticationScheme,
|
||||
_ => { });
|
||||
|
||||
// Honour DashboardOptions.RequireHttpsCookie (default true / Always; set false for dev
|
||||
// HTTP deployments → SameAsRequest) and the optional per-environment cookie-name
|
||||
|
||||
@@ -106,6 +106,13 @@ public sealed class GalaxyHierarchyIndex
|
||||
{
|
||||
parentKey = 0;
|
||||
}
|
||||
// Re-root orphans whose parent object is absent from the set (e.g. a deleted or
|
||||
// never-loaded container area). Otherwise they bucket under a phantom parent id
|
||||
// that is never reached from the root, so they vanish from browse entirely.
|
||||
else if (parentKey != 0 && !objectsById.ContainsKey(parentKey))
|
||||
{
|
||||
parentKey = 0;
|
||||
}
|
||||
if (!childrenByParent.TryGetValue(parentKey, out List<GalaxyObjectView>? bucket))
|
||||
{
|
||||
bucket = [];
|
||||
|
||||
@@ -172,6 +172,11 @@ public sealed class GalaxyRepository(GalaxyRepositoryOptions options) : IGalaxyR
|
||||
AckCommentSubtag = string.Empty,
|
||||
};
|
||||
|
||||
// Area objects (category 13) are returned even when undeployed (deployed_package_id = 0):
|
||||
// they are organizational/model nodes that group deployed objects, so excluding them
|
||||
// orphans every area whose containing area is not itself deployed. All non-area objects
|
||||
// still require deployment. Orphans left by a missing/deleted parent area are re-rooted
|
||||
// by GalaxyHierarchyIndex.Build so nothing disappears from browse.
|
||||
private const string HierarchySql = @"
|
||||
;WITH template_chain AS (
|
||||
SELECT g.gobject_id AS instance_gobject_id, t.gobject_id AS template_gobject_id,
|
||||
@@ -218,7 +223,7 @@ INNER JOIN template_definition td
|
||||
ON g.template_definition_id = td.template_definition_id
|
||||
WHERE td.category_id IN (1, 3, 4, 10, 11, 13, 17, 24, 26)
|
||||
AND g.is_template = 0
|
||||
AND g.deployed_package_id <> 0
|
||||
AND (g.deployed_package_id <> 0 OR td.category_id = 13)
|
||||
ORDER BY parent_gobject_id, g.tag_name";
|
||||
|
||||
// Unlike HierarchySql, this query has diverged from the OtOpcUa original. It returns two
|
||||
|
||||
@@ -68,17 +68,79 @@ public sealed class EventStreamService(
|
||||
// No `using` here — subscriber.Dispose() is called exactly once in the finally
|
||||
// block below, which also disposes the reader. A `using` declaration would add a
|
||||
// second Dispose on the same path and double-decrement the session subscriber count.
|
||||
IEventSubscriberLease subscriber = session.AttachEventSubscriber(
|
||||
options.Value.Sessions.AllowMultipleEventSubscribers);
|
||||
// The subscriber mode (single vs. multi) is derived inside AttachEventSubscriber from
|
||||
// the session's own SessionEventStreaming.AllowMultipleEventSubscribers field — the
|
||||
// same source the distributor uses — so the two cannot diverge.
|
||||
//
|
||||
// Reconnect/resume (Task 12): when AfterWorkerSequence > 0 the client is resuming, so
|
||||
// attach via the replay variant that atomically snapshots the replay ring AND registers
|
||||
// the live subscriber under one lock. That single critical section is the crux of the
|
||||
// no-gap/no-duplicate handoff: every replayed event has sequence <= LiveResumeSequence
|
||||
// and every live event delivered below is filtered to sequence > LiveResumeSequence, so
|
||||
// an event that was both replayed and (racing the registration) fanned into the live
|
||||
// channel is dropped exactly once, while no newer event is skipped. See
|
||||
// SessionEventDistributor.RegisterWithReplay for the full argument.
|
||||
//
|
||||
// AfterWorkerSequence == 0 (fresh stream, not a resume) keeps the pre-Task-12 behavior:
|
||||
// a plain attach, no replay, no sentinel, and the live filter watermark stays 0.
|
||||
ulong afterWorkerSequence = request.AfterWorkerSequence;
|
||||
IEventSubscriberLease subscriber;
|
||||
IReadOnlyList<MxEvent> replayedEvents = [];
|
||||
bool replayGap = false;
|
||||
ulong oldestAvailableSequence = 0;
|
||||
|
||||
if (afterWorkerSequence > 0)
|
||||
{
|
||||
EventSubscriberReplayAttachment attachment = session.AttachEventSubscriberWithReplay(
|
||||
options.Value.Sessions.MaxEventSubscribersPerSession,
|
||||
afterWorkerSequence);
|
||||
subscriber = attachment.Lease;
|
||||
replayedEvents = attachment.ReplayedEvents;
|
||||
replayGap = attachment.Gap;
|
||||
oldestAvailableSequence = attachment.OldestAvailableSequence;
|
||||
|
||||
// The live filter resumes strictly after the last replayed sequence (or, when
|
||||
// nothing was replayed, after the requested watermark). This is what makes the
|
||||
// handoff free of duplicates: anything <= this watermark was already replayed.
|
||||
afterWorkerSequence = attachment.LiveResumeSequence;
|
||||
}
|
||||
else
|
||||
{
|
||||
subscriber = session.AttachEventSubscriber(
|
||||
options.Value.Sessions.MaxEventSubscribersPerSession);
|
||||
}
|
||||
|
||||
int streamQueueDepth = 0;
|
||||
ulong afterWorkerSequence = request.AfterWorkerSequence;
|
||||
IAsyncEnumerator<MxEvent> reader = subscriber.Reader
|
||||
.ReadAllAsync(cancellationToken)
|
||||
.GetAsyncEnumerator(cancellationToken);
|
||||
|
||||
try
|
||||
{
|
||||
// Emit order for a resume: the ReplayGap sentinel FIRST (only when events were
|
||||
// evicted), then the still-retained replay batch, then live. The sentinel is an
|
||||
// explicit documented control signal (not a synthesized MXAccess event) and is
|
||||
// delivered ONLY to this resuming subscriber — it is never fanned to other
|
||||
// subscribers and never appears in DrainEventsReply (that path is untouched).
|
||||
if (replayGap)
|
||||
{
|
||||
yield return CreateReplayGapSentinel(
|
||||
request.SessionId,
|
||||
request.AfterWorkerSequence,
|
||||
oldestAvailableSequence);
|
||||
}
|
||||
|
||||
foreach (MxEvent replayedEvent in replayedEvents)
|
||||
{
|
||||
// RegisterWithReplay already returns only events strictly newer than
|
||||
// AfterWorkerSequence, so no per-item sequence guard is needed here.
|
||||
// There is no per-event constraint filter on the event stream: events are
|
||||
// fanned as-is by the distributor pump. The only dedup watermark is the
|
||||
// LiveResumeSequence applied in the live loop below (to drop any event
|
||||
// that was both replayed and raced into the live channel).
|
||||
yield return replayedEvent;
|
||||
}
|
||||
|
||||
while (true)
|
||||
{
|
||||
MxEvent mxEvent;
|
||||
@@ -141,4 +203,24 @@ public sealed class EventStreamService(
|
||||
metrics.StreamDisconnected("Detached");
|
||||
}
|
||||
}
|
||||
|
||||
// Creates the one ReplayGap control sentinel that heads a resumed StreamEvents stream when
// the requested AfterWorkerSequence is older than the oldest event still retained (i.e.
// events were evicted). Following the proto contract (MxEvent.replay_gap), only the session
// id and the ReplayGap payload are set: family stays UNSPECIFIED, and there is no body and
// no per-item field. This is a documented control signal, NOT a synthesized MXAccess event,
// so emitting it does not violate the no-synthesis rule.
private static MxEvent CreateReplayGapSentinel(
    string sessionId,
    ulong requestedAfterSequence,
    ulong oldestAvailableSequence)
{
    ReplayGap gap = new()
    {
        RequestedAfterSequence = requestedAfterSequence,
        OldestAvailableSequence = oldestAvailableSequence,
    };

    return new MxEvent
    {
        SessionId = sessionId,
        ReplayGap = gap,
    };
}
|
||||
}
|
||||
|
||||
@@ -949,6 +949,7 @@ public sealed class MxAccessGatewayService(
|
||||
SessionManagerErrorCode.SessionNotFound => StatusCode.NotFound,
|
||||
SessionManagerErrorCode.SessionNotReady => StatusCode.FailedPrecondition,
|
||||
SessionManagerErrorCode.EventSubscriberAlreadyActive => StatusCode.ResourceExhausted,
|
||||
SessionManagerErrorCode.EventSubscriberLimitReached => StatusCode.ResourceExhausted,
|
||||
SessionManagerErrorCode.EventQueueOverflow => StatusCode.ResourceExhausted,
|
||||
SessionManagerErrorCode.SessionLimitExceeded => StatusCode.ResourceExhausted,
|
||||
SessionManagerErrorCode.OpenFailed => StatusCode.Unavailable,
|
||||
|
||||
@@ -196,11 +196,30 @@ public sealed class ConstraintEnforcer(
|
||||
|
||||
// Resolves a tag address to its Galaxy metadata entry, or null when the address is blank
// or unknown to the current hierarchy cache snapshot.
//
// Lookup order:
//   1. the address exactly as given;
//   2. on a miss, and only when the address is not already "[]"-suffixed, the address with
//      "[]" appended, accepted only if the matched attribute is flagged as an array.
//
// NOTE: the superseded single-expression body (an unconditional `return` on a direct lookup)
// has been dropped. Left in front of this logic it made the array probe unreachable and
// declared `lookup` twice.
private GalaxyTagLookup? ResolveTarget(string tagAddress)
{
    if (string.IsNullOrWhiteSpace(tagAddress))
    {
        return null;
    }

    // Read the snapshot once so both probes hit the same index instance.
    IReadOnlyDictionary<string, GalaxyTagLookup> tagsByAddress = cache.Current.Index.TagsByAddress;
    if (tagsByAddress.TryGetValue(tagAddress, out GalaxyTagLookup? lookup))
    {
        return lookup;
    }

    // Galaxy SQL keys array attributes by their suffixed FullTagReference (e.g. "Obj.Arr[]"),
    // but callers pass the bare address ("Obj.Arr") before the worker-boundary normalization
    // runs. Probe the suffixed form so a bare array name resolves to its array attribute,
    // consistent with ArrayAddressNormalizer. The suffixed string is only built on a direct
    // miss for a not-yet-suffixed address, and only accepted when it is truly an array.
    if (!tagAddress.EndsWith("[]", StringComparison.Ordinal)
        && tagsByAddress.TryGetValue(tagAddress + "[]", out GalaxyTagLookup? arrayLookup)
        && arrayLookup.Attribute?.IsArray == true)
    {
        return arrayLookup;
    }

    return null;
}
|
||||
|
||||
private static bool MatchesPathOrTag(
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
using ZB.MOM.WW.MxGateway.Server.Galaxy;
|
||||
|
||||
namespace ZB.MOM.WW.MxGateway.Server.Sessions;
|
||||
|
||||
/// <summary>
/// Converts a bare MXAccess attribute address into its array form by appending a trailing
/// <c>[]</c> when Galaxy Repository metadata lists the suffixed address as an array
/// attribute. MXAccess needs the <c>[]</c> suffix on the AddItem address for an array
/// attribute to be writable. The rewrite is best-effort: a blank address, an address that
/// already ends in <c>[]</c>, an address with no suffixed entry in the metadata index, or a
/// non-array attribute is returned exactly as it was passed in.
/// </summary>
public sealed class ArrayAddressNormalizer(IGalaxyHierarchyCache cache)
{
    private const string ArraySuffix = "[]";

    /// <summary>
    /// Returns <paramref name="address"/> with <c>[]</c> appended when the current Galaxy
    /// metadata snapshot reports the suffixed address as an array attribute; otherwise
    /// returns <paramref name="address"/> unchanged.
    /// </summary>
    /// <param name="address">The MXAccess attribute address to normalize.</param>
    /// <returns>The suffixed address when a rewrite applies, else the original address.</returns>
    public string Normalize(string address)
    {
        // Nothing to do for blank input or for an address that is already in array form.
        if (string.IsNullOrWhiteSpace(address)
            || address.EndsWith(ArraySuffix, StringComparison.Ordinal))
        {
            return address;
        }

        // Galaxy SQL keys array attributes by their suffixed FullTagReference
        // (e.g. "Obj.Arr[]"), so the suffixed form is what gets looked up.
        string candidate = address + ArraySuffix;
        if (!cache.Current.Index.TagsByAddress.TryGetValue(candidate, out GalaxyTagLookup? match))
        {
            return address;
        }

        if (match.Attribute?.IsArray == true)
        {
            return candidate;
        }

        return address;
    }
}
|
||||
@@ -0,0 +1,43 @@
|
||||
using ZB.MOM.WW.MxGateway.Contracts.Proto;
|
||||
|
||||
namespace ZB.MOM.WW.MxGateway.Server.Sessions;
|
||||
|
||||
/// <summary>
/// Outcome of a reconnect/resume attach
/// (<see cref="GatewaySession.AttachEventSubscriberWithReplay"/>, Task 12). It bundles the
/// live subscriber lease with the replay batch and the resume watermarks that were
/// snapshotted atomically with the registration, which is what lets the replay→live
/// handoff proceed with neither a gap nor a duplicate.
/// </summary>
/// <param name="Lease">
/// Lease for the live event subscriber. Disposing it unregisters the distributor
/// subscriber and decrements the session's active-subscriber count, the same as for a
/// fresh attach.
/// </param>
/// <param name="ReplayedEvents">
/// The retained events whose worker sequence is strictly greater than the requested
/// <c>afterSequence</c>, ordered ascending. The caller yields them before any live event
/// (and after the gap sentinel, when one is emitted). Never null; empty when no newer
/// event is retained.
/// </param>
/// <param name="Gap">
/// <see langword="true"/> when events lying between the requested <c>afterSequence</c> and
/// the oldest retained event have already been evicted, meaning the client missed events
/// that can no longer be recovered. In that case the caller emits a <c>ReplayGap</c>
/// sentinel ahead of the replay batch.
/// </param>
/// <param name="OldestAvailableSequence">
/// Oldest worker sequence that is still retained and replayable, or <c>0</c> when nothing
/// is retained. Used to fill <c>ReplayGap.oldest_available_sequence</c>; only meaningful
/// when <paramref name="Gap"/> is <see langword="true"/>.
/// </param>
/// <param name="LiveResumeSequence">
/// Worker sequence strictly after which the live channel resumes: the highest replayed
/// sequence, or the requested <c>afterSequence</c> when nothing was replayed. The caller
/// uses it as the per-subscriber live filter, so an event that was both replayed and fanned
/// into the live channel is dropped exactly once (no duplicate) while every newer event is
/// still delivered (no gap).
/// </param>
public readonly record struct EventSubscriberReplayAttachment(
    IEventSubscriberLease Lease,
    IReadOnlyList<MxEvent> ReplayedEvents,
    bool Gap,
    ulong OldestAvailableSequence,
    ulong LiveResumeSequence);
|
||||
@@ -1,3 +1,4 @@
|
||||
using System.Diagnostics;
|
||||
using System.Runtime.CompilerServices;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.MxGateway.Contracts.Proto;
|
||||
@@ -21,6 +22,15 @@ public sealed class GatewaySession
|
||||
private DateTimeOffset? _leaseExpiresAt;
|
||||
private bool _closeStarted;
|
||||
private int _activeEventSubscriberCount;
|
||||
private readonly TimeSpan _detachGrace;
|
||||
private readonly TimeSpan _workerReadyWaitTimeout;
|
||||
private DateTimeOffset? _detachedAtUtc;
|
||||
// True once at least one external subscriber attached SUCCESSFULLY. Detach-grace's
|
||||
// "last subscriber dropped" stamp (see DetachEventSubscriber) is gated on this so a
|
||||
// FAILED first attach — which still runs the rollback DetachEventSubscriber from the
|
||||
// attach catch path — does not push a never-subscribed session into the grace window
|
||||
// (Server-055).
|
||||
private bool _everHadEventSubscriber;
|
||||
private SessionEventDistributor? _eventDistributor;
|
||||
private bool _eventDistributorStarted;
|
||||
private bool _dashboardMirrorStarted;
|
||||
@@ -28,6 +38,7 @@ public sealed class GatewaySession
|
||||
private Task? _dashboardMirrorTask;
|
||||
private CancellationTokenSource? _dashboardMirrorCts;
|
||||
private readonly Dictionary<(int ServerHandle, int ItemHandle), SessionItemRegistration> _items = [];
|
||||
private readonly ArrayAddressNormalizer? _addressNormalizer;
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a gateway session with session metadata and timeout configuration.
|
||||
@@ -102,6 +113,33 @@ public sealed class GatewaySession
|
||||
/// session directly still get a working distributor. Production passes the
|
||||
/// DI-resolved dependencies.
|
||||
/// </param>
|
||||
/// <param name="detachGrace">
|
||||
/// Retention window kept after the last external (gRPC) event subscriber drops, so a
|
||||
/// client can reconnect (Task 12). When the window is positive and the active external
|
||||
/// subscriber count falls to zero, the session stays <see cref="SessionState.Ready"/>
|
||||
/// and records a detached timestamp; the lease monitor closes it once the window
|
||||
/// elapses with no subscriber having re-attached. <see cref="TimeSpan.Zero"/> (the
|
||||
/// default) disables retention and preserves the original lease-only expiry behavior.
|
||||
/// The clock comes from <paramref name="eventStreaming"/>'s
|
||||
/// <see cref="SessionEventStreaming.TimeProvider"/> so the timer is unit-testable.
|
||||
/// </param>
|
||||
/// <param name="workerReadyWaitTimeout">
|
||||
/// Bounded time the session will wait, on the command/event hot path, for the worker
|
||||
/// client to reach <see cref="WorkerClientState.Ready"/> when the session is already
|
||||
/// <see cref="SessionState.Ready"/> but the worker state has transiently diverged
|
||||
/// (e.g. <see cref="WorkerClientState.Handshaking"/> after a heartbeat blip). The wait
|
||||
/// applies only to transient worker states; terminal states
|
||||
/// (<see cref="WorkerClientState.Faulted"/>/<see cref="WorkerClientState.Closing"/>/
|
||||
/// <see cref="WorkerClientState.Closed"/>/no worker) and a non-<c>Ready</c> session fail
|
||||
/// fast immediately. <see cref="TimeSpan.Zero"/> (the default) disables the wait and
|
||||
/// preserves the original fail-fast behavior byte-for-byte.
|
||||
/// </param>
|
||||
/// <param name="addressNormalizer">
|
||||
/// Rewrites bare array <c>AddItem</c>/<c>AddItem2</c> addresses to their writable <c>[]</c>
|
||||
/// form using Galaxy metadata at the outbound choke point (and on registration tracking).
|
||||
/// When <see langword="null"/> (legacy unit-construction paths that do not exercise Galaxy
|
||||
/// metadata), addresses pass through unchanged.
|
||||
/// </param>
|
||||
public GatewaySession(
|
||||
string sessionId,
|
||||
string backendName,
|
||||
@@ -116,7 +154,10 @@ public sealed class GatewaySession
|
||||
TimeSpan shutdownTimeout,
|
||||
TimeSpan leaseDuration,
|
||||
DateTimeOffset openedAt,
|
||||
SessionEventStreaming? eventStreaming = null)
|
||||
SessionEventStreaming? eventStreaming = null,
|
||||
TimeSpan detachGrace = default,
|
||||
TimeSpan workerReadyWaitTimeout = default,
|
||||
ArrayAddressNormalizer? addressNormalizer = null)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(sessionId))
|
||||
{
|
||||
@@ -154,6 +195,9 @@ public sealed class GatewaySession
|
||||
_lastClientActivityAt = openedAt;
|
||||
_leaseExpiresAt = openedAt + leaseDuration;
|
||||
_eventStreaming = eventStreaming ?? SessionEventStreaming.Default;
|
||||
_detachGrace = detachGrace > TimeSpan.Zero ? detachGrace : TimeSpan.Zero;
|
||||
_workerReadyWaitTimeout = workerReadyWaitTimeout > TimeSpan.Zero ? workerReadyWaitTimeout : TimeSpan.Zero;
|
||||
_addressNormalizer = addressNormalizer;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -299,6 +343,25 @@ public sealed class GatewaySession
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Gets the UTC instant at which the session entered its detach-grace retention window,
/// i.e. when the last external event subscriber dropped while a positive detach-grace was
/// configured. Returns <see langword="null"/> when the session is not currently inside a
/// detach-grace window. Re-attaching an external subscriber clears the value, and it is
/// always <see langword="null"/> when detach-grace is disabled
/// (<c>DetachGraceSeconds == 0</c>).
/// </summary>
public DateTimeOffset? DetachedAtUtc
{
    get
    {
        // Copy the field while holding _syncRoot, then return the copy.
        DateTimeOffset? detachedAt;
        lock (_syncRoot)
        {
            detachedAt = _detachedAtUtc;
        }

        return detachedAt;
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// Attaches the worker client for this session.
|
||||
/// </summary>
|
||||
@@ -399,6 +462,32 @@ public sealed class GatewaySession
|
||||
return lease;
|
||||
}
|
||||
|
||||
// Resume-flavoured counterpart of StartDistributorAndRegister (Task 12).
//
// Order of operations:
//   1. Obtain the session distributor and learn whether this caller claimed the pump start.
//   2. Register the subscriber through SessionEventDistributor.RegisterWithReplay, which
//      snapshots the replay ring for events newer than afterSequence and registers the live
//      subscriber atomically under the distributor's replay lock (no gap, no duplicate
//      across the replay→live handoff).
//   3. Start the pump only AFTER registration — the same ordering as the fresh-attach path —
//      so the first subscriber on a freshly-Ready session still sees the stream from its
//      beginning.
private IEventSubscriberLease StartDistributorAndRegisterWithReplay(
    ulong afterSequence,
    out IReadOnlyList<MxEvent> replayedEvents,
    out bool gap,
    out ulong oldestAvailableSequence,
    out ulong liveResumeSequence)
{
    bool claimedPumpStart;
    SessionEventDistributor sessionDistributor = EnsureDistributorCreated(out claimedPumpStart);

    IEventSubscriberLease subscriberLease = sessionDistributor.RegisterWithReplay(
        afterSequence,
        out replayedEvents,
        out gap,
        out oldestAvailableSequence,
        out liveResumeSequence);

    StartPumpIfRequested(sessionDistributor, claimedPumpStart);
    return subscriberLease;
}
|
||||
|
||||
// Constructs the distributor exactly once and reports whether THIS caller is the one
|
||||
// that should start the pump (i.e. it observed the unstarted state and claimed the
|
||||
// start). Both the construction and the started-flag flip happen under _syncRoot so two
|
||||
@@ -419,7 +508,8 @@ public sealed class GatewaySession
|
||||
eventOptions.ReplayRetentionSeconds,
|
||||
_eventStreaming.DistributorLogger,
|
||||
_eventStreaming.TimeProvider,
|
||||
CreateOverflowHandler(eventOptions.BackpressurePolicy));
|
||||
CreateOverflowHandler(eventOptions.BackpressurePolicy),
|
||||
singleSubscriberMode: !_eventStreaming.AllowMultipleEventSubscribers);
|
||||
}
|
||||
|
||||
startNow = false;
|
||||
@@ -677,17 +767,61 @@ public sealed class GatewaySession
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Determines whether the session's detach-grace retention window has elapsed: the
/// session entered detach-grace (its last external event subscriber dropped while a
/// positive detach-grace was configured) and has had no external subscriber re-attach
/// for at least the configured detach-grace. The lease monitor closes such a session
/// exactly as it closes an expired lease.
/// </summary>
/// <param name="now">Current timestamp for comparison.</param>
/// <returns>
/// <see langword="true"/> when the grace window has elapsed; always
/// <see langword="false"/> when detach-grace is disabled or when an external subscriber
/// is attached (the detached timestamp is cleared on re-attach, so an attached session is
/// never within a window).
/// </returns>
/// <remarks>
/// The predicate itself lives in <c>IsDetachGraceExpiredCore</c>; this method only adds the
/// <c>_syncRoot</c> acquisition. Sharing the single predicate keeps this pre-check and the
/// atomic re-check performed by <c>TryBeginCloseIfExpired</c> from drifting apart.
/// </remarks>
public bool IsDetachGraceExpired(DateTimeOffset now)
{
    lock (_syncRoot)
    {
        return IsDetachGraceExpiredCore(now);
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// Attaches an event subscriber and returns a lease whose
|
||||
/// <see cref="IEventSubscriberLease.Reader"/> reads the fanned public
|
||||
/// <see cref="MxEvent"/>s for this subscriber. The single-subscriber guard
|
||||
/// (Tasks 7/8 relax it) is unchanged: with multi-subscriber disabled a second
|
||||
/// attach is rejected. The returned lease, when disposed, unregisters the
|
||||
/// distributor subscriber AND decrements the active-subscriber count.
|
||||
/// <see cref="MxEvent"/>s for this subscriber. The returned lease, when disposed,
|
||||
/// unregisters the distributor subscriber AND decrements the active-subscriber count.
|
||||
/// </summary>
|
||||
/// <param name="allowMultipleSubscribers">If true, allows multiple concurrent event subscribers.</param>
|
||||
public IEventSubscriberLease AttachEventSubscriber(bool allowMultipleSubscribers)
|
||||
/// <param name="maxSubscribers">
|
||||
/// Maximum concurrent external subscribers in multi-subscriber mode
|
||||
/// (<c>MxGateway:Sessions:MaxEventSubscribersPerSession</c>). Ignored when the
|
||||
/// session is in single-subscriber mode (<c>AllowMultipleEventSubscribers == false</c>);
|
||||
/// the effective cap is then 1. The gateway-owned internal dashboard subscriber is
|
||||
/// registered directly on the distributor and is NOT counted here, so it never
|
||||
/// consumes cap budget.
|
||||
/// </param>
|
||||
/// <remarks>
|
||||
/// The subscriber mode is derived internally from
|
||||
/// <see cref="SessionEventStreaming.AllowMultipleEventSubscribers"/> — the same source
|
||||
/// the <see cref="SessionEventDistributor"/> uses to gate its FailFast decision — so
|
||||
/// the cap-enforcement mode and the distributor's <c>singleSubscriberMode</c> field
|
||||
/// cannot diverge. The count-check-and-increment runs atomically under
|
||||
/// <c>_syncRoot</c>, so two concurrent attaches racing toward the cap can never both
|
||||
/// succeed past it. On distributor-register failure the count is rolled back (see the
|
||||
/// catch below).
|
||||
/// </remarks>
|
||||
public IEventSubscriberLease AttachEventSubscriber(int maxSubscribers)
|
||||
{
|
||||
// Derive the mode from the same source the distributor uses so the two can never
|
||||
// diverge. Effective cap: 1 in single-subscriber mode, otherwise the configured
|
||||
// maximum (clamped to at least 1 so a misconfigured non-positive value can never
|
||||
// deadlock attaches in multi-subscriber mode).
|
||||
bool allowMultipleSubscribers = _eventStreaming.AllowMultipleEventSubscribers;
|
||||
int effectiveCap = allowMultipleSubscribers ? Math.Max(1, maxSubscribers) : 1;
|
||||
|
||||
lock (_syncRoot)
|
||||
{
|
||||
if (_state != SessionState.Ready || _workerClient?.State != WorkerClientState.Ready)
|
||||
@@ -697,14 +831,24 @@ public sealed class GatewaySession
|
||||
$"Session {SessionId} is not ready for event streaming. Current state is {_state}.");
|
||||
}
|
||||
|
||||
if (!allowMultipleSubscribers && _activeEventSubscriberCount > 0)
|
||||
if (_activeEventSubscriberCount >= effectiveCap)
|
||||
{
|
||||
throw new SessionManagerException(
|
||||
SessionManagerErrorCode.EventSubscriberAlreadyActive,
|
||||
$"Session {SessionId} already has an active event stream subscriber.");
|
||||
throw allowMultipleSubscribers
|
||||
? new SessionManagerException(
|
||||
SessionManagerErrorCode.EventSubscriberLimitReached,
|
||||
$"Session {SessionId} has reached its maximum of {effectiveCap} concurrent event stream subscribers.")
|
||||
: new SessionManagerException(
|
||||
SessionManagerErrorCode.EventSubscriberAlreadyActive,
|
||||
$"Session {SessionId} already has an active event stream subscriber.");
|
||||
}
|
||||
|
||||
_activeEventSubscriberCount++;
|
||||
|
||||
// An external subscriber (re)attached: cancel any in-flight detach-grace window so
|
||||
// the lease monitor no longer treats this session as eligible for grace-expiry
|
||||
// close. This is the reattach→grace-cancel transition; it races the sweeper's
|
||||
// IsDetachGraceExpired read, and both run under _syncRoot so they serialize.
|
||||
_detachedAtUtc = null;
|
||||
}
|
||||
|
||||
// Construct/start the distributor and register this subscriber. Done outside the
|
||||
@@ -713,6 +857,7 @@ public sealed class GatewaySession
|
||||
try
|
||||
{
|
||||
IEventSubscriberLease distributorLease = StartDistributorAndRegister();
|
||||
MarkEventSubscriberAttached();
|
||||
return new EventSubscriberLease(this, distributorLease);
|
||||
}
|
||||
catch
|
||||
@@ -722,6 +867,87 @@ public sealed class GatewaySession
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Reconnect/resume variant of <see cref="AttachEventSubscriber"/> (Task 12). Reserves a
/// subscriber slot, then registers the subscriber while atomically snapshotting the
/// session replay ring for events newer than <paramref name="afterSequence"/>, so a
/// resuming client can replay what it missed before live delivery resumes — with no gap
/// and no duplicate across the handoff.
/// </summary>
/// <param name="maxSubscribers">
/// Configured subscriber cap; see <see cref="AttachEventSubscriber"/>. Clamped to at least
/// 1 in multi-subscriber mode and ignored (effective cap 1) in single-subscriber mode.
/// </param>
/// <param name="afterSequence">
/// The last worker sequence the resuming client already observed. Replay returns events
/// strictly newer than this; the caller must filter the live channel to events strictly
/// newer than <see cref="EventSubscriberReplayAttachment.LiveResumeSequence"/>.
/// </param>
/// <returns>
/// The lease plus the replay batch, gap flag, and resume watermarks. See
/// <see cref="SessionEventDistributor.RegisterWithReplay"/> for the no-gap/no-duplicate
/// guarantee.
/// </returns>
/// <exception cref="SessionManagerException">
/// The session or its worker is not <c>Ready</c>, or the subscriber cap is already reached.
/// </exception>
public EventSubscriberReplayAttachment AttachEventSubscriberWithReplay(int maxSubscribers, ulong afterSequence)
{
    // The mode comes from the session's event-streaming settings; the cap collapses to 1
    // whenever multiple subscribers are not allowed.
    bool multiSubscriberMode = _eventStreaming.AllowMultipleEventSubscribers;
    int subscriberCap = 1;
    if (multiSubscriberMode)
    {
        subscriberCap = Math.Max(1, maxSubscribers);
    }

    // Phase 1: readiness check, cap check, and slot reservation happen in one critical
    // section so concurrent attaches cannot both pass the cap.
    lock (_syncRoot)
    {
        bool readyForStreaming = _state == SessionState.Ready
            && _workerClient?.State == WorkerClientState.Ready;
        if (!readyForStreaming)
        {
            throw new SessionManagerException(
                SessionManagerErrorCode.SessionNotReady,
                $"Session {SessionId} is not ready for event streaming. Current state is {_state}.");
        }

        if (_activeEventSubscriberCount >= subscriberCap)
        {
            if (multiSubscriberMode)
            {
                throw new SessionManagerException(
                    SessionManagerErrorCode.EventSubscriberLimitReached,
                    $"Session {SessionId} has reached its maximum of {subscriberCap} concurrent event stream subscribers.");
            }

            throw new SessionManagerException(
                SessionManagerErrorCode.EventSubscriberAlreadyActive,
                $"Session {SessionId} already has an active event stream subscriber.");
        }

        _activeEventSubscriberCount++;

        // A (re)attaching external subscriber cancels any in-flight detach-grace window.
        _detachedAtUtc = null;
    }

    // Phase 2: register on the distributor outside the session lock. Any failure releases
    // the slot reserved above before the exception propagates.
    try
    {
        IEventSubscriberLease innerLease = StartDistributorAndRegisterWithReplay(
            afterSequence,
            out IReadOnlyList<MxEvent> missedEvents,
            out bool hasGap,
            out ulong oldestSequence,
            out ulong resumeSequence);

        MarkEventSubscriberAttached();

        var sessionLease = new EventSubscriberLease(this, innerLease);
        return new EventSubscriberReplayAttachment(
            sessionLease,
            missedEvents,
            hasGap,
            oldestSequence,
            resumeSequence);
    }
    catch
    {
        DetachEventSubscriber();
        throw;
    }
}
|
||||
|
||||
// Flags, under _syncRoot, that at least one external subscriber has attached successfully.
// DetachEventSubscriber consults this flag before stamping the detach-grace start, so a
// FAILED first attach (which still rolls back through DetachEventSubscriber) never pushes a
// never-subscribed session into the grace window (Server-055).
private void MarkEventSubscriberAttached()
{
    lock (_syncRoot)
    {
        _everHadEventSubscriber = true;
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// Invokes a worker command synchronously and returns the reply.
|
||||
/// </summary>
|
||||
@@ -731,12 +957,108 @@ public sealed class GatewaySession
|
||||
WorkerCommand command,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
IWorkerClient workerClient = GetReadyWorkerClient();
|
||||
TouchClientActivity(DateTimeOffset.UtcNow);
|
||||
ArgumentNullException.ThrowIfNull(command);
|
||||
if (command.Command is not null)
|
||||
{
|
||||
NormalizeOutboundCommand(command.Command);
|
||||
}
|
||||
|
||||
IWorkerClient workerClient = await GetReadyWorkerClientAsync(cancellationToken).ConfigureAwait(false);
|
||||
TouchClientActivity(_eventStreaming.TimeProvider.GetUtcNow());
|
||||
|
||||
return await workerClient.InvokeAsync(command, CommandTimeout, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
// Single outbound choke point for the two array-write ergonomics shims (Task 3). The
// command is mutated in place, so the instance forwarded to the worker carries the result.
//
//   * Add-item payloads (AddItem, AddItem2, AddBufferedItem, AddItemBulk): each address is
//     passed through NormalizeAddress, which appends the writable "[]" suffix when Galaxy
//     metadata reports the tag as an array, so the worker registers a write-capable handle.
//     Scalar addresses pass through unchanged.
//   * Write payloads (single and bulk, secured or not): each value is passed through
//     ExpandValue, because MXAccess has no partial-array write primitive — the worker only
//     ever sees a full MxArray.
//
// SparseArrayExpander.Expand throws RpcException(InvalidArgument) for an invalid sparse
// payload; that propagates out of InvokeAsync as the desired client-facing error and is
// deliberately not caught here. Payload cases not listed below are left untouched.
private void NormalizeOutboundCommand(MxCommand command)
{
    switch (command.PayloadCase)
    {
        // ---- address normalization -------------------------------------------------
        case MxCommand.PayloadOneofCase.AddItem:
        {
            var addItem = command.AddItem;
            addItem.ItemDefinition = NormalizeAddress(addItem.ItemDefinition);
            break;
        }

        case MxCommand.PayloadOneofCase.AddItem2:
        {
            var addItem2 = command.AddItem2;
            addItem2.ItemDefinition = NormalizeAddress(addItem2.ItemDefinition);
            break;
        }

        case MxCommand.PayloadOneofCase.AddBufferedItem:
        {
            var addBuffered = command.AddBufferedItem;
            addBuffered.ItemDefinition = NormalizeAddress(addBuffered.ItemDefinition);
            break;
        }

        case MxCommand.PayloadOneofCase.AddItemBulk:
        {
            // Rewrite every entry of the batch in place (same rewrite as the single-add path).
            var addresses = command.AddItemBulk.TagAddresses;
            for (int index = 0; index < addresses.Count; index++)
            {
                addresses[index] = NormalizeAddress(addresses[index]);
            }

            break;
        }

        // ---- sparse-value expansion: single writes ---------------------------------
        case MxCommand.PayloadOneofCase.Write:
            ExpandValue(command.Write.Value);
            break;

        case MxCommand.PayloadOneofCase.WriteSecured:
            ExpandValue(command.WriteSecured.Value);
            break;

        case MxCommand.PayloadOneofCase.Write2:
            ExpandValue(command.Write2.Value);
            break;

        case MxCommand.PayloadOneofCase.WriteSecured2:
            ExpandValue(command.WriteSecured2.Value);
            break;

        // ---- sparse-value expansion: bulk writes -----------------------------------
        case MxCommand.PayloadOneofCase.WriteBulk:
            foreach (WriteBulkEntry write in command.WriteBulk.Entries)
            {
                ExpandValue(write.Value);
            }

            break;

        case MxCommand.PayloadOneofCase.Write2Bulk:
            foreach (Write2BulkEntry write2 in command.Write2Bulk.Entries)
            {
                ExpandValue(write2.Value);
            }

            break;

        case MxCommand.PayloadOneofCase.WriteSecuredBulk:
            foreach (WriteSecuredBulkEntry securedWrite in command.WriteSecuredBulk.Entries)
            {
                ExpandValue(securedWrite.Value);
            }

            break;

        case MxCommand.PayloadOneofCase.WriteSecured2Bulk:
            foreach (WriteSecured2BulkEntry securedWrite2 in command.WriteSecured2Bulk.Entries)
            {
                ExpandValue(securedWrite2.Value);
            }

            break;
    }
}
|
||||
|
||||
// Best-effort array-suffix rewrite. The normalizer is null in legacy unit-construction
// paths that do not exercise Galaxy metadata; in that case — or when the normalizer yields
// null — the caller's address is returned unchanged.
private string NormalizeAddress(string address)
{
    ArrayAddressNormalizer? normalizer = _addressNormalizer;
    if (normalizer is null)
    {
        return address;
    }

    string? rewritten = normalizer.Normalize(address);
    return rewritten ?? address;
}
|
||||
|
||||
// MXAccess writes replace the whole array, so a sparse value is expanded in place and the
// worker only ever receives a whole-array MxValue. A null value is skipped; the expansion
// itself is delegated entirely to SparseArrayExpander.Expand.
private static void ExpandValue(MxValue? value)
{
    if (value is null)
    {
        return;
    }

    SparseArrayExpander.Expand(value);
}
|
||||
|
||||
/// <summary>Gets the item registration for a server and item handle pair.</summary>
|
||||
/// <param name="serverHandle">The MXAccess server handle.</param>
|
||||
/// <param name="itemHandle">The MXAccess item handle.</param>
|
||||
@@ -768,16 +1090,28 @@ public sealed class GatewaySession
|
||||
{
|
||||
switch (command.Kind)
|
||||
{
|
||||
// The public reply is tracked from the pre-mapping MxCommand instance, which is a
|
||||
// separate copy from the one mutated at the InvokeAsync choke point (the gRPC mapper
|
||||
// deep-clones before forwarding). Re-apply the array-suffix normalization here so the
|
||||
// registration's TagAddress matches the address the worker actually registered.
|
||||
// Normalize is idempotent for an already-suffixed address.
|
||||
case MxCommandKind.AddItem when reply.AddItem is not null:
|
||||
TrackItem(command.AddItem.ServerHandle, reply.AddItem.ItemHandle, command.AddItem.ItemDefinition);
|
||||
TrackItem(command.AddItem.ServerHandle, reply.AddItem.ItemHandle, NormalizeAddress(command.AddItem.ItemDefinition));
|
||||
break;
|
||||
case MxCommandKind.AddItem2 when reply.AddItem2 is not null:
|
||||
TrackItem(command.AddItem2.ServerHandle, reply.AddItem2.ItemHandle, command.AddItem2.ItemDefinition);
|
||||
TrackItem(command.AddItem2.ServerHandle, reply.AddItem2.ItemHandle, NormalizeAddress(command.AddItem2.ItemDefinition));
|
||||
break;
|
||||
case MxCommandKind.AddBufferedItem when reply.AddBufferedItem is not null:
|
||||
TrackItem(command.AddBufferedItem.ServerHandle, reply.AddBufferedItem.ItemHandle, command.AddBufferedItem.ItemDefinition);
|
||||
// The reply carries no address, so tracking keys off the command's ItemDefinition;
|
||||
// re-apply the array-suffix normalization (the tracking copy is a separate, un-mutated
|
||||
// instance from the one forwarded at the InvokeAsync choke point) so the registration
|
||||
// matches the write-capable handle the worker bound.
|
||||
TrackItem(command.AddBufferedItem.ServerHandle, reply.AddBufferedItem.ItemHandle, NormalizeAddress(command.AddBufferedItem.ItemDefinition));
|
||||
break;
|
||||
case MxCommandKind.AddItemBulk when reply.AddItemBulk is not null:
|
||||
// The worker echoes back the (already-normalized) address it bound in each
|
||||
// SubscribeResult.TagAddress, so TrackBulkItems stores the suffixed array address
|
||||
// without re-normalizing here.
|
||||
TrackBulkItems(reply.AddItemBulk);
|
||||
break;
|
||||
case MxCommandKind.SubscribeBulk when reply.SubscribeBulk is not null:
|
||||
@@ -1074,12 +1408,20 @@ public sealed class GatewaySession
|
||||
/// Reads events from the worker as an asynchronous enumerable stream.
|
||||
/// </summary>
|
||||
/// <param name="cancellationToken">Token to cancel the asynchronous operation.</param>
|
||||
public IAsyncEnumerable<WorkerEvent> ReadEventsAsync(CancellationToken cancellationToken)
|
||||
/// <returns>An asynchronous stream of worker events.</returns>
|
||||
public async IAsyncEnumerable<WorkerEvent> ReadEventsAsync(
|
||||
[EnumeratorCancellation] CancellationToken cancellationToken)
|
||||
{
|
||||
IWorkerClient workerClient = GetReadyWorkerClient();
|
||||
TouchClientActivity(DateTimeOffset.UtcNow);
|
||||
IWorkerClient workerClient = await GetReadyWorkerClientAsync(cancellationToken).ConfigureAwait(false);
|
||||
TouchClientActivity(_eventStreaming.TimeProvider.GetUtcNow());
|
||||
|
||||
return workerClient.ReadEventsAsync(cancellationToken);
|
||||
await foreach (WorkerEvent workerEvent in workerClient
|
||||
.ReadEventsAsync(cancellationToken)
|
||||
.WithCancellation(cancellationToken)
|
||||
.ConfigureAwait(false))
|
||||
{
|
||||
yield return workerEvent;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -1171,6 +1513,74 @@ public sealed class GatewaySession
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Atomically re-verifies that the session is still eligible for sweep-initiated close
/// (lease expired OR detach-grace expired, with no active external subscriber) and, if so,
/// transitions it to <c>Closing</c> within the same lock acquisition.
/// </summary>
/// <param name="now">Current timestamp used for the expiry re-check.</param>
/// <param name="alreadyClosing">
/// Receives the value the close-started flag held before this call whenever the session is
/// <c>Closed</c> or the transition is committed, and <see langword="false"/> when the
/// session is no longer eligible. A <see langword="true"/> value means a concurrent close
/// is already in flight; the caller should treat the session as already being closed (same
/// semantics as <see cref="CloseAsync"/>).
/// </param>
/// <returns>
/// <see langword="true"/> when the state was flipped to <c>Closing</c> and the caller
/// should proceed with teardown; <see langword="false"/> when the session is already
/// closed OR is no longer eligible (a subscriber re-attached between the eligibility
/// check in the sweep loop and this call — the reconnect won the race and the session
/// should be left open).
/// </returns>
/// <remarks>
/// <para>
/// Race being closed: <c>CloseExpiredLeasesAsync</c> evaluates <see cref="IsLeaseExpired"/> /
/// <see cref="IsDetachGraceExpired"/> outside the close lock, then calls
/// <see cref="CloseAsync"/> which takes <c>_closeLock</c>. A client can call
/// <see cref="AttachEventSubscriber"/> in between, clearing <c>_detachedAtUtc</c> and
/// incrementing <c>_activeEventSubscriberCount</c> — at which point the session is no
/// longer expired. Re-checking under <c>_syncRoot</c> before committing to <c>Closing</c>
/// means a reattach that wins the race leaves the session in <c>Ready</c> and usable.
/// </para>
/// </remarks>
internal bool TryBeginCloseIfExpired(DateTimeOffset now, out bool alreadyClosing)
{
    lock (_syncRoot)
    {
        // Terminal state: nothing to begin; just report whether a close had been started.
        if (_state is SessionState.Closed)
        {
            alreadyClosing = _closeStarted;
            return false;
        }

        // Decline when neither expiry condition still holds (a subscriber reattached after
        // the sweep's unlocked eligibility check).
        if (!IsLeaseExpiredCore(now) && !IsDetachGraceExpiredCore(now))
        {
            alreadyClosing = false;
            return false;
        }

        // Commit: report the prior close-started flag, then claim the close.
        alreadyClosing = _closeStarted;
        _closeStarted = true;
        _state = SessionState.Closing;
        return true;
    }
}
|
||||
|
||||
// Non-locking lease-expiry predicate: the caller must already hold _syncRoot. True only
// when no external subscriber is attached and a lease expiry is set at or before `now`.
private bool IsLeaseExpiredCore(DateTimeOffset now)
{
    if (_activeEventSubscriberCount != 0)
    {
        return false;
    }

    return _leaseExpiresAt is { } leaseExpiry && leaseExpiry <= now;
}
|
||||
|
||||
// Non-locking detach-grace predicate: the caller must already hold _syncRoot. True only when
// detach-grace is enabled, no external subscriber is attached, a detach timestamp has been
// stamped, and at least the configured grace has elapsed since that timestamp.
private bool IsDetachGraceExpiredCore(DateTimeOffset now)
{
    if (_detachGrace <= TimeSpan.Zero || _activeEventSubscriberCount != 0)
    {
        return false;
    }

    return _detachedAtUtc is { } detachedAt && now - detachedAt >= _detachGrace;
}
|
||||
|
||||
// Final terminal transition; under _syncRoot to keep _state writes single-lock.
|
||||
// Closed is unconditionally terminal — TransitionTo refuses to overwrite it —
|
||||
// so we don't need to re-check the precondition here.
|
||||
@@ -1398,34 +1808,136 @@ public sealed class GatewaySession
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Returns the worker client iff both the gateway-side session state AND
|
||||
/// the worker client's own state are <see cref="SessionState.Ready"/> /
|
||||
/// <see cref="WorkerClientState.Ready"/>. The two states can diverge under
|
||||
/// load: <c>_state</c> only transitions on gateway-driven events (open,
|
||||
/// close, fault), while <see cref="WorkerClient.State"/> can shift on
|
||||
/// worker-side signals (heartbeat watchdog, pipe disconnect) before the
|
||||
/// gateway's session-level reaction observes them. When that happens the
|
||||
/// in-flight RPC fails fast here with both states surfaced in the
|
||||
/// diagnostic (Server-030) so the actual mismatch is actionable instead
|
||||
/// of misleading. The session usually transitions to <c>Faulted</c>
|
||||
/// shortly after.
|
||||
/// Bounded, opt-in async variant of the fail-fast readiness check. When the
|
||||
/// session is <see cref="SessionState.Ready"/> but the worker has transiently diverged
|
||||
/// to a non-terminal state (<see cref="WorkerClientState.Handshaking"/>/
|
||||
/// <see cref="WorkerClientState.Created"/>) and the configured worker-ready wait timeout
|
||||
/// is positive, this polls (outside <c>_syncRoot</c>) until the worker reaches
|
||||
/// <see cref="WorkerClientState.Ready"/> or the deadline elapses, re-evaluating the
|
||||
/// fast-path/fail-fast decision under the lock on each poll. Terminal worker states, a
|
||||
/// missing worker, or a non-<c>Ready</c> session fail fast immediately. With the default
|
||||
/// timeout of zero this behaves byte-for-byte like the synchronous fail-fast path: no
|
||||
/// await, no delay.
|
||||
/// </summary>
|
||||
private IWorkerClient GetReadyWorkerClient()
|
||||
/// <param name="cancellationToken">Token to cancel the wait.</param>
|
||||
/// <returns>The worker client once both the session and worker are <c>Ready</c>.</returns>
|
||||
private async Task<IWorkerClient> GetReadyWorkerClientAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
const int pollIntervalMs = 25;
|
||||
|
||||
string? failureMessage;
|
||||
lock (_syncRoot)
|
||||
{
|
||||
if (_state != SessionState.Ready || _workerClient?.State != WorkerClientState.Ready)
|
||||
IWorkerClient? ready = EvaluateReadyUnderLock(out failureMessage);
|
||||
if (ready is not null)
|
||||
{
|
||||
string workerState = _workerClient is null
|
||||
? "<no worker>"
|
||||
: _workerClient.State.ToString();
|
||||
throw new SessionManagerException(
|
||||
SessionManagerErrorCode.SessionNotReady,
|
||||
$"Session {SessionId} is not ready. Session state is {_state}; worker state is {workerState}.");
|
||||
return ready;
|
||||
}
|
||||
|
||||
// Only transient (non-terminal) worker states with a positive wait timeout fall
|
||||
// through to the bounded wait loop. Everything else (terminal worker, no worker,
|
||||
// session not Ready, or a zero timeout) fails fast right here under the lock. When
|
||||
// the worker is merely transient (failureMessage is null) but the wait is disabled,
|
||||
// build the both-states diagnostic so the zero-timeout path is byte-for-byte the
|
||||
// original fail-fast message.
|
||||
if (failureMessage is not null || _workerReadyWaitTimeout <= TimeSpan.Zero)
|
||||
{
|
||||
throw new SessionManagerException(
|
||||
SessionManagerErrorCode.SessionNotReady,
|
||||
failureMessage ?? BuildNotReadyMessage());
|
||||
}
|
||||
}
|
||||
|
||||
DateTimeOffset deadline = _eventStreaming.TimeProvider.GetUtcNow() + _workerReadyWaitTimeout;
|
||||
while (true)
|
||||
{
|
||||
await Task.Delay(
|
||||
TimeSpan.FromMilliseconds(pollIntervalMs),
|
||||
_eventStreaming.TimeProvider,
|
||||
cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
|
||||
lock (_syncRoot)
|
||||
{
|
||||
IWorkerClient? ready = EvaluateReadyUnderLock(out failureMessage);
|
||||
if (ready is not null)
|
||||
{
|
||||
return ready;
|
||||
}
|
||||
|
||||
// A terminal worker / missing worker / non-Ready session surfaced while we
|
||||
// waited: fail fast immediately rather than burning the rest of the deadline.
|
||||
if (failureMessage is not null)
|
||||
{
|
||||
throw new SessionManagerException(SessionManagerErrorCode.SessionNotReady, failureMessage);
|
||||
}
|
||||
}
|
||||
|
||||
if (_eventStreaming.TimeProvider.GetUtcNow() >= deadline)
|
||||
{
|
||||
lock (_syncRoot)
|
||||
{
|
||||
IWorkerClient? ready = EvaluateReadyUnderLock(out failureMessage);
|
||||
if (ready is not null)
|
||||
{
|
||||
return ready;
|
||||
}
|
||||
|
||||
throw new SessionManagerException(
|
||||
SessionManagerErrorCode.SessionNotReady,
|
||||
failureMessage ?? BuildNotReadyMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Evaluates readiness while the caller already holds <c>_syncRoot</c>. Returns the
/// worker client when both the session and worker are <see cref="WorkerClientState.Ready"/>
/// (with <paramref name="failureMessage"/> set to <see langword="null"/>). Returns
/// <see langword="null"/> together with the both-states diagnostic in
/// <paramref name="failureMessage"/> when the worker is in a terminal state
/// (<see cref="WorkerClientState.Faulted"/>/<see cref="WorkerClientState.Closing"/>/
/// <see cref="WorkerClientState.Closed"/>), there is no worker, or the session is not
/// <see cref="SessionState.Ready"/>. Returns <see langword="null"/> with a
/// <see langword="null"/> <paramref name="failureMessage"/> when the session is
/// <c>Ready</c> but the worker is in a transient state
/// (<see cref="WorkerClientState.Handshaking"/>/<see cref="WorkerClientState.Created"/>) —
/// the signal for the async path to keep waiting.
/// </summary>
/// <param name="failureMessage">
/// The fail-fast both-states diagnostic when readiness cannot succeed, or
/// <see langword="null"/> for the keep-waiting (transient) signal.
/// </param>
/// <returns>The ready worker client, or <see langword="null"/>.</returns>
/// <remarks>
/// The worker state is sampled exactly once per evaluation. Sampling it separately for the
/// ready check and for the transient check would let a worker that moves from
/// <c>Handshaking</c> to <c>Ready</c> between the two reads match neither branch and be
/// reported as a fail-fast failure even though it is ready.
/// </remarks>
private IWorkerClient? EvaluateReadyUnderLock(out string? failureMessage)
{
    IWorkerClient? workerClient = _workerClient;
    if (_state == SessionState.Ready && workerClient is not null)
    {
        // Single snapshot: both classifications below are made from the same observation.
        WorkerClientState observedWorkerState = workerClient.State;

        if (observedWorkerState == WorkerClientState.Ready)
        {
            failureMessage = null;
            return workerClient;
        }

        // Keep-waiting signal: session is Ready and the worker is merely transient.
        if (observedWorkerState is WorkerClientState.Handshaking or WorkerClientState.Created)
        {
            failureMessage = null;
            return null;
        }
    }

    // Session not Ready, no worker, or a worker state that is neither ready nor transient.
    failureMessage = BuildNotReadyMessage();
    return null;
}
|
||||
|
||||
/// <summary>
/// Builds the not-ready diagnostic that surfaces both the session state and the worker
/// state (must be called under <c>_syncRoot</c>).
/// </summary>
/// <returns>
/// The diagnostic message; the worker state is rendered as <c>&lt;no worker&gt;</c> when no
/// worker client is attached.
/// </returns>
private string BuildNotReadyMessage()
{
    string workerDescription = _workerClient?.State.ToString() ?? "<no worker>";
    return $"Session {SessionId} is not ready. Session state is {_state}; worker state is {workerDescription}.";
}
|
||||
|
||||
private void TrackItem(
|
||||
@@ -1466,10 +1978,37 @@ public sealed class GatewaySession
|
||||
{
|
||||
lock (_syncRoot)
|
||||
{
|
||||
// Assert in debug so a genuine double-decrement (a logic error) surfaces
|
||||
// loudly; the clamp below keeps release builds safe if it somehow fires.
|
||||
Debug.Assert(_activeEventSubscriberCount > 0,
|
||||
"DetachEventSubscriber called with _activeEventSubscriberCount already at 0 — possible double-dispose.");
|
||||
if (_activeEventSubscriberCount > 0)
|
||||
{
|
||||
_activeEventSubscriberCount--;
|
||||
}
|
||||
|
||||
// When the LAST external subscriber drops and detach-grace is enabled, retain the
|
||||
// session instead of letting it linger only on the (long) lease: stamp the detached
|
||||
// time so the lease monitor can close it once the grace window elapses. The session
|
||||
// stays in its current (Ready) state and remains usable, so a reconnecting subscriber
|
||||
// (Task 12) re-attaches normally. The gateway-owned internal dashboard subscriber is
|
||||
// NOT counted in _activeEventSubscriberCount (it registers on the distributor with
|
||||
// isInternal: true), so a session whose only remaining subscriber is the dashboard
|
||||
// mirror still enters grace. Only stamp while the session is alive — once
|
||||
// Closing/Closed/Faulted there is nothing to retain. This is the detach→grace-start
|
||||
// transition; it shares _syncRoot with the reattach→grace-cancel write above and the
|
||||
// sweeper's IsDetachGraceExpired read, so the three serialize.
|
||||
// Only stamp a detach that mirrors a prior SUCCESSFUL attach. The attach catch path
|
||||
// calls this same method to roll back a reserved slot when the FIRST attach failed
|
||||
// before any subscriber registered; that never-subscribed session must not enter the
|
||||
// grace window (Server-055).
|
||||
if (_everHadEventSubscriber
|
||||
&& _detachGrace > TimeSpan.Zero
|
||||
&& _activeEventSubscriberCount == 0
|
||||
&& _state is not (SessionState.Closing or SessionState.Closed or SessionState.Faulted))
|
||||
{
|
||||
_detachedAtUtc = _eventStreaming.TimeProvider.GetUtcNow();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -13,12 +13,16 @@ namespace ZB.MOM.WW.MxGateway.Server.Sessions;
|
||||
/// regardless of what the handler does.
|
||||
/// </summary>
|
||||
/// <param name="isOnlySubscriber">
|
||||
/// <see langword="true"/> when the overflowing subscriber is the sole registered
|
||||
/// subscriber at the moment of overflow (legacy single-subscriber mode). FailFast faults
|
||||
/// the session only in this case; with multiple subscribers FailFast degrades to a
|
||||
/// per-subscriber disconnect so one slow consumer never faults a session shared by others.
|
||||
/// Always <see langword="false"/> for internal subscribers (the dashboard mirror) because
|
||||
/// <see cref="SessionEventDistributor"/> excludes them from the external-subscriber count.
|
||||
/// <see langword="true"/> when FailFast is allowed to fault the whole session for this
|
||||
/// overflow. As of Task 8 this is gated on the SESSION MODE, not a live count: it is
|
||||
/// <see langword="true"/> only for an external subscriber in single-subscriber mode
|
||||
/// (<c>AllowMultipleEventSubscribers == false</c>), where at most one external subscriber
|
||||
/// can ever exist. In multi-subscriber mode it is always <see langword="false"/>, so
|
||||
/// FailFast degrades to a per-subscriber disconnect and one slow consumer never faults a
|
||||
/// session shared by others; gating on the fixed mode also removes the Task 5 race where a
|
||||
/// concurrent registration could make a count snapshot falsely report a sole subscriber.
|
||||
/// Always <see langword="false"/> for internal subscribers (the dashboard mirror) so a
|
||||
/// slow/broken dashboard can never fault the session.
|
||||
/// </param>
|
||||
/// <param name="isInternal">
|
||||
/// <see langword="true"/> when the overflowing subscriber is the gateway-owned internal
|
||||
@@ -40,8 +44,10 @@ public delegate void SubscriberOverflowHandler(bool isOnlySubscriber, bool isInt
|
||||
/// policy (Task 5) is implemented here: a slow subscriber overflows only its own
|
||||
/// bounded channel and the pump applies the policy to that subscriber alone (see
|
||||
/// <see cref="SubscriberOverflowHandler"/> and <c>OnSubscriberOverflow</c>), leaving
|
||||
/// the pump, the session, and other subscribers running. The class does not yet
|
||||
/// remove the single-subscriber guard (Tasks 7/8). The ring buffer supports capacity
|
||||
/// the pump, the session, and other subscribers running. Task 8 made the
|
||||
/// FailFast-faults-session decision mode-gated: it fires only in single-subscriber
|
||||
/// mode (<c>singleSubscriberMode</c>), so multi-subscriber FailFast always degrades to
|
||||
/// a per-subscriber disconnect — see <c>OnSubscriberOverflow</c>. The ring buffer supports capacity
|
||||
/// eviction (oldest entry dropped when the count exceeds
|
||||
/// <c>replayBufferCapacity</c>) and age eviction (entries older than
|
||||
/// <c>replayRetentionSeconds</c> dropped on the next append or query), and is
|
||||
@@ -83,6 +89,7 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
private readonly string _sessionId;
|
||||
private readonly Func<CancellationToken, IAsyncEnumerable<MxEvent>> _eventSourceFactory;
|
||||
private readonly int _subscriberQueueCapacity;
|
||||
private readonly bool _singleSubscriberMode;
|
||||
private readonly SubscriberOverflowHandler? _overflowHandler;
|
||||
private readonly TimeSpan _shutdownTimeout;
|
||||
private readonly ILogger<SessionEventDistributor> _logger;
|
||||
@@ -109,6 +116,17 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
private bool _started;
|
||||
private bool _disposed;
|
||||
|
||||
// Set once the pump has run its final CompleteAllSubscribers sweep — the event source
|
||||
// completed or faulted and the pump exited. Guarded by _lifecycleLock together with the
|
||||
// subscriber add. A subscriber that registers AFTER this point but BEFORE DisposeAsync
|
||||
// (the source ended but the session is not yet torn down) would otherwise be added with a
|
||||
// channel the now-exited pump never completes, hanging its reader forever. The register
|
||||
// paths complete such a late registrant's channel immediately with the same terminal
|
||||
// state. _completionError carries the terminal exception (source fault) or null (graceful
|
||||
// source completion), mirroring what the final CompleteAllSubscribers passed.
|
||||
private bool _completed;
|
||||
private Exception? _completionError;
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a per-session event distributor.
|
||||
/// </summary>
|
||||
@@ -134,7 +152,8 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
Func<CancellationToken, IAsyncEnumerable<MxEvent>> eventSourceFactory,
|
||||
int subscriberQueueCapacity,
|
||||
ILogger<SessionEventDistributor> logger,
|
||||
SubscriberOverflowHandler? overflowHandler = null)
|
||||
SubscriberOverflowHandler? overflowHandler = null,
|
||||
bool singleSubscriberMode = true)
|
||||
: this(
|
||||
sessionId,
|
||||
eventSourceFactory,
|
||||
@@ -143,7 +162,8 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
replayRetentionSeconds: 0,
|
||||
logger,
|
||||
TimeProvider.System,
|
||||
overflowHandler)
|
||||
overflowHandler,
|
||||
singleSubscriberMode)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -181,6 +201,17 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
/// handler. When <see langword="null"/> (unit/skeleton use) the offending subscriber is
|
||||
/// still disconnected but no metric/fault side effect runs.
|
||||
/// </param>
|
||||
/// <param name="singleSubscriberMode">
|
||||
/// <see langword="true"/> when the owning session is in single-subscriber mode
|
||||
/// (<c>AllowMultipleEventSubscribers == false</c>). This gates the FailFast
|
||||
/// session-fault decision in <c>OnSubscriberOverflow</c>: an external subscriber that
|
||||
/// overflows reports <c>isOnlySubscriber == true</c> (legacy FailFast faults the
|
||||
/// session) ONLY in single-subscriber mode. In multi-subscriber mode it is always
|
||||
/// <see langword="false"/>, so FailFast degrades to a per-subscriber disconnect and a
|
||||
/// transient registration race can never falsely fault a shared session (Task 8;
|
||||
/// resolves the Task 5 REVISIT race). Defaults to <see langword="true"/> so existing
|
||||
/// call sites and unit tests keep legacy single-subscriber FailFast behavior.
|
||||
/// </param>
|
||||
public SessionEventDistributor(
|
||||
string sessionId,
|
||||
Func<CancellationToken, IAsyncEnumerable<MxEvent>> eventSourceFactory,
|
||||
@@ -189,7 +220,8 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
double replayRetentionSeconds,
|
||||
ILogger<SessionEventDistributor> logger,
|
||||
TimeProvider timeProvider,
|
||||
SubscriberOverflowHandler? overflowHandler = null)
|
||||
SubscriberOverflowHandler? overflowHandler = null,
|
||||
bool singleSubscriberMode = true)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(sessionId);
|
||||
ArgumentNullException.ThrowIfNull(eventSourceFactory);
|
||||
@@ -202,6 +234,7 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
_sessionId = sessionId;
|
||||
_eventSourceFactory = eventSourceFactory;
|
||||
_subscriberQueueCapacity = subscriberQueueCapacity;
|
||||
_singleSubscriberMode = singleSubscriberMode;
|
||||
_overflowHandler = overflowHandler;
|
||||
_shutdownTimeout = DefaultShutdownTimeout;
|
||||
_replayBufferCapacity = replayBufferCapacity;
|
||||
@@ -214,7 +247,11 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Gets the count of currently-registered subscribers.
|
||||
/// Gets the count of currently-registered subscribers. This count INCLUDES internal
|
||||
/// subscribers (e.g. the gateway-owned dashboard mirror registered via
|
||||
/// <c>Register(isInternal: true)</c>), and therefore differs from
|
||||
/// <see cref="GatewaySession.ActiveEventSubscriberCount"/>, which tracks only external
|
||||
/// (gRPC) subscribers and excludes the internal dashboard subscriber.
|
||||
/// </summary>
|
||||
public int SubscriberCount => _subscribers.Count;
|
||||
|
||||
@@ -261,31 +298,14 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
/// </param>
|
||||
public IEventSubscriberLease Register(bool isInternal = false)
|
||||
{
|
||||
// The pump is the single writer for this channel; readers are single-consumer
|
||||
// (one gRPC stream / dashboard subscriber). Synchronous continuations are
|
||||
// disabled so a slow reader can never stall the pump on its completion.
|
||||
//
|
||||
// The pump MUST stay non-blocking: it writes with the non-blocking TryWrite so one
|
||||
// slow reader can never stall the single pump that feeds every subscriber. FullMode
|
||||
// is deliberately Wait — NOT because the pump ever blocks (it never calls the blocking
|
||||
// WriteAsync overload), but because Wait is the only BoundedChannelFullMode under
|
||||
// which TryWrite returns false when the channel is full. That false return IS the
|
||||
// overflow signal the pump needs to apply the per-subscriber backpressure policy. The
|
||||
// Drop* modes would make TryWrite silently succeed-and-drop, hiding overflow and
|
||||
// re-introducing the silent data loss this task removes. So: Wait mode + TryWrite =
|
||||
// a non-blocking pump that still detects a full subscriber channel.
|
||||
Channel<MxEvent> channel = Channel.CreateBounded<MxEvent>(
|
||||
new BoundedChannelOptions(_subscriberQueueCapacity)
|
||||
{
|
||||
SingleReader = true,
|
||||
SingleWriter = true,
|
||||
FullMode = BoundedChannelFullMode.Wait,
|
||||
AllowSynchronousContinuations = false,
|
||||
});
|
||||
|
||||
Channel<MxEvent> channel = CreateSubscriberChannel();
|
||||
long id = Interlocked.Increment(ref _nextSubscriberId);
|
||||
Subscriber subscriber = new(id, channel, isInternal);
|
||||
return RegisterSubscriber(subscriber);
|
||||
}
|
||||
|
||||
private IEventSubscriberLease RegisterSubscriber(Subscriber subscriber)
|
||||
{
|
||||
// The disposed check AND the map add happen under the same lock with no await
|
||||
// in between. DisposeAsync sets _disposed=true under this same lock before it
|
||||
// calls CompleteAllSubscribers, so once disposal has begun no further subscriber
|
||||
@@ -294,7 +314,172 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
lock (_lifecycleLock)
|
||||
{
|
||||
ObjectDisposedException.ThrowIf(_disposed, this);
|
||||
_subscribers[id] = subscriber;
|
||||
_subscribers[subscriber.Id] = subscriber;
|
||||
|
||||
// Close the register-after-pump-completion window: if the pump already ran its
|
||||
// final CompleteAllSubscribers (source completed/faulted) but the distributor is
|
||||
// not yet disposed, no further completion sweep will run, so complete this late
|
||||
// registrant's channel now with the same terminal state instead of leaving its
|
||||
// reader hanging.
|
||||
if (_completed)
|
||||
{
|
||||
subscriber.Channel.Writer.TryComplete(_completionError);
|
||||
}
|
||||
}
|
||||
|
||||
return new SubscriberLease(this, subscriber);
|
||||
}
|
||||
|
||||
// Builds the bounded channel that backs a single subscriber, sized by
// _subscriberQueueCapacity. The pump is the only writer and each channel has exactly one
// reader (one gRPC stream / dashboard subscriber). Synchronous continuations are turned off
// so a slow reader can never stall the pump on its completion.
//
// Why FullMode is Wait: the pump must never block, so it only ever uses the non-blocking
// TryWrite (never the blocking WriteAsync overload). Wait is the only BoundedChannelFullMode
// under which TryWrite returns false on a full channel, and that false return IS the
// overflow signal the pump needs to apply the per-subscriber backpressure policy. Any of the
// Drop* modes would make TryWrite silently succeed-and-drop, hiding overflow and
// re-introducing silent data loss. Wait mode + TryWrite = a non-blocking pump that still
// detects a full channel.
private Channel<MxEvent> CreateSubscriberChannel()
{
    BoundedChannelOptions subscriberOptions = new(_subscriberQueueCapacity)
    {
        FullMode = BoundedChannelFullMode.Wait,
        SingleWriter = true,
        SingleReader = true,
        AllowSynchronousContinuations = false,
    };

    return Channel.CreateBounded<MxEvent>(subscriberOptions);
}
|
||||
|
||||
/// <summary>
|
||||
/// Atomically snapshots the replay ring for events newer than
|
||||
/// <paramref name="afterSequence"/> AND registers a live subscriber, so the
|
||||
/// replay→live handoff has no gap and no duplicate (Task 12 reconnect/resume).
|
||||
/// </summary>
|
||||
/// <param name="afterSequence">
|
||||
/// The last worker sequence the reconnecting client already observed. Replay returns
|
||||
/// events strictly newer than this; the live channel is filtered (by the caller) to
|
||||
/// events strictly newer than the last replayed sequence.
|
||||
/// </param>
|
||||
/// <param name="replayedEvents">
|
||||
/// The retained events newer than <paramref name="afterSequence"/>, in ascending
|
||||
/// sequence order. Never null; empty when nothing newer is retained.
|
||||
/// </param>
|
||||
/// <param name="gap">
|
||||
/// <see langword="true"/> when events between <paramref name="afterSequence"/> and the
|
||||
/// oldest retained event were already evicted (capacity/age), so the client missed
|
||||
/// events that can no longer be replayed and must re-snapshot. Mirrors
|
||||
/// <see cref="TryGetReplayFrom"/> gap semantics.
|
||||
/// </param>
|
||||
/// <param name="oldestAvailableSequence">
|
||||
/// The oldest worker sequence still retained and replayable. <c>0</c> when nothing is
|
||||
/// retained. Meaningful to the caller only when <paramref name="gap"/> is
|
||||
/// <see langword="true"/> (it populates the ReplayGap sentinel's
|
||||
/// <c>oldest_available_sequence</c>).
|
||||
/// </param>
|
||||
/// <param name="liveResumeSequence">
|
||||
/// The worker sequence the live channel must resume strictly after: the highest
|
||||
/// replayed sequence, or <paramref name="afterSequence"/> when nothing was replayed.
|
||||
/// The caller MUST apply this as the per-subscriber live filter so any event that was
|
||||
/// both replayed here and subsequently fanned into this subscriber's live channel is
|
||||
/// dropped exactly once (no duplicate), while every newer event is delivered (no gap).
|
||||
/// </param>
|
||||
/// <param name="isInternal">
|
||||
/// <see langword="true"/> for a gateway-owned internal subscriber. See
|
||||
/// <see cref="Register"/>.
|
||||
/// </param>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// <b>Why this is atomic and the handoff is correct.</b> The replay snapshot and the
|
||||
/// subscriber registration both run inside the SAME <c>_replayLock</c> critical
|
||||
/// section. The pump appends each event to the replay buffer under <c>_replayLock</c>
|
||||
/// <em>before</em> fanning it to subscribers (outside the lock). Therefore, relative
|
||||
/// to this method's critical section, for every event E:
|
||||
/// </para>
|
||||
/// <list type="bullet">
|
||||
/// <item>
|
||||
/// If the pump appended E before this critical section, E is in
|
||||
/// <paramref name="replayedEvents"/> (when newer than
|
||||
/// <paramref name="afterSequence"/>). The pump's fan-out of E may race the
|
||||
/// registration: if it writes E to this new channel too, E's sequence is
|
||||
/// <c>&lt;= liveResumeSequence</c>, so the caller's live filter DROPS it — no
|
||||
/// duplicate.
|
||||
/// </item>
|
||||
/// <item>
|
||||
/// If the pump appends E after this critical section, E is NOT in the snapshot,
|
||||
/// but this subscriber is already registered, so the pump fans E into the live
|
||||
/// channel with sequence <c>> liveResumeSequence</c> — delivered as live, no
|
||||
/// gap.
|
||||
/// </item>
|
||||
/// </list>
|
||||
/// <para>
|
||||
/// Lock ordering: this is the only path that holds both <c>_replayLock</c> and
|
||||
/// <c>_lifecycleLock</c>; it always takes <c>_replayLock</c> first then
|
||||
/// <c>_lifecycleLock</c>. No other path acquires both, so there is no inversion.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public IEventSubscriberLease RegisterWithReplay(
|
||||
ulong afterSequence,
|
||||
out IReadOnlyList<MxEvent> replayedEvents,
|
||||
out bool gap,
|
||||
out ulong oldestAvailableSequence,
|
||||
out ulong liveResumeSequence,
|
||||
bool isInternal = false)
|
||||
{
|
||||
Channel<MxEvent> channel = CreateSubscriberChannel();
|
||||
long id = Interlocked.Increment(ref _nextSubscriberId);
|
||||
Subscriber subscriber = new(id, channel, isInternal);
|
||||
|
||||
// Snapshot replay AND register under a single _replayLock section so the live channel
|
||||
// begins exactly where the replay snapshot ends — see the remarks for the no-gap /
|
||||
// no-duplicate argument. _lifecycleLock is nested inside (consistent ordering) only to
|
||||
// honor the disposed check and the same add semantics as Register.
|
||||
lock (_replayLock)
|
||||
{
|
||||
EvictAged();
|
||||
|
||||
List<MxEvent> newer = [];
|
||||
ulong highestReplayed = afterSequence;
|
||||
|
||||
if (_replayBuffer.Count == 0)
|
||||
{
|
||||
gap = _anyEventSeen && afterSequence < _highestSequenceSeen;
|
||||
oldestAvailableSequence = 0; // meaningful only when gap == true; 0 here since nothing is retained
|
||||
}
|
||||
else
|
||||
{
|
||||
ulong oldestRetained = _replayBuffer.First!.Value.Event.WorkerSequence;
|
||||
gap = oldestRetained > 0 && afterSequence < oldestRetained - 1;
|
||||
// Per the contract on OldestAvailableSequence: meaningful only when gap == true.
|
||||
oldestAvailableSequence = gap ? oldestRetained : 0;
|
||||
|
||||
foreach (ReplayEntry entry in _replayBuffer)
|
||||
{
|
||||
if (entry.Event.WorkerSequence > afterSequence)
|
||||
{
|
||||
newer.Add(entry.Event);
|
||||
highestReplayed = entry.Event.WorkerSequence;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
replayedEvents = newer;
|
||||
liveResumeSequence = highestReplayed;
|
||||
|
||||
lock (_lifecycleLock)
|
||||
{
|
||||
ObjectDisposedException.ThrowIf(_disposed, this);
|
||||
_subscribers[id] = subscriber;
|
||||
|
||||
// Same register-after-pump-completion guard as Register: a resume that races in
|
||||
// after the source already ended still gets its retained replay batch (snapshot
|
||||
// above), but its live channel must be completed now since the pump is gone.
|
||||
if (_completed)
|
||||
{
|
||||
subscriber.Channel.Writer.TryComplete(_completionError);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new SubscriberLease(this, subscriber);
|
||||
@@ -416,28 +601,25 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
// slow consumer must not fault a session shared by other healthy subscribers.
|
||||
private void OnSubscriberOverflow(Subscriber subscriber, ulong workerSequence)
|
||||
{
|
||||
// Snapshot whether this is the sole subscriber BEFORE we unregister it. This drives
|
||||
// the FailFast-fault-session-vs-disconnect decision: FailFast only faults the session
|
||||
// when the overflowing subscriber is the sole subscriber.
|
||||
// Decide whether FailFast may fault the whole session for this overflow. This is the
|
||||
// "isOnlySubscriber" signal the legacy single-subscriber FailFast path keys on.
|
||||
//
|
||||
// This snapshot is safe in v1 because AllowMultipleEventSubscribers=false is enforced
|
||||
// by the validator and the single-subscriber guard in AttachEventSubscriber — a
|
||||
// concurrent second registration is impossible, so the false-FailFast race (two
|
||||
// subscribers, one overflows, Count reads as 1 after the other concurrently unregisters,
|
||||
// FailFast wrongly faults the session) cannot occur today.
|
||||
// Task 8 resolution of the Task 5/7 REVISIT race: gate this on the SESSION MODE
|
||||
// (_singleSubscriberMode), NOT on a live count snapshot. The old
|
||||
// `CountExternalSubscribers() == 1` snapshot raced once multi-subscriber became real —
|
||||
// a concurrent second registration/unregistration could make the count read as 1 with
|
||||
// two subscribers actually present, producing a false FailFast that faults a shared
|
||||
// session. The mode is fixed for the session's lifetime, so reading it is race-free:
|
||||
// - single-subscriber mode: at most one external subscriber can ever exist (the
|
||||
// AttachEventSubscriber guard enforces it), so an overflowing external subscriber
|
||||
// IS the sole subscriber — preserve the legacy FailFast session-fault behavior.
|
||||
// - multi-subscriber mode: never fault the shared session; FailFast degrades to a
|
||||
// per-subscriber disconnect so one slow consumer cannot punish healthy ones.
|
||||
//
|
||||
// REVISIT (Task 7/8): when multi-subscriber is enabled the guard is removed and the
|
||||
// race window opens — a concurrent second registration could cause Count to read as 1
|
||||
// here even with two subscribers, producing a false FailFast that faults a shared
|
||||
// session. Resolve before enabling multi-subscriber.
|
||||
//
|
||||
// Task 6: the gateway-owned internal dashboard subscriber is excluded from this
|
||||
// accounting. (a) An internal subscriber that overflows is NEVER the "only subscriber"
|
||||
// — a slow/broken dashboard must never fault the session, only disconnect its own
|
||||
// mirror. (b) Internal subscribers are excluded from the count, so a lone external
|
||||
// gRPC subscriber still reports isOnlySubscriber==true and preserves the legacy
|
||||
// FailFast session-fault behavior even while the dashboard mirror is attached.
|
||||
bool isOnlySubscriber = !subscriber.IsInternal && CountExternalSubscribers() == 1;
|
||||
// Task 6: the gateway-owned internal dashboard subscriber is excluded — an internal
|
||||
// subscriber that overflows is NEVER the "only subscriber", so a slow/broken dashboard
|
||||
// can only disconnect its own mirror and never fault the session.
|
||||
bool isOnlySubscriber = !subscriber.IsInternal && _singleSubscriberMode;
|
||||
|
||||
_logger.LogDebug(
|
||||
"Event distributor disconnecting subscriber {SubscriberId} in session {SessionId} after queue overflow (worker sequence {WorkerSequence}).",
|
||||
@@ -473,27 +655,23 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
}
|
||||
}
|
||||
|
||||
// Tallies the registered subscribers that are external (IsInternal == false). Internal
// subscribers are skipped so the gateway-owned dashboard subscriber never inflates the
// figure used for the isOnlySubscriber FailFast decision.
private int CountExternalSubscribers()
{
    int externalCount = 0;
    foreach (Subscriber candidate in _subscribers.Values)
    {
        if (candidate.IsInternal)
        {
            continue;
        }

        externalCount++;
    }

    return externalCount;
}
|
||||
|
||||
private void CompleteAllSubscribers(Exception? error)
|
||||
{
|
||||
foreach (Subscriber subscriber in _subscribers.Values)
|
||||
// Record the terminal state AND complete the current subscribers under _lifecycleLock
|
||||
// so this serializes with the subscriber-add in Register/RegisterWithReplay: a
|
||||
// subscriber added before this runs is in the map and completed by the loop; one that
|
||||
// races in afterward sees _completed and completes its own channel in the register
|
||||
// path. Exactly one of the two completes each subscriber. TryComplete is non-blocking
|
||||
// and (channels use AllowSynchronousContinuations=false) runs no continuation inline,
|
||||
// so holding the lock across the loop cannot stall or re-enter.
|
||||
lock (_lifecycleLock)
|
||||
{
|
||||
subscriber.Channel.Writer.TryComplete(error);
|
||||
_completed = true;
|
||||
_completionError = error;
|
||||
foreach (Subscriber subscriber in _subscribers.Values)
|
||||
{
|
||||
subscriber.Channel.Writer.TryComplete(error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -38,13 +38,24 @@ namespace ZB.MOM.WW.MxGateway.Server.Sessions;
|
||||
/// EventsHub group regardless of whether a gRPC client is streaming. When null
|
||||
/// (unit tests that don't exercise the dashboard mirror) no mirror is started.
|
||||
/// </param>
|
||||
/// <param name="AllowMultipleEventSubscribers">
|
||||
/// The session's effective multi-subscriber mode (Task 8). Carried here so the session
|
||||
/// can pass it to its <see cref="SessionEventDistributor"/> at construction — the
|
||||
/// distributor is created at <c>MarkReady</c> (for the dashboard mirror) before any gRPC
|
||||
/// subscriber attaches, so the mode cannot be learned from a later
|
||||
/// <c>AttachEventSubscriber</c> call. The distributor gates its FailFast session-fault
|
||||
/// decision on this mode (single-subscriber only) instead of a live count snapshot,
|
||||
/// closing the Task 5 false-FailFast race. Defaults to <see langword="false"/>
|
||||
/// (single-subscriber) so existing call sites and unit tests are unchanged.
|
||||
/// </param>
|
||||
public sealed record SessionEventStreaming(
|
||||
MxAccessGrpcMapper Mapper,
|
||||
EventOptions EventOptions,
|
||||
ILogger<SessionEventDistributor> DistributorLogger,
|
||||
TimeProvider TimeProvider,
|
||||
GatewayMetrics Metrics,
|
||||
IDashboardEventBroadcaster? DashboardBroadcaster = null)
|
||||
IDashboardEventBroadcaster? DashboardBroadcaster = null,
|
||||
bool AllowMultipleEventSubscribers = false)
|
||||
{
|
||||
/// <summary>
|
||||
/// Defaults used when a session is constructed without explicit streaming
|
||||
|
||||
@@ -17,6 +17,7 @@ public sealed class SessionManager : ISessionManager
|
||||
public const string DefaultCloseReason = "client-close";
|
||||
public const string GatewayShutdownReason = "gateway-shutdown";
|
||||
public const string LeaseExpiredReason = "lease-expired";
|
||||
public const string DetachGraceExpiredReason = "detach-grace-expired";
|
||||
|
||||
private readonly ISessionRegistry _registry;
|
||||
private readonly ISessionWorkerClientFactory _workerClientFactory;
|
||||
@@ -28,6 +29,7 @@ public sealed class SessionManager : ISessionManager
|
||||
private readonly Grpc.MxAccessGrpcMapper _eventMapper;
|
||||
private readonly ILogger<SessionEventDistributor> _distributorLogger;
|
||||
private readonly Dashboard.Hubs.IDashboardEventBroadcaster? _dashboardEventBroadcaster;
|
||||
private readonly ArrayAddressNormalizer? _addressNormalizer;
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of <see cref="SessionManager"/>.
|
||||
@@ -46,6 +48,11 @@ public sealed class SessionManager : ISessionManager
|
||||
/// dashboard receives events regardless of whether a gRPC client is streaming. Null in
|
||||
/// unit tests that do not exercise the dashboard mirror.
|
||||
/// </param>
|
||||
/// <param name="addressNormalizer">
|
||||
/// Rewrites bare array AddItem addresses to their writable <c>[]</c> form using Galaxy
|
||||
/// metadata; handed to each session so the normalization runs at the outbound choke point.
|
||||
/// Null in unit tests that do not exercise array-write ergonomics.
|
||||
/// </param>
|
||||
public SessionManager(
|
||||
ISessionRegistry registry,
|
||||
ISessionWorkerClientFactory workerClientFactory,
|
||||
@@ -55,7 +62,8 @@ public sealed class SessionManager : ISessionManager
|
||||
ILogger<SessionManager>? logger = null,
|
||||
Grpc.MxAccessGrpcMapper? eventMapper = null,
|
||||
ILogger<SessionEventDistributor>? distributorLogger = null,
|
||||
Dashboard.Hubs.IDashboardEventBroadcaster? dashboardEventBroadcaster = null)
|
||||
Dashboard.Hubs.IDashboardEventBroadcaster? dashboardEventBroadcaster = null,
|
||||
ArrayAddressNormalizer? addressNormalizer = null)
|
||||
{
|
||||
_registry = registry ?? throw new ArgumentNullException(nameof(registry));
|
||||
_workerClientFactory = workerClientFactory ?? throw new ArgumentNullException(nameof(workerClientFactory));
|
||||
@@ -66,6 +74,7 @@ public sealed class SessionManager : ISessionManager
|
||||
_eventMapper = eventMapper ?? new Grpc.MxAccessGrpcMapper();
|
||||
_distributorLogger = distributorLogger ?? NullLogger<SessionEventDistributor>.Instance;
|
||||
_dashboardEventBroadcaster = dashboardEventBroadcaster;
|
||||
_addressNormalizer = addressNormalizer;
|
||||
_options = options.Value;
|
||||
_sessionSlots = new SemaphoreSlim(_options.Sessions.MaxSessions, _options.Sessions.MaxSessions);
|
||||
}
|
||||
@@ -295,12 +304,37 @@ public sealed class SessionManager : ISessionManager
|
||||
int closedCount = 0;
|
||||
foreach (GatewaySession session in _registry.Snapshot())
|
||||
{
|
||||
if (!session.IsLeaseExpired(now))
|
||||
// A session is swept when its normal lease has expired OR its detach-grace
|
||||
// retention window has elapsed (last external subscriber dropped and no client
|
||||
// reconnected within DetachGraceSeconds). The detach-grace close is the same
|
||||
// teardown as a lease-expiry close; only the reason differs so operators can tell
|
||||
// a short reconnect-window expiry from a long idle-lease expiry in logs/metrics.
|
||||
// Lease-expiry takes PRECEDENCE over detach-grace when both conditions fire
|
||||
// simultaneously (reason will be lease-expired, not detach-grace-expired).
|
||||
//
|
||||
// TOCTOU note: eligibility is re-verified atomically inside TryBeginCloseIfExpired
|
||||
// under _syncRoot, so a client that reattaches a subscriber between the expiry check
|
||||
// and the close call wins the race and the session is left open and usable.
|
||||
string? reason = session.IsLeaseExpired(now)
|
||||
? LeaseExpiredReason
|
||||
: session.IsDetachGraceExpired(now)
|
||||
? DetachGraceExpiredReason
|
||||
: null;
|
||||
if (reason is null)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
await CloseSessionCoreAsync(session, LeaseExpiredReason, cancellationToken).ConfigureAwait(false);
|
||||
// Re-verify eligibility atomically and begin the Closing transition before
|
||||
// delegating to CloseSessionCoreAsync. If a subscriber reattached between the
|
||||
// IsLeaseExpired/IsDetachGraceExpired check above and here, TryBeginCloseIfExpired
|
||||
// returns false and we skip this session (it is no longer expired).
|
||||
if (!session.TryBeginCloseIfExpired(now, out bool alreadyClosing) && !alreadyClosing)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
await CloseSessionCoreAsync(session, reason, cancellationToken).ConfigureAwait(false);
|
||||
closedCount++;
|
||||
}
|
||||
|
||||
@@ -461,7 +495,8 @@ public sealed class SessionManager : ISessionManager
|
||||
_distributorLogger,
|
||||
_timeProvider,
|
||||
_metrics,
|
||||
_dashboardEventBroadcaster);
|
||||
_dashboardEventBroadcaster,
|
||||
_options.Sessions.AllowMultipleEventSubscribers);
|
||||
|
||||
return new GatewaySession(
|
||||
sessionId,
|
||||
@@ -477,7 +512,10 @@ public sealed class SessionManager : ISessionManager
|
||||
shutdownTimeout,
|
||||
leaseDuration,
|
||||
openedAt,
|
||||
eventStreaming);
|
||||
eventStreaming,
|
||||
TimeSpan.FromSeconds(Math.Max(0, _options.Sessions.DetachGraceSeconds)),
|
||||
TimeSpan.FromMilliseconds(Math.Max(0, _options.Sessions.WorkerReadyWaitTimeoutMs)),
|
||||
_addressNormalizer);
|
||||
}
|
||||
|
||||
private static string CreateClientCorrelationId(
|
||||
|
||||
@@ -5,6 +5,7 @@ public enum SessionManagerErrorCode
|
||||
SessionNotFound,
|
||||
SessionNotReady,
|
||||
EventSubscriberAlreadyActive,
|
||||
EventSubscriberLimitReached,
|
||||
EventQueueOverflow,
|
||||
SessionLimitExceeded,
|
||||
OpenFailed,
|
||||
|
||||
@@ -8,6 +8,9 @@ public static class SessionServiceCollectionExtensions
|
||||
/// <returns>The service collection for chaining.</returns>
|
||||
public static IServiceCollection AddGatewaySessions(this IServiceCollection services)
|
||||
{
|
||||
// Lifetime consistent with IGalaxyHierarchyCache (singleton); the normalizer reads the
|
||||
// cache's current snapshot per call, so it holds no per-session or per-request state.
|
||||
services.AddSingleton<ArrayAddressNormalizer>();
|
||||
services.AddSingleton<ISessionRegistry, SessionRegistry>();
|
||||
services.AddSingleton<ISessionWorkerClientFactory, SessionWorkerClientFactory>();
|
||||
services.AddSingleton<ISessionManager, SessionManager>();
|
||||
|
||||
@@ -0,0 +1,285 @@
|
||||
using Google.Protobuf.WellKnownTypes;
|
||||
using Grpc.Core;
|
||||
using ZB.MOM.WW.MxGateway.Contracts.Proto;
|
||||
|
||||
namespace ZB.MOM.WW.MxGateway.Server.Sessions;
|
||||
|
||||
/// <summary>
/// Expands a client-supplied sparse array write (<see cref="MxSparseArray"/>) into a
/// full, default-filled <see cref="MxArray"/> in place.
/// </summary>
/// <remarks>
/// MXAccess has no partial-array write primitive: a write replaces the whole array.
/// Clients that only care about a few indices send an <see cref="MxSparseArray"/> with
/// the total length plus the indices they want to set; this expander materializes the
/// full array so the worker can do an ordinary whole-array COM write. Indices the client
/// did not mention are reset to the element type's default (they are NOT preserved from
/// the live value); this is intentional, because the gateway cannot read-modify-write
/// without racing the provider.
///
/// For the MXAccess <c>Integer</c> element type the worker's COM-array converter (see
/// <c>VariantConverter.ConvertArray</c>) chooses between a 32-bit and 64-bit sub-array
/// based on the CLR element type. The sparse value carries no CLR array, so this expander
/// mirrors that choice by inspecting the supplied element value kinds: if any element is an
/// <see cref="MxValue.KindOneofCase.Int64Value"/> the whole array is emitted as
/// <see cref="MxArray.Int64Values"/>; otherwise it is emitted as
/// <see cref="MxArray.Int32Values"/> (matching a default Integer array).
///
/// NOTE(review): <c>total_length</c> comes from the client and is only capped at
/// <see cref="Array.MaxLength"/>, so a single request can ask for a very large allocation.
/// Consider enforcing a configurable upper bound at the call site - confirm with owners.
/// </remarks>
internal static class SparseArrayExpander
{
    /// <summary>
    /// Replaces <paramref name="value"/>'s <see cref="MxValue.SparseArrayValue"/> with an
    /// equivalent full <see cref="MxValue.ArrayValue"/>. If <paramref name="value"/> is not
    /// a sparse array this is a no-op, so callers may invoke it unconditionally.
    /// </summary>
    /// <param name="value">The value to expand in place.</param>
    /// <exception cref="ArgumentNullException"><paramref name="value"/> is <c>null</c>.</exception>
    /// <exception cref="RpcException">
    /// <see cref="StatusCode.InvalidArgument"/> when the sparse payload is invalid: zero
    /// total length, a total length above <see cref="Array.MaxLength"/>, an index at or
    /// beyond the total length, a duplicate index, an unsupported element type, or an
    /// element value whose kind does not match the declared element type.
    /// </exception>
    public static void Expand(MxValue value)
    {
        ArgumentNullException.ThrowIfNull(value);

        if (value.KindCase != MxValue.KindOneofCase.SparseArrayValue)
        {
            return;
        }

        MxSparseArray sparse = value.SparseArrayValue;
        MxDataType elementType = sparse.ElementDataType;
        uint totalLength = sparse.TotalLength;

        if (totalLength == 0)
        {
            throw Invalid("Sparse array total_length must be greater than zero.");
        }

        if (!IsSupportedElementType(elementType))
        {
            throw Invalid($"Sparse array element_data_type '{elementType}' is not a supported scalar element type.");
        }

        if (totalLength > (uint)Array.MaxLength)
        {
            throw Invalid(
                $"Sparse array total_length {totalLength} exceeds the maximum supported array length {Array.MaxLength}.");
        }

        // Validate every element before building anything, so an invalid payload never
        // produces a partially populated array. Presizing avoids rehashing while tracking
        // duplicates.
        HashSet<uint> seenIndices = new(sparse.Elements.Count);

        foreach (MxSparseElement element in sparse.Elements)
        {
            if (element.Index >= totalLength)
            {
                throw Invalid(
                    $"Sparse array index {element.Index} is out of range for total_length {totalLength}.");
            }

            if (!seenIndices.Add(element.Index))
            {
                throw Invalid($"Sparse array has a duplicate index {element.Index}.");
            }

            ValidateElementKind(elementType, element);
        }

        // The cast is safe: totalLength was checked against Array.MaxLength above.
        MxArray array = BuildArray(elementType, (int)totalLength, sparse.Elements);
        array.ElementDataType = elementType;
        array.Dimensions.Add(totalLength);

        // Assigning ArrayValue switches the oneof and clears SparseArrayValue.
        value.ArrayValue = array;
    }

    /// <summary>
    /// Builds the typed, default-filled array for <paramref name="elementType"/> and overlays
    /// the supplied sparse elements onto it.
    /// </summary>
    /// <param name="elementType">The declared element type; must already be validated as supported.</param>
    /// <param name="length">The total number of elements in the resulting array.</param>
    /// <param name="elements">The validated sparse elements (in-range, unique, kind-matched).</param>
    /// <returns>An <see cref="MxArray"/> with exactly one typed value list populated.</returns>
    private static MxArray BuildArray(
        MxDataType elementType,
        int length,
        IReadOnlyList<MxSparseElement> elements)
    {
        MxArray array = new();

        switch (elementType)
        {
            case MxDataType.Boolean:
            {
                BoolArray values = new();
                Fill(values.Values, length, elements, static () => false, static v => v.BoolValue);
                array.BoolValues = values;
                break;
            }

            // Any 64-bit element widens the whole array; 32-bit elements are promoted by ReadInt64.
            case MxDataType.Integer when UsesInt64(elements):
            {
                Int64Array values = new();
                Fill(values.Values, length, elements, static () => 0L, ReadInt64);
                array.Int64Values = values;
                break;
            }

            case MxDataType.Integer:
            {
                Int32Array values = new();
                Fill(values.Values, length, elements, static () => 0, static v => v.Int32Value);
                array.Int32Values = values;
                break;
            }

            case MxDataType.Float:
            {
                FloatArray values = new();
                Fill(values.Values, length, elements, static () => 0f, static v => v.FloatValue);
                array.FloatValues = values;
                break;
            }

            case MxDataType.Double:
            {
                DoubleArray values = new();
                Fill(values.Values, length, elements, static () => 0d, static v => v.DoubleValue);
                array.DoubleValues = values;
                break;
            }

            case MxDataType.String:
            {
                StringArray values = new();
                Fill(values.Values, length, elements, static () => string.Empty, static v => v.StringValue);
                array.StringValues = values;
                break;
            }

            case MxDataType.Time:
            {
                TimestampArray values = new();

                // A fresh Timestamp (seconds = 0, nanos = 0) per slot so no two default
                // slots share one mutable message instance.
                Fill(values.Values, length, elements, static () => new Timestamp(), static v => v.TimestampValue);
                array.TimestampValues = values;
                break;
            }

            default:
                // Unreachable: IsSupportedElementType gates the element type before BuildArray.
                throw Invalid($"Sparse array element_data_type '{elementType}' is not supported.");
        }

        return array;
    }

    /// <summary>
    /// Appends <paramref name="length"/> default values to <paramref name="target"/> and then
    /// overwrites the slots named by <paramref name="elements"/>.
    /// </summary>
    /// <typeparam name="T">The CLR element type of the repeated field.</typeparam>
    /// <param name="target">The (empty) repeated field to populate.</param>
    /// <param name="length">The number of default slots to create.</param>
    /// <param name="elements">The sparse elements whose indices are all below <paramref name="length"/>.</param>
    /// <param name="createDefault">Produces the default value for one slot; invoked once per slot.</param>
    /// <param name="read">Extracts the typed value from a sparse element's <see cref="MxValue"/>.</param>
    private static void Fill<T>(
        Google.Protobuf.Collections.RepeatedField<T> target,
        int length,
        IReadOnlyList<MxSparseElement> elements,
        Func<T> createDefault,
        Func<MxValue, T> read)
    {
        // Reserve the backing store once instead of letting it grow repeatedly during Add.
        target.Capacity = length;

        for (int slot = 0; slot < length; slot++)
        {
            target.Add(createDefault());
        }

        foreach (MxSparseElement element in elements)
        {
            target[(int)element.Index] = read(element.Value);
        }
    }

    /// <summary>
    /// Returns whether <paramref name="elementType"/> is one of the scalar element types a
    /// sparse array may declare.
    /// </summary>
    private static bool IsSupportedElementType(MxDataType elementType) => elementType switch
    {
        MxDataType.Boolean => true,
        MxDataType.Integer => true,
        MxDataType.Float => true,
        MxDataType.Double => true,
        MxDataType.String => true,
        MxDataType.Time => true,
        _ => false,
    };

    /// <summary>
    /// Throws <see cref="StatusCode.InvalidArgument"/> when the element's value kind does not
    /// match <paramref name="elementType"/>. A missing value is treated as kind <c>None</c>
    /// and therefore never matches.
    /// </summary>
    private static void ValidateElementKind(MxDataType elementType, MxSparseElement element)
    {
        MxValue.KindOneofCase kind = element.Value?.KindCase ?? MxValue.KindOneofCase.None;

        bool matches = elementType switch
        {
            MxDataType.Boolean => kind == MxValue.KindOneofCase.BoolValue,
            MxDataType.Integer => kind is MxValue.KindOneofCase.Int32Value or MxValue.KindOneofCase.Int64Value,
            MxDataType.Float => kind == MxValue.KindOneofCase.FloatValue,
            MxDataType.Double => kind == MxValue.KindOneofCase.DoubleValue,
            MxDataType.String => kind == MxValue.KindOneofCase.StringValue,
            MxDataType.Time => kind == MxValue.KindOneofCase.TimestampValue,
            _ => false,
        };

        if (!matches)
        {
            throw Invalid(
                $"Sparse array element at index {element.Index} has value kind '{kind}' which does not match element_data_type '{elementType}'.");
        }
    }

    /// <summary>
    /// Returns <c>true</c> when at least one element carries an
    /// <see cref="MxValue.KindOneofCase.Int64Value"/>.
    /// </summary>
    private static bool UsesInt64(IReadOnlyList<MxSparseElement> elements)
    {
        foreach (MxSparseElement element in elements)
        {
            if (element.Value.KindCase == MxValue.KindOneofCase.Int64Value)
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Reads an integer element as 64-bit, widening an <c>Int32Value</c> when necessary.
    /// </summary>
    private static long ReadInt64(MxValue value) =>
        value.KindCase == MxValue.KindOneofCase.Int64Value ? value.Int64Value : value.Int32Value;

    /// <summary>
    /// Creates the <see cref="RpcException"/> used for every validation failure.
    /// </summary>
    private static RpcException Invalid(string message) =>
        new(new Status(StatusCode.InvalidArgument, message));
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user