Code-review 2026-05-20 sweep: re-review at 1cd51bb, resolve 72 findings across all 11 modules
Re-reviewed every module/client against the 10-category checklist
(REVIEW-PROCESS.md) at commit 1cd51bb, filed 72 new findings, and
fixed them in three priority waves (3 High, 17 Medium, 52 Low).
Highs
- Server-017: enumerate AcknowledgeAlarm / QueryActiveAlarms in
GatewayGrpcScopeResolver so non-admin keys can use them; document
the mapping in docs/Authorization.md; add interceptor tests.
- Client.Java-013: add the five missing bulk-method stubs to the
CLI FakeSession so the test module compiles on a clean tree.
- Client.Rust-013: fix the clippy::doc_lazy_continuation regression
in generated tonic code by reformatting the ReadBulkCommand proto
comment and scoping a #![allow(...)] to the generated submodules.
Mediums (highlights)
- Server: unify GatewaySession state-lock discipline (-015) and
make DisposeAsync race-safe against in-flight CloseAsync (-016);
add constraint-enforcement test coverage for the bulk-plan path
(-021).
- Worker: introduce StaRuntimeShutdownException so RunAlarmPollLoop
can distinguish graceful shutdown from a real STA-affinity
violation (-016); have the watchdog skip StaHung while
CurrentCommandCorrelationId is non-empty so a legitimate slow
ReadBulk no longer self-faults (-017).
- Tests: add per-method round-trip + cancellation coverage for the
11 GatewaySession bulk methods (-013); replace the real TCP probe
in GalaxyHierarchyCacheTests with an IGalaxyRepository fake
(-016).
- IntegrationTests: drive the StreamEvents writer in the live Write
test and assert OnWriteComplete (-012); add live tests for
Unadvise/RemoveItem/Unregister ordering, WriteSecured, and
abnormal worker exit (-014).
- Worker.Tests: replace MxAccessSession reflection with an internal
CreateForTesting factory (-016); cover WorkerCancel and
unexpected-body envelope branches (-017).
- Client.Java: cancel MxEventStream when close() races
beforeStart() (-014); return a CancellingCompletableFuture that
actually forwards cancellation through .thenApply chains (-015).
- Client.Python: drop the silent localhost-plaintext downgrade in
the CLI; require explicit --plaintext (-013).
- Client.Rust: stop bench-read-bulk from polluting success-latency
histograms with failed-call durations (-015); add coverage for
the five MalformedReply paths, the bulk-write helpers, the
Error::Unavailable mapping, and the unary-fault path (-016).
- Contracts: extend docs/Contracts.md with the bulk read/write
command family (-009).
Lows (highlights)
- Server: cap GalaxyGlobMatcher.RegexCache; align
WorkerAlarmRpcDispatcher missing-session handling; drop the
duplicate dashboard @page routes; refresh IAlarmRpcDispatcher
XML doc.
- Worker: surface SetXmlAlarmQuery COM failures; remove dead
subscriptionExpression / ExecutingCommand arms; preserve
factory-supplied runtime sessions; split MxAlarmSnapshot.cs into
three files.
- Tests: dispose the WebApplication in seven test classes; rebuild
FakeWorkerProcess.WaitForExitAsync against a real
TaskCompletionSource; switch the heartbeat-expires test to ManualTimeProvider;
add InvariantCulture to the remaining DateTimeOffset.Parse sites;
document GalaxyFilterInputSafetyTests in GatewayTesting.md.
- IntegrationTests: comment fixes, RecordingServerStreamWriter
IDisposable, class-level [Trait], single-source ZB default
connection string.
- Worker.Tests: replace silent-return gating with LiveMxAccessFact
so absent env vars SKIP not pass; PascalCase rename of probe
[Fact]s; deterministic deadline test; new frame-protocol error
tests; ComputeTransitions diff-coverage; relocate dev-rig probes
to Probes/.
- Contracts: add round-trip coverage and per-field redaction /
Galaxy-identifier comments to the protos.
- Client.Dotnet: introduce clients/dotnet/Directory.Build.props so
TreatWarningsAsErrors / analysers apply; document
DiscoverHierarchyOptions and IMxGatewayCliClient; require typed
bulk-read handles in CLI; surface AcknowledgeAlarm transport
faults through Translate().
- Client.Go: kill dead code in alarms_test / fakeGalaxyServer /
runWriteBulkVariant; document the six new subcommands in
writeUsage; drain galaxy-watch events on limit; switch io.EOF
comparisons to errors.Is.
- Client.Java: shared shutdown helpers + new shutdownTimeout
option; regex-based credential redaction; Long.toUnsignedString
for uint64 sequence; doc fixes.
- Client.Python: combine duplicate imports; add coverage for
_percentile / bench-read-bulk / MAX_AGGREGATE_EVENTS /
_api_key_from_env; populate pyproject metadata and ship py.typed.
- Client.Rust: expose next_correlation_id() so CLI ping/close
stop hard-coding correlation IDs; resync RustClientDesign.md
with the current Session / Error surface and CLI subcommand set.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -18,6 +18,7 @@ using Xunit.Abstractions;
|
||||
namespace MxGateway.IntegrationTests;
|
||||
|
||||
[Collection(LiveResourcesCollection.Name)]
|
||||
[Trait("Category", "LiveMxAccess")]
|
||||
public sealed class WorkerLiveMxAccessSmokeTests(ITestOutputHelper output)
|
||||
{
|
||||
private static readonly TimeSpan CommandTimeout = TimeSpan.FromSeconds(15);
|
||||
@@ -27,7 +28,6 @@ public sealed class WorkerLiveMxAccessSmokeTests(ITestOutputHelper output)
|
||||
/// Verifies that a gateway session can register, add item, advise, and stream events from live MXAccess.
|
||||
/// </summary>
|
||||
[LiveMxAccessFact]
|
||||
[Trait("Category", "LiveMxAccess")]
|
||||
public async Task GatewaySession_WithLiveWorker_RegistersAdvisesStreamsDataAndCloses()
|
||||
{
|
||||
string workerExecutablePath = IntegrationTestEnvironment.ResolveLiveMxAccessWorkerExecutablePath();
|
||||
@@ -37,9 +37,9 @@ public sealed class WorkerLiveMxAccessSmokeTests(ITestOutputHelper output)
|
||||
|
||||
TestWorkerProcessFactory processFactory = new(output);
|
||||
await using GatewayServiceFixture fixture = new(workerExecutablePath, processFactory, output);
|
||||
using RecordingServerStreamWriter<MxEvent> eventWriter = new();
|
||||
|
||||
string? sessionId = null;
|
||||
RecordingServerStreamWriter<MxEvent>? eventWriter = null;
|
||||
Task? streamTask = null;
|
||||
using CancellationTokenSource streamCancellation = new();
|
||||
|
||||
@@ -59,7 +59,6 @@ public sealed class WorkerLiveMxAccessSmokeTests(ITestOutputHelper output)
|
||||
Assert.Equal(ProtocolStatusCode.Ok, openReply.ProtocolStatus.Code);
|
||||
Assert.True(openReply.WorkerProcessId > 0);
|
||||
|
||||
eventWriter = new RecordingServerStreamWriter<MxEvent>();
|
||||
streamTask = fixture.Service.StreamEvents(
|
||||
new StreamEventsRequest { SessionId = sessionId },
|
||||
eventWriter,
|
||||
@@ -113,10 +112,11 @@ public sealed class WorkerLiveMxAccessSmokeTests(ITestOutputHelper output)
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Verifies that a Write command round-trips through live MXAccess against an advised item.
|
||||
/// Verifies that a Write command round-trips through live MXAccess against an advised item
|
||||
/// and that the worker emits a matching <see cref="MxEventFamily.OnWriteComplete"/> event
|
||||
/// — the proof of round-trip the cross-language client e2e runner relies on.
|
||||
/// </summary>
|
||||
[LiveMxAccessFact]
|
||||
[Trait("Category", "LiveMxAccess")]
|
||||
public async Task GatewaySession_WithLiveWorker_WritesValueToAdvisedItem()
|
||||
{
|
||||
string workerExecutablePath = IntegrationTestEnvironment.ResolveLiveMxAccessWorkerExecutablePath();
|
||||
@@ -126,9 +126,11 @@ public sealed class WorkerLiveMxAccessSmokeTests(ITestOutputHelper output)
|
||||
|
||||
TestWorkerProcessFactory processFactory = new(output);
|
||||
await using GatewayServiceFixture fixture = new(workerExecutablePath, processFactory, output);
|
||||
using RecordingServerStreamWriter<MxEvent> eventWriter = new();
|
||||
|
||||
string? sessionId = null;
|
||||
Task? streamTask = null;
|
||||
using CancellationTokenSource streamCancellation = new();
|
||||
|
||||
try
|
||||
{
|
||||
@@ -144,11 +146,10 @@ public sealed class WorkerLiveMxAccessSmokeTests(ITestOutputHelper output)
|
||||
sessionId = openReply.SessionId;
|
||||
Assert.Equal(ProtocolStatusCode.Ok, openReply.ProtocolStatus.Code);
|
||||
|
||||
RecordingServerStreamWriter<MxEvent> eventWriter = new();
|
||||
streamTask = fixture.Service.StreamEvents(
|
||||
new StreamEventsRequest { SessionId = sessionId },
|
||||
eventWriter,
|
||||
new TestServerCallContext());
|
||||
new TestServerCallContext(streamCancellation.Token));
|
||||
|
||||
MxCommandReply registerReply = await fixture.Service.Invoke(
|
||||
CreateRegisterRequest(sessionId),
|
||||
@@ -180,16 +181,50 @@ public sealed class WorkerLiveMxAccessSmokeTests(ITestOutputHelper output)
|
||||
new TestServerCallContext()).ConfigureAwait(false);
|
||||
LogReply("Write", writeReply);
|
||||
|
||||
// The gateway must always report a protocol-level status. MXAccess
|
||||
// parity details (a write rejection, a secured-item failure) belong
|
||||
// in hresult / statuses, not in a transport failure — the command
|
||||
// itself completed its round-trip to the worker and back.
|
||||
// Happy-path Write: the worker COM call succeeded so HResultConverter
|
||||
// produces ProtocolStatusCode.Ok. An MXAccess rejection (a write to a
|
||||
// bad item, a secured-item failure) would surface as
|
||||
// ProtocolStatusCode.MxaccessFailure with a non-zero hresult — never
|
||||
// as an RpcException / transport fault, because the command still
|
||||
// completed its round-trip to the worker and back.
|
||||
Assert.Equal(ProtocolStatusCode.Ok, writeReply.ProtocolStatus.Code);
|
||||
Assert.Equal(MxCommandKind.Write, writeReply.Kind);
|
||||
|
||||
// Proof of round-trip: MXAccess fires OnWriteComplete (event id 2)
|
||||
// after the underlying provider acknowledges the write — that is
|
||||
// the event the cross-language client e2e runner asserts on. We
|
||||
// scan the recorded stream (so an interleaving OnDataChange does
|
||||
// not preempt the match) for an OnWriteComplete carrying the same
|
||||
// server/item handles the Write command targeted.
|
||||
MxEvent writeComplete = await eventWriter
|
||||
.WaitForMessageAsync(
|
||||
candidate => candidate.Family == MxEventFamily.OnWriteComplete
|
||||
&& candidate.ServerHandle == registerReply.Register.ServerHandle
|
||||
&& candidate.ItemHandle == addItemReply.AddItem.ItemHandle,
|
||||
IntegrationTestEnvironment.LiveMxAccessEventTimeout,
|
||||
streamCancellation.Token)
|
||||
.ConfigureAwait(false);
|
||||
LogEvent(writeComplete);
|
||||
|
||||
Assert.Equal(MxEventFamily.OnWriteComplete, writeComplete.Family);
|
||||
Assert.Equal(sessionId, writeComplete.SessionId);
|
||||
Assert.Equal(registerReply.Register.ServerHandle, writeComplete.ServerHandle);
|
||||
Assert.Equal(addItemReply.AddItem.ItemHandle, writeComplete.ItemHandle);
|
||||
|
||||
// The stream task must not be in a faulted state. ShutDownAsync's
|
||||
// broad catch would otherwise swallow the fault and silently let
|
||||
// this Write-parity coverage pass against a broken event pipeline.
|
||||
Assert.False(
|
||||
streamTask.IsFaulted,
|
||||
streamTask.Exception?.ToString() ?? "Event stream task faulted without an exception.");
|
||||
}
|
||||
finally
|
||||
{
|
||||
await ShutDownAsync(fixture, processFactory, sessionId, streamTask).ConfigureAwait(false);
|
||||
// Cancel the stream call before draining so StreamEvents observes
|
||||
// cancellation rather than blocking on the channel. Any unhandled
|
||||
// stream-task fault is rethrown from ShutDownAsync into the test.
|
||||
streamCancellation.Cancel();
|
||||
await ShutDownAsync(fixture, processFactory, sessionId, streamTask, propagateStreamFaults: true).ConfigureAwait(false);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -198,7 +233,6 @@ public sealed class WorkerLiveMxAccessSmokeTests(ITestOutputHelper output)
|
||||
/// without faulting the gateway transport, exercising the invalid-handle parity path.
|
||||
/// </summary>
|
||||
[LiveMxAccessFact]
|
||||
[Trait("Category", "LiveMxAccess")]
|
||||
public async Task GatewaySession_WithLiveWorker_InvalidHandleCommand_SurfacesFailureWithoutTransportFault()
|
||||
{
|
||||
string workerExecutablePath = IntegrationTestEnvironment.ResolveLiveMxAccessWorkerExecutablePath();
|
||||
@@ -235,8 +269,10 @@ public sealed class WorkerLiveMxAccessSmokeTests(ITestOutputHelper output)
|
||||
|
||||
// MXAccess parity: an invalid handle is an MXAccess-level failure.
|
||||
// The command still completed its worker round-trip, so the gateway
|
||||
// protocol status is Ok and the failure shows up in hresult / the
|
||||
// status proxies — it must not be reported as a transport fault.
|
||||
// must reply with ProtocolStatusCode.MxaccessFailure and a non-zero
|
||||
// hresult carrying the COM failure (per HResultConverter) — never a
|
||||
// gRPC transport fault. The assertion below just checks the status
|
||||
// is not Ok; the failure detail lives in hresult / the status proxies.
|
||||
Assert.NotEqual(ProtocolStatusCode.Ok, addItemReply.ProtocolStatus.Code);
|
||||
Assert.True(
|
||||
addItemReply.AddItem is null || addItemReply.AddItem.ItemHandle <= 0,
|
||||
@@ -248,35 +284,411 @@ public sealed class WorkerLiveMxAccessSmokeTests(ITestOutputHelper output)
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Verifies the MXAccess teardown chain: UnAdvise, then RemoveItem, then Unregister each
/// return <see cref="ProtocolStatusCode.Ok"/>, and no further OnDataChange events are
/// recorded for the un-advised item once the UnAdvise has settled. Exercises the
/// lifecycle-ordering parity CLAUDE.md singles out as a "do not synthesize" rule.
/// </summary>
/// <remarks>
/// The OnDataChange baseline is captured only after UnAdvise has replied and a settle
/// window has elapsed. Counting before UnAdvise is issued would make the equality
/// assertion fail whenever an event was still in flight at that moment.
/// </remarks>
[LiveMxAccessFact]
public async Task GatewaySession_WithLiveWorker_UnadviseRemoveItemUnregister_TeardownOrderingParity()
{
    // Time allowed for events that were already in flight to reach the recorder.
    TimeSpan settleWindow = TimeSpan.FromMilliseconds(500);

    string workerExecutablePath = IntegrationTestEnvironment.ResolveLiveMxAccessWorkerExecutablePath();
    Assert.True(
        File.Exists(workerExecutablePath),
        $"Live MXAccess worker executable was not found at {workerExecutablePath}. Build the worker or set {IntegrationTestEnvironment.LiveMxAccessWorkerExecutableVariableName}.");

    TestWorkerProcessFactory processFactory = new(output);
    await using GatewayServiceFixture fixture = new(workerExecutablePath, processFactory, output);
    using RecordingServerStreamWriter<MxEvent> eventWriter = new();

    string? sessionId = null;
    Task? streamTask = null;
    using CancellationTokenSource streamCancellation = new();

    try
    {
        OpenSessionReply openReply = await fixture.Service.OpenSession(
            new OpenSessionRequest
            {
                ClientSessionName = "live-mxaccess-teardown",
                ClientCorrelationId = "live-open-teardown",
                CommandTimeout = Duration.FromTimeSpan(CommandTimeout),
            },
            new TestServerCallContext()).ConfigureAwait(false);

        sessionId = openReply.SessionId;
        Assert.Equal(ProtocolStatusCode.Ok, openReply.ProtocolStatus.Code);

        // Start the event stream before any command so no event can be missed.
        streamTask = fixture.Service.StreamEvents(
            new StreamEventsRequest { SessionId = sessionId },
            eventWriter,
            new TestServerCallContext(streamCancellation.Token));

        MxCommandReply registerReply = await fixture.Service.Invoke(
            CreateRegisterRequest(sessionId),
            new TestServerCallContext()).ConfigureAwait(false);
        LogReply("Register", registerReply);
        Assert.Equal(ProtocolStatusCode.Ok, registerReply.ProtocolStatus.Code);

        int serverHandle = registerReply.Register.ServerHandle;

        MxCommandReply addItemReply = await fixture.Service.Invoke(
            CreateAddItemRequest(sessionId, serverHandle),
            new TestServerCallContext()).ConfigureAwait(false);
        LogReply("AddItem", addItemReply);
        Assert.Equal(ProtocolStatusCode.Ok, addItemReply.ProtocolStatus.Code);
        int itemHandle = addItemReply.AddItem.ItemHandle;

        MxCommandReply adviseReply = await fixture.Service.Invoke(
            CreateAdviseRequest(sessionId, serverHandle, itemHandle),
            new TestServerCallContext()).ConfigureAwait(false);
        LogReply("Advise", adviseReply);
        Assert.Equal(ProtocolStatusCode.Ok, adviseReply.ProtocolStatus.Code);

        // Wait for an OnDataChange to prove the subscription is live before tearing it down.
        MxEvent firstDataChange = await eventWriter
            .WaitForMessageAsync(
                candidate => candidate.Family == MxEventFamily.OnDataChange
                    && candidate.ServerHandle == serverHandle
                    && candidate.ItemHandle == itemHandle,
                IntegrationTestEnvironment.LiveMxAccessEventTimeout,
                streamCancellation.Token)
            .ConfigureAwait(false);
        LogEvent(firstDataChange);

        // 1) UnAdvise — must reply Ok; the worker must stop emitting OnDataChange
        //    for this (server, item) pair after this returns.
        MxCommandReply unadviseReply = await fixture.Service.Invoke(
            CreateUnAdviseRequest(sessionId, serverHandle, itemHandle),
            new TestServerCallContext()).ConfigureAwait(false);
        LogReply("UnAdvise", unadviseReply);
        Assert.Equal(ProtocolStatusCode.Ok, unadviseReply.ProtocolStatus.Code);
        Assert.Equal(MxCommandKind.UnAdvise, unadviseReply.Kind);

        // Let any OnDataChange that was already in flight when UnAdvise was issued
        // reach the recorder, then take the baseline. Events counted here are
        // legitimate pre-UnAdvise traffic and must not fail the test.
        await Task.Delay(settleWindow).ConfigureAwait(false);
        int dataChangeCountAfterUnadvise = CountMatchingEvents(
            eventWriter,
            e => e.Family == MxEventFamily.OnDataChange
                && e.ServerHandle == serverHandle
                && e.ItemHandle == itemHandle);

        // 2) RemoveItem — must reply Ok against the same handles.
        MxCommandReply removeItemReply = await fixture.Service.Invoke(
            CreateRemoveItemRequest(sessionId, serverHandle, itemHandle),
            new TestServerCallContext()).ConfigureAwait(false);
        LogReply("RemoveItem", removeItemReply);
        Assert.Equal(ProtocolStatusCode.Ok, removeItemReply.ProtocolStatus.Code);
        Assert.Equal(MxCommandKind.RemoveItem, removeItemReply.Kind);

        // 3) Unregister — closes the client session inside the worker.
        MxCommandReply unregisterReply = await fixture.Service.Invoke(
            CreateUnregisterRequest(sessionId, serverHandle),
            new TestServerCallContext()).ConfigureAwait(false);
        LogReply("Unregister", unregisterReply);
        Assert.Equal(ProtocolStatusCode.Ok, unregisterReply.ProtocolStatus.Code);
        Assert.Equal(MxCommandKind.Unregister, unregisterReply.Kind);

        // MXAccess parity: after UnAdvise the provider must stop publishing OnDataChange
        // for this item. A regression that left a stale subscription alive would surface
        // as additional events recorded during this second settle window.
        await Task.Delay(settleWindow).ConfigureAwait(false);

        int dataChangeCountAfterTeardown = CountMatchingEvents(
            eventWriter,
            e => e.Family == MxEventFamily.OnDataChange
                && e.ServerHandle == serverHandle
                && e.ItemHandle == itemHandle);
        output.WriteLine(
            $"DataChange count after UnAdvise+settle={dataChangeCountAfterUnadvise} after teardown+settle={dataChangeCountAfterTeardown}");
        Assert.Equal(dataChangeCountAfterUnadvise, dataChangeCountAfterTeardown);

        // A RemoveItem against the just-freed item handle must not silently succeed —
        // the worker has to relay MXAccess's invalid-handle response. Closing the
        // session is enough for parity, but we sanity-check that re-using the freed
        // pair does not accidentally appear Ok.
        MxCommandReply secondRemoveItemReply = await fixture.Service.Invoke(
            CreateRemoveItemRequest(sessionId, serverHandle, itemHandle),
            new TestServerCallContext()).ConfigureAwait(false);
        LogReply("RemoveItem(stale)", secondRemoveItemReply);
        Assert.NotEqual(ProtocolStatusCode.Ok, secondRemoveItemReply.ProtocolStatus.Code);
    }
    finally
    {
        // Cancel the stream call first so shutdown does not wait on an open stream.
        streamCancellation.Cancel();
        await ShutDownAsync(fixture, processFactory, sessionId, streamTask).ConfigureAwait(false);
    }
}
|
||||
|
||||
/// <summary>
/// Exercises the MXAccess <c>WriteSecured</c> path end to end: <c>AuthenticateUser</c> is
/// invoked to obtain a user id, then <c>WriteSecured</c> is issued against an advised item.
/// The test asserts only the reply kind, an accepted protocol status, and that the
/// diagnostic message does not echo the password — it does not fabricate an MXAccess
/// outcome, because CLAUDE.md treats <c>WriteSecured</c> ordering as a parity surface.
/// </summary>
[LiveMxAccessFact]
public async Task GatewaySession_WithLiveWorker_WriteSecured_AuthenticatedRoundTripParity()
{
    string workerExecutablePath = IntegrationTestEnvironment.ResolveLiveMxAccessWorkerExecutablePath();
    bool workerExists = File.Exists(workerExecutablePath);
    Assert.True(
        workerExists,
        $"Live MXAccess worker executable was not found at {workerExecutablePath}. Build the worker or set {IntegrationTestEnvironment.LiveMxAccessWorkerExecutableVariableName}.");

    TestWorkerProcessFactory processFactory = new(output);
    await using GatewayServiceFixture fixture = new(workerExecutablePath, processFactory, output);

    // Events are recorded but not asserted on: the original author notes that
    // WriteSecured against an unprotected item commonly fails with 0x80004021 on
    // this provider and then raises no event at all.
    using RecordingServerStreamWriter<MxEvent> eventWriter = new();

    string? sessionId = null;
    Task? streamTask = null;
    using CancellationTokenSource streamCancellation = new();

    try
    {
        OpenSessionRequest openRequest = new()
        {
            ClientSessionName = "live-mxaccess-write-secured",
            ClientCorrelationId = "live-open-write-secured",
            CommandTimeout = Duration.FromTimeSpan(CommandTimeout),
        };
        OpenSessionReply openReply = await fixture.Service
            .OpenSession(openRequest, new TestServerCallContext())
            .ConfigureAwait(false);

        sessionId = openReply.SessionId;
        Assert.Equal(ProtocolStatusCode.Ok, openReply.ProtocolStatus.Code);

        streamTask = fixture.Service.StreamEvents(
            new StreamEventsRequest { SessionId = sessionId },
            eventWriter,
            new TestServerCallContext(streamCancellation.Token));

        // Register -> AddItem -> Advise: the standard setup chain; every step must be Ok.
        MxCommandReply registerReply = await fixture.Service
            .Invoke(CreateRegisterRequest(sessionId), new TestServerCallContext())
            .ConfigureAwait(false);
        LogReply("Register", registerReply);
        Assert.Equal(ProtocolStatusCode.Ok, registerReply.ProtocolStatus.Code);
        int serverHandle = registerReply.Register.ServerHandle;

        MxCommandReply addItemReply = await fixture.Service
            .Invoke(CreateAddItemRequest(sessionId, serverHandle), new TestServerCallContext())
            .ConfigureAwait(false);
        LogReply("AddItem", addItemReply);
        Assert.Equal(ProtocolStatusCode.Ok, addItemReply.ProtocolStatus.Code);
        int itemHandle = addItemReply.AddItem.ItemHandle;

        MxCommandReply adviseReply = await fixture.Service
            .Invoke(CreateAdviseRequest(sessionId, serverHandle, itemHandle), new TestServerCallContext())
            .ConfigureAwait(false);
        LogReply("Advise", adviseReply);
        Assert.Equal(ProtocolStatusCode.Ok, adviseReply.ProtocolStatus.Code);

        // Credentials come from a resolver helper rather than being inlined here,
        // in keeping with the gateway's "do not log secrets" rule. Only the status,
        // hresult and returned user id are written to the test output.
        (string verifyUser, string verifyPassword) = ResolveLiveMxAccessSecuredCredentials();
        MxCommandReply authReply = await fixture.Service
            .Invoke(
                CreateAuthenticateUserRequest(sessionId, serverHandle, verifyUser, verifyPassword),
                new TestServerCallContext())
            .ConfigureAwait(false);
        output.WriteLine(
            $"AuthenticateUser status={authReply.ProtocolStatus.Code} hresult={authReply.Hresult} user_id={authReply.AuthenticateUser?.UserId}");

        // AuthenticateUser may legitimately fail (the provider can reject the pair).
        // Use the returned user id only when the call succeeded with a non-zero id;
        // otherwise stay at 0 so the parity assertion below still holds.
        int currentUserId = 0;
        if (authReply.ProtocolStatus.Code == ProtocolStatusCode.Ok
            && authReply.AuthenticateUser is not null
            && authReply.AuthenticateUser.UserId != 0)
        {
            currentUserId = authReply.AuthenticateUser.UserId;
        }

        MxCommandRequest writeSecuredRequest = CreateWriteSecuredRequest(
            sessionId,
            serverHandle,
            itemHandle,
            currentUserId,
            verifierUserId: 0);
        MxCommandReply writeSecuredReply = await fixture.Service
            .Invoke(writeSecuredRequest, new TestServerCallContext())
            .ConfigureAwait(false);
        LogReply("WriteSecured", writeSecuredReply);

        // Parity: the command completed its round-trip, so the reply kind is
        // WriteSecured and the protocol status is either Ok or MxaccessFailure.
        // The MXAccess outcome lives in protocol_status + hresult, never in a
        // transport fault, and the diagnostic message must not contain the password.
        Assert.Equal(MxCommandKind.WriteSecured, writeSecuredReply.Kind);

        ProtocolStatusCode writeStatus = writeSecuredReply.ProtocolStatus.Code;
        bool statusAccepted = writeStatus == ProtocolStatusCode.Ok
            || writeStatus == ProtocolStatusCode.MxaccessFailure;
        Assert.True(
            statusAccepted,
            $"Unexpected WriteSecured protocol status {writeSecuredReply.ProtocolStatus.Code}.");

        string diagnosticText = writeSecuredReply.DiagnosticMessage ?? string.Empty;
        Assert.DoesNotContain(verifyPassword, diagnosticText, StringComparison.Ordinal);
    }
    finally
    {
        // Cancel the stream call before the shared shutdown helper runs.
        streamCancellation.Cancel();
        await ShutDownAsync(fixture, processFactory, sessionId, streamTask).ConfigureAwait(false);
    }
}
|
||||
|
||||
/// <summary>
/// Kills the live worker process underneath an open session and verifies that the
/// gateway moves the session to <see cref="SessionState.Faulted"/> with a non-empty
/// fault description that matches a known worker-exit classification, rather than
/// hanging or crashing.
/// </summary>
[LiveMxAccessFact]
public async Task GatewaySession_WithLiveWorker_AbnormalWorkerExit_MarksSessionFaulted()
{
    string workerExecutablePath = IntegrationTestEnvironment.ResolveLiveMxAccessWorkerExecutablePath();
    bool workerExists = File.Exists(workerExecutablePath);
    Assert.True(
        workerExists,
        $"Live MXAccess worker executable was not found at {workerExecutablePath}. Build the worker or set {IntegrationTestEnvironment.LiveMxAccessWorkerExecutableVariableName}.");

    TestWorkerProcessFactory processFactory = new(output);
    await using GatewayServiceFixture fixture = new(workerExecutablePath, processFactory, output);
    using RecordingServerStreamWriter<MxEvent> eventWriter = new();

    string? sessionId = null;
    Task? streamTask = null;
    using CancellationTokenSource streamCancellation = new();

    try
    {
        OpenSessionRequest openRequest = new()
        {
            ClientSessionName = "live-mxaccess-abnormal-exit",
            ClientCorrelationId = "live-open-abnormal",
            CommandTimeout = Duration.FromTimeSpan(CommandTimeout),
        };
        OpenSessionReply openReply = await fixture.Service
            .OpenSession(openRequest, new TestServerCallContext())
            .ConfigureAwait(false);

        sessionId = openReply.SessionId;
        Assert.Equal(ProtocolStatusCode.Ok, openReply.ProtocolStatus.Code);

        streamTask = fixture.Service.StreamEvents(
            new StreamEventsRequest { SessionId = sessionId },
            eventWriter,
            new TestServerCallContext(streamCancellation.Token));

        // Terminate the worker directly instead of calling CloseSession, so the
        // state transition is driven by the abnormal exit and not a graceful path.
        processFactory.KillAllAndDetach();

        // Poll the session every 50 ms until it reports Faulted or the deadline passes.
        SessionState observedState = SessionState.Unspecified;
        string? observedFault = null;
        DateTimeOffset waitDeadline = DateTimeOffset.UtcNow + StreamShutdownTimeout;
        while (DateTimeOffset.UtcNow < waitDeadline)
        {
            if (fixture.TryGetSession(sessionId, out GatewaySession? session))
            {
                observedState = session.State;
                observedFault = session.FinalFault;
            }

            // observedState can only be Faulted if the lookup above just set it.
            if (observedState == SessionState.Faulted)
            {
                break;
            }

            await Task.Delay(TimeSpan.FromMilliseconds(50)).ConfigureAwait(false);
        }

        output.WriteLine($"AbnormalExit observed_state={observedState} fault={observedFault}");
        Assert.Equal(SessionState.Faulted, observedState);
        Assert.False(string.IsNullOrWhiteSpace(observedFault), "Faulted session must carry a non-empty fault description.");

        // The fault text must mention at least one known abnormal-exit keyword so
        // operators get an actionable cause rather than an opaque exception trace.
        // Keywords are checked in order, case-insensitively, stopping at the first hit.
        string faultText = observedFault!;
        string[] knownClassifications = { "disconnect", "pipe", "heartbeat", "worker", "end of stream" };
        bool faultIsClassified = Array.Exists(
            knownClassifications,
            keyword => faultText.Contains(keyword, StringComparison.OrdinalIgnoreCase));
        Assert.True(
            faultIsClassified,
            $"Fault description '{observedFault}' did not match a known worker-exit classification.");
    }
    finally
    {
        streamCancellation.Cancel();

        // A null session id is passed on purpose: the session is already faulted and
        // a CloseSession round-trip would only log a cleanup failure. The shared
        // shutdown helper is still invoked so the stream task is drained.
        await ShutDownAsync(fixture, processFactory, sessionId: null, streamTask).ConfigureAwait(false);
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// Closes the session and drains the event stream / worker processes without letting a
|
||||
/// cleanup timeout mask the original failure from the test body.
|
||||
/// </summary>
|
||||
/// <param name="propagateStreamFaults">
|
||||
/// When <see langword="true"/>, a faulted <paramref name="streamTask"/> is rethrown so the
|
||||
/// test fails on a silent stream-task exception (the Write parity test relies on this so
|
||||
/// stream-side defects in event delivery are visible). When <see langword="false"/>, all
|
||||
/// cleanup exceptions are logged and swallowed so a real test-body assertion failure is not
|
||||
/// masked by a shutdown timeout (the original IntegrationTests-004 fix).
|
||||
/// </param>
|
||||
private async Task ShutDownAsync(
|
||||
GatewayServiceFixture fixture,
|
||||
TestWorkerProcessFactory processFactory,
|
||||
string? sessionId,
|
||||
Task? streamTask)
|
||||
Task? streamTask,
|
||||
bool propagateStreamFaults = false)
|
||||
{
|
||||
Exception? streamFault = null;
|
||||
|
||||
try
|
||||
{
|
||||
if (!string.IsNullOrWhiteSpace(sessionId))
|
||||
{
|
||||
await CloseSessionAsync(fixture, sessionId).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
if (streamTask is not null)
|
||||
{
|
||||
await streamTask.WaitAsync(StreamShutdownTimeout).ConfigureAwait(false);
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
// Cleanup runs in a finally block. A TimeoutException (or a faulted
|
||||
// StreamEvents task) here would otherwise replace any assertion
|
||||
// failure raised in the try block. Log it and let the original
|
||||
// failure surface.
|
||||
output.WriteLine($"Cleanup error during session/stream shutdown: {ex}");
|
||||
output.WriteLine($"Cleanup error during session close: {ex}");
|
||||
}
|
||||
|
||||
if (streamTask is not null)
|
||||
{
|
||||
try
|
||||
{
|
||||
await streamTask.WaitAsync(StreamShutdownTimeout).ConfigureAwait(false);
|
||||
}
|
||||
catch (OperationCanceledException ex)
|
||||
{
|
||||
// A linked CancellationToken on the streaming TestServerCallContext is the
|
||||
// intended way to stop StreamEvents promptly — treat the resulting
|
||||
// OperationCanceledException as a clean shutdown, not a fault.
|
||||
output.WriteLine($"Event stream task cancelled during shutdown: {ex.Message}");
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
// Cleanup runs in a finally block. By default a faulted StreamEvents task is
|
||||
// logged and swallowed so a test-body assertion failure is not masked. When
|
||||
// the caller opts into propagateStreamFaults (the Write parity test), we
|
||||
// rethrow the fault after the worker-process wait so a silent stream-side
|
||||
// defect actually fails the test.
|
||||
output.WriteLine($"Event stream task faulted during shutdown: {ex}");
|
||||
if (propagateStreamFaults)
|
||||
{
|
||||
streamFault = ex;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
try
|
||||
@@ -287,6 +699,11 @@ public sealed class WorkerLiveMxAccessSmokeTests(ITestOutputHelper output)
|
||||
{
|
||||
output.WriteLine($"Cleanup error while waiting for worker processes to exit: {ex}");
|
||||
}
|
||||
|
||||
if (streamFault is not null)
|
||||
{
|
||||
throw streamFault;
|
||||
}
|
||||
}
|
||||
|
||||
private static MxCommandRequest CreateRegisterRequest(string sessionId)
|
||||
@@ -373,6 +790,145 @@ public sealed class WorkerLiveMxAccessSmokeTests(ITestOutputHelper output)
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>
/// Builds an <see cref="MxCommandRequest"/> that carries an <c>UnAdvise</c> command.
/// </summary>
/// <param name="sessionId">Session id stamped on the request.</param>
/// <param name="serverHandle">Server handle copied into the command payload.</param>
/// <param name="itemHandle">Item handle copied into the command payload.</param>
/// <returns>The request, tagged with the fixed correlation id <c>live-unadvise</c>.</returns>
private static MxCommandRequest CreateUnAdviseRequest(
    string sessionId,
    int serverHandle,
    int itemHandle)
{
    UnAdviseCommand payload = new()
    {
        ServerHandle = serverHandle,
        ItemHandle = itemHandle,
    };

    MxCommand command = new()
    {
        Kind = MxCommandKind.UnAdvise,
        UnAdvise = payload,
    };

    return new MxCommandRequest
    {
        SessionId = sessionId,
        ClientCorrelationId = "live-unadvise",
        Command = command,
    };
}
|
||||
|
||||
/// <summary>
/// Builds an <see cref="MxCommandRequest"/> that carries a <c>RemoveItem</c> command.
/// </summary>
/// <param name="sessionId">Session id stamped on the request.</param>
/// <param name="serverHandle">Server handle copied into the command payload.</param>
/// <param name="itemHandle">Item handle copied into the command payload.</param>
/// <returns>The request, tagged with the fixed correlation id <c>live-remove-item</c>.</returns>
private static MxCommandRequest CreateRemoveItemRequest(
    string sessionId,
    int serverHandle,
    int itemHandle)
{
    RemoveItemCommand payload = new()
    {
        ServerHandle = serverHandle,
        ItemHandle = itemHandle,
    };

    MxCommand command = new()
    {
        Kind = MxCommandKind.RemoveItem,
        RemoveItem = payload,
    };

    return new MxCommandRequest
    {
        SessionId = sessionId,
        ClientCorrelationId = "live-remove-item",
        Command = command,
    };
}
|
||||
|
||||
/// <summary>
/// Builds an <see cref="MxCommandRequest"/> that carries an <c>Unregister</c> command.
/// </summary>
/// <param name="sessionId">Session id stamped on the request.</param>
/// <param name="serverHandle">Server handle copied into the command payload.</param>
/// <returns>The request, tagged with the fixed correlation id <c>live-unregister</c>.</returns>
private static MxCommandRequest CreateUnregisterRequest(
    string sessionId,
    int serverHandle)
{
    UnregisterCommand payload = new()
    {
        ServerHandle = serverHandle,
    };

    MxCommand command = new()
    {
        Kind = MxCommandKind.Unregister,
        Unregister = payload,
    };

    return new MxCommandRequest
    {
        SessionId = sessionId,
        ClientCorrelationId = "live-unregister",
        Command = command,
    };
}
|
||||
|
||||
/// <summary>
/// Builds an <see cref="MxCommandRequest"/> that carries an <c>AuthenticateUser</c> command.
/// </summary>
/// <param name="sessionId">Session id stamped on the request.</param>
/// <param name="serverHandle">Server handle copied into the command payload.</param>
/// <param name="verifyUser">Value assigned to the command's <c>VerifyUser</c> field.</param>
/// <param name="verifyPassword">Value assigned to the command's <c>VerifyUserPassword</c> field.</param>
/// <returns>The request, tagged with the fixed correlation id <c>live-authenticate-user</c>.</returns>
private static MxCommandRequest CreateAuthenticateUserRequest(
    string sessionId,
    int serverHandle,
    string verifyUser,
    string verifyPassword)
{
    AuthenticateUserCommand payload = new()
    {
        ServerHandle = serverHandle,
        VerifyUser = verifyUser,
        VerifyUserPassword = verifyPassword,
    };

    MxCommand command = new()
    {
        Kind = MxCommandKind.AuthenticateUser,
        AuthenticateUser = payload,
    };

    return new MxCommandRequest
    {
        SessionId = sessionId,
        ClientCorrelationId = "live-authenticate-user",
        Command = command,
    };
}
|
||||
|
||||
/// <summary>
/// Builds an <see cref="MxCommandRequest"/> that carries a <c>WriteSecured</c> command
/// writing the fixed integer value <c>2</c>.
/// </summary>
/// <param name="sessionId">Session id stamped on the request.</param>
/// <param name="serverHandle">Server handle copied into the command payload.</param>
/// <param name="itemHandle">Item handle copied into the command payload.</param>
/// <param name="currentUserId">Value assigned to the command's <c>CurrentUserId</c> field.</param>
/// <param name="verifierUserId">Value assigned to the command's <c>VerifierUserId</c> field.</param>
/// <returns>The request, tagged with the fixed correlation id <c>live-write-secured</c>.</returns>
private static MxCommandRequest CreateWriteSecuredRequest(
    string sessionId,
    int serverHandle,
    int itemHandle,
    int currentUserId,
    int verifierUserId)
{
    // The written value is a constant: an Integer-typed MxValue holding 2.
    MxValue valueToWrite = new()
    {
        DataType = MxDataType.Integer,
        Int32Value = 2,
    };

    WriteSecuredCommand payload = new()
    {
        ServerHandle = serverHandle,
        ItemHandle = itemHandle,
        CurrentUserId = currentUserId,
        VerifierUserId = verifierUserId,
        Value = valueToWrite,
    };

    MxCommand command = new()
    {
        Kind = MxCommandKind.WriteSecured,
        WriteSecured = payload,
    };

    return new MxCommandRequest
    {
        SessionId = sessionId,
        ClientCorrelationId = "live-write-secured",
        Command = command,
    };
}
|
||||
|
||||
/// <summary>
/// Reads the verifier user name and password from the
/// <c>MXGATEWAY_LIVE_MXACCESS_WRITE_SECURED_USER</c> and
/// <c>MXGATEWAY_LIVE_MXACCESS_WRITE_SECURED_PASSWORD</c> environment variables.
/// </summary>
/// <returns>
/// The configured pair, with <c>admin</c> / <c>admin123</c> substituted for any variable
/// that is unset, empty, or whitespace-only.
/// </returns>
private static (string VerifyUser, string VerifyPassword) ResolveLiveMxAccessSecuredCredentials()
{
    return (
        ReadOrDefault("MXGATEWAY_LIVE_MXACCESS_WRITE_SECURED_USER", "admin"),
        ReadOrDefault("MXGATEWAY_LIVE_MXACCESS_WRITE_SECURED_PASSWORD", "admin123"));

    // A variable that is defined but blank (for example a CI secret that expanded to
    // nothing) is treated the same as an unset one, so the default still applies
    // instead of a blank credential being sent.
    static string ReadOrDefault(string variableName, string fallback)
    {
        string? configured = Environment.GetEnvironmentVariable(variableName);
        return string.IsNullOrWhiteSpace(configured) ? fallback : configured;
    }
}
|
||||
|
||||
/// <summary>
/// Counts the messages exposed by <paramref name="writer"/> for which
/// <paramref name="predicate"/> returns <see langword="true"/>.
/// </summary>
/// <param name="writer">Writer whose <c>Messages</c> are enumerated.</param>
/// <param name="predicate">Test applied once to each message.</param>
/// <returns>The number of messages accepted by the predicate.</returns>
private static int CountMatchingEvents(
    RecordingServerStreamWriter<MxEvent> writer,
    Func<MxEvent, bool> predicate)
{
    int matches = 0;

    foreach (MxEvent candidate in writer.Messages)
    {
        matches += predicate(candidate) ? 1 : 0;
    }

    return matches;
}
|
||||
|
||||
private async Task CloseSessionAsync(
|
||||
GatewayServiceFixture fixture,
|
||||
string sessionId)
|
||||
@@ -472,6 +1028,17 @@ public sealed class WorkerLiveMxAccessSmokeTests(ITestOutputHelper output)
|
||||
/// </summary>
|
||||
public MxAccessGatewayService Service { get; }
|
||||
|
||||
/// <summary>
/// Queries the in-process session registry directly for the session with the given id.
/// The abnormal worker-exit test uses this to watch a session's State / FinalFault while
/// the gateway moves it to Faulted; the public gRPC API only surfaces that indirectly
/// through CloseSession's reply, and not before a graceful close completes.
/// </summary>
/// <param name="sessionId">Id of the session to look up.</param>
/// <param name="session">Receives whatever the registry lookup produces.</param>
/// <returns>The result of the registry's <c>TryGet</c> call.</returns>
public bool TryGetSession(string sessionId, out GatewaySession session)
    => _registry.TryGet(sessionId, out session);
|
||||
|
||||
/// <summary>
|
||||
/// Disposes the fixture resources and closes all sessions.
|
||||
/// </summary>
|
||||
@@ -516,7 +1083,7 @@ public sealed class WorkerLiveMxAccessSmokeTests(ITestOutputHelper output)
|
||||
/// <summary>
|
||||
/// Gathers messages written to a server stream for test inspection.
|
||||
/// </summary>
|
||||
private sealed class RecordingServerStreamWriter<T> : IServerStreamWriter<T>
|
||||
private sealed class RecordingServerStreamWriter<T> : IServerStreamWriter<T>, IDisposable
|
||||
{
|
||||
private readonly object syncRoot = new();
|
||||
private readonly List<T> messages = [];
|
||||
@@ -606,6 +1173,16 @@ public sealed class WorkerLiveMxAccessSmokeTests(ITestOutputHelper output)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Disposes the <c>messageArrived</c> wait handle owned by this writer. Because the writer
/// holds an <see cref="IDisposable"/> field it has to be disposable as well; without this
/// the leak is limited only by the number of opt-in live tests that run.
/// </summary>
public void Dispose() => messageArrived.Dispose();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -734,6 +1311,32 @@ public sealed class WorkerLiveMxAccessSmokeTests(ITestOutputHelper output)
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Forcibly terminates every recorded worker process, together with its process tree, so
/// the abnormal-exit test can imitate a crashed worker without the graceful shutdown
/// handshake. Processes that already report as exited are left alone, and an
/// <see cref="InvalidOperationException"/> raised by the kill call is logged and ignored.
/// </summary>
public void KillAllAndDetach()
{
    foreach (TestWorkerProcess worker in processes)
    {
        if (!worker.HasExited)
        {
            try
            {
                worker.Kill(entireProcessTree: true);
                output.WriteLine($"WorkerProcess killed pid={worker.Id} (abnormal-exit simulation)");
            }
            catch (InvalidOperationException killError)
            {
                // The kill could not be applied to this process; record it and move on.
                output.WriteLine($"WorkerProcess kill skipped pid={worker.Id}: {killError.Message}");
            }
        }
    }
}
|
||||
|
||||
private void WriteWorkerOutput(
|
||||
string streamName,
|
||||
string? line)
|
||||
|
||||
Reference in New Issue
Block a user