Galaxy IPC unblock — live dev-box E2E path
Four root-cause fixes to get an elevated dev-box shell past session open through to real MXAccess reads:

1. PipeAcl — drop BUILTIN\Administrators deny ACE. UAC's filtered token carries the Admins SID as deny-only, so the deny fired even from non-elevated admin-account shells. The per-connection SID check in PipeServer.VerifyCaller remains the real authorization boundary.

2. PipeServer — swap the Hello-read / VerifyCaller order. ImpersonateNamedPipeClient returns ERROR_CANNOT_IMPERSONATE until at least one frame has been read from the pipe; reading Hello first satisfies that rule. Previously the ACL deny-first path masked this race — removing the deny ACE exposed it.

3. GalaxyIpcClient — add a background reader + single pending-response slot. A RuntimeStatusChange event between OpenSessionRequest and OpenSessionResponse used to satisfy the caller's single ReadFrameAsync and fail CallAsync with "Expected OpenSessionResponse, got RuntimeStatusChange". The reader now routes response kinds (and ErrorResponse) to the pending TCS and everything else to a handler the driver registers in InitializeAsync. The Proxy was already set up to raise managed events from RaiseDataChange / RaiseAlarmEvent / OnHostConnectivityUpdate — those helpers had no caller until now.

4. RedundancyPublisherHostedService — swallow BadServerHalted while polling host.Server.CurrentInstance. StandardServer throws that code during startup rather than returning null, so the first poll attempt crashed the BackgroundService (and the host) before OnServerStarted ran. This race was latent behind the Galaxy init failure above.

Updates docs that described the Admins deny ACE + mandatory non-elevated shells, and drops the admin-skip guards from every Galaxy integration + E2E fixture that had them (IpcHandshakeIntegrationTests, EndToEndIpcTests, ParityFixture, LiveStackFixture, HostSubprocessParityTests).
Adds GalaxyIpcClientRoutingTests covering the router's request/response match, ErrorResponse, event-between-call, idle event, and peer-close paths. Verified live on the dev box against the p7-smoke cluster (gen 6): driver registered=1 failedInit=0, Phase 7 bridge subscribed, OPC UA server up on 4840, MXAccess read round-trip returns real data with Status=0x00000000. Task #112 — partial: Galaxy live stack is functional end-to-end. The supplied test-galaxy.ps1 script still fails because the UNS walker encodes TagConfig JSON as the tag's NodeId instead of the seeded TagId (pre-existing; separate issue from this commit). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,209 @@
|
||||
using System.IO.Pipes;
|
||||
using MessagePack;
|
||||
using Shouldly;
|
||||
using Xunit;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Proxy.Ipc;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Shared;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Shared.Contracts;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Proxy.Tests;
|
||||
|
||||
/// <summary>
/// Exercises the single-pending-slot router in <see cref="GalaxyIpcClient"/>: request/response
/// matching, <see cref="MessageKind.ErrorResponse"/> handling, and routing of unsolicited push
/// frames (e.g. <see cref="MessageKind.RuntimeStatusChange"/>) arriving between a request and
/// its response. Without the router, a push event interleaved with a call would be consumed
/// as the response and the next <see cref="GalaxyIpcClient.CallAsync{TReq, TResp}"/> would
/// fail with an "Expected X, got Y" mismatch — the bug that blocked task #112's live Galaxy
/// E2E on the dev box.
/// </summary>
/// <remarks>
/// Each test plays the server side by hand: it owns a <see cref="NamedPipeServerStream"/> and
/// reads/writes frames directly with <see cref="FrameReader"/> / <see cref="FrameWriter"/>,
/// while the client under test is connected to the other end of the same pipe.
/// </remarks>
[Trait("Category", "Unit")]
public sealed class GalaxyIpcClientRoutingTests
{
    private const string Secret = "routing-suite-secret";

    /// <summary>
    /// Upper bound for the client connect and for the server-side Hello/HelloAck handshake.
    /// </summary>
    private static readonly TimeSpan HandshakeTimeout = TimeSpan.FromSeconds(5);

    /// <summary>
    /// Upper bound for a routed frame (response or event) to surface on the client side, so a
    /// routing regression fails the test instead of hanging it.
    /// </summary>
    private static readonly TimeSpan RoutingTimeout = TimeSpan.FromSeconds(2);

    /// <summary>
    /// A frame of the expected response kind completes the pending call with its payload.
    /// </summary>
    [Fact]
    public async Task Response_matching_expected_kind_completes_the_call()
    {
        var (_, serverStream, clientTask) = await StartPairAsync();

        using (serverStream)
        await using (var client = await clientTask)
        {
            using var reader = new FrameReader(serverStream, leaveOpen: true);
            using var writer = new FrameWriter(serverStream, leaveOpen: true);

            var callTask = BeginOpenSessionCall(client);

            var request = await reader.ReadFrameAsync(CancellationToken.None);
            request!.Value.Kind.ShouldBe(MessageKind.OpenSessionRequest);

            await writer.WriteAsync(MessageKind.OpenSessionResponse,
                new OpenSessionResponse { Success = true, SessionId = 42 },
                CancellationToken.None);

            var response = await callTask.WaitAsync(RoutingTimeout);
            response.Success.ShouldBeTrue();
            response.SessionId.ShouldBe(42);
        }
    }

    /// <summary>
    /// An <see cref="MessageKind.ErrorResponse"/> frame fails the pending call with a
    /// <see cref="GalaxyIpcException"/> even though the call expected a different kind.
    /// </summary>
    [Fact]
    public async Task ErrorResponse_throws_GalaxyIpcException_regardless_of_expected_kind()
    {
        var (_, serverStream, clientTask) = await StartPairAsync();

        using (serverStream)
        await using (var client = await clientTask)
        {
            using var reader = new FrameReader(serverStream, leaveOpen: true);
            using var writer = new FrameWriter(serverStream, leaveOpen: true);

            var callTask = BeginOpenSessionCall(client);

            // Drain the request frame before answering it.
            await reader.ReadFrameAsync(CancellationToken.None);
            await writer.WriteAsync(MessageKind.ErrorResponse,
                new ErrorResponse { Code = "bad-request", Message = "malformed" },
                CancellationToken.None);

            var ex = await Should.ThrowAsync<GalaxyIpcException>(() => callTask.WaitAsync(RoutingTimeout));
            ex.Code.ShouldBe("bad-request");
            ex.Message.ShouldContain("malformed");
        }
    }

    /// <summary>
    /// A push frame written between the request and its response reaches the registered event
    /// handler, and the call still completes with the real response.
    /// </summary>
    [Fact]
    public async Task Unsolicited_event_between_request_and_response_routes_to_handler_not_the_call()
    {
        var (_, serverStream, clientTask) = await StartPairAsync();

        using (serverStream)
        await using (var client = await clientTask)
        {
            // The list is filled by the handler and only inspected after eventReceived has
            // completed, so the test never reads it while the handler is still adding to it.
            var eventFrames = new List<(MessageKind Kind, byte[] Body)>();
            var eventReceived = new TaskCompletionSource<bool>(TaskCreationOptions.RunContinuationsAsynchronously);
            client.SetEventHandler((k, body) =>
            {
                eventFrames.Add((k, body));
                if (k == MessageKind.RuntimeStatusChange) eventReceived.TrySetResult(true);
                return Task.CompletedTask;
            });

            using var reader = new FrameReader(serverStream, leaveOpen: true);
            using var writer = new FrameWriter(serverStream, leaveOpen: true);

            var callTask = BeginOpenSessionCall(client);

            await reader.ReadFrameAsync(CancellationToken.None);

            // Push event lands first — the bug this test guards against is CallAsync consuming
            // this frame as the response and failing with "Expected X, got Y".
            await writer.WriteAsync(MessageKind.RuntimeStatusChange,
                new RuntimeStatusChangeNotification
                {
                    Status = new HostConnectivityStatus
                    {
                        HostName = "host-a", RuntimeStatus = "Running", LastObservedUtcUnixMs = 1,
                    },
                }, CancellationToken.None);

            await writer.WriteAsync(MessageKind.OpenSessionResponse,
                new OpenSessionResponse { Success = true, SessionId = 7 },
                CancellationToken.None);

            var response = await callTask.WaitAsync(RoutingTimeout);
            response.SessionId.ShouldBe(7);

            await eventReceived.Task.WaitAsync(RoutingTimeout);
            var runtime = eventFrames.ShouldHaveSingleItem();
            runtime.Kind.ShouldBe(MessageKind.RuntimeStatusChange);
            var decoded = MessagePackSerializer.Deserialize<RuntimeStatusChangeNotification>(runtime.Body);
            decoded.Status.HostName.ShouldBe("host-a");
        }
    }

    /// <summary>
    /// A push frame written while no call is pending is still delivered to the event handler.
    /// </summary>
    [Fact]
    public async Task Idle_push_event_with_no_pending_call_still_reaches_handler()
    {
        var (_, serverStream, clientTask) = await StartPairAsync();

        using (serverStream)
        await using (var client = await clientTask)
        {
            var received = new TaskCompletionSource<(MessageKind, byte[])>(TaskCreationOptions.RunContinuationsAsynchronously);
            client.SetEventHandler((k, body) => { received.TrySetResult((k, body)); return Task.CompletedTask; });

            using var writer = new FrameWriter(serverStream, leaveOpen: true);
            await writer.WriteAsync(MessageKind.HostConnectivityStatus,
                new HostConnectivityStatus { HostName = "h", RuntimeStatus = "Running", LastObservedUtcUnixMs = 1 },
                CancellationToken.None);

            var (kind, _) = await received.Task.WaitAsync(RoutingTimeout);
            kind.ShouldBe(MessageKind.HostConnectivityStatus);
        }
    }

    /// <summary>
    /// Disposing the server end while a call is pending fails that call with
    /// <see cref="EndOfStreamException"/>.
    /// </summary>
    [Fact]
    public async Task Peer_closing_pipe_during_pending_call_surfaces_as_EndOfStream()
    {
        var (_, serverStream, clientTask) = await StartPairAsync();

        // The outer using only guards against leaking the pipe when a step before the explicit
        // Dispose() below throws; on the happy path it is a second, no-op dispose.
        using (serverStream)
        await using (var client = await clientTask)
        {
            using var reader = new FrameReader(serverStream, leaveOpen: true);

            var callTask = BeginOpenSessionCall(client);

            await reader.ReadFrameAsync(CancellationToken.None);

            // Simulate the peer going away while the response is still outstanding.
            serverStream.Dispose();

            await Should.ThrowAsync<EndOfStreamException>(() => callTask.WaitAsync(RoutingTimeout));
        }
    }

    // ---- test harness ----------------------------------------------------

    /// <summary>
    /// Starts the OpenSession request used by the call-routing tests without awaiting it, so
    /// the test can script what the server side sends back.
    /// </summary>
    /// <param name="client">Connected client under test.</param>
    /// <returns>The still-pending call task.</returns>
    private static Task<OpenSessionResponse> BeginOpenSessionCall(GalaxyIpcClient client) =>
        client.CallAsync<OpenSessionRequest, OpenSessionResponse>(
            MessageKind.OpenSessionRequest,
            new OpenSessionRequest { DriverInstanceId = "t", DriverConfigJson = "{}" },
            MessageKind.OpenSessionResponse,
            CancellationToken.None);

    /// <summary>
    /// Creates a uniquely named pipe, connects a <see cref="GalaxyIpcClient"/> to it and answers
    /// the client's Hello with an accepting HelloAck.
    /// </summary>
    /// <returns>
    /// The pipe name, the server end of the pipe (owned by the caller from here on) and the
    /// client's connect task.
    /// </returns>
    /// <exception cref="InvalidOperationException">The first frame from the client was not a Hello.</exception>
    /// <exception cref="OperationCanceledException">The handshake did not finish within <see cref="HandshakeTimeout"/>.</exception>
    private static async Task<(string PipeName, NamedPipeServerStream Server, Task<GalaxyIpcClient> Client)> StartPairAsync()
    {
        var pipeName = $"GalaxyIpcRouting-{Guid.NewGuid():N}";

        // Bounds the server side of the handshake: without it a client that never connects
        // would leave WaitForConnectionAsync pending forever and hang the whole test run.
        using var handshakeCts = new CancellationTokenSource(HandshakeTimeout);

        var serverStream = new NamedPipeServerStream(
            pipeName, PipeDirection.InOut, maxNumberOfServerInstances: 1,
            PipeTransmissionMode.Byte, PipeOptions.Asynchronous);

        try
        {
            // Drive a Hello/HelloAck handshake on a background task so the client's ConnectAsync
            // can complete. After the handshake the test owns the stream for manual framing.
            var acceptTask = Task.Run(async () =>
            {
                await serverStream.WaitForConnectionAsync(handshakeCts.Token);
                using var reader = new FrameReader(serverStream, leaveOpen: true);
                using var writer = new FrameWriter(serverStream, leaveOpen: true);

                var hello = await reader.ReadFrameAsync(handshakeCts.Token);
                if (hello is null || hello.Value.Kind != MessageKind.Hello)
                    throw new InvalidOperationException("expected Hello first");

                await writer.WriteAsync(MessageKind.HelloAck,
                    new HelloAck { Accepted = true, HostName = "test-host" },
                    handshakeCts.Token);
            });

            var clientTask = GalaxyIpcClient.ConnectAsync(pipeName, Secret, HandshakeTimeout, CancellationToken.None);

            // If the connect fails before the handshake finishes, surface that failure right
            // away: it is the root cause, whereas the handshake would only time out later.
            var firstDone = await Task.WhenAny(acceptTask, clientTask);
            if (firstDone == clientTask && !clientTask.IsCompletedSuccessfully)
                await clientTask;

            await acceptTask;
            return (pipeName, serverStream, clientTask);
        }
        catch
        {
            // Nobody else owns the pipe yet, so release it here; cancelling first stops a
            // handshake task that may still be waiting on the stream.
            handshakeCts.Cancel();
            serverStream.Dispose();
            throw;
        }
    }
}
|
||||
@@ -50,13 +50,6 @@ public sealed class HostSubprocessParityTests : IDisposable
|
||||
return File.Exists(candidate) ? candidate : null;
|
||||
}
|
||||
|
||||
private static bool IsAdministrator()
|
||||
{
|
||||
if (!OperatingSystem.IsWindows()) return false;
|
||||
using var identity = WindowsIdentity.GetCurrent();
|
||||
return new WindowsPrincipal(identity).IsInRole(WindowsBuiltInRole.Administrator);
|
||||
}
|
||||
|
||||
private static async Task<bool> ZbReachableAsync()
|
||||
{
|
||||
try
|
||||
@@ -71,7 +64,7 @@ public sealed class HostSubprocessParityTests : IDisposable
|
||||
[Fact]
|
||||
public async Task Spawned_Host_in_db_mode_lets_Proxy_Discover_real_Galaxy_gobjects()
|
||||
{
|
||||
if (!OperatingSystem.IsWindows() || IsAdministrator()) return;
|
||||
if (!OperatingSystem.IsWindows()) return;
|
||||
if (!await ZbReachableAsync()) return;
|
||||
|
||||
var hostExe = FindHostExe();
|
||||
|
||||
@@ -1,8 +1,3 @@
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Runtime.Versioning;
|
||||
using System.Security.Principal;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.TestSupport;
|
||||
|
||||
@@ -43,25 +38,6 @@ public sealed class LiveStackFixture : IAsyncLifetime
|
||||
|
||||
public async ValueTask InitializeAsync()
|
||||
{
|
||||
// 0. Elevated-shell short-circuit. The OtOpcUaGalaxyHost pipe ACL allows the configured
|
||||
// SID but explicitly DENIES Administrators (decision #76 — production hardening).
|
||||
// A test process running with a high-integrity token (any elevated shell) carries the
|
||||
// Admins group in its security context, so the deny rule trumps the user's allow and
|
||||
// the pipe connect returns UnauthorizedAccessException — technically correct but
|
||||
// the operationally confusing failure mode that ate most of the PR 37 install
|
||||
// debugging session. Surfacing it explicitly here saves the next operator the same
|
||||
// five-step diagnosis. ParityFixture has the same skip with the same rationale.
|
||||
if (IsElevatedAdministratorOnWindows())
|
||||
{
|
||||
SkipReason =
|
||||
"Test host is running with elevated (Administrators) privileges, but the " +
|
||||
"OtOpcUaGalaxyHost named-pipe ACL explicitly denies Administrators per the IPC " +
|
||||
"security design (decision #76 / PipeAcl.cs). Re-run from a NORMAL (non-admin) " +
|
||||
"PowerShell window — even when your user is already in the pipe's allow list, " +
|
||||
"the elevated token's Admins group membership trumps the allow rule.";
|
||||
return;
|
||||
}
|
||||
|
||||
// 1. AVEVA + OtOpcUa service state — actionable diagnostic if anything is missing.
|
||||
using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(10));
|
||||
PrerequisiteReport = await AvevaPrerequisites.CheckAllAsync(
|
||||
@@ -134,27 +110,6 @@ public sealed class LiveStackFixture : IAsyncLifetime
|
||||
if (SkipReason is not null) Assert.Skip(SkipReason);
|
||||
}
|
||||
|
||||
private static bool IsElevatedAdministratorOnWindows()
|
||||
{
|
||||
if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) return false;
|
||||
return CheckWindowsAdminToken();
|
||||
}
|
||||
|
||||
[SupportedOSPlatform("windows")]
|
||||
private static bool CheckWindowsAdminToken()
|
||||
{
|
||||
try
|
||||
{
|
||||
using var identity = WindowsIdentity.GetCurrent();
|
||||
return new WindowsPrincipal(identity).IsInRole(WindowsBuiltInRole.Administrator);
|
||||
}
|
||||
catch
|
||||
{
|
||||
// Probe shouldn't crash the test; if we can't determine elevation, optimistically
|
||||
// continue and let the actual pipe connect surface its own error.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
[CollectionDefinition(Name)]
|
||||
|
||||
Reference in New Issue
Block a user