Four root-cause fixes to get an elevated dev-box shell past session open through to real MXAccess reads: 1. PipeAcl — drop BUILTIN\Administrators deny ACE. UAC's filtered token carries the Admins SID as deny-only, so the deny fired even from non-elevated admin-account shells. The per-connection SID check in PipeServer.VerifyCaller remains the real authorization boundary. 2. PipeServer — swap the Hello-read / VerifyCaller order. ImpersonateNamedPipeClient returns ERROR_CANNOT_IMPERSONATE until at least one frame has been read from the pipe; reading Hello first satisfies that rule. Previously the ACL deny-first path masked this race — removing the deny ACE exposed it. 3. GalaxyIpcClient — add a background reader + single pending-response slot. A RuntimeStatusChange event between OpenSessionRequest and OpenSessionResponse used to satisfy the caller's single ReadFrameAsync and fail CallAsync with "Expected OpenSessionResponse, got RuntimeStatusChange". The reader now routes response kinds (and ErrorResponse) to the pending TCS and everything else to a handler the driver registers in InitializeAsync. The Proxy was already set up to raise managed events from RaiseDataChange / RaiseAlarmEvent / OnHostConnectivityUpdate — those helpers had no caller until now. 4. RedundancyPublisherHostedService — swallow BadServerHalted while polling host.Server.CurrentInstance. StandardServer throws that code during startup rather than returning null, so the first poll attempt crashed the BackgroundService (and the host) before OnServerStarted ran. This race was latent behind the Galaxy init failure above. Updates docs that described the Admins deny ACE + mandatory non-elevated shells, and drops the admin-skip guards from every Galaxy integration + E2E fixture that had them (IpcHandshakeIntegrationTests, EndToEndIpcTests, ParityFixture, LiveStackFixture, HostSubprocessParityTests). 
Adds GalaxyIpcClientRoutingTests covering the router's request/response match, ErrorResponse, event-between-call, idle event, and peer-close paths. Verified live on the dev box against the p7-smoke cluster (gen 6): driver registered=1 failedInit=0, Phase 7 bridge subscribed, OPC UA server up on 4840, MXAccess read round-trip returns real data with Status=0x00000000. Task #112 — partial: Galaxy live stack is functional end-to-end. The supplied test-galaxy.ps1 script still fails because the UNS walker encodes TagConfig JSON as the tag's NodeId instead of the seeded TagId (pre-existing; separate issue from this commit). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
120 lines
4.9 KiB
C#
120 lines
4.9 KiB
C#
using Xunit;
|
|
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.TestSupport;
|
|
|
|
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Proxy.Tests.LiveStack;
|
|
|
|
/// <summary>
/// Connects a single <see cref="GalaxyProxyDriver"/> to the already-running
/// <c>OtOpcUaGalaxyHost</c> Windows service for the lifetime of a test class. Uses
/// <see cref="AvevaPrerequisites"/> to decide whether to proceed; on failure,
/// <see cref="SkipReason"/> is populated and each test calls <see cref="SkipIfUnavailable"/>
/// to translate that into <c>Assert.Skip</c>.
/// </summary>
/// <remarks>
/// <para>
/// <b>Does NOT spawn the Host process.</b> Production deploys <c>OtOpcUaGalaxyHost</c>
/// as a standalone Windows service — spawning a second instance from a test would
/// bypass the COM-apartment + service-account setup and fail differently than
/// production (see <c>project_galaxy_host_service.md</c> memory).
/// </para>
/// <para>
/// <b>Shared-secret handling</b>: read from <see cref="LiveStackConfig"/> — env vars
/// first, then the service's registry-stored <c>Environment</c> values. Requires
/// the test process to have read access to
/// <c>HKLM\SYSTEM\CurrentControlSet\Services\OtOpcUaGalaxyHost</c>; on a dev box
/// that typically means running the test host elevated, or exporting
/// <c>OTOPCUA_GALAXY_SECRET</c> out-of-band.
/// </para>
/// <para>
/// <b>Failure policy</b>: every expected init failure (prerequisites missing or timed out,
/// secret unresolvable, driver init throwing) ends with <see cref="SkipReason"/> set and
/// <see cref="Driver"/> left <c>null</c>, so tests skip instead of failing.
/// </para>
/// </remarks>
public sealed class LiveStackFixture : IAsyncLifetime
{
    /// <summary>Upper bound for the prerequisite probe before the fixture gives up and skips.</summary>
    private static readonly TimeSpan PrerequisiteTimeout = TimeSpan.FromSeconds(10);

    /// <summary>
    /// The initialized driver, or <c>null</c> when initialization did not succeed (in which
    /// case <see cref="SkipReason"/> says why) or the fixture has been disposed.
    /// </summary>
    public GalaxyProxyDriver? Driver { get; private set; }

    /// <summary>Why the live-stack tests must be skipped; <c>null</c> when the fixture is usable.</summary>
    public string? SkipReason { get; private set; }

    /// <summary>Result of <see cref="AvevaPrerequisites.CheckAllAsync"/>; <c>null</c> if the check never completed.</summary>
    public PrerequisiteReport? PrerequisiteReport { get; private set; }

    /// <summary>Value returned by <see cref="LiveStackConfig.Resolve"/>; <c>null</c> if it could not be resolved.</summary>
    public LiveStackConfig? Config { get; private set; }

    /// <summary>
    /// Runs the prerequisite check, resolves the pipe name + shared secret, then creates and
    /// initializes the driver. Expected failures are recorded in <see cref="SkipReason"/>
    /// rather than thrown.
    /// </summary>
    public async ValueTask InitializeAsync()
    {
        // 1. AVEVA + OtOpcUa service state — actionable diagnostic if anything is missing.
        using var cts = new CancellationTokenSource(PrerequisiteTimeout);
        PrerequisiteReport report;
        try
        {
            report = await AvevaPrerequisites.CheckAllAsync(
                new AvevaPrerequisites.Options { CheckGalaxyHostPipe = true, CheckHistorian = false },
                cts.Token);
        }
        catch (OperationCanceledException) when (cts.IsCancellationRequested)
        {
            // A hung probe must surface as a skip, not as a fixture failure that takes down
            // every test in the collection.
            SkipReason =
                $"AVEVA prerequisite check did not complete within {PrerequisiteTimeout.TotalSeconds:0} seconds.";
            return;
        }

        PrerequisiteReport = report;

        if (!report.IsLivetestReady)
        {
            // Fall back to a generic reason so SkipIfUnavailable always has something to
            // report; a null reason here would let tests run against a null Driver.
            SkipReason = report.SkipReason
                ?? "AVEVA prerequisite check reported the live stack is not ready (no reason supplied).";
            return;
        }

        // 2. Secret / pipe-name resolution. If the service is running but we can't discover its
        //    env vars from registry (non-elevated test host), a clear message beats a silent
        //    connect-rejected failure 10 seconds later.
        var config = LiveStackConfig.Resolve();
        Config = config;
        if (config is null)
        {
            SkipReason =
                $"Cannot resolve shared secret. Set {LiveStackConfig.EnvSharedSecret} (and optionally " +
                $"{LiveStackConfig.EnvPipeName}) in the environment, or run the test host elevated so it " +
                $"can read HKLM\\{LiveStackConfig.ServiceRegistryKey}\\Environment.";
            return;
        }

        // 3. Connect. InitializeAsync does the pipe connect + handshake; a 5-second
        //    ConnectTimeout gives enough headroom for a service that just started.
        var driver = new GalaxyProxyDriver(new GalaxyProxyOptions
        {
            DriverInstanceId = "live-stack-smoke",
            PipeName = config.PipeName,
            SharedSecret = config.SharedSecret,
            ConnectTimeout = TimeSpan.FromSeconds(5),
        });

        try
        {
            await driver.InitializeAsync(driverConfigJson: "{}", CancellationToken.None);
        }
        catch (Exception ex)
        {
            SkipReason =
                $"Connected to named pipe '{config.PipeName}' but GalaxyProxyDriver.InitializeAsync failed: " +
                $"{ex.GetType().Name}: {ex.Message}. Common causes: shared secret mismatch (rotated after last install), " +
                $"service account SID not in pipe ACL (installer sets OTOPCUA_ALLOWED_SID to the service account — " +
                $"test must run as that user), or Host's backend couldn't connect to ZB.";

            // Cleanup of a half-initialized driver is best-effort: a throwing Dispose must
            // not turn the skip recorded above into a fixture failure.
            try { driver.Dispose(); } catch { /* best-effort */ }
            return;
        }

        // Publish only a fully initialized driver.
        Driver = driver;
    }

    /// <summary>
    /// Shuts the driver down (best-effort) and disposes it. Safe to call when initialization
    /// never produced a driver, and safe to call more than once.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        var driver = Driver;
        if (driver is null)
        {
            return;
        }

        Driver = null;

        try
        {
            await driver.ShutdownAsync(CancellationToken.None);
        }
        catch
        {
            /* best-effort */
        }
        finally
        {
            driver.Dispose();
        }
    }

    /// <summary>
    /// Translate <see cref="SkipReason"/> into <c>Assert.Skip</c>. Tests call this at the
    /// top of every fact so a fixture init failure shows up as a cleanly-skipped test with
    /// the full prerequisites report, not a cascading NullReferenceException on
    /// <see cref="Driver"/>.
    /// </summary>
    public void SkipIfUnavailable()
    {
        if (SkipReason is not null)
        {
            Assert.Skip(SkipReason);
        }
    }
}
|
|
|
|
/// <summary>
/// xUnit collection definition that attaches <see cref="LiveStackFixture"/> as the
/// collection fixture for the collection named <see cref="Name"/>.
/// </summary>
[CollectionDefinition(LiveStackCollection.Name)]
public sealed class LiveStackCollection : ICollectionFixture<LiveStackFixture>
{
    /// <summary>Name of the collection this definition declares.</summary>
    public const string Name = "LiveStack";
}
|