Phase 3 PR 37 — End-to-end live-stack Galaxy smoke test #36

Merged
dohertj2 merged 1 commits from phase-3-pr37-live-stack-smoke into v2 2026-04-18 16:56:51 -04:00
5 changed files with 371 additions and 10 deletions

View File

@@ -77,18 +77,36 @@ drive a full OPC UA session with username/password, then read an
`IHostConnectivityProbe`-style "whoami" node to verify the role surfaced).
That needs a test-only address-space node and is a separate PR.
## 5. Full Galaxy live-service smoke test against the merged v2 stack
## 5. Full Galaxy live-service smoke test against the merged v2 stack — **IN PROGRESS (PRs 36 + 37)**
**Status**: Individual pieces have live smoke tests (PR 5 MXAccess, PR 13
probe manager, PR 14 alarm tracker), but the full loop — OPC UA client →
`OtOpcUaServer``GalaxyProxyDriver` (in-process) → named-pipe to
Galaxy.Host subprocess → live MXAccess runtime → real Galaxy objects — has
no single end-to-end smoke test.
PR 36 shipped the prerequisites helper (`AvevaPrerequisites`) that probes
every dependency a live smoke test needs and produces actionable skip
messages.
**To do**:
- Test that spawns the full topology, discovers a deployed Galaxy object,
subscribes to one of its attributes, writes a value back, and asserts the
write round-tripped through MXAccess. Skip when ArchestrA isn't running.
PR 37 shipped the live-stack smoke test project structure:
`tests/Driver.Galaxy.Proxy.Tests/LiveStack/` with `LiveStackFixture` (connects
to the *already-running* `OtOpcUaGalaxyHost` Windows service via named pipe;
never spawns the Host process) and `LiveStackSmokeTests` covering:
- Fixture initializes successfully (IPC handshake succeeds end-to-end).
- Driver reports `DriverState.Healthy` post-handshake.
- `DiscoverAsync` returns at least one variable from the live Galaxy.
- `GetHostStatuses` reports at least one Platform/AppEngine host.
- `ReadAsync` on a discovered variable round-trips through
Proxy → Host pipe → MXAccess → back without a BadInternalError.
Shared secret + pipe name resolve from `OTOPCUA_GALAXY_SECRET` /
`OTOPCUA_GALAXY_PIPE` env vars, falling back to reading the service's
registry-stored Environment values (requires elevated test host).
**Remaining**:
- Install + run the `OtOpcUaGalaxyHost` + `OtOpcUa` services on the dev box
(`scripts/install/Install-Services.ps1`) so the skip-on-unready tests
actually execute and the smoke PR lands green.
- Subscribe-and-receive-data-change fact (needs a known tag that actually
ticks; deferred until operators confirm a scratch tag exists).
- Write-and-roundtrip fact (needs a test-only UDA or agreed scratch tag
so we can't accidentally mutate a process-critical value).
## 6. Second driver instance on the same server — **DONE (PR 32)**

View File

@@ -0,0 +1,75 @@
using System.Runtime.InteropServices;
using System.Runtime.Versioning;
using Microsoft.Win32;
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Proxy.Tests.LiveStack;
/// <summary>
/// Resolves the pipe name + shared secret the live <see cref="GalaxyProxyDriver"/> needs
/// to connect to a running <c>OtOpcUaGalaxyHost</c> Windows service. Two sources are
/// consulted, first match wins:
/// <list type="number">
/// <item>Explicit env vars (<c>OTOPCUA_GALAXY_PIPE</c>, <c>OTOPCUA_GALAXY_SECRET</c>) — lets CI / benchwork override.</item>
/// <item>The service's per-process <c>Environment</c> registry values under
/// <c>HKLM\SYSTEM\CurrentControlSet\Services\OtOpcUaGalaxyHost</c> — what
/// <c>Install-Services.ps1</c> writes at install time. Requires the test to run as a
/// principal with read access to that registry key (typically Administrators).</item>
/// </list>
/// </summary>
/// <remarks>
/// Explicitly NOT baked-in-to-source: the shared secret is rotated per install (the
/// installer generates 32 random bytes and stores the base64 string). A hard-coded secret
/// in tests would diverge from production the moment someone re-installed the service.
/// </remarks>
public sealed record LiveStackConfig(string PipeName, string SharedSecret, string? Source)
{
public const string EnvPipeName = "OTOPCUA_GALAXY_PIPE";
public const string EnvSharedSecret = "OTOPCUA_GALAXY_SECRET";
public const string ServiceRegistryKey =
@"SYSTEM\CurrentControlSet\Services\OtOpcUaGalaxyHost";
public const string DefaultPipeName = "OtOpcUaGalaxy";
public static LiveStackConfig? Resolve()
{
var envPipe = Environment.GetEnvironmentVariable(EnvPipeName);
var envSecret = Environment.GetEnvironmentVariable(EnvSharedSecret);
if (!string.IsNullOrWhiteSpace(envPipe) && !string.IsNullOrWhiteSpace(envSecret))
return new LiveStackConfig(envPipe, envSecret, "env vars");
if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
return null;
return FromServiceRegistry();
}
[SupportedOSPlatform("windows")]
private static LiveStackConfig? FromServiceRegistry()
{
try
{
using var key = Registry.LocalMachine.OpenSubKey(ServiceRegistryKey);
if (key is null) return null;
var env = key.GetValue("Environment") as string[];
if (env is null || env.Length == 0) return null;
string? pipe = null, secret = null;
foreach (var line in env)
{
var eq = line.IndexOf('=');
if (eq <= 0) continue;
var name = line[..eq];
var value = line[(eq + 1)..];
if (name.Equals(EnvPipeName, StringComparison.OrdinalIgnoreCase)) pipe = value;
else if (name.Equals(EnvSharedSecret, StringComparison.OrdinalIgnoreCase)) secret = value;
}
if (string.IsNullOrWhiteSpace(secret)) return null;
return new LiveStackConfig(pipe ?? DefaultPipeName, secret, "service registry");
}
catch
{
// Access denied / key missing / malformed — caller gets null and surfaces a Skip.
return null;
}
}
}

View File

@@ -0,0 +1,120 @@
using System.Threading;
using System.Threading.Tasks;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.TestSupport;
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Proxy.Tests.LiveStack;
/// <summary>
/// Connects a single <see cref="GalaxyProxyDriver"/> to the already-running
/// <c>OtOpcUaGalaxyHost</c> Windows service for the lifetime of a test class. Uses
/// <see cref="AvevaPrerequisites"/> to decide whether to proceed; on failure,
/// <see cref="SkipReason"/> is populated and each test calls <see cref="SkipIfUnavailable"/>
/// to translate that into <c>Assert.Skip</c>.
/// </summary>
/// <remarks>
/// <para>
/// <b>Does NOT spawn the Host process.</b> Production deploys <c>OtOpcUaGalaxyHost</c>
/// as a standalone Windows service — spawning a second instance from a test would
/// bypass the COM-apartment + service-account setup and fail differently than
/// production (see <c>project_galaxy_host_service.md</c> memory).
/// </para>
/// <para>
/// <b>Shared-secret handling</b>: read from <see cref="LiveStackConfig"/> — env vars
/// first, then the service's registry-stored <c>Environment</c> values. Requires
/// the test process to have read access to
/// <c>HKLM\SYSTEM\CurrentControlSet\Services\OtOpcUaGalaxyHost</c>; on a dev box
/// that typically means running the test host elevated, or exporting
/// <c>OTOPCUA_GALAXY_SECRET</c> out-of-band.
/// </para>
/// </remarks>
public sealed class LiveStackFixture : IAsyncLifetime
{
public GalaxyProxyDriver? Driver { get; private set; }
public string? SkipReason { get; private set; }
public PrerequisiteReport? PrerequisiteReport { get; private set; }
public LiveStackConfig? Config { get; private set; }
public async ValueTask InitializeAsync()
{
// 1. AVEVA + OtOpcUa service state — actionable diagnostic if anything is missing.
using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(10));
PrerequisiteReport = await AvevaPrerequisites.CheckAllAsync(
new AvevaPrerequisites.Options { CheckGalaxyHostPipe = true, CheckHistorian = false },
cts.Token);
if (!PrerequisiteReport.IsLivetestReady)
{
SkipReason = PrerequisiteReport.SkipReason;
return;
}
// 2. Secret / pipe-name resolution. If the service is running but we can't discover its
// env vars from registry (non-elevated test host), a clear message beats a silent
// connect-rejected failure 10 seconds later.
Config = LiveStackConfig.Resolve();
if (Config is null)
{
SkipReason =
$"Cannot resolve shared secret. Set {LiveStackConfig.EnvSharedSecret} (and optionally " +
$"{LiveStackConfig.EnvPipeName}) in the environment, or run the test host elevated so it " +
$"can read HKLM\\{LiveStackConfig.ServiceRegistryKey}\\Environment.";
return;
}
// 3. Connect. InitializeAsync does the pipe connect + handshake; a 5-second
// ConnectTimeout gives enough headroom for a service that just started.
Driver = new GalaxyProxyDriver(new GalaxyProxyOptions
{
DriverInstanceId = "live-stack-smoke",
PipeName = Config.PipeName,
SharedSecret = Config.SharedSecret,
ConnectTimeout = TimeSpan.FromSeconds(5),
});
try
{
await Driver.InitializeAsync(driverConfigJson: "{}", CancellationToken.None);
}
catch (Exception ex)
{
SkipReason =
$"Connected to named pipe '{Config.PipeName}' but GalaxyProxyDriver.InitializeAsync failed: " +
$"{ex.GetType().Name}: {ex.Message}. Common causes: shared secret mismatch (rotated after last install), " +
$"service account SID not in pipe ACL (installer sets OTOPCUA_ALLOWED_SID to the service account — " +
$"test must run as that user), or Host's backend couldn't connect to ZB.";
Driver.Dispose();
Driver = null;
return;
}
}
public async ValueTask DisposeAsync()
{
if (Driver is not null)
{
try { await Driver.ShutdownAsync(CancellationToken.None); } catch { /* best-effort */ }
Driver.Dispose();
}
}
/// <summary>
/// Translate <see cref="SkipReason"/> into <c>Assert.Skip</c>. Tests call this at the
/// top of every fact so a fixture init failure shows up as a cleanly-skipped test with
/// the full prerequisites report, not a cascading NullReferenceException on
/// <see cref="Driver"/>.
/// </summary>
public void SkipIfUnavailable()
{
if (SkipReason is not null) Assert.Skip(SkipReason);
}
}
[CollectionDefinition(Name)]
public sealed class LiveStackCollection : ICollectionFixture<LiveStackFixture>
{
public const string Name = "LiveStack";
}

View File

@@ -0,0 +1,147 @@
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Proxy.Tests.LiveStack;
/// <summary>
/// End-to-end smoke against the installed <c>OtOpcUaGalaxyHost</c> Windows service.
/// Closes LMX follow-up #5 — exercises the full topology: <see cref="GalaxyProxyDriver"/>
/// in-process → named-pipe IPC → <c>OtOpcUaGalaxyHost</c> service → <c>MxAccessGalaxyBackend</c> →
/// live MXAccess runtime → real Galaxy objects + attributes.
/// </summary>
/// <remarks>
/// <para>
/// <b>Preconditions</b> (all checked by <see cref="LiveStackFixture"/>, surfaced via
/// <c>Assert.Skip</c> when missing):
/// </para>
/// <list type="bullet">
/// <item>AVEVA System Platform installed + Platform deployed.</item>
/// <item><c>aaBootstrap</c> / <c>aaGR</c> / <c>NmxSvc</c> / <c>MSSQLSERVER</c> running.</item>
/// <item>MXAccess COM server registered.</item>
/// <item>ZB database exists with at least one deployed gobject.</item>
/// <item><c>OtOpcUaGalaxyHost</c> service installed + running (named pipe accepting connections).</item>
/// <item>Shared secret discoverable via <c>OTOPCUA_GALAXY_SECRET</c> env var or the
/// service's registry Environment values (test host typically needs to be elevated
/// to read the latter).</item>
/// <item>Test process runs as the account listed in the service's pipe ACL
/// (<c>OTOPCUA_ALLOWED_SID</c>, typically the service account per decision #76).</item>
/// </list>
/// <para>
/// Tests here are deliberately read-only. Writes against live Galaxy attributes are a
/// separate concern — they need a test-only UDA or an agreed scratch tag so they can't
/// accidentally mutate a process-critical value. Adding a write test is a follow-up
/// PR that reuses this fixture.
/// </para>
/// </remarks>
[Trait("Category", "LiveGalaxy")]
[Collection(LiveStackCollection.Name)]
public sealed class LiveStackSmokeTests(LiveStackFixture fixture)
{
[Fact]
public void Fixture_initialized_successfully()
{
fixture.SkipIfUnavailable();
// If the fixture init succeeded, Driver is non-null and InitializeAsync completed.
// This is the cheapest possible assertion that the IPC handshake worked end-to-end;
// every other test in this class depends on it.
fixture.Driver.ShouldNotBeNull();
fixture.Config.ShouldNotBeNull();
fixture.PrerequisiteReport.ShouldNotBeNull();
fixture.PrerequisiteReport!.IsLivetestReady.ShouldBeTrue(fixture.PrerequisiteReport.SkipReason);
}
[Fact]
public void Driver_reports_Healthy_after_IPC_handshake()
{
fixture.SkipIfUnavailable();
var health = fixture.Driver!.GetHealth();
health.State.ShouldBe(DriverState.Healthy,
$"Expected Healthy after successful IPC connect; Reason={health.LastError}");
}
[Fact]
public async Task DiscoverAsync_returns_at_least_one_variable_from_live_galaxy()
{
fixture.SkipIfUnavailable();
var builder = new CapturingAddressSpaceBuilder();
using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(30));
await fixture.Driver!.DiscoverAsync(builder, cts.Token);
builder.Variables.Count.ShouldBeGreaterThan(0,
"Live Galaxy has > 0 deployed objects per the prereq check — at least one variable must be discovered. " +
"Zero usually means the Host couldn't read ZB (check OTOPCUA_GALAXY_ZB_CONN in the service Environment).");
// Every discovered attribute must carry a non-empty FullName so the OPC UA server can
// route reads/writes back. Regression guard — PR 19 normalized this across drivers.
builder.Variables.ShouldAllBe(v => !string.IsNullOrEmpty(v.AttributeInfo.FullName));
}
[Fact]
public void GetHostStatuses_reports_at_least_one_platform()
{
fixture.SkipIfUnavailable();
var statuses = fixture.Driver!.GetHostStatuses();
statuses.Count.ShouldBeGreaterThan(0,
"Live Galaxy must report at least one Platform/AppEngine host via IHostConnectivityProbe. " +
"Zero means the Host's probe loop hasn't completed its first tick or the Platform isn't deployed locally.");
// Host names are driver-opaque to the Core but non-empty by contract.
statuses.ShouldAllBe(h => !string.IsNullOrEmpty(h.HostName));
}
[Fact]
public async Task Can_read_a_discovered_variable_from_live_galaxy()
{
fixture.SkipIfUnavailable();
var builder = new CapturingAddressSpaceBuilder();
using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(30));
await fixture.Driver!.DiscoverAsync(builder, cts.Token);
builder.Variables.Count.ShouldBeGreaterThan(0);
// Pick the first discovered variable. Read-only smoke — we don't assert on Value,
// only that a ReadAsync round-trip through Proxy → Host pipe → MXAccess → back
// returns a snapshot with a non-BadInternalError status. Galaxy attributes default to
// Uncertain quality until the Engine's first scan publishes them, which is fine here.
var full = builder.Variables[0].AttributeInfo.FullName;
var snapshots = await fixture.Driver!.ReadAsync([full], cts.Token);
snapshots.Count.ShouldBe(1);
var snap = snapshots[0];
snap.StatusCode.ShouldNotBe(0x80020000u,
$"Read returned BadInternalError for {full} — the Host couldn't fulfil the request. " +
$"Investigate: the Host service's logs at {System.Environment.GetFolderPath(System.Environment.SpecialFolder.CommonApplicationData)}\\OtOpcUa\\Galaxy\\logs.");
}
/// <summary>
/// Minimal <see cref="IAddressSpaceBuilder"/> implementation that captures every
/// Variable() call into a flat list so tests can inspect what discovery produced
/// without running the full OPC UA node-manager stack.
/// </summary>
private sealed class CapturingAddressSpaceBuilder : IAddressSpaceBuilder
{
public List<(string BrowseName, DriverAttributeInfo AttributeInfo)> Variables { get; } = [];
public IAddressSpaceBuilder Folder(string browseName, string displayName) => this;
public IVariableHandle Variable(string browseName, string displayName, DriverAttributeInfo attributeInfo)
{
Variables.Add((browseName, attributeInfo));
return new NoopHandle(attributeInfo.FullName);
}
public void AddProperty(string browseName, DriverDataType dataType, object? value) { }
private sealed class NoopHandle(string fullReference) : IVariableHandle
{
public string FullReference { get; } = fullReference;
public IAlarmConditionSink MarkAsAlarmCondition(AlarmConditionInfo info) => new NoopSink();
private sealed class NoopSink : IAlarmConditionSink
{
public void OnTransition(AlarmEventArgs args) { }
}
}
}
}

View File

@@ -22,6 +22,7 @@
<ItemGroup>
<ProjectReference Include="..\..\src\ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Proxy\ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Proxy.csproj"/>
<ProjectReference Include="..\..\src\ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Host\ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Host.csproj"/>
<ProjectReference Include="..\ZB.MOM.WW.OtOpcUa.Driver.Galaxy.TestSupport\ZB.MOM.WW.OtOpcUa.Driver.Galaxy.TestSupport.csproj"/>
</ItemGroup>
<ItemGroup>