chore: organize solution into module folders (Core/Server/Drivers/Client/Tooling)

Group all 69 projects into category subfolders under src/ and tests/ so the
Rider Solution Explorer mirrors the module structure. Folders: Core, Server,
Drivers (with a nested Driver CLIs subfolder), Client, Tooling.

- Move every project folder on disk with git mv (history preserved as renames).
- Recompute relative paths in 57 .csproj files: cross-category ProjectReferences,
  the lib/ HintPath+None refs in Driver.Historian.Wonderware, and the external
  mxaccessgw refs in Driver.Galaxy and its test project.
- Rebuild ZB.MOM.WW.OtOpcUa.slnx with nested solution folders.
- Re-prefix project paths in functional scripts (e2e, compliance, smoke SQL,
  integration, install).

Build green (0 errors); unit tests pass. Docs left for a separate pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-05-17 01:55:28 -04:00
parent 69f02fed7f
commit a25593a9c6
1044 changed files with 365 additions and 343 deletions

View File

@@ -0,0 +1,960 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
using MxGateway.Client;
using MxGateway.Contracts.Proto;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Browse;
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Config;
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Health;
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy;
/// <summary>
/// In-process .NET 10 Galaxy driver — the v2 replacement for the Galaxy.Host /
/// Galaxy.Proxy pair. PR 4.0 ships the project skeleton with <see cref="IDriver"/>
/// bodies that wire to a future <c>IGalaxyGatewayClient</c> abstraction. Capability
/// interfaces (browse, read, write, subscribe, history routing, host probes) land in
/// PRs 4.14.7; the wiring sequence keeps every intermediate state buildable so the
/// <c>Galaxy:Backend</c> flag (PR 4.W) can flip between legacy-host and mxgateway
/// for parity testing.
/// </summary>
/// <remarks>
/// This driver is registered as a Tier A in-process driver alongside Modbus / S7 / etc.
/// The legacy <c>GalaxyProxyDriver</c> (Driver.Galaxy.Proxy) coexists until PR 7.2;
/// <see cref="GalaxyDriverFactoryExtensions"/> registers under driver-type name
/// "GalaxyMxGateway" so both paths can be live simultaneously during parity testing.
/// </remarks>
public sealed class GalaxyDriver
: IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IRediscoverable, IHostConnectivityProbe, IAlarmSource, IDisposable
{
private readonly string _driverInstanceId;
private readonly GalaxyDriverOptions _options;
private readonly ILogger<GalaxyDriver> _logger;
// PR 4.1 — IGalaxyHierarchySource is the test seam for browse. When null, the driver
// lazily builds a GatewayGalaxyHierarchySource around a GalaxyRepositoryClient on
// first DiscoverAsync. Tests inject a fake source via the internal ctor to exercise
// GalaxyDiscoverer's translation logic without a real gRPC channel.
private IGalaxyHierarchySource? _hierarchySource;
private GalaxyRepositoryClient? _ownedRepositoryClient;
// PR 4.2 — IGalaxyDataReader is the test seam for IReadable. PR 4.4 supplies the
// production implementation that wraps GalaxyMxSession's SubscribeBulk + StreamEvents
// pump; until then ReadAsync throws NotSupportedException when the reader is null
// (legacy-host backend handles reads in production via DriverNodeManager's
// capability-routing).
private IGalaxyDataReader? _dataReader;
// PR 4.3 — IGalaxyDataWriter is the test seam for IWritable. Production wraps
// GalaxyMxSession via GatewayGalaxyDataWriter (Write / WriteSecured routing). The
// per-tag SecurityClassification map is populated during ITagDiscovery and consumed
// here at write time.
private IGalaxyDataWriter? _dataWriter;
private readonly System.Collections.Concurrent.ConcurrentDictionary<string, SecurityClassification>
_securityByFullRef = new(StringComparer.OrdinalIgnoreCase);
// PR 4.4 — subscription lifecycle. The pump consumes the gw event stream and fans
// out OnDataChange events to every registered driver subscription via the registry's
// reverse map. The subscriber is the test seam — production uses
// GatewayGalaxySubscriber over a connected GalaxyMxSession.
private IGalaxySubscriber? _subscriber;
private readonly SubscriptionRegistry _subscriptions = new();
private EventPump? _eventPump;
private readonly Lock _pumpLock = new();
// PR B.2 — IAlarmSource implementation. Production-side acks route through
// GatewayGalaxyAlarmAcknowledger which calls MxGatewayClient.AcknowledgeAlarmAsync
// (PR E.2 SDK). Tests inject IGalaxyAlarmAcknowledger via the internal ctor to
// exercise the wiring without a running gateway. The alarm event stream is
// delivered by EventPump.OnAlarmTransition (PR B.1) — this driver is the
// consumer that bridges it onto IAlarmSource.OnAlarmEvent.
private IGalaxyAlarmAcknowledger? _alarmAcknowledger;
private readonly Lock _alarmHandlersLock = new();
private readonly HashSet<GalaxyAlarmSubscriptionHandle> _alarmSubscriptions = new();
// PR 4.W — production runtime owned by InitializeAsync. The driver builds these
// when it opens a real gw session; tests bypass them by injecting seams via the
// internal ctor.
private GalaxyMxSession? _ownedMxSession;
private MxGatewayClient? _ownedMxClient;
// PR 4.5 — reconnect supervisor. Reflects in DriverState.Degraded while not Healthy.
private ReconnectSupervisor? _supervisor;
// PR 4.6 — IRediscoverable plumbing.
private DeployWatcher? _deployWatcher;
// PR 4.7 — IHostConnectivityProbe plumbing. The aggregator owns the merged
// transport+per-platform view; the forwarder is fed from the supervisor on
// transport state transitions; the probe watcher subscribes ScanState attributes
// for every discovered platform and pushes value changes to the aggregator.
private readonly HostStatusAggregator _hostStatuses = new();
private HostConnectivityForwarder? _transportForwarder;
private PerPlatformProbeWatcher? _probeWatcher;
private DriverHealth _health = new(DriverState.Unknown, null, null);
private bool _disposed;
/// <summary>
/// Server-pushed data-change notification. Fires from the
/// <see cref="EventPump"/>'s background loop; handlers should be cheap (or queue
/// onto another thread) to avoid blocking the gw event stream.
/// </summary>
public event EventHandler<DataChangeEventArgs>? OnDataChange;
/// <summary>Fires when the gateway signals a deploy-time change (PR 4.6 DeployWatcher).</summary>
public event EventHandler<RediscoveryEventArgs>? OnRediscoveryNeeded;
/// <summary>Fires when a host transitions Running ↔ Stopped (PR 4.7 HostStatusAggregator).</summary>
public event EventHandler<HostStatusChangedEventArgs>? OnHostStatusChanged;
/// <inheritdoc />
public event EventHandler<AlarmEventArgs>? OnAlarmEvent;
public GalaxyDriver(
string driverInstanceId,
GalaxyDriverOptions options,
ILogger<GalaxyDriver>? logger = null)
: this(driverInstanceId, options,
hierarchySource: null, dataReader: null, dataWriter: null, subscriber: null,
alarmAcknowledger: null, logger)
{
}
/// <summary>
/// Test-visible ctor — inject custom seams so <see cref="DiscoverAsync"/>,
/// <see cref="ReadAsync"/>, <see cref="WriteAsync"/>, and
/// <see cref="SubscribeAsync"/> can be exercised against canned data without
/// building real gRPC channels.
/// </summary>
internal GalaxyDriver(
string driverInstanceId,
GalaxyDriverOptions options,
IGalaxyHierarchySource? hierarchySource,
IGalaxyDataReader? dataReader = null,
IGalaxyDataWriter? dataWriter = null,
IGalaxySubscriber? subscriber = null,
IGalaxyAlarmAcknowledger? alarmAcknowledger = null,
ILogger<GalaxyDriver>? logger = null)
{
_driverInstanceId = !string.IsNullOrWhiteSpace(driverInstanceId)
? driverInstanceId
: throw new ArgumentException("Driver instance id required.", nameof(driverInstanceId));
_options = options ?? throw new ArgumentNullException(nameof(options));
_logger = logger ?? NullLogger<GalaxyDriver>.Instance;
_hierarchySource = hierarchySource;
_dataReader = dataReader;
_dataWriter = dataWriter;
_subscriber = subscriber;
_alarmAcknowledger = alarmAcknowledger;
// Forward the aggregator's transitions through IHostConnectivityProbe.
_hostStatuses.OnHostStatusChanged += (_, args) => OnHostStatusChanged?.Invoke(this, args);
}
/// <inheritdoc />
public string DriverInstanceId => _driverInstanceId;
/// <inheritdoc />
public string DriverType => GalaxyDriverFactoryExtensions.DriverTypeName;
/// <summary>Test-visible options snapshot.</summary>
internal GalaxyDriverOptions Options => _options;
/// <inheritdoc />
public async Task InitializeAsync(string driverConfigJson, CancellationToken cancellationToken)
{
ObjectDisposedException.ThrowIf(_disposed, this);
// Tests inject seams via the internal ctor; production InitializeAsync builds
// the gateway client + session + per-capability runtime components from
// GalaxyDriverOptions. When seams are pre-injected we leave them alone (the
// test exercises the wired surface without a real gw round-trip).
if (_subscriber is null && _dataWriter is null && _hierarchySource is null)
{
await BuildProductionRuntimeAsync(cancellationToken).ConfigureAwait(false);
}
else
{
_logger.LogDebug(
"GalaxyDriver {InstanceId} initializing with pre-injected seams — production runtime build skipped",
_driverInstanceId);
}
StartDeployWatcher();
_logger.LogInformation(
"GalaxyDriver {InstanceId} initialized — endpoint={Endpoint} clientName={ClientName}",
_driverInstanceId, _options.Gateway.Endpoint, _options.MxAccess.ClientName);
_health = new DriverHealth(DriverState.Healthy, DateTime.UtcNow, null);
}
/// <summary>
/// Build the production gw client + session + per-capability runtime components
/// from <c>_options</c>. Sets up the reconnect supervisor's reopen / replay
/// callbacks so a transport drop replays every active subscription on the
/// restored session.
/// </summary>
private async Task BuildProductionRuntimeAsync(CancellationToken cancellationToken)
{
var clientOptions = BuildClientOptions(_options.Gateway);
_ownedMxClient = MxGatewayClient.Create(clientOptions);
_ownedMxSession = new GalaxyMxSession(_options.MxAccess, _logger);
await _ownedMxSession.ConnectAsync(clientOptions, cancellationToken).ConfigureAwait(false);
// PR 6.1 — wrap the gw-facing seams in tracing decorators so every Subscribe /
// Unsubscribe / Write / StreamEvents call emits a span on the
// "ZB.MOM.WW.OtOpcUa.Driver.Galaxy" ActivitySource. The host process's tracing
// listener (OTLP exporter, dotnet-trace, etc.) consumes these without the driver
// taking a dependency on the OpenTelemetry packages.
_subscriber = new TracedGalaxySubscriber(
new GatewayGalaxySubscriber(_ownedMxSession), _options.MxAccess.ClientName);
_dataWriter = new TracedGalaxyDataWriter(
new GatewayGalaxyDataWriter(_ownedMxSession, _options.MxAccess.WriteUserId, _logger),
_options.MxAccess.ClientName);
_supervisor = new ReconnectSupervisor(
reopen: ReopenAsync,
replay: ReplayAsync,
options: new ReconnectOptions(
InitialBackoffOverride: TimeSpan.FromMilliseconds(_options.Reconnect.InitialBackoffMs),
MaxBackoffOverride: TimeSpan.FromMilliseconds(_options.Reconnect.MaxBackoffMs)),
logger: _logger);
_transportForwarder = new HostConnectivityForwarder(_options.MxAccess.ClientName, _hostStatuses, _logger);
_transportForwarder.SetTransport(HostState.Running); // initial state — we just connected
_supervisor.StateChanged += OnSupervisorStateChanged;
_probeWatcher = new PerPlatformProbeWatcher(
_subscriber, _hostStatuses, _logger,
bufferedUpdateIntervalMs: _options.MxAccess.PublishingIntervalMs);
// PR B.2 — wire the alarm acknowledger to the live gateway client.
_alarmAcknowledger ??= new GatewayGalaxyAlarmAcknowledger(_ownedMxClient, _ownedMxSession, _logger);
}
/// <summary>
/// Reopen callback for <see cref="ReconnectSupervisor"/>: re-Register the gw session.
/// If the session never connected, this is a fresh ConnectAsync; otherwise it's a
/// reconnect against the existing client.
/// </summary>
private async Task ReopenAsync(CancellationToken cancellationToken)
{
if (_ownedMxSession is null) return;
var clientOptions = BuildClientOptions(_options.Gateway);
await _ownedMxSession.ConnectAsync(clientOptions, cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Replay callback. Walks every active subscription's bindings and re-issues
/// SubscribeBulk for the tag list. PR 6.x can swap this for the gw's batched
/// <c>ReplaySubscriptionsCommand</c> once it ships.
/// </summary>
private async Task ReplayAsync(CancellationToken cancellationToken)
{
if (_subscriber is null) return;
var bindings = _subscriptions.SnapshotAllBindings();
if (bindings.Count == 0) return;
var refs = bindings.Select(b => b.FullReference).Distinct(StringComparer.OrdinalIgnoreCase).ToArray();
await _subscriber.SubscribeBulkAsync(
refs, _options.MxAccess.PublishingIntervalMs, cancellationToken).ConfigureAwait(false);
_logger.LogInformation(
"GalaxyDriver {InstanceId} replay completed — {Count} tags re-subscribed",
_driverInstanceId, refs.Length);
}
private void OnSupervisorStateChanged(object? sender, StateTransition transition)
{
// Reflect supervisor state in DriverHealth + transport forwarder.
_health = transition.Next switch
{
ReconnectSupervisor.State.Healthy => new DriverHealth(DriverState.Healthy, DateTime.UtcNow, null),
_ => new DriverHealth(DriverState.Degraded, _health.LastSuccessfulRead, transition.Cause),
};
if (_transportForwarder is not null)
{
var hostState = transition.Next == ReconnectSupervisor.State.Healthy
? HostState.Running
: HostState.Stopped;
_transportForwarder.SetTransport(hostState);
}
}
/// <summary>
/// Resolves <c>Gateway.ApiKeySecretRef</c> to the actual API-key bytes. Three
/// forms supported, evaluated in order:
/// <list type="number">
/// <item><c>env:NAME</c> — reads <c>Environment.GetEnvironmentVariable(NAME)</c>.
/// Throws when the variable is unset, so a misconfigured deployment fails
/// fast at InitializeAsync rather than silently sending an empty key.</item>
/// <item><c>file:PATH</c> — reads UTF-8 text from <c>PATH</c>, trimming
/// whitespace. Lets operators stash the key in an ACL'd file outside the
/// repo (the same pattern as the legacy <c>.local/galaxy-host-secret.txt</c>).</item>
/// <item>Anything else — used as the literal API key. Convenient for dev,
/// and avoids breaking existing configs that pre-date this resolver.</item>
/// </list>
/// A future PR can swap any of these arms for a DPAPI-backed lookup without
/// changing the call site.
/// </summary>
internal static string ResolveApiKey(string secretRef)
{
ArgumentException.ThrowIfNullOrEmpty(secretRef);
if (secretRef.StartsWith("env:", StringComparison.OrdinalIgnoreCase))
{
var name = secretRef[4..];
var value = Environment.GetEnvironmentVariable(name);
return !string.IsNullOrEmpty(value)
? value
: throw new InvalidOperationException(
$"Galaxy.Gateway.ApiKeySecretRef='{secretRef}' resolves to env var '{name}', but it is unset.");
}
if (secretRef.StartsWith("file:", StringComparison.OrdinalIgnoreCase))
{
var path = secretRef[5..];
if (!File.Exists(path))
{
throw new InvalidOperationException(
$"Galaxy.Gateway.ApiKeySecretRef='{secretRef}' points at '{path}', which doesn't exist.");
}
var contents = File.ReadAllText(path).Trim();
return !string.IsNullOrEmpty(contents)
? contents
: throw new InvalidOperationException(
$"Galaxy.Gateway.ApiKeySecretRef='{secretRef}' file '{path}' is empty.");
}
return secretRef;
}
private static MxGatewayClientOptions BuildClientOptions(GalaxyGatewayOptions gw) => new()
{
Endpoint = new Uri(gw.Endpoint, UriKind.Absolute),
ApiKey = ResolveApiKey(gw.ApiKeySecretRef),
UseTls = gw.UseTls,
CaCertificatePath = gw.CaCertificatePath,
ConnectTimeout = TimeSpan.FromSeconds(gw.ConnectTimeoutSeconds),
DefaultCallTimeout = TimeSpan.FromSeconds(gw.DefaultCallTimeoutSeconds),
StreamTimeout = gw.StreamTimeoutSeconds > 0 ? TimeSpan.FromSeconds(gw.StreamTimeoutSeconds) : null,
};
private void StartDeployWatcher()
{
if (!_options.Repository.WatchDeployEvents) return;
if (_ownedRepositoryClient is null && _hierarchySource is null) return;
// Reuse the lazily-built repository client (DiscoverAsync constructs it on demand).
// If discovery hasn't run yet, build the client here so the watcher has a target.
if (_ownedRepositoryClient is null)
{
_ownedRepositoryClient = MxGateway.Client.GalaxyRepositoryClient.Create(
BuildClientOptions(_options.Gateway));
}
var source = new GatewayGalaxyDeployWatchSource(_ownedRepositoryClient);
_deployWatcher = new DeployWatcher(source, _logger);
_deployWatcher.OnRediscoveryNeeded += (_, args) => OnRediscoveryNeeded?.Invoke(this, args);
_ = _deployWatcher.StartAsync(CancellationToken.None);
}
/// <inheritdoc />
public Task ReinitializeAsync(string driverConfigJson, CancellationToken cancellationToken)
{
// In-place config reapply. PR 4.5's reconnect supervisor will swap the
// gateway-client options under the lock; for the skeleton we just refresh health.
ObjectDisposedException.ThrowIf(_disposed, this);
_health = new DriverHealth(DriverState.Healthy, DateTime.UtcNow, null);
return Task.CompletedTask;
}
/// <inheritdoc />
public Task ShutdownAsync(CancellationToken cancellationToken)
{
if (_disposed) return Task.CompletedTask;
_logger.LogInformation("GalaxyDriver {InstanceId} shutting down", _driverInstanceId);
_health = new DriverHealth(DriverState.Unknown, _health.LastSuccessfulRead, null);
return Task.CompletedTask;
}
/// <inheritdoc />
public DriverHealth GetHealth()
{
// Reconnect supervisor wins when degraded — the cached _health reflects the last
// successful operation, but ongoing recovery should surface as Degraded.
if (_supervisor?.IsDegraded == true)
{
return new DriverHealth(DriverState.Degraded, _health.LastSuccessfulRead, _supervisor.LastError);
}
return _health;
}
// ===== IHostConnectivityProbe (PR 4.7 wire-up) =====
/// <inheritdoc />
public IReadOnlyList<HostConnectivityStatus> GetHostStatuses() => _hostStatuses.Snapshot();
/// <inheritdoc />
public long GetMemoryFootprint() => 0; // PR 4.4 sets this from SubscriptionRegistry size.
/// <inheritdoc />
public Task FlushOptionalCachesAsync(CancellationToken cancellationToken) => Task.CompletedTask;
// ===== ITagDiscovery (PR 4.1) =====
/// <inheritdoc />
public async Task DiscoverAsync(IAddressSpaceBuilder builder, CancellationToken cancellationToken)
{
ObjectDisposedException.ThrowIf(_disposed, this);
ArgumentNullException.ThrowIfNull(builder);
// PR 4.3 — capture SecurityClassification per attribute. PR 4.W — also refresh
// the per-platform probe watcher's membership after discovery so newly-added
// $WinPlatform / $AppEngine objects start advising their ScanState attribute.
var capturingBuilder = new SecurityCapturingBuilder(builder, _securityByFullRef);
var source = _hierarchySource ??= BuildDefaultHierarchySource();
var discoverer = new GalaxyDiscoverer(source);
await discoverer.DiscoverAsync(capturingBuilder, cancellationToken).ConfigureAwait(false);
if (_probeWatcher is not null)
{
var hierarchy = await source.GetHierarchyAsync(cancellationToken).ConfigureAwait(false);
var platforms = hierarchy
.Where(o => o.TemplateChain.Any(t =>
string.Equals(t, "$WinPlatform", StringComparison.OrdinalIgnoreCase)
|| string.Equals(t, "$AppEngine", StringComparison.OrdinalIgnoreCase)))
.Select(o => o.TagName)
.Where(name => !string.IsNullOrEmpty(name));
await _probeWatcher.SyncPlatformsAsync(platforms, cancellationToken).ConfigureAwait(false);
}
}
private SecurityClassification ResolveSecurity(string fullReference) =>
_securityByFullRef.TryGetValue(fullReference, out var sec) ? sec : SecurityClassification.FreeAccess;
// ===== IReadable =====
/// <inheritdoc />
public Task<IReadOnlyList<DataValueSnapshot>> ReadAsync(
IReadOnlyList<string> fullReferences, CancellationToken cancellationToken)
{
ObjectDisposedException.ThrowIf(_disposed, this);
ArgumentNullException.ThrowIfNull(fullReferences);
if (fullReferences.Count == 0) return Task.FromResult<IReadOnlyList<DataValueSnapshot>>([]);
if (_dataReader is not null)
{
// Test-only path — tests inject a canned reader via the internal ctor.
return _dataReader.ReadAsync(fullReferences, cancellationToken);
}
if (_subscriber is null)
{
throw new NotSupportedException(
"GalaxyDriver.ReadAsync requires a connected GalaxyMxSession (production runtime not built). " +
"Either inject a test seam via the internal ctor or call InitializeAsync against a real gateway.");
}
return ReadViaSubscribeOnceAsync(fullReferences, cancellationToken);
}
/// <summary>
/// Production read path. MxAccess has no one-shot Read RPC — every value comes
/// through the event stream. We synthesise a Read by:
/// <list type="number">
/// <item>Subscribing the requested tags through the existing
/// <see cref="SubscriptionRegistry"/> + <see cref="EventPump"/>.</item>
/// <item>Waiting for the first <c>OnDataChange</c> per item handle (the gateway
/// pushes the current value as the initial event after a SubscribeBulk).</item>
/// <item>Unsubscribing.</item>
/// </list>
/// Tags the gw rejects at SubscribeBulk time, or that never publish before the
/// caller's cancellation token fires, return a Bad-status snapshot in input order
/// so the caller still sees one snapshot per requested reference.
/// </summary>
private async Task<IReadOnlyList<DataValueSnapshot>> ReadViaSubscribeOnceAsync(
IReadOnlyList<string> fullReferences, CancellationToken cancellationToken)
{
var pump = EnsureEventPumpStarted();
var subscriptionId = _subscriptions.NextSubscriptionId();
// Pre-allocate one TaskCompletionSource per full-reference so the OnDataChange
// handler can complete them out-of-order as events arrive. Wired BEFORE the
// SubscribeBulk call so we don't race with the first event the gw pushes.
var pendingByRef = new Dictionary<string, TaskCompletionSource<DataValueSnapshot>>(
StringComparer.OrdinalIgnoreCase);
foreach (var fullRef in fullReferences.Distinct(StringComparer.OrdinalIgnoreCase))
{
pendingByRef[fullRef] = new TaskCompletionSource<DataValueSnapshot>(
TaskCreationOptions.RunContinuationsAsynchronously);
}
EventHandler<DataChangeEventArgs> handler = (_, args) =>
{
// Filter to OUR subscription — the pump's OnDataChange fans out across all
// subscriptions on the driver, and we don't want a parallel ISubscribable
// caller's events to leak into our read.
if (args.SubscriptionHandle is GalaxySubscriptionHandle gsh
&& gsh.SubscriptionId == subscriptionId
&& pendingByRef.TryGetValue(args.FullReference, out var tcs))
{
tcs.TrySetResult(args.Snapshot);
}
};
pump.OnDataChange += handler;
var bufferedIntervalMs = _options.MxAccess.PublishingIntervalMs;
IReadOnlyList<SubscribeResult> results;
try
{
results = await _subscriber!
.SubscribeBulkAsync(fullReferences, bufferedIntervalMs, cancellationToken)
.ConfigureAwait(false);
}
catch
{
pump.OnDataChange -= handler;
throw;
}
// Register bindings so the pump knows to dispatch events for these handles.
var bindings = new List<TagBinding>(fullReferences.Count);
for (var i = 0; i < fullReferences.Count; i++)
{
var fullRef = fullReferences[i];
var match = results.FirstOrDefault(r => string.Equals(r.TagAddress, fullRef, StringComparison.OrdinalIgnoreCase));
var itemHandle = match is { WasSuccessful: true } ? match.ItemHandle : 0;
bindings.Add(new TagBinding(fullRef, itemHandle));
// Tags the gw rejected up front — complete with Bad status now so the
// wait below doesn't time out on them.
if (itemHandle <= 0
&& pendingByRef.TryGetValue(fullRef, out var rejectedTcs))
{
rejectedTcs.TrySetResult(new DataValueSnapshot(
Value: null,
StatusCode: 0x80000000u, // Bad
SourceTimestampUtc: null,
ServerTimestampUtc: DateTime.UtcNow));
}
}
_subscriptions.Register(subscriptionId, bindings);
try
{
// Wait for every pending TCS to complete or the caller's CT to fire. When the
// CT fires before all values arrive, fill the still-pending entries with a
// Bad-status snapshot rather than throwing — Read semantics let callers see
// partial results.
using var registration = cancellationToken.Register(() =>
{
foreach (var tcs in pendingByRef.Values)
{
tcs.TrySetResult(new DataValueSnapshot(
Value: null,
StatusCode: 0x800B0000u, // BadTimeout
SourceTimestampUtc: null,
ServerTimestampUtc: DateTime.UtcNow));
}
});
var snapshots = new DataValueSnapshot[fullReferences.Count];
for (var i = 0; i < fullReferences.Count; i++)
{
snapshots[i] = await pendingByRef[fullReferences[i]].Task.ConfigureAwait(false);
}
return snapshots;
}
finally
{
pump.OnDataChange -= handler;
// Drop the bindings + unsubscribe the live handles. UnsubscribeBulkAsync's
// failure isn't fatal — the registry is already cleared, so any straggling
// event from the gw would be a no-op fan-out.
_subscriptions.Remove(subscriptionId);
var liveHandles = bindings.Where(b => b.ItemHandle > 0).Select(b => b.ItemHandle).ToArray();
if (liveHandles.Length > 0)
{
try
{
await _subscriber!.UnsubscribeBulkAsync(liveHandles, CancellationToken.None)
.ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogWarning(ex,
"GalaxyDriver.ReadViaSubscribeOnceAsync UnsubscribeBulk failed for {Count} handle(s) — registry already cleared.",
liveHandles.Length);
}
}
}
}
// ===== IWritable (PR 4.3) =====
/// <inheritdoc />
public Task<IReadOnlyList<WriteResult>> WriteAsync(
IReadOnlyList<WriteRequest> writes, CancellationToken cancellationToken)
{
ObjectDisposedException.ThrowIf(_disposed, this);
ArgumentNullException.ThrowIfNull(writes);
if (writes.Count == 0) return Task.FromResult<IReadOnlyList<WriteResult>>([]);
if (_dataWriter is null)
{
// Mirror the IReadable fallback: production write path runs on top of
// GalaxyMxSession (PR 4.2 skeleton; PR 4.4 wires the live session). Until
// that lands, deployments selecting Galaxy:Backend=mxgateway can't write.
throw new NotSupportedException(
"GalaxyDriver.WriteAsync requires GatewayGalaxyDataWriter wired against a connected " +
"GalaxyMxSession (PR 4.4). Until that lands, route writes through the legacy-host " +
"backend (Galaxy:Backend=legacy-host).");
}
return _dataWriter.WriteAsync(writes, ResolveSecurity, cancellationToken);
}
// ===== ISubscribable (PR 4.4) =====
/// <inheritdoc />
public async Task<ISubscriptionHandle> SubscribeAsync(
IReadOnlyList<string> fullReferences, TimeSpan publishingInterval, CancellationToken cancellationToken)
{
ObjectDisposedException.ThrowIf(_disposed, this);
ArgumentNullException.ThrowIfNull(fullReferences);
if (_subscriber is null)
{
throw new NotSupportedException(
"GalaxyDriver.SubscribeAsync requires a connected GalaxyMxSession + GatewayGalaxySubscriber. " +
"PR 4.W wires the production session; until then route subscriptions through the legacy-host backend.");
}
var pump = EnsureEventPumpStarted();
var subscriptionId = _subscriptions.NextSubscriptionId();
if (fullReferences.Count == 0)
{
// Empty subscriptions register but never bind anything — keeps Unsubscribe
// symmetric for callers that conditionally add tags later.
_subscriptions.Register(subscriptionId, []);
return new GalaxySubscriptionHandle(subscriptionId);
}
// PR 6.3 — when the caller doesn't set a publishing interval (TimeSpan.Zero or
// negative), fall back to the configured MxAccess.PublishingIntervalMs. The
// server's UA subscription publishingInterval drives this in production; tests
// and infrastructure callers (probe watcher, deploy watcher) hit the fallback.
var requested = (int)Math.Max(0, publishingInterval.TotalMilliseconds);
var bufferedIntervalMs = requested > 0 ? requested : _options.MxAccess.PublishingIntervalMs;
var results = await _subscriber
.SubscribeBulkAsync(fullReferences, bufferedIntervalMs, cancellationToken)
.ConfigureAwait(false);
// Build the binding list in input order. Failed entries (gw rejected the tag) are
// recorded with a non-positive ItemHandle so the caller can detect partial failure
// by inspecting the returned handle's diagnostic context — full per-tag error
// surface lands in PR 5.3's parity tests.
var bindings = new List<TagBinding>(fullReferences.Count);
for (var i = 0; i < fullReferences.Count; i++)
{
var fullRef = fullReferences[i];
var match = results.FirstOrDefault(r => string.Equals(r.TagAddress, fullRef, StringComparison.OrdinalIgnoreCase));
var itemHandle = match is { WasSuccessful: true } ? match.ItemHandle : 0;
bindings.Add(new TagBinding(fullRef, itemHandle));
if (match is null || !match.WasSuccessful)
{
_logger.LogWarning(
"Galaxy subscribe for {FullRef} failed: {Error}",
fullRef, match?.ErrorMessage ?? "<no result returned>");
}
}
_subscriptions.Register(subscriptionId, bindings);
_ = pump; // keep the pump alive for the subscription's lifetime
return new GalaxySubscriptionHandle(subscriptionId);
}
/// <inheritdoc />
public async Task UnsubscribeAsync(ISubscriptionHandle handle, CancellationToken cancellationToken)
{
ObjectDisposedException.ThrowIf(_disposed, this);
ArgumentNullException.ThrowIfNull(handle);
if (handle is not GalaxySubscriptionHandle gsh)
{
throw new ArgumentException(
$"Subscription handle was not issued by this driver (expected GalaxySubscriptionHandle, got {handle.GetType().Name}).",
nameof(handle));
}
var bindings = _subscriptions.Remove(gsh.SubscriptionId);
if (bindings is null) return; // already removed or never registered
var liveItemHandles = bindings.Where(b => b.ItemHandle > 0).Select(b => b.ItemHandle).ToArray();
if (liveItemHandles.Length == 0 || _subscriber is null) return;
try
{
await _subscriber.UnsubscribeBulkAsync(liveItemHandles, cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.LogWarning(ex,
"Galaxy UnsubscribeBulk failed for subscription {SubscriptionId} — registry already cleared on driver side.",
gsh.SubscriptionId);
}
}
/// <summary>
/// Lazily start the <see cref="EventPump"/> on the first subscribe. The pump is
/// shared across every subscription on this driver — fan-out happens through the
/// <see cref="SubscriptionRegistry"/> reverse map, not by spinning a pump per
/// subscription.
/// </summary>
private EventPump EnsureEventPumpStarted()
{
lock (_pumpLock)
{
if (_eventPump is not null) return _eventPump;
_eventPump = new EventPump(
_subscriber!, _subscriptions, _logger,
channelCapacity: _options.MxAccess.EventPumpChannelCapacity,
clientName: _options.MxAccess.ClientName);
_eventPump.OnDataChange += OnPumpDataChange;
_eventPump.OnAlarmTransition += OnPumpAlarmTransition;
_eventPump.Start();
return _eventPump;
}
}
// ===== IAlarmSource (PR B.2) =====
/// <inheritdoc />
public Task<IAlarmSubscriptionHandle> SubscribeAlarmsAsync(
IReadOnlyList<string> sourceNodeIds, CancellationToken cancellationToken)
{
ObjectDisposedException.ThrowIf(_disposed, this);
ArgumentNullException.ThrowIfNull(sourceNodeIds);
// The driver doesn't multiplex alarm subscriptions per source-node-id today —
// alarm events arrive on the same gateway StreamEvents channel as data-change
// events once the gateway emits the new family (PRs A.2 + A.3). The
// subscription handle is a sentinel the server uses for symmetric Unsubscribe;
// every active handle receives every alarm transition, and the server filters
// by source node before raising Part 9 conditions. Same shape AbCip uses.
EnsureEventPumpStarted();
var handle = new GalaxyAlarmSubscriptionHandle(Guid.NewGuid().ToString("N"));
lock (_alarmHandlersLock)
{
_alarmSubscriptions.Add(handle);
}
return Task.FromResult<IAlarmSubscriptionHandle>(handle);
}
/// <inheritdoc />
public Task UnsubscribeAlarmsAsync(IAlarmSubscriptionHandle handle, CancellationToken cancellationToken)
{
ObjectDisposedException.ThrowIf(_disposed, this);
ArgumentNullException.ThrowIfNull(handle);
if (handle is not GalaxyAlarmSubscriptionHandle gash)
{
throw new ArgumentException(
$"Subscription handle was not issued by this driver (expected GalaxyAlarmSubscriptionHandle, got {handle.GetType().Name}).",
nameof(handle));
}
lock (_alarmHandlersLock)
{
_alarmSubscriptions.Remove(gash);
}
return Task.CompletedTask;
}
/// <inheritdoc />
public async Task AcknowledgeAsync(
IReadOnlyList<AlarmAcknowledgeRequest> acknowledgements, CancellationToken cancellationToken)
{
ObjectDisposedException.ThrowIf(_disposed, this);
ArgumentNullException.ThrowIfNull(acknowledgements);
if (acknowledgements.Count == 0) return;
if (_alarmAcknowledger is null)
{
throw new NotSupportedException(
"GalaxyDriver.AcknowledgeAsync requires GatewayGalaxyAlarmAcknowledger wired against a connected " +
"GalaxyMxSession (PR B.2). InitializeAsync must run before alarm acknowledgements can flow.");
}
// Acks are issued one-by-one — the gateway RPC accepts a single alarm
// reference per call. AlarmConditionState's per-condition Acknowledge in the
// server-side ACL layer is the natural rate-limit, so issuing in series here
// keeps the operator-comment ordering deterministic without bursting the
// worker's STA queue.
foreach (var ack in acknowledgements)
{
// ConditionId carries the alarm full reference for the Galaxy driver —
// SourceNodeId is the OPC UA browse path, which the gateway can't address.
// The server-side condition state pairs them through AlarmConditionService.
var alarmFullReference = !string.IsNullOrEmpty(ack.ConditionId)
? ack.ConditionId
: ack.SourceNodeId;
await _alarmAcknowledger.AcknowledgeAsync(
alarmFullReference,
ack.Comment ?? string.Empty,
operatorUser: string.Empty, // server-side ACL fills this from the OPC UA session
cancellationToken).ConfigureAwait(false);
}
}
/// <summary>
/// Receives <see cref="GalaxyAlarmTransition"/> events from the EventPump and
/// reshapes them into <see cref="AlarmEventArgs"/> for OPC UA-side consumers.
/// Fires <see cref="OnAlarmEvent"/> only when at least one alarm subscription is
/// active so a server that hasn't called <see cref="SubscribeAlarmsAsync"/> yet
/// doesn't surface untracked transitions.
/// </summary>
private void OnPumpAlarmTransition(object? sender, GalaxyAlarmTransition transition)
{
GalaxyAlarmSubscriptionHandle? handle;
lock (_alarmHandlersLock)
{
// Pick any active subscription handle as the "owner" of the event. The
// server-side state machine doesn't multiplex by handle today; if multiple
// alarm subscriptions are active we still only fire the event once and
// the AlarmConditionService dispatches per-source-node downstream.
handle = _alarmSubscriptions.Count > 0
? _alarmSubscriptions.First()
: null;
}
if (handle is null) return;
var args = new AlarmEventArgs(
SubscriptionHandle: handle,
SourceNodeId: transition.SourceObjectReference,
ConditionId: transition.AlarmFullReference,
AlarmType: transition.AlarmTypeName,
Message: transition.Description,
Severity: transition.SeverityBucket,
SourceTimestampUtc: transition.TransitionTimestampUtc,
OperatorComment: string.IsNullOrEmpty(transition.OperatorComment) ? null : transition.OperatorComment,
OriginalRaiseTimestampUtc: transition.OriginalRaiseTimestampUtc,
AlarmCategory: string.IsNullOrEmpty(transition.Category) ? null : transition.Category);
try
{
OnAlarmEvent?.Invoke(this, args);
}
catch (Exception ex)
{
_logger.LogWarning(ex,
"GalaxyDriver OnAlarmEvent handler threw for {AlarmRef} — continuing.",
transition.AlarmFullReference);
}
}
/// <summary>
/// Forwards every fan-out event to the public <see cref="OnDataChange"/> for
/// ISubscribable consumers, AND routes ScanState changes to the per-platform
/// probe watcher (PR 4.7) so platform health entries update without the watcher
/// consuming the event stream itself.
/// </summary>
private void OnPumpDataChange(object? sender, DataChangeEventArgs args)
{
OnDataChange?.Invoke(this, args);
if (_probeWatcher is not null
&& args.FullReference.EndsWith(PerPlatformProbeWatcher.ProbeSuffix, StringComparison.OrdinalIgnoreCase))
{
// The probe decoder takes a raw quality byte; recover it from the StatusCode
// top byte (Good=0x00 → byte 192, Uncertain=0x40 → byte 64, Bad=0x80 → byte 0).
var qualityByte = (byte)((args.Snapshot.StatusCode >> 30) & 0x3) switch
{
0 => 192,
1 => 64,
_ => 0,
};
_probeWatcher.OnProbeValueChanged(args.FullReference, args.Snapshot.Value, (byte)qualityByte);
}
}
/// <summary>
/// Lazily builds the default <see cref="IGalaxyHierarchySource"/> from
/// <c>_options.Gateway</c>. Owned <see cref="GalaxyRepositoryClient"/> is disposed in
/// <see cref="Dispose"/>. Tests bypass this by injecting their own source via the
/// internal ctor.
/// </summary>
private IGalaxyHierarchySource BuildDefaultHierarchySource()
{
var gw = _options.Gateway;
var clientOptions = new MxGatewayClientOptions
{
Endpoint = new Uri(gw.Endpoint, UriKind.Absolute),
ApiKey = ResolveApiKey(gw.ApiKeySecretRef),
UseTls = gw.UseTls,
CaCertificatePath = gw.CaCertificatePath,
ConnectTimeout = TimeSpan.FromSeconds(gw.ConnectTimeoutSeconds),
DefaultCallTimeout = TimeSpan.FromSeconds(gw.DefaultCallTimeoutSeconds),
StreamTimeout = gw.StreamTimeoutSeconds > 0
? TimeSpan.FromSeconds(gw.StreamTimeoutSeconds)
: null,
};
_ownedRepositoryClient = GalaxyRepositoryClient.Create(clientOptions);
return new TracedGalaxyHierarchySource(
new GatewayGalaxyHierarchySource(_ownedRepositoryClient), _options.MxAccess.ClientName);
}
public void Dispose()
{
if (_disposed) return;
_disposed = true;
// Order: stop deploy watcher, supervisor, probe watcher, pump, then sessions and
// clients. Each step is best-effort — disposal during a faulted state shouldn't
// throw and prevent the rest of the cleanup.
try { _deployWatcher?.Dispose(); } catch (Exception ex) { _logger.LogWarning(ex, "DeployWatcher dispose failed"); }
try { _supervisor?.Dispose(); } catch (Exception ex) { _logger.LogWarning(ex, "ReconnectSupervisor dispose failed"); }
try { _probeWatcher?.Dispose(); } catch (Exception ex) { _logger.LogWarning(ex, "ProbeWatcher dispose failed"); }
try { _transportForwarder?.Dispose(); } catch (Exception ex) { _logger.LogWarning(ex, "Transport forwarder dispose failed"); }
EventPump? pump;
lock (_pumpLock) { pump = _eventPump; _eventPump = null; }
pump?.DisposeAsync().AsTask().GetAwaiter().GetResult();
_ownedMxSession?.DisposeAsync().AsTask().GetAwaiter().GetResult();
_ownedMxSession = null;
_ownedMxClient?.DisposeAsync().AsTask().GetAwaiter().GetResult();
_ownedMxClient = null;
_ownedRepositoryClient?.DisposeAsync().AsTask().GetAwaiter().GetResult();
_ownedRepositoryClient = null;
_hierarchySource = null;
}
/// <summary>
/// Address-space builder wrapper that records each variable's
/// <see cref="DriverAttributeInfo.SecurityClass"/> into the supplied dictionary
/// before delegating to the inner builder. Used by <see cref="DiscoverAsync"/>
/// to capture per-tag classifications for the IWritable routing decision —
/// PR 4.3 needs the data, but the discoverer itself doesn't (and shouldn't)
/// know about the driver's internal state.
/// </summary>
private sealed class SecurityCapturingBuilder(
IAddressSpaceBuilder inner,
System.Collections.Concurrent.ConcurrentDictionary<string, SecurityClassification> map)
: IAddressSpaceBuilder
{
public IAddressSpaceBuilder Folder(string browseName, string displayName)
=> new SecurityCapturingBuilder(inner.Folder(browseName, displayName), map);
public IVariableHandle Variable(string browseName, string displayName, DriverAttributeInfo attributeInfo)
{
map[attributeInfo.FullName] = attributeInfo.SecurityClass;
return inner.Variable(browseName, displayName, attributeInfo);
}
public void AddProperty(string browseName, DriverDataType dataType, object? value)
=> inner.AddProperty(browseName, dataType, value);
}
}