chore: organize solution into module folders (Core/Server/Drivers/Client/Tooling)
Group all 69 projects into category subfolders under src/ and tests/ so the Rider Solution Explorer mirrors the module structure. Folders: Core, Server, Drivers (with a nested Driver CLIs subfolder), Client, Tooling. - Move every project folder on disk with git mv (history preserved as renames). - Recompute relative paths in 57 .csproj files: cross-category ProjectReferences, the lib/ HintPath+None refs in Driver.Historian.Wonderware, and the external mxaccessgw refs in Driver.Galaxy and its test project. - Rebuild ZB.MOM.WW.OtOpcUa.slnx with nested solution folders. - Re-prefix project paths in functional scripts (e2e, compliance, smoke SQL, integration, install). Build green (0 errors); unit tests pass. Docs left for a separate pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,51 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Browse;
|
||||
|
||||
/// <summary>
/// Fills in the five sub-attribute references carried by <see cref="AlarmConditionInfo"/>
/// using the Galaxy naming convention. The server-level <c>AlarmConditionService</c>
/// (PR 2.2) uses these references to subscribe to live alarm-state attributes and to
/// route ack writes back to the alarm tag.
/// </summary>
/// <remarks>
/// Galaxy alarms expose four runtime attributes plus a write-only ack target,
/// consistently named on every alarm-bearing object:
/// <list type="bullet">
///   <item><c>&lt;tag&gt;.&lt;attr&gt;.InAlarm</c></item>
///   <item><c>&lt;tag&gt;.&lt;attr&gt;.Priority</c></item>
///   <item><c>&lt;tag&gt;.&lt;attr&gt;.DescAttrName</c></item>
///   <item><c>&lt;tag&gt;.&lt;attr&gt;.Acked</c></item>
///   <item><c>&lt;tag&gt;.&lt;attr&gt;.AckMsg</c></item>
/// </list>
/// This is the same convention the legacy <c>GalaxyAlarmTracker</c> hard-coded; it is
/// concentrated here so PR 2.2's service receives complete <c>AlarmConditionInfo</c>
/// rows during discovery without the server needing to know the convention.
/// </remarks>
internal static class AlarmRefBuilder
{
    private const string InAlarmSuffix = ".InAlarm";
    private const string PrioritySuffix = ".Priority";
    private const string DescAttrNameSuffix = ".DescAttrName";
    private const string AckedSuffix = ".Acked";
    private const string AckMsgSuffix = ".AckMsg";

    /// <summary>
    /// Creates the <see cref="AlarmConditionInfo"/> for one alarm-bearing attribute with
    /// all five sub-attribute references populated.
    /// </summary>
    /// <param name="fullReference">
    /// The attribute's full reference (e.g. <c>"Tank1.Level.HiHi"</c>). It is used
    /// verbatim as the source name, and each convention suffix is appended to it to
    /// form the sub-attribute references.
    /// </param>
    /// <param name="initialSeverity">Severity reported until a live value is available.</param>
    /// <param name="initialDescription">Optional description reported until a live value is available.</param>
    /// <returns>The populated alarm-condition descriptor.</returns>
    public static AlarmConditionInfo Build(
        string fullReference,
        AlarmSeverity initialSeverity = AlarmSeverity.Medium,
        string? initialDescription = null)
    {
        // Every sub-attribute reference is simply "<fullReference><suffix>".
        string SubReference(string suffix) => string.Concat(fullReference, suffix);

        return new AlarmConditionInfo(
            SourceName: fullReference,
            InitialSeverity: initialSeverity,
            InitialDescription: initialDescription,
            InAlarmRef: SubReference(InAlarmSuffix),
            PriorityRef: SubReference(PrioritySuffix),
            DescAttrNameRef: SubReference(DescAttrNameSuffix),
            AckedRef: SubReference(AckedSuffix),
            AckMsgWriteRef: SubReference(AckMsgSuffix));
    }
}
|
||||
@@ -0,0 +1,23 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Browse;
|
||||
|
||||
/// <summary>
/// Translates Galaxy <c>mx_data_type</c> integer codes into <see cref="DriverDataType"/>.
/// Ported from the legacy <c>GalaxyProxyDriver.MapDataType</c>, including its fallback
/// to <see cref="DriverDataType.String"/> for unknown codes — this keeps wire
/// compatibility with deployed configs while the mapping is tightened through the
/// parity matrix.
/// </summary>
internal static class DataTypeMap
{
    /// <summary>Maps a single <c>mx_data_type</c> code to its driver data type.</summary>
    /// <param name="mxDataType">The raw Galaxy data-type code.</param>
    /// <returns>
    /// The matching <see cref="DriverDataType"/>; <see cref="DriverDataType.String"/>
    /// when the code is not one of the known values 0–5.
    /// </returns>
    public static DriverDataType Map(int mxDataType)
    {
        switch (mxDataType)
        {
            case 0:
                return DriverDataType.Boolean;
            case 1:
                return DriverDataType.Int32;
            case 2:
                return DriverDataType.Float32;
            case 3:
                return DriverDataType.Float64;
            case 5:
                return DriverDataType.DateTime;
            case 4:
            default:
                // Code 4 is the explicit string type; anything unrecognised shares it.
                return DriverDataType.String;
        }
    }
}
|
||||
@@ -0,0 +1,232 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using MxGateway.Contracts.Proto.Galaxy;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Browse;
|
||||
|
||||
/// <summary>
/// Long-lived consumer of <see cref="IGalaxyDeployWatchSource"/>. Translates the
/// gateway <see cref="DeployEvent"/> stream into
/// <see cref="IRediscoverable.OnRediscoveryNeeded"/>-shaped events whenever the
/// observed <c>time_of_last_deploy</c> actually changes.
/// </summary>
/// <remarks>
/// <para>
/// The first event the gateway emits on subscribe is the bootstrap snapshot
/// carrying the current cached deploy time — even when the caller passed a
/// <c>lastSeenDeployTime</c>, a different gateway instance / cache invalidation
/// may still re-deliver it. The watcher therefore suppresses the first event
/// it observes locally, recording its (presence, time) pair as the baseline,
/// and only raises rediscover for subsequent events whose pair differs.
/// </para>
/// <para>
/// When <see cref="IGalaxyDeployWatchSource.WatchAsync"/> throws (transport
/// drop, gateway restart) the loop logs a warning, waits with capped
/// exponential backoff, then re-subscribes using the last-observed deploy time
/// so a reconnect doesn't fan out a redundant rediscover for state we already
/// knew about.
/// </para>
/// <para>
/// When the stream completes normally after delivering at least one event the
/// loop re-subscribes immediately. A stream that completes without delivering
/// anything is re-opened after a capped, escalating pause so a source that
/// closes every subscription straight away cannot spin the loop.
/// </para>
/// <para>
/// Subscribers of <see cref="OnRediscoveryNeeded"/> are invoked one at a time;
/// an exception from one subscriber is logged and does not prevent the remaining
/// subscribers from being notified.
/// </para>
/// </remarks>
public sealed class DeployWatcher : IDisposable
{
    private static readonly TimeSpan DefaultInitialBackoff = TimeSpan.FromSeconds(1);
    private static readonly TimeSpan DefaultMaxBackoff = TimeSpan.FromSeconds(30);

    private readonly IGalaxyDeployWatchSource _source;
    private readonly ILogger _logger;
    private readonly TimeSpan _initialBackoff;
    private readonly TimeSpan _maxBackoff;
    private readonly Func<int, TimeSpan>? _jitter;

    private CancellationTokenSource? _cts;
    private Task? _loopTask;
    private int _started; // 0 = not started, 1 = started

    /// <inheritdoc cref="IRediscoverable.OnRediscoveryNeeded"/>
    public event EventHandler<RediscoveryEventArgs>? OnRediscoveryNeeded;

    /// <summary>
    /// Creates a watcher with the default retry policy (1 s initial backoff, 30 s cap,
    /// random jitter).
    /// </summary>
    /// <param name="source">Stream of deploy events to consume.</param>
    /// <param name="logger">Optional logger; a no-op logger is used when omitted.</param>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public DeployWatcher(IGalaxyDeployWatchSource source, ILogger? logger = null)
        : this(source, logger, DefaultInitialBackoff, DefaultMaxBackoff, jitter: null)
    {
    }

    /// <summary>
    /// Test-only ctor lets tests collapse the retry backoff so a fault-injection
    /// scenario doesn't sit in <see cref="Task.Delay(TimeSpan, CancellationToken)"/>.
    /// </summary>
    /// <param name="source">Stream of deploy events to consume.</param>
    /// <param name="logger">Optional logger; a no-op logger is used when null.</param>
    /// <param name="initialBackoff">Delay before the first retry and floor for later ones.</param>
    /// <param name="maxBackoff">Upper bound applied when the backoff is doubled.</param>
    /// <param name="jitter">
    /// Maps the retry attempt number to the jitter added to the failure backoff; when
    /// null a random jitter is used.
    /// </param>
    internal DeployWatcher(
        IGalaxyDeployWatchSource source,
        ILogger? logger,
        TimeSpan initialBackoff,
        TimeSpan maxBackoff,
        Func<int, TimeSpan>? jitter)
    {
        _source = source ?? throw new ArgumentNullException(nameof(source));
        _logger = logger ?? NullLogger.Instance;
        _initialBackoff = initialBackoff;
        _maxBackoff = maxBackoff;
        _jitter = jitter;
    }

    /// <summary>
    /// Kicks off the background watch loop. Returns immediately once the loop task
    /// has been scheduled — the loop itself runs until <see cref="StopAsync"/> or
    /// the supplied <paramref name="cancellationToken"/> is signaled.
    /// </summary>
    /// <param name="cancellationToken">External token that also stops the loop.</param>
    /// <exception cref="InvalidOperationException">The watcher was already started.</exception>
    public Task StartAsync(CancellationToken cancellationToken)
    {
        if (Interlocked.Exchange(ref _started, 1) != 0)
        {
            throw new InvalidOperationException(
                "DeployWatcher.StartAsync has already been called. Construct a new instance to restart.");
        }

        var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);

        // Capture the token here rather than reading the mutable _cts field from the
        // thread-pool callback.
        CancellationToken loopToken = cts.Token;

        _cts = cts;
        _loopTask = Task.Run(() => RunLoopAsync(loopToken), CancellationToken.None);
        return Task.CompletedTask;
    }

    /// <summary>Cancels the loop and waits for it to exit cleanly.</summary>
    public async Task StopAsync()
    {
        var cts = _cts;
        var loop = _loopTask;
        if (cts is null || loop is null) return;

        try { cts.Cancel(); } catch (ObjectDisposedException) { }

        try
        {
            await loop.ConfigureAwait(false);
        }
        catch (OperationCanceledException)
        {
            // Expected: cancellation propagated up from the source enumerator.
        }
        finally
        {
            cts.Dispose();
            _cts = null;
            _loopTask = null;
        }
    }

    /// <summary>
    /// Stops the loop if it is running and blocks until it has exited. No-op when the
    /// watcher was never started or has already been stopped.
    /// </summary>
    public void Dispose()
    {
        if (_loopTask is null) return;
        StopAsync().GetAwaiter().GetResult();
    }

    /// <summary>
    /// Subscribe / consume / re-subscribe loop. Runs until
    /// <paramref name="cancellationToken"/> is signaled.
    /// </summary>
    private async Task RunLoopAsync(CancellationToken cancellationToken)
    {
        DateTimeOffset? lastSeenDeployTime = null;
        bool? lastSeenPresent = null;
        bool baselineCaptured = false;
        TimeSpan backoff = _initialBackoff;
        TimeSpan emptyStreamBackoff = _initialBackoff;
        int attempt = 0;

        while (!cancellationToken.IsCancellationRequested)
        {
            try
            {
                bool sawEvent = false;

                await foreach (DeployEvent ev in _source
                    .WatchAsync(lastSeenDeployTime, cancellationToken)
                    .WithCancellation(cancellationToken)
                    .ConfigureAwait(false))
                {
                    // Successful read — reset retry state.
                    sawEvent = true;
                    backoff = _initialBackoff;
                    emptyStreamBackoff = _initialBackoff;
                    attempt = 0;

                    DateTimeOffset? observedTime = ev.TimeOfLastDeployPresent && ev.TimeOfLastDeploy is not null
                        ? ev.TimeOfLastDeploy.ToDateTimeOffset()
                        : null;
                    bool observedPresent = ev.TimeOfLastDeployPresent;

                    if (!baselineCaptured)
                    {
                        // Bootstrap event — record state and suppress.
                        baselineCaptured = true;
                        lastSeenDeployTime = observedTime;
                        lastSeenPresent = observedPresent;
                        _logger.LogDebug(
                            "DeployWatcher bootstrap event sequence={Sequence} present={Present} time={Time} suppressed.",
                            ev.Sequence, observedPresent, observedTime);
                        continue;
                    }

                    bool presenceFlipped = lastSeenPresent != observedPresent;
                    bool timeChanged = observedPresent && lastSeenDeployTime != observedTime;

                    if (!presenceFlipped && !timeChanged)
                    {
                        _logger.LogDebug(
                            "DeployWatcher event sequence={Sequence} matches last-seen state; skipping rediscover.",
                            ev.Sequence);
                        continue;
                    }

                    lastSeenDeployTime = observedTime;
                    lastSeenPresent = observedPresent;

                    string? scopeHint = observedTime?.ToString("O");
                    var args = new RediscoveryEventArgs("deploy-time-changed", scopeHint);

                    _logger.LogInformation(
                        "DeployWatcher raising rediscover sequence={Sequence} reason={Reason} scopeHint={ScopeHint}.",
                        ev.Sequence, args.Reason, args.ScopeHint);

                    RaiseRediscovery(args);
                }

                if (sawEvent)
                {
                    // Stream completed normally — gateway closed the subscription. Re-open
                    // immediately if we weren't asked to stop.
                    _logger.LogDebug("DeployWatcher stream completed; re-subscribing.");
                    continue;
                }

                // The stream closed without delivering anything. Pause before re-opening
                // so a source that completes immediately cannot turn this loop into a
                // busy spin; cancellation during the pause is handled by the filter below.
                _logger.LogDebug(
                    "DeployWatcher stream completed without events; re-subscribing in {Delay}.",
                    emptyStreamBackoff);
                await Task.Delay(emptyStreamBackoff, cancellationToken).ConfigureAwait(false);
                emptyStreamBackoff = NextBackoff(emptyStreamBackoff);
            }
            catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
            {
                break;
            }
            catch (Exception ex)
            {
                attempt++;
                TimeSpan jitterAmount = _jitter?.Invoke(attempt) ?? RandomJitter(backoff);
                TimeSpan delay = backoff + jitterAmount;
                _logger.LogWarning(ex,
                    "DeployWatcher source threw; retrying in {Delay} (attempt {Attempt}, last-seen time {LastSeen}).",
                    delay, attempt, lastSeenDeployTime);

                try
                {
                    await Task.Delay(delay, cancellationToken).ConfigureAwait(false);
                }
                catch (OperationCanceledException)
                {
                    break;
                }

                // Exponential backoff capped at _maxBackoff.
                backoff = NextBackoff(backoff);
            }
        }
    }

    /// <summary>
    /// Notifies every subscriber of <see cref="OnRediscoveryNeeded"/> individually so a
    /// throwing subscriber is logged without starving the ones registered after it.
    /// </summary>
    private void RaiseRediscovery(RediscoveryEventArgs args)
    {
        EventHandler<RediscoveryEventArgs>? handlers = OnRediscoveryNeeded;
        if (handlers is null) return;

        foreach (EventHandler<RediscoveryEventArgs> handler in handlers.GetInvocationList())
        {
            try
            {
                handler(this, args);
            }
            catch (Exception handlerEx)
            {
                _logger.LogError(handlerEx,
                    "DeployWatcher subscriber threw while handling rediscover; continuing.");
            }
        }
    }

    /// <summary>
    /// Doubles <paramref name="current"/>, caps it at the configured maximum, and never
    /// returns less than the configured initial backoff.
    /// </summary>
    private TimeSpan NextBackoff(TimeSpan current)
    {
        var doubled = TimeSpan.FromTicks(Math.Min(_maxBackoff.Ticks, current.Ticks * 2));
        return doubled < _initialBackoff ? _initialBackoff : doubled;
    }

    /// <summary>
    /// Returns a random, non-negative jitter of less than 25% of
    /// <paramref name="baseDelay"/> (the upper bound is at least one tick).
    /// </summary>
    private static TimeSpan RandomJitter(TimeSpan baseDelay)
    {
        long maxTicks = Math.Max(1L, baseDelay.Ticks / 4);
        long ticks = Random.Shared.NextInt64(0, maxTicks);
        return TimeSpan.FromTicks(ticks);
    }
}
|
||||
@@ -0,0 +1,91 @@
|
||||
using MxGateway.Contracts.Proto.Galaxy;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Browse;
|
||||
|
||||
/// <summary>
/// Translates a Galaxy object hierarchy (from <see cref="IGalaxyHierarchySource"/>) into
/// <see cref="IAddressSpaceBuilder"/> calls — folders for each gobject, variables for
/// each dynamic attribute. Alarm-bearing attributes get all five sub-attribute refs
/// populated via <see cref="AlarmRefBuilder"/> so the server-level alarm subsystem
/// (PR 2.2) can subscribe + ack without help from the driver.
/// </summary>
/// <remarks>
/// Hierarchy materialisation rules (mirror legacy <c>MxAccessGalaxyBackend.DiscoverAsync</c>):
/// <list type="bullet">
///   <item>Browse name = <c>contained_name</c> when present; falls back to <c>tag_name</c>.</item>
///   <item>Folder per gobject; variables placed inside their owner folder.</item>
///   <item>Variable's full reference = the gateway-supplied <c>full_tag_reference</c>
///     (with a trailing <c>[]</c> removed) when present, otherwise
///     <c>tag_name.attribute_name</c> — the format MXAccess expects for read/write
///     addressing (translated from the contained-name browse path).</item>
///   <item>Hierarchy is rendered flat (one folder per gobject under the driver root) for
///     this PR. PR 4.W's address-space wiring revisits whether to nest under
///     <c>parent_gobject_id</c> for a true tree shape.</item>
/// </list>
/// </remarks>
public sealed class GalaxyDiscoverer
{
    private readonly IGalaxyHierarchySource _source;

    /// <summary>Creates a discoverer that reads the hierarchy from <paramref name="source"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public GalaxyDiscoverer(IGalaxyHierarchySource source)
    {
        ArgumentNullException.ThrowIfNull(source);
        _source = source;
    }

    /// <summary>
    /// Drive the supplied builder with one folder + N variables per Galaxy object the
    /// gateway returns. Idempotent — caller can re-invoke after a redeploy event.
    /// </summary>
    /// <param name="builder">Address-space builder that receives the folders and variables.</param>
    /// <param name="cancellationToken">Forwarded to the hierarchy fetch.</param>
    /// <exception cref="ArgumentNullException"><paramref name="builder"/> is null.</exception>
    public async Task DiscoverAsync(IAddressSpaceBuilder builder, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(builder);

        IReadOnlyList<GalaxyObject> hierarchy =
            await _source.GetHierarchyAsync(cancellationToken).ConfigureAwait(false);

        foreach (GalaxyObject galaxyObject in hierarchy)
        {
            var nodeName = !string.IsNullOrEmpty(galaxyObject.ContainedName)
                ? galaxyObject.ContainedName
                : galaxyObject.TagName;

            // Objects with neither a contained name nor a tag name have no usable identity.
            if (string.IsNullOrEmpty(nodeName))
            {
                continue;
            }

            var objectFolder = builder.Folder(nodeName, nodeName);

            foreach (var attribute in galaxyObject.Attributes)
            {
                if (string.IsNullOrEmpty(attribute.AttributeName))
                {
                    continue;
                }

                string reference;
                if (string.IsNullOrEmpty(attribute.FullTagReference))
                {
                    reference = $"{galaxyObject.TagName}.{attribute.AttributeName}";
                }
                else
                {
                    reference = StripArraySuffix(attribute.FullTagReference);
                }

                // An array dimension is only reported for array attributes that carry a
                // positive, explicitly-present dimension.
                uint? arrayLength = null;
                if (attribute.IsArray && attribute.ArrayDimensionPresent && attribute.ArrayDimension > 0)
                {
                    arrayLength = (uint)attribute.ArrayDimension;
                }

                var attributeInfo = new DriverAttributeInfo(
                    FullName: reference,
                    DriverDataType: DataTypeMap.Map(attribute.MxDataType),
                    IsArray: attribute.IsArray,
                    ArrayDim: arrayLength,
                    SecurityClass: SecurityMap.Map(attribute.SecurityClassification),
                    IsHistorized: attribute.IsHistorized,
                    IsAlarm: attribute.IsAlarm);

                var variable = objectFolder.Variable(
                    attribute.AttributeName, attribute.AttributeName, attributeInfo);

                // Alarm-bearing attributes ship the full sub-attribute ref set so the server's
                // AlarmConditionService can subscribe + ack-write without re-deriving the names.
                if (attribute.IsAlarm)
                {
                    variable.MarkAsAlarmCondition(AlarmRefBuilder.Build(reference));
                }
            }
        }
    }

    // PR 5.W workaround for mxaccessgw GalaxyRepository.cs:173-175 — the gateway's
    // SQL appends `[]` to array-typed `full_tag_reference` values, but MxAccess COM
    // `IInstance.AddItem` doesn't accept `[]`-suffixed addresses (so any downstream
    // Subscribe/Read/Write through the worker would fail with the suffixed form).
    // Strip defensively here so the parity matrix can run today; remove once the
    // gw fix (mxaccessgw/requirements-array-suffix-fix.md) lands.
    private static string StripArraySuffix(string fullReference)
    {
        const string ArrayMarker = "[]";

        if (!fullReference.EndsWith(ArrayMarker, StringComparison.Ordinal))
        {
            return fullReference;
        }

        return fullReference.Substring(0, fullReference.Length - ArrayMarker.Length);
    }
}
|
||||
@@ -0,0 +1,26 @@
|
||||
using MxGateway.Client;
|
||||
using MxGateway.Contracts.Proto.Galaxy;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Browse;
|
||||
|
||||
/// <summary>
/// Default <see cref="IGalaxyDeployWatchSource"/> wrapping the gateway's
/// <see cref="GalaxyRepositoryClient"/>. Forwards
/// <c>WatchDeployEventsAsync(lastSeenDeployTime, ct)</c> verbatim — paging /
/// bootstrap suppression policy lives on the gateway, while
/// <see cref="DeployWatcher"/> owns the change-detection and reconnect-loop
/// concerns above this seam.
/// </summary>
public sealed class GatewayGalaxyDeployWatchSource : IGalaxyDeployWatchSource
{
    private readonly GalaxyRepositoryClient _client;

    /// <summary>Wraps the supplied repository client.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="client"/> is null.</exception>
    public GatewayGalaxyDeployWatchSource(GalaxyRepositoryClient client)
    {
        ArgumentNullException.ThrowIfNull(client);
        _client = client;
    }

    /// <summary>
    /// Opens the gateway deploy-event stream, passing both arguments through unchanged.
    /// </summary>
    public IAsyncEnumerable<DeployEvent> WatchAsync(
        DateTimeOffset? lastSeenDeployTime,
        CancellationToken cancellationToken)
    {
        return _client.WatchDeployEventsAsync(lastSeenDeployTime, cancellationToken);
    }
}
|
||||
@@ -0,0 +1,21 @@
|
||||
using MxGateway.Client;
|
||||
using MxGateway.Contracts.Proto.Galaxy;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Browse;
|
||||
|
||||
/// <summary>
/// Default <see cref="IGalaxyHierarchySource"/> wrapping the gateway's
/// <see cref="GalaxyRepositoryClient"/>. Pages internally via the client's overload.
/// </summary>
public sealed class GatewayGalaxyHierarchySource : IGalaxyHierarchySource
{
    private readonly GalaxyRepositoryClient _client;

    /// <summary>Wraps the supplied repository client.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="client"/> is null.</exception>
    public GatewayGalaxyHierarchySource(GalaxyRepositoryClient client)
    {
        ArgumentNullException.ThrowIfNull(client);
        _client = client;
    }

    /// <summary>Delegates to the client's <c>DiscoverHierarchyAsync</c>.</summary>
    public Task<IReadOnlyList<GalaxyObject>> GetHierarchyAsync(CancellationToken cancellationToken)
    {
        return _client.DiscoverHierarchyAsync(cancellationToken);
    }
}
|
||||
@@ -0,0 +1,24 @@
|
||||
using MxGateway.Contracts.Proto.Galaxy;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Browse;
|
||||
|
||||
/// <summary>
/// Driver-side seam between <see cref="DeployWatcher"/> and the gateway. Production
/// wraps <c>GalaxyRepositoryClient.WatchDeployEventsAsync</c>; tests substitute a fake
/// yielding controlled <see cref="DeployEvent"/> instances so the watcher's bootstrap
/// suppression, change detection, reconnect, and shutdown semantics can be exercised
/// without a real gRPC stream.
/// </summary>
public interface IGalaxyDeployWatchSource
{
    /// <summary>
    /// Subscribe to Galaxy deploy events. The server emits a bootstrap event with the
    /// current cached state on subscribe, then one event per new
    /// <c>time_of_last_deploy</c>.
    /// </summary>
    /// <param name="lastSeenDeployTime">
    /// Deploy time the caller already knows about. Passing it asks the gateway to
    /// suppress its bootstrap when the caller already has the current value;
    /// <see cref="DeployWatcher"/> still suppresses the first event it observes locally
    /// so a transport reconnect doesn't re-fire on identical state.
    /// </param>
    /// <param name="cancellationToken">Token used to end the subscription.</param>
    /// <returns>The asynchronous stream of deploy events.</returns>
    IAsyncEnumerable<DeployEvent> WatchAsync(DateTimeOffset? lastSeenDeployTime, CancellationToken cancellationToken);
}
|
||||
@@ -0,0 +1,19 @@
|
||||
using MxGateway.Contracts.Proto.Galaxy;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Browse;
|
||||
|
||||
/// <summary>
/// Driver-side seam between <see cref="GalaxyDiscoverer"/> and the gateway. Production
/// wraps <c>GalaxyRepositoryClient</c>; tests substitute a fake returning canned
/// <see cref="GalaxyObject"/> rows so the discoverer's translation logic can be exercised
/// without a real gRPC channel.
/// </summary>
public interface IGalaxyHierarchySource
{
    /// <summary>
    /// Returns the full materialised Galaxy hierarchy. The gateway client pages
    /// internally; this interface deliberately exposes only the post-paging shape so
    /// callers don't reimplement paging.
    /// </summary>
    /// <param name="cancellationToken">Token used to abandon the fetch.</param>
    /// <returns>Every Galaxy object in the hierarchy.</returns>
    Task<IReadOnlyList<GalaxyObject>> GetHierarchyAsync(
        CancellationToken cancellationToken);
}
|
||||
@@ -0,0 +1,25 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Browse;
|
||||
|
||||
/// <summary>
/// Translates Galaxy <c>security_classification</c> integer codes into
/// <see cref="SecurityClassification"/>. Ported from the legacy
/// <c>GalaxyProxyDriver.MapSecurity</c>; unknown codes fall back to
/// <see cref="SecurityClassification.FreeAccess"/> so a forward-compatible Galaxy
/// deployment with new classifications doesn't break discovery.
/// </summary>
internal static class SecurityMap
{
    /// <summary>Maps a single <c>security_classification</c> code.</summary>
    /// <param name="mxSec">The raw Galaxy security-classification code.</param>
    /// <returns>
    /// The matching <see cref="SecurityClassification"/>;
    /// <see cref="SecurityClassification.FreeAccess"/> when the code is not one of the
    /// known values 0–6.
    /// </returns>
    public static SecurityClassification Map(int mxSec)
    {
        switch (mxSec)
        {
            case 1:
                return SecurityClassification.Operate;
            case 2:
                return SecurityClassification.SecuredWrite;
            case 3:
                return SecurityClassification.VerifiedWrite;
            case 4:
                return SecurityClassification.Tune;
            case 5:
                return SecurityClassification.Configure;
            case 6:
                return SecurityClassification.ViewOnly;
            case 0:
            default:
                // Code 0 is the explicit free-access class; unrecognised codes share it.
                return SecurityClassification.FreeAccess;
        }
    }
}
|
||||
@@ -0,0 +1,30 @@
|
||||
using MxGateway.Contracts.Proto.Galaxy;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Browse;
|
||||
|
||||
/// <summary>
/// PR 6.1 — Decorator that emits one <see cref="System.Diagnostics.Activity"/> span
/// per <c>GetHierarchy</c> RPC. <c>galaxy.object_count</c> on the span lets ops
/// correlate slow Discover passes with Galaxy size without instrumenting the
/// discoverer's translation step.
/// </summary>
internal sealed class TracedGalaxyHierarchySource(IGalaxyHierarchySource inner, string clientName) : IGalaxyHierarchySource
{
    /// <summary>
    /// Calls the wrapped source inside a <c>galaxy.get_hierarchy</c> span tagged with
    /// the client name and, on success, the number of objects returned. A failure is
    /// recorded on the span and then rethrown unchanged.
    /// </summary>
    /// <param name="cancellationToken">Forwarded to the wrapped source.</param>
    /// <returns>The hierarchy returned by the wrapped source.</returns>
    public async Task<IReadOnlyList<GalaxyObject>> GetHierarchyAsync(CancellationToken cancellationToken)
    {
        // The activity is treated as optional throughout: every use below is
        // null-conditional so tracing can never change the outcome of the call.
        using var activity = GalaxyTelemetry.ActivitySource.StartActivity("galaxy.get_hierarchy");
        activity?.SetTag("galaxy.client", clientName);
        try
        {
            var hierarchy = await inner.GetHierarchyAsync(cancellationToken).ConfigureAwait(false);
            activity?.SetTag("galaxy.object_count", hierarchy.Count);
            return hierarchy;
        }
        catch (Exception ex)
        {
            // NOTE(review): RecordError is declared outside this file; the null-conditional
            // call keeps a missing activity from masking the original exception in case
            // the helper does not tolerate null — confirm against its definition.
            activity?.RecordError(ex);
            throw;
        }
    }
}
|
||||
@@ -0,0 +1,86 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Config;
|
||||
|
||||
/// <summary>
/// Driver-instance options for the in-process .NET 10 Galaxy driver, mapped to the
/// <c>DriverConfig</c> JSON column on the central config DB. The options are split into
/// nested records so the JSON structure mirrors the runtime shape and operators can
/// target individual sections (gateway endpoint, mxaccess client identity, reconnect
/// policy) without touching the rest.
/// </summary>
/// <param name="Gateway">Connection details for the MxAccess gateway (mxaccessgw repo).</param>
/// <param name="MxAccess">MXAccess-specific knobs surfaced through gw — client name, publishing interval, write-user.</param>
/// <param name="Repository">Galaxy Repository browse options consumed by the discoverer.</param>
/// <param name="Reconnect">Backoff knobs for the in-driver reconnect supervisor (PR 4.5).</param>
public sealed record GalaxyDriverOptions(
    GalaxyGatewayOptions Gateway,
    GalaxyMxAccessOptions MxAccess,
    GalaxyRepositoryOptions Repository,
    GalaxyReconnectOptions Reconnect);
|
||||
|
||||
/// <summary>
/// Connection details for the MxAccess gateway. <see cref="ApiKeySecretRef"/> resolves
/// through the server-side secret store (DPAPI for production, environment override for
/// dev) — the API key never appears in cleartext config.
/// </summary>
/// <remarks>
/// PR 6.5 tuning notes:
/// <list type="bullet">
///   <item><c>ConnectTimeoutSeconds = 10</c> — cold-start network path comfort margin;
///     soak runs never saw a successful connect take &gt;2s, so 10s is generous without
///     being lax.</item>
///   <item><c>DefaultCallTimeoutSeconds = 30</c> — bumped from 5s because a 50k-tag
///     SubscribeBulk can exceed 5s under MxAccess COM contention (the worker walks the
///     gw item list serially under the apartment lock). 30s leaves comfortable headroom
///     for the legitimate worst case while still failing fast on a wedged worker.</item>
///   <item><c>StreamTimeoutSeconds = 0</c> — unlimited; the StreamEvents RPC must run
///     for the lifetime of the driver. Set a finite value only for diagnostic runs.</item>
/// </list>
/// </remarks>
public sealed record GalaxyGatewayOptions(
    string Endpoint,
    string ApiKeySecretRef,
    bool UseTls = true,
    string? CaCertificatePath = null,
    int ConnectTimeoutSeconds = 10,
    int DefaultCallTimeoutSeconds = 30,
    int StreamTimeoutSeconds = 0);
|
||||
|
||||
/// <summary>
/// MXAccess-specific settings that the gateway forwards to the worker process.
/// </summary>
/// <param name="ClientName">
/// Wonderware client identity. MUST be unique per OtOpcUa instance — when two instances
/// share a name, the older session loses subscription state. Redundancy pairs (decision
/// #149) enforce uniqueness via install scripts.
/// </param>
/// <param name="PublishingIntervalMs">
/// Hint forwarded as <c>buffered_update_interval_ms</c> on subscribe; lets the worker
/// coalesce updates at the OPC UA publishing cadence rather than every COM tick.
/// </param>
/// <param name="WriteUserId">
/// Reserved for ArchestrA secured-write user mapping; PR 4.3 wires <c>WriteSecured</c>
/// routing against this id. 0 = anonymous.
/// </param>
/// <param name="EventPumpChannelCapacity">
/// Bounded-channel size between the EventPump's network-read loop and its listener
/// fan-out loop (PR 6.2). Default 50_000 = one second of headroom at 50k tags / 1Hz;
/// raise it when <c>galaxy.events.dropped</c> shows up under transient consumer
/// slowness, lower it on a memory-tight host where the headroom isn't needed.
/// </param>
public sealed record GalaxyMxAccessOptions(
    string ClientName,
    int PublishingIntervalMs = 1000,
    int WriteUserId = 0,
    int EventPumpChannelCapacity = 50_000);
|
||||
|
||||
/// <summary>
/// Browse-side settings for the Galaxy Repository, consumed by PR 4.1's
/// <c>GalaxyDiscoverer</c>.
/// </summary>
/// <param name="DiscoverPageSize">Page size setting for discovery; defaults to 5000.</param>
/// <param name="WatchDeployEvents">Deploy-event watching toggle; defaults to <c>true</c>.</param>
public sealed record GalaxyRepositoryOptions(
    int DiscoverPageSize = 5000,
    bool WatchDeployEvents = true);
|
||||
|
||||
/// <summary>
/// Backoff settings for the in-driver reconnect supervisor (PR 4.5). Replay-on-session-lost
/// calls the gw's <c>ReplaySubscriptions</c> RPC after reconnect rather than re-issuing
/// subscribe-bulk for every tag.
/// </summary>
/// <param name="InitialBackoffMs">Initial backoff in milliseconds; defaults to 500.</param>
/// <param name="MaxBackoffMs">Maximum backoff in milliseconds; defaults to 30_000.</param>
/// <param name="ReplayOnSessionLost">Replay-on-session-lost toggle; defaults to <c>true</c>.</param>
public sealed record GalaxyReconnectOptions(
    int InitialBackoffMs = 500,
    int MaxBackoffMs = 30_000,
    bool ReplayOnSessionLost = true);
|
||||
960
src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/GalaxyDriver.cs
Normal file
960
src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/GalaxyDriver.cs
Normal file
@@ -0,0 +1,960 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using MxGateway.Client;
|
||||
using MxGateway.Contracts.Proto;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Browse;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Config;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Health;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy;
|
||||
|
||||
/// <summary>
|
||||
/// In-process .NET 10 Galaxy driver — the v2 replacement for the Galaxy.Host /
|
||||
/// Galaxy.Proxy pair. PR 4.0 ships the project skeleton with <see cref="IDriver"/>
|
||||
/// bodies that wire to a future <c>IGalaxyGatewayClient</c> abstraction. Capability
|
||||
/// interfaces (browse, read, write, subscribe, history routing, host probes) land in
|
||||
/// PRs 4.1–4.7; the wiring sequence keeps every intermediate state buildable so the
|
||||
/// <c>Galaxy:Backend</c> flag (PR 4.W) can flip between legacy-host and mxgateway
|
||||
/// for parity testing.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// This driver is registered as a Tier A in-process driver alongside Modbus / S7 / etc.
|
||||
/// The legacy <c>GalaxyProxyDriver</c> (Driver.Galaxy.Proxy) coexists until PR 7.2;
|
||||
/// <see cref="GalaxyDriverFactoryExtensions"/> registers under driver-type name
|
||||
/// "GalaxyMxGateway" so both paths can be live simultaneously during parity testing.
|
||||
/// </remarks>
|
||||
public sealed class GalaxyDriver
|
||||
: IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IRediscoverable, IHostConnectivityProbe, IAlarmSource, IDisposable
|
||||
{
|
||||
private readonly string _driverInstanceId;
|
||||
private readonly GalaxyDriverOptions _options;
|
||||
private readonly ILogger<GalaxyDriver> _logger;
|
||||
|
||||
// PR 4.1 — IGalaxyHierarchySource is the test seam for browse. When null, the driver
|
||||
// lazily builds a GatewayGalaxyHierarchySource around a GalaxyRepositoryClient on
|
||||
// first DiscoverAsync. Tests inject a fake source via the internal ctor to exercise
|
||||
// GalaxyDiscoverer's translation logic without a real gRPC channel.
|
||||
private IGalaxyHierarchySource? _hierarchySource;
|
||||
private GalaxyRepositoryClient? _ownedRepositoryClient;
|
||||
|
||||
// PR 4.2 — IGalaxyDataReader is the test seam for IReadable. PR 4.4 supplies the
|
||||
// production implementation that wraps GalaxyMxSession's SubscribeBulk + StreamEvents
|
||||
// pump; until then ReadAsync throws NotSupportedException when the reader is null
|
||||
// (legacy-host backend handles reads in production via DriverNodeManager's
|
||||
// capability-routing).
|
||||
private IGalaxyDataReader? _dataReader;
|
||||
|
||||
// PR 4.3 — IGalaxyDataWriter is the test seam for IWritable. Production wraps
|
||||
// GalaxyMxSession via GatewayGalaxyDataWriter (Write / WriteSecured routing). The
|
||||
// per-tag SecurityClassification map is populated during ITagDiscovery and consumed
|
||||
// here at write time.
|
||||
private IGalaxyDataWriter? _dataWriter;
|
||||
private readonly System.Collections.Concurrent.ConcurrentDictionary<string, SecurityClassification>
|
||||
_securityByFullRef = new(StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
// PR 4.4 — subscription lifecycle. The pump consumes the gw event stream and fans
|
||||
// out OnDataChange events to every registered driver subscription via the registry's
|
||||
// reverse map. The subscriber is the test seam — production uses
|
||||
// GatewayGalaxySubscriber over a connected GalaxyMxSession.
|
||||
private IGalaxySubscriber? _subscriber;
|
||||
private readonly SubscriptionRegistry _subscriptions = new();
|
||||
private EventPump? _eventPump;
|
||||
private readonly Lock _pumpLock = new();
|
||||
|
||||
// PR B.2 — IAlarmSource implementation. Production-side acks route through
|
||||
// GatewayGalaxyAlarmAcknowledger which calls MxGatewayClient.AcknowledgeAlarmAsync
|
||||
// (PR E.2 SDK). Tests inject IGalaxyAlarmAcknowledger via the internal ctor to
|
||||
// exercise the wiring without a running gateway. The alarm event stream is
|
||||
// delivered by EventPump.OnAlarmTransition (PR B.1) — this driver is the
|
||||
// consumer that bridges it onto IAlarmSource.OnAlarmEvent.
|
||||
private IGalaxyAlarmAcknowledger? _alarmAcknowledger;
|
||||
private readonly Lock _alarmHandlersLock = new();
|
||||
private readonly HashSet<GalaxyAlarmSubscriptionHandle> _alarmSubscriptions = new();
|
||||
|
||||
// PR 4.W — production runtime owned by InitializeAsync. The driver builds these
|
||||
// when it opens a real gw session; tests bypass them by injecting seams via the
|
||||
// internal ctor.
|
||||
private GalaxyMxSession? _ownedMxSession;
|
||||
private MxGatewayClient? _ownedMxClient;
|
||||
|
||||
// PR 4.5 — reconnect supervisor. Reflects in DriverState.Degraded while not Healthy.
|
||||
private ReconnectSupervisor? _supervisor;
|
||||
|
||||
// PR 4.6 — IRediscoverable plumbing.
|
||||
private DeployWatcher? _deployWatcher;
|
||||
|
||||
// PR 4.7 — IHostConnectivityProbe plumbing. The aggregator owns the merged
|
||||
// transport+per-platform view; the forwarder is fed from the supervisor on
|
||||
// transport state transitions; the probe watcher subscribes ScanState attributes
|
||||
// for every discovered platform and pushes value changes to the aggregator.
|
||||
private readonly HostStatusAggregator _hostStatuses = new();
|
||||
private HostConnectivityForwarder? _transportForwarder;
|
||||
private PerPlatformProbeWatcher? _probeWatcher;
|
||||
|
||||
private DriverHealth _health = new(DriverState.Unknown, null, null);
|
||||
private bool _disposed;
|
||||
|
||||
/// <summary>
|
||||
/// Server-pushed data-change notification. Fires from the
|
||||
/// <see cref="EventPump"/>'s background loop; handlers should be cheap (or queue
|
||||
/// onto another thread) to avoid blocking the gw event stream.
|
||||
/// </summary>
|
||||
public event EventHandler<DataChangeEventArgs>? OnDataChange;
|
||||
|
||||
/// <summary>Fires when the gateway signals a deploy-time change (PR 4.6 DeployWatcher).</summary>
|
||||
public event EventHandler<RediscoveryEventArgs>? OnRediscoveryNeeded;
|
||||
|
||||
/// <summary>Fires when a host transitions Running ↔ Stopped (PR 4.7 HostStatusAggregator).</summary>
|
||||
public event EventHandler<HostStatusChangedEventArgs>? OnHostStatusChanged;
|
||||
|
||||
/// <inheritdoc />
|
||||
public event EventHandler<AlarmEventArgs>? OnAlarmEvent;
|
||||
|
||||
/// <summary>
/// Production constructor. Delegates to the seam-accepting overload with every seam
/// left <c>null</c>; with no seams injected, <see cref="InitializeAsync"/> builds the
/// gateway-backed runtime from <paramref name="options"/>.
/// </summary>
/// <param name="driverInstanceId">Non-blank identifier of this driver instance.</param>
/// <param name="options">Driver configuration; must not be <c>null</c>.</param>
/// <param name="logger">Optional logger; a null logger is substituted when omitted.</param>
public GalaxyDriver(
    string driverInstanceId,
    GalaxyDriverOptions options,
    ILogger<GalaxyDriver>? logger = null)
    : this(
        driverInstanceId: driverInstanceId,
        options: options,
        hierarchySource: null,
        dataReader: null,
        dataWriter: null,
        subscriber: null,
        alarmAcknowledger: null,
        logger: logger)
{
}
|
||||
|
||||
/// <summary>
/// Seam-accepting constructor used by tests: any of the hierarchy source, data
/// reader, data writer, subscriber, or alarm acknowledger may be supplied so
/// <see cref="DiscoverAsync"/>, <see cref="ReadAsync"/>, <see cref="WriteAsync"/>,
/// and <see cref="SubscribeAsync"/> run against canned data instead of real gRPC
/// channels.
/// </summary>
/// <exception cref="ArgumentException"><paramref name="driverInstanceId"/> is null, empty, or whitespace.</exception>
/// <exception cref="ArgumentNullException"><paramref name="options"/> is <c>null</c>.</exception>
internal GalaxyDriver(
    string driverInstanceId,
    GalaxyDriverOptions options,
    IGalaxyHierarchySource? hierarchySource,
    IGalaxyDataReader? dataReader = null,
    IGalaxyDataWriter? dataWriter = null,
    IGalaxySubscriber? subscriber = null,
    IGalaxyAlarmAcknowledger? alarmAcknowledger = null,
    ILogger<GalaxyDriver>? logger = null)
{
    // Validate in declaration order: the instance id first, then the options.
    if (string.IsNullOrWhiteSpace(driverInstanceId))
    {
        throw new ArgumentException("Driver instance id required.", nameof(driverInstanceId));
    }

    if (options is null)
    {
        throw new ArgumentNullException(nameof(options));
    }

    _driverInstanceId = driverInstanceId;
    _options = options;
    _logger = logger ?? NullLogger<GalaxyDriver>.Instance;

    // Seams are stored as given; a null seam is resolved later (or reported as
    // unsupported) by the capability that needs it.
    _hierarchySource = hierarchySource;
    _dataReader = dataReader;
    _dataWriter = dataWriter;
    _subscriber = subscriber;
    _alarmAcknowledger = alarmAcknowledger;

    // Re-raise the aggregator's host transitions as this driver's own event, with
    // the driver as sender.
    _hostStatuses.OnHostStatusChanged += (_, statusArgs) => OnHostStatusChanged?.Invoke(this, statusArgs);
}
|
||||
|
||||
/// <inheritdoc />
public string DriverInstanceId
{
    // The id validated and captured by the constructor.
    get { return _driverInstanceId; }
}
|
||||
|
||||
/// <inheritdoc />
public string DriverType
{
    // Shared with the factory registration so both report the same type name.
    get { return GalaxyDriverFactoryExtensions.DriverTypeName; }
}
|
||||
|
||||
/// <summary>The options instance handed to the constructor, exposed for tests.</summary>
internal GalaxyDriverOptions Options
{
    get { return _options; }
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Builds the gateway-backed runtime unless a subscriber, data writer, or hierarchy
/// source seam was injected through the internal constructor, then starts the deploy
/// watcher and marks the driver healthy. <paramref name="driverConfigJson"/> is not
/// read here; configuration comes from the options given at construction.
/// </remarks>
public async Task InitializeAsync(string driverConfigJson, CancellationToken cancellationToken)
{
    ObjectDisposedException.ThrowIf(_disposed, this);

    // Any one of these three seams means a test is driving the wired surface, so the
    // real gateway round-trip is skipped and the injected seams are left alone.
    var hasInjectedSeam =
        _subscriber is not null || _dataWriter is not null || _hierarchySource is not null;

    if (hasInjectedSeam)
    {
        _logger.LogDebug(
            "GalaxyDriver {InstanceId} initializing with pre-injected seams — production runtime build skipped",
            _driverInstanceId);
    }
    else
    {
        await BuildProductionRuntimeAsync(cancellationToken).ConfigureAwait(false);
    }

    StartDeployWatcher();

    _logger.LogInformation(
        "GalaxyDriver {InstanceId} initialized — endpoint={Endpoint} clientName={ClientName}",
        _driverInstanceId,
        _options.Gateway.Endpoint,
        _options.MxAccess.ClientName);

    // Health flips to Healthy only after everything above completed without throwing.
    _health = new DriverHealth(DriverState.Healthy, DateTime.UtcNow, null);
}
|
||||
|
||||
/// <summary>
/// Creates the gateway client and MX session from <c>_options</c>, connects the
/// session, and wires the per-capability components on top of it: traced subscriber
/// and writer, the reconnect supervisor (with <see cref="ReopenAsync"/> and
/// <see cref="ReplayAsync"/> as its callbacks), the transport host-status forwarder,
/// the per-platform probe watcher, and the alarm acknowledger.
/// </summary>
/// <param name="cancellationToken">Cancels the initial session connect.</param>
private async Task BuildProductionRuntimeAsync(CancellationToken cancellationToken)
{
    var gatewayClientOptions = BuildClientOptions(_options.Gateway);

    // The owned fields are assigned before ConnectAsync, so they stay populated even
    // if the connect attempt throws.
    var client = MxGatewayClient.Create(gatewayClientOptions);
    _ownedMxClient = client;
    var session = new GalaxyMxSession(_options.MxAccess, _logger);
    _ownedMxSession = session;
    await session.ConnectAsync(gatewayClientOptions, cancellationToken).ConfigureAwait(false);

    var clientName = _options.MxAccess.ClientName;

    // Tracing decorators around the gateway-facing seams (PR 6.1): Subscribe /
    // Unsubscribe / Write / StreamEvents calls emit spans on the
    // "ZB.MOM.WW.OtOpcUa.Driver.Galaxy" ActivitySource, so the host's tracing
    // listener can consume them without this driver referencing OpenTelemetry.
    IGalaxySubscriber tracedSubscriber = new TracedGalaxySubscriber(
        new GatewayGalaxySubscriber(session), clientName);
    _subscriber = tracedSubscriber;

    var gatewayWriter = new GatewayGalaxyDataWriter(session, _options.MxAccess.WriteUserId, _logger);
    _dataWriter = new TracedGalaxyDataWriter(gatewayWriter, clientName);

    var reconnectOptions = new ReconnectOptions(
        InitialBackoffOverride: TimeSpan.FromMilliseconds(_options.Reconnect.InitialBackoffMs),
        MaxBackoffOverride: TimeSpan.FromMilliseconds(_options.Reconnect.MaxBackoffMs));
    _supervisor = new ReconnectSupervisor(
        reopen: ReopenAsync,
        replay: ReplayAsync,
        options: reconnectOptions,
        logger: _logger);

    // The session has just connected, so the transport starts out Running.
    _transportForwarder = new HostConnectivityForwarder(clientName, _hostStatuses, _logger);
    _transportForwarder.SetTransport(HostState.Running);

    _supervisor.StateChanged += OnSupervisorStateChanged;

    _probeWatcher = new PerPlatformProbeWatcher(
        tracedSubscriber,
        _hostStatuses,
        _logger,
        bufferedUpdateIntervalMs: _options.MxAccess.PublishingIntervalMs);

    // PR B.2: an injected acknowledger wins; otherwise bind one to the live client.
    if (_alarmAcknowledger is null)
    {
        _alarmAcknowledger = new GatewayGalaxyAlarmAcknowledger(client, session, _logger);
    }
}
|
||||
|
||||
/// <summary>
/// Reopen callback handed to <see cref="ReconnectSupervisor"/>. Calls
/// <c>ConnectAsync</c> again on the owned session with freshly built client options;
/// a no-op when the driver owns no session.
/// </summary>
/// <param name="cancellationToken">Cancels the connect attempt.</param>
private async Task ReopenAsync(CancellationToken cancellationToken)
{
    var session = _ownedMxSession;
    if (session is null)
    {
        return;
    }

    // Options are rebuilt on every attempt, which also re-resolves the API key reference.
    var gatewayClientOptions = BuildClientOptions(_options.Gateway);
    await session.ConnectAsync(gatewayClientOptions, cancellationToken).ConfigureAwait(false);
}
|
||||
|
||||
/// <summary>
/// Replay callback handed to <see cref="ReconnectSupervisor"/>. Collects the distinct
/// (case-insensitive) tag references across every registered subscription and issues
/// one SubscribeBulk for them at the configured default publishing interval. PR 6.x
/// can swap this for the gateway's batched <c>ReplaySubscriptionsCommand</c> once it ships.
/// </summary>
/// <param name="cancellationToken">Cancels the SubscribeBulk call.</param>
private async Task ReplayAsync(CancellationToken cancellationToken)
{
    var subscriber = _subscriber;
    if (subscriber is null)
    {
        return;
    }

    var activeBindings = _subscriptions.SnapshotAllBindings();
    if (activeBindings.Count == 0)
    {
        return;
    }

    // First occurrence wins, so the order of the replayed tag list is stable.
    var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    var distinctRefs = new List<string>();
    foreach (var binding in activeBindings)
    {
        if (seen.Add(binding.FullReference))
        {
            distinctRefs.Add(binding.FullReference);
        }
    }

    var tagRefs = distinctRefs.ToArray();

    // NOTE(review): the SubscribeBulk results (and any new item handles they carry)
    // are discarded here; confirm the registry does not need re-binding after a reconnect.
    await subscriber
        .SubscribeBulkAsync(tagRefs, _options.MxAccess.PublishingIntervalMs, cancellationToken)
        .ConfigureAwait(false);

    _logger.LogInformation(
        "GalaxyDriver {InstanceId} replay completed — {Count} tags re-subscribed",
        _driverInstanceId,
        tagRefs.Length);
}
|
||||
|
||||
/// <summary>
/// Mirrors a reconnect-supervisor transition into the cached driver health and the
/// transport host-status forwarder.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="transition">The transition; only its next state and cause are read.</param>
private void OnSupervisorStateChanged(object? sender, StateTransition transition)
{
    if (transition.Next == ReconnectSupervisor.State.Healthy)
    {
        _health = new DriverHealth(DriverState.Healthy, DateTime.UtcNow, null);
    }
    else
    {
        // Any non-Healthy state is Degraded; the last-successful-read timestamp is
        // carried over and the transition's cause becomes the error text.
        _health = new DriverHealth(DriverState.Degraded, _health.LastSuccessfulRead, transition.Cause);
    }

    var forwarder = _transportForwarder;
    if (forwarder is null)
    {
        return;
    }

    var transportState = transition.Next == ReconnectSupervisor.State.Healthy
        ? HostState.Running
        : HostState.Stopped;
    forwarder.SetTransport(transportState);
}
|
||||
|
||||
/// <summary>
/// Resolves <c>Gateway.ApiKeySecretRef</c> to the API-key string. Three forms are
/// supported, evaluated in order (prefixes match case-insensitively):
/// <list type="number">
///   <item><c>env:NAME</c> — reads <c>Environment.GetEnvironmentVariable(NAME)</c> and
///   trims surrounding whitespace. Throws when the variable is unset or blank, so a
///   misconfigured deployment fails fast at InitializeAsync rather than silently
///   sending an empty key.</item>
///   <item><c>file:PATH</c> — reads the text of <c>PATH</c> and trims surrounding
///   whitespace. Lets operators stash the key in an ACL'd file outside the repo (the
///   same pattern as the legacy <c>.local/galaxy-host-secret.txt</c>). Throws when the
///   file is missing or blank.</item>
///   <item>Anything else — returned unchanged as the literal API key. Convenient for
///   dev, and keeps configs that pre-date this resolver working.</item>
/// </list>
/// A future PR can swap any of these arms for a DPAPI-backed lookup without changing
/// the call site.
/// </summary>
/// <param name="secretRef">The configured secret reference; must not be null, empty, or whitespace.</param>
/// <returns>The resolved, non-empty API key.</returns>
/// <exception cref="ArgumentException"><paramref name="secretRef"/> is null, empty, or whitespace.</exception>
/// <exception cref="InvalidOperationException">The referenced env var or file is missing or blank.</exception>
internal static string ResolveApiKey(string secretRef)
{
    // A whitespace-only reference can never be a usable key; reject it up front
    // instead of passing it through the literal arm.
    ArgumentException.ThrowIfNullOrWhiteSpace(secretRef);

    const string EnvPrefix = "env:";
    const string FilePrefix = "file:";

    if (secretRef.StartsWith(EnvPrefix, StringComparison.OrdinalIgnoreCase))
    {
        var name = secretRef[EnvPrefix.Length..];

        // Trim like the file arm does, so a value padded with spaces or a trailing
        // newline resolves to the bare key and a blank value counts as unset.
        var value = Environment.GetEnvironmentVariable(name)?.Trim();
        return !string.IsNullOrEmpty(value)
            ? value
            : throw new InvalidOperationException(
                $"Galaxy.Gateway.ApiKeySecretRef='{secretRef}' resolves to env var '{name}', but it is unset.");
    }

    if (secretRef.StartsWith(FilePrefix, StringComparison.OrdinalIgnoreCase))
    {
        var path = secretRef[FilePrefix.Length..];
        if (!File.Exists(path))
        {
            throw new InvalidOperationException(
                $"Galaxy.Gateway.ApiKeySecretRef='{secretRef}' points at '{path}', which doesn't exist.");
        }

        var contents = File.ReadAllText(path).Trim();
        return !string.IsNullOrEmpty(contents)
            ? contents
            : throw new InvalidOperationException(
                $"Galaxy.Gateway.ApiKeySecretRef='{secretRef}' file '{path}' is empty.");
    }

    // No recognised prefix: the reference is the key itself.
    return secretRef;
}
|
||||
|
||||
/// <summary>
/// Translates the driver's gateway options into the options object the gateway
/// client consumes.
/// </summary>
/// <param name="gw">Gateway section of the driver options.</param>
/// <returns>
/// Client options with an absolute endpoint URI, the API key resolved through
/// <see cref="ResolveApiKey"/>, and second-based timeouts converted to
/// <see cref="TimeSpan"/>. The stream timeout is <c>null</c> when the configured value
/// is zero or negative.
/// </returns>
private static MxGatewayClientOptions BuildClientOptions(GalaxyGatewayOptions gw)
{
    // Computed in the same order as the resulting properties so that, when several
    // inputs are invalid, the first failure surfaced stays the same.
    var endpoint = new Uri(gw.Endpoint, UriKind.Absolute);
    var apiKey = ResolveApiKey(gw.ApiKeySecretRef);
    var connectTimeout = TimeSpan.FromSeconds(gw.ConnectTimeoutSeconds);
    var defaultCallTimeout = TimeSpan.FromSeconds(gw.DefaultCallTimeoutSeconds);

    TimeSpan? streamTimeout = null;
    if (gw.StreamTimeoutSeconds > 0)
    {
        streamTimeout = TimeSpan.FromSeconds(gw.StreamTimeoutSeconds);
    }

    return new MxGatewayClientOptions
    {
        Endpoint = endpoint,
        ApiKey = apiKey,
        UseTls = gw.UseTls,
        CaCertificatePath = gw.CaCertificatePath,
        ConnectTimeout = connectTimeout,
        DefaultCallTimeout = defaultCallTimeout,
        StreamTimeout = streamTimeout,
    };
}
|
||||
|
||||
/// <summary>
/// Starts the deploy watcher when <c>Repository.WatchDeployEvents</c> is enabled and
/// the driver already has either a repository client or a hierarchy source. The
/// watcher's rediscovery signal is re-raised as <see cref="OnRediscoveryNeeded"/>.
/// </summary>
private void StartDeployWatcher()
{
    // NOTE(review): with neither a repository client nor a hierarchy source present
    // (the state BuildProductionRuntimeAsync leaves, as far as this file shows) this
    // returns before starting the watcher; confirm that is intended.
    if (!_options.Repository.WatchDeployEvents
        || (_ownedRepositoryClient is null && _hierarchySource is null))
    {
        return;
    }

    // Reuse the repository client when one already exists; otherwise create it here
    // so the watcher has a target.
    _ownedRepositoryClient ??= MxGateway.Client.GalaxyRepositoryClient.Create(
        BuildClientOptions(_options.Gateway));

    var watchSource = new GatewayGalaxyDeployWatchSource(_ownedRepositoryClient);
    var watcher = new DeployWatcher(watchSource, _logger);
    _deployWatcher = watcher;
    watcher.OnRediscoveryNeeded += (_, rediscoveryArgs) => OnRediscoveryNeeded?.Invoke(this, rediscoveryArgs);

    // Fire-and-forget: the returned task is deliberately not awaited.
    _ = watcher.StartAsync(CancellationToken.None);
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Skeleton implementation: it only refreshes the cached health to Healthy with the
/// current UTC time. <paramref name="driverConfigJson"/> and
/// <paramref name="cancellationToken"/> are not read. Throws synchronously when the
/// driver has been disposed.
/// </remarks>
public Task ReinitializeAsync(string driverConfigJson, CancellationToken cancellationToken)
{
    ObjectDisposedException.ThrowIf(_disposed, this);

    // PR 4.5's reconnect supervisor is expected to swap the gateway-client options
    // under the lock; until then an in-place reapply is just a health refresh.
    var refreshed = new DriverHealth(DriverState.Healthy, DateTime.UtcNow, null);
    _health = refreshed;

    return Task.CompletedTask;
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Logs the shutdown and resets the cached health state to Unknown, keeping the
/// last-successful-read timestamp. A no-op once the driver is disposed. No runtime
/// component is stopped here.
/// </remarks>
public Task ShutdownAsync(CancellationToken cancellationToken)
{
    if (!_disposed)
    {
        _logger.LogInformation("GalaxyDriver {InstanceId} shutting down", _driverInstanceId);

        var lastRead = _health.LastSuccessfulRead;
        _health = new DriverHealth(DriverState.Unknown, lastRead, null);
    }

    return Task.CompletedTask;
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Returns the cached health unless the reconnect supervisor reports itself
/// degraded, in which case a Degraded snapshot carrying the supervisor's last error
/// is returned instead.
/// </remarks>
public DriverHealth GetHealth()
{
    var supervisor = _supervisor;
    var recovering = supervisor is not null && supervisor.IsDegraded == true;

    if (!recovering)
    {
        return _health;
    }

    // The cached value reflects the last successful operation; an ongoing recovery
    // must surface as Degraded regardless.
    return new DriverHealth(DriverState.Degraded, _health.LastSuccessfulRead, supervisor!.LastError);
}
|
||||
|
||||
// ===== IHostConnectivityProbe (PR 4.7 wire-up) =====
|
||||
|
||||
/// <inheritdoc />
public IReadOnlyList<HostConnectivityStatus> GetHostStatuses()
{
    // Delegates to the aggregator's snapshot of its current view.
    return _hostStatuses.Snapshot();
}
|
||||
|
||||
/// <inheritdoc />
public long GetMemoryFootprint()
{
    // Placeholder: always reports zero. PR 4.4 sets this from SubscriptionRegistry size.
    return 0;
}
|
||||
|
||||
/// <inheritdoc />
public Task FlushOptionalCachesAsync(CancellationToken cancellationToken)
{
    // Nothing is flushed here; completes immediately.
    return Task.CompletedTask;
}
|
||||
|
||||
// ===== ITagDiscovery (PR 4.1) =====
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Runs <c>GalaxyDiscoverer</c> against the hierarchy source (building the default
/// source on first use) through a builder wrapper that is handed the per-tag
/// security map. When a probe watcher exists, it is then re-synced with the tag
/// names of every object whose template chain contains <c>$WinPlatform</c> or
/// <c>$AppEngine</c>.
/// </remarks>
public async Task DiscoverAsync(IAddressSpaceBuilder builder, CancellationToken cancellationToken)
{
    ObjectDisposedException.ThrowIf(_disposed, this);
    ArgumentNullException.ThrowIfNull(builder);

    // PR 4.3: the wrapper captures each attribute's SecurityClassification into
    // _securityByFullRef, which the write path consults later.
    var securityCapture = new SecurityCapturingBuilder(builder, _securityByFullRef);

    if (_hierarchySource is null)
    {
        _hierarchySource = BuildDefaultHierarchySource();
    }

    var hierarchySource = _hierarchySource;
    await new GalaxyDiscoverer(hierarchySource)
        .DiscoverAsync(securityCapture, cancellationToken)
        .ConfigureAwait(false);

    var probeWatcher = _probeWatcher;
    if (probeWatcher is null)
    {
        return;
    }

    // PR 4.W: refresh probe membership so newly added platform / engine objects
    // start advising their ScanState attribute.
    static bool IsPlatformTemplate(string? templateName) =>
        string.Equals(templateName, "$WinPlatform", StringComparison.OrdinalIgnoreCase)
        || string.Equals(templateName, "$AppEngine", StringComparison.OrdinalIgnoreCase);

    var hierarchy = await hierarchySource.GetHierarchyAsync(cancellationToken).ConfigureAwait(false);
    var platformObjects = hierarchy.Where(o => o.TemplateChain.Any(t => IsPlatformTemplate(t)));
    var platformTagNames = platformObjects
        .Select(o => o.TagName)
        .Where(tagName => !string.IsNullOrEmpty(tagName));

    await probeWatcher.SyncPlatformsAsync(platformTagNames, cancellationToken).ConfigureAwait(false);
}
|
||||
|
||||
/// <summary>
/// Looks up the security classification recorded for a tag reference, matching the
/// key case-insensitively.
/// </summary>
/// <param name="fullReference">Full tag reference to look up.</param>
/// <returns>
/// The recorded classification, or <c>SecurityClassification.FreeAccess</c> when the
/// reference has no entry.
/// </returns>
private SecurityClassification ResolveSecurity(string fullReference)
{
    if (_securityByFullRef.TryGetValue(fullReference, out var recorded))
    {
        return recorded;
    }

    return SecurityClassification.FreeAccess;
}
|
||||
|
||||
// ===== IReadable =====
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// An empty request yields an empty result. Otherwise an injected data reader
/// handles the call; failing that, the read is synthesised through a one-shot
/// subscription, which requires a subscriber.
/// </remarks>
/// <exception cref="NotSupportedException">Neither a data reader nor a subscriber is available.</exception>
public Task<IReadOnlyList<DataValueSnapshot>> ReadAsync(
    IReadOnlyList<string> fullReferences, CancellationToken cancellationToken)
{
    ObjectDisposedException.ThrowIf(_disposed, this);
    ArgumentNullException.ThrowIfNull(fullReferences);

    if (fullReferences.Count == 0)
    {
        return Task.FromResult<IReadOnlyList<DataValueSnapshot>>([]);
    }

    // Test-only path: a canned reader injected through the internal ctor takes precedence.
    var injectedReader = _dataReader;
    if (injectedReader is not null)
    {
        return injectedReader.ReadAsync(fullReferences, cancellationToken);
    }

    if (_subscriber is not null)
    {
        return ReadViaSubscribeOnceAsync(fullReferences, cancellationToken);
    }

    throw new NotSupportedException(
        "GalaxyDriver.ReadAsync requires a connected GalaxyMxSession (production runtime not built). " +
        "Either inject a test seam via the internal ctor or call InitializeAsync against a real gateway.");
}
|
||||
|
||||
/// <summary>
/// Production read path. MxAccess has no one-shot Read RPC — every value comes
/// through the event stream. A Read is synthesised by:
/// <list type="number">
///   <item>Subscribing the requested tags through the existing
///   <see cref="SubscriptionRegistry"/> + <see cref="EventPump"/>.</item>
///   <item>Waiting for the first <c>OnDataChange</c> per tag (the gateway pushes the
///   current value as the initial event after a SubscribeBulk).</item>
///   <item>Unsubscribing.</item>
/// </list>
/// Tags the gateway rejects at SubscribeBulk time complete immediately with a Bad
/// snapshot; tags that have not published when the caller's cancellation token fires
/// complete with a BadTimeout snapshot. The result always has one snapshot per
/// requested reference, in input order.
/// </summary>
/// <param name="fullReferences">Tag references to read; duplicates (case-insensitive) share one pending value.</param>
/// <param name="cancellationToken">Bounds the wait; firing it yields BadTimeout snapshots rather than an exception.</param>
/// <returns>One snapshot per entry of <paramref name="fullReferences"/>, in the same order.</returns>
private async Task<IReadOnlyList<DataValueSnapshot>> ReadViaSubscribeOnceAsync(
    IReadOnlyList<string> fullReferences, CancellationToken cancellationToken)
{
    // OPC UA status codes. BadTimeout is 0x800A0000; 0x800B0000 is
    // BadServiceUnsupported and must not be used for a timed-out read.
    const uint StatusBad = 0x80000000u;
    const uint StatusBadTimeout = 0x800A0000u;

    static DataValueSnapshot ValuelessSnapshot(uint statusCode) => new DataValueSnapshot(
        Value: null,
        StatusCode: statusCode,
        SourceTimestampUtc: null,
        ServerTimestampUtc: DateTime.UtcNow);

    var pump = EnsureEventPumpStarted();
    var subscriptionId = _subscriptions.NextSubscriptionId();

    // One TaskCompletionSource per distinct reference so the handler can complete them
    // out of order as events arrive. Wired BEFORE SubscribeBulk so the handler is
    // already attached when the gateway pushes the first event.
    var pendingByRef = new Dictionary<string, TaskCompletionSource<DataValueSnapshot>>(
        StringComparer.OrdinalIgnoreCase);
    foreach (var fullRef in fullReferences.Distinct(StringComparer.OrdinalIgnoreCase))
    {
        pendingByRef[fullRef] = new TaskCompletionSource<DataValueSnapshot>(
            TaskCreationOptions.RunContinuationsAsynchronously);
    }

    EventHandler<DataChangeEventArgs> handler = (_, args) =>
    {
        // The pump fans out across every subscription on the driver; only events
        // carrying this read's subscription id may complete its pending entries.
        if (args.SubscriptionHandle is GalaxySubscriptionHandle gsh
            && gsh.SubscriptionId == subscriptionId
            && pendingByRef.TryGetValue(args.FullReference, out var tcs))
        {
            tcs.TrySetResult(args.Snapshot);
        }
    };
    pump.OnDataChange += handler;

    var bufferedIntervalMs = _options.MxAccess.PublishingIntervalMs;
    IReadOnlyList<SubscribeResult> results;
    try
    {
        results = await _subscriber!
            .SubscribeBulkAsync(fullReferences, bufferedIntervalMs, cancellationToken)
            .ConfigureAwait(false);
    }
    catch
    {
        pump.OnDataChange -= handler;
        throw;
    }

    // Index the results by tag address once (first result per address wins) so the
    // per-reference match below is a hash lookup rather than a scan of every result.
    var resultByTag = new Dictionary<string, SubscribeResult>(StringComparer.OrdinalIgnoreCase);
    foreach (var result in results)
    {
        if (result.TagAddress is { } tagAddress)
        {
            resultByTag.TryAdd(tagAddress, result);
        }
    }

    // Register bindings so the pump knows to dispatch events for these handles.
    // NOTE(review): an initial event that arrives between SubscribeBulk returning and
    // the Register call below may find no binding; confirm the pump/gateway ordering
    // makes that impossible.
    var bindings = new List<TagBinding>(fullReferences.Count);
    for (var i = 0; i < fullReferences.Count; i++)
    {
        var fullRef = fullReferences[i];
        resultByTag.TryGetValue(fullRef, out var match);
        var itemHandle = match is { WasSuccessful: true } ? match.ItemHandle : 0;
        bindings.Add(new TagBinding(fullRef, itemHandle));

        // Tags the gateway rejected up front complete with Bad now, so the wait
        // below does not stall on them.
        if (itemHandle <= 0 && pendingByRef.TryGetValue(fullRef, out var rejectedTcs))
        {
            rejectedTcs.TrySetResult(ValuelessSnapshot(StatusBad));
        }
    }
    _subscriptions.Register(subscriptionId, bindings);

    try
    {
        // When the caller's token fires before every value has arrived, the entries
        // still pending are filled with BadTimeout instead of throwing — Read
        // semantics let callers see partial results.
        using var registration = cancellationToken.Register(() =>
        {
            foreach (var tcs in pendingByRef.Values)
            {
                tcs.TrySetResult(ValuelessSnapshot(StatusBadTimeout));
            }
        });

        var snapshots = new DataValueSnapshot[fullReferences.Count];
        for (var i = 0; i < fullReferences.Count; i++)
        {
            snapshots[i] = await pendingByRef[fullReferences[i]].Task.ConfigureAwait(false);
        }
        return snapshots;
    }
    finally
    {
        pump.OnDataChange -= handler;

        // Drop the bindings, then unsubscribe the live handles. A failing
        // UnsubscribeBulk is not fatal: the registry entry is already removed, so it
        // is only logged.
        _subscriptions.Remove(subscriptionId);
        var liveHandles = bindings.Where(b => b.ItemHandle > 0).Select(b => b.ItemHandle).ToArray();
        if (liveHandles.Length > 0)
        {
            try
            {
                await _subscriber!.UnsubscribeBulkAsync(liveHandles, CancellationToken.None)
                    .ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex,
                    "GalaxyDriver.ReadViaSubscribeOnceAsync UnsubscribeBulk failed for {Count} handle(s) — registry already cleared.",
                    liveHandles.Length);
            }
        }
    }
}
|
||||
|
||||
// ===== IWritable (PR 4.3) =====
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// An empty request yields an empty result. Otherwise the call is delegated to the
/// data writer together with <see cref="ResolveSecurity"/> as the per-tag security
/// lookup.
/// </remarks>
/// <exception cref="NotSupportedException">No data writer is available.</exception>
public Task<IReadOnlyList<WriteResult>> WriteAsync(
    IReadOnlyList<WriteRequest> writes, CancellationToken cancellationToken)
{
    ObjectDisposedException.ThrowIf(_disposed, this);
    ArgumentNullException.ThrowIfNull(writes);

    if (writes.Count == 0)
    {
        return Task.FromResult<IReadOnlyList<WriteResult>>([]);
    }

    var writer = _dataWriter;
    if (writer is not null)
    {
        return writer.WriteAsync(writes, ResolveSecurity, cancellationToken);
    }

    // Mirrors the IReadable fallback: without a writer bound to a live session the
    // driver cannot write, and says so explicitly.
    throw new NotSupportedException(
        "GalaxyDriver.WriteAsync requires GatewayGalaxyDataWriter wired against a connected " +
        "GalaxyMxSession (PR 4.4). Until that lands, route writes through the legacy-host " +
        "backend (Galaxy:Backend=legacy-host).");
}
|
||||
|
||||
// ===== ISubscribable (PR 4.4) =====
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Starts the shared event pump if needed, issues one SubscribeBulk for the requested
/// tags, and registers one binding per requested reference in input order. Tags the
/// gateway rejected (or returned no result for) are logged and bound with item
/// handle 0. An empty request registers an empty subscription so Unsubscribe stays
/// symmetric.
/// </remarks>
/// <param name="fullReferences">Tag references to subscribe.</param>
/// <param name="publishingInterval">
/// Requested interval. Values below one millisecond (including zero and negative)
/// fall back to <c>MxAccess.PublishingIntervalMs</c>; values beyond
/// <see cref="int.MaxValue"/> milliseconds are clamped to it.
/// </param>
/// <param name="cancellationToken">Cancels the SubscribeBulk call.</param>
/// <returns>A handle identifying the new subscription.</returns>
/// <exception cref="NotSupportedException">No subscriber is available.</exception>
public async Task<ISubscriptionHandle> SubscribeAsync(
    IReadOnlyList<string> fullReferences, TimeSpan publishingInterval, CancellationToken cancellationToken)
{
    ObjectDisposedException.ThrowIf(_disposed, this);
    ArgumentNullException.ThrowIfNull(fullReferences);

    if (_subscriber is null)
    {
        throw new NotSupportedException(
            "GalaxyDriver.SubscribeAsync requires a connected GalaxyMxSession + GatewayGalaxySubscriber. " +
            "PR 4.W wires the production session; until then route subscriptions through the legacy-host backend.");
    }

    // The pump is cached in _eventPump by EnsureEventPumpStarted, so the returned
    // reference does not need to be held here.
    EnsureEventPumpStarted();
    var subscriptionId = _subscriptions.NextSubscriptionId();

    if (fullReferences.Count == 0)
    {
        // Empty subscriptions register but never bind anything — keeps Unsubscribe
        // symmetric for callers that conditionally add tags later.
        _subscriptions.Register(subscriptionId, []);
        return new GalaxySubscriptionHandle(subscriptionId);
    }

    // PR 6.3: a caller that leaves the interval unset (zero, negative, or below one
    // millisecond) gets the configured default. The upper clamp keeps the
    // double-to-int conversion in range; without it a very large interval converts
    // to a platform-dependent value and could silently select the default instead.
    var requestedMs = publishingInterval.TotalMilliseconds;
    var bufferedIntervalMs = requestedMs >= 1
        ? (int)Math.Min(requestedMs, int.MaxValue)
        : _options.MxAccess.PublishingIntervalMs;

    var results = await _subscriber
        .SubscribeBulkAsync(fullReferences, bufferedIntervalMs, cancellationToken)
        .ConfigureAwait(false);

    // Index the results by tag address once (first result per address wins) so the
    // per-reference match below is a hash lookup rather than a scan of every result.
    var resultByTag = new Dictionary<string, SubscribeResult>(StringComparer.OrdinalIgnoreCase);
    foreach (var result in results)
    {
        if (result.TagAddress is { } tagAddress)
        {
            resultByTag.TryAdd(tagAddress, result);
        }
    }

    // Build the binding list in input order. Failed entries are recorded with item
    // handle 0; the full per-tag error surface lands in PR 5.3's parity tests.
    var bindings = new List<TagBinding>(fullReferences.Count);
    for (var i = 0; i < fullReferences.Count; i++)
    {
        var fullRef = fullReferences[i];
        resultByTag.TryGetValue(fullRef, out var match);
        var itemHandle = match is { WasSuccessful: true } ? match.ItemHandle : 0;
        bindings.Add(new TagBinding(fullRef, itemHandle));

        if (match is null || !match.WasSuccessful)
        {
            _logger.LogWarning(
                "Galaxy subscribe for {FullRef} failed: {Error}",
                fullRef, match?.ErrorMessage ?? "<no result returned>");
        }
    }

    _subscriptions.Register(subscriptionId, bindings);
    return new GalaxySubscriptionHandle(subscriptionId);
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Removes the subscription from the registry first, then asks the subscriber to
/// release the item handles that were successfully bound. A failing UnsubscribeBulk
/// is logged as a warning and not rethrown. Unknown or already-removed handles are
/// ignored.
/// </remarks>
/// <exception cref="ArgumentException"><paramref name="handle"/> was not issued by this driver.</exception>
public async Task UnsubscribeAsync(ISubscriptionHandle handle, CancellationToken cancellationToken)
{
    ObjectDisposedException.ThrowIf(_disposed, this);
    ArgumentNullException.ThrowIfNull(handle);

    if (handle is not GalaxySubscriptionHandle galaxyHandle)
    {
        throw new ArgumentException(
            $"Subscription handle was not issued by this driver (expected GalaxySubscriptionHandle, got {handle.GetType().Name}).",
            nameof(handle));
    }

    var removedBindings = _subscriptions.Remove(galaxyHandle.SubscriptionId);
    if (removedBindings is null)
    {
        // Already removed, or never registered.
        return;
    }

    // Only positive item handles correspond to tags the gateway accepted.
    var liveItemHandles =
        (from binding in removedBindings
         where binding.ItemHandle > 0
         select binding.ItemHandle).ToArray();

    if (liveItemHandles.Length == 0)
    {
        return;
    }

    var subscriber = _subscriber;
    if (subscriber is null)
    {
        return;
    }

    try
    {
        await subscriber.UnsubscribeBulkAsync(liveItemHandles, cancellationToken).ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex,
            "Galaxy UnsubscribeBulk failed for subscription {SubscriptionId} — registry already cleared on driver side.",
            galaxyHandle.SubscriptionId);
    }
}
|
||||
|
||||
/// <summary>
/// Returns the driver's single <see cref="EventPump"/>, creating, wiring, and
/// starting it under <c>_pumpLock</c> on first use. Every subscription shares this
/// pump; fan-out goes through the <see cref="SubscriptionRegistry"/> rather than a
/// pump per subscription.
/// </summary>
/// <returns>The shared pump instance.</returns>
private EventPump EnsureEventPumpStarted()
{
    lock (_pumpLock)
    {
        if (_eventPump is null)
        {
            // NOTE(review): _subscriber is passed with the null-forgiving operator, and
            // SubscribeAlarmsAsync reaches this point without a null check of its own;
            // confirm how EventPump handles a null subscriber.
            var mxAccess = _options.MxAccess;
            var pump = new EventPump(
                _subscriber!,
                _subscriptions,
                _logger,
                channelCapacity: mxAccess.EventPumpChannelCapacity,
                clientName: mxAccess.ClientName);

            // The field is assigned before the handlers are attached and the pump is started.
            _eventPump = pump;
            pump.OnDataChange += OnPumpDataChange;
            pump.OnAlarmTransition += OnPumpAlarmTransition;
            pump.Start();
        }

        return _eventPump;
    }
}
|
||||
|
||||
// ===== IAlarmSource (PR B.2) =====
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Ensures the event pump is running and records a new sentinel handle.
/// <paramref name="sourceNodeIds"/> is only null-checked and
/// <paramref name="cancellationToken"/> is not read; no per-source filtering
/// happens here.
/// </remarks>
public Task<IAlarmSubscriptionHandle> SubscribeAlarmsAsync(
    IReadOnlyList<string> sourceNodeIds, CancellationToken cancellationToken)
{
    ObjectDisposedException.ThrowIf(_disposed, this);
    ArgumentNullException.ThrowIfNull(sourceNodeIds);

    // Alarm subscriptions are not multiplexed per source-node-id today: alarm events
    // share the gateway StreamEvents channel with data-change events once the gateway
    // emits the new family (PRs A.2 + A.3). The handle is a sentinel for symmetric
    // Unsubscribe; every active handle sees every transition and the server filters
    // by source node before raising Part 9 conditions. Same shape AbCip uses.
    EnsureEventPumpStarted();

    var handleId = Guid.NewGuid().ToString("N");
    var alarmHandle = new GalaxyAlarmSubscriptionHandle(handleId);

    lock (_alarmHandlersLock)
    {
        _alarmSubscriptions.Add(alarmHandle);
    }

    return Task.FromResult<IAlarmSubscriptionHandle>(alarmHandle);
}
|
||||
|
||||
/// <inheritdoc />
public Task UnsubscribeAlarmsAsync(IAlarmSubscriptionHandle handle, CancellationToken cancellationToken)
{
    ObjectDisposedException.ThrowIf(_disposed, this);
    ArgumentNullException.ThrowIfNull(handle);

    // Only the driver's own sentinel type is accepted; anything else is a caller error.
    if (handle is GalaxyAlarmSubscriptionHandle galaxyHandle)
    {
        lock (_alarmHandlersLock)
        {
            _alarmSubscriptions.Remove(galaxyHandle);
        }

        // Purely in-memory bookkeeping — no gateway call is made, so the token is unused.
        return Task.CompletedTask;
    }

    throw new ArgumentException(
        $"Subscription handle was not issued by this driver (expected GalaxyAlarmSubscriptionHandle, got {handle.GetType().Name}).",
        nameof(handle));
}
|
||||
|
||||
/// <inheritdoc />
public async Task AcknowledgeAsync(
    IReadOnlyList<AlarmAcknowledgeRequest> acknowledgements, CancellationToken cancellationToken)
{
    ObjectDisposedException.ThrowIf(_disposed, this);
    ArgumentNullException.ThrowIfNull(acknowledgements);

    // An empty batch returns before the acknowledger check, so it never throws.
    if (acknowledgements.Count == 0)
    {
        return;
    }

    if (_alarmAcknowledger is null)
    {
        throw new NotSupportedException(
            "GalaxyDriver.AcknowledgeAsync requires GatewayGalaxyAlarmAcknowledger wired against a connected " +
            "GalaxyMxSession (PR B.2). InitializeAsync must run before alarm acknowledgements can flow.");
    }

    // Acks go out strictly one after another: the gateway RPC takes a single alarm
    // reference per call, and serial issue keeps operator-comment ordering
    // deterministic without bursting the worker's STA queue. The server-side
    // per-condition Acknowledge path is the natural rate limit.
    foreach (var request in acknowledgements)
    {
        // ConditionId carries the alarm full reference for the Galaxy driver;
        // SourceNodeId (the OPC UA browse path, which the gateway cannot address) is
        // only the fallback when ConditionId is null or empty.
        var alarmFullReference = string.IsNullOrEmpty(request.ConditionId)
            ? request.SourceNodeId
            : request.ConditionId;
        var comment = request.Comment ?? string.Empty;

        await _alarmAcknowledger.AcknowledgeAsync(
            alarmFullReference,
            comment,
            operatorUser: string.Empty, // server-side ACL fills this from the OPC UA session
            cancellationToken).ConfigureAwait(false);
    }
}
|
||||
|
||||
/// <summary>
/// EventPump callback that converts a <see cref="GalaxyAlarmTransition"/> into
/// <see cref="AlarmEventArgs"/> and raises <see cref="OnAlarmEvent"/>. Nothing is
/// raised while no alarm subscription is registered, so transitions that arrive
/// before <see cref="SubscribeAlarmsAsync"/> has been called are dropped here.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="transition">The alarm transition delivered by the pump.</param>
private void OnPumpAlarmTransition(object? sender, GalaxyAlarmTransition transition)
{
    // Any active handle serves as the nominal "owner" of the event. The event is raised
    // once regardless of how many alarm subscriptions exist; per-source-node dispatch
    // is left to the downstream AlarmConditionService.
    GalaxyAlarmSubscriptionHandle? owner = null;
    lock (_alarmHandlersLock)
    {
        if (_alarmSubscriptions.Count > 0)
        {
            owner = _alarmSubscriptions.First();
        }
    }

    if (owner is null)
    {
        return;
    }

    // Empty optional strings are surfaced to consumers as null.
    var operatorComment = string.IsNullOrEmpty(transition.OperatorComment)
        ? null
        : transition.OperatorComment;
    var alarmCategory = string.IsNullOrEmpty(transition.Category)
        ? null
        : transition.Category;

    var alarmEvent = new AlarmEventArgs(
        SubscriptionHandle: owner,
        SourceNodeId: transition.SourceObjectReference,
        ConditionId: transition.AlarmFullReference,
        AlarmType: transition.AlarmTypeName,
        Message: transition.Description,
        Severity: transition.SeverityBucket,
        SourceTimestampUtc: transition.TransitionTimestampUtc,
        OperatorComment: operatorComment,
        OriginalRaiseTimestampUtc: transition.OriginalRaiseTimestampUtc,
        AlarmCategory: alarmCategory);

    try
    {
        OnAlarmEvent?.Invoke(this, alarmEvent);
    }
    catch (Exception ex)
    {
        // A throwing subscriber is logged and swallowed rather than propagated to the pump.
        _logger.LogWarning(ex,
            "GalaxyDriver OnAlarmEvent handler threw for {AlarmRef} — continuing.",
            transition.AlarmFullReference);
    }
}
|
||||
|
||||
/// <summary>
/// EventPump callback for data changes. Re-raises every event on the public
/// <see cref="OnDataChange"/> and, when a probe watcher exists and the reference ends
/// with <see cref="PerPlatformProbeWatcher.ProbeSuffix"/>, also hands the value to the
/// per-platform probe watcher (PR 4.7) so the watcher does not have to consume the
/// event stream itself.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="args">The data-change event delivered by the pump.</param>
private void OnPumpDataChange(object? sender, DataChangeEventArgs args)
{
    OnDataChange?.Invoke(this, args);

    if (_probeWatcher is null)
    {
        return;
    }

    if (!args.FullReference.EndsWith(PerPlatformProbeWatcher.ProbeSuffix, StringComparison.OrdinalIgnoreCase))
    {
        return;
    }

    // The probe decoder wants a raw quality byte. Rebuild it from the two severity bits
    // at the top of the StatusCode: 0 (Good) → 192, 1 (Uncertain) → 64, anything else → 0.
    var severityBits = (byte)((args.Snapshot.StatusCode >> 30) & 0x3);
    byte qualityByte = severityBits switch
    {
        0 => 192,
        1 => 64,
        _ => 0,
    };

    _probeWatcher.OnProbeValueChanged(args.FullReference, args.Snapshot.Value, qualityByte);
}
|
||||
|
||||
/// <summary>
/// Builds the default <see cref="IGalaxyHierarchySource"/> from <c>_options.Gateway</c>.
/// The <see cref="GalaxyRepositoryClient"/> created here is stored in
/// <c>_ownedRepositoryClient</c> and released by <see cref="Dispose"/>.
/// </summary>
/// <remarks>
/// NOTE(review): the original comment says tests bypass this by injecting their own
/// source through the internal constructor; that constructor is not visible from here.
/// </remarks>
/// <returns>A traced wrapper around the gateway-backed hierarchy source.</returns>
private IGalaxyHierarchySource BuildDefaultHierarchySource()
{
    var gateway = _options.Gateway;

    _ownedRepositoryClient = GalaxyRepositoryClient.Create(new MxGatewayClientOptions
    {
        Endpoint = new Uri(gateway.Endpoint, UriKind.Absolute),
        ApiKey = ResolveApiKey(gateway.ApiKeySecretRef),
        UseTls = gateway.UseTls,
        CaCertificatePath = gateway.CaCertificatePath,
        ConnectTimeout = TimeSpan.FromSeconds(gateway.ConnectTimeoutSeconds),
        DefaultCallTimeout = TimeSpan.FromSeconds(gateway.DefaultCallTimeoutSeconds),
        // A non-positive configured value leaves the stream timeout unset (null).
        StreamTimeout = gateway.StreamTimeoutSeconds > 0
            ? TimeSpan.FromSeconds(gateway.StreamTimeoutSeconds)
            : null,
    });

    var gatewaySource = new GatewayGalaxyHierarchySource(_ownedRepositoryClient);
    return new TracedGalaxyHierarchySource(gatewaySource, _options.MxAccess.ClientName);
}
|
||||
|
||||
/// <summary>
/// Tears the driver down. Only the first call does any work; later calls return
/// immediately.
/// </summary>
/// <remarks>
/// Order: deploy watcher, reconnect supervisor, probe watcher, transport forwarder,
/// event pump, then the owned MX session, MX client and repository client. Every step
/// is best-effort: a failure is logged as a warning and the remaining steps still run,
/// so one faulted component cannot leak the resources that come after it. The async
/// disposables are drained synchronously because this method is synchronous.
/// </remarks>
public void Dispose()
{
    if (_disposed) return;
    _disposed = true;

    try { _deployWatcher?.Dispose(); } catch (Exception ex) { _logger.LogWarning(ex, "DeployWatcher dispose failed"); }
    try { _supervisor?.Dispose(); } catch (Exception ex) { _logger.LogWarning(ex, "ReconnectSupervisor dispose failed"); }
    try { _probeWatcher?.Dispose(); } catch (Exception ex) { _logger.LogWarning(ex, "ProbeWatcher dispose failed"); }
    try { _transportForwarder?.Dispose(); } catch (Exception ex) { _logger.LogWarning(ex, "Transport forwarder dispose failed"); }

    // Detach the pump under the lock so EnsureEventPumpStarted never hands out the
    // instance that is about to be disposed; the dispose itself runs outside the lock.
    EventPump? pump;
    lock (_pumpLock) { pump = _eventPump; _eventPump = null; }
    try { pump?.DisposeAsync().AsTask().GetAwaiter().GetResult(); }
    catch (Exception ex) { _logger.LogWarning(ex, "EventPump dispose failed"); }

    // The fields are cleared even when the dispose call throws, so nothing holds on to
    // a half-disposed instance.
    try { _ownedMxSession?.DisposeAsync().AsTask().GetAwaiter().GetResult(); }
    catch (Exception ex) { _logger.LogWarning(ex, "Owned MX session dispose failed"); }
    _ownedMxSession = null;

    try { _ownedMxClient?.DisposeAsync().AsTask().GetAwaiter().GetResult(); }
    catch (Exception ex) { _logger.LogWarning(ex, "Owned MX client dispose failed"); }
    _ownedMxClient = null;

    try { _ownedRepositoryClient?.DisposeAsync().AsTask().GetAwaiter().GetResult(); }
    catch (Exception ex) { _logger.LogWarning(ex, "Owned repository client dispose failed"); }
    _ownedRepositoryClient = null;
    _hierarchySource = null;
}
|
||||
|
||||
/// <summary>
/// Decorator over an <see cref="IAddressSpaceBuilder"/> that notes each variable's
/// <see cref="DriverAttributeInfo.SecurityClass"/> in the supplied dictionary, keyed by
/// <see cref="DriverAttributeInfo.FullName"/>, and then forwards the call to the wrapped
/// builder. <see cref="DiscoverAsync"/> uses it to collect per-tag classifications for
/// the IWritable routing decision (PR 4.3) without the discoverer having to know about
/// the driver's internal state.
/// </summary>
private sealed class SecurityCapturingBuilder(
    IAddressSpaceBuilder inner,
    System.Collections.Concurrent.ConcurrentDictionary<string, SecurityClassification> map)
    : IAddressSpaceBuilder
{
    /// <summary>
    /// Creates the folder on the wrapped builder and wraps the result so variables added
    /// beneath it are recorded into the same dictionary.
    /// </summary>
    public IAddressSpaceBuilder Folder(string browseName, string displayName)
    {
        var childBuilder = inner.Folder(browseName, displayName);
        return new SecurityCapturingBuilder(childBuilder, map);
    }

    /// <summary>
    /// Records the variable's security class (a later entry with the same full name
    /// overwrites an earlier one) and then delegates to the wrapped builder.
    /// </summary>
    public IVariableHandle Variable(string browseName, string displayName, DriverAttributeInfo attributeInfo)
    {
        map[attributeInfo.FullName] = attributeInfo.SecurityClass;
        return inner.Variable(browseName, displayName, attributeInfo);
    }

    /// <summary>Pure pass-through; properties carry no security classification.</summary>
    public void AddProperty(string browseName, DriverDataType dataType, object? value)
    {
        inner.AddProperty(browseName, dataType, value);
    }
}
|
||||
}
|
||||
@@ -0,0 +1,123 @@
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Hosting;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Config;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy;
|
||||
|
||||
/// <summary>
/// Static registration helper that plugs <see cref="GalaxyDriver"/> into a
/// <see cref="DriverFactoryRegistry"/>. Mirrors
/// <c>GalaxyProxyDriverFactoryExtensions</c> / <c>ModbusDriverFactoryExtensions</c>.
/// The server's <c>Program.cs</c> calls <see cref="Register"/> once at startup; the
/// driver bootstrap pipeline then turns DriverInstance rows whose <c>DriverType</c>
/// equals <see cref="DriverTypeName"/> into live <see cref="GalaxyDriver"/> instances.
/// </summary>
/// <remarks>
/// The type name <c>"GalaxyMxGateway"</c> deliberately differs from the legacy proxy's
/// <c>"Galaxy"</c> so both factories can be registered side by side during parity
/// testing (Phase 5). PR 4.W will add a server-side <c>Galaxy:Backend</c> switch that
/// materialises a Galaxy DriverInstance under one or the other type name.
/// </remarks>
public static class GalaxyDriverFactoryExtensions
{
    /// <summary>Driver-type key under which the factory is registered.</summary>
    public const string DriverTypeName = "GalaxyMxGateway";

    // Lenient reader settings: case-insensitive property names, comments skipped,
    // trailing commas tolerated.
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNameCaseInsensitive = true,
        ReadCommentHandling = JsonCommentHandling.Skip,
        AllowTrailingCommas = true,
    };

    /// <summary>
    /// Registers the Galaxy factory delegate under <see cref="DriverTypeName"/>.
    /// </summary>
    /// <param name="registry">Registry to add the factory to; must not be null.</param>
    /// <param name="loggerFactory">Optional factory used to create each driver's logger.</param>
    /// <exception cref="ArgumentNullException"><paramref name="registry"/> is null.</exception>
    public static void Register(DriverFactoryRegistry registry, ILoggerFactory? loggerFactory = null)
    {
        ArgumentNullException.ThrowIfNull(registry);

        registry.Register(
            DriverTypeName,
            (instanceId, configJson) => CreateInstance(instanceId, configJson, loggerFactory));
    }

    /// <summary>Convenience for tests + standalone callers; creates the driver without a logger factory.</summary>
    public static GalaxyDriver CreateInstance(string driverInstanceId, string driverConfigJson)
    {
        return CreateInstance(driverInstanceId, driverConfigJson, loggerFactory: null);
    }

    /// <summary>
    /// Deserialises <paramref name="driverConfigJson"/>, applies defaults for every
    /// optional setting, and constructs a <see cref="GalaxyDriver"/>.
    /// </summary>
    /// <param name="driverInstanceId">Driver instance id; must be non-blank.</param>
    /// <param name="driverConfigJson">JSON configuration document; must be non-blank.</param>
    /// <param name="loggerFactory">Optional factory used to create the driver's logger.</param>
    /// <exception cref="ArgumentException">An id or JSON argument is null, empty or whitespace.</exception>
    /// <exception cref="InvalidOperationException">
    /// The JSON deserialises to null, or <c>Gateway.Endpoint</c>,
    /// <c>Gateway.ApiKeySecretRef</c> or <c>MxAccess.ClientName</c> is absent.
    /// </exception>
    public static GalaxyDriver CreateInstance(
        string driverInstanceId, string driverConfigJson, ILoggerFactory? loggerFactory)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(driverInstanceId);
        ArgumentException.ThrowIfNullOrWhiteSpace(driverConfigJson);

        var dto = JsonSerializer.Deserialize<GalaxyDriverConfigDto>(driverConfigJson, JsonOptions);
        if (dto is null)
        {
            throw new InvalidOperationException(
                $"Galaxy driver config for '{driverInstanceId}' deserialised to null");
        }

        // Sections are built in the same order as the options record's arguments so the
        // first missing required setting reported stays Endpoint → ApiKeySecretRef →
        // ClientName.
        var gatewayDto = dto.Gateway;
        var gateway = new GalaxyGatewayOptions(
            Endpoint: gatewayDto?.Endpoint
                ?? throw new InvalidOperationException(
                    $"Galaxy driver '{driverInstanceId}' missing required Gateway.Endpoint"),
            ApiKeySecretRef: gatewayDto.ApiKeySecretRef
                ?? throw new InvalidOperationException(
                    $"Galaxy driver '{driverInstanceId}' missing required Gateway.ApiKeySecretRef"),
            UseTls: gatewayDto.UseTls ?? true,
            CaCertificatePath: gatewayDto.CaCertificatePath,
            ConnectTimeoutSeconds: gatewayDto.ConnectTimeoutSeconds ?? 10,
            DefaultCallTimeoutSeconds: gatewayDto.DefaultCallTimeoutSeconds ?? 30,
            StreamTimeoutSeconds: gatewayDto.StreamTimeoutSeconds ?? 0);

        var mxAccessDto = dto.MxAccess;
        var mxAccess = new GalaxyMxAccessOptions(
            ClientName: mxAccessDto?.ClientName
                ?? throw new InvalidOperationException(
                    $"Galaxy driver '{driverInstanceId}' missing required MxAccess.ClientName"),
            PublishingIntervalMs: mxAccessDto.PublishingIntervalMs ?? 1000,
            WriteUserId: mxAccessDto.WriteUserId ?? 0,
            EventPumpChannelCapacity: mxAccessDto.EventPumpChannelCapacity ?? 50_000);

        // Repository and Reconnect are entirely optional; a missing section means
        // "all defaults".
        var repository = new GalaxyRepositoryOptions(
            DiscoverPageSize: dto.Repository?.DiscoverPageSize ?? 5000,
            WatchDeployEvents: dto.Repository?.WatchDeployEvents ?? true);

        var reconnect = new GalaxyReconnectOptions(
            InitialBackoffMs: dto.Reconnect?.InitialBackoffMs ?? 500,
            MaxBackoffMs: dto.Reconnect?.MaxBackoffMs ?? 30_000,
            ReplayOnSessionLost: dto.Reconnect?.ReplayOnSessionLost ?? true);

        var options = new GalaxyDriverOptions(
            Gateway: gateway,
            MxAccess: mxAccess,
            Repository: repository,
            Reconnect: reconnect);

        return new GalaxyDriver(driverInstanceId, options, loggerFactory?.CreateLogger<GalaxyDriver>());
    }

    /// <summary>Root of the JSON configuration document; every section is optional at parse time.</summary>
    internal sealed class GalaxyDriverConfigDto
    {
        public GatewayDto? Gateway { get; init; }
        public MxAccessDto? MxAccess { get; init; }
        public RepositoryDto? Repository { get; init; }
        public ReconnectDto? Reconnect { get; init; }
    }

    /// <summary>JSON shape of the <c>Gateway</c> section; nulls mean "not supplied".</summary>
    internal sealed class GatewayDto
    {
        public string? Endpoint { get; init; }
        public string? ApiKeySecretRef { get; init; }
        public bool? UseTls { get; init; }
        public string? CaCertificatePath { get; init; }
        public int? ConnectTimeoutSeconds { get; init; }
        public int? DefaultCallTimeoutSeconds { get; init; }
        public int? StreamTimeoutSeconds { get; init; }
    }

    /// <summary>JSON shape of the <c>MxAccess</c> section; nulls mean "not supplied".</summary>
    internal sealed class MxAccessDto
    {
        public string? ClientName { get; init; }
        public int? PublishingIntervalMs { get; init; }
        public int? WriteUserId { get; init; }
        public int? EventPumpChannelCapacity { get; init; }
    }

    /// <summary>JSON shape of the <c>Repository</c> section; nulls mean "not supplied".</summary>
    internal sealed class RepositoryDto
    {
        public int? DiscoverPageSize { get; init; }
        public bool? WatchDeployEvents { get; init; }
    }

    /// <summary>JSON shape of the <c>Reconnect</c> section; nulls mean "not supplied".</summary>
    internal sealed class ReconnectDto
    {
        public int? InitialBackoffMs { get; init; }
        public int? MaxBackoffMs { get; init; }
        public bool? ReplayOnSessionLost { get; init; }
    }
}
|
||||
@@ -0,0 +1,58 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Health;
|
||||
|
||||
/// <summary>
/// Publishes the synthetic top-level transport-health entry into a
/// <see cref="HostStatusAggregator"/>. The entry is keyed by the client name passed to
/// the constructor (the driver's <c>MxAccess.ClientName</c>) and reflects the gateway
/// transport state — a single "Galaxy is up" signal for dashboards, independent of any
/// individual platform's ScanState.
/// </summary>
/// <remarks>
/// The eventual production source for this signal is the gateway's
/// <c>StreamSessionHealth</c> RPC (mxaccessgw issue gw-6). Until that ships, the
/// driver-side reconnect supervisor (PR 4.5) calls <see cref="SetTransport"/> on
/// transport transitions: <see cref="HostState.Running"/> when the gw session
/// re-Registers, <see cref="HostState.Stopped"/> when the supervisor moves to
/// <c>TransportLost</c>. The forwarder itself only holds the client name, the
/// aggregator, a logger and a disposed flag, so the supervisor can drive it without any
/// back-pressure plumbing.
/// </remarks>
public sealed class HostConnectivityForwarder : IDisposable
{
    private readonly string _clientName;
    private readonly HostStatusAggregator _aggregator;
    private readonly ILogger _logger;
    private bool _disposed;

    /// <summary>Creates a forwarder bound to one client name and one aggregator.</summary>
    /// <param name="clientName">Host name used for the transport entry; must be non-blank.</param>
    /// <param name="aggregator">Aggregator that receives the updates; must not be null.</param>
    /// <param name="logger">Optional logger; a no-op logger is used when omitted.</param>
    /// <exception cref="ArgumentException"><paramref name="clientName"/> is null, empty or whitespace.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="aggregator"/> is null.</exception>
    public HostConnectivityForwarder(string clientName, HostStatusAggregator aggregator, ILogger? logger = null)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(clientName);
        ArgumentNullException.ThrowIfNull(aggregator);

        _clientName = clientName;
        _aggregator = aggregator;
        _logger = logger ?? NullLogger.Instance;
    }

    /// <summary>
    /// Pushes a transport state, stamped with the current UTC time, into the aggregator.
    /// De-duplication happens in the aggregator — repeating the same state does not fan
    /// out duplicate transitions.
    /// </summary>
    /// <param name="state">Transport state to publish.</param>
    /// <exception cref="ObjectDisposedException">The forwarder has been disposed.</exception>
    public void SetTransport(HostState state)
    {
        ObjectDisposedException.ThrowIf(_disposed, this);

        _aggregator.Update(new HostConnectivityStatus(_clientName, state, DateTime.UtcNow));

        _logger.LogDebug(
            "GalaxyDriver transport state for {ClientName}: {State}",
            _clientName, state);
    }

    /// <summary>
    /// Marks the forwarder disposed so later <see cref="SetTransport"/> calls throw.
    /// Nothing else is released today; the method is reserved for the eventual gw-6
    /// StreamSessionHealth consumer, which will own a long-running task to tear down.
    /// </summary>
    public void Dispose()
    {
        _disposed = true;
    }
}
|
||||
@@ -0,0 +1,98 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Health;
|
||||
|
||||
/// <summary>
/// Pure-logic merger for the per-host connectivity entries surfaced through
/// <see cref="IHostConnectivityProbe"/>. Keeps the latest status per host name
/// (case-insensitive): one synthetic top-level transport entry plus one entry per
/// <c>$WinPlatform</c>/<c>$AppEngine</c> probe. <see cref="OnHostStatusChanged"/> is
/// raised only when an upsert introduces a host or changes its <see cref="HostState"/>,
/// so a burst of identical <c>ScanState=Running</c> updates does not fan out duplicate
/// transitions.
/// </summary>
/// <remarks>
/// This class owns the de-dup + diff logic that lived in
/// <c>GalaxyProxyDriver.OnHostConnectivityUpdate</c> in v1. Both the watcher
/// (<see cref="PerPlatformProbeWatcher"/>) and the transport forwarder
/// (<see cref="HostConnectivityForwarder"/>) feed it; <see cref="GalaxyDriver"/> reads
/// <see cref="Snapshot"/> for <c>IHostConnectivityProbe.GetHostStatuses()</c> and
/// re-raises <see cref="OnHostStatusChanged"/> as the driver-level event (wired in
/// PR 4.W).
/// </remarks>
public sealed class HostStatusAggregator
{
    private readonly object _lock = new();
    private readonly Dictionary<string, HostConnectivityStatus> _byHost =
        new(StringComparer.OrdinalIgnoreCase);

    /// <summary>
    /// Raised when <see cref="Update"/> adds a new host or changes an existing host's
    /// <see cref="HostState"/>. Handlers are invoked after the internal lock has been
    /// released, so they may call back into the aggregator.
    /// </summary>
    public event EventHandler<HostStatusChangedEventArgs>? OnHostStatusChanged;

    /// <summary>
    /// Returns a copy of the currently tracked statuses. Suitable as the body of
    /// <c>IHostConnectivityProbe.GetHostStatuses()</c>.
    /// </summary>
    public IReadOnlyList<HostConnectivityStatus> Snapshot()
    {
        lock (_lock)
        {
            return [.. _byHost.Values];
        }
    }

    /// <summary>
    /// Upserts <paramref name="status"/> by <see cref="HostConnectivityStatus.HostName"/>.
    /// The stored entry is always replaced; <see cref="OnHostStatusChanged"/> fires when
    /// the host was not tracked before (previous state reported as
    /// <see cref="HostState.Unknown"/>) or when the state differs from the cached one.
    /// Re-asserting the same state is silent.
    /// </summary>
    /// <param name="status">Latest status for one host.</param>
    /// <exception cref="ArgumentNullException"><paramref name="status"/> is null.</exception>
    public void Update(HostConnectivityStatus status)
    {
        ArgumentNullException.ThrowIfNull(status);

        HostState previous;
        bool changed;
        lock (_lock)
        {
            if (!_byHost.TryGetValue(status.HostName, out var cached))
            {
                // First sighting of this host always counts as a change.
                previous = HostState.Unknown;
                changed = true;
            }
            else
            {
                previous = cached.State;
                changed = previous != status.State;
            }

            _byHost[status.HostName] = status;
        }

        if (!changed)
        {
            return;
        }

        OnHostStatusChanged?.Invoke(
            this,
            new HostStatusChangedEventArgs(status.HostName, previous, status.State));
    }

    /// <summary>
    /// Stops tracking a host (e.g. after a redeploy removes a Platform). No event is
    /// raised — observers react to live transitions only, not to topology reductions.
    /// </summary>
    /// <param name="hostName">Host to drop; must be non-blank.</param>
    /// <returns><c>true</c> when the host was tracked.</returns>
    /// <exception cref="ArgumentException"><paramref name="hostName"/> is null, empty or whitespace.</exception>
    public bool Remove(string hostName)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(hostName);

        lock (_lock)
        {
            return _byHost.Remove(hostName);
        }
    }
}
|
||||
@@ -0,0 +1,200 @@
|
||||
using System.Collections.Concurrent;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Health;
|
||||
|
||||
/// <summary>
/// Subscribes the <c>ScanState</c> attribute of every <c>$WinPlatform</c> /
/// <c>$AppEngine</c> object the discoverer surfaced and translates ScanState
/// value-changes into per-host <see cref="HostConnectivityStatus"/> updates.
/// Ports the state machine in
/// <c>Driver.Galaxy.Host/Backend/Stability/GalaxyRuntimeProbeManager.cs</c> onto the
/// gateway subscription path.
/// </summary>
/// <remarks>
/// Address grammar: each platform tag's probe address is
/// <c>{platformTagName}.ScanState</c>. The watcher subscribes that address through
/// <see cref="IGalaxySubscriber"/>; the EventPump (PR 4.4) routes inbound
/// OnDataChange events back via <see cref="OnProbeValueChanged"/>. State decoding:
/// <list type="bullet">
///   <item>Quality &lt; <c>192</c> (Good) → <see cref="HostState.Unknown"/>.</item>
///   <item>Value <c>1</c>, <c>true</c>, or "Running" → <see cref="HostState.Running"/>.</item>
///   <item>Value <c>0</c>, <c>false</c>, or "Stopped" → <see cref="HostState.Stopped"/>.</item>
///   <item>Anything else with Good quality → <see cref="HostState.Faulted"/>.</item>
/// </list>
/// <see cref="SyncPlatformsAsync"/> is idempotent — call it after every
/// Discover / Rediscover. Newly-added platforms are subscribed; removed ones are
/// unsubscribed (when they hold a live item handle) and dropped from both the
/// watcher and the aggregator.
/// </remarks>
public sealed class PerPlatformProbeWatcher : IDisposable
{
    /// <summary>Suffix appended to a platform tag name to form its probe address.</summary>
    public const string ProbeSuffix = ".ScanState";

    private readonly IGalaxySubscriber _subscriber;
    private readonly HostStatusAggregator _aggregator;
    private readonly ILogger _logger;
    private readonly int _bufferedUpdateIntervalMs;

    // Tracked platform → gw item handle. Item handle 0 means the gw rejected the subscribe;
    // the entry is kept while the platform is still desired so SyncPlatformsAsync doesn't
    // try to subscribe it again on every call.
    private readonly ConcurrentDictionary<string, int> _itemHandlesByPlatform =
        new(StringComparer.OrdinalIgnoreCase);
    private readonly Lock _syncLock = new();
    private bool _disposed;

    /// <summary>Creates a watcher that subscribes through <paramref name="subscriber"/>.</summary>
    /// <param name="subscriber">Gateway subscriber used for bulk subscribe/unsubscribe.</param>
    /// <param name="aggregator">Aggregator that receives the decoded host states.</param>
    /// <param name="logger">Optional logger; a no-op logger is used when omitted.</param>
    /// <param name="bufferedUpdateIntervalMs">
    /// Interval passed to the bulk subscribe; <c>0</c> means use the gw's default cadence.
    /// </param>
    /// <exception cref="ArgumentNullException">A required dependency is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="bufferedUpdateIntervalMs"/> is negative.
    /// </exception>
    public PerPlatformProbeWatcher(
        IGalaxySubscriber subscriber,
        HostStatusAggregator aggregator,
        ILogger? logger = null,
        int bufferedUpdateIntervalMs = 0)
    {
        _subscriber = subscriber ?? throw new ArgumentNullException(nameof(subscriber));
        _aggregator = aggregator ?? throw new ArgumentNullException(nameof(aggregator));
        _logger = logger ?? NullLogger.Instance;
        if (bufferedUpdateIntervalMs < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(bufferedUpdateIntervalMs),
                "bufferedUpdateIntervalMs must be >= 0; 0 means use the gw's default cadence.");
        }
        _bufferedUpdateIntervalMs = bufferedUpdateIntervalMs;
    }

    /// <summary>Snapshot of platform tag names currently watched.</summary>
    public IReadOnlyCollection<string> WatchedPlatforms => [.. _itemHandlesByPlatform.Keys];

    /// <summary>
    /// Reconcile the watched platform set against <paramref name="platformTagNames"/>.
    /// Subscribes new entries and drops every tracked platform that is no longer
    /// desired — including ones whose earlier subscribe was rejected (item handle 0),
    /// which need no unsubscribe RPC but must still leave the membership map. Calling
    /// with the same set is a no-op.
    /// </summary>
    /// <param name="platformTagNames">The full set of platform tag names that should be watched.</param>
    /// <param name="cancellationToken">Token passed to the gateway subscribe/unsubscribe calls.</param>
    /// <exception cref="ObjectDisposedException">The watcher has been disposed.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="platformTagNames"/> is null.</exception>
    public async Task SyncPlatformsAsync(
        IEnumerable<string> platformTagNames, CancellationToken cancellationToken)
    {
        ObjectDisposedException.ThrowIf(_disposed, this);
        ArgumentNullException.ThrowIfNull(platformTagNames);

        var desired = new HashSet<string>(platformTagNames, StringComparer.OrdinalIgnoreCase);

        // Compute deltas under the lock so concurrent SyncPlatformsAsync calls don't
        // race on the membership view.
        List<string> toAdd;
        int[] handlesToUnsubscribe;
        lock (_syncLock)
        {
            toAdd = [.. desired.Where(p => !_itemHandlesByPlatform.ContainsKey(p))];

            var dropped = _itemHandlesByPlatform
                .Where(kvp => !desired.Contains(kvp.Key))
                .ToList();

            // Drop removed entries from the membership map up-front so a concurrent
            // OnProbeValueChanged for them is silently ignored. The unsubscribe RPC
            // runs outside the lock.
            foreach (var entry in dropped)
            {
                _itemHandlesByPlatform.TryRemove(entry.Key, out _);
                _aggregator.Remove(entry.Key);
            }

            // Only entries with a live gw item handle have anything to unsubscribe.
            handlesToUnsubscribe = [.. dropped.Where(kvp => kvp.Value > 0).Select(kvp => kvp.Value)];
        }

        if (handlesToUnsubscribe.Length > 0)
        {
            try
            {
                await _subscriber.UnsubscribeBulkAsync(handlesToUnsubscribe, cancellationToken)
                    .ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                // Best-effort: local state is already consistent, so only log.
                _logger.LogWarning(ex,
                    "PerPlatformProbeWatcher unsubscribe failed for {Count} probe(s); aggregator entries already cleared.",
                    handlesToUnsubscribe.Length);
            }
        }

        if (toAdd.Count == 0) return;

        var probeAddresses = toAdd.Select(p => p + ProbeSuffix).ToArray();
        // PR 6.3 — use the configured bufferedUpdateIntervalMs (defaults to 0 = gw cadence
        // when the driver hasn't overridden MxAccess.PublishingIntervalMs). Probe ScanState
        // changes are rare so a coarser interval is usually fine; deployments that need
        // tighter health visibility can dial it down through GalaxyDriverOptions.
        var results = await _subscriber.SubscribeBulkAsync(
            probeAddresses, _bufferedUpdateIntervalMs, cancellationToken).ConfigureAwait(false);

        // Index the results once by address (case-insensitive) instead of scanning the
        // whole result list per platform; the first result for an address still wins.
        var resultsByAddress = results.ToLookup(r => r.TagAddress, StringComparer.OrdinalIgnoreCase);

        for (var i = 0; i < toAdd.Count; i++)
        {
            var platform = toAdd[i];
            var match = resultsByAddress[probeAddresses[i]].FirstOrDefault();

            // A missing or unsuccessful result is recorded as handle 0 so the platform is
            // not re-subscribed on every sync while it remains desired.
            var itemHandle = match is { WasSuccessful: true } ? match.ItemHandle : 0;
            _itemHandlesByPlatform[platform] = itemHandle;

            if (itemHandle <= 0)
            {
                _logger.LogWarning(
                    "PerPlatformProbeWatcher subscribe failed for {Platform}: {Error}",
                    platform, match?.ErrorMessage ?? "<no result returned>");
            }
        }
    }

    /// <summary>
    /// Route an OnDataChange for a probe address into the aggregator. The EventPump
    /// (PR 4.4) calls this; tests can drive it directly to exercise the state machine
    /// without spinning a real gw. Foreign references (anything not ending in
    /// <see cref="ProbeSuffix"/>, or a probe for a platform we're not tracking) are
    /// silently dropped, as is every call made after disposal.
    /// </summary>
    /// <param name="fullReference">Full tag reference the change was reported for.</param>
    /// <param name="value">The reported ScanState value.</param>
    /// <param name="qualityByte">Raw quality byte; values below 192 decode to Unknown.</param>
    /// <exception cref="ArgumentNullException"><paramref name="fullReference"/> is null.</exception>
    public void OnProbeValueChanged(string fullReference, object? value, byte qualityByte)
    {
        if (_disposed) return;
        ArgumentNullException.ThrowIfNull(fullReference);

        if (!fullReference.EndsWith(ProbeSuffix, StringComparison.OrdinalIgnoreCase)) return;
        var platform = fullReference[..^ProbeSuffix.Length];
        if (!_itemHandlesByPlatform.ContainsKey(platform)) return;

        var state = DecodeState(value, qualityByte);
        _aggregator.Update(new HostConnectivityStatus(platform, state, DateTime.UtcNow));
    }

    /// <summary>
    /// Decode a ScanState value + raw quality byte to a <see cref="HostState"/>.
    /// Public for tests that want to pin the decoding table.
    /// </summary>
    /// <param name="value">Reported value; bool, int, short, long and string are recognised.</param>
    /// <param name="qualityByte">Raw quality byte; anything below 192 yields Unknown.</param>
    /// <returns>The decoded host state; unrecognised Good-quality values yield Faulted.</returns>
    public static HostState DecodeState(object? value, byte qualityByte)
    {
        if (qualityByte < 192) return HostState.Unknown;

        return value switch
        {
            bool b => b ? HostState.Running : HostState.Stopped,
            int i => i == 1 ? HostState.Running : i == 0 ? HostState.Stopped : HostState.Faulted,
            short s => s == 1 ? HostState.Running : s == 0 ? HostState.Stopped : HostState.Faulted,
            long l => l == 1 ? HostState.Running : l == 0 ? HostState.Stopped : HostState.Faulted,
            string str when string.Equals(str, "Running", StringComparison.OrdinalIgnoreCase) => HostState.Running,
            string str when string.Equals(str, "Stopped", StringComparison.OrdinalIgnoreCase) => HostState.Stopped,
            _ => HostState.Faulted,
        };
    }

    /// <summary>
    /// Clears the membership map and makes a best-effort attempt to unsubscribe every
    /// live probe. Only the first call does any work.
    /// </summary>
    public void Dispose()
    {
        if (_disposed) return;
        _disposed = true;

        // Best-effort unsubscribe everything we know about. Run synchronously through
        // GetAwaiter().GetResult() since Dispose is sync; transport errors are logged
        // and swallowed.
        var liveHandles = _itemHandlesByPlatform.Values.Where(h => h > 0).ToArray();
        _itemHandlesByPlatform.Clear();
        if (liveHandles.Length > 0)
        {
            try { _subscriber.UnsubscribeBulkAsync(liveHandles, CancellationToken.None).GetAwaiter().GetResult(); }
            catch (Exception ex) { _logger.LogWarning(ex, "PerPlatformProbeWatcher dispose unsubscribe failed"); }
        }
    }
}
|
||||
313
src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/Runtime/EventPump.cs
Normal file
313
src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/Runtime/EventPump.cs
Normal file
@@ -0,0 +1,313 @@
|
||||
using System.Diagnostics.Metrics;
|
||||
using System.Threading.Channels;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using MxGateway.Contracts.Proto;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// Long-running consumer of <see cref="IGalaxySubscriber.StreamEventsAsync"/>. Translates
|
||||
/// each <see cref="MxEvent"/> with family <see cref="MxEventFamily.OnDataChange"/> into
|
||||
/// <see cref="DataChangeEventArgs"/> and dispatches one event per registered driver
|
||||
/// subscription that includes the changed item handle (fan-out via
|
||||
/// <see cref="SubscriptionRegistry.ResolveSubscribers"/>).
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// One pump per connected <see cref="GalaxyMxSession"/>. Reconnect lives in PR 4.5's
|
||||
/// supervisor; on transport failure here we log the exception, stop reading, and
/// complete the channel so the dispatch loop drains and exits — restart is left to
/// the supervisor.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// PR 6.2 — the network-read loop and the listener-fanout loop are decoupled by a
|
||||
/// bounded <see cref="Channel{T}"/>. When a listener is slow enough to fill the
|
||||
/// channel, new events are dropped (newest-dropped semantics: producer's
|
||||
/// <c>TryWrite</c> fails) rather than back-pressuring the gw stream. Three counters
|
||||
/// on the <c>ZB.MOM.WW.OtOpcUa.Driver.Galaxy</c> meter expose received / dispatched
|
||||
/// / dropped totals so ops sees pressure before it manifests as user-visible loss.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
internal sealed class EventPump : IAsyncDisposable
|
||||
{
|
||||
public const string MeterName = "ZB.MOM.WW.OtOpcUa.Driver.Galaxy";
|
||||
private const int DefaultChannelCapacity = 50_000;
|
||||
|
||||
// Single static meter so a host-level MeterListener catches all pump instances.
|
||||
private static readonly Meter Meter = new(MeterName);
|
||||
private static readonly Counter<long> EventsReceived =
|
||||
Meter.CreateCounter<long>("galaxy.events.received", unit: "{event}",
|
||||
description: "MxEvents read from the gateway StreamEvents stream.");
|
||||
private static readonly Counter<long> EventsDispatched =
|
||||
Meter.CreateCounter<long>("galaxy.events.dispatched", unit: "{event}",
|
||||
description: "MxEvents passed through the bounded channel and into OnDataChange.");
|
||||
private static readonly Counter<long> EventsDropped =
|
||||
Meter.CreateCounter<long>("galaxy.events.dropped", unit: "{event}",
|
||||
description: "MxEvents dropped because the bounded channel was full (newest-dropped).");
|
||||
private static readonly Counter<long> AlarmTransitionsReceived =
|
||||
Meter.CreateCounter<long>("galaxy.alarm_transitions.received", unit: "{event}",
|
||||
description: "OnAlarmTransition events decoded and forwarded to driver-level handlers.");
|
||||
private static readonly Counter<long> AlarmTransitionsDecodingFailures =
|
||||
Meter.CreateCounter<long>("galaxy.alarm_transitions.decoding_failures", unit: "{event}",
|
||||
description: "OnAlarmTransition events that arrived without a populated body or with an unspecified transition kind.");
|
||||
|
||||
private readonly IGalaxySubscriber _subscriber;
|
||||
private readonly SubscriptionRegistry _registry;
|
||||
private readonly ILogger _logger;
|
||||
private readonly Func<long, ISubscriptionHandle> _handleFactory;
|
||||
private readonly Channel<MxEvent> _channel;
|
||||
private readonly KeyValuePair<string, object?> _clientTag;
|
||||
private readonly CancellationTokenSource _cts = new();
|
||||
|
||||
private Task? _loop;
|
||||
private Task? _dispatchLoop;
|
||||
private bool _disposed;
|
||||
|
||||
public event EventHandler<DataChangeEventArgs>? OnDataChange;
|
||||
|
||||
/// <summary>
|
||||
/// Fires for every <see cref="MxEventFamily.OnAlarmTransition"/> event the
|
||||
/// gateway forwards. Decoded into a <see cref="GalaxyAlarmTransition"/> with
|
||||
/// the OPC UA severity bucket already mapped via
|
||||
/// <see cref="MxAccessSeverityMapper"/>. The driver wraps this onto
|
||||
/// <c>IAlarmSource.OnAlarmEvent</c> in PR B.2.
|
||||
/// </summary>
|
||||
internal event EventHandler<GalaxyAlarmTransition>? OnAlarmTransition;
|
||||
|
||||
/// <summary>
/// Creates a pump over <paramref name="subscriber"/> that fans events out through
/// <paramref name="registry"/>.
/// </summary>
/// <param name="subscriber">Source of the gateway event stream. Required.</param>
/// <param name="registry">Resolves an item handle to the subscriptions that include it. Required.</param>
/// <param name="logger">Optional logger; a null logger is used when omitted.</param>
/// <param name="handleFactory">Optional factory turning a subscription id into a handle; defaults to <see cref="GalaxySubscriptionHandle"/>.</param>
/// <param name="channelCapacity">Capacity of the bounded channel between reader and dispatcher; must be at least 1.</param>
/// <param name="clientName">Value of the <c>galaxy.client</c> metric tag; "&lt;unknown&gt;" when omitted.</param>
/// <exception cref="ArgumentNullException"><paramref name="subscriber"/> or <paramref name="registry"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="channelCapacity"/> is below 1.</exception>
public EventPump(
    IGalaxySubscriber subscriber,
    SubscriptionRegistry registry,
    ILogger? logger = null,
    Func<long, ISubscriptionHandle>? handleFactory = null,
    int channelCapacity = DefaultChannelCapacity,
    string? clientName = null)
{
    ArgumentNullException.ThrowIfNull(subscriber);
    ArgumentNullException.ThrowIfNull(registry);
    if (channelCapacity < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(channelCapacity),
            "channelCapacity must be >= 1; recommended 50_000 for 50k-tag deployments.");
    }

    _subscriber = subscriber;
    _registry = registry;
    _logger = logger ?? NullLogger.Instance;
    _handleFactory = handleFactory ?? (id => new GalaxySubscriptionHandle(id));
    _clientTag = new("galaxy.client", clientName ?? "<unknown>");

    // Wait mode is deliberate: with it, TryWrite returns false on a full channel,
    // which lets the producer count the drop itself. DropWrite would hide the drop
    // from the caller.
    var channelOptions = new BoundedChannelOptions(channelCapacity)
    {
        FullMode = BoundedChannelFullMode.Wait,
        SingleReader = true,
        SingleWriter = true,
    };
    _channel = Channel.CreateBounded<MxEvent>(channelOptions);
}
|
||||
|
||||
/// <summary>
/// Launches the stream-reading loop and the dispatch loop on background tasks.
/// Calling it again after the loops have been created does nothing.
/// </summary>
/// <exception cref="ObjectDisposedException">The pump has already been disposed.</exception>
public void Start()
{
    ObjectDisposedException.ThrowIf(_disposed, this);

    if (_loop is null)
    {
        _loop = Task.Run(() => RunAsync(_cts.Token));
        _dispatchLoop = Task.Run(() => DispatchLoopAsync(_cts.Token));
    }
}
|
||||
|
||||
/// <summary>
/// Producer loop: reads events from the subscriber stream and offers each one to the
/// bounded channel, counting received and dropped events. Always completes the
/// channel writer on exit so the dispatch loop can drain and stop.
/// </summary>
/// <param name="ct">Token that ends the loop.</param>
private async Task RunAsync(CancellationToken ct)
{
    var writer = _channel.Writer;
    try
    {
        var stream = _subscriber.StreamEventsAsync(ct).WithCancellation(ct).ConfigureAwait(false);
        await foreach (var incoming in stream)
        {
            if (ct.IsCancellationRequested)
            {
                break;
            }

            EventsReceived.Add(1, _clientTag);

            // Non-blocking offer: a full channel means this (newest) event is dropped
            // and counted, and reading continues so the gateway stream is never stalled.
            var accepted = writer.TryWrite(incoming);
            if (accepted)
            {
                continue;
            }

            EventsDropped.Add(1, _clientTag);
        }
    }
    catch (OperationCanceledException) when (ct.IsCancellationRequested)
    {
        // Cancellation is the normal shutdown path; nothing to log.
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex,
            "Galaxy EventPump loop ended with an exception — reconnect supervisor (PR 4.5) handles restart.");
    }
    finally
    {
        // Signal end-of-stream to the dispatch loop.
        writer.TryComplete();
    }
}
|
||||
|
||||
/// <summary>
/// Consumer loop: drains the bounded channel and dispatches each event. A failure
/// while dispatching a single event is logged and that event is skipped, so one bad
/// event cannot stop delivery of everything queued behind it.
/// </summary>
/// <param name="ct">Token that ends the loop.</param>
private async Task DispatchLoopAsync(CancellationToken ct)
{
    try
    {
        await foreach (var ev in _channel.Reader.ReadAllAsync(ct).ConfigureAwait(false))
        {
            try
            {
                Dispatch(ev);
            }
            catch (Exception ex)
            {
                // Handler exceptions are already contained inside the Dispatch* methods;
                // this catches whatever escapes them (e.g. decoding or handle creation).
                // The event is not counted as dispatched.
                _logger.LogWarning(ex,
                    "Galaxy EventPump failed to dispatch an event (family={Family}) — skipping it and continuing.",
                    ev.Family);
                continue;
            }

            EventsDispatched.Add(1, _clientTag);
        }
    }
    catch (OperationCanceledException) when (ct.IsCancellationRequested)
    {
        // Clean shutdown.
    }
    catch (Exception ex)
    {
        // Only failures of the channel read itself reach this point.
        _logger.LogWarning(ex,
            "Galaxy EventPump dispatch loop ended with an exception — events past this point will be lost until restart.");
    }
}
|
||||
|
||||
/// <summary>
/// Routes an event by family: data changes and alarm transitions are forwarded,
/// every other family is ignored.
/// </summary>
private void Dispatch(MxEvent ev)
{
    var family = ev.Family;
    if (family == MxEventFamily.OnDataChange)
    {
        DispatchDataChange(ev);
    }
    else if (family == MxEventFamily.OnAlarmTransition)
    {
        DispatchAlarmTransition(ev);
    }

    // All remaining families (OnWriteComplete / OperationComplete / OnBufferedDataChange)
    // are filtered out — write callers get their reply via the InvokeAsync round-trip,
    // not via the event stream.
}
|
||||
|
||||
/// <summary>
/// Raises <see cref="OnDataChange"/> once per subscription that the registry resolves
/// for the event's item handle. A throwing handler is logged and does not stop the
/// remaining subscriptions from being notified.
/// </summary>
private void DispatchDataChange(MxEvent ev)
{
    var targets = _registry.ResolveSubscribers(ev.ItemHandle);
    if (targets.Count == 0)
    {
        // Nobody is registered for this handle (e.g. a stale event after unsubscribe).
        return;
    }

    // Decode once; every subscriber receives the same snapshot instance.
    var snapshot = ToSnapshot(ev);
    foreach (var (subId, reference) in targets)
    {
        var handle = _handleFactory(subId);
        try
        {
            var args = new DataChangeEventArgs(handle, reference, snapshot);
            OnDataChange?.Invoke(this, args);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex,
                "Galaxy OnDataChange handler threw for {FullRef} subscription {SubscriptionId} — continuing fan-out.",
                reference, subId);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Decodes an alarm-transition event into a <see cref="GalaxyAlarmTransition"/> and
/// raises <see cref="OnAlarmTransition"/>. Events with no body or an unspecified
/// transition kind are counted as decoding failures and dropped.
/// </summary>
private void DispatchAlarmTransition(MxEvent ev)
{
    var body = ev.OnAlarmTransition;
    if (body is null)
    {
        // No payload (e.g. malformed gateway event or worker version skew): count and drop.
        AlarmTransitionsDecodingFailures.Add(1, _clientTag);
        _logger.LogDebug(
            "Galaxy OnAlarmTransition event arrived without a populated body (sequence={Sequence}); ignoring.",
            ev.WorkerSequence);
        return;
    }

    if (body.TransitionKind == AlarmTransitionKind.Unspecified)
    {
        AlarmTransitionsDecodingFailures.Add(1, _clientTag);
        _logger.LogDebug(
            "Galaxy OnAlarmTransition for {AlarmRef} has unspecified transition kind; ignoring.",
            body.AlarmFullReference);
        return;
    }

    var (bucket, opcUaSeverity) = MxAccessSeverityMapper.Map(body.Severity);

    // A missing transition timestamp falls back to "now"; a missing raise timestamp stays null.
    var transitionTimestamp = body.TransitionTimestamp?.ToDateTime() ?? DateTime.UtcNow;
    var originalRaiseTimestamp = body.OriginalRaiseTimestamp?.ToDateTime();

    var transition = new GalaxyAlarmTransition(
        AlarmFullReference: body.AlarmFullReference,
        SourceObjectReference: body.SourceObjectReference,
        AlarmTypeName: body.AlarmTypeName,
        TransitionKind: MapTransitionKind(body.TransitionKind),
        SeverityBucket: bucket,
        OpcUaSeverity: opcUaSeverity,
        RawMxAccessSeverity: body.Severity,
        OriginalRaiseTimestampUtc: originalRaiseTimestamp,
        TransitionTimestampUtc: transitionTimestamp,
        OperatorUser: body.OperatorUser,
        OperatorComment: body.OperatorComment,
        Category: body.Category,
        Description: body.Description);

    // Counted before the handler runs, so a throwing handler still registers as received.
    AlarmTransitionsReceived.Add(1, _clientTag);

    try
    {
        OnAlarmTransition?.Invoke(this, transition);
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex,
            "Galaxy OnAlarmTransition handler threw for {AlarmRef} — continuing.",
            transition.AlarmFullReference);
    }
}
|
||||
|
||||
/// <summary>
/// Translates the gateway's transition kind into the driver-side enum. Any value
/// without an explicit mapping becomes <see cref="GalaxyAlarmTransitionKind.Unspecified"/>.
/// </summary>
private static GalaxyAlarmTransitionKind MapTransitionKind(AlarmTransitionKind kind)
{
    switch (kind)
    {
        case AlarmTransitionKind.Raise:
            return GalaxyAlarmTransitionKind.Raise;
        case AlarmTransitionKind.Acknowledge:
            return GalaxyAlarmTransitionKind.Acknowledge;
        case AlarmTransitionKind.Clear:
            return GalaxyAlarmTransitionKind.Clear;
        case AlarmTransitionKind.Retrigger:
            return GalaxyAlarmTransitionKind.Retrigger;
        default:
            return GalaxyAlarmTransitionKind.Unspecified;
    }
}
|
||||
|
||||
/// <summary>
/// Builds a <see cref="DataValueSnapshot"/> from an event: decoded value, a status code
/// taken from the first status row when one exists (otherwise from the low byte of the
/// quality field), the optional source timestamp, and the current UTC time as the
/// server timestamp.
/// </summary>
private DataValueSnapshot ToSnapshot(MxEvent ev)
{
    var decodedValue = MxValueDecoder.Decode(ev.Value);

    var hasStatusRow = ev.Statuses.Count > 0;
    var statusCode = hasStatusRow
        ? StatusCodeMap.FromMxStatus(ev.Statuses[0], _logger)
        : StatusCodeMap.FromQualityByte((byte)(ev.Quality & 0xFF), _logger);

    // Stays null when the event carries no source timestamp.
    var sourceTimestamp = ev.SourceTimestamp?.ToDateTime();

    return new DataValueSnapshot(
        Value: decodedValue,
        StatusCode: statusCode,
        SourceTimestampUtc: sourceTimestamp,
        ServerTimestampUtc: DateTime.UtcNow);
}
|
||||
|
||||
/// <summary>
/// Cancels both loops, completes the channel, waits for the loops to finish, and
/// releases the cancellation source. Subsequent calls return immediately.
/// </summary>
public async ValueTask DisposeAsync()
{
    if (_disposed)
    {
        return;
    }

    _disposed = true;
    _cts.Cancel();
    _channel.Writer.TryComplete();

    // Await the reader first, then the dispatcher; either may be null if Start never ran.
    foreach (var pending in new Task?[] { _loop, _dispatchLoop })
    {
        if (pending is null)
        {
            continue;
        }

        try
        {
            await pending.ConfigureAwait(false);
        }
        catch
        {
            /* shutdown */
        }
    }

    _cts.Dispose();
}
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
/// Handle handed back by <see cref="GalaxyDriver.SubscribeAlarmsAsync"/>. Alarm
/// transitions are not multiplexed per handle — every active handle observes the
/// gateway's alarm-event stream — but a handle is still required for a symmetric
/// Unsubscribe and so the server-side AlarmConditionService can correlate
/// transitions with the subscription that originated them.
/// </summary>
internal sealed class GalaxyAlarmSubscriptionHandle : IAlarmSubscriptionHandle
{
    /// <summary>Creates a handle that carries the given diagnostic identifier.</summary>
    /// <param name="diagnosticId">Identifier exposed through <see cref="DiagnosticId"/>; stored as given.</param>
    public GalaxyAlarmSubscriptionHandle(string diagnosticId) => DiagnosticId = diagnosticId;

    /// <inheritdoc />
    public string DiagnosticId { get; }
}
|
||||
@@ -0,0 +1,36 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// Decoded MXAccess alarm transition surfaced by <see cref="EventPump"/>.
|
||||
/// The driver wraps this into <see cref="AlarmEventArgs"/> on the
|
||||
/// <see cref="IAlarmSource.OnAlarmEvent"/> path; the richer fields
|
||||
/// (operator user/comment, original raise time, category) become available
|
||||
/// on the OPC UA Part 9 condition once <c>AlarmEventArgs</c> is extended in
|
||||
/// the client-surface refresh PR (E.7).
|
||||
/// </summary>
|
||||
/// <param name="AlarmFullReference">Full reference of the alarm, as carried on the gateway event body.</param>
/// <param name="SourceObjectReference">Reference of the source object, as carried on the gateway event body.</param>
/// <param name="AlarmTypeName">Alarm type name, as carried on the gateway event body.</param>
/// <param name="TransitionKind">Driver-side transition kind mapped from the gateway's kind.</param>
/// <param name="SeverityBucket">Severity bucket produced by <c>MxAccessSeverityMapper.Map</c> for the raw severity.</param>
/// <param name="OpcUaSeverity">Numeric severity produced by <c>MxAccessSeverityMapper.Map</c> for the raw severity.</param>
/// <param name="RawMxAccessSeverity">Severity value exactly as carried on the gateway event body.</param>
/// <param name="OriginalRaiseTimestampUtc">Original raise time; <c>null</c> when the event body has none.</param>
/// <param name="TransitionTimestampUtc">Transition time; <see cref="EventPump"/> substitutes the current UTC time when the event body has none.</param>
/// <param name="OperatorUser">Operator user, as carried on the gateway event body.</param>
/// <param name="OperatorComment">Operator comment, as carried on the gateway event body.</param>
/// <param name="Category">Category, as carried on the gateway event body.</param>
/// <param name="Description">Description, as carried on the gateway event body.</param>
internal sealed record GalaxyAlarmTransition(
|
||||
string AlarmFullReference,
|
||||
string SourceObjectReference,
|
||||
string AlarmTypeName,
|
||||
GalaxyAlarmTransitionKind TransitionKind,
|
||||
AlarmSeverity SeverityBucket,
|
||||
int OpcUaSeverity,
|
||||
int RawMxAccessSeverity,
|
||||
DateTime? OriginalRaiseTimestampUtc,
|
||||
DateTime TransitionTimestampUtc,
|
||||
string OperatorUser,
|
||||
string OperatorComment,
|
||||
string Category,
|
||||
string Description);
|
||||
|
||||
/// <summary>Kind of alarm state change observed by <see cref="EventPump"/>.</summary>
|
||||
internal enum GalaxyAlarmTransitionKind
|
||||
{
|
||||
// Zero value. EventPump.MapTransitionKind falls back to this for any gateway kind
// it has no explicit mapping for.
Unspecified = 0,
|
||||
Raise = 1,
|
||||
Acknowledge = 2,
|
||||
Clear = 3,
|
||||
Retrigger = 4,
|
||||
}
|
||||
@@ -0,0 +1,102 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using MxGateway.Client;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Config;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// Driver-side wrapper around the gateway's <see cref="MxGatewaySession"/>. Owns the
|
||||
/// MXAccess <c>Register</c> handle, caches the per-tag item handles AddItem returns,
|
||||
/// and coordinates the read / write / subscribe call paths. PRs 4.2-4.5 fill this in
|
||||
/// incrementally:
|
||||
/// <list type="bullet">
|
||||
/// <item>PR 4.2 (this PR) — skeleton + lifecycle wiring.</item>
|
||||
/// <item>PR 4.3 — write path.</item>
|
||||
/// <item>PR 4.4 — subscription registry + event pump + the production
|
||||
/// <see cref="IGalaxyDataReader"/> implementation that drives the read path.</item>
|
||||
/// <item>PR 4.5 — reconnect supervisor.</item>
|
||||
/// </list>
|
||||
/// </summary>
|
||||
public sealed class GalaxyMxSession : IAsyncDisposable
{
    private readonly GalaxyMxAccessOptions _options;
    private readonly ILogger _logger;

    // Owned gateway client + session — populated when ConnectAsync succeeds. Tests can
    // leave them null and exercise the surface via injected IGalaxyDataReader fakes.
    private MxGatewayClient? _ownedClient;
    private MxGatewaySession? _session;
    private int _serverHandle;
    private bool _disposed;

    /// <summary>Creates a disconnected session wrapper.</summary>
    /// <param name="options">MXAccess options; supplies the client name used when registering.</param>
    /// <param name="logger">Optional logger; a null logger is used when omitted.</param>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is null.</exception>
    public GalaxyMxSession(GalaxyMxAccessOptions options, ILogger? logger = null)
    {
        _options = options ?? throw new ArgumentNullException(nameof(options));
        _logger = logger ?? NullLogger.Instance;
    }

    /// <summary>True once a gateway session has been attached or successfully connected.</summary>
    public bool IsConnected => _session is not null;

    /// <summary>
    /// Server-side handle returned by MXAccess <c>Register</c>. Zero before
    /// <see cref="ConnectAsync"/> opens the session.
    /// </summary>
    public int ServerHandle => _serverHandle;

    /// <summary>
    /// Connect the underlying gateway client + open an MXAccess session + register the
    /// configured client name. Idempotent — second calls are no-ops while
    /// <see cref="IsConnected"/> is true.
    /// </summary>
    /// <remarks>
    /// The session only becomes visible through <see cref="IsConnected"/>,
    /// <see cref="Session"/> and <see cref="ServerHandle"/> after every step has
    /// succeeded. If opening the session or registering fails, whatever was created is
    /// disposed and the exception is rethrown, leaving this instance disconnected so
    /// the call can be retried.
    /// </remarks>
    /// <param name="clientOptions">Options used to create the gateway client.</param>
    /// <param name="cancellationToken">Cancels the open / register calls.</param>
    /// <exception cref="ObjectDisposedException">This instance has been disposed.</exception>
    public async Task ConnectAsync(MxGatewayClientOptions clientOptions, CancellationToken cancellationToken)
    {
        ObjectDisposedException.ThrowIf(_disposed, this);
        if (_session is not null) return;

        var client = MxGatewayClient.Create(clientOptions);
        MxGatewaySession? session = null;
        int serverHandle;
        try
        {
            session = await client.OpenSessionAsync(cancellationToken: cancellationToken).ConfigureAwait(false);
            serverHandle = await session.RegisterAsync(_options.ClientName, cancellationToken).ConfigureAwait(false);
        }
        catch
        {
            // Partial connect: release what was created so nothing leaks and a later
            // ConnectAsync starts from a clean slate.
            if (session is not null)
            {
                try { await session.DisposeAsync().ConfigureAwait(false); }
                catch (Exception ex) { _logger.LogWarning(ex, "GalaxyMxSession session dispose failed (best-effort)"); }
            }

            try { await client.DisposeAsync().ConfigureAwait(false); }
            catch (Exception ex) { _logger.LogWarning(ex, "GalaxyMxSession client dispose failed (best-effort)"); }

            throw;
        }

        // Publish only after the whole sequence succeeded.
        _ownedClient = client;
        _session = session;
        _serverHandle = serverHandle;

        _logger.LogInformation(
            "GalaxyMxSession connected — clientName={ClientName} serverHandle={Handle}",
            _options.ClientName, _serverHandle);
    }

    /// <summary>
    /// Test seam — attach a session opened externally (e.g. against an in-process gw
    /// fake). Skips the gateway-client construction so tests can drive the session
    /// surface without spinning a real gRPC channel. Caller retains client ownership.
    /// </summary>
    /// <param name="session">Externally opened gateway session. Required.</param>
    /// <param name="serverHandle">Server handle to report through <see cref="ServerHandle"/>.</param>
    internal void AttachForTests(MxGatewaySession session, int serverHandle)
    {
        ObjectDisposedException.ThrowIf(_disposed, this);
        _session = session ?? throw new ArgumentNullException(nameof(session));
        _serverHandle = serverHandle;
    }

    /// <summary>
    /// Returns the underlying gateway session. Null until <see cref="ConnectAsync"/> or
    /// <see cref="AttachForTests"/> runs. PR 4.3 / 4.4 use this to issue commands.
    /// </summary>
    public MxGatewaySession? Session => _session;

    /// <summary>
    /// Disposes the session and then the owned client (if any), logging rather than
    /// throwing on failure. Subsequent calls return immediately.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        if (_disposed) return;
        _disposed = true;

        if (_session is not null)
        {
            try { await _session.DisposeAsync().ConfigureAwait(false); }
            catch (Exception ex) { _logger.LogWarning(ex, "GalaxyMxSession session dispose failed (best-effort)"); }
        }
        _session = null;

        if (_ownedClient is not null)
        {
            try { await _ownedClient.DisposeAsync().ConfigureAwait(false); }
            catch (Exception ex) { _logger.LogWarning(ex, "GalaxyMxSession client dispose failed (best-effort)"); }
        }
        _ownedClient = null;
    }
}
|
||||
@@ -0,0 +1,12 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// Driver-internal subscription identity. The numeric id is allocated monotonically per
|
||||
/// driver; the diagnostic string carries the same id prefixed for log cross-referencing.
|
||||
/// </summary>
|
||||
internal sealed record GalaxySubscriptionHandle(long SubscriptionId) : ISubscriptionHandle
{
    /// <summary>The subscription id rendered with a <c>galaxy-sub-</c> prefix.</summary>
    public string DiagnosticId
    {
        get { return $"galaxy-sub-{SubscriptionId}"; }
    }
}
|
||||
@@ -0,0 +1,35 @@
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// PR 6.1 — In-box <see cref="ActivitySource"/> wired around every gw call the
|
||||
/// driver makes (Subscribe/Unsubscribe, Write/WriteSecured, GetHierarchy). The
|
||||
/// decorators in this folder produce one span per call, tagged with the inputs
|
||||
/// ops needs to triage a slow or failing operation:
|
||||
/// <c>galaxy.tag_count</c>, <c>galaxy.success_count</c>, <c>galaxy.client</c>.
|
||||
/// <para>
|
||||
/// The driver itself doesn't take a dependency on the OpenTelemetry packages —
|
||||
/// <c>System.Diagnostics.ActivitySource</c> is in the BCL. The host process
|
||||
/// decides which listener (OTLP exporter, Application Insights, dotnet-trace)
|
||||
/// subscribes to <see cref="ActivitySourceName"/>.
|
||||
/// </para>
|
||||
/// </summary>
|
||||
internal static class GalaxyTelemetry
{
    /// <summary>Name under which the driver's <see cref="ActivitySource"/> is published.</summary>
    public const string ActivitySourceName = "ZB.MOM.WW.OtOpcUa.Driver.Galaxy";

    /// <summary>Shared source from which the driver's spans are started.</summary>
    public static readonly ActivitySource ActivitySource = new(ActivitySourceName);

    /// <summary>
    /// Marks a span as failed: sets its status to <c>Error</c> with the exception message
    /// and records the exception's type name and message as tags. Does nothing when
    /// <paramref name="activity"/> is null.
    /// </summary>
    /// <param name="activity">Span to annotate; may be null when no listener is attached.</param>
    /// <param name="ex">Exception describing the failure.</param>
    public static void RecordError(this Activity? activity, Exception ex)
    {
        // The null-conditional short-circuits the whole chain, so ex is never touched
        // when there is no span.
        activity?
            .SetStatus(ActivityStatusCode.Error, ex.Message)
            .SetTag("exception.type", ex.GetType().FullName)
            .SetTag("exception.message", ex.Message);
    }
}
|
||||
@@ -0,0 +1,65 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using MxGateway.Client;
|
||||
using MxGateway.Contracts.Proto;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
/// Production <see cref="IGalaxyAlarmAcknowledger"/> backed by the
/// <c>MxGatewayClient.AcknowledgeAlarmAsync</c> RPC (PR E.2). When the reply carries
/// a status whose <c>Success</c> is zero, the failure is logged as a warning and not
/// thrown, so operator workflows aren't blocked by a transient MxAccess hiccup.
/// </summary>
/// <remarks>
/// NOTE(review): an earlier version of this summary said a non-OK protocol status is
/// mapped to a thrown exception. No such check is visible in this class — confirm
/// whether the client throws on its own or whether the check still needs adding.
/// </remarks>
internal sealed class GatewayGalaxyAlarmAcknowledger : IGalaxyAlarmAcknowledger
{
    private readonly MxGatewayClient _client;
    private readonly GalaxyMxSession _session;
    private readonly ILogger _logger;

    /// <summary>Creates an acknowledger over the given client and session.</summary>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public GatewayGalaxyAlarmAcknowledger(
        MxGatewayClient client,
        GalaxyMxSession session,
        ILogger logger)
    {
        ArgumentNullException.ThrowIfNull(client);
        ArgumentNullException.ThrowIfNull(session);
        ArgumentNullException.ThrowIfNull(logger);

        _client = client;
        _session = session;
        _logger = logger;
    }

    /// <summary>Sends an acknowledge request for one alarm through the gateway.</summary>
    /// <param name="alarmFullReference">Alarm to acknowledge; must be non-empty.</param>
    /// <param name="comment">Operator comment; null is sent as an empty string.</param>
    /// <param name="operatorUser">Operator identity; null is sent as an empty string.</param>
    /// <param name="cancellationToken">Cancels the RPC.</param>
    /// <exception cref="ArgumentException"><paramref name="alarmFullReference"/> is null or empty.</exception>
    /// <exception cref="InvalidOperationException">The wrapped session has no gateway session yet.</exception>
    public async Task AcknowledgeAsync(
        string alarmFullReference,
        string comment,
        string operatorUser,
        CancellationToken cancellationToken)
    {
        ArgumentException.ThrowIfNullOrEmpty(alarmFullReference);

        var gatewaySession = _session.Session;
        if (gatewaySession is null)
        {
            throw new InvalidOperationException(
                "GatewayGalaxyAlarmAcknowledger requires a connected GalaxyMxSession; underlying gateway session is null.");
        }

        var request = new AcknowledgeAlarmRequest
        {
            SessionId = gatewaySession.SessionId,
            ClientCorrelationId = Guid.NewGuid().ToString("N"),
            AlarmFullReference = alarmFullReference,
            Comment = comment ?? string.Empty,
            OperatorUser = operatorUser ?? string.Empty,
        };

        var reply = await _client.AcknowledgeAlarmAsync(request, cancellationToken).ConfigureAwait(false);

        if (reply.Status is { Success: 0 } status)
        {
            // The ack was rejected on the MxAccess side. Logged only, never thrown:
            // this is a best-effort workflow and the operator can retry via the OPC UA
            // session if necessary.
            _logger.LogWarning(
                "Galaxy AcknowledgeAlarm for {AlarmRef} returned MxStatus failure: category={Category} detail={Detail} text={Text}",
                alarmFullReference, status.Category, status.Detail, status.DiagnosticText);
        }
    }
}
|
||||
@@ -0,0 +1,162 @@
|
||||
using System.Collections.Concurrent;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using MxGateway.Client;
|
||||
using MxGateway.Contracts.Proto;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// Production <see cref="IGalaxyDataWriter"/> over <see cref="GalaxyMxSession"/>.
|
||||
/// For each batch entry: lazy-AddItem to obtain the MXAccess item handle, encode
|
||||
/// the value via <see cref="MxValueEncoder"/>, route through Write or WriteSecured
|
||||
/// based on the per-tag <see cref="SecurityClassification"/>, and translate the
|
||||
/// reply's <c>MxStatusProxy</c> into an OPC UA <see cref="WriteResult"/>.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Item handle cache survives across writes — repeated writes to the same tag avoid
|
||||
/// re-AddItem. Per-tag failures are isolated: one bad write doesn't fail the batch.
|
||||
/// PR 4.4 will share this cache with the subscription registry; for now it lives
|
||||
/// here so the writer is independently testable.
|
||||
/// </remarks>
|
||||
public sealed class GatewayGalaxyDataWriter : IGalaxyDataWriter
{
    private readonly GalaxyMxSession _session;
    private readonly int _writeUserId;
    private readonly ILogger _logger;

    // Full reference -> item handle, compared case-insensitively. Filled lazily on first write.
    private readonly ConcurrentDictionary<string, int> _itemHandles =
        new(StringComparer.OrdinalIgnoreCase);

    /// <summary>Creates a writer over the given session.</summary>
    /// <param name="session">Session used to issue commands. Required.</param>
    /// <param name="writeUserId">User id passed to the plain (unsecured) write call.</param>
    /// <param name="logger">Optional logger; a null logger is used when omitted.</param>
    /// <exception cref="ArgumentNullException"><paramref name="session"/> is null.</exception>
    public GatewayGalaxyDataWriter(GalaxyMxSession session, int writeUserId, ILogger? logger = null)
    {
        _session = session ?? throw new ArgumentNullException(nameof(session));
        _writeUserId = writeUserId;
        _logger = logger ?? NullLogger.Instance;
    }

    /// <summary>
    /// Writes each request in order and returns one result per request, in the same
    /// order. A failure on one entry — including a failure of
    /// <paramref name="securityResolver"/> for that entry — yields a bad status for
    /// that entry only; the rest of the batch still runs. Cancellation is the one
    /// condition that aborts the batch.
    /// </summary>
    /// <param name="writes">Requests to issue.</param>
    /// <param name="securityResolver">Returns the security classification for a full reference.</param>
    /// <param name="cancellationToken">Cancels the gateway calls.</param>
    /// <exception cref="ArgumentNullException"><paramref name="writes"/> or <paramref name="securityResolver"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The session is not connected.</exception>
    public async Task<IReadOnlyList<WriteResult>> WriteAsync(
        IReadOnlyList<WriteRequest> writes,
        Func<string, SecurityClassification> securityResolver,
        CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(writes);
        ArgumentNullException.ThrowIfNull(securityResolver);

        var session = _session.Session
            ?? throw new InvalidOperationException(
                "GalaxyMxSession is not connected. Call ConnectAsync before issuing writes.");
        var serverHandle = _session.ServerHandle;

        var results = new WriteResult[writes.Count];
        for (var i = 0; i < writes.Count; i++)
        {
            results[i] = await WriteOneAsync(session, serverHandle, writes[i], securityResolver, cancellationToken)
                .ConfigureAwait(false);
        }
        return results;
    }

    // Performs a single write and converts every non-cancellation failure into a bad WriteResult.
    private async Task<WriteResult> WriteOneAsync(
        MxGatewaySession session, int serverHandle, WriteRequest request,
        Func<string, SecurityClassification> securityResolver, CancellationToken ct)
    {
        // Resolve the classification inside the per-write boundary so a resolver
        // failure for one tag cannot abort the whole batch.
        SecurityClassification classification;
        try
        {
            classification = securityResolver(request.FullReference);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex,
                "GalaxyDriver write rejected — security classification lookup failed for {FullRef}",
                request.FullReference);
            return new WriteResult(StatusCodeMap.BadInternalError);
        }

        try
        {
            var itemHandle = await EnsureItemHandleAsync(session, serverHandle, request.FullReference, ct)
                .ConfigureAwait(false);
            var mxValue = MxValueEncoder.Encode(request.Value);

            var reply = NeedsSecuredWrite(classification)
                ? await InvokeWriteSecuredAsync(session, serverHandle, itemHandle, mxValue, ct).ConfigureAwait(false)
                : await session.WriteRawAsync(serverHandle, itemHandle, mxValue, _writeUserId, ct).ConfigureAwait(false);

            return TranslateReply(reply, request.FullReference);
        }
        catch (ArgumentException ex)
        {
            // Bad value type — caller passed a CLR type the encoder can't render.
            _logger.LogWarning(ex,
                "GalaxyDriver write rejected — unsupported value type for {FullRef}", request.FullReference);
            return new WriteResult(StatusCodeMap.BadInternalError);
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested) { throw; }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "GalaxyDriver write failed for {FullRef}", request.FullReference);
            return new WriteResult(StatusCodeMap.BadCommunicationError);
        }
    }

    // SecuredWrite and VerifiedWrite both go through the WriteSecured command.
    private static bool NeedsSecuredWrite(SecurityClassification classification) =>
        classification is SecurityClassification.SecuredWrite or SecurityClassification.VerifiedWrite;

    // Returns the cached item handle for a reference, calling AddItem on a cache miss.
    private async Task<int> EnsureItemHandleAsync(
        MxGatewaySession session, int serverHandle, string fullRef, CancellationToken ct)
    {
        if (_itemHandles.TryGetValue(fullRef, out var existing)) return existing;
        var handle = await session.AddItemAsync(serverHandle, fullRef, ct).ConfigureAwait(false);
        _itemHandles[fullRef] = handle;
        return handle;
    }

    /// <summary>
    /// Issue a WriteSecured command. The high-level session client doesn't expose
    /// <c>WriteSecuredAsync</c> as a typed method — we build the <see cref="MxCommand"/>
    /// directly and route through <c>InvokeAsync</c>. Verifier user is left at zero
    /// for SecuredWrite; VerifiedWrite uses the same path because the gw's worker
    /// interprets the underlying MXAccess command kind.
    /// </summary>
    private static Task<MxCommandReply> InvokeWriteSecuredAsync(
        MxGatewaySession session, int serverHandle, int itemHandle, MxValue value, CancellationToken ct)
    {
        var command = new MxCommand
        {
            Kind = MxCommandKind.WriteSecured,
            WriteSecured = new WriteSecuredCommand
            {
                ServerHandle = serverHandle,
                ItemHandle = itemHandle,
                Value = value,
                // NOTE(review): the plain write path passes the configured write user id,
                // while this path sends 0 as the current user — confirm that is intended.
                CurrentUserId = 0,
                VerifierUserId = 0,
            },
        };
        var request = new MxCommandRequest
        {
            SessionId = session.SessionId,
            ClientCorrelationId = Guid.NewGuid().ToString("N"),
            Command = command,
        };
        return session.InvokeAsync(request, ct);
    }

    /// <summary>
    /// Translate a gateway <see cref="MxCommandReply"/> into an OPC UA
    /// <see cref="WriteResult"/>. Honours the protocol-level Status field first
    /// (transport / dispatch failures), then the first MXAccess status row. A reply
    /// with neither is reported as good.
    /// </summary>
    private WriteResult TranslateReply(MxCommandReply reply, string fullRef)
    {
        // Protocol status — wraps transport / worker-side failures that happen before
        // MXAccess saw the command.
        if (reply.ProtocolStatus is { } proto && proto.Code != ProtocolStatusCode.Ok)
        {
            _logger.LogWarning(
                "GalaxyDriver write protocol failure {Code} for {FullRef}: {Message}",
                proto.Code, fullRef, proto.Message);
            return new WriteResult(StatusCodeMap.BadCommunicationError);
        }

        // MX-side status — the worker's WriteCompleteEvent rolls into the reply's
        // statuses array. Use the first row (single-write contract).
        if (reply.Statuses.Count > 0)
        {
            var status = reply.Statuses[0];
            return new WriteResult(StatusCodeMap.FromMxStatus(status, _logger));
        }

        return new WriteResult(StatusCodeMap.Good);
    }
}
|
||||
@@ -0,0 +1,116 @@
|
||||
using MxGateway.Client;
|
||||
using MxGateway.Contracts.Proto;
|
||||
// Use the generated nested status enum for the SetBufferedUpdateInterval reply check.
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
/// Production <see cref="IGalaxySubscriber"/> over a connected
/// <see cref="GalaxyMxSession"/>. Forwards SubscribeBulk / UnsubscribeBulk to the
/// gateway and streams MxEvents via the gw's events RPC.
/// </summary>
/// <remarks>
/// <para>
/// PR 6.3 wired the per-call <c>buffered_update_interval_ms</c> through
/// <see cref="SubscribeBulkAsync"/>. The gw's contract is session-level
/// (<c>SetBufferedUpdateInterval</c> applies to all buffered subscriptions on the
/// server handle), so we cache the last-applied value and skip redundant calls.
/// </para>
/// <para>
/// The cache remembers which gateway session instance and server handle the
/// interval was applied to. When either differs from the current call (for example
/// after the underlying session has been re-opened) the command is sent again
/// instead of being skipped on the strength of a stale value.
/// </para>
/// </remarks>
public sealed class GatewayGalaxySubscriber : IGalaxySubscriber
{
    private readonly GalaxyMxSession _session;
    private readonly Lock _intervalLock = new();

    // Cache of the last successfully applied interval, guarded by _intervalLock.
    // _intervalSession / _intervalServerHandle identify what it was applied to.
    private MxGateway.Client.MxGatewaySession? _intervalSession;
    private int _intervalServerHandle;
    private int _lastAppliedIntervalMs = -1; // -1 = never applied

    /// <summary>Creates a subscriber bound to <paramref name="session"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="session"/> is null.</exception>
    public GatewayGalaxySubscriber(GalaxyMxSession session)
    {
        _session = session ?? throw new ArgumentNullException(nameof(session));
    }

    /// <summary>
    /// Subscribes the supplied references on the current gateway session. When
    /// <paramref name="bufferedUpdateIntervalMs"/> is positive the session-level
    /// buffered interval is applied first (unless already applied).
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="fullReferences"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The wrapped session is not connected.</exception>
    public async Task<IReadOnlyList<SubscribeResult>> SubscribeBulkAsync(
        IReadOnlyList<string> fullReferences, int bufferedUpdateIntervalMs, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(fullReferences);

        var session = _session.Session
            ?? throw new InvalidOperationException(
                "GalaxyMxSession is not connected. Call ConnectAsync before subscribing.");
        var serverHandle = _session.ServerHandle;

        // The gw's SubscribeBulk RPC doesn't carry a per-call interval — buffered cadence
        // is session-level, set via SetBufferedUpdateInterval. Apply it before the
        // SubscribeBulk so the very first events on the new handles publish at the
        // requested cadence. Non-positive values leave the gateway cadence untouched.
        if (bufferedUpdateIntervalMs > 0)
        {
            await EnsureSessionIntervalAsync(session, serverHandle, bufferedUpdateIntervalMs, cancellationToken)
                .ConfigureAwait(false);
        }

        return await session.SubscribeBulkAsync(serverHandle, fullReferences, cancellationToken)
            .ConfigureAwait(false);
    }

    /// <summary>
    /// Apply the gateway's session-level <c>SetBufferedUpdateInterval</c> command. The
    /// gw's contract is "for this server handle, every buffered subscription publishes
    /// at this cadence" — there's no per-handle granularity, so we cache the last
    /// applied value (per session instance + server handle) and skip redundant calls.
    /// </summary>
    private async Task EnsureSessionIntervalAsync(
        MxGateway.Client.MxGatewaySession session, int serverHandle, int intervalMs, CancellationToken cancellationToken)
    {
        lock (_intervalLock)
        {
            var alreadyApplied =
                ReferenceEquals(_intervalSession, session)
                && _intervalServerHandle == serverHandle
                && _lastAppliedIntervalMs == intervalMs;
            if (alreadyApplied)
            {
                return;
            }
        }

        var reply = await session.InvokeAsync(
            new MxCommandRequest
            {
                SessionId = session.SessionId,
                ClientCorrelationId = Guid.NewGuid().ToString("N"),
                Command = new MxCommand
                {
                    Kind = MxCommandKind.SetBufferedUpdateInterval,
                    SetBufferedUpdateInterval = new SetBufferedUpdateIntervalCommand
                    {
                        ServerHandle = serverHandle,
                        UpdateIntervalMilliseconds = intervalMs,
                    },
                },
            },
            cancellationToken).ConfigureAwait(false);

        // Only Ok and MxaccessFailure replies update the cache; anything else
        // (including a reply with no protocol status) is treated as a soft failure.
        // NOTE(review): MxaccessFailure is cached as "applied", which prevents the
        // command from being retried on every subscribe — confirm that is intended.
        if (reply.ProtocolStatus?.Code is not (ProtocolStatusCode.Ok or ProtocolStatusCode.MxaccessFailure))
        {
            // Don't throw on a soft failure — the SubscribeBulk will still succeed at the
            // gw's default cadence, which is functional just not the requested cadence.
            // Nothing is logged here (this class has no logger); the cache is left
            // untouched so the next subscribe call tries again.
            return;
        }

        lock (_intervalLock)
        {
            _intervalSession = session;
            _intervalServerHandle = serverHandle;
            _lastAppliedIntervalMs = intervalMs;
        }
    }

    /// <summary>
    /// Unsubscribes the supplied item handles on the current gateway session. An empty
    /// list returns immediately without touching the session.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="itemHandles"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The wrapped session is not connected.</exception>
    public async Task UnsubscribeBulkAsync(IReadOnlyList<int> itemHandles, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(itemHandles);
        if (itemHandles.Count == 0)
        {
            return;
        }

        var session = _session.Session
            ?? throw new InvalidOperationException(
                "GalaxyMxSession is not connected. UnsubscribeBulk called after disconnect.");
        var serverHandle = _session.ServerHandle;

        await session.UnsubscribeBulkAsync(serverHandle, itemHandles, cancellationToken)
            .ConfigureAwait(false);
    }

    /// <summary>
    /// Returns the gateway event stream for the current session, requested from
    /// worker sequence 0.
    /// </summary>
    /// <exception cref="InvalidOperationException">The wrapped session is not connected.</exception>
    public IAsyncEnumerable<MxEvent> StreamEventsAsync(CancellationToken cancellationToken)
    {
        var session = _session.Session
            ?? throw new InvalidOperationException(
                "GalaxyMxSession is not connected. StreamEventsAsync called before ConnectAsync.");
        return session.StreamEventsAsync(afterWorkerSequence: 0, cancellationToken);
    }
}
|
||||
@@ -0,0 +1,32 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
/// Abstraction over the gateway-side alarm Acknowledge call. The production
/// implementation is described as wrapping the
/// <c>MxGatewayClient.AcknowledgeAlarmAsync</c> RPC; tests plug in a fake so that
/// <see cref="GalaxyDriver.AcknowledgeAsync"/> can run without a live gateway.
/// </summary>
internal interface IGalaxyAlarmAcknowledger
{
    /// <summary>
    /// Sends one alarm acknowledgement to the gateway, which (per the contract
    /// described for this seam) turns it into an MxAccess Acknowledge call on the
    /// worker's session and reports the native MxStatus on its reply.
    /// </summary>
    /// <param name="alarmFullReference">
    /// Fully-qualified alarm reference, for example <c>"Tank01.Level.HiHi"</c>.
    /// </param>
    /// <param name="comment">Comment entered by the operator; passed on to MxAccess.</param>
    /// <param name="operatorUser">
    /// Principal performing the acknowledgement. The server-side ACL layer resolves
    /// it from the OPC UA session before the call reaches the driver.
    /// </param>
    /// <param name="cancellationToken">Cancels the gateway RPC.</param>
    Task AcknowledgeAsync(string alarmFullReference, string comment, string operatorUser, CancellationToken cancellationToken);
}
|
||||
@@ -0,0 +1,27 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
/// Seam used by the driver for one-shot reads. The production implementation
/// (PR 4.4) is described as obtaining values through <c>MxGatewaySession</c>'s
/// SubscribeBulk + StreamEvents path; tests supply a fake that returns canned
/// snapshots.
/// </summary>
/// <remarks>
/// Kept intentionally small: there is no single-tag overload and no continuation
/// point. The driver-side <c>IReadable.ReadAsync</c> contract promises one value per
/// requested tag, in input order, and expresses per-tag failures through status
/// codes (e.g. BadInternalError for a transport failure on one tag, BadOutOfService
/// for a tag the gateway did not recognise).
/// </remarks>
public interface IGalaxyDataReader
{
    /// <summary>
    /// Reads every entry of <paramref name="fullReferences"/> once.
    /// </summary>
    /// <param name="fullReferences">Full tag references to read.</param>
    /// <param name="cancellationToken">Cancels the read.</param>
    /// <returns>
    /// One <see cref="DataValueSnapshot"/> per requested reference, in input order.
    /// Implementations MUST return exactly as many snapshots as references were
    /// supplied; a tag that fails is reported as a Bad-quality snapshot rather than
    /// being left out.
    /// </returns>
    Task<IReadOnlyList<DataValueSnapshot>> ReadAsync(IReadOnlyList<string> fullReferences, CancellationToken cancellationToken);
}
|
||||
@@ -0,0 +1,33 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
/// Seam used by the driver for batched writes. The production implementation is
/// described as routing on <see cref="SecurityClassification"/>: SecuredWrite and
/// VerifiedWrite tags go through <c>MxCommandKind.WriteSecured</c>, all other tags
/// through <c>MxGatewaySession.WriteAsync</c>. Tests supply a fake that records the
/// routing decisions without needing real gw infrastructure.
/// </summary>
public interface IGalaxyDataWriter
{
    /// <summary>
    /// Writes every entry of <paramref name="writes"/>.
    /// </summary>
    /// <param name="writes">Full reference + value pairs to write.</param>
    /// <param name="securityResolver">
    /// Looks up the discovered <see cref="SecurityClassification"/> for a full
    /// reference so SecuredWrite / VerifiedWrite tags can be sent with the
    /// <c>WriteSecured</c> command rather than <c>Write</c>. For a tag that is not
    /// tracked it returns <see cref="SecurityClassification.FreeAccess"/>, i.e. the
    /// plain non-secured Write.
    /// </param>
    /// <param name="cancellationToken">Aborts the in-flight batch.</param>
    /// <returns>
    /// One <see cref="WriteResult"/> per request entry, in input order.
    /// Implementations MUST return exactly as many results as writes were supplied;
    /// a tag that fails is reported as a Bad-status result rather than being left out.
    /// </returns>
    Task<IReadOnlyList<WriteResult>> WriteAsync(IReadOnlyList<WriteRequest> writes, Func<string, SecurityClassification> securityResolver, CancellationToken cancellationToken);
}
|
||||
@@ -0,0 +1,32 @@
|
||||
using MxGateway.Contracts.Proto;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
/// Seam used by the driver for subscription lifecycle and the inbound event stream.
/// The production implementation wraps <c>MxGatewaySession.SubscribeBulkAsync</c>,
/// <c>UnsubscribeBulkAsync</c> and <c>StreamEventsAsync</c>; tests supply a fake so
/// synthetic events can be pushed through the <see cref="EventPump"/> without a real gw.
/// </summary>
public interface IGalaxySubscriber
{
    /// <summary>
    /// Subscribes a batch of tag full references.
    /// </summary>
    /// <param name="fullReferences">Tag full references to subscribe.</param>
    /// <param name="bufferedUpdateIntervalMs">
    /// Requested buffered update interval in milliseconds. How the value is honoured
    /// is up to the implementation.
    /// </param>
    /// <param name="cancellationToken">Cancels the subscribe call.</param>
    /// <returns>
    /// One <see cref="SubscribeResult"/> per request entry, in input order. A tag the
    /// gateway rejected carries a non-zero status and an item handle that is zero or
    /// negative; callers treat such entries as per-tag failures, not as a failure of
    /// the whole call.
    /// </returns>
    Task<IReadOnlyList<SubscribeResult>> SubscribeBulkAsync(
        IReadOnlyList<string> fullReferences,
        int bufferedUpdateIntervalMs,
        CancellationToken cancellationToken);

    /// <summary>
    /// Unsubscribes item handles previously returned by <see cref="SubscribeBulkAsync"/>.
    /// </summary>
    /// <param name="itemHandles">Gateway item handles to release.</param>
    /// <param name="cancellationToken">Cancels the unsubscribe call.</param>
    Task UnsubscribeBulkAsync(
        IReadOnlyList<int> itemHandles,
        CancellationToken cancellationToken);

    /// <summary>
    /// Long-running consumer of the gateway's <c>StreamEvents</c> RPC. Every
    /// <see cref="MxEvent"/> it yields carries the gw item handle, which the caller
    /// matches against its <see cref="SubscriptionRegistry"/>.
    /// </summary>
    /// <param name="cancellationToken">Ends the stream.</param>
    IAsyncEnumerable<MxEvent> StreamEventsAsync(
        CancellationToken cancellationToken);
}
|
||||
@@ -0,0 +1,55 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
/// Converts a raw MXAccess alarm severity (nominally 0-999) into an
/// <see cref="AlarmSeverity"/> bucket together with an OPC UA Part 9 numeric
/// severity (1-1000).
/// </summary>
/// <remarks>
/// <para>
/// The four OPC UA values (250 / 500 / 750 / 1000 for Low / Medium / High /
/// Critical) are, per the original notes, the same ladder v1's
/// <c>GalaxyAlarmTracker</c> exposed (see <c>docs/v1/AlarmTracking.md</c>), and the
/// bucket boundaries are kept identical so alarms are not re-classified when the
/// v2 path takes over.
/// </para>
/// <para>
/// Values outside 0-999 are not rejected: anything below 250 (including negative
/// numbers) lands in Low and anything from 750 upwards lands in Critical, so an
/// out-of-range severity still flows through the alarm path.
/// </para>
/// </remarks>
internal static class MxAccessSeverityMapper
{
    /// <summary>OPC UA Part 9 numeric severity reported for the Low bucket (raw values below 250).</summary>
    public const int OpcUaSeverityLow = 250;

    /// <summary>OPC UA Part 9 numeric severity reported for the Medium bucket (raw values 250-499).</summary>
    public const int OpcUaSeverityMedium = 500;

    /// <summary>OPC UA Part 9 numeric severity reported for the High bucket (raw values 500-749).</summary>
    public const int OpcUaSeverityHigh = 750;

    /// <summary>OPC UA Part 9 numeric severity reported for the Critical bucket (raw values 750 and up).</summary>
    public const int OpcUaSeverityCritical = 1000;

    /// <summary>
    /// Picks the bucket and OPC UA severity for a raw MXAccess severity.
    /// </summary>
    /// <param name="rawMxAccessSeverity">Severity as reported by MXAccess.</param>
    /// <returns>The matching <see cref="AlarmSeverity"/> and its OPC UA numeric severity.</returns>
    public static (AlarmSeverity Bucket, int OpcUaSeverity) Map(int rawMxAccessSeverity) =>
        rawMxAccessSeverity switch
        {
            // Arms are tested top to bottom, so each bound implies the previous one failed.
            < 250 => (AlarmSeverity.Low, OpcUaSeverityLow),
            < 500 => (AlarmSeverity.Medium, OpcUaSeverityMedium),
            < 750 => (AlarmSeverity.High, OpcUaSeverityHigh),
            _ => (AlarmSeverity.Critical, OpcUaSeverityCritical),
        };
}
|
||||
@@ -0,0 +1,54 @@
|
||||
using Google.Protobuf.WellKnownTypes;
|
||||
using MxGateway.Contracts.Proto;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
/// Converts gateway-side <see cref="MxValue"/> instances into the boxed CLR objects
/// carried by <c>DataValueSnapshot.Value</c>. Covers the scalar kinds Boolean, Int32,
/// Int64, Float32, Float64, String and DateTime, plus the array variants exposed by
/// <see cref="MxArray"/>. A value whose kind is <c>raw_value</c> is surfaced as its
/// bytes so the consumer may deserialise it; a value with no recognised kind decodes
/// to <c>null</c>.
/// </summary>
internal static class MxValueDecoder
{
    /// <summary>
    /// Decodes one <see cref="MxValue"/>.
    /// </summary>
    /// <param name="value">Gateway value; may be null.</param>
    /// <returns>
    /// The boxed CLR value, a typed array, a <c>byte[]</c> for raw values, or
    /// <c>null</c> when the input is null, flagged <c>IsNull</c>, or of an
    /// unrecognised kind.
    /// </returns>
    public static object? Decode(MxValue? value)
    {
        if (value is null || value.IsNull)
        {
            return null;
        }

        switch (value.KindCase)
        {
            case MxValue.KindOneofCase.BoolValue:
                return value.BoolValue;
            case MxValue.KindOneofCase.Int32Value:
                return value.Int32Value;
            case MxValue.KindOneofCase.Int64Value:
                return value.Int64Value;
            case MxValue.KindOneofCase.FloatValue:
                return value.FloatValue;
            case MxValue.KindOneofCase.DoubleValue:
                return value.DoubleValue;
            case MxValue.KindOneofCase.StringValue:
                return value.StringValue;
            case MxValue.KindOneofCase.TimestampValue:
                return DecodeTimestamp(value.TimestampValue);
            case MxValue.KindOneofCase.ArrayValue:
                return DecodeArray(value.ArrayValue);
            case MxValue.KindOneofCase.RawValue:
                return value.RawValue.ToByteArray();
            default:
                return null;
        }
    }

    /// <summary>Converts a protobuf timestamp to <see cref="DateTime"/>, passing null through.</summary>
    private static DateTime? DecodeTimestamp(Timestamp? ts)
    {
        if (ts is null)
        {
            return null;
        }

        return ts.ToDateTime();
    }

    /// <summary>
    /// Copies the populated element list of <paramref name="array"/> into a typed CLR
    /// array. Returns null for a null array or one with no recognised element kind.
    /// </summary>
    private static object? DecodeArray(MxArray? array)
    {
        if (array is null)
        {
            return null;
        }

        switch (array.ValuesCase)
        {
            case MxArray.ValuesOneofCase.BoolValues:
                return array.BoolValues.Values.ToArray();
            case MxArray.ValuesOneofCase.Int32Values:
                return array.Int32Values.Values.ToArray();
            case MxArray.ValuesOneofCase.Int64Values:
                return array.Int64Values.Values.ToArray();
            case MxArray.ValuesOneofCase.FloatValues:
                return array.FloatValues.Values.ToArray();
            case MxArray.ValuesOneofCase.DoubleValues:
                return array.DoubleValues.Values.ToArray();
            case MxArray.ValuesOneofCase.StringValues:
                return array.StringValues.Values.ToArray();
            case MxArray.ValuesOneofCase.TimestampValues:
                return array.TimestampValues.Values.Select(stamp => stamp.ToDateTime()).ToArray();
            default:
                return null;
        }
    }
}
|
||||
@@ -0,0 +1,85 @@
|
||||
using Google.Protobuf.WellKnownTypes;
|
||||
using MxGateway.Contracts.Proto;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
/// Converts boxed CLR values from <c>WriteRequest.Value</c> into gateway-side
/// <see cref="MxValue"/> instances; the inverse of <see cref="MxValueDecoder"/>.
/// Supports Boolean, the integer types that fit Int32 / Int64, Float32, Float64,
/// String, DateTime / DateTimeOffset, and one-dimensional arrays of bool, int, long,
/// float, double, string and DateTime. A null input becomes an <see cref="MxValue"/>
/// flagged <c>IsNull</c>; any other unsupported input throws
/// <see cref="ArgumentException"/> so the IWritable caller can fail the write with a
/// clear status code instead of sending a mis-typed payload.
/// </summary>
internal static class MxValueEncoder
{
    /// <summary>
    /// Encodes one CLR value.
    /// </summary>
    /// <param name="value">Value to encode; null is allowed.</param>
    /// <returns>The encoded <see cref="MxValue"/>.</returns>
    /// <exception cref="ArgumentException">
    /// The runtime type is not supported, or an unsigned value is too large for the
    /// signed field it maps to.
    /// </exception>
    public static MxValue Encode(object? value)
    {
        if (value is null)
        {
            return new MxValue { IsNull = true };
        }

        return value switch
        {
            bool flag => new MxValue { BoolValue = flag },
            sbyte int8 => new MxValue { Int32Value = int8 },
            short int16 => new MxValue { Int32Value = int16 },
            int int32 => new MxValue { Int32Value = int32 },
            byte uint8 => new MxValue { Int32Value = uint8 },
            ushort uint16 => new MxValue { Int32Value = uint16 },
            // Unsigned values are only accepted while they fit the signed wire field.
            uint uint32 when uint32 <= int.MaxValue => new MxValue { Int32Value = (int)uint32 },
            long int64 => new MxValue { Int64Value = int64 },
            ulong uint64 when uint64 <= long.MaxValue => new MxValue { Int64Value = (long)uint64 },
            float single => new MxValue { FloatValue = single },
            double dbl => new MxValue { DoubleValue = dbl },
            string text => new MxValue { StringValue = text },
            DateTime moment => new MxValue { TimestampValue = Timestamp.FromDateTime(EnsureUtc(moment)) },
            DateTimeOffset offsetMoment => new MxValue { TimestampValue = Timestamp.FromDateTimeOffset(offsetMoment) },

            bool[] flags => WrapArray(new MxArray { BoolValues = ToBoolArray(flags) }, flags.Length),
            int[] ints => WrapArray(new MxArray { Int32Values = ToInt32Array(ints) }, ints.Length),
            long[] longs => WrapArray(new MxArray { Int64Values = ToInt64Array(longs) }, longs.Length),
            float[] singles => WrapArray(new MxArray { FloatValues = ToFloatArray(singles) }, singles.Length),
            double[] doubles => WrapArray(new MxArray { DoubleValues = ToDoubleArray(doubles) }, doubles.Length),
            string[] texts => WrapArray(new MxArray { StringValues = ToStringArray(texts) }, texts.Length),
            DateTime[] moments => WrapArray(new MxArray { TimestampValues = ToTimestampArray(moments) }, moments.Length),

            _ => throw new ArgumentException(
                $"Cannot encode value of type {value.GetType()} as MxValue. Supported: " +
                "bool, int / long (and their unsigned variants), float, double, string, DateTime, " +
                "and their 1-D array variants.",
                nameof(value)),
        };
    }

    /// <summary>
    /// Records the single dimension on an already-populated <see cref="MxArray"/> and
    /// wraps it in an <see cref="MxValue"/>.
    /// </summary>
    private static MxValue WrapArray(MxArray payload, int length)
    {
        payload.Dimensions.Add((uint)length);
        return new MxValue { ArrayValue = payload };
    }

    private static BoolArray ToBoolArray(bool[] vs)
    {
        var target = new BoolArray();
        target.Values.AddRange(vs);
        return target;
    }

    private static Int32Array ToInt32Array(int[] vs)
    {
        var target = new Int32Array();
        target.Values.AddRange(vs);
        return target;
    }

    private static Int64Array ToInt64Array(long[] vs)
    {
        var target = new Int64Array();
        target.Values.AddRange(vs);
        return target;
    }

    private static FloatArray ToFloatArray(float[] vs)
    {
        var target = new FloatArray();
        target.Values.AddRange(vs);
        return target;
    }

    private static DoubleArray ToDoubleArray(double[] vs)
    {
        var target = new DoubleArray();
        target.Values.AddRange(vs);
        return target;
    }

    private static StringArray ToStringArray(string[] vs)
    {
        var target = new StringArray();
        target.Values.AddRange(vs);
        return target;
    }

    private static TimestampArray ToTimestampArray(DateTime[] vs)
    {
        var target = new TimestampArray();
        for (var index = 0; index < vs.Length; index++)
        {
            target.Values.Add(Timestamp.FromDateTime(EnsureUtc(vs[index])));
        }

        return target;
    }

    /// <summary>
    /// <see cref="Timestamp.FromDateTime"/> requires a UTC value. Local times are
    /// converted to UTC; values of unspecified kind are relabelled as UTC without
    /// shifting the clock reading.
    /// </summary>
    private static DateTime EnsureUtc(DateTime dt)
    {
        if (dt.Kind == DateTimeKind.Utc)
        {
            return dt;
        }

        if (dt.Kind == DateTimeKind.Local)
        {
            return dt.ToUniversalTime();
        }

        return DateTime.SpecifyKind(dt, DateTimeKind.Utc);
    }
}
|
||||
@@ -0,0 +1,268 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
/// Coordinates GalaxyDriver's recovery from gateway transport failure. Drives a
/// state machine — <c>Healthy → TransportLost → Reopening → Replaying → Healthy</c>
/// — and exposes the current state through a snapshot + change event so the
/// driver's <c>DriverHealth</c> reflects <c>Degraded</c> while we're not in
/// <c>Healthy</c>.
/// </summary>
/// <remarks>
/// <para>
/// The supervisor doesn't own the session, the subscription registry, or the
/// event pump. It receives transport-failure signals from the rest of the
/// driver (EventPump throws, a gw RPC raises, the heartbeat times out), runs
/// a one-attempt-at-a-time recovery loop, and lets the rest of the driver
/// continue serving cached state during recovery.
/// </para>
/// <para>
/// <b>Reopen</b>: caller-supplied callback that re-opens the gw session +
/// re-Registers the MXAccess client. Throws on failure.
/// </para>
/// <para>
/// <b>Replay</b>: caller-supplied callback that re-establishes every active
/// subscription. Production wraps gw's <c>ReplaySubscriptionsCommand</c>
/// (mxaccessgw issue #0.3); when that's not available, the callback falls
/// back to walking the SubscriptionRegistry and re-issuing SubscribeBulk for
/// every tracked tag.
/// </para>
/// <para>
/// Backoff is capped exponential — first retry after
/// <see cref="ReconnectOptions.InitialBackoff"/>, doubled per failed attempt,
/// capped at <see cref="ReconnectOptions.MaxBackoff"/>. Persistent failures keep
/// the loop retrying indefinitely; the supervisor never gives up on its own —
/// operators / Phase 6.4 soak handle that policy.
/// </para>
/// <para>
/// <see cref="StateChanged"/> handlers are invoked while the internal state lock
/// is held, so they should return quickly and must not block on other threads
/// that query this supervisor.
/// </para>
/// </remarks>
public sealed class ReconnectSupervisor : IDisposable
{
    /// <summary>Recovery state machine.</summary>
    public enum State
    {
        Healthy,
        TransportLost,
        Reopening,
        Replaying,
    }

    // Largest left shift that keeps a positive long positive; also keeps the shift
    // count well below 64, where C# would mask it and wrap the multiplier back to 1.
    private const int MaxBackoffDoublings = 62;

    private readonly Func<CancellationToken, Task> _reopen;
    private readonly Func<CancellationToken, Task> _replay;
    private readonly ReconnectOptions _options;
    private readonly ILogger _logger;
    private readonly Func<int, TimeSpan, TimeSpan, TimeSpan>? _backoffDelay;

    // Everything below is guarded by _stateLock.
    private readonly Lock _stateLock = new();
    private State _state = State.Healthy;
    private string? _lastError;
    private DateTime? _lastTransitionUtc;

    private Task? _recoveryLoop;
    private CancellationTokenSource? _loopCts;
    private bool _disposed;

    /// <summary>Fires after every state transition.</summary>
    public event EventHandler<StateTransition>? StateChanged;

    /// <summary>Creates a supervisor in the <see cref="State.Healthy"/> state.</summary>
    /// <param name="reopen">Callback that re-opens the gateway session; throws on failure.</param>
    /// <param name="replay">Callback that re-establishes subscriptions; throws on failure.</param>
    /// <param name="options">Backoff knobs; defaults are used when null.</param>
    /// <param name="logger">Logger; a null logger is used when null.</param>
    /// <param name="backoffDelay">
    /// Optional override for the delay computation. Receives the attempt number
    /// (2 for the first retry), the initial backoff and the maximum backoff.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="reopen"/> or <paramref name="replay"/> is null.</exception>
    public ReconnectSupervisor(
        Func<CancellationToken, Task> reopen,
        Func<CancellationToken, Task> replay,
        ReconnectOptions? options = null,
        ILogger? logger = null,
        Func<int, TimeSpan, TimeSpan, TimeSpan>? backoffDelay = null)
    {
        _reopen = reopen ?? throw new ArgumentNullException(nameof(reopen));
        _replay = replay ?? throw new ArgumentNullException(nameof(replay));
        _options = options ?? new ReconnectOptions();
        _logger = logger ?? NullLogger.Instance;
        _backoffDelay = backoffDelay;
    }

    /// <summary>Current state. Healthy = fully recovered + subscriptions live.</summary>
    public State CurrentState
    {
        get { lock (_stateLock) return _state; }
    }

    /// <summary>True when CurrentState != Healthy. Drivers map this to DriverState.Degraded.</summary>
    public bool IsDegraded
    {
        get { lock (_stateLock) return _state != State.Healthy; }
    }

    /// <summary>
    /// Message of the most recently reported failure or failed attempt; null once a
    /// recovery cycle has completed successfully.
    /// </summary>
    public string? LastError
    {
        get { lock (_stateLock) return _lastError; }
    }

    /// <summary>UTC time of the most recent state transition; null before the first one.</summary>
    public DateTime? LastTransitionUtc
    {
        get { lock (_stateLock) return _lastTransitionUtc; }
    }

    /// <summary>
    /// Notify the supervisor that a gw transport failure has been observed. Idempotent —
    /// repeated calls during an in-flight recovery do not start a parallel loop. The
    /// first call spawns a background task that drives reopen → replay until it
    /// succeeds or <see cref="Dispose"/> cancels it.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="cause"/> is null.</exception>
    /// <exception cref="ObjectDisposedException">The supervisor has been disposed.</exception>
    public void ReportTransportFailure(Exception cause)
    {
        ArgumentNullException.ThrowIfNull(cause);

        lock (_stateLock)
        {
            // Checked under the lock so a loop can never be started after Dispose has
            // already collected (and cancelled) the previous one.
            ObjectDisposedException.ThrowIf(_disposed, this);

            _lastError = cause.Message;
            if (_state != State.Healthy)
            {
                // Already recovering — nothing else to do.
                _logger.LogDebug("Transport failure reported during {State}: {Message}", _state, cause.Message);
                return;
            }

            TransitionLocked(State.TransportLost, cause.Message);

            // The state was Healthy, so any earlier loop has already finished with its
            // token; release that cycle's CTS before replacing it.
            _loopCts?.Dispose();

            var cts = new CancellationTokenSource();
            // Capture the token now: reading _loopCts from inside the lambda would race
            // with Dispose nulling the field before the thread-pool work item runs.
            var token = cts.Token;
            _loopCts = cts;
            _recoveryLoop = Task.Run(() => RecoveryLoopAsync(token));
        }
    }

    /// <summary>
    /// Wait until the current recovery cycle reaches Healthy or the supplied token
    /// is cancelled. Returns immediately when already Healthy. Useful for tests
    /// and for orchestration that wants to gate calls on recovery completing.
    /// </summary>
    /// <remarks>
    /// Polls every 50 ms. Cancellation during a wait surfaces as
    /// <see cref="OperationCanceledException"/>; a token that is already cancelled
    /// on entry makes the method return without throwing.
    /// </remarks>
    public async Task WaitForHealthyAsync(CancellationToken cancellationToken)
    {
        while (!cancellationToken.IsCancellationRequested && IsDegraded)
        {
            await Task.Delay(50, cancellationToken).ConfigureAwait(false);
        }
    }

    /// <summary>
    /// Background loop: backoff (from the second attempt on) → reopen → replay,
    /// repeated until both callbacks succeed or <paramref name="ct"/> is cancelled.
    /// </summary>
    private async Task RecoveryLoopAsync(CancellationToken ct)
    {
        var attempt = 0;
        while (!ct.IsCancellationRequested)
        {
            attempt++;
            if (attempt > 1)
            {
                var delay = ComputeBackoff(attempt);
                _logger.LogInformation(
                    "Galaxy reconnect attempt {Attempt} — waiting {Delay} before retry", attempt, delay);
                try { await Task.Delay(delay, ct).ConfigureAwait(false); }
                catch (OperationCanceledException) { return; }
            }

            // === Reopening phase ===
            lock (_stateLock) TransitionLocked(State.Reopening, _lastError);

            try
            {
                await _reopen(ct).ConfigureAwait(false);
            }
            catch (OperationCanceledException) when (ct.IsCancellationRequested) { return; }
            catch (Exception ex)
            {
                _logger.LogWarning(ex, "Galaxy reopen failed (attempt {Attempt})", attempt);
                lock (_stateLock) { _lastError = ex.Message; }
                continue; // back to backoff + retry
            }

            // === Replaying phase ===
            lock (_stateLock) TransitionLocked(State.Replaying, _lastError);

            try
            {
                await _replay(ct).ConfigureAwait(false);
            }
            catch (OperationCanceledException) when (ct.IsCancellationRequested) { return; }
            catch (Exception ex)
            {
                _logger.LogWarning(ex, "Galaxy replay failed (attempt {Attempt})", attempt);
                lock (_stateLock) { _lastError = ex.Message; }
                continue; // back to backoff + retry
            }

            // === Done ===
            lock (_stateLock)
            {
                _lastError = null;
                TransitionLocked(State.Healthy, null);
            }
            _logger.LogInformation("Galaxy reconnect succeeded after {Attempt} attempt(s)", attempt);
            return;
        }
    }

    /// <summary>
    /// Delay to wait before <paramref name="attempt"/> (2 = first retry). Uses the
    /// injected delegate when one was supplied, otherwise capped exponential backoff.
    /// </summary>
    private TimeSpan ComputeBackoff(int attempt)
    {
        if (_backoffDelay is not null)
            return _backoffDelay(attempt, _options.InitialBackoff, _options.MaxBackoff);

        // Standard capped exponential — InitialBackoff * 2^(attempt-2), capped at MaxBackoff.
        // Attempt 2 → InitialBackoff, attempt 3 → 2x, attempt 4 → 4x, etc.
        var initialTicks = _options.InitialBackoff.Ticks;
        var maxTicks = _options.MaxBackoff.Ticks;

        // Clamp the exponent: C# masks a long shift count to its low 6 bits, so an
        // unclamped `1L << (attempt - 2)` wraps back to 1 once attempt reaches 66 and
        // the delay would collapse to InitialBackoff in the middle of a long outage.
        var doublings = Math.Clamp(attempt - 2, 0, MaxBackoffDoublings);

        // initialTicks * 2^doublings > maxTicks  <=>  initialTicks > maxTicks >> doublings
        // for positive values; comparing this way avoids overflowing the product.
        if (initialTicks <= 0 || initialTicks > (maxTicks >> doublings))
            return _options.MaxBackoff;

        return TimeSpan.FromTicks(initialTicks << doublings);
    }

    /// <summary>
    /// Records a transition and raises <see cref="StateChanged"/>. Must be called
    /// with <c>_stateLock</c> held. A no-op when <paramref name="next"/> equals the
    /// current state. Exceptions thrown by handlers are logged and swallowed.
    /// </summary>
    private void TransitionLocked(State next, string? cause)
    {
        if (next == _state) return;
        var previous = _state;
        _state = next;
        _lastTransitionUtc = DateTime.UtcNow;
        var transition = new StateTransition(previous, next, cause, _lastTransitionUtc.Value);
        try { StateChanged?.Invoke(this, transition); }
        catch (Exception ex)
        {
            _logger.LogWarning(ex,
                "Galaxy reconnect StateChanged handler threw — continuing.");
        }
    }

    /// <summary>
    /// Cancels any in-flight recovery loop and waits for it to finish. Safe to call
    /// more than once. The state is left as it was at the time of disposal.
    /// </summary>
    public void Dispose()
    {
        CancellationTokenSource? cts;
        Task? loop;
        lock (_stateLock)
        {
            if (_disposed) return;
            _disposed = true;

            cts = _loopCts;
            loop = _recoveryLoop;
            _loopCts = null;
            _recoveryLoop = null;
        }

        cts?.Cancel();
        if (loop is not null)
        {
            // Best-effort join during shutdown; a faulted loop must not make Dispose throw.
            try { loop.GetAwaiter().GetResult(); } catch { /* shutdown */ }
        }
        cts?.Dispose();
    }
}
|
||||
|
||||
/// <summary>
/// Immutable description of a single state change made by
/// <see cref="ReconnectSupervisor"/>; this is the payload of its
/// <c>StateChanged</c> event.
/// </summary>
/// <param name="Previous">State the supervisor was in before the change.</param>
/// <param name="Next">State the supervisor moved to.</param>
/// <param name="Cause">Error message associated with the change, or null when there is none.</param>
/// <param name="AtUtc">UTC time at which the change was recorded.</param>
public sealed record StateTransition(ReconnectSupervisor.State Previous, ReconnectSupervisor.State Next, string? Cause, DateTime AtUtc);
|
||||
|
||||
/// <summary>
/// Backoff settings for <see cref="ReconnectSupervisor"/>. Per the original note,
/// the reconnect settings on the driver options record (PR 4.0) map onto this type;
/// it is kept separate so the supervisor can be tested without the full driver
/// options surface.
/// </summary>
/// <param name="InitialBackoffOverride">Delay before the first retry; null selects the 500 ms default.</param>
/// <param name="MaxBackoffOverride">Upper bound for the retry delay; null selects the 30 s default.</param>
public sealed record ReconnectOptions(
    TimeSpan? InitialBackoffOverride = null,
    TimeSpan? MaxBackoffOverride = null)
{
    /// <summary>Effective delay before the first retry (500 ms unless overridden).</summary>
    public TimeSpan InitialBackoff
    {
        get
        {
            return InitialBackoffOverride is { } configured
                ? configured
                : TimeSpan.FromMilliseconds(500);
        }
    }

    /// <summary>Effective cap on the retry delay (30 s unless overridden).</summary>
    public TimeSpan MaxBackoff
    {
        get
        {
            return MaxBackoffOverride is { } configured
                ? configured
                : TimeSpan.FromSeconds(30);
        }
    }
}
|
||||
@@ -0,0 +1,118 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using MxGateway.Contracts.Proto;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
/// Maps the gateway's <see cref="MxStatusProxy"/> (raw MXAccess HRESULT + category bits)
/// to OPC UA <c>StatusCode</c> uints. Replaces the legacy
/// <c>MxAccessGalaxyBackend.ToWire</c> heuristic (Quality &gt;= 192 → Good, else Uncertain)
/// with an explicit table that preserves specific codes (BadNotConnected, OutOfService,
/// UncertainSubNormal, etc.) instead of collapsing to category buckets.
/// </summary>
/// <remarks>
/// An OPC DA quality word is 16 bits: the high byte is vendor-specific and the low byte is
/// laid out as <c>QQSSSSLL</c> — Q = quality category (Bad/Uncertain/Good = 0/1/3),
/// S = substatus, L = limit. This mapper consumes that LOW byte.
///
/// Byte values that are not in the table fall back to the matching category bucket
/// (<c>Good</c>, <c>Uncertain</c>, <c>Bad</c>) and emit a warning through the supplied
/// logger on every occurrence so field captures can extend the table. No de-duplication
/// happens here; any rate limiting is left to the logging configuration.
/// </remarks>
internal static class StatusCodeMap
{
    // Numeric values follow the OPC UA StatusCode registry (the same numbers the OPC
    // Foundation stack exposes as Opc.Ua.StatusCodes). The top two bits carry severity:
    // 00 = Good, 01 = Uncertain, 10 = Bad; the rest of the upper 16 bits is the sub-code.

    public const uint Good = 0x00000000u;
    public const uint GoodLocalOverride = 0x00960000u;
    public const uint Uncertain = 0x40000000u;
    public const uint UncertainLastUsableValue = 0x40900000u;
    public const uint UncertainSensorNotAccurate = 0x40930000u;
    public const uint UncertainEngineeringUnitsExceeded = 0x40940000u;
    public const uint UncertainSubNormal = 0x40950000u;
    public const uint Bad = 0x80000000u;
    public const uint BadConfigurationError = 0x80890000u;
    public const uint BadNotConnected = 0x808A0000u;
    public const uint BadDeviceFailure = 0x808B0000u;
    public const uint BadSensorFailure = 0x808C0000u;
    public const uint BadCommunicationError = 0x80050000u;
    public const uint BadOutOfService = 0x808D0000u;
    public const uint BadWaitingForInitialData = 0x80320000u;
    public const uint BadInternalError = 0x80020000u;

    /// <summary>
    /// Map a raw OPC DA quality byte (the low byte of an OPC DA quality word) to the
    /// OPC UA StatusCode uint.
    /// </summary>
    /// <param name="q">Quality byte to translate.</param>
    /// <param name="logger">Optional sink for the "unrecognised byte" warning.</param>
    /// <returns>
    /// The specific status code when <paramref name="q"/> is in the table, otherwise the
    /// bare category code for its range.
    /// </returns>
    public static uint FromQualityByte(byte q, ILogger? logger = null) => q switch
    {
        // Good family — top two bits 11b (192-255).
        192 => Good,
        216 => GoodLocalOverride,

        // Uncertain family — top two bits 01b (64-127).
        64 => Uncertain,
        68 => UncertainLastUsableValue,
        80 => UncertainSensorNotAccurate,
        84 => UncertainEngineeringUnitsExceeded,
        88 => UncertainSubNormal,

        // Bad family — top two bits 00b (0-63).
        // NOTE(review): OPC DA names 0x14 (20) "last known value", 0x18 (24) "comm failure"
        // and 0x1C (28) "out of service"; the rows for 20 and 24 below do not follow that
        // naming. Left unchanged — confirm against MXAccess field captures before editing.
        0 => Bad,
        4 => BadConfigurationError,
        8 => BadNotConnected,
        12 => BadDeviceFailure,
        16 => BadSensorFailure,
        20 => BadCommunicationError,
        24 => BadOutOfService,
        32 => BadWaitingForInitialData,

        _ => Categorize(q, logger),
    };

    /// <summary>
    /// Map a gateway-reported <see cref="MxStatusProxy"/> to OPC UA StatusCode. Honors
    /// the success flag, then the detail byte (treated as a quality substatus), with a
    /// transport-error fallback for status rows whose detected_by indicates the failure
    /// happened before the MXAccess call ran.
    /// </summary>
    /// <param name="status">Status row from the gateway; <c>null</c> is treated as Good.</param>
    /// <param name="logger">Optional sink forwarded to <see cref="FromQualityByte"/>.</param>
    /// <returns>The OPC UA StatusCode uint for <paramref name="status"/>.</returns>
    public static uint FromMxStatus(MxStatusProxy? status, ILogger? logger = null)
    {
        if (status is null) return Good;
        if (status.Success != 0) return Good;

        // Detail field carries the substatus when the worker translated MX-style codes;
        // when zero, infer from category + detected_by.
        var detail = (byte)(status.Detail & 0xFF);
        if (detail != 0) return FromQualityByte(detail, logger);

        // detected_by != Mxaccess (raw_detected_by != the MXAccess source enum) implies
        // the failure happened pre-call (gateway, worker, transport) — surface as a
        // communication error rather than a generic Bad.
        if (status.RawDetectedBy != 0) return BadCommunicationError;

        return Bad;
    }

    /// <summary>
    /// Category-only fallback for bytes missing from the table: 192+ → Good,
    /// 64-191 → Uncertain, everything below → Bad. Logs the byte each time.
    /// </summary>
    private static uint Categorize(byte q, ILogger? logger)
    {
        if (q >= 192) { Log(logger, q, "Good"); return Good; }
        if (q >= 64) { Log(logger, q, "Uncertain"); return Uncertain; }
        Log(logger, q, "Bad");
        return Bad;
    }

    /// <summary>Best-effort warning so field captures can extend the table.</summary>
    private static void Log(ILogger? logger, byte q, string bucket)
    {
        // Emitted on every call; throttling, if wanted, belongs to the logging pipeline.
        logger?.LogWarning(
            "Unrecognised MXAccess quality byte 0x{Q:X2} — falling back to {Bucket} category. " +
            "Field capture welcome — extend StatusCodeMap.FromQualityByte.", q, bucket);
    }
}
|
||||
@@ -0,0 +1,106 @@
|
||||
using System.Collections.Concurrent;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
/// Bookkeeping for live subscriptions. Maps each driver-issued <c>SubscriptionId</c> to the
/// set of (full-reference, gw item-handle) pairs the gateway returned, and maintains the
/// reverse map (item-handle → subscribers) so the <see cref="EventPump"/> can fan out a
/// single OnDataChange event to every driver subscription that includes the changed tag.
/// </summary>
/// <remarks>
/// A tag may legitimately appear in multiple driver subscriptions (separate clients or
/// OPC UA monitored items observing the same Galaxy attribute). Using a single shared
/// gw subscription per session and fanning out on the driver side keeps the gateway's
/// work bounded; the reverse map is the fan-out index.
///
/// Concurrency: <see cref="Register"/> and <see cref="Remove"/> are serialized by a private
/// lock and publish a fresh array per item handle, never mutating a published one.
/// <see cref="ResolveSubscribers"/> reads those snapshots without locking, so a concurrent
/// register can no longer be lost while a removal rebuilds the same handle's subscriber set.
/// </remarks>
internal sealed class SubscriptionRegistry
{
    private readonly object _gate = new();
    private readonly ConcurrentDictionary<long, SubscriptionEntry> _bySubscriptionId = new();

    // Values are immutable snapshots: writers replace the array, readers just hand it out.
    private readonly ConcurrentDictionary<int, (long SubscriptionId, string FullReference)[]> _subscribersByItemHandle = new();
    private long _nextSubscriptionId;

    public int TrackedSubscriptionCount => _bySubscriptionId.Count;
    public int TrackedItemHandleCount => _subscribersByItemHandle.Count;

    /// <summary>Allocate a fresh subscription id. Monotonic; unique per registry lifetime.</summary>
    public long NextSubscriptionId() => Interlocked.Increment(ref _nextSubscriptionId);

    /// <summary>
    /// Register a subscription and the per-tag item handles the gateway returned for it.
    /// Failed tags (item handle = 0 or negative) are stored on the entry anyway so
    /// <see cref="Remove"/> hands back the complete binding list, but they are left out
    /// of the fan-out index because no events are expected for them.
    /// </summary>
    /// <param name="subscriptionId">Id to register; re-registering an id replaces its previous bindings.</param>
    /// <param name="bindings">Bindings returned by the gateway for this subscription.</param>
    /// <exception cref="ArgumentNullException"><paramref name="bindings"/> is null.</exception>
    public void Register(long subscriptionId, IReadOnlyList<TagBinding> bindings)
    {
        ArgumentNullException.ThrowIfNull(bindings);

        lock (_gate)
        {
            // Replacing an existing id: drop its old fan-out rows first so they don't linger.
            if (_bySubscriptionId.TryRemove(subscriptionId, out var replaced)) DetachLocked(replaced);

            _bySubscriptionId[subscriptionId] = new SubscriptionEntry(subscriptionId, bindings);

            foreach (var binding in bindings)
            {
                if (binding.ItemHandle <= 0) continue; // failed gw subscribe — no events expected
                if (binding.FullReference is null) continue; // nothing to report to the subscriber

                (long SubscriptionId, string FullReference) subscriber = (subscriptionId, binding.FullReference);

                if (!_subscribersByItemHandle.TryGetValue(binding.ItemHandle, out var current))
                {
                    _subscribersByItemHandle[binding.ItemHandle] = [subscriber];
                }
                else if (!Array.Exists(current, s => s.SubscriptionId == subscriptionId))
                {
                    // Each subscription is listed once per handle; the first binding wins.
                    _subscribersByItemHandle[binding.ItemHandle] = [.. current, subscriber];
                }
            }
        }
    }

    /// <summary>
    /// Remove a subscription. Returns the bindings the caller should pass to
    /// <c>UnsubscribeBulkAsync</c>; null when the id was never registered.
    /// </summary>
    public IReadOnlyList<TagBinding>? Remove(long subscriptionId)
    {
        lock (_gate)
        {
            if (!_bySubscriptionId.TryRemove(subscriptionId, out var entry)) return null;

            DetachLocked(entry);
            return entry.Bindings;
        }
    }

    /// <summary>
    /// Look up the (subscription id, full reference) pairs that should receive an
    /// OnDataChange for the given gw item handle. Returns empty when nobody subscribes.
    /// </summary>
    /// <remarks>
    /// The returned list is a shared read-only snapshot; it is a constant-time lookup and
    /// does not scan any subscription's binding list.
    /// </remarks>
    public IReadOnlyList<(long SubscriptionId, string FullReference)> ResolveSubscribers(int itemHandle)
    {
        if (_subscribersByItemHandle.TryGetValue(itemHandle, out var subscribers)) return subscribers;
        return [];
    }

    /// <summary>Snapshot every active binding for diagnostic output.</summary>
    public IReadOnlyList<TagBinding> SnapshotAllBindings() =>
        [.. _bySubscriptionId.Values.SelectMany(entry => entry.Bindings)];

    /// <summary>
    /// Strips <paramref name="entry"/>'s subscription id from every item handle it was
    /// indexed under, dropping handles that end up with no subscribers. Caller holds the lock.
    /// </summary>
    private void DetachLocked(SubscriptionEntry entry)
    {
        foreach (var binding in entry.Bindings)
        {
            if (binding.ItemHandle <= 0) continue;
            if (!_subscribersByItemHandle.TryGetValue(binding.ItemHandle, out var current)) continue;

            var remaining = Array.FindAll(current, s => s.SubscriptionId != entry.SubscriptionId);
            if (remaining.Length == 0) _subscribersByItemHandle.TryRemove(binding.ItemHandle, out _);
            else if (remaining.Length != current.Length) _subscribersByItemHandle[binding.ItemHandle] = remaining;
        }
    }

    private sealed record SubscriptionEntry(long SubscriptionId, IReadOnlyList<TagBinding> Bindings);
}

/// <summary>
/// One (full reference, gw item handle) pair returned by SubscribeBulk. Item handle is
/// zero or negative when the gateway rejected this individual tag (bad name, duplicate);
/// the registry keeps the binding so the caller can surface a per-tag failure status.
/// </summary>
internal sealed record TagBinding(string FullReference, int ItemHandle);
|
||||
@@ -0,0 +1,54 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
/// PR 6.1 — Tracing decorator around an <see cref="IGalaxyDataWriter"/>. Every write batch
/// gets one <see cref="System.Diagnostics.Activity"/> span tagged with the client name,
/// the batch size, the number of secured writes (Tune / Configure / VerifiedWrite) and the
/// number of results whose status code is not Bad.
/// </summary>
internal sealed class TracedGalaxyDataWriter(IGalaxyDataWriter inner, string clientName) : IGalaxyDataWriter
{
    /// <summary>
    /// Forwards the batch to the wrapped writer inside a <c>galaxy.write</c> span and
    /// records any exception on that span before rethrowing it.
    /// </summary>
    public async Task<IReadOnlyList<WriteResult>> WriteAsync(
        IReadOnlyList<WriteRequest> writes,
        Func<string, SecurityClassification> securityResolver,
        CancellationToken cancellationToken)
    {
        using var activity = GalaxyTelemetry.ActivitySource.StartActivity("galaxy.write");
        activity?.SetTag("galaxy.client", clientName)
            .SetTag("galaxy.tag_count", writes.Count);

        // The classification split costs one resolver call per request, so it is only
        // computed when a listener is actually recording this span.
        if (activity is { IsAllDataRequested: true })
        {
            var securedCount = writes.Count(write => securityResolver(write.FullReference)
                is SecurityClassification.Tune
                or SecurityClassification.Configure
                or SecurityClassification.VerifiedWrite);
            activity.SetTag("galaxy.secured_write_count", securedCount);
        }

        try
        {
            var results = await inner
                .WriteAsync(writes, securityResolver, cancellationToken)
                .ConfigureAwait(false);

            if (activity is not null)
            {
                // Anything below the Bad severity bit (Good or Uncertain) counts as success.
                activity.SetTag("galaxy.success_count", results.Count(r => r.StatusCode < 0x80000000u));
            }

            return results;
        }
        catch (Exception ex)
        {
            activity.RecordError(ex);
            throw;
        }
    }
}
|
||||
@@ -0,0 +1,91 @@
|
||||
using System.Runtime.CompilerServices;
|
||||
using MxGateway.Contracts.Proto;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
/// PR 6.1 — Tracing decorator around an <see cref="IGalaxySubscriber"/>: one
/// <see cref="System.Diagnostics.Activity"/> span per gw subscription RPC. In production it
/// wraps <see cref="GatewayGalaxySubscriber"/>; tests substitute a fake at the same seam
/// without taking the tracing overhead.
/// </summary>
internal sealed class TracedGalaxySubscriber(IGalaxySubscriber inner, string clientName) : IGalaxySubscriber
{
    /// <summary>
    /// Runs the bulk subscribe inside a <c>galaxy.subscribe_bulk</c> span tagged with the
    /// request size, the buffered interval and the number of successful results.
    /// </summary>
    public async Task<IReadOnlyList<SubscribeResult>> SubscribeBulkAsync(
        IReadOnlyList<string> fullReferences, int bufferedUpdateIntervalMs, CancellationToken cancellationToken)
    {
        using var span = GalaxyTelemetry.ActivitySource.StartActivity("galaxy.subscribe_bulk");
        span?.SetTag("galaxy.client", clientName)
            .SetTag("galaxy.tag_count", fullReferences.Count)
            .SetTag("galaxy.buffered_interval_ms", bufferedUpdateIntervalMs);

        try
        {
            var outcome = await inner
                .SubscribeBulkAsync(fullReferences, bufferedUpdateIntervalMs, cancellationToken)
                .ConfigureAwait(false);

            if (span is not null)
            {
                span.SetTag("galaxy.success_count", outcome.Count(r => r.WasSuccessful));
            }

            return outcome;
        }
        catch (Exception ex)
        {
            span.RecordError(ex);
            throw;
        }
    }

    /// <summary>
    /// Runs the bulk unsubscribe inside a <c>galaxy.unsubscribe_bulk</c> span tagged with
    /// the number of item handles.
    /// </summary>
    public async Task UnsubscribeBulkAsync(IReadOnlyList<int> itemHandles, CancellationToken cancellationToken)
    {
        using var span = GalaxyTelemetry.ActivitySource.StartActivity("galaxy.unsubscribe_bulk");
        span?.SetTag("galaxy.client", clientName)
            .SetTag("galaxy.tag_count", itemHandles.Count);

        try
        {
            await inner.UnsubscribeBulkAsync(itemHandles, cancellationToken).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            span.RecordError(ex);
            throw;
        }
    }

    /// <summary>
    /// Streaming RPC — a single parent span covers the whole stream lifetime and carries
    /// the total event count. Per-event spans would dominate the trace volume at
    /// 50k tags / 1Hz; ops gets per-event visibility through <see cref="EventPump"/>'s
    /// metrics in PR 6.2 instead.
    /// </summary>
    public async IAsyncEnumerable<MxEvent> StreamEventsAsync(
        [EnumeratorCancellation] CancellationToken cancellationToken)
    {
        using var span = GalaxyTelemetry.ActivitySource.StartActivity("galaxy.stream_events");
        span?.SetTag("galaxy.client", clientName);

        var source = inner.StreamEventsAsync(cancellationToken).GetAsyncEnumerator(cancellationToken);
        try
        {
            long delivered = 0;
            for (;;)
            {
                // The enumerator is advanced by hand because C# forbids "yield return"
                // inside a try block that has a catch clause.
                bool hasNext;
                try
                {
                    hasNext = await source.MoveNextAsync().ConfigureAwait(false);
                }
                catch (Exception ex)
                {
                    span.RecordError(ex);
                    span?.SetTag("galaxy.event_count", delivered);
                    throw;
                }

                if (!hasNext)
                {
                    span?.SetTag("galaxy.event_count", delivered);
                    yield break;
                }

                delivered++;
                yield return source.Current;
            }
        }
        finally
        {
            await source.DisposeAsync().ConfigureAwait(false);
        }
    }
}
|
||||
@@ -0,0 +1,33 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<Platforms>AnyCPU;x64</Platforms>
|
||||
<Nullable>enable</Nullable>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<LangVersion>latest</LangVersion>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
<GenerateDocumentationFile>true</GenerateDocumentationFile>
|
||||
<NoWarn>$(NoWarn);CS1591</NoWarn>
|
||||
<RootNamespace>ZB.MOM.WW.OtOpcUa.Driver.Galaxy</RootNamespace>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\..\Core\ZB.MOM.WW.OtOpcUa.Core.Abstractions\ZB.MOM.WW.OtOpcUa.Core.Abstractions.csproj"/>
|
||||
<ProjectReference Include="..\..\Core\ZB.MOM.WW.OtOpcUa.Core\ZB.MOM.WW.OtOpcUa.Core.csproj"/>
|
||||
<!-- mxaccessgw .NET client. Path-based ProjectReference because both repos sit
|
||||
side-by-side on the dev box; long-term we'll consume MxGateway.Client as a
|
||||
NuGet package. PR 4.W revisits the dependency shape before parity gating. -->
|
||||
<ProjectReference Include="..\..\..\..\mxaccessgw\clients\dotnet\MxGateway.Client\MxGateway.Client.csproj"/>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<InternalsVisibleTo Include="ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Tests"/>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<NuGetAuditSuppress Include="https://github.com/advisories/GHSA-37gx-xxp4-5rgx"/>
|
||||
<NuGetAuditSuppress Include="https://github.com/advisories/GHSA-w3x6-4m5h-cxqf"/>
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
Reference in New Issue
Block a user