using Opc.Ua;
using Opc.Ua.Client;
using Opc.Ua.Configuration;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
namespace ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient;
///
/// OPC UA Client (gateway) driver. Opens a against a remote OPC UA
/// server and re-exposes its address space through the local OtOpcUa server. PR 66 ships
/// the scaffold: only (connect / close / health). Browse, read,
/// write, subscribe, and probe land in PRs 67-69.
///
///
///
/// Builds its own rather than reusing
/// Client.Shared — Client.Shared is oriented at the interactive CLI; this
/// driver is an always-on service component with different session-lifetime needs
/// (keep-alive monitor, session transfer on reconnect, multi-year uptime).
///
///
/// Session lifetime: a single per driver instance.
/// Subscriptions multiplex onto that session; SDK reconnect handler takes the session
/// down and brings it back up on remote-server restart — the driver must re-send
/// subscriptions + TransferSubscriptions on reconnect to avoid dangling
/// monitored-item handles. That mechanic lands in PR 69.
///
///
public sealed class OpcUaClientDriver(OpcUaClientDriverOptions options, string driverInstanceId)
: IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IHostConnectivityProbe, IAlarmSource, IHistoryProvider, IDisposable, IAsyncDisposable
{
// ---- IAlarmSource state ----
private readonly System.Collections.Concurrent.ConcurrentDictionary _alarmSubscriptions = new();
private long _nextAlarmSubscriptionId;
public event EventHandler? OnAlarmEvent;
// ---- ISubscribable + IHostConnectivityProbe state ----
private readonly System.Collections.Concurrent.ConcurrentDictionary _subscriptions = new();
private long _nextSubscriptionId;
private readonly object _probeLock = new();
private HostState _hostState = HostState.Unknown;
private DateTime _hostStateChangedUtc = DateTime.UtcNow;
private KeepAliveEventHandler? _keepAliveHandler;
public event EventHandler? OnDataChange;
public event EventHandler? OnHostStatusChanged;
// OPC UA StatusCode constants the driver surfaces for local-side faults. Upstream-server
// StatusCodes are passed through verbatim per driver-specs.md §8 "cascading quality" —
// downstream clients need to distinguish 'remote source down' from 'local driver failure'.
private const uint StatusBadNodeIdInvalid = 0x80330000u;
private const uint StatusBadInternalError = 0x80020000u;
private const uint StatusBadCommunicationError = 0x80050000u;
private readonly OpcUaClientDriverOptions _options = options;
private readonly SemaphoreSlim _gate = new(1, 1);
/// Active OPC UA session. Null until returns cleanly.
internal ISession? Session { get; private set; }
/// Per-connection gate. PRs 67+ serialize read/write/browse on this.
internal SemaphoreSlim Gate => _gate;
private DriverHealth _health = new(DriverState.Unknown, null, null);
private bool _disposed;
/// URL of the endpoint the driver actually connected to. Exposed via .
private string? _connectedEndpointUrl;
///
/// SDK-provided reconnect handler that owns the retry loop + session-transfer machinery
/// when the session's keep-alive channel reports a bad status. Null outside the
/// reconnecting window; constructed lazily inside the keep-alive handler.
///
private SessionReconnectHandler? _reconnectHandler;
public string DriverInstanceId => driverInstanceId;
public string DriverType => "OpcUaClient";
public async Task InitializeAsync(string driverConfigJson, CancellationToken cancellationToken)
{
_health = new DriverHealth(DriverState.Initializing, null, null);
try
{
var appConfig = await BuildApplicationConfigurationAsync(cancellationToken).ConfigureAwait(false);
var candidates = ResolveEndpointCandidates(_options);
var identity = BuildUserIdentity(_options);
// Failover sweep: try each endpoint in order, return the session from the first
// one that successfully connects. Per-endpoint failures are captured so the final
// aggregate exception names every URL that was tried and why — critical diag for
// operators debugging 'why did the failover pick #3?'.
var attemptErrors = new List(candidates.Count);
ISession? session = null;
string? connectedUrl = null;
foreach (var url in candidates)
{
try
{
session = await OpenSessionOnEndpointAsync(
appConfig, url, _options.SecurityPolicy, _options.SecurityMode,
identity, cancellationToken).ConfigureAwait(false);
connectedUrl = url;
break;
}
catch (Exception ex)
{
attemptErrors.Add($"{url} -> {ex.GetType().Name}: {ex.Message}");
}
}
if (session is null)
throw new AggregateException(
"OPC UA Client failed to connect to any of the configured endpoints. " +
"Tried:\n " + string.Join("\n ", attemptErrors),
attemptErrors.Select(e => new InvalidOperationException(e)));
// Wire the session's keep-alive channel into HostState + the reconnect trigger.
// OPC UA keep-alives are authoritative for session liveness: the SDK pings on
// KeepAliveInterval and sets KeepAliveStopped when N intervals elapse without a
// response. On a bad keep-alive the driver spins up a SessionReconnectHandler
// which transparently retries + swaps the underlying session. Subscriptions move
// via TransferSubscriptions so local MonitoredItem handles stay valid.
_keepAliveHandler = OnKeepAlive;
session.KeepAlive += _keepAliveHandler;
Session = session;
_connectedEndpointUrl = connectedUrl;
_health = new DriverHealth(DriverState.Healthy, DateTime.UtcNow, null);
TransitionTo(HostState.Running);
}
catch (Exception ex)
{
try { if (Session is Session s) await s.CloseAsync().ConfigureAwait(false); } catch { }
Session = null;
_health = new DriverHealth(DriverState.Faulted, null, ex.Message);
throw;
}
}
///
/// Build a minimal in-memory . Certificates live
/// under the OS user profile — on Windows that's %LocalAppData%\OtOpcUa\pki
/// — so multiple driver instances in the same OtOpcUa server process share one
/// certificate store without extra config.
///
private async Task BuildApplicationConfigurationAsync(CancellationToken ct)
{
// The default ctor is obsolete in favour of the ITelemetryContext overload; suppress
// locally rather than plumbing a telemetry context all the way through the driver
// surface — the driver emits no per-request telemetry of its own and the SDK's
// internal fallback is fine for a gateway use case.
#pragma warning disable CS0618
var app = new ApplicationInstance
{
ApplicationName = _options.SessionName,
ApplicationType = ApplicationType.Client,
};
#pragma warning restore CS0618
var pkiRoot = Path.Combine(
Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData),
"OtOpcUa", "pki");
var config = new ApplicationConfiguration
{
ApplicationName = _options.SessionName,
ApplicationType = ApplicationType.Client,
ApplicationUri = _options.ApplicationUri,
SecurityConfiguration = new SecurityConfiguration
{
ApplicationCertificate = new CertificateIdentifier
{
StoreType = CertificateStoreType.Directory,
StorePath = Path.Combine(pkiRoot, "own"),
SubjectName = $"CN={_options.SessionName}",
},
TrustedPeerCertificates = new CertificateTrustList
{
StoreType = CertificateStoreType.Directory,
StorePath = Path.Combine(pkiRoot, "trusted"),
},
TrustedIssuerCertificates = new CertificateTrustList
{
StoreType = CertificateStoreType.Directory,
StorePath = Path.Combine(pkiRoot, "issuers"),
},
RejectedCertificateStore = new CertificateTrustList
{
StoreType = CertificateStoreType.Directory,
StorePath = Path.Combine(pkiRoot, "rejected"),
},
AutoAcceptUntrustedCertificates = _options.AutoAcceptCertificates,
},
TransportQuotas = new TransportQuotas { OperationTimeout = (int)_options.Timeout.TotalMilliseconds },
ClientConfiguration = new ClientConfiguration
{
DefaultSessionTimeout = (int)_options.SessionTimeout.TotalMilliseconds,
},
DisableHiResClock = true,
};
await config.ValidateAsync(ApplicationType.Client, ct).ConfigureAwait(false);
// Attach a cert-validator handler that honours the AutoAccept flag. Without this,
// AutoAcceptUntrustedCertificates on the config alone isn't always enough in newer
// SDK versions — the validator raises an event the app has to handle.
if (_options.AutoAcceptCertificates)
{
config.CertificateValidator.CertificateValidation += (s, e) =>
{
if (e.Error.StatusCode == StatusCodes.BadCertificateUntrusted)
e.Accept = true;
};
}
// Ensure an application certificate exists. The SDK auto-generates one if missing.
app.ApplicationConfiguration = config;
await app.CheckApplicationInstanceCertificatesAsync(silent: true, lifeTimeInMonths: null, ct)
.ConfigureAwait(false);
return config;
}
///
/// Resolve the ordered failover candidate list. EndpointUrls wins when
/// non-empty; otherwise fall back to EndpointUrl as a single-URL shortcut so
/// existing single-endpoint configs keep working without migration.
///
internal static IReadOnlyList ResolveEndpointCandidates(OpcUaClientDriverOptions opts)
{
if (opts.EndpointUrls is { Count: > 0 }) return opts.EndpointUrls;
return [opts.EndpointUrl];
}
///
/// Build the user-identity token from the driver options. Split out of
/// so the failover sweep reuses one identity across
/// every endpoint attempt — generating it N times would re-unlock the user cert's
/// private key N times, wasteful + keeps the password in memory longer.
///
internal static UserIdentity BuildUserIdentity(OpcUaClientDriverOptions options) =>
options.AuthType switch
{
OpcUaAuthType.Anonymous => new UserIdentity(new AnonymousIdentityToken()),
OpcUaAuthType.Username => new UserIdentity(
options.Username ?? string.Empty,
System.Text.Encoding.UTF8.GetBytes(options.Password ?? string.Empty)),
OpcUaAuthType.Certificate => BuildCertificateIdentity(options),
_ => new UserIdentity(new AnonymousIdentityToken()),
};
///
/// Open a session against a single endpoint URL. Bounded by
/// so the failover
/// sweep doesn't spend its full budget on one dead server. Moved out of
/// so the failover loop body stays readable.
///
private async Task OpenSessionOnEndpointAsync(
ApplicationConfiguration appConfig,
string endpointUrl,
OpcUaSecurityPolicy policy,
OpcUaSecurityMode mode,
UserIdentity identity,
CancellationToken ct)
{
using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
cts.CancelAfter(_options.PerEndpointConnectTimeout);
var selected = await SelectMatchingEndpointAsync(
appConfig, endpointUrl, policy, mode, cts.Token).ConfigureAwait(false);
var endpointConfig = EndpointConfiguration.Create(appConfig);
endpointConfig.OperationTimeout = (int)_options.Timeout.TotalMilliseconds;
var endpoint = new ConfiguredEndpoint(null, selected, endpointConfig);
var session = await new DefaultSessionFactory(telemetry: null!).CreateAsync(
appConfig,
endpoint,
false, // updateBeforeConnect
_options.SessionName,
(uint)_options.SessionTimeout.TotalMilliseconds,
identity,
null, // preferredLocales
cts.Token).ConfigureAwait(false);
session.KeepAliveInterval = (int)_options.KeepAliveInterval.TotalMilliseconds;
return session;
}
///
/// Select the remote endpoint matching both the requested
/// and . The SDK's CoreClientUtils.SelectEndpointAsync
/// only honours a boolean "use security" flag; we need policy-aware matching so an
/// operator asking for Basic256Sha256 against a server that also offers
/// Basic128Rsa15 doesn't silently end up on the weaker cipher.
///
private static async Task SelectMatchingEndpointAsync(
ApplicationConfiguration appConfig,
string endpointUrl,
OpcUaSecurityPolicy policy,
OpcUaSecurityMode mode,
CancellationToken ct)
{
// GetEndpoints returns everything the server advertises; policy + mode filter is
// applied client-side so the selection is explicit and fails loudly if the operator
// asks for a combination the server doesn't publish. DiscoveryClient.CreateAsync
// is the non-obsolete path in SDK 1.5.378; the synchronous Create(..) variants are
// all deprecated.
using var client = await DiscoveryClient.CreateAsync(
appConfig, new Uri(endpointUrl), Opc.Ua.DiagnosticsMasks.None, ct).ConfigureAwait(false);
var all = await client.GetEndpointsAsync(null, ct).ConfigureAwait(false);
var wantedPolicyUri = MapSecurityPolicy(policy);
var wantedMode = mode switch
{
OpcUaSecurityMode.None => MessageSecurityMode.None,
OpcUaSecurityMode.Sign => MessageSecurityMode.Sign,
OpcUaSecurityMode.SignAndEncrypt => MessageSecurityMode.SignAndEncrypt,
_ => throw new ArgumentOutOfRangeException(nameof(mode)),
};
var match = all.FirstOrDefault(e =>
e.SecurityPolicyUri == wantedPolicyUri && e.SecurityMode == wantedMode);
if (match is null)
{
var advertised = string.Join(", ", all
.Select(e => $"{ShortPolicyName(e.SecurityPolicyUri)}/{e.SecurityMode}"));
throw new InvalidOperationException(
$"No endpoint at '{endpointUrl}' matches SecurityPolicy={policy} + SecurityMode={mode}. " +
$"Server advertises: {advertised}");
}
return match;
}
///
/// Build a carrying a client user-authentication
/// certificate loaded from .
/// Used when the remote server's endpoint advertises Certificate-type user tokens.
/// Fails fast if the path is missing, the file doesn't exist, or the certificate
/// lacks a private key (the private key is required to sign the user-token
/// challenge during session activation).
///
internal static UserIdentity BuildCertificateIdentity(OpcUaClientDriverOptions options)
{
if (string.IsNullOrWhiteSpace(options.UserCertificatePath))
throw new InvalidOperationException(
"OpcUaAuthType.Certificate requires OpcUaClientDriverOptions.UserCertificatePath to be set.");
if (!System.IO.File.Exists(options.UserCertificatePath))
throw new System.IO.FileNotFoundException(
$"User certificate not found at '{options.UserCertificatePath}'.",
options.UserCertificatePath);
// X509CertificateLoader (new in .NET 9) is the only non-obsolete way to load a PFX
// since the legacy X509Certificate2 ctors are marked obsolete on net10. Passes the
// password through verbatim; PEM files with external keys fall back to
// LoadCertificateFromFile which picks up the adjacent .key if present.
var cert = System.Security.Cryptography.X509Certificates.X509CertificateLoader
.LoadPkcs12FromFile(options.UserCertificatePath, options.UserCertificatePassword);
if (!cert.HasPrivateKey)
throw new InvalidOperationException(
$"User certificate at '{options.UserCertificatePath}' has no private key — " +
"the private key is required to sign the OPC UA user-token challenge at session activation.");
return new UserIdentity(cert);
}
/// Convert a driver to the OPC UA policy URI.
internal static string MapSecurityPolicy(OpcUaSecurityPolicy policy) => policy switch
{
OpcUaSecurityPolicy.None => SecurityPolicies.None,
OpcUaSecurityPolicy.Basic128Rsa15 => SecurityPolicies.Basic128Rsa15,
OpcUaSecurityPolicy.Basic256 => SecurityPolicies.Basic256,
OpcUaSecurityPolicy.Basic256Sha256 => SecurityPolicies.Basic256Sha256,
OpcUaSecurityPolicy.Aes128_Sha256_RsaOaep => SecurityPolicies.Aes128_Sha256_RsaOaep,
OpcUaSecurityPolicy.Aes256_Sha256_RsaPss => SecurityPolicies.Aes256_Sha256_RsaPss,
_ => throw new ArgumentOutOfRangeException(nameof(policy), policy, null),
};
private static string ShortPolicyName(string policyUri) =>
policyUri?.Substring(policyUri.LastIndexOf('#') + 1) ?? "(null)";
public async Task ReinitializeAsync(string driverConfigJson, CancellationToken cancellationToken)
{
await ShutdownAsync(cancellationToken).ConfigureAwait(false);
await InitializeAsync(driverConfigJson, cancellationToken).ConfigureAwait(false);
}
public async Task ShutdownAsync(CancellationToken cancellationToken)
{
// Tear down remote subscriptions first — otherwise Session.Close will try and may fail
// with BadSubscriptionIdInvalid noise in the upstream log. _subscriptions is cleared
// whether or not the wire-side delete succeeds since the local handles are useless
// after close anyway.
foreach (var rs in _subscriptions.Values)
{
try { await rs.Subscription.DeleteAsync(silent: true, cancellationToken).ConfigureAwait(false); }
catch { /* best-effort */ }
}
_subscriptions.Clear();
foreach (var ras in _alarmSubscriptions.Values)
{
try { await ras.Subscription.DeleteAsync(silent: true, cancellationToken).ConfigureAwait(false); }
catch { /* best-effort */ }
}
_alarmSubscriptions.Clear();
// Abort any in-flight reconnect attempts before touching the session — BeginReconnect's
// retry loop holds a reference to the current session and would fight Session.CloseAsync
// if left spinning.
try { _reconnectHandler?.CancelReconnect(); } catch { }
_reconnectHandler?.Dispose();
_reconnectHandler = null;
if (_keepAliveHandler is not null && Session is not null)
{
try { Session.KeepAlive -= _keepAliveHandler; } catch { }
}
_keepAliveHandler = null;
try { if (Session is Session s) await s.CloseAsync(cancellationToken).ConfigureAwait(false); }
catch { /* best-effort */ }
try { Session?.Dispose(); } catch { }
Session = null;
_connectedEndpointUrl = null;
TransitionTo(HostState.Unknown);
_health = new DriverHealth(DriverState.Unknown, _health.LastSuccessfulRead, null);
}
public DriverHealth GetHealth() => _health;
public long GetMemoryFootprint() => 0;
public Task FlushOptionalCachesAsync(CancellationToken cancellationToken) => Task.CompletedTask;
// ---- IReadable ----
public async Task> ReadAsync(
IReadOnlyList fullReferences, CancellationToken cancellationToken)
{
var session = RequireSession();
var results = new DataValueSnapshot[fullReferences.Count];
var now = DateTime.UtcNow;
// Parse NodeIds up-front. Tags whose reference doesn't parse get BadNodeIdInvalid
// and are omitted from the wire request — saves a round-trip against the upstream
// server for a fault the driver can detect locally.
var toSend = new ReadValueIdCollection();
var indexMap = new List(fullReferences.Count); // maps wire-index -> results-index
for (var i = 0; i < fullReferences.Count; i++)
{
if (!TryParseNodeId(session, fullReferences[i], out var nodeId))
{
results[i] = new DataValueSnapshot(null, StatusBadNodeIdInvalid, null, now);
continue;
}
toSend.Add(new ReadValueId { NodeId = nodeId, AttributeId = Attributes.Value });
indexMap.Add(i);
}
if (toSend.Count == 0) return results;
await _gate.WaitAsync(cancellationToken).ConfigureAwait(false);
try
{
try
{
var resp = await session.ReadAsync(
requestHeader: null,
maxAge: 0,
timestampsToReturn: TimestampsToReturn.Both,
nodesToRead: toSend,
ct: cancellationToken).ConfigureAwait(false);
var values = resp.Results;
for (var w = 0; w < values.Count; w++)
{
var r = indexMap[w];
var dv = values[w];
// Preserve the upstream StatusCode verbatim — including Bad codes per
// §8's cascading-quality rule. Also preserve SourceTimestamp so downstream
// clients can detect stale upstream data.
results[r] = new DataValueSnapshot(
Value: dv.Value,
StatusCode: dv.StatusCode.Code,
SourceTimestampUtc: dv.SourceTimestamp == DateTime.MinValue ? null : dv.SourceTimestamp,
ServerTimestampUtc: dv.ServerTimestamp == DateTime.MinValue ? now : dv.ServerTimestamp);
}
_health = new DriverHealth(DriverState.Healthy, now, null);
}
catch (Exception ex)
{
// Transport / timeout / session-dropped — fan out the same fault across every
// tag in this batch. Per-tag StatusCode stays BadCommunicationError (not
// BadInternalError) so operators distinguish "upstream unreachable" from
// "driver bug".
for (var w = 0; w < indexMap.Count; w++)
{
var r = indexMap[w];
results[r] = new DataValueSnapshot(null, StatusBadCommunicationError, null, now);
}
_health = new DriverHealth(DriverState.Degraded, _health.LastSuccessfulRead, ex.Message);
}
}
finally { _gate.Release(); }
return results;
}
// ---- IWritable ----
public async Task> WriteAsync(
IReadOnlyList writes, CancellationToken cancellationToken)
{
var session = RequireSession();
var results = new WriteResult[writes.Count];
var toSend = new WriteValueCollection();
var indexMap = new List(writes.Count);
for (var i = 0; i < writes.Count; i++)
{
if (!TryParseNodeId(session, writes[i].FullReference, out var nodeId))
{
results[i] = new WriteResult(StatusBadNodeIdInvalid);
continue;
}
toSend.Add(new WriteValue
{
NodeId = nodeId,
AttributeId = Attributes.Value,
Value = new DataValue(new Variant(writes[i].Value)),
});
indexMap.Add(i);
}
if (toSend.Count == 0) return results;
await _gate.WaitAsync(cancellationToken).ConfigureAwait(false);
try
{
try
{
var resp = await session.WriteAsync(
requestHeader: null,
nodesToWrite: toSend,
ct: cancellationToken).ConfigureAwait(false);
var codes = resp.Results;
for (var w = 0; w < codes.Count; w++)
{
var r = indexMap[w];
// Pass upstream WriteResult StatusCode through verbatim. Success codes
// include Good (0) and any warning-level Good* variants; anything with
// the severity bits set is a Bad.
results[r] = new WriteResult(codes[w].Code);
}
}
catch (Exception)
{
for (var w = 0; w < indexMap.Count; w++)
results[indexMap[w]] = new WriteResult(StatusBadCommunicationError);
}
}
finally { _gate.Release(); }
return results;
}
///
/// Parse a tag's full-reference string as a NodeId. Accepts the standard OPC UA
/// serialized forms (ns=2;s=…, i=2253, ns=4;g=…, ns=3;b=…).
/// Empty + malformed strings return false; the driver surfaces that as
/// without a wire round-trip.
///
internal static bool TryParseNodeId(ISession session, string fullReference, out NodeId nodeId)
{
nodeId = NodeId.Null;
if (string.IsNullOrWhiteSpace(fullReference)) return false;
try
{
nodeId = NodeId.Parse(session.MessageContext, fullReference);
return !NodeId.IsNull(nodeId);
}
catch
{
return false;
}
}
private ISession RequireSession() =>
Session ?? throw new InvalidOperationException("OpcUaClientDriver not initialized");
// ---- ITagDiscovery ----
public async Task DiscoverAsync(IAddressSpaceBuilder builder, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(builder);
var session = RequireSession();
var root = !string.IsNullOrEmpty(_options.BrowseRoot)
? NodeId.Parse(session.MessageContext, _options.BrowseRoot)
: ObjectIds.ObjectsFolder;
var rootFolder = builder.Folder("Remote", "Remote");
var visited = new HashSet();
var discovered = 0;
var pendingVariables = new List();
await _gate.WaitAsync(cancellationToken).ConfigureAwait(false);
try
{
// Pass 1: browse hierarchy + create folders inline, collect variables into a
// pending list. Defers variable registration until attributes are resolved — the
// address-space builder's Variable call is the one-way commit, so doing it only
// once per variable (with correct DataType/SecurityClass/IsArray) avoids the
// alternative (register with placeholders + mutate later) which the
// IAddressSpaceBuilder contract doesn't expose.
await BrowseRecursiveAsync(session, root, rootFolder, visited,
depth: 0,
discovered: () => discovered, increment: () => discovered++,
pendingVariables: pendingVariables,
ct: cancellationToken).ConfigureAwait(false);
// Pass 2: batch-read DataType + AccessLevel + ValueRank + Historizing per
// variable. One wire request for up to ~N variables; for 10k-node servers this is
// still a couple of hundred ms total since the SDK chunks ReadAsync automatically.
await EnrichAndRegisterVariablesAsync(session, pendingVariables, cancellationToken)
.ConfigureAwait(false);
}
finally { _gate.Release(); }
}
///
/// A variable collected during the browse pass, waiting for attribute enrichment
/// before being registered on the address-space builder.
///
private readonly record struct PendingVariable(
IAddressSpaceBuilder ParentFolder,
string BrowseName,
string DisplayName,
NodeId NodeId);
private async Task BrowseRecursiveAsync(
ISession session, NodeId node, IAddressSpaceBuilder folder, HashSet visited,
int depth, Func discovered, Action increment,
List pendingVariables, CancellationToken ct)
{
if (depth >= _options.MaxBrowseDepth) return;
if (discovered() >= _options.MaxDiscoveredNodes) return;
if (!visited.Add(node)) return;
var browseDescriptions = new BrowseDescriptionCollection
{
new()
{
NodeId = node,
BrowseDirection = BrowseDirection.Forward,
ReferenceTypeId = ReferenceTypeIds.HierarchicalReferences,
IncludeSubtypes = true,
NodeClassMask = (uint)(NodeClass.Object | NodeClass.Variable),
ResultMask = (uint)(BrowseResultMask.BrowseName | BrowseResultMask.DisplayName
| BrowseResultMask.NodeClass | BrowseResultMask.TypeDefinition),
}
};
BrowseResponse resp;
try
{
resp = await session.BrowseAsync(
requestHeader: null,
view: null,
requestedMaxReferencesPerNode: 0,
nodesToBrowse: browseDescriptions,
ct: ct).ConfigureAwait(false);
}
catch
{
// Transient browse failure on a sub-tree — don't kill the whole discovery, just
// skip this branch. The driver's health surface will reflect the cascade via the
// probe loop (PR 69).
return;
}
if (resp.Results.Count == 0) return;
var refs = resp.Results[0].References;
foreach (var rf in refs)
{
if (discovered() >= _options.MaxDiscoveredNodes) break;
var childId = ExpandedNodeId.ToNodeId(rf.NodeId, session.NamespaceUris);
if (NodeId.IsNull(childId)) continue;
var browseName = rf.BrowseName?.Name ?? childId.ToString();
var displayName = rf.DisplayName?.Text ?? browseName;
if (rf.NodeClass == NodeClass.Object)
{
var subFolder = folder.Folder(browseName, displayName);
increment();
await BrowseRecursiveAsync(session, childId, subFolder, visited,
depth + 1, discovered, increment, pendingVariables, ct).ConfigureAwait(false);
}
else if (rf.NodeClass == NodeClass.Variable)
{
pendingVariables.Add(new PendingVariable(folder, browseName, displayName, childId));
increment();
}
}
}
///
/// Pass 2 of discovery: batch-read DataType + ValueRank + AccessLevel + Historizing
/// for every collected variable in one Session.ReadAsync (the SDK chunks internally
/// to respect the server's per-request limits). Then register each variable on its
/// parent folder with the real .
///
///
///
/// Attributes read: DataType (NodeId of the value type),
/// ValueRank (-1 = scalar, 1 = array), UserAccessLevel (the
/// effective access mask for our session — more accurate than AccessLevel which
/// is the server-side configured mask before user filtering), and
/// Historizing (server flags whether historian data is available).
///
///
/// When the upstream server returns Bad on any attribute, the variable falls back
/// to safe defaults (Int32 / ViewOnly / not-array / not-historized) and is still
/// registered — a partial enrichment failure shouldn't drop entire variables from
/// the address space. Operators reading the Admin dashboard see the variable
/// with conservative metadata which is obviously wrong and easy to triage.
///
///
private async Task EnrichAndRegisterVariablesAsync(
ISession session, IReadOnlyList pending, CancellationToken ct)
{
if (pending.Count == 0) return;
// 4 attributes per variable: DataType, ValueRank, UserAccessLevel, Historizing.
var nodesToRead = new ReadValueIdCollection(pending.Count * 4);
foreach (var pv in pending)
{
nodesToRead.Add(new ReadValueId { NodeId = pv.NodeId, AttributeId = Attributes.DataType });
nodesToRead.Add(new ReadValueId { NodeId = pv.NodeId, AttributeId = Attributes.ValueRank });
nodesToRead.Add(new ReadValueId { NodeId = pv.NodeId, AttributeId = Attributes.UserAccessLevel });
nodesToRead.Add(new ReadValueId { NodeId = pv.NodeId, AttributeId = Attributes.Historizing });
}
DataValueCollection values;
try
{
var resp = await session.ReadAsync(
requestHeader: null,
maxAge: 0,
timestampsToReturn: TimestampsToReturn.Neither,
nodesToRead: nodesToRead,
ct: ct).ConfigureAwait(false);
values = resp.Results;
}
catch
{
// Enrichment-read failed wholesale (server unreachable mid-browse). Register the
// pending variables with conservative defaults rather than dropping them — the
// downstream catalog is still useful for reading via IReadable.
foreach (var pv in pending)
RegisterFallback(pv);
return;
}
for (var i = 0; i < pending.Count; i++)
{
var pv = pending[i];
var baseIdx = i * 4;
var dataTypeDv = values[baseIdx];
var valueRankDv = values[baseIdx + 1];
var accessDv = values[baseIdx + 2];
var histDv = values[baseIdx + 3];
var dataType = StatusCode.IsGood(dataTypeDv.StatusCode) && dataTypeDv.Value is NodeId dtId
? MapUpstreamDataType(dtId)
: DriverDataType.Int32;
var valueRank = StatusCode.IsGood(valueRankDv.StatusCode) && valueRankDv.Value is int vr ? vr : -1;
var isArray = valueRank >= 0; // -1 = scalar; 1+ = array dimensions; 0 = one-dimensional array
var access = StatusCode.IsGood(accessDv.StatusCode) && accessDv.Value is byte ab ? ab : (byte)0;
var securityClass = MapAccessLevelToSecurityClass(access);
var historizing = StatusCode.IsGood(histDv.StatusCode) && histDv.Value is bool b && b;
pv.ParentFolder.Variable(pv.BrowseName, pv.DisplayName, new DriverAttributeInfo(
FullName: pv.NodeId.ToString() ?? string.Empty,
DriverDataType: dataType,
IsArray: isArray,
ArrayDim: null,
SecurityClass: securityClass,
IsHistorized: historizing,
IsAlarm: false));
}
void RegisterFallback(PendingVariable pv)
{
pv.ParentFolder.Variable(pv.BrowseName, pv.DisplayName, new DriverAttributeInfo(
FullName: pv.NodeId.ToString() ?? string.Empty,
DriverDataType: DriverDataType.Int32,
IsArray: false,
ArrayDim: null,
SecurityClass: SecurityClassification.ViewOnly,
IsHistorized: false,
IsAlarm: false));
}
}
///
/// Map an upstream OPC UA built-in DataType NodeId (via DataTypeIds.*) to a
/// . Unknown / custom types fall through to
/// which is the safest passthrough for
/// Variant-wrapped structs + enums + extension objects; downstream clients see a
/// string rendering but the cascading-quality path still preserves upstream
/// StatusCode + timestamps.
///
internal static DriverDataType MapUpstreamDataType(NodeId dataType)
{
if (dataType == DataTypeIds.Boolean) return DriverDataType.Boolean;
if (dataType == DataTypeIds.SByte || dataType == DataTypeIds.Byte ||
dataType == DataTypeIds.Int16) return DriverDataType.Int16;
if (dataType == DataTypeIds.UInt16) return DriverDataType.UInt16;
if (dataType == DataTypeIds.Int32) return DriverDataType.Int32;
if (dataType == DataTypeIds.UInt32) return DriverDataType.UInt32;
if (dataType == DataTypeIds.Int64) return DriverDataType.Int64;
if (dataType == DataTypeIds.UInt64) return DriverDataType.UInt64;
if (dataType == DataTypeIds.Float) return DriverDataType.Float32;
if (dataType == DataTypeIds.Double) return DriverDataType.Float64;
if (dataType == DataTypeIds.String) return DriverDataType.String;
if (dataType == DataTypeIds.DateTime || dataType == DataTypeIds.UtcTime)
return DriverDataType.DateTime;
return DriverDataType.String;
}
///
/// Map an OPC UA AccessLevel/UserAccessLevel attribute value (AccessLevels
/// bitmask) to a the local node-manager's ACL
/// layer can gate writes off. CurrentWrite-capable variables surface as
/// ; read-only as .
///
internal static SecurityClassification MapAccessLevelToSecurityClass(byte accessLevel)
{
const byte CurrentWrite = 2; // AccessLevels.CurrentWrite = 0x02
return (accessLevel & CurrentWrite) != 0
? SecurityClassification.Operate
: SecurityClassification.ViewOnly;
}
// ---- ISubscribable ----
public async Task SubscribeAsync(
IReadOnlyList fullReferences, TimeSpan publishingInterval, CancellationToken cancellationToken)
{
var session = RequireSession();
var id = Interlocked.Increment(ref _nextSubscriptionId);
var handle = new OpcUaSubscriptionHandle(id);
// Floor the publishing interval at 50ms — OPC UA servers routinely negotiate
// minimum-supported intervals up anyway, but sending sub-50ms wastes negotiation
// bandwidth on every subscription create.
var intervalMs = publishingInterval < TimeSpan.FromMilliseconds(50)
? 50
: (int)publishingInterval.TotalMilliseconds;
var subscription = new Subscription(telemetry: null!, new SubscriptionOptions
{
DisplayName = $"opcua-sub-{id}",
PublishingInterval = intervalMs,
KeepAliveCount = 10,
LifetimeCount = 1000,
MaxNotificationsPerPublish = 0,
PublishingEnabled = true,
Priority = 0,
TimestampsToReturn = TimestampsToReturn.Both,
});
await _gate.WaitAsync(cancellationToken).ConfigureAwait(false);
try
{
session.AddSubscription(subscription);
await subscription.CreateAsync(cancellationToken).ConfigureAwait(false);
foreach (var fullRef in fullReferences)
{
if (!TryParseNodeId(session, fullRef, out var nodeId)) continue;
// The tag string is routed through MonitoredItem.Handle so the Notification
// handler can identify which tag changed without an extra lookup.
var item = new MonitoredItem(telemetry: null!, new MonitoredItemOptions
{
DisplayName = fullRef,
StartNodeId = nodeId,
AttributeId = Attributes.Value,
MonitoringMode = MonitoringMode.Reporting,
SamplingInterval = intervalMs,
QueueSize = 1,
DiscardOldest = true,
})
{
Handle = fullRef,
};
item.Notification += (mi, args) => OnMonitoredItemNotification(handle, mi, args);
subscription.AddItem(item);
}
await subscription.CreateItemsAsync(cancellationToken).ConfigureAwait(false);
_subscriptions[id] = new RemoteSubscription(subscription, handle);
}
finally { _gate.Release(); }
return handle;
}
public async Task UnsubscribeAsync(ISubscriptionHandle handle, CancellationToken cancellationToken)
{
if (handle is not OpcUaSubscriptionHandle h) return;
if (!_subscriptions.TryRemove(h.Id, out var rs)) return;
await _gate.WaitAsync(cancellationToken).ConfigureAwait(false);
try
{
try { await rs.Subscription.DeleteAsync(silent: true, cancellationToken).ConfigureAwait(false); }
catch { /* best-effort — the subscription may already be gone on reconnect */ }
}
finally { _gate.Release(); }
}
private void OnMonitoredItemNotification(OpcUaSubscriptionHandle handle, MonitoredItem item, MonitoredItemNotificationEventArgs args)
{
// args.NotificationValue arrives as a MonitoredItemNotification for value-change
// subscriptions; extract its DataValue. The Handle property carries our tag string.
if (args.NotificationValue is not MonitoredItemNotification mn) return;
var dv = mn.Value;
if (dv is null) return;
var fullRef = (item.Handle as string) ?? item.DisplayName ?? string.Empty;
var snapshot = new DataValueSnapshot(
Value: dv.Value,
StatusCode: dv.StatusCode.Code,
SourceTimestampUtc: dv.SourceTimestamp == DateTime.MinValue ? null : dv.SourceTimestamp,
ServerTimestampUtc: dv.ServerTimestamp == DateTime.MinValue ? DateTime.UtcNow : dv.ServerTimestamp);
OnDataChange?.Invoke(this, new DataChangeEventArgs(handle, fullRef, snapshot));
}
private sealed record RemoteSubscription(Subscription Subscription, OpcUaSubscriptionHandle Handle);
private sealed record OpcUaSubscriptionHandle(long Id) : ISubscriptionHandle
{
public string DiagnosticId => $"opcua-sub-{Id}";
}
// ---- IAlarmSource ----
///
/// Field positions in the EventFilter SelectClauses below. Used to index into the
/// EventFieldList.EventFields Variant collection when an event arrives.
///
private const int AlarmFieldEventId = 0;
private const int AlarmFieldEventType = 1;
private const int AlarmFieldSourceNode = 2;
private const int AlarmFieldMessage = 3;
private const int AlarmFieldSeverity = 4;
private const int AlarmFieldTime = 5;
private const int AlarmFieldConditionId = 6;
public async Task SubscribeAlarmsAsync(
IReadOnlyList sourceNodeIds, CancellationToken cancellationToken)
{
var session = RequireSession();
var id = Interlocked.Increment(ref _nextAlarmSubscriptionId);
var handle = new OpcUaAlarmSubscriptionHandle(id);
// Pre-resolve the source-node filter set so the per-event notification handler can
// match in O(1) without re-parsing on every event.
var sourceFilter = new HashSet(sourceNodeIds, StringComparer.Ordinal);
var subscription = new Subscription(telemetry: null!, new SubscriptionOptions
{
DisplayName = $"opcua-alarm-sub-{id}",
PublishingInterval = 500, // 500ms — alarms don't need fast polling; the server pushes
KeepAliveCount = 10,
LifetimeCount = 1000,
MaxNotificationsPerPublish = 0,
PublishingEnabled = true,
Priority = 0,
TimestampsToReturn = TimestampsToReturn.Both,
});
// EventFilter SelectClauses — pick the standard BaseEventType fields we need to
// materialize an AlarmEventArgs. Field positions are indexed by the AlarmField*
// constants so the notification handler indexes in O(1) without re-examining the
// QualifiedName BrowsePaths.
var filter = new EventFilter();
void AddField(string browseName) => filter.SelectClauses.Add(new SimpleAttributeOperand
{
TypeDefinitionId = ObjectTypeIds.BaseEventType,
BrowsePath = [new QualifiedName(browseName)],
AttributeId = Attributes.Value,
});
AddField("EventId");
AddField("EventType");
AddField("SourceNode");
AddField("Message");
AddField("Severity");
AddField("Time");
// ConditionId on ConditionType nodes is the branch identifier for
// acknowledgeable conditions. Not a BaseEventType field — reach it via the typed path.
filter.SelectClauses.Add(new SimpleAttributeOperand
{
TypeDefinitionId = ObjectTypeIds.ConditionType,
BrowsePath = [], // empty path = the condition node itself
AttributeId = Attributes.NodeId,
});
await _gate.WaitAsync(cancellationToken).ConfigureAwait(false);
try
{
session.AddSubscription(subscription);
await subscription.CreateAsync(cancellationToken).ConfigureAwait(false);
var eventItem = new MonitoredItem(telemetry: null!, new MonitoredItemOptions
{
DisplayName = "Server/Events",
StartNodeId = ObjectIds.Server,
AttributeId = Attributes.EventNotifier,
MonitoringMode = MonitoringMode.Reporting,
QueueSize = 1000, // deep queue — a server can fire many alarms in bursts
DiscardOldest = false,
Filter = filter,
})
{
Handle = handle,
};
eventItem.Notification += (mi, args) => OnEventNotification(handle, sourceFilter, mi, args);
subscription.AddItem(eventItem);
await subscription.CreateItemsAsync(cancellationToken).ConfigureAwait(false);
_alarmSubscriptions[id] = new RemoteAlarmSubscription(subscription, handle);
}
finally { _gate.Release(); }
return handle;
}
public async Task UnsubscribeAlarmsAsync(IAlarmSubscriptionHandle handle, CancellationToken cancellationToken)
{
if (handle is not OpcUaAlarmSubscriptionHandle h) return;
if (!_alarmSubscriptions.TryRemove(h.Id, out var rs)) return;
await _gate.WaitAsync(cancellationToken).ConfigureAwait(false);
try
{
try { await rs.Subscription.DeleteAsync(silent: true, cancellationToken).ConfigureAwait(false); }
catch { /* best-effort — session may already be gone across a reconnect */ }
}
finally { _gate.Release(); }
}
public async Task AcknowledgeAsync(
IReadOnlyList acknowledgements, CancellationToken cancellationToken)
{
// Short-circuit empty batch BEFORE touching the session so callers can pass an empty
// list without guarding the size themselves — e.g. a bulk-ack UI that built an empty
// list because the filter matched nothing.
if (acknowledgements.Count == 0) return;
var session = RequireSession();
// OPC UA A&C: call the AcknowledgeableConditionType.Acknowledge method on each
// condition node with EventId + Comment arguments. CallAsync accepts a batch —
// one CallMethodRequest per ack.
var callRequests = new CallMethodRequestCollection();
foreach (var ack in acknowledgements)
{
if (!TryParseNodeId(session, ack.ConditionId, out var conditionId)) continue;
callRequests.Add(new CallMethodRequest
{
ObjectId = conditionId,
MethodId = MethodIds.AcknowledgeableConditionType_Acknowledge,
InputArguments = [
new Variant(Array.Empty()), // EventId — server-side best-effort; empty resolves to 'most recent'
new Variant(new LocalizedText(ack.Comment ?? string.Empty)),
],
});
}
if (callRequests.Count == 0) return;
await _gate.WaitAsync(cancellationToken).ConfigureAwait(false);
try
{
try
{
_ = await session.CallAsync(
requestHeader: null,
methodsToCall: callRequests,
ct: cancellationToken).ConfigureAwait(false);
}
catch { /* best-effort — caller's re-ack mechanism catches pathological paths */ }
}
finally { _gate.Release(); }
}
private void OnEventNotification(
OpcUaAlarmSubscriptionHandle handle,
HashSet sourceFilter,
MonitoredItem item,
MonitoredItemNotificationEventArgs args)
{
if (args.NotificationValue is not EventFieldList efl) return;
if (efl.EventFields.Count <= AlarmFieldConditionId) return;
var sourceNode = efl.EventFields[AlarmFieldSourceNode].Value?.ToString() ?? string.Empty;
if (sourceFilter.Count > 0 && !sourceFilter.Contains(sourceNode)) return;
var eventType = efl.EventFields[AlarmFieldEventType].Value?.ToString() ?? "BaseEventType";
var message = (efl.EventFields[AlarmFieldMessage].Value as LocalizedText)?.Text ?? string.Empty;
var severity = efl.EventFields[AlarmFieldSeverity].Value is ushort sev ? sev : (ushort)0;
var time = efl.EventFields[AlarmFieldTime].Value is DateTime t ? t : DateTime.UtcNow;
var conditionId = efl.EventFields[AlarmFieldConditionId].Value?.ToString() ?? string.Empty;
OnAlarmEvent?.Invoke(this, new AlarmEventArgs(
SubscriptionHandle: handle,
SourceNodeId: sourceNode,
ConditionId: conditionId,
AlarmType: eventType,
Message: message,
Severity: MapSeverity(severity),
SourceTimestampUtc: time));
}
///
/// Map an OPC UA BaseEventType.Severity (1..1000) to our coarse-grained
/// bucket. Thresholds match the OPC UA A&C Part 9
/// guidance: 1-200 Low, 201-500 Medium, 501-800 High, 801-1000 Critical.
///
internal static AlarmSeverity MapSeverity(ushort opcSeverity) => opcSeverity switch
{
<= 200 => AlarmSeverity.Low,
<= 500 => AlarmSeverity.Medium,
<= 800 => AlarmSeverity.High,
_ => AlarmSeverity.Critical,
};
private sealed record RemoteAlarmSubscription(Subscription Subscription, OpcUaAlarmSubscriptionHandle Handle);
private sealed record OpcUaAlarmSubscriptionHandle(long Id) : IAlarmSubscriptionHandle
{
public string DiagnosticId => $"opcua-alarm-sub-{Id}";
}
// ---- IHistoryProvider (passthrough to upstream server) ----
public async Task ReadRawAsync(
string fullReference, DateTime startUtc, DateTime endUtc, uint maxValuesPerNode,
CancellationToken cancellationToken)
{
var details = new ReadRawModifiedDetails
{
IsReadModified = false,
StartTime = startUtc,
EndTime = endUtc,
NumValuesPerNode = maxValuesPerNode,
ReturnBounds = false,
};
return await ExecuteHistoryReadAsync(fullReference, new ExtensionObject(details), cancellationToken)
.ConfigureAwait(false);
}
public async Task ReadProcessedAsync(
string fullReference, DateTime startUtc, DateTime endUtc, TimeSpan interval,
HistoryAggregateType aggregate, CancellationToken cancellationToken)
{
var aggregateId = MapAggregateToNodeId(aggregate);
var details = new ReadProcessedDetails
{
StartTime = startUtc,
EndTime = endUtc,
ProcessingInterval = interval.TotalMilliseconds,
AggregateType = [aggregateId],
};
return await ExecuteHistoryReadAsync(fullReference, new ExtensionObject(details), cancellationToken)
.ConfigureAwait(false);
}
public async Task ReadAtTimeAsync(
string fullReference, IReadOnlyList timestampsUtc, CancellationToken cancellationToken)
{
var reqTimes = new DateTimeCollection(timestampsUtc);
var details = new ReadAtTimeDetails
{
ReqTimes = reqTimes,
UseSimpleBounds = true,
};
return await ExecuteHistoryReadAsync(fullReference, new ExtensionObject(details), cancellationToken)
.ConfigureAwait(false);
}
///
/// Shared HistoryRead wire path — used by Raw/Processed/AtTime. Handles NodeId parse,
/// Session.HistoryReadAsync call, Bad-StatusCode passthrough (no translation per §8
/// cascading-quality rule), and HistoryData unwrap into .
///
private async Task ExecuteHistoryReadAsync(
string fullReference, ExtensionObject historyReadDetails, CancellationToken ct)
{
var session = RequireSession();
if (!TryParseNodeId(session, fullReference, out var nodeId))
{
return new Core.Abstractions.HistoryReadResult([], null);
}
var nodesToRead = new HistoryReadValueIdCollection
{
new HistoryReadValueId { NodeId = nodeId },
};
await _gate.WaitAsync(ct).ConfigureAwait(false);
try
{
var resp = await session.HistoryReadAsync(
requestHeader: null,
historyReadDetails: historyReadDetails,
timestampsToReturn: TimestampsToReturn.Both,
releaseContinuationPoints: false,
nodesToRead: nodesToRead,
ct: ct).ConfigureAwait(false);
if (resp.Results.Count == 0) return new Core.Abstractions.HistoryReadResult([], null);
var r = resp.Results[0];
// Unwrap HistoryData from the ExtensionObject-encoded payload the SDK returns.
// Samples stay in chronological order per OPC UA Part 11; cascading-quality
// rule: preserve each DataValue's upstream StatusCode + timestamps verbatim.
var samples = new List();
if (r.HistoryData?.Body is HistoryData hd)
{
var now = DateTime.UtcNow;
foreach (var dv in hd.DataValues)
{
samples.Add(new DataValueSnapshot(
Value: dv.Value,
StatusCode: dv.StatusCode.Code,
SourceTimestampUtc: dv.SourceTimestamp == DateTime.MinValue ? null : dv.SourceTimestamp,
ServerTimestampUtc: dv.ServerTimestamp == DateTime.MinValue ? now : dv.ServerTimestamp));
}
}
var contPt = r.ContinuationPoint is { Length: > 0 } ? r.ContinuationPoint : null;
return new Core.Abstractions.HistoryReadResult(samples, contPt);
}
finally { _gate.Release(); }
}
/// Map to the OPC UA Part 13 standard aggregate NodeId.
internal static NodeId MapAggregateToNodeId(HistoryAggregateType aggregate) => aggregate switch
{
HistoryAggregateType.Average => ObjectIds.AggregateFunction_Average,
HistoryAggregateType.Minimum => ObjectIds.AggregateFunction_Minimum,
HistoryAggregateType.Maximum => ObjectIds.AggregateFunction_Maximum,
HistoryAggregateType.Total => ObjectIds.AggregateFunction_Total,
HistoryAggregateType.Count => ObjectIds.AggregateFunction_Count,
_ => throw new ArgumentOutOfRangeException(nameof(aggregate), aggregate, null),
};
// ReadEventsAsync stays at the interface default (throws NotSupportedException) per
// IHistoryProvider contract -- the OPC UA Client driver CAN forward HistoryReadEvents,
// but the call-site needs an EventFilter SelectClauses surface which the interface
// doesn't carry. Landing the event-history passthrough requires extending
// IHistoryProvider.ReadEventsAsync with a filter-spec parameter; out of scope for this PR.
// ---- IHostConnectivityProbe ----
///
/// Endpoint-URL-keyed host identity for the Admin /hosts dashboard. Reflects the
/// endpoint the driver actually connected to after the failover sweep — not the
/// first URL in the candidate list — so operators see which of the configured
/// endpoints is currently serving traffic. Falls back to the first configured URL
/// pre-init so the dashboard has something to render before the first connect.
///
public string HostName => _connectedEndpointUrl
?? ResolveEndpointCandidates(_options).FirstOrDefault()
?? _options.EndpointUrl;
public IReadOnlyList GetHostStatuses()
{
lock (_probeLock)
return [new HostConnectivityStatus(HostName, _hostState, _hostStateChangedUtc)];
}
///
/// Session keep-alive handler. On a healthy ping, bumps HostState back to Running
/// (typical bounce after a transient network blip). On a bad ping, starts the SDK's
/// which retries on the configured period +
/// fires when it lands a new session.
///
private void OnKeepAlive(ISession sender, KeepAliveEventArgs e)
{
if (!ServiceResult.IsBad(e.Status))
{
TransitionTo(HostState.Running);
return;
}
TransitionTo(HostState.Stopped);
// Kick off the SDK's reconnect loop exactly once per drop. The handler handles its
// own retry cadence via ReconnectPeriod; we tear it down in OnReconnectComplete.
if (_reconnectHandler is not null) return;
_reconnectHandler = new SessionReconnectHandler(telemetry: null!,
reconnectAbort: false,
maxReconnectPeriod: (int)TimeSpan.FromMinutes(2).TotalMilliseconds);
var state = _reconnectHandler.BeginReconnect(
sender,
(int)_options.ReconnectPeriod.TotalMilliseconds,
OnReconnectComplete);
}
///
/// Called by when its retry loop has either
/// successfully swapped to a new session or given up. Reads the new session off
/// handler.Session, unwires the old keep-alive hook, rewires for the new
/// one, and tears down the handler. Subscription migration is already handled
/// inside the SDK via TransferSubscriptions (the SDK calls it automatically
/// when is true,
/// which is the default).
///
private void OnReconnectComplete(object? sender, EventArgs e)
{
if (sender is not SessionReconnectHandler handler) return;
var newSession = handler.Session;
var oldSession = Session;
// Rewire keep-alive onto the new session — without this the next drop wouldn't
// trigger another reconnect attempt.
if (oldSession is not null && _keepAliveHandler is not null)
{
try { oldSession.KeepAlive -= _keepAliveHandler; } catch { }
}
if (newSession is not null && _keepAliveHandler is not null)
{
newSession.KeepAlive += _keepAliveHandler;
}
Session = newSession;
_reconnectHandler?.Dispose();
_reconnectHandler = null;
// Whether the reconnect actually succeeded depends on whether the session is
// non-null + connected. When it succeeded, flip back to Running so downstream
// consumers see recovery.
if (newSession is not null)
{
TransitionTo(HostState.Running);
_health = new DriverHealth(DriverState.Healthy, DateTime.UtcNow, null);
}
}
private void TransitionTo(HostState newState)
{
HostState old;
lock (_probeLock)
{
old = _hostState;
if (old == newState) return;
_hostState = newState;
_hostStateChangedUtc = DateTime.UtcNow;
}
OnHostStatusChanged?.Invoke(this, new HostStatusChangedEventArgs(HostName, old, newState));
}
public void Dispose() => DisposeAsync().AsTask().GetAwaiter().GetResult();
public async ValueTask DisposeAsync()
{
if (_disposed) return;
_disposed = true;
try { await ShutdownAsync(CancellationToken.None).ConfigureAwait(false); }
catch { /* disposal is best-effort */ }
_gate.Dispose();
}
}