feat(dcl): OPC UA verify-endpoint probe with untrusted-cert capture (T17)
This commit is contained in:
@@ -1,8 +1,12 @@
|
||||
using Akka.Actor;
|
||||
using Akka.Event;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Protocol;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Messages.DataConnection;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Management;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Serialization;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Types.DataConnections;
|
||||
using ZB.MOM.WW.ScadaBridge.DataConnectionLayer.Adapters;
|
||||
using ZB.MOM.WW.ScadaBridge.HealthMonitoring;
|
||||
using ZB.MOM.WW.ScadaBridge.SiteEventLogging;
|
||||
|
||||
@@ -20,6 +24,11 @@ public class DataConnectionManagerActor : ReceiveActor
|
||||
private readonly DataConnectionOptions _options;
|
||||
private readonly ISiteHealthCollector _healthCollector;
|
||||
private readonly ISiteEventLogger? _siteEventLogger;
|
||||
// T17: deployment-wide OPC UA application identity / cert-store paths — the same
|
||||
// global options the DataConnectionFactory feeds to RealOpcUaClient when creating OPC
|
||||
// UA connections. Needed by the verify-endpoint probe (VerifyEndpointCommand), which
|
||||
// builds an ApplicationConfiguration directly rather than through a connection actor.
|
||||
private readonly OpcUaGlobalOptions _opcUaGlobalOptions;
|
||||
private readonly Dictionary<string, IActorRef> _connectionActors = new();
|
||||
|
||||
/// <summary>
|
||||
@@ -29,16 +38,23 @@ public class DataConnectionManagerActor : ReceiveActor
|
||||
/// <param name="options">Configuration options for data connections.</param>
|
||||
/// <param name="healthCollector">Collector for site health metrics reported by connection actors.</param>
|
||||
/// <param name="siteEventLogger">Optional logger for site event entries; null disables site event logging.</param>
|
||||
/// <param name="opcUaGlobalOptions">
|
||||
/// Deployment-wide OPC UA application identity / cert-store paths used by the
|
||||
/// verify-endpoint probe; null falls back to defaults (mirrors
|
||||
/// <see cref="DataConnectionFactory"/>'s default-options constructor).
|
||||
/// </param>
|
||||
public DataConnectionManagerActor(
|
||||
IDataConnectionFactory factory,
|
||||
DataConnectionOptions options,
|
||||
ISiteHealthCollector healthCollector,
|
||||
ISiteEventLogger? siteEventLogger = null)
|
||||
ISiteEventLogger? siteEventLogger = null,
|
||||
OpcUaGlobalOptions? opcUaGlobalOptions = null)
|
||||
{
|
||||
_factory = factory;
|
||||
_options = options;
|
||||
_healthCollector = healthCollector;
|
||||
_siteEventLogger = siteEventLogger;
|
||||
_opcUaGlobalOptions = opcUaGlobalOptions ?? new OpcUaGlobalOptions();
|
||||
|
||||
Receive<CreateConnectionCommand>(HandleCreateConnection);
|
||||
Receive<SubscribeTagsRequest>(HandleRoute);
|
||||
@@ -52,6 +68,7 @@ public class DataConnectionManagerActor : ReceiveActor
|
||||
Receive<BrowseNodeCommand>(HandleBrowse);
|
||||
Receive<SearchAddressSpaceCommand>(HandleSearch);
|
||||
Receive<ReadTagValuesCommand>(HandleReadTagValues);
|
||||
Receive<VerifyEndpointCommand>(HandleVerifyEndpoint);
|
||||
}
|
||||
|
||||
private void HandleCreateConnection(CreateConnectionCommand command)
|
||||
@@ -243,6 +260,46 @@ public class DataConnectionManagerActor : ReceiveActor
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// T17: Handles a <see cref="VerifyEndpointCommand"/> from the Central UI's "Verify"
/// action — probes the endpoint config WITHOUT persisting it (connect → capture an
/// untrusted cert → disconnect) and pipes a structured <see cref="VerifyEndpointResult"/>
/// back to the sender. Verify does NOT require an existing connection (the config may be
/// brand-new and unsaved), so — unlike the routed browse/read handlers — it does not look
/// up a connection actor; it runs the probe directly. Only OPC UA is supported today.
/// Every outcome, including an unexpected fault of the probe task, is reported to the
/// sender as a <see cref="VerifyEndpointResult"/>.
/// </summary>
/// <param name="cmd">The verify request carrying the protocol name, connection name and endpoint config JSON.</param>
private void HandleVerifyEndpoint(VerifyEndpointCommand cmd)
{
    // Wall-clock budget handed to the probe.
    const int VerifyProbeTimeoutSeconds = 6;

    // Resolve the reply target once, on the actor thread, and use it for every reply path.
    var replyTo = Sender;

    if (!string.Equals(cmd.Protocol, "OpcUa", StringComparison.OrdinalIgnoreCase))
    {
        replyTo.Tell(new VerifyEndpointResult(
            false, VerifyFailureKind.ServerError,
            "Verify is only supported for OPC UA connections.", null));
        return;
    }

    OpcUaEndpointConfig config;
    try
    {
        // NOTE(review): the second tuple element (presumably the parse classification) is
        // discarded here — confirm a malformed payload still yields a usable config.
        (config, _) = OpcUaEndpointConfigSerializer.Deserialize(cmd.ConfigJson);
    }
    catch (Exception ex)
    {
        // Defensive: Deserialize is designed not to throw (it classifies Malformed), but
        // a verify must never crash the manager — surface the parse failure as ServerError.
        _log.Warning(ex, "Verify config for {0} could not be parsed", cmd.ConnectionName);
        replyTo.Tell(new VerifyEndpointResult(
            false, VerifyFailureKind.ServerError,
            "The endpoint configuration could not be parsed.", null));
        return;
    }

    var probeLogger = NullLogger.Instance;

    // Map a faulted probe task to a structured ServerError result. Without the failure
    // mapper the asker would receive a failure message instead of a VerifyEndpointResult.
    // The mapper runs off the actor thread, so it must not touch actor state.
    RealOpcUaClient
        .VerifyEndpointAsync(
            config,
            _opcUaGlobalOptions,
            probeLogger,
            TimeSpan.FromSeconds(VerifyProbeTimeoutSeconds),
            CancellationToken.None)
        .PipeTo(
            replyTo,
            failure: ex => new VerifyEndpointResult(
                false, VerifyFailureKind.ServerError, ex.Message, null));
}
|
||||
|
||||
private void HandleRemoveConnection(RemoveConnectionCommand command)
|
||||
{
|
||||
if (_connectionActors.TryGetValue(command.ConnectionName, out var actor))
|
||||
|
||||
@@ -5,7 +5,9 @@ using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Opc.Ua;
|
||||
using Opc.Ua.Client;
|
||||
using Opc.Ua.Configuration;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Management;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Types.Alarms;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Types.DataConnections;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
|
||||
|
||||
namespace ZB.MOM.WW.ScadaBridge.DataConnectionLayer.Adapters;
|
||||
@@ -155,6 +157,230 @@ public class RealOpcUaClient : IOpcUaClient
|
||||
await _subscription.CreateAsync(cancellationToken);
|
||||
}
|
||||
|
||||
/// <summary>
/// T17: Probes an OPC UA endpoint configuration WITHOUT persisting it or creating a
/// long-lived connection — connect, capture the server certificate if it is untrusted,
/// then disconnect. The probe is secure-by-default and READ-ONLY: it forces
/// <c>AutoAcceptUntrustedCertificates = false</c> and a validation hook that captures an
/// untrusted server certificate then REJECTS it (<c>e.Accept = false</c>). It never trusts
/// the certificate — trusting is a separate, later operator action. The session is always
/// closed and disposed in a <c>finally</c>.
/// </summary>
/// <remarks>
/// Endpoint discovery is awaited with the linked timeout token, so the discovery call does
/// not block the caller's thread and counts against <paramref name="timeout"/>. The final
/// session close gets its own budget of the same length so an unresponsive server cannot
/// hang the probe.
/// </remarks>
/// <param name="config">The endpoint configuration to probe.</param>
/// <param name="globalOptions">Deployment-wide OPC UA application identity / cert-store paths.</param>
/// <param name="logger">Logger for diagnostics.</param>
/// <param name="timeout">Wall-clock budget for the whole probe (discovery + session create).</param>
/// <param name="ct">External cancellation token, linked with the timeout.</param>
/// <returns>A structured <see cref="VerifyEndpointResult"/> classifying the outcome.</returns>
public static async Task<VerifyEndpointResult> VerifyEndpointAsync(
    OpcUaEndpointConfig config,
    OpcUaGlobalOptions globalOptions,
    ILogger logger,
    TimeSpan timeout,
    CancellationToken ct)
{
    // Captured by the certificate-validation hook below. A non-null value here means
    // the server presented an untrusted certificate; it dominates the outcome mapping.
    X509Certificate2? capturedCert = null;
    // True only when capturedCert is a copy made here (and therefore ours to dispose).
    var ownsCapturedCert = false;
    ISession? session = null;
    Exception? failure = null;

    var endpointUrl = string.IsNullOrWhiteSpace(config.EndpointUrl)
        ? "opc.tcp://localhost:4840"
        : config.EndpointUrl;

    var preferredSecurityMode = config.SecurityMode switch
    {
        OpcUaSecurityMode.Sign => MessageSecurityMode.Sign,
        OpcUaSecurityMode.SignAndEncrypt => MessageSecurityMode.SignAndEncrypt,
        _ => MessageSecurityMode.None
    };

    // T17: secure-by-default — force AutoAccept=false so an untrusted server cert is
    // captured and rejected rather than silently accepted (defeating the whole probe).
    var appConfig = new ApplicationConfiguration
    {
        ApplicationName = string.IsNullOrWhiteSpace(globalOptions.ApplicationName)
            ? "ScadaBridge-DCL"
            : globalOptions.ApplicationName,
        ApplicationType = ApplicationType.Client,
        SecurityConfiguration = new SecurityConfiguration
        {
            AutoAcceptUntrustedCertificates = false,
            ApplicationCertificate = new CertificateIdentifier(),
            TrustedIssuerCertificates = new CertificateTrustList { StorePath = ResolveStorePath(globalOptions.TrustedIssuerStorePath, "issuers") },
            TrustedPeerCertificates = new CertificateTrustList { StorePath = ResolveStorePath(globalOptions.TrustedPeerStorePath, "trusted") },
            RejectedCertificateStore = new CertificateTrustList { StorePath = ResolveStorePath(globalOptions.RejectedCertificateStorePath, "rejected") }
        },
        ClientConfiguration = new ClientConfiguration { DefaultSessionTimeout = config.SessionTimeoutMs },
        TransportQuotas = new TransportQuotas { OperationTimeout = config.OperationTimeoutMs }
    };

    // T17: capture the untrusted server cert, then REJECT it (e.Accept = false). The
    // validator runs on the SDK's connect thread; copying the cert is the only state we
    // keep. Never accept — this probe must not trust anything.
    appConfig.CertificateValidator.CertificateValidation += (_, e) =>
    {
        // The hook may fire more than once; the latest certificate wins, and a copy made
        // by an earlier invocation is released so it does not leak.
        var previousCopy = ownsCapturedCert ? capturedCert : null;
        try
        {
            // Copy into a stable instance so disposing the SDK's chain doesn't invalidate it.
            capturedCert = X509CertificateLoader.LoadCertificate(e.Certificate.RawData);
            ownsCapturedCert = true;
        }
        catch
        {
            // Best-effort capture: fall back to the original reference if the copy fails.
            // That instance belongs to the SDK, so it must not be disposed here.
            capturedCert = e.Certificate;
            ownsCapturedCert = false;
        }
        previousCopy?.Dispose();
        e.Accept = false;
    };

    using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
    linkedCts.CancelAfter(timeout);

    try
    {
        await appConfig.ValidateAsync(ApplicationType.Client);

        // Discover endpoints, pick the preferred security mode (same logic as ConnectAsync).
        EndpointDescription endpoint;
        try
        {
#pragma warning disable CS0618
            using var discoveryClient = DiscoveryClient.Create(new Uri(endpointUrl));
            // Awaited with the linked token: does not block the caller's thread and is
            // covered by the probe's wall-clock budget.
            var endpoints = await discoveryClient.GetEndpointsAsync(null, linkedCts.Token);
#pragma warning restore CS0618
            // An empty endpoint list falls back to a bare description, the same as a
            // failed discovery, instead of passing null to ConfiguredEndpoint.
            endpoint = endpoints.FirstOrDefault(ep => ep.SecurityMode == preferredSecurityMode)
                ?? endpoints.FirstOrDefault()
                ?? new EndpointDescription(endpointUrl);
        }
        catch (Exception ex) when (!linkedCts.IsCancellationRequested)
        {
            // Discovery is best-effort: fall back to a bare description and let the session
            // create classify the real failure. Once the budget has expired (or the caller
            // cancelled) the filter lets the exception propagate instead of being swallowed.
            logger.LogDebug(ex, "OPC UA verify discovery of {Endpoint} failed; using the configured URL directly.", endpointUrl);
            endpoint = new EndpointDescription(endpointUrl);
        }

        var endpointConfig = EndpointConfiguration.Create(appConfig);
        var configuredEndpoint = new ConfiguredEndpoint(null, endpoint, endpointConfig);

#pragma warning disable CS0618 // Allow obsolete DefaultSessionFactory constructor for compatibility
        var sessionFactory = new DefaultSessionFactory();
#pragma warning restore CS0618

        var userIdentity = BuildUserIdentity(config.UserIdentity is { } ui
            ? new OpcUaUserIdentityOptions(
                ui.TokenType.ToString(), ui.Username, ui.Password,
                ui.CertificatePath, ui.CertificatePassword)
            : null);

        session = await sessionFactory.CreateAsync(
            appConfig, configuredEndpoint, false,
            "ScadaBridge-DCL-Verify", (uint)config.SessionTimeoutMs,
            userIdentity, null, linkedCts.Token);
    }
    catch (Exception ex)
    {
        // OperationCanceledException from the linked CTS firing on timeout is mapped to
        // VerifyFailureKind.Timeout inside MapVerifyOutcome.
        failure = ex;
        logger.LogDebug(ex, "OPC UA verify of {Endpoint} failed.", endpointUrl);
    }
    finally
    {
        // T17: ALWAYS dispose the probe session — never leave a connection open.
        if (session != null)
        {
            try
            {
                // Bound the close with a fresh budget: the linked token may already be
                // cancelled, and an unbounded close could hang on an unresponsive server.
                using var closeCts = new CancellationTokenSource(timeout);
                await session.CloseAsync(closeCts.Token);
            }
            catch (Exception ex) { logger.LogDebug(ex, "OPC UA verify session close failed (ignored)."); }
            session.Dispose();
        }
    }

    try
    {
        return MapVerifyOutcome(failure, capturedCert);
    }
    finally
    {
        // The mapping copies the fields it needs out of the certificate, so a copy made by
        // the validation hook can be released here.
        if (ownsCapturedCert)
            capturedCert?.Dispose();
    }
}
|
||||
|
||||
/// <summary>
/// T17: Translates the raw result of a verify probe into a <see cref="VerifyEndpointResult"/>.
/// The inputs are the exception the probe caught (if any) and the server certificate the
/// validation hook captured (if any); no live OPC UA server is needed, which keeps the
/// classification unit-testable. Precedence, highest first:
/// a captured certificate → <see cref="VerifyFailureKind.UntrustedCertificate"/>;
/// no exception → success; otherwise the exception is classified.
/// </summary>
/// <param name="failure">The exception thrown during the probe, or null on success.</param>
/// <param name="capturedCert">The untrusted server certificate captured by the validation hook, or null.</param>
/// <returns>The classified verification result.</returns>
internal static VerifyEndpointResult MapVerifyOutcome(Exception? failure, X509Certificate2? capturedCert)
{
    // A captured certificate wins over any exception: it is the case the operator can act
    // on (by choosing to trust the certificate later).
    if (capturedCert is not null)
    {
        return new VerifyEndpointResult(
            false,
            VerifyFailureKind.UntrustedCertificate,
            "The server certificate is not trusted by this site.",
            new ServerCertInfo(
                capturedCert.Thumbprint,
                capturedCert.Subject,
                capturedCert.Issuer,
                capturedCert.NotBefore.ToUniversalTime(),
                capturedCert.NotAfter.ToUniversalTime(),
                Convert.ToBase64String(capturedCert.RawData)));
    }

    // Nothing thrown and nothing captured: the session was created.
    if (failure is null)
    {
        return new VerifyEndpointResult(true, null, null, null);
    }

    // Arms are evaluated top to bottom, so the order below is the classification precedence:
    // timeout/cancellation, then SDK exceptions (socket cause before status code), then a
    // bare socket failure, then the catch-all.
    var kind = failure switch
    {
        TimeoutException or OperationCanceledException => VerifyFailureKind.Timeout,
        ServiceResultException wrapped when HasSocketCause(wrapped) => VerifyFailureKind.Unreachable,
        ServiceResultException wrapped => ClassifyStatus(wrapped.StatusCode),
        _ when HasSocketCause(failure) => VerifyFailureKind.Unreachable,
        _ => VerifyFailureKind.ServerError
    };

    return new VerifyEndpointResult(false, kind, failure.Message, null);

    // Maps an OPC UA status code reported by the SDK to a failure kind.
    static VerifyFailureKind ClassifyStatus(uint statusCode) => statusCode switch
    {
        StatusCodes.BadRequestTimeout
            or StatusCodes.BadTimeout => VerifyFailureKind.Timeout,

        StatusCodes.BadUserAccessDenied
            or StatusCodes.BadIdentityTokenRejected
            or StatusCodes.BadIdentityTokenInvalid => VerifyFailureKind.AuthFailed,

        StatusCodes.BadConnectionRejected
            or StatusCodes.BadNotConnected
            or StatusCodes.BadConnectionClosed
            or StatusCodes.BadNoCommunication
            or StatusCodes.BadServerNotConnected => VerifyFailureKind.Unreachable,

        _ => VerifyFailureKind.ServerError
    };
}
|
||||
|
||||
/// <summary>
/// Searches an exception and its causes for a
/// <see cref="System.Net.Sockets.SocketException"/> — the signature of a DNS-resolution
/// or connection-refused failure that means the endpoint host is unreachable. The
/// <c>InnerException</c> chain is followed, and for an <see cref="AggregateException"/>
/// every entry of <c>InnerExceptions</c> is searched, not just the first.
/// </summary>
/// <param name="ex">The exception to inspect; null yields false.</param>
/// <returns>True when a socket exception is found anywhere among the causes.</returns>
private static bool HasSocketCause(Exception ex)
{
    for (var cur = ex; cur != null; cur = cur.InnerException)
    {
        if (cur is System.Net.Sockets.SocketException)
        {
            return true;
        }

        if (cur is AggregateException aggregate)
        {
            // InnerException only exposes the first child; InnerExceptions (which includes
            // that first child) covers every branch, so the walk ends here.
            foreach (var inner in aggregate.InnerExceptions)
            {
                if (HasSocketCause(inner))
                {
                    return true;
                }
            }

            return false;
        }
    }

    return false;
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task DisconnectAsync(CancellationToken cancellationToken = default)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user