Closes the Phase 6.1 Stream A.2 "per-instance overrides bound from DriverInstance.ResilienceConfig JSON column" work flagged as a follow-up when Stream A.1 shipped in PR #78. Every driver can now override its Polly pipeline policy per instance instead of inheriting pure tier defaults.

Configuration:
- DriverInstance entity gains a nullable `ResilienceConfig` string column (nvarchar(max)) + SQL check constraint `CK_DriverInstance_ResilienceConfig_IsJson` that enforces ISJSON when not null. Null = use tier defaults (decision #143 / unchanged from pre-Phase-6.1).
- EF migration `20260419161008_AddDriverInstanceResilienceConfig`.
- SchemaComplianceTests expected-constraint list gains the new CK name.

Core.Resilience.DriverResilienceOptionsParser:
- Pure-function parser. ParseOrDefaults(tier, json, out diag) returns the effective DriverResilienceOptions — tier defaults with per-capability / bulkhead overrides layered on top when the JSON payload supplies them. Partial policies (e.g. Read { retryCount: 10 }) fill missing fields from the tier default for that capability.
- Malformed JSON falls back to pure tier defaults + surfaces a human-readable diagnostic via the out parameter. Callers log the diag but don't fail startup — a misconfigured ResilienceConfig must not brick a working driver.
- Property names + capability keys are case-insensitive; unrecognised capability names are logged-and-skipped; unrecognised shape-level keys are ignored so future shapes land without a migration.

Server wire-in:
- OtOpcUaServer gains two optional ctor params: `tierLookup` (driverType → DriverTier) + `resilienceConfigLookup` (driverInstanceId → JSON string). CreateMasterNodeManager now resolves tier + JSON for each driver, parses via DriverResilienceOptionsParser, logs the diagnostic if any, and constructs CapabilityInvoker with the merged options instead of pure Tier A defaults.
- OpcUaApplicationHost threads both lookups through.
Both lookups default to null, so existing tests that construct the host without either Func keep working unchanged (falling back to Tier A + tier defaults exactly as before).

Tests (13 new DriverResilienceOptionsParserTests):
- null / whitespace / empty-object JSON returns pure tier defaults.
- Malformed JSON falls back + surfaces diagnostic.
- Read override merged into tier defaults; other capabilities untouched.
- Partial policy fills missing fields from tier default.
- Bulkhead overrides honored.
- Unknown capability skipped + surfaced in diagnostic.
- Property names + capability keys are case-insensitive.
- Every tier × every capability × empty-JSON round-trips tier defaults exactly (theory).

Full solution dotnet test: 1215 passing (was 1202, +13). Pre-existing Client.CLI Subscribe flake unchanged.

Production wiring (Program.cs) example:

    Func<string, DriverTier> tierLookup = type => type switch
    {
        "Galaxy" => DriverTier.C,
        "Modbus" or "S7" => DriverTier.B,
        "OpcUaClient" => DriverTier.A,
        _ => DriverTier.A,
    };
    Func<string, string?> cfgLookup = id =>
        db.DriverInstances.AsNoTracking().FirstOrDefault(x => x.DriverInstanceId == id)?.ResilienceConfig;
    var host = new OpcUaApplicationHost(..., tierLookup: tierLookup, resilienceConfigLookup: cfgLookup);

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
265 lines
11 KiB
C#
265 lines
11 KiB
C#
using Microsoft.Extensions.Logging;
|
|
using Opc.Ua;
|
|
using Opc.Ua.Configuration;
|
|
using ZB.MOM.WW.OtOpcUa.Configuration.LocalCache;
|
|
using ZB.MOM.WW.OtOpcUa.Core.Hosting;
|
|
using ZB.MOM.WW.OtOpcUa.Core.OpcUa;
|
|
using ZB.MOM.WW.OtOpcUa.Core.Resilience;
|
|
using ZB.MOM.WW.OtOpcUa.Server.Observability;
|
|
using ZB.MOM.WW.OtOpcUa.Server.Security;
|
|
|
|
namespace ZB.MOM.WW.OtOpcUa.Server.OpcUa;
|
|
|
|
/// <summary>
/// Wraps <see cref="ApplicationInstance"/> to bring the OPC UA server online — builds an
/// <see cref="ApplicationConfiguration"/> programmatically (no external XML file), ensures
/// the application certificate exists in the PKI store (auto-generates self-signed on first
/// run), starts the server, then walks each <see cref="DriverNodeManager"/> and invokes
/// <see cref="GenericDriverNodeManager.BuildAddressSpaceAsync"/> against it so the driver's
/// discovery streams into the already-running server's address space.
/// </summary>
public sealed class OpcUaApplicationHost : IAsyncDisposable
{
    // Transport / request-queue limits applied to every configuration this host builds.
    private const int OperationTimeoutMs = 15000;
    private const int MinRequestThreads = 5;
    private const int MaxRequestThreads = 100;
    private const int MaxQueuedRequests = 200;

    private readonly OpcUaServerOptions _options;
    private readonly DriverHost _driverHost;
    private readonly IUserAuthenticator _authenticator;
    private readonly DriverResiliencePipelineBuilder _pipelineBuilder;
    private readonly AuthorizationGate? _authzGate;
    private readonly NodeScopeResolver? _scopeResolver;
    private readonly StaleConfigFlag? _staleConfigFlag;
    private readonly Func<string, ZB.MOM.WW.OtOpcUa.Core.Abstractions.DriverTier>? _tierLookup;
    private readonly Func<string, string?>? _resilienceConfigLookup;
    private readonly ILoggerFactory _loggerFactory;
    private readonly ILogger<OpcUaApplicationHost> _logger;
    private ApplicationInstance? _application;
    private OtOpcUaServer? _server;
    private HealthEndpointsHost? _healthHost;
    private bool _disposed;

    /// <summary>
    /// Captures the collaborators needed to start the server. Nothing is started here;
    /// call <see cref="StartAsync"/> to bring the server online.
    /// </summary>
    /// <param name="options">Endpoint, PKI, security-profile and health-endpoint settings.</param>
    /// <param name="driverHost">Driver host handed to the server and the health endpoints.</param>
    /// <param name="authenticator">User authenticator handed to the server.</param>
    /// <param name="loggerFactory">Factory handed to the server and used for the health-endpoint logger.</param>
    /// <param name="logger">Logger for this host's own startup / shutdown messages.</param>
    /// <param name="pipelineBuilder">Resilience pipeline builder; a new default instance is created when null.</param>
    /// <param name="authzGate">Optional authorization gate forwarded to the server.</param>
    /// <param name="scopeResolver">Optional node-scope resolver forwarded to the server.</param>
    /// <param name="staleConfigFlag">Optional flag whose <c>IsStale</c> value is exposed to the health endpoints.</param>
    /// <param name="tierLookup">Optional lookup forwarded to the server as <c>tierLookup</c>.</param>
    /// <param name="resilienceConfigLookup">Optional lookup forwarded to the server as <c>resilienceConfigLookup</c>.</param>
    public OpcUaApplicationHost(OpcUaServerOptions options, DriverHost driverHost,
        IUserAuthenticator authenticator, ILoggerFactory loggerFactory, ILogger<OpcUaApplicationHost> logger,
        DriverResiliencePipelineBuilder? pipelineBuilder = null,
        AuthorizationGate? authzGate = null,
        NodeScopeResolver? scopeResolver = null,
        StaleConfigFlag? staleConfigFlag = null,
        Func<string, ZB.MOM.WW.OtOpcUa.Core.Abstractions.DriverTier>? tierLookup = null,
        Func<string, string?>? resilienceConfigLookup = null)
    {
        _options = options;
        _driverHost = driverHost;
        _authenticator = authenticator;
        _pipelineBuilder = pipelineBuilder ?? new DriverResiliencePipelineBuilder();
        _authzGate = authzGate;
        _scopeResolver = scopeResolver;
        _staleConfigFlag = staleConfigFlag;
        _tierLookup = tierLookup;
        _resilienceConfigLookup = resilienceConfigLookup;
        _loggerFactory = loggerFactory;
        _logger = logger;
    }

    /// <summary>The server instance created by <see cref="StartAsync"/>; null until then.</summary>
    public OtOpcUaServer? Server => _server;

    /// <summary>
    /// Builds the <see cref="ApplicationConfiguration"/>, validates/creates the application
    /// certificate, constructs + starts the <see cref="OtOpcUaServer"/>, optionally starts the
    /// health endpoints, then drives <see cref="GenericDriverNodeManager.BuildAddressSpaceAsync"/>
    /// per registered driver to populate the running server's address space.
    /// </summary>
    /// <param name="ct">
    /// Cancels driver discovery. Cancellation surfaces as <see cref="OperationCanceledException"/>;
    /// anything already started is left for <see cref="DisposeAsync"/> to tear down.
    /// </param>
    /// <exception cref="ObjectDisposedException">The host has already been disposed.</exception>
    /// <exception cref="InvalidOperationException">The application certificate check returned false.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled during discovery.</exception>
    public async Task StartAsync(CancellationToken ct)
    {
        // A server started after dispose would never be stopped — refuse up front.
        if (_disposed)
        {
            throw new ObjectDisposedException(nameof(OpcUaApplicationHost));
        }

        var configuration = await BuildConfigurationAsync().ConfigureAwait(false);

        _application = new ApplicationInstance
        {
            ApplicationName = _options.ApplicationName,
            ApplicationType = ApplicationType.Server,
            ApplicationConfiguration = configuration,
        };

        var hasCert = await _application.CheckApplicationInstanceCertificate(silent: true, minimumKeySize: CertificateFactory.DefaultKeySize).ConfigureAwait(false);
        if (!hasCert)
        {
            throw new InvalidOperationException(
                $"OPC UA application certificate could not be validated or created in {_options.PkiStoreRoot}");
        }

        _server = new OtOpcUaServer(_driverHost, _authenticator, _pipelineBuilder, _loggerFactory,
            authzGate: _authzGate, scopeResolver: _scopeResolver,
            tierLookup: _tierLookup, resilienceConfigLookup: _resilienceConfigLookup);
        await _application.Start(_server).ConfigureAwait(false);

        _logger.LogInformation("OPC UA server started — endpoint={Endpoint} driverCount={Count}",
            _options.EndpointUrl, _server.DriverNodeManagers.Count);

        // Phase 6.1 Stream C: health endpoints on :4841 (loopback by default — see
        // HealthEndpointsHost remarks for the Windows URL-ACL tradeoff).
        if (_options.HealthEndpointsEnabled)
        {
            _healthHost = new HealthEndpointsHost(
                _driverHost,
                _loggerFactory.CreateLogger<HealthEndpointsHost>(),
                usingStaleConfig: _staleConfigFlag is null ? null : () => _staleConfigFlag.IsStale,
                prefix: _options.HealthEndpointsPrefix);
            _healthHost.Start();
        }

        // Drive each driver's discovery through its node manager. The node manager IS the
        // IAddressSpaceBuilder; GenericDriverNodeManager captures alarm-condition sinks into
        // its internal map and wires OnAlarmEvent → sink routing.
        foreach (var nodeManager in _server.DriverNodeManagers)
        {
            ct.ThrowIfCancellationRequested();

            var driverId = nodeManager.Driver.DriverInstanceId;
            try
            {
                var generic = new GenericDriverNodeManager(nodeManager.Driver);
                await generic.BuildAddressSpaceAsync(nodeManager, ct).ConfigureAwait(false);
                _logger.LogInformation("Address space populated for driver {Driver}", driverId);
            }
            catch (Exception ex) when (ex is not OperationCanceledException || !ct.IsCancellationRequested)
            {
                // Per decision #12: driver exceptions isolate — log and keep the server serving
                // the other drivers' subtrees. Re-building this one takes a Reinitialize call.
                // The filter lets caller-requested cancellation propagate instead of being
                // reported as a driver fault; a driver's own cancellation/timeout (token not
                // signalled) is still isolated here.
                _logger.LogError(ex, "Discovery failed for driver {Driver}; subtree faulted", driverId);
            }
        }
    }

    /// <summary>
    /// Creates the PKI root directory, assembles the server <see cref="ApplicationConfiguration"/>
    /// from <see cref="OpcUaServerOptions"/>, validates it, and — when auto-accept is enabled —
    /// hooks certificate validation to accept certificates rejected as untrusted.
    /// </summary>
    /// <returns>The validated configuration.</returns>
    private async Task<ApplicationConfiguration> BuildConfigurationAsync()
    {
        Directory.CreateDirectory(_options.PkiStoreRoot);

        var cfg = new ApplicationConfiguration
        {
            ApplicationName = _options.ApplicationName,
            ApplicationUri = _options.ApplicationUri,
            ApplicationType = ApplicationType.Server,
            ProductUri = "urn:OtOpcUa:Server",

            SecurityConfiguration = new SecurityConfiguration
            {
                ApplicationCertificate = new CertificateIdentifier
                {
                    StoreType = CertificateStoreType.Directory,
                    StorePath = Path.Combine(_options.PkiStoreRoot, "own"),
                    SubjectName = "CN=" + _options.ApplicationName,
                },
                TrustedIssuerCertificates = new CertificateTrustList
                {
                    StoreType = CertificateStoreType.Directory,
                    StorePath = Path.Combine(_options.PkiStoreRoot, "issuers"),
                },
                TrustedPeerCertificates = new CertificateTrustList
                {
                    StoreType = CertificateStoreType.Directory,
                    StorePath = Path.Combine(_options.PkiStoreRoot, "trusted"),
                },
                RejectedCertificateStore = new CertificateTrustList
                {
                    StoreType = CertificateStoreType.Directory,
                    StorePath = Path.Combine(_options.PkiStoreRoot, "rejected"),
                },
                AutoAcceptUntrustedCertificates = _options.AutoAcceptUntrustedClientCertificates,
                AddAppCertToTrustedStore = true,
            },

            TransportConfigurations = new TransportConfigurationCollection(),
            TransportQuotas = new TransportQuotas { OperationTimeout = OperationTimeoutMs },

            ServerConfiguration = new ServerConfiguration
            {
                BaseAddresses = new StringCollection { _options.EndpointUrl },
                SecurityPolicies = BuildSecurityPolicies(),
                UserTokenPolicies = BuildUserTokenPolicies(),
                MinRequestThreadCount = MinRequestThreads,
                MaxRequestThreadCount = MaxRequestThreads,
                MaxQueuedRequestCount = MaxQueuedRequests,
            },

            TraceConfiguration = new TraceConfiguration(),
        };

        // Awaited rather than blocked on: the only caller is async, so there is no reason
        // to tie up a thread (or risk a sync-context deadlock) with GetAwaiter().GetResult().
        await cfg.Validate(ApplicationType.Server).ConfigureAwait(false);

        if (cfg.SecurityConfiguration.AutoAcceptUntrustedCertificates)
        {
            // Only the "untrusted" rejection is overridden; any other validation error stands.
            cfg.CertificateValidator.CertificateValidation += (_, e) =>
            {
                if (e.Error.StatusCode == StatusCodes.BadCertificateUntrusted)
                {
                    e.Accept = true;
                }
            };
        }

        return cfg;
    }

    /// <summary>
    /// Returns the endpoint security policies: always <c>None</c>, plus
    /// Basic256Sha256 SignAndEncrypt when the configured security profile asks for it.
    /// </summary>
    private ServerSecurityPolicyCollection BuildSecurityPolicies()
    {
        var policies = new ServerSecurityPolicyCollection
        {
            // Keep the None policy present so legacy clients can discover + browse. Locked-down
            // deployments remove this by setting Ldap.Enabled=true + dropping None here; left in
            // for PR 19 so the PR 17 test harness continues to pass unchanged.
            new ServerSecurityPolicy
            {
                SecurityMode = MessageSecurityMode.None,
                SecurityPolicyUri = SecurityPolicies.None,
            },
        };

        if (_options.SecurityProfile == OpcUaSecurityProfile.Basic256Sha256SignAndEncrypt)
        {
            policies.Add(new ServerSecurityPolicy
            {
                SecurityMode = MessageSecurityMode.SignAndEncrypt,
                SecurityPolicyUri = SecurityPolicies.Basic256Sha256,
            });
        }

        return policies;
    }

    /// <summary>
    /// Returns the user-token policies: always Anonymous, plus UserName when the
    /// Basic256Sha256 SignAndEncrypt profile is selected and LDAP is enabled.
    /// </summary>
    private UserTokenPolicyCollection BuildUserTokenPolicies()
    {
        var tokens = new UserTokenPolicyCollection
        {
            new UserTokenPolicy(UserTokenType.Anonymous)
            {
                PolicyId = "Anonymous",
                SecurityPolicyUri = SecurityPolicies.None,
            },
        };

        if (_options.SecurityProfile == OpcUaSecurityProfile.Basic256Sha256SignAndEncrypt
            && _options.Ldap.Enabled)
        {
            tokens.Add(new UserTokenPolicy(UserTokenType.UserName)
            {
                PolicyId = "UserName",
                // Passwords must ride an encrypted channel — scope this token to Basic256Sha256
                // so the stack rejects any attempt to send UserName over the None endpoint.
                SecurityPolicyUri = SecurityPolicies.Basic256Sha256,
            });
        }

        return tokens;
    }

    /// <summary>
    /// Stops the server (if one was created) and disposes the health-endpoints host (if one
    /// was started). Failures in either step are logged as warnings and not rethrown.
    /// Subsequent calls are no-ops.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;

        try
        {
            _server?.Stop();
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "OPC UA server stop threw during dispose");
        }

        if (_healthHost is not null)
        {
            try
            {
                await _healthHost.DisposeAsync().ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex, "Health endpoints host dispose threw");
            }
        }
    }
}
|