Closes Stream C per docs/v2/implementation/phase-6-1-resilience-and-observability.md. Core.Observability (new namespace): - DriverHealthReport — pure-function aggregation over DriverHealthSnapshot list. Empty fleet = Healthy. Any Faulted = Faulted. Any Unknown/Initializing (no Faulted) = NotReady. Any Degraded or Reconnecting (no Faulted, no NotReady) = Degraded. Else Healthy. HttpStatus(verdict) maps to the Stream C.1 state matrix: Healthy/Degraded → 200, NotReady/Faulted → 503. - LogContextEnricher — Serilog LogContext wrapper. Push(id, type, capability, correlationId) returns an IDisposable scope; inner log calls carry DriverInstanceId / DriverType / CapabilityName / CorrelationId structured properties automatically. NewCorrelationId = 12-hex-char GUID slice for cases where no OPC UA RequestHeader.RequestHandle is in flight. CapabilityInvoker — now threads LogContextEnricher around every ExecuteAsync / ExecuteWriteAsync call site. OtOpcUaServer passes driver.DriverType through so logs correlate to the driver type too. Every capability call emits structured fields per the Stream C.4 compliance check. Server.Observability: - HealthEndpointsHost — standalone HttpListener on http://localhost:4841/ (loopback avoids Windows URL-ACL elevation; remote probing via reverse proxy or explicit netsh urlacl grant). Routes: /healthz → 200 when (configDbReachable OR usingStaleConfig); 503 otherwise. Body: status, uptimeSeconds, configDbReachable, usingStaleConfig. /readyz → DriverHealthReport.Aggregate + HttpStatus mapping. Body: verdict, drivers[], degradedDrivers[], uptimeSeconds. anything else → 404. Disposal cooperative with the HttpListener shutdown. - OpcUaApplicationHost starts the health host after the OPC UA server comes up and disposes it on shutdown. New OpcUaServerOptions knobs: HealthEndpointsEnabled (default true), HealthEndpointsPrefix (default http://localhost:4841/). 
Program.cs: - Serilog pipeline adds Enrich.FromLogContext + opt-in JSON file sink via `Serilog:WriteJson = true` appsetting. Uses Serilog.Formatting.Compact's CompactJsonFormatter (one JSON object per line — SIEMs like Splunk, Datadog, Graylog ingest without a regex parser). Server.Tests: - Existing 3 OpcUaApplicationHost integration tests now set HealthEndpointsEnabled=false to avoid port :4841 collisions under parallel execution. - New HealthEndpointsHostTests (9): /healthz healthy empty fleet; stale-config returns 200 with flag; unreachable+no-cache returns 503; /readyz empty/Healthy/Faulted/Degraded/Initializing drivers return correct status and bodies; unknown path → 404. Uses ephemeral ports via Interlocked counter. Core.Tests: - DriverHealthReportTests (8): empty fleet, all-healthy, any-Faulted trumps, any-NotReady without Faulted, Degraded without Faulted/NotReady, HttpStatus per-verdict theory. - LogContextEnricherTests (8): all 4 properties attach; scope disposes cleanly; NewCorrelationId shape; null/whitespace driverInstanceId throws. - CapabilityInvokerEnrichmentTests (2): inner logs carry structured properties; no context leak outside the call site. Full solution dotnet test: 1016 passing (baseline 906, +110 for Phase 6.1 so far across Streams A+B+C). Pre-existing Client.CLI Subscribe flake unchanged. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
246 lines
9.9 KiB
C#
246 lines
9.9 KiB
C#
using Microsoft.Extensions.Logging;
|
|
using Opc.Ua;
|
|
using Opc.Ua.Configuration;
|
|
using ZB.MOM.WW.OtOpcUa.Core.Hosting;
|
|
using ZB.MOM.WW.OtOpcUa.Core.OpcUa;
|
|
using ZB.MOM.WW.OtOpcUa.Core.Resilience;
|
|
using ZB.MOM.WW.OtOpcUa.Server.Observability;
|
|
using ZB.MOM.WW.OtOpcUa.Server.Security;
|
|
|
|
namespace ZB.MOM.WW.OtOpcUa.Server.OpcUa;
|
|
|
|
/// <summary>
/// Wraps <see cref="ApplicationInstance"/> to bring the OPC UA server online — builds an
/// <see cref="ApplicationConfiguration"/> programmatically (no external XML file), ensures
/// the application certificate exists in the PKI store (auto-generates self-signed on first
/// run), starts the server, then walks each <see cref="DriverNodeManager"/> and invokes
/// <see cref="GenericDriverNodeManager.BuildAddressSpaceAsync"/> against it so the driver's
/// discovery streams into the already-running server's address space.
/// </summary>
public sealed class OpcUaApplicationHost : IAsyncDisposable
{
    private readonly OpcUaServerOptions _options;
    private readonly DriverHost _driverHost;
    private readonly IUserAuthenticator _authenticator;
    private readonly DriverResiliencePipelineBuilder _pipelineBuilder;
    private readonly ILoggerFactory _loggerFactory;
    private readonly ILogger<OpcUaApplicationHost> _logger;
    private ApplicationInstance? _application;
    private OtOpcUaServer? _server;
    private HealthEndpointsHost? _healthHost;
    private bool _disposed;

    /// <summary>
    /// Captures the collaborators needed to start the server. Nothing is started here;
    /// call <see cref="StartAsync"/> to bring the server online.
    /// </summary>
    /// <param name="options">Server options (endpoint, PKI root, security profile, health endpoint knobs).</param>
    /// <param name="driverHost">Driver host handed to the server and to the health endpoints host.</param>
    /// <param name="authenticator">User authenticator handed to the server.</param>
    /// <param name="loggerFactory">Factory used to create loggers for the server and health host.</param>
    /// <param name="logger">Logger for this host's own lifecycle messages.</param>
    /// <param name="pipelineBuilder">
    /// Optional resilience pipeline builder; a default <see cref="DriverResiliencePipelineBuilder"/>
    /// is created when <c>null</c>.
    /// </param>
    public OpcUaApplicationHost(OpcUaServerOptions options, DriverHost driverHost,
        IUserAuthenticator authenticator, ILoggerFactory loggerFactory, ILogger<OpcUaApplicationHost> logger,
        DriverResiliencePipelineBuilder? pipelineBuilder = null)
    {
        _options = options;
        _driverHost = driverHost;
        _authenticator = authenticator;
        _pipelineBuilder = pipelineBuilder ?? new DriverResiliencePipelineBuilder();
        _loggerFactory = loggerFactory;
        _logger = logger;
    }

    /// <summary>The running server instance, or <c>null</c> until <see cref="StartAsync"/> has constructed it.</summary>
    public OtOpcUaServer? Server => _server;

    /// <summary>
    /// Builds the <see cref="ApplicationConfiguration"/>, validates/creates the application
    /// certificate, constructs + starts the <see cref="OtOpcUaServer"/>, optionally starts the
    /// health endpoints host, then drives
    /// <see cref="GenericDriverNodeManager.BuildAddressSpaceAsync"/> per registered driver to
    /// populate the address space.
    /// </summary>
    /// <param name="ct">Token forwarded to each driver's address-space build.</param>
    /// <exception cref="InvalidOperationException">
    /// The application certificate could not be validated or created.
    /// </exception>
    /// <exception cref="OperationCanceledException">
    /// <paramref name="ct"/> was cancelled while a driver's address space was being built.
    /// </exception>
    public async Task StartAsync(CancellationToken ct)
    {
        // Build (and asynchronously validate) the configuration before creating the instance so
        // no thread is blocked on the SDK's async validation.
        var configuration = await BuildConfigurationAsync().ConfigureAwait(false);

        _application = new ApplicationInstance
        {
            ApplicationName = _options.ApplicationName,
            ApplicationType = ApplicationType.Server,
            ApplicationConfiguration = configuration,
        };

        var hasCert = await _application.CheckApplicationInstanceCertificate(silent: true, minimumKeySize: CertificateFactory.DefaultKeySize).ConfigureAwait(false);
        if (!hasCert)
        {
            throw new InvalidOperationException(
                $"OPC UA application certificate could not be validated or created in {_options.PkiStoreRoot}");
        }

        _server = new OtOpcUaServer(_driverHost, _authenticator, _pipelineBuilder, _loggerFactory);
        await _application.Start(_server).ConfigureAwait(false);

        _logger.LogInformation("OPC UA server started — endpoint={Endpoint} driverCount={Count}",
            _options.EndpointUrl, _server.DriverNodeManagers.Count);

        // Phase 6.1 Stream C: health endpoints on :4841 (loopback by default — see
        // HealthEndpointsHost remarks for the Windows URL-ACL tradeoff).
        if (_options.HealthEndpointsEnabled)
        {
            _healthHost = new HealthEndpointsHost(
                _driverHost,
                _loggerFactory.CreateLogger<HealthEndpointsHost>(),
                prefix: _options.HealthEndpointsPrefix);
            _healthHost.Start();
        }

        // Drive each driver's discovery through its node manager. The node manager IS the
        // IAddressSpaceBuilder; GenericDriverNodeManager captures alarm-condition sinks into
        // its internal map and wires OnAlarmEvent → sink routing.
        foreach (var nodeManager in _server.DriverNodeManagers)
        {
            var driverId = nodeManager.Driver.DriverInstanceId;
            try
            {
                var generic = new GenericDriverNodeManager(nodeManager.Driver);
                await generic.BuildAddressSpaceAsync(nodeManager, ct).ConfigureAwait(false);
                _logger.LogInformation("Address space populated for driver {Driver}", driverId);
            }
            catch (OperationCanceledException) when (ct.IsCancellationRequested)
            {
                // Caller-requested cancellation is not a driver fault: propagate it instead of
                // logging a bogus discovery failure and carrying on with the remaining drivers.
                throw;
            }
            catch (Exception ex)
            {
                // Per decision #12: driver exceptions isolate — log and keep the server serving
                // the other drivers' subtrees. Re-building this one takes a Reinitialize call.
                _logger.LogError(ex, "Discovery failed for driver {Driver}; subtree faulted", driverId);
            }
        }
    }

    /// <summary>
    /// Creates the PKI store root directory, assembles the programmatic
    /// <see cref="ApplicationConfiguration"/> (directory-based certificate stores under the PKI
    /// root, transport quotas, server endpoint/security/user-token policies), validates it, and
    /// — when auto-accept is enabled — hooks the certificate validator so untrusted client
    /// certificates are accepted.
    /// </summary>
    /// <returns>The validated configuration.</returns>
    private async Task<ApplicationConfiguration> BuildConfigurationAsync()
    {
        Directory.CreateDirectory(_options.PkiStoreRoot);

        var cfg = new ApplicationConfiguration
        {
            ApplicationName = _options.ApplicationName,
            ApplicationUri = _options.ApplicationUri,
            ApplicationType = ApplicationType.Server,
            ProductUri = "urn:OtOpcUa:Server",

            SecurityConfiguration = new SecurityConfiguration
            {
                ApplicationCertificate = new CertificateIdentifier
                {
                    StoreType = CertificateStoreType.Directory,
                    StorePath = Path.Combine(_options.PkiStoreRoot, "own"),
                    SubjectName = "CN=" + _options.ApplicationName,
                },
                TrustedIssuerCertificates = new CertificateTrustList
                {
                    StoreType = CertificateStoreType.Directory,
                    StorePath = Path.Combine(_options.PkiStoreRoot, "issuers"),
                },
                TrustedPeerCertificates = new CertificateTrustList
                {
                    StoreType = CertificateStoreType.Directory,
                    StorePath = Path.Combine(_options.PkiStoreRoot, "trusted"),
                },
                RejectedCertificateStore = new CertificateTrustList
                {
                    StoreType = CertificateStoreType.Directory,
                    StorePath = Path.Combine(_options.PkiStoreRoot, "rejected"),
                },
                AutoAcceptUntrustedCertificates = _options.AutoAcceptUntrustedClientCertificates,
                AddAppCertToTrustedStore = true,
            },

            TransportConfigurations = new TransportConfigurationCollection(),
            TransportQuotas = new TransportQuotas { OperationTimeout = 15000 },

            ServerConfiguration = new ServerConfiguration
            {
                BaseAddresses = new StringCollection { _options.EndpointUrl },
                SecurityPolicies = BuildSecurityPolicies(),
                UserTokenPolicies = BuildUserTokenPolicies(),
                MinRequestThreadCount = 5,
                MaxRequestThreadCount = 100,
                MaxQueuedRequestCount = 200,
            },

            TraceConfiguration = new TraceConfiguration(),
        };

        // Validate is asynchronous in the SDK; await it rather than blocking the calling thread.
        await cfg.Validate(ApplicationType.Server).ConfigureAwait(false);

        if (cfg.SecurityConfiguration.AutoAcceptUntrustedCertificates)
        {
            // Only the "untrusted" verdict is overridden; any other validation error still rejects.
            cfg.CertificateValidator.CertificateValidation += (_, e) =>
            {
                if (e.Error.StatusCode == StatusCodes.BadCertificateUntrusted)
                {
                    e.Accept = true;
                }
            };
        }

        return cfg;
    }

    /// <summary>
    /// Builds the endpoint security policies: always the None policy, plus
    /// Basic256Sha256 SignAndEncrypt when that security profile is selected.
    /// </summary>
    private ServerSecurityPolicyCollection BuildSecurityPolicies()
    {
        var policies = new ServerSecurityPolicyCollection
        {
            // Keep the None policy present so legacy clients can discover + browse. Locked-down
            // deployments remove this by setting Ldap.Enabled=true + dropping None here; left in
            // for PR 19 so the PR 17 test harness continues to pass unchanged.
            new ServerSecurityPolicy
            {
                SecurityMode = MessageSecurityMode.None,
                SecurityPolicyUri = SecurityPolicies.None,
            },
        };

        if (_options.SecurityProfile == OpcUaSecurityProfile.Basic256Sha256SignAndEncrypt)
        {
            policies.Add(new ServerSecurityPolicy
            {
                SecurityMode = MessageSecurityMode.SignAndEncrypt,
                SecurityPolicyUri = SecurityPolicies.Basic256Sha256,
            });
        }

        return policies;
    }

    /// <summary>
    /// Builds the user token policies: always Anonymous, plus UserName when the
    /// Basic256Sha256 SignAndEncrypt profile is selected and LDAP is enabled.
    /// </summary>
    private UserTokenPolicyCollection BuildUserTokenPolicies()
    {
        var tokens = new UserTokenPolicyCollection
        {
            new UserTokenPolicy(UserTokenType.Anonymous)
            {
                PolicyId = "Anonymous",
                SecurityPolicyUri = SecurityPolicies.None,
            },
        };

        if (_options.SecurityProfile == OpcUaSecurityProfile.Basic256Sha256SignAndEncrypt
            && _options.Ldap.Enabled)
        {
            tokens.Add(new UserTokenPolicy(UserTokenType.UserName)
            {
                PolicyId = "UserName",
                // Passwords must ride an encrypted channel — scope this token to Basic256Sha256
                // so the stack rejects any attempt to send UserName over the None endpoint.
                SecurityPolicyUri = SecurityPolicies.Basic256Sha256,
            });
        }

        return tokens;
    }

    /// <summary>
    /// Stops the OPC UA server and disposes the health endpoints host (if one was started).
    /// Safe to call more than once; failures from either step are logged as warnings and
    /// never thrown.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;

        try
        {
            _server?.Stop();
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "OPC UA server stop threw during dispose");
        }

        if (_healthHost is not null)
        {
            try
            {
                await _healthHost.DisposeAsync().ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex, "Health endpoints host dispose threw");
            }
        }
    }
}
|