Closes Stream C per docs/v2/implementation/phase-6-1-resilience-and-observability.md. Core.Observability (new namespace): - DriverHealthReport — pure-function aggregation over DriverHealthSnapshot list. Empty fleet = Healthy. Any Faulted = Faulted. Any Unknown/Initializing (no Faulted) = NotReady. Any Degraded or Reconnecting (no Faulted, no NotReady) = Degraded. Else Healthy. HttpStatus(verdict) maps to the Stream C.1 state matrix: Healthy/Degraded → 200, NotReady/Faulted → 503. - LogContextEnricher — Serilog LogContext wrapper. Push(id, type, capability, correlationId) returns an IDisposable scope; inner log calls carry DriverInstanceId / DriverType / CapabilityName / CorrelationId structured properties automatically. NewCorrelationId = 12-hex-char GUID slice for cases where no OPC UA RequestHeader.RequestHandle is in flight. CapabilityInvoker — now threads LogContextEnricher around every ExecuteAsync / ExecuteWriteAsync call site. OtOpcUaServer passes driver.DriverType through so logs correlate to the driver type too. Every capability call emits structured fields per the Stream C.4 compliance check. Server.Observability: - HealthEndpointsHost — standalone HttpListener on http://localhost:4841/ (loopback avoids Windows URL-ACL elevation; remote probing via reverse proxy or explicit netsh urlacl grant). Routes: /healthz → 200 when (configDbReachable OR usingStaleConfig); 503 otherwise. Body: status, uptimeSeconds, configDbReachable, usingStaleConfig. /readyz → DriverHealthReport.Aggregate + HttpStatus mapping. Body: verdict, drivers[], degradedDrivers[], uptimeSeconds. anything else → 404. Disposal cooperative with the HttpListener shutdown. - OpcUaApplicationHost starts the health host after the OPC UA server comes up and disposes it on shutdown. New OpcUaServerOptions knobs: HealthEndpointsEnabled (default true), HealthEndpointsPrefix (default http://localhost:4841/). 
Program.cs: - Serilog pipeline adds Enrich.FromLogContext + opt-in JSON file sink via `Serilog:WriteJson = true` appsetting. Uses Serilog.Formatting.Compact's CompactJsonFormatter (one JSON object per line — SIEMs like Splunk, Datadog, Graylog ingest without a regex parser). Server.Tests: - Existing 3 OpcUaApplicationHost integration tests now set HealthEndpointsEnabled=false to avoid port :4841 collisions under parallel execution. - New HealthEndpointsHostTests (9): /healthz healthy empty fleet; stale-config returns 200 with flag; unreachable+no-cache returns 503; /readyz empty/Healthy/Faulted/Degraded/Initializing drivers return correct status and bodies; unknown path → 404. Uses ephemeral ports via Interlocked counter. Core.Tests: - DriverHealthReportTests (8): empty fleet, all-healthy, any-Faulted trumps, any-NotReady without Faulted, Degraded without Faulted/NotReady, HttpStatus per-verdict theory. - LogContextEnricherTests (8): all 4 properties attach; scope disposes cleanly; NewCorrelationId shape; null/whitespace driverInstanceId throws. - CapabilityInvokerEnrichmentTests (2): inner logs carry structured properties; no context leak outside the call site. Full solution dotnet test: 1016 passing (baseline 906, +110 for Phase 6.1 so far across Streams A+B+C). Pre-existing Client.CLI Subscribe flake unchanged. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
121 lines
5.6 KiB
C#
121 lines
5.6 KiB
C#
using Polly;
|
|
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
|
using ZB.MOM.WW.OtOpcUa.Core.Observability;
|
|
|
|
namespace ZB.MOM.WW.OtOpcUa.Core.Resilience;
|
|
|
|
/// <summary>
/// Executes driver-capability calls through a shared Polly pipeline. One invoker per
/// <c>(DriverInstance, IDriver)</c> pair; the underlying <see cref="DriverResiliencePipelineBuilder"/>
/// is process-singleton so all invokers share its cache.
/// </summary>
/// <remarks>
/// <para>
/// Per <c>docs/v2/plan.md</c> decisions #143-144 and Phase 6.1 Stream A.3. The server's dispatch
/// layer routes every capability call (<c>IReadable.ReadAsync</c>, <c>IWritable.WriteAsync</c>,
/// <c>ITagDiscovery.DiscoverAsync</c>, <c>ISubscribable.SubscribeAsync/UnsubscribeAsync</c>,
/// <c>IHostConnectivityProbe</c> probe loop, <c>IAlarmSource.SubscribeAlarmsAsync/AcknowledgeAsync</c>,
/// and all four <c>IHistoryProvider</c> reads) through this invoker.
/// </para>
/// <para>
/// Every call site runs inside a <see cref="LogContextEnricher"/> scope opened with the driver
/// instance id, driver type, capability and a freshly generated correlation id. The scope is
/// disposed when the call completes or throws.
/// </para>
/// </remarks>
public sealed class CapabilityInvoker
{
    private readonly DriverResiliencePipelineBuilder _builder;
    private readonly string _driverInstanceId;
    private readonly string _driverType;
    private readonly Func<DriverResilienceOptions> _optionsAccessor;

    /// <summary>
    /// Construct an invoker for one driver instance.
    /// </summary>
    /// <param name="builder">Shared, process-singleton pipeline builder.</param>
    /// <param name="driverInstanceId">The <c>DriverInstance.Id</c> column value.</param>
    /// <param name="optionsAccessor">
    /// Snapshot accessor for the current resilience options. Invoked per call so Admin-edit +
    /// pipeline-invalidate can take effect without restarting the invoker.
    /// </param>
    /// <param name="driverType">Driver type name for structured-log enrichment (e.g. <c>"Modbus"</c>).</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="builder"/> or <paramref name="optionsAccessor"/> is <c>null</c>.
    /// </exception>
    public CapabilityInvoker(
        DriverResiliencePipelineBuilder builder,
        string driverInstanceId,
        Func<DriverResilienceOptions> optionsAccessor,
        string driverType = "Unknown")
    {
        ArgumentNullException.ThrowIfNull(builder);
        ArgumentNullException.ThrowIfNull(optionsAccessor);

        // NOTE(review): driverInstanceId / driverType are stored unvalidated; any validation of
        // them happens (if at all) inside LogContextEnricher.Push at call time — confirm whether
        // failing fast here would be preferable.
        _builder = builder;
        _driverInstanceId = driverInstanceId;
        _driverType = driverType;
        _optionsAccessor = optionsAccessor;
    }

    /// <summary>Execute a capability call returning a value, honoring the per-capability pipeline.</summary>
    /// <typeparam name="TResult">Return type of the underlying driver call.</typeparam>
    /// <param name="capability">Capability being invoked; selects the pipeline and tags the log scope.</param>
    /// <param name="hostName">Host name used, together with the driver instance id and capability, to resolve the pipeline.</param>
    /// <param name="callSite">The driver call to run inside the pipeline.</param>
    /// <param name="cancellationToken">Token forwarded to the pipeline execution.</param>
    /// <returns>The value produced by <paramref name="callSite"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="callSite"/> is <c>null</c>.</exception>
    public async ValueTask<TResult> ExecuteAsync<TResult>(
        DriverCapability capability,
        string hostName,
        Func<CancellationToken, ValueTask<TResult>> callSite,
        CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(callSite);

        var pipeline = ResolvePipeline(capability, hostName);
        using (BeginLogScope(capability))
        {
            return await pipeline.ExecuteAsync(callSite, cancellationToken).ConfigureAwait(false);
        }
    }

    /// <summary>Execute a void-returning capability call, honoring the per-capability pipeline.</summary>
    /// <param name="capability">Capability being invoked; selects the pipeline and tags the log scope.</param>
    /// <param name="hostName">Host name used, together with the driver instance id and capability, to resolve the pipeline.</param>
    /// <param name="callSite">The driver call to run inside the pipeline.</param>
    /// <param name="cancellationToken">Token forwarded to the pipeline execution.</param>
    /// <exception cref="ArgumentNullException"><paramref name="callSite"/> is <c>null</c>.</exception>
    public async ValueTask ExecuteAsync(
        DriverCapability capability,
        string hostName,
        Func<CancellationToken, ValueTask> callSite,
        CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(callSite);

        var pipeline = ResolvePipeline(capability, hostName);
        using (BeginLogScope(capability))
        {
            await pipeline.ExecuteAsync(callSite, cancellationToken).ConfigureAwait(false);
        }
    }

    /// <summary>
    /// Execute a <see cref="DriverCapability.Write"/> call honoring <see cref="WriteIdempotentAttribute"/>
    /// semantics — if <paramref name="isIdempotent"/> is <c>false</c>, retries are disabled regardless
    /// of the tag-level configuration (the pipeline for a non-idempotent write never retries per
    /// decisions #44-45). If <c>true</c>, the call runs through the capability's pipeline which may
    /// retry when the tier configuration permits.
    /// </summary>
    /// <typeparam name="TResult">Return type of the underlying driver write call.</typeparam>
    /// <param name="hostName">Host name used to resolve the pipeline.</param>
    /// <param name="isIdempotent">Whether the write may safely be retried.</param>
    /// <param name="callSite">The driver write call to run inside the pipeline.</param>
    /// <param name="cancellationToken">Token forwarded to the pipeline execution.</param>
    /// <returns>The value produced by <paramref name="callSite"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="callSite"/> is <c>null</c>.</exception>
    public async ValueTask<TResult> ExecuteWriteAsync<TResult>(
        string hostName,
        bool isIdempotent,
        Func<CancellationToken, ValueTask<TResult>> callSite,
        CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(callSite);

        if (isIdempotent)
        {
            // Idempotent writes use the regular Write pipeline, retries included.
            return await ExecuteAsync(DriverCapability.Write, hostName, callSite, cancellationToken).ConfigureAwait(false);
        }

        // Take ONE options snapshot and derive both the base record and the Write policy from it.
        // The accessor reflects live Admin edits, so invoking it twice could combine the Write
        // policy of one configuration with the remaining settings of another.
        var currentOptions = _optionsAccessor();
        var writePolicyWithoutRetry = currentOptions.Resolve(DriverCapability.Write) with { RetryCount = 0 };
        var noRetryOptions = currentOptions with
        {
            CapabilityPolicies = new Dictionary<DriverCapability, CapabilityPolicy>
            {
                [DriverCapability.Write] = writePolicyWithoutRetry,
            },
        };

        // The "::non-idempotent" suffix gives this pipeline its own host key — presumably so the
        // builder caches it separately from the retrying Write pipeline for the same host.
        var pipeline = _builder.GetOrCreate(_driverInstanceId, $"{hostName}::non-idempotent", DriverCapability.Write, noRetryOptions);
        using (BeginLogScope(DriverCapability.Write))
        {
            return await pipeline.ExecuteAsync(callSite, cancellationToken).ConfigureAwait(false);
        }
    }

    /// <summary>Resolve the pipeline for a capability/host pair using the current options snapshot.</summary>
    private ResiliencePipeline ResolvePipeline(DriverCapability capability, string hostName) =>
        _builder.GetOrCreate(_driverInstanceId, hostName, capability, _optionsAccessor());

    /// <summary>
    /// Open the structured-log scope for one capability call; a new correlation id is generated
    /// on every invocation. Dispose the returned scope when the call finishes.
    /// </summary>
    private IDisposable BeginLogScope(DriverCapability capability) =>
        LogContextEnricher.Push(_driverInstanceId, _driverType, capability, LogContextEnricher.NewCorrelationId());
}
|