diff --git a/src/ZB.MOM.WW.OtOpcUa.Core/Observability/DriverHealthReport.cs b/src/ZB.MOM.WW.OtOpcUa.Core/Observability/DriverHealthReport.cs
new file mode 100644
index 0000000..80aaa12
--- /dev/null
+++ b/src/ZB.MOM.WW.OtOpcUa.Core/Observability/DriverHealthReport.cs
@@ -0,0 +1,86 @@
+using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
+
+namespace ZB.MOM.WW.OtOpcUa.Core.Observability;
+
+/// <summary>
+/// Domain-layer health aggregation for Phase 6.1 Stream C. Pure functions over the driver
+/// fleet — given each driver's <see cref="DriverState"/>, produce a <see cref="ReadinessVerdict"/>
+/// that maps to HTTP status codes at the endpoint layer.
+/// </summary>
+/// <remarks>
+/// State matrix per <c>docs/v2/implementation/phase-6-1-resilience-and-observability.md</c>
+/// §Stream C.1:
+/// <list type="bullet">
+///   <item><see cref="DriverState.Unknown"/> / <see cref="DriverState.Initializing"/>
+///   → /readyz 503 (not yet ready).</item>
+///   <item><see cref="DriverState.Healthy"/> → /readyz 200.</item>
+///   <item><see cref="DriverState.Degraded"/> / <see cref="DriverState.Reconnecting"/>
+///   → /readyz 200 with flagged driver IDs.</item>
+///   <item><see cref="DriverState.Faulted"/> → /readyz 503.</item>
+/// </list>
+/// The overall verdict is computed across the fleet: any Faulted → Faulted; any
+/// Unknown/Initializing → NotReady; any Degraded/Reconnecting → Degraded; else Healthy.
+/// An empty fleet is Healthy (nothing to degrade).
+/// </remarks>
+public static class DriverHealthReport
+{
+    /// <summary>Compute the fleet-wide readiness verdict from per-driver states.</summary>
+    /// <param name="drivers">Per-driver snapshots; an empty list yields Healthy.</param>
+    /// <returns>The most severe applicable verdict: Faulted, then NotReady, then Degraded, else Healthy.</returns>
+    /// <exception cref="ArgumentNullException"><paramref name="drivers"/> is null.</exception>
+    public static ReadinessVerdict Aggregate(IReadOnlyList<DriverHealthSnapshot> drivers)
+    {
+        ArgumentNullException.ThrowIfNull(drivers);
+        if (drivers.Count == 0) return ReadinessVerdict.Healthy;
+
+        // Checks run in severity order so the first match is the final verdict.
+        var anyFaulted = drivers.Any(d => d.State == DriverState.Faulted);
+        if (anyFaulted) return ReadinessVerdict.Faulted;
+
+        var anyInitializing = drivers.Any(d =>
+            d.State == DriverState.Unknown || d.State == DriverState.Initializing);
+        if (anyInitializing) return ReadinessVerdict.NotReady;
+
+        // Reconnecting = driver alive but not serving live data; report as Degraded so /readyz
+        // stays 200 (the fleet can still serve cached / last-good data) while operators see the
+        // affected driver in the body.
+        var anyDegraded = drivers.Any(d =>
+            d.State == DriverState.Degraded || d.State == DriverState.Reconnecting);
+        if (anyDegraded) return ReadinessVerdict.Degraded;
+
+        return ReadinessVerdict.Healthy;
+    }
+
+    /// <summary>
+    /// Map a <see cref="ReadinessVerdict"/> to the HTTP status the /readyz endpoint should
+    /// return per the Stream C.1 state matrix.
+    /// </summary>
+    /// <param name="verdict">Verdict produced by <see cref="Aggregate"/>.</param>
+    /// <returns>200 for Healthy/Degraded, 503 for NotReady/Faulted, 500 for any undefined value.</returns>
+    public static int HttpStatus(ReadinessVerdict verdict) => verdict switch
+    {
+        ReadinessVerdict.Healthy => 200,
+        ReadinessVerdict.Degraded => 200,
+        ReadinessVerdict.NotReady => 503,
+        ReadinessVerdict.Faulted => 503,
+        _ => 500,
+    };
+}
+
+/// <summary>Per-driver snapshot fed into <see cref="DriverHealthReport.Aggregate"/>.</summary>
+/// <param name="DriverInstanceId">Driver instance identifier (from <c>IDriver.DriverInstanceId</c>).</param>
+/// <param name="State">Current <see cref="DriverState"/> from <c>IDriver.GetHealth</c>.</param>
+/// <param name="DetailMessage">Optional driver-supplied detail (e.g. "primary PLC unreachable").</param>
+public sealed record DriverHealthSnapshot(
+ string DriverInstanceId,
+ DriverState State,
+ string? DetailMessage = null);
+
+/// <summary>Overall fleet readiness — derived from driver states by <see cref="DriverHealthReport.Aggregate"/>.</summary>
+public enum ReadinessVerdict
+{
+ /// <summary>All drivers Healthy (or fleet is empty).</summary>
+ Healthy,
+
+ /// <summary>At least one driver Degraded or Reconnecting; none Faulted / NotReady.</summary>
+ Degraded,
+
+ /// <summary>At least one driver Unknown / Initializing; none Faulted.</summary>
+ NotReady,
+
+ /// <summary>At least one driver Faulted.</summary>
+ Faulted,
+}
diff --git a/src/ZB.MOM.WW.OtOpcUa.Core/Observability/LogContextEnricher.cs b/src/ZB.MOM.WW.OtOpcUa.Core/Observability/LogContextEnricher.cs
new file mode 100644
index 0000000..31c5396
--- /dev/null
+++ b/src/ZB.MOM.WW.OtOpcUa.Core/Observability/LogContextEnricher.cs
@@ -0,0 +1,53 @@
+using Serilog.Context;
+using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
+
+namespace ZB.MOM.WW.OtOpcUa.Core.Observability;
+
+/// <summary>
+/// Convenience wrapper around Serilog <see cref="LogContext"/> — attaches the set of
+/// structured properties a capability call should carry (DriverInstanceId, DriverType,
+/// CapabilityName, CorrelationId). Callers wrap their call-site body in a <c>using</c>
+/// block; inner <c>Log.Information</c> / <c>Log.Warning</c> calls emit the context
+/// automatically via the Serilog enricher chain.
+/// </summary>
+/// <remarks>
+/// Per <c>docs/v2/implementation/phase-6-1-resilience-and-observability.md</c> §Stream C.2.
+/// The correlation ID should be the OPC UA RequestHeader.RequestHandle when in-flight;
+/// otherwise a short random GUID. Callers supply whichever is available.
+/// </remarks>
+public static class LogContextEnricher
+{
+    /// <summary>Attach the capability-call property set. Dispose the returned scope to pop.</summary>
+    /// <param name="driverInstanceId">Value for the <c>DriverInstanceId</c> property; must not be blank.</param>
+    /// <param name="driverType">Value for the <c>DriverType</c> property; must not be blank.</param>
+    /// <param name="capability">Capability being invoked; its name becomes <c>CapabilityName</c>.</param>
+    /// <param name="correlationId">Value for the <c>CorrelationId</c> property; must not be blank.</param>
+    /// <returns>A scope that pops all four properties when disposed.</returns>
+    /// <exception cref="ArgumentException">Any string argument is null, empty, or whitespace.</exception>
+    public static IDisposable Push(string driverInstanceId, string driverType, DriverCapability capability, string correlationId)
+    {
+        ArgumentException.ThrowIfNullOrWhiteSpace(driverInstanceId);
+        ArgumentException.ThrowIfNullOrWhiteSpace(driverType);
+        ArgumentException.ThrowIfNullOrWhiteSpace(correlationId);
+
+        var a = LogContext.PushProperty("DriverInstanceId", driverInstanceId);
+        var b = LogContext.PushProperty("DriverType", driverType);
+        var c = LogContext.PushProperty("CapabilityName", capability.ToString());
+        var d = LogContext.PushProperty("CorrelationId", correlationId);
+        return new CompositeScope(a, b, c, d);
+    }
+
+    /// <summary>
+    /// Generate a short correlation ID when no OPC UA RequestHandle is available.
+    /// 12-hex-char slice of a GUID — long enough for log correlation, short enough to
+    /// scan visually.
+    /// </summary>
+    public static string NewCorrelationId() => Guid.NewGuid().ToString("N")[..12];
+
+    /// <summary>Bundles the individual property scopes so callers dispose a single object.</summary>
+    private sealed class CompositeScope : IDisposable
+    {
+        private IDisposable[] _inner;
+        public CompositeScope(params IDisposable[] inner) => _inner = inner;
+
+        public void Dispose()
+        {
+            // Detach the scopes first so a repeated Dispose is a no-op rather than
+            // disposing the inner scopes a second time.
+            var scopes = _inner;
+            _inner = Array.Empty<IDisposable>();
+
+            // Reverse-order disposal matches Serilog's stack semantics.
+            for (var i = scopes.Length - 1; i >= 0; i--)
+            {
+                scopes[i].Dispose();
+            }
+        }
+    }
+}
diff --git a/src/ZB.MOM.WW.OtOpcUa.Core/Resilience/CapabilityInvoker.cs b/src/ZB.MOM.WW.OtOpcUa.Core/Resilience/CapabilityInvoker.cs
index 3c06eb6..8cb536c 100644
--- a/src/ZB.MOM.WW.OtOpcUa.Core/Resilience/CapabilityInvoker.cs
+++ b/src/ZB.MOM.WW.OtOpcUa.Core/Resilience/CapabilityInvoker.cs
@@ -1,5 +1,6 @@
using Polly;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
+using ZB.MOM.WW.OtOpcUa.Core.Observability;
namespace ZB.MOM.WW.OtOpcUa.Core.Resilience;
@@ -19,6 +20,7 @@ public sealed class CapabilityInvoker
{
private readonly DriverResiliencePipelineBuilder _builder;
private readonly string _driverInstanceId;
+ private readonly string _driverType;
private readonly Func _optionsAccessor;
///
@@ -30,16 +32,19 @@ public sealed class CapabilityInvoker
/// Snapshot accessor for the current resilience options. Invoked per call so Admin-edit +
/// pipeline-invalidate can take effect without restarting the invoker.
///
+ /// <param name="driverType">Driver type name for structured-log enrichment (e.g. "Modbus").</param>
public CapabilityInvoker(
DriverResiliencePipelineBuilder builder,
string driverInstanceId,
- Func<DriverResilienceOptions> optionsAccessor)
+ Func<DriverResilienceOptions> optionsAccessor,
+ string driverType = "Unknown")
{
ArgumentNullException.ThrowIfNull(builder);
ArgumentNullException.ThrowIfNull(optionsAccessor);
_builder = builder;
_driverInstanceId = driverInstanceId;
+ // LogContextEnricher.Push rejects blank values; fall back so log enrichment can never fail a capability call.
+ _driverType = string.IsNullOrWhiteSpace(driverType) ? "Unknown" : driverType;
_optionsAccessor = optionsAccessor;
}
@@ -54,7 +59,10 @@ public sealed class CapabilityInvoker
ArgumentNullException.ThrowIfNull(callSite);
var pipeline = ResolvePipeline(capability, hostName);
- return await pipeline.ExecuteAsync(callSite, cancellationToken).ConfigureAwait(false);
+ using (LogContextEnricher.Push(_driverInstanceId, _driverType, capability, LogContextEnricher.NewCorrelationId()))
+ {
+ return await pipeline.ExecuteAsync(callSite, cancellationToken).ConfigureAwait(false);
+ }
}
/// Execute a void-returning capability call, honoring the per-capability pipeline.
@@ -67,7 +75,10 @@ public sealed class CapabilityInvoker
ArgumentNullException.ThrowIfNull(callSite);
var pipeline = ResolvePipeline(capability, hostName);
- await pipeline.ExecuteAsync(callSite, cancellationToken).ConfigureAwait(false);
+ using (LogContextEnricher.Push(_driverInstanceId, _driverType, capability, LogContextEnricher.NewCorrelationId()))
+ {
+ await pipeline.ExecuteAsync(callSite, cancellationToken).ConfigureAwait(false);
+ }
}
///
@@ -95,7 +106,10 @@ public sealed class CapabilityInvoker
},
};
var pipeline = _builder.GetOrCreate(_driverInstanceId, $"{hostName}::non-idempotent", DriverCapability.Write, noRetryOptions);
- return await pipeline.ExecuteAsync(callSite, cancellationToken).ConfigureAwait(false);
+ using (LogContextEnricher.Push(_driverInstanceId, _driverType, DriverCapability.Write, LogContextEnricher.NewCorrelationId()))
+ {
+ return await pipeline.ExecuteAsync(callSite, cancellationToken).ConfigureAwait(false);
+ }
}
return await ExecuteAsync(DriverCapability.Write, hostName, callSite, cancellationToken).ConfigureAwait(false);
diff --git a/src/ZB.MOM.WW.OtOpcUa.Core/ZB.MOM.WW.OtOpcUa.Core.csproj b/src/ZB.MOM.WW.OtOpcUa.Core/ZB.MOM.WW.OtOpcUa.Core.csproj
index 805bcff..d9efa18 100644
--- a/src/ZB.MOM.WW.OtOpcUa.Core/ZB.MOM.WW.OtOpcUa.Core.csproj
+++ b/src/ZB.MOM.WW.OtOpcUa.Core/ZB.MOM.WW.OtOpcUa.Core.csproj
@@ -18,6 +18,7 @@
+
diff --git a/src/ZB.MOM.WW.OtOpcUa.Server/Observability/HealthEndpointsHost.cs b/src/ZB.MOM.WW.OtOpcUa.Server/Observability/HealthEndpointsHost.cs
new file mode 100644
index 0000000..9b7f8c0
--- /dev/null
+++ b/src/ZB.MOM.WW.OtOpcUa.Server/Observability/HealthEndpointsHost.cs
@@ -0,0 +1,181 @@
+using System.Net;
+using System.Text;
+using System.Text.Json;
+using Microsoft.Extensions.Logging;
+using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
+using ZB.MOM.WW.OtOpcUa.Core.Hosting;
+using ZB.MOM.WW.OtOpcUa.Core.Observability;
+
+namespace ZB.MOM.WW.OtOpcUa.Server.Observability;
+
+/// <summary>
+/// Standalone HTTP host for <c>/healthz</c> and <c>/readyz</c>,
+/// separate from the OPC UA binding. Per <c>docs/v2/implementation/phase-6-1-resilience-
+/// and-observability.md</c> §Stream C.1.
+/// </summary>
+/// <remarks>
+/// Binds to <c>http://localhost:4841</c> by default — loopback avoids the Windows URL-ACL
+/// elevation requirement that binding to <c>http://+:4841</c> (wildcard) would impose.
+/// When a deployment needs remote probing, a reverse proxy or explicit netsh urlacl grant
+/// is the expected path; documented in <c>docs/v2/Server-Deployment.md</c> in a follow-up.
+/// </remarks>
+public sealed class HealthEndpointsHost : IAsyncDisposable
+{
+    private readonly string _prefix;
+    private readonly DriverHost _driverHost;
+    private readonly Func<bool> _configDbHealthy;
+    private readonly Func<bool> _usingStaleConfig;
+    private readonly ILogger _logger;
+    private readonly HttpListener _listener = new();
+    private readonly DateTime _startedUtc = DateTime.UtcNow;
+    private CancellationTokenSource? _cts;
+    private Task? _acceptLoop;
+    private bool _disposed;
+
+    /// <summary>Create the host. The listener is not started until <see cref="Start"/> is called.</summary>
+    /// <param name="driverHost">Source of the registered drivers reported by <c>/readyz</c>.</param>
+    /// <param name="logger">Logger for startup and handler-failure messages.</param>
+    /// <param name="configDbHealthy">Probe for config-DB reachability; defaults to always <c>true</c>.</param>
+    /// <param name="usingStaleConfig">Probe for "serving from stale config"; defaults to always <c>false</c>.</param>
+    /// <param name="prefix">HttpListener URL prefix; a trailing slash is appended when missing.</param>
+    public HealthEndpointsHost(
+        DriverHost driverHost,
+        ILogger logger,
+        Func<bool>? configDbHealthy = null,
+        Func<bool>? usingStaleConfig = null,
+        string prefix = "http://localhost:4841/")
+    {
+        ArgumentNullException.ThrowIfNull(driverHost);
+        ArgumentNullException.ThrowIfNull(logger);
+        ArgumentException.ThrowIfNullOrWhiteSpace(prefix);
+
+        _driverHost = driverHost;
+        _logger = logger;
+        _configDbHealthy = configDbHealthy ?? (() => true);
+        _usingStaleConfig = usingStaleConfig ?? (() => false);
+        _prefix = prefix.EndsWith('/') ? prefix : prefix + "/";
+        _listener.Prefixes.Add(_prefix);
+    }
+
+    /// <summary>Start the listener and the background accept loop.</summary>
+    public void Start()
+    {
+        _listener.Start();
+        _cts = new CancellationTokenSource();
+
+        // Capture the token up front: reading _cts.Token lazily inside the task would race
+        // with DisposeAsync disposing the source.
+        var token = _cts.Token;
+        _acceptLoop = Task.Run(() => AcceptLoopAsync(token));
+        _logger.LogInformation("Health endpoints listening on {Prefix}", _prefix);
+    }
+
+    /// <summary>Accept requests until cancellation or listener shutdown, handing each to <see cref="HandleAsync"/>.</summary>
+    private async Task AcceptLoopAsync(CancellationToken ct)
+    {
+        while (!ct.IsCancellationRequested)
+        {
+            HttpListenerContext ctx;
+            try
+            {
+                ctx = await _listener.GetContextAsync().ConfigureAwait(false);
+            }
+            catch (HttpListenerException) when (ct.IsCancellationRequested) { break; }
+            catch (ObjectDisposedException) { break; }
+            catch (HttpListenerException ex)
+            {
+                // A failed accept outside shutdown must not silently end the loop; keep
+                // serving while the listener itself is still up.
+                _logger.LogWarning(ex, "Health endpoint accept failure");
+                if (!_listener.IsListening) break;
+                continue;
+            }
+
+            // No cancellation token here: a context that was already accepted must always
+            // reach HandleAsync so its response gets closed.
+            _ = Task.Run(() => HandleAsync(ctx));
+        }
+    }
+
+    /// <summary>Route one request by path; always closes the response, even on failure.</summary>
+    private async Task HandleAsync(HttpListenerContext ctx)
+    {
+        try
+        {
+            var path = ctx.Request.Url?.AbsolutePath ?? "/";
+            switch (path)
+            {
+                case "/healthz":
+                    await WriteHealthzAsync(ctx).ConfigureAwait(false);
+                    break;
+                case "/readyz":
+                    await WriteReadyzAsync(ctx).ConfigureAwait(false);
+                    break;
+                default:
+                    ctx.Response.StatusCode = 404;
+                    break;
+            }
+        }
+        catch (Exception ex)
+        {
+            _logger.LogWarning(ex, "Health endpoint handler failure");
+            // Best effort: setting the status can itself throw, which is ignored.
+            try { ctx.Response.StatusCode = 500; } catch { /* ignore */ }
+        }
+        finally
+        {
+            try { ctx.Response.Close(); } catch { /* ignore */ }
+        }
+    }
+
+    /// <summary>Write the liveness body and status for <c>/healthz</c>.</summary>
+    private async Task WriteHealthzAsync(HttpListenerContext ctx)
+    {
+        var configHealthy = _configDbHealthy();
+        var staleConfig = _usingStaleConfig();
+        // /healthz is 200 when process alive + (config DB reachable OR cache-warm).
+        // Stale-config still serves 200 so the process isn't flagged dead when the DB
+        // blips; the body surfaces the stale flag for operators.
+        var healthy = configHealthy || staleConfig;
+        ctx.Response.StatusCode = healthy ? 200 : 503;
+
+        var body = JsonSerializer.Serialize(new
+        {
+            status = healthy ? "healthy" : "unhealthy",
+            uptimeSeconds = (int)(DateTime.UtcNow - _startedUtc).TotalSeconds,
+            configDbReachable = configHealthy,
+            usingStaleConfig = staleConfig,
+        });
+        await WriteBodyAsync(ctx, body).ConfigureAwait(false);
+    }
+
+    /// <summary>Write the readiness body and status for <c>/readyz</c> from the current driver states.</summary>
+    private async Task WriteReadyzAsync(HttpListenerContext ctx)
+    {
+        var snapshots = BuildSnapshots();
+        var verdict = DriverHealthReport.Aggregate(snapshots);
+        ctx.Response.StatusCode = DriverHealthReport.HttpStatus(verdict);
+
+        var body = JsonSerializer.Serialize(new
+        {
+            verdict = verdict.ToString(),
+            uptimeSeconds = (int)(DateTime.UtcNow - _startedUtc).TotalSeconds,
+            drivers = snapshots.Select(d => new
+            {
+                id = d.DriverInstanceId,
+                state = d.State.ToString(),
+                detail = d.DetailMessage,
+            }).ToArray(),
+            // Same state pair that DriverHealthReport.Aggregate treats as Degraded.
+            degradedDrivers = snapshots
+                .Where(d => d.State == DriverState.Degraded || d.State == DriverState.Reconnecting)
+                .Select(d => d.DriverInstanceId)
+                .ToArray(),
+        });
+        await WriteBodyAsync(ctx, body).ConfigureAwait(false);
+    }
+
+    /// <summary>Collect one health snapshot per registered driver that can still be resolved.</summary>
+    private IReadOnlyList<DriverHealthSnapshot> BuildSnapshots()
+    {
+        var list = new List<DriverHealthSnapshot>();
+        foreach (var id in _driverHost.RegisteredDriverIds)
+        {
+            var driver = _driverHost.GetDriver(id);
+            if (driver is null) continue;
+            var health = driver.GetHealth();
+            list.Add(new DriverHealthSnapshot(driver.DriverInstanceId, health.State, health.LastError));
+        }
+        return list;
+    }
+
+    /// <summary>Write <paramref name="body"/> as a UTF-8 JSON response with an explicit content length.</summary>
+    private static async Task WriteBodyAsync(HttpListenerContext ctx, string body)
+    {
+        var bytes = Encoding.UTF8.GetBytes(body);
+        ctx.Response.ContentType = "application/json; charset=utf-8";
+        ctx.Response.ContentLength64 = bytes.LongLength;
+        await ctx.Response.OutputStream.WriteAsync(bytes).ConfigureAwait(false);
+    }
+
+    /// <summary>Stop the listener, wait for the accept loop to exit, and release resources. Safe to call more than once.</summary>
+    public async ValueTask DisposeAsync()
+    {
+        if (_disposed) return;
+        _disposed = true;
+
+        // Cancel before stopping so the accept loop treats the resulting listener exception as shutdown.
+        _cts?.Cancel();
+        try { _listener.Stop(); } catch { /* ignore */ }
+        if (_acceptLoop is not null)
+        {
+            try { await _acceptLoop.ConfigureAwait(false); } catch { /* ignore */ }
+        }
+        _listener.Close();
+        _cts?.Dispose();
+    }
+}
diff --git a/src/ZB.MOM.WW.OtOpcUa.Server/OpcUa/OpcUaApplicationHost.cs b/src/ZB.MOM.WW.OtOpcUa.Server/OpcUa/OpcUaApplicationHost.cs
index e64e672..b692bb7 100644
--- a/src/ZB.MOM.WW.OtOpcUa.Server/OpcUa/OpcUaApplicationHost.cs
+++ b/src/ZB.MOM.WW.OtOpcUa.Server/OpcUa/OpcUaApplicationHost.cs
@@ -4,6 +4,7 @@ using Opc.Ua.Configuration;
using ZB.MOM.WW.OtOpcUa.Core.Hosting;
using ZB.MOM.WW.OtOpcUa.Core.OpcUa;
using ZB.MOM.WW.OtOpcUa.Core.Resilience;
+using ZB.MOM.WW.OtOpcUa.Server.Observability;
using ZB.MOM.WW.OtOpcUa.Server.Security;
namespace ZB.MOM.WW.OtOpcUa.Server.OpcUa;
@@ -26,6 +27,7 @@ public sealed class OpcUaApplicationHost : IAsyncDisposable
private readonly ILogger _logger;
private ApplicationInstance? _application;
private OtOpcUaServer? _server;
+ private HealthEndpointsHost? _healthHost;
private bool _disposed;
public OpcUaApplicationHost(OpcUaServerOptions options, DriverHost driverHost,
@@ -68,6 +70,17 @@ public sealed class OpcUaApplicationHost : IAsyncDisposable
_logger.LogInformation("OPC UA server started — endpoint={Endpoint} driverCount={Count}",
_options.EndpointUrl, _server.DriverNodeManagers.Count);
+ // Phase 6.1 Stream C: health endpoints on :4841 (loopback by default — see
+ // HealthEndpointsHost remarks for the Windows URL-ACL tradeoff).
+ if (_options.HealthEndpointsEnabled)
+ {
+ _healthHost = new HealthEndpointsHost(
+ _driverHost,
+ _loggerFactory.CreateLogger(),
+ prefix: _options.HealthEndpointsPrefix);
+ _healthHost.Start();
+ }
+
// Drive each driver's discovery through its node manager. The node manager IS the
// IAddressSpaceBuilder; GenericDriverNodeManager captures alarm-condition sinks into
// its internal map and wires OnAlarmEvent → sink routing.
@@ -221,6 +234,12 @@ public sealed class OpcUaApplicationHost : IAsyncDisposable
{
_logger.LogWarning(ex, "OPC UA server stop threw during dispose");
}
+
+ if (_healthHost is not null)
+ {
+ try { await _healthHost.DisposeAsync().ConfigureAwait(false); }
+ catch (Exception ex) { _logger.LogWarning(ex, "Health endpoints host dispose threw"); }
+ }
await Task.CompletedTask;
}
}
diff --git a/src/ZB.MOM.WW.OtOpcUa.Server/OpcUa/OpcUaServerOptions.cs b/src/ZB.MOM.WW.OtOpcUa.Server/OpcUa/OpcUaServerOptions.cs
index 2844e29..34bcd09 100644
--- a/src/ZB.MOM.WW.OtOpcUa.Server/OpcUa/OpcUaServerOptions.cs
+++ b/src/ZB.MOM.WW.OtOpcUa.Server/OpcUa/OpcUaServerOptions.cs
@@ -58,6 +58,20 @@ public sealed class OpcUaServerOptions
///
public bool AutoAcceptUntrustedClientCertificates { get; init; } = true;
+ /// <summary>
+ /// Whether to start the Phase 6.1 Stream C <c>/healthz</c> + <c>/readyz</c> HTTP listener.
+ /// Defaults to <c>true</c>; set <c>false</c> in embedded deployments that don't need HTTP
+ /// (e.g. tests that only exercise the OPC UA surface).
+ /// </summary>
+ public bool HealthEndpointsEnabled { get; init; } = true;
+
+ /// <summary>
+ /// URL prefix the health endpoints bind to. Default <c>http://localhost:4841/</c> — loopback
+ /// avoids Windows URL-ACL elevation. Production deployments that need remote probing should
+ /// either reverse-proxy or use <c>http://+:4841/</c> with netsh urlacl granted.
+ /// </summary>
+ public string HealthEndpointsPrefix { get; init; } = "http://localhost:4841/";
+
///
/// Security profile advertised on the endpoint. Default
/// preserves the PR 17 endpoint shape; set to
diff --git a/src/ZB.MOM.WW.OtOpcUa.Server/OpcUa/OtOpcUaServer.cs b/src/ZB.MOM.WW.OtOpcUa.Server/OpcUa/OtOpcUaServer.cs
index 1fd231a..bc52665 100644
--- a/src/ZB.MOM.WW.OtOpcUa.Server/OpcUa/OtOpcUaServer.cs
+++ b/src/ZB.MOM.WW.OtOpcUa.Server/OpcUa/OtOpcUaServer.cs
@@ -57,7 +57,7 @@ public sealed class OtOpcUaServer : StandardServer
// per-type tiers into DriverTypeRegistry. Read ResilienceConfig JSON from the
// DriverInstance row in a follow-up PR; for now every driver gets Tier A defaults.
var options = new DriverResilienceOptions { Tier = DriverTier.A };
- var invoker = new CapabilityInvoker(_pipelineBuilder, driver.DriverInstanceId, () => options);
+ var invoker = new CapabilityInvoker(_pipelineBuilder, driver.DriverInstanceId, () => options, driver.DriverType);
var manager = new DriverNodeManager(server, configuration, driver, invoker, logger);
_driverNodeManagers.Add(manager);
}
diff --git a/src/ZB.MOM.WW.OtOpcUa.Server/Program.cs b/src/ZB.MOM.WW.OtOpcUa.Server/Program.cs
index 78d900d..75f911b 100644
--- a/src/ZB.MOM.WW.OtOpcUa.Server/Program.cs
+++ b/src/ZB.MOM.WW.OtOpcUa.Server/Program.cs
@@ -4,6 +4,7 @@ using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Serilog;
+using Serilog.Formatting.Compact;
using ZB.MOM.WW.OtOpcUa.Configuration;
using ZB.MOM.WW.OtOpcUa.Configuration.LocalCache;
using ZB.MOM.WW.OtOpcUa.Core.Hosting;
@@ -13,11 +14,25 @@ using ZB.MOM.WW.OtOpcUa.Server.Security;
var builder = Host.CreateApplicationBuilder(args);
-Log.Logger = new LoggerConfiguration()
+// Per Phase 6.1 Stream C.3: SIEMs (Splunk, Datadog) ingest the JSON file without a
+// regex parser. Plain-text rolling file stays on by default for human readability;
+// JSON file is opt-in via appsetting `Serilog:WriteJson = true`.
+var writeJson = builder.Configuration.GetValue("Serilog:WriteJson");
+var loggerBuilder = new LoggerConfiguration()
.ReadFrom.Configuration(builder.Configuration)
+ .Enrich.FromLogContext()
.WriteTo.Console()
- .WriteTo.File("logs/otopcua-.log", rollingInterval: RollingInterval.Day)
- .CreateLogger();
+ .WriteTo.File("logs/otopcua-.log", rollingInterval: RollingInterval.Day);
+
+if (writeJson)
+{
+ loggerBuilder = loggerBuilder.WriteTo.File(
+ new CompactJsonFormatter(),
+ "logs/otopcua-.json.log",
+ rollingInterval: RollingInterval.Day);
+}
+
+Log.Logger = loggerBuilder.CreateLogger();
builder.Services.AddSerilog();
builder.Services.AddWindowsService(o => o.ServiceName = "OtOpcUa");
diff --git a/src/ZB.MOM.WW.OtOpcUa.Server/ZB.MOM.WW.OtOpcUa.Server.csproj b/src/ZB.MOM.WW.OtOpcUa.Server/ZB.MOM.WW.OtOpcUa.Server.csproj
index 4db194d..17f2eee 100644
--- a/src/ZB.MOM.WW.OtOpcUa.Server/ZB.MOM.WW.OtOpcUa.Server.csproj
+++ b/src/ZB.MOM.WW.OtOpcUa.Server/ZB.MOM.WW.OtOpcUa.Server.csproj
@@ -21,6 +21,7 @@
+
diff --git a/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Observability/CapabilityInvokerEnrichmentTests.cs b/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Observability/CapabilityInvokerEnrichmentTests.cs
new file mode 100644
index 0000000..5e8d10a
--- /dev/null
+++ b/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Observability/CapabilityInvokerEnrichmentTests.cs
@@ -0,0 +1,72 @@
+using Serilog;
+using Serilog.Core;
+using Serilog.Events;
+using Shouldly;
+using Xunit;
+using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
+using ZB.MOM.WW.OtOpcUa.Core.Resilience;
+
+namespace ZB.MOM.WW.OtOpcUa.Core.Tests.Observability;
+
+/// <summary>
+/// Verifies that <see cref="CapabilityInvoker"/> wraps each call site in the structured
+/// log context and removes it again once the call completes.
+/// </summary>
+[Trait("Category", "Integration")]
+public sealed class CapabilityInvokerEnrichmentTests
+{
+    [Fact]
+    public async Task InvokerExecute_LogsInsideCallSite_CarryStructuredProperties()
+    {
+        var sink = new InMemorySink();
+        // Dispose the logger with the test so no pipeline outlives it.
+        using var logger = new LoggerConfiguration()
+            .Enrich.FromLogContext()
+            .WriteTo.Sink(sink)
+            .CreateLogger();
+
+        var invoker = new CapabilityInvoker(
+            new DriverResiliencePipelineBuilder(),
+            driverInstanceId: "drv-live",
+            optionsAccessor: () => new DriverResilienceOptions { Tier = DriverTier.A },
+            driverType: "Modbus");
+
+        await invoker.ExecuteAsync(
+            DriverCapability.Read,
+            "plc-1",
+            _ =>
+            {
+                logger.Information("inside call site");
+                return ValueTask.FromResult(42);
+            },
+            CancellationToken.None);
+
+        var evt = sink.Events.ShouldHaveSingleItem();
+        evt.Properties["DriverInstanceId"].ToString().ShouldBe("\"drv-live\"");
+        evt.Properties["DriverType"].ToString().ShouldBe("\"Modbus\"");
+        evt.Properties["CapabilityName"].ToString().ShouldBe("\"Read\"");
+        evt.Properties.ShouldContainKey("CorrelationId");
+    }
+
+    [Fact]
+    public async Task InvokerExecute_DoesNotLeak_ContextOutsideCallSite()
+    {
+        var sink = new InMemorySink();
+        using var logger = new LoggerConfiguration()
+            .Enrich.FromLogContext()
+            .WriteTo.Sink(sink)
+            .CreateLogger();
+
+        var invoker = new CapabilityInvoker(
+            new DriverResiliencePipelineBuilder(),
+            driverInstanceId: "drv-a",
+            optionsAccessor: () => new DriverResilienceOptions { Tier = DriverTier.A });
+
+        await invoker.ExecuteAsync(DriverCapability.Read, "host", _ => ValueTask.FromResult(1), CancellationToken.None);
+        logger.Information("outside");
+
+        var outside = sink.Events.ShouldHaveSingleItem();
+        outside.Properties.ContainsKey("DriverInstanceId").ShouldBeFalse();
+    }
+
+    /// <summary>Captures emitted events in memory so assertions can inspect their properties.</summary>
+    private sealed class InMemorySink : ILogEventSink
+    {
+        public List<LogEvent> Events { get; } = [];
+        public void Emit(LogEvent logEvent) => Events.Add(logEvent);
+    }
+}
diff --git a/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Observability/DriverHealthReportTests.cs b/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Observability/DriverHealthReportTests.cs
new file mode 100644
index 0000000..28f0e5d
--- /dev/null
+++ b/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Observability/DriverHealthReportTests.cs
@@ -0,0 +1,70 @@
+using Shouldly;
+using Xunit;
+using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
+using ZB.MOM.WW.OtOpcUa.Core.Observability;
+
+namespace ZB.MOM.WW.OtOpcUa.Core.Tests.Observability;
+
+/// <summary>Unit tests for the <see cref="DriverHealthReport"/> state matrix.</summary>
+[Trait("Category", "Unit")]
+public sealed class DriverHealthReportTests
+{
+    [Fact]
+    public void EmptyFleet_IsHealthy()
+    {
+        DriverHealthReport.Aggregate([]).ShouldBe(ReadinessVerdict.Healthy);
+    }
+
+    [Fact]
+    public void NullFleet_Throws()
+    {
+        Should.Throw<ArgumentNullException>(() => { _ = DriverHealthReport.Aggregate(null!); });
+    }
+
+    [Fact]
+    public void AllHealthy_Fleet_IsHealthy()
+    {
+        var verdict = DriverHealthReport.Aggregate([
+            new DriverHealthSnapshot("a", DriverState.Healthy),
+            new DriverHealthSnapshot("b", DriverState.Healthy),
+        ]);
+        verdict.ShouldBe(ReadinessVerdict.Healthy);
+    }
+
+    [Fact]
+    public void AnyFaulted_TrumpsEverything()
+    {
+        var verdict = DriverHealthReport.Aggregate([
+            new DriverHealthSnapshot("a", DriverState.Healthy),
+            new DriverHealthSnapshot("b", DriverState.Degraded),
+            new DriverHealthSnapshot("c", DriverState.Faulted),
+            new DriverHealthSnapshot("d", DriverState.Initializing),
+        ]);
+        verdict.ShouldBe(ReadinessVerdict.Faulted);
+    }
+
+    [Theory]
+    [InlineData(DriverState.Unknown)]
+    [InlineData(DriverState.Initializing)]
+    public void Any_NotReady_WithoutFaulted_IsNotReady(DriverState initializingState)
+    {
+        var verdict = DriverHealthReport.Aggregate([
+            new DriverHealthSnapshot("a", DriverState.Healthy),
+            new DriverHealthSnapshot("b", initializingState),
+        ]);
+        verdict.ShouldBe(ReadinessVerdict.NotReady);
+    }
+
+    // Reconnecting is folded into Degraded by Aggregate, so both states share one theory.
+    [Theory]
+    [InlineData(DriverState.Degraded)]
+    [InlineData(DriverState.Reconnecting)]
+    public void Any_Degraded_WithoutFaultedOrNotReady_IsDegraded(DriverState degradedState)
+    {
+        var verdict = DriverHealthReport.Aggregate([
+            new DriverHealthSnapshot("a", DriverState.Healthy),
+            new DriverHealthSnapshot("b", degradedState),
+        ]);
+        verdict.ShouldBe(ReadinessVerdict.Degraded);
+    }
+
+    // The final row pins the switch's fallback arm for a value outside the enum.
+    [Theory]
+    [InlineData(ReadinessVerdict.Healthy, 200)]
+    [InlineData(ReadinessVerdict.Degraded, 200)]
+    [InlineData(ReadinessVerdict.NotReady, 503)]
+    [InlineData(ReadinessVerdict.Faulted, 503)]
+    [InlineData((ReadinessVerdict)99, 500)]
+    public void HttpStatus_MatchesStateMatrix(ReadinessVerdict verdict, int expected)
+    {
+        DriverHealthReport.HttpStatus(verdict).ShouldBe(expected);
+    }
+}
diff --git a/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Observability/LogContextEnricherTests.cs b/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Observability/LogContextEnricherTests.cs
new file mode 100644
index 0000000..4ee3a0d
--- /dev/null
+++ b/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Observability/LogContextEnricherTests.cs
@@ -0,0 +1,78 @@
+using Serilog;
+using Serilog.Core;
+using Serilog.Events;
+using Shouldly;
+using Xunit;
+using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
+using ZB.MOM.WW.OtOpcUa.Core.Observability;
+
+namespace ZB.MOM.WW.OtOpcUa.Core.Tests.Observability;
+
+/// <summary>Unit tests for <see cref="LogContextEnricher"/> scope push/pop and argument validation.</summary>
+[Trait("Category", "Unit")]
+public sealed class LogContextEnricherTests
+{
+    [Fact]
+    public void Scope_Attaches_AllFour_Properties()
+    {
+        var captured = new InMemorySink();
+        // Dispose the logger with the test so no pipeline outlives it.
+        using var logger = new LoggerConfiguration()
+            .Enrich.FromLogContext()
+            .WriteTo.Sink(captured)
+            .CreateLogger();
+
+        using (LogContextEnricher.Push("drv-1", "Modbus", DriverCapability.Read, "abc123"))
+        {
+            logger.Information("test message");
+        }
+
+        var evt = captured.Events.ShouldHaveSingleItem();
+        evt.Properties["DriverInstanceId"].ToString().ShouldBe("\"drv-1\"");
+        evt.Properties["DriverType"].ToString().ShouldBe("\"Modbus\"");
+        evt.Properties["CapabilityName"].ToString().ShouldBe("\"Read\"");
+        evt.Properties["CorrelationId"].ToString().ShouldBe("\"abc123\"");
+    }
+
+    [Fact]
+    public void Scope_Dispose_Pops_Properties()
+    {
+        var captured = new InMemorySink();
+        using var logger = new LoggerConfiguration()
+            .Enrich.FromLogContext()
+            .WriteTo.Sink(captured)
+            .CreateLogger();
+
+        using (LogContextEnricher.Push("drv-1", "Modbus", DriverCapability.Read, "abc123"))
+        {
+            logger.Information("inside");
+        }
+        logger.Information("outside");
+
+        captured.Events.Count.ShouldBe(2);
+        captured.Events[0].Properties.ContainsKey("DriverInstanceId").ShouldBeTrue();
+        captured.Events[1].Properties.ContainsKey("DriverInstanceId").ShouldBeFalse();
+    }
+
+    [Fact]
+    public void NewCorrelationId_Returns_12_Hex_Chars()
+    {
+        var id = LogContextEnricher.NewCorrelationId();
+        id.Length.ShouldBe(12);
+        id.ShouldMatch("^[0-9a-f]{12}$");
+    }
+
+    [Theory]
+    [InlineData(null)]
+    [InlineData("")]
+    [InlineData(" ")]
+    public void Push_Throws_OnMissingDriverInstanceId(string? id)
+    {
+        Should.Throw<ArgumentException>(() =>
+            LogContextEnricher.Push(id!, "Modbus", DriverCapability.Read, "c"));
+    }
+
+    [Theory]
+    [InlineData(null)]
+    [InlineData("")]
+    [InlineData(" ")]
+    public void Push_Throws_OnMissingDriverType(string? driverType)
+    {
+        Should.Throw<ArgumentException>(() =>
+            LogContextEnricher.Push("drv-1", driverType!, DriverCapability.Read, "c"));
+    }
+
+    [Theory]
+    [InlineData(null)]
+    [InlineData("")]
+    [InlineData(" ")]
+    public void Push_Throws_OnMissingCorrelationId(string? correlationId)
+    {
+        Should.Throw<ArgumentException>(() =>
+            LogContextEnricher.Push("drv-1", "Modbus", DriverCapability.Read, correlationId!));
+    }
+
+    /// <summary>Captures emitted events in memory so assertions can inspect their properties.</summary>
+    private sealed class InMemorySink : ILogEventSink
+    {
+        public List<LogEvent> Events { get; } = [];
+        public void Emit(LogEvent logEvent) => Events.Add(logEvent);
+    }
+}
diff --git a/tests/ZB.MOM.WW.OtOpcUa.Server.Tests/HealthEndpointsHostTests.cs b/tests/ZB.MOM.WW.OtOpcUa.Server.Tests/HealthEndpointsHostTests.cs
new file mode 100644
index 0000000..70d05d9
--- /dev/null
+++ b/tests/ZB.MOM.WW.OtOpcUa.Server.Tests/HealthEndpointsHostTests.cs
@@ -0,0 +1,177 @@
+using System.Net.Http;
+using System.Text.Json;
+using Microsoft.Extensions.Logging.Abstractions;
+using Shouldly;
+using Xunit;
+using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
+using ZB.MOM.WW.OtOpcUa.Core.Hosting;
+using ZB.MOM.WW.OtOpcUa.Server.Observability;
+
+namespace ZB.MOM.WW.OtOpcUa.Server.Tests;
+
+/// <summary>
+/// End-to-end tests that start a real <see cref="HealthEndpointsHost"/> on a loopback port
+/// and probe <c>/healthz</c> and <c>/readyz</c> over HTTP.
+/// </summary>
+[Trait("Category", "Integration")]
+public sealed class HealthEndpointsHostTests : IAsyncLifetime
+{
+    // Ask the OS for a free loopback port per test instance instead of guessing inside a
+    // fixed window, which can collide with other processes or parallel test runs.
+    private readonly int _port = GetFreePort();
+    private string Prefix => $"http://localhost:{_port}/";
+    private readonly DriverHost _driverHost = new();
+    private HealthEndpointsHost? _host;
+    private HttpClient _client = null!;
+
+    public ValueTask InitializeAsync()
+    {
+        _client = new HttpClient { BaseAddress = new Uri(Prefix) };
+        return ValueTask.CompletedTask;
+    }
+
+    public async ValueTask DisposeAsync()
+    {
+        _client.Dispose();
+        if (_host is not null) await _host.DisposeAsync();
+    }
+
+    /// <summary>Bind a throwaway TCP listener to port 0 and return the port the OS picked.</summary>
+    private static int GetFreePort()
+    {
+        var probe = new System.Net.Sockets.TcpListener(System.Net.IPAddress.Loopback, 0);
+        probe.Start();
+        try { return ((System.Net.IPEndPoint)probe.LocalEndpoint).Port; }
+        finally { probe.Stop(); }
+    }
+
+    /// <summary>Create and start the host under test on this instance's prefix.</summary>
+    private HealthEndpointsHost Start(Func<bool>? configDbHealthy = null, Func<bool>? usingStaleConfig = null)
+    {
+        _host = new HealthEndpointsHost(
+            _driverHost,
+            NullLogger<HealthEndpointsHost>.Instance,
+            configDbHealthy,
+            usingStaleConfig,
+            prefix: Prefix);
+        _host.Start();
+        return _host;
+    }
+
+    /// <summary>Parse the response body as JSON; the root is cloned so it outlives the document.</summary>
+    private static async Task<JsonElement> ReadJsonAsync(HttpResponseMessage response)
+    {
+        using var doc = JsonDocument.Parse(await response.Content.ReadAsStringAsync());
+        return doc.RootElement.Clone();
+    }
+
+    [Fact]
+    public async Task Healthz_ReturnsHealthy_EmptyFleet()
+    {
+        Start();
+
+        using var response = await _client.GetAsync("/healthz");
+
+        response.IsSuccessStatusCode.ShouldBeTrue();
+        var body = await ReadJsonAsync(response);
+        body.GetProperty("status").GetString().ShouldBe("healthy");
+        body.GetProperty("configDbReachable").GetBoolean().ShouldBeTrue();
+        body.GetProperty("usingStaleConfig").GetBoolean().ShouldBeFalse();
+    }
+
+    [Fact]
+    public async Task Healthz_StaleConfig_Returns200_WithFlag()
+    {
+        Start(configDbHealthy: () => false, usingStaleConfig: () => true);
+
+        using var response = await _client.GetAsync("/healthz");
+
+        response.StatusCode.ShouldBe(System.Net.HttpStatusCode.OK);
+        var body = await ReadJsonAsync(response);
+        body.GetProperty("configDbReachable").GetBoolean().ShouldBeFalse();
+        body.GetProperty("usingStaleConfig").GetBoolean().ShouldBeTrue();
+    }
+
+    [Fact]
+    public async Task Healthz_UnreachableConfig_And_NoCache_Returns503()
+    {
+        Start(configDbHealthy: () => false, usingStaleConfig: () => false);
+
+        using var response = await _client.GetAsync("/healthz");
+
+        response.StatusCode.ShouldBe(System.Net.HttpStatusCode.ServiceUnavailable);
+    }
+
+    [Fact]
+    public async Task Readyz_EmptyFleet_Is200_Healthy()
+    {
+        Start();
+
+        using var response = await _client.GetAsync("/readyz");
+
+        response.StatusCode.ShouldBe(System.Net.HttpStatusCode.OK);
+        var body = await ReadJsonAsync(response);
+        body.GetProperty("verdict").GetString().ShouldBe("Healthy");
+    }
+
+    [Fact]
+    public async Task Readyz_WithHealthyDriver_Is200()
+    {
+        await _driverHost.RegisterAsync(new StubDriver("drv-1", DriverState.Healthy), "{}", CancellationToken.None);
+        Start();
+
+        using var response = await _client.GetAsync("/readyz");
+
+        response.StatusCode.ShouldBe(System.Net.HttpStatusCode.OK);
+        var body = await ReadJsonAsync(response);
+        body.GetProperty("verdict").GetString().ShouldBe("Healthy");
+        body.GetProperty("drivers").GetArrayLength().ShouldBe(1);
+    }
+
+    [Fact]
+    public async Task Readyz_WithFaultedDriver_Is503()
+    {
+        await _driverHost.RegisterAsync(new StubDriver("dead", DriverState.Faulted), "{}", CancellationToken.None);
+        await _driverHost.RegisterAsync(new StubDriver("alive", DriverState.Healthy), "{}", CancellationToken.None);
+        Start();
+
+        using var response = await _client.GetAsync("/readyz");
+
+        response.StatusCode.ShouldBe(System.Net.HttpStatusCode.ServiceUnavailable);
+        var body = await ReadJsonAsync(response);
+        body.GetProperty("verdict").GetString().ShouldBe("Faulted");
+    }
+
+    // Reconnecting is reported alongside Degraded, so both states share one theory.
+    [Theory]
+    [InlineData(DriverState.Degraded)]
+    [InlineData(DriverState.Reconnecting)]
+    public async Task Readyz_WithDegradedDriver_Is200_WithDegradedList(DriverState degradedState)
+    {
+        await _driverHost.RegisterAsync(new StubDriver("drv-ok", DriverState.Healthy), "{}", CancellationToken.None);
+        await _driverHost.RegisterAsync(new StubDriver("drv-deg", degradedState), "{}", CancellationToken.None);
+        Start();
+
+        using var response = await _client.GetAsync("/readyz");
+
+        response.StatusCode.ShouldBe(System.Net.HttpStatusCode.OK);
+        var body = await ReadJsonAsync(response);
+        body.GetProperty("verdict").GetString().ShouldBe("Degraded");
+        body.GetProperty("degradedDrivers").GetArrayLength().ShouldBe(1);
+        body.GetProperty("degradedDrivers")[0].GetString().ShouldBe("drv-deg");
+    }
+
+    [Fact]
+    public async Task Readyz_WithInitializingDriver_Is503()
+    {
+        await _driverHost.RegisterAsync(new StubDriver("init", DriverState.Initializing), "{}", CancellationToken.None);
+        Start();
+
+        using var response = await _client.GetAsync("/readyz");
+
+        response.StatusCode.ShouldBe(System.Net.HttpStatusCode.ServiceUnavailable);
+    }
+
+    [Fact]
+    public async Task Unknown_Path_Returns404()
+    {
+        Start();
+
+        using var response = await _client.GetAsync("/foo");
+
+        response.StatusCode.ShouldBe(System.Net.HttpStatusCode.NotFound);
+    }
+
+    /// <summary>Minimal <see cref="IDriver"/> that always reports the state it was constructed with.</summary>
+    private sealed class StubDriver : IDriver
+    {
+        private readonly DriverState _state;
+        public StubDriver(string id, DriverState state)
+        {
+            DriverInstanceId = id;
+            _state = state;
+        }
+        public string DriverInstanceId { get; }
+        public string DriverType => "Stub";
+        public Task InitializeAsync(string _, CancellationToken ct) => Task.CompletedTask;
+        public Task ReinitializeAsync(string _, CancellationToken ct) => Task.CompletedTask;
+        public Task ShutdownAsync(CancellationToken ct) => Task.CompletedTask;
+        public DriverHealth GetHealth() => new(_state, null, null);
+        public long GetMemoryFootprint() => 0;
+        public Task FlushOptionalCachesAsync(CancellationToken ct) => Task.CompletedTask;
+    }
+}
diff --git a/tests/ZB.MOM.WW.OtOpcUa.Server.Tests/HistoryReadIntegrationTests.cs b/tests/ZB.MOM.WW.OtOpcUa.Server.Tests/HistoryReadIntegrationTests.cs
index 82b8ab0..47b42ec 100644
--- a/tests/ZB.MOM.WW.OtOpcUa.Server.Tests/HistoryReadIntegrationTests.cs
+++ b/tests/ZB.MOM.WW.OtOpcUa.Server.Tests/HistoryReadIntegrationTests.cs
@@ -46,7 +46,7 @@ public sealed class HistoryReadIntegrationTests : IAsyncLifetime
ApplicationName = "OtOpcUaHistoryTest",
ApplicationUri = "urn:OtOpcUa:Server:HistoryTest",
PkiStoreRoot = _pkiRoot,
- AutoAcceptUntrustedClientCertificates = true,
+ AutoAcceptUntrustedClientCertificates = true, HealthEndpointsEnabled = false,
};
_server = new OpcUaApplicationHost(options, _driverHost, new DenyAllUserAuthenticator(),
diff --git a/tests/ZB.MOM.WW.OtOpcUa.Server.Tests/MultipleDriverInstancesIntegrationTests.cs b/tests/ZB.MOM.WW.OtOpcUa.Server.Tests/MultipleDriverInstancesIntegrationTests.cs
index cd93e14..6e90a64 100644
--- a/tests/ZB.MOM.WW.OtOpcUa.Server.Tests/MultipleDriverInstancesIntegrationTests.cs
+++ b/tests/ZB.MOM.WW.OtOpcUa.Server.Tests/MultipleDriverInstancesIntegrationTests.cs
@@ -49,7 +49,7 @@ public sealed class MultipleDriverInstancesIntegrationTests : IAsyncLifetime
ApplicationName = "OtOpcUaMultiDriverTest",
ApplicationUri = "urn:OtOpcUa:Server:MultiDriverTest",
PkiStoreRoot = _pkiRoot,
- AutoAcceptUntrustedClientCertificates = true,
+ AutoAcceptUntrustedClientCertificates = true, HealthEndpointsEnabled = false,
};
_server = new OpcUaApplicationHost(options, _driverHost, new DenyAllUserAuthenticator(),
diff --git a/tests/ZB.MOM.WW.OtOpcUa.Server.Tests/OpcUaServerIntegrationTests.cs b/tests/ZB.MOM.WW.OtOpcUa.Server.Tests/OpcUaServerIntegrationTests.cs
index a546b9e..bcdcb67 100644
--- a/tests/ZB.MOM.WW.OtOpcUa.Server.Tests/OpcUaServerIntegrationTests.cs
+++ b/tests/ZB.MOM.WW.OtOpcUa.Server.Tests/OpcUaServerIntegrationTests.cs
@@ -36,7 +36,7 @@ public sealed class OpcUaServerIntegrationTests : IAsyncLifetime
ApplicationName = "OtOpcUaTest",
ApplicationUri = "urn:OtOpcUa:Server:Test",
PkiStoreRoot = _pkiRoot,
- AutoAcceptUntrustedClientCertificates = true,
+ AutoAcceptUntrustedClientCertificates = true, HealthEndpointsEnabled = false,
};
_server = new OpcUaApplicationHost(options, _driverHost, new DenyAllUserAuthenticator(),