Closes Stream C per docs/v2/implementation/phase-6-1-resilience-and-observability.md. Core.Observability (new namespace): - DriverHealthReport — pure-function aggregation over DriverHealthSnapshot list. Empty fleet = Healthy. Any Faulted = Faulted. Any Unknown/Initializing (no Faulted) = NotReady. Any Degraded or Reconnecting (no Faulted, no NotReady) = Degraded. Else Healthy. HttpStatus(verdict) maps to the Stream C.1 state matrix: Healthy/Degraded → 200, NotReady/Faulted → 503. - LogContextEnricher — Serilog LogContext wrapper. Push(id, type, capability, correlationId) returns an IDisposable scope; inner log calls carry DriverInstanceId / DriverType / CapabilityName / CorrelationId structured properties automatically. NewCorrelationId = 12-hex-char GUID slice for cases where no OPC UA RequestHeader.RequestHandle is in flight. CapabilityInvoker — now threads LogContextEnricher around every ExecuteAsync / ExecuteWriteAsync call site. OtOpcUaServer passes driver.DriverType through so logs correlate to the driver type too. Every capability call emits structured fields per the Stream C.4 compliance check. Server.Observability: - HealthEndpointsHost — standalone HttpListener on http://localhost:4841/ (loopback avoids Windows URL-ACL elevation; remote probing via reverse proxy or explicit netsh urlacl grant). Routes: /healthz → 200 when (configDbReachable OR usingStaleConfig); 503 otherwise. Body: status, uptimeSeconds, configDbReachable, usingStaleConfig. /readyz → DriverHealthReport.Aggregate + HttpStatus mapping. Body: verdict, drivers[], degradedDrivers[], uptimeSeconds. anything else → 404. Disposal cooperative with the HttpListener shutdown. - OpcUaApplicationHost starts the health host after the OPC UA server comes up and disposes it on shutdown. New OpcUaServerOptions knobs: HealthEndpointsEnabled (default true), HealthEndpointsPrefix (default http://localhost:4841/). 
Program.cs: - Serilog pipeline adds Enrich.FromLogContext + opt-in JSON file sink via `Serilog:WriteJson = true` appsetting. Uses Serilog.Formatting.Compact's CompactJsonFormatter (one JSON object per line — SIEMs like Splunk, Datadog, Graylog ingest without a regex parser). Server.Tests: - Existing 3 OpcUaApplicationHost integration tests now set HealthEndpointsEnabled=false to avoid port :4841 collisions under parallel execution. - New HealthEndpointsHostTests (9): /healthz healthy empty fleet; stale-config returns 200 with flag; unreachable+no-cache returns 503; /readyz empty/Healthy/Faulted/Degraded/Initializing drivers return correct status and bodies; unknown path → 404. Uses a distinct port per test instance, allocated from a randomized base near 48500 via an Interlocked counter (fixed ports, not OS-assigned ephemeral ones). Core.Tests: - DriverHealthReportTests (8): empty fleet, all-healthy, any-Faulted trumps, any-NotReady without Faulted, Degraded without Faulted/NotReady, HttpStatus per-verdict theory. - LogContextEnricherTests (8): all 4 properties attach; scope disposes cleanly; NewCorrelationId shape; null/whitespace driverInstanceId throws. - CapabilityInvokerEnrichmentTests (2): inner logs carry structured properties; no context leak outside the call site. Full solution dotnet test: 1016 passing (baseline 906, +110 for Phase 6.1 so far across Streams A+B+C). Pre-existing Client.CLI Subscribe flake unchanged. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
178 lines
6.4 KiB
C#
178 lines
6.4 KiB
C#
using System.Net.Http;
|
|
using System.Text.Json;
|
|
using Microsoft.Extensions.Logging.Abstractions;
|
|
using Shouldly;
|
|
using Xunit;
|
|
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
|
using ZB.MOM.WW.OtOpcUa.Core.Hosting;
|
|
using ZB.MOM.WW.OtOpcUa.Server.Observability;
|
|
|
|
namespace ZB.MOM.WW.OtOpcUa.Server.Tests;
|
|
|
|
/// <summary>
/// Integration tests that start a real <see cref="HealthEndpointsHost"/> on a loopback prefix and
/// verify the HTTP status codes and JSON bodies returned by <c>/healthz</c>, <c>/readyz</c> and
/// unknown routes.
/// </summary>
/// <remarks>
/// Every instance of this class takes its own port from a shared counter so that hosts started by
/// different instances do not share a listener prefix.
/// </remarks>
[Trait("Category", "Integration")]
public sealed class HealthEndpointsHostTests : IAsyncLifetime
{
    // Shared port counter. The starting value is randomized once per process (48500..48598);
    // presumably this reduces collisions between concurrently running test processes — confirm.
    private static int _portCounter = 48500 + Random.Shared.Next(0, 99);

    // Next free port for this instance; Interlocked keeps the hand-out safe under parallel tests.
    private readonly int _port = Interlocked.Increment(ref _portCounter);

    // Driver registry handed to the host; tests register stub drivers on it before calling Start.
    private readonly DriverHost _driverHost = new();

    // Stays null until Start is called; DisposeAsync tolerates a test that never started a host.
    private HealthEndpointsHost? _host;

    // Created in InitializeAsync, which runs before each test body.
    private HttpClient _client = null!;

    /// <summary>Listener prefix (and HTTP client base address) for this instance's port.</summary>
    private string Prefix => $"http://localhost:{_port}/";

    /// <summary>Creates the HTTP client pointed at this instance's <see cref="Prefix"/>.</summary>
    public ValueTask InitializeAsync()
    {
        _client = new HttpClient { BaseAddress = new Uri(Prefix) };
        return ValueTask.CompletedTask;
    }

    /// <summary>Disposes the HTTP client and, if one was started, the health endpoints host.</summary>
    public async ValueTask DisposeAsync()
    {
        _client.Dispose();

        if (_host is not null)
        {
            await _host.DisposeAsync();
        }
    }

    /// <summary>
    /// Builds a <see cref="HealthEndpointsHost"/> over <see cref="_driverHost"/> on
    /// <see cref="Prefix"/>, starts it, and remembers it for disposal.
    /// </summary>
    /// <param name="configDbHealthy">Optional probe passed through to the host; <c>null</c> leaves the host's default.</param>
    /// <param name="usingStaleConfig">Optional probe passed through to the host; <c>null</c> leaves the host's default.</param>
    /// <returns>The started host.</returns>
    private HealthEndpointsHost Start(Func<bool>? configDbHealthy = null, Func<bool>? usingStaleConfig = null)
    {
        var host = new HealthEndpointsHost(
            _driverHost,
            NullLogger<HealthEndpointsHost>.Instance,
            configDbHealthy,
            usingStaleConfig,
            prefix: Prefix);

        // Store before Start() so DisposeAsync still cleans up if Start() throws.
        _host = host;
        host.Start();
        return host;
    }

    /// <summary>
    /// Reads the response body as a string and parses it as JSON.
    /// The caller owns the returned document and must dispose it; elements obtained from it are
    /// only valid until then.
    /// </summary>
    private static async Task<JsonDocument> ReadJsonAsync(HttpResponseMessage response)
    {
        var payload = await response.Content.ReadAsStringAsync();
        return JsonDocument.Parse(payload);
    }

    /// <summary>With default probes and no drivers, <c>/healthz</c> succeeds and reports a reachable, non-stale config.</summary>
    [Fact]
    public async Task Healthz_ReturnsHealthy_EmptyFleet()
    {
        Start();

        using var response = await _client.GetAsync("/healthz");

        response.IsSuccessStatusCode.ShouldBeTrue();
        using var json = await ReadJsonAsync(response);
        var body = json.RootElement;
        body.GetProperty("status").GetString().ShouldBe("healthy");
        body.GetProperty("configDbReachable").GetBoolean().ShouldBeTrue();
        body.GetProperty("usingStaleConfig").GetBoolean().ShouldBeFalse();
    }

    /// <summary>An unreachable config DB is still 200 on <c>/healthz</c> when stale config is in use, and both flags are echoed.</summary>
    [Fact]
    public async Task Healthz_StaleConfig_Returns200_WithFlag()
    {
        Start(configDbHealthy: () => false, usingStaleConfig: () => true);

        using var response = await _client.GetAsync("/healthz");

        response.StatusCode.ShouldBe(System.Net.HttpStatusCode.OK);
        using var json = await ReadJsonAsync(response);
        var body = json.RootElement;
        body.GetProperty("configDbReachable").GetBoolean().ShouldBeFalse();
        body.GetProperty("usingStaleConfig").GetBoolean().ShouldBeTrue();
    }

    /// <summary>An unreachable config DB with no stale config to fall back on yields 503 on <c>/healthz</c>.</summary>
    [Fact]
    public async Task Healthz_UnreachableConfig_And_NoCache_Returns503()
    {
        Start(configDbHealthy: () => false, usingStaleConfig: () => false);

        using var response = await _client.GetAsync("/healthz");

        response.StatusCode.ShouldBe(System.Net.HttpStatusCode.ServiceUnavailable);
    }

    /// <summary>With no drivers registered, <c>/readyz</c> is 200 with verdict <c>Healthy</c>.</summary>
    [Fact]
    public async Task Readyz_EmptyFleet_Is200_Healthy()
    {
        Start();

        using var response = await _client.GetAsync("/readyz");

        response.StatusCode.ShouldBe(System.Net.HttpStatusCode.OK);
        using var json = await ReadJsonAsync(response);
        json.RootElement.GetProperty("verdict").GetString().ShouldBe("Healthy");
    }

    /// <summary>A single healthy driver gives 200, verdict <c>Healthy</c>, and one entry in <c>drivers</c>.</summary>
    [Fact]
    public async Task Readyz_WithHealthyDriver_Is200()
    {
        await _driverHost.RegisterAsync(new StubDriver("drv-1", DriverState.Healthy), "{}", CancellationToken.None);
        Start();

        using var response = await _client.GetAsync("/readyz");

        response.StatusCode.ShouldBe(System.Net.HttpStatusCode.OK);
        using var json = await ReadJsonAsync(response);
        var body = json.RootElement;
        body.GetProperty("verdict").GetString().ShouldBe("Healthy");
        body.GetProperty("drivers").GetArrayLength().ShouldBe(1);
    }

    /// <summary>One faulted driver alongside a healthy one gives 503 with verdict <c>Faulted</c>.</summary>
    [Fact]
    public async Task Readyz_WithFaultedDriver_Is503()
    {
        await _driverHost.RegisterAsync(new StubDriver("dead", DriverState.Faulted), "{}", CancellationToken.None);
        await _driverHost.RegisterAsync(new StubDriver("alive", DriverState.Healthy), "{}", CancellationToken.None);
        Start();

        using var response = await _client.GetAsync("/readyz");

        response.StatusCode.ShouldBe(System.Net.HttpStatusCode.ServiceUnavailable);
        using var json = await ReadJsonAsync(response);
        json.RootElement.GetProperty("verdict").GetString().ShouldBe("Faulted");
    }

    /// <summary>A degraded driver keeps <c>/readyz</c> at 200 but is named in <c>degradedDrivers</c>.</summary>
    [Fact]
    public async Task Readyz_WithDegradedDriver_Is200_WithDegradedList()
    {
        await _driverHost.RegisterAsync(new StubDriver("drv-ok", DriverState.Healthy), "{}", CancellationToken.None);
        await _driverHost.RegisterAsync(new StubDriver("drv-deg", DriverState.Degraded), "{}", CancellationToken.None);
        Start();

        using var response = await _client.GetAsync("/readyz");

        response.StatusCode.ShouldBe(System.Net.HttpStatusCode.OK);
        using var json = await ReadJsonAsync(response);
        var body = json.RootElement;
        body.GetProperty("verdict").GetString().ShouldBe("Degraded");
        var degraded = body.GetProperty("degradedDrivers");
        degraded.GetArrayLength().ShouldBe(1);
        degraded[0].GetString().ShouldBe("drv-deg");
    }

    /// <summary>A driver that is still initializing makes <c>/readyz</c> return 503.</summary>
    [Fact]
    public async Task Readyz_WithInitializingDriver_Is503()
    {
        await _driverHost.RegisterAsync(new StubDriver("init", DriverState.Initializing), "{}", CancellationToken.None);
        Start();

        using var response = await _client.GetAsync("/readyz");

        response.StatusCode.ShouldBe(System.Net.HttpStatusCode.ServiceUnavailable);
    }

    /// <summary>Any route other than the health routes returns 404.</summary>
    [Fact]
    public async Task Unknown_Path_Returns404()
    {
        Start();

        using var response = await _client.GetAsync("/foo");

        response.StatusCode.ShouldBe(System.Net.HttpStatusCode.NotFound);
    }

    /// <summary>
    /// Minimal <see cref="IDriver"/> whose health is pinned to a fixed <see cref="DriverState"/>;
    /// every lifecycle operation completes immediately and does nothing.
    /// </summary>
    private sealed class StubDriver : IDriver
    {
        private readonly DriverState _state;

        /// <param name="id">Value exposed as <see cref="DriverInstanceId"/>.</param>
        /// <param name="state">State reported by every <see cref="GetHealth"/> call.</param>
        public StubDriver(string id, DriverState state)
        {
            DriverInstanceId = id;
            _state = state;
        }

        public string DriverInstanceId { get; }

        public string DriverType => "Stub";

        public Task InitializeAsync(string _, CancellationToken ct) => Task.CompletedTask;

        public Task ReinitializeAsync(string _, CancellationToken ct) => Task.CompletedTask;

        public Task ShutdownAsync(CancellationToken ct) => Task.CompletedTask;

        /// <summary>Reports the fixed state; the two remaining <see cref="DriverHealth"/> arguments are passed as <c>null</c>.</summary>
        public DriverHealth GetHealth() => new(_state, null, null);

        public long GetMemoryFootprint() => 0;

        public Task FlushOptionalCachesAsync(CancellationToken ct) => Task.CompletedTask;
    }
}
|