fix(server): resolve Medium code-review finding (Server-007)

Add configDbHealthy parameter to OpcUaApplicationHost; wire a
DbHealthCache (CanConnectAsync cached 10 s) in Program.cs so /healthz
reflects real config-DB reachability instead of the previous always-true
default; /healthz now returns 503 on a DB outage unless stale-config
cache is warm.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-05-22 10:59:08 -04:00
parent 8e8199752f
commit fccb529d5f
3 changed files with 59 additions and 4 deletions

View File

@@ -116,13 +116,13 @@
| Severity | Medium |
| Category | Error handling & resilience |
| Location | `src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUa/OpcUaApplicationHost.cs:179-183` |
| Status | Open |
| Status | Resolved |
**Description:** `HealthEndpointsHost` is built without a `configDbHealthy` delegate, so the default `() => true` is used — `/healthz` always reports `configDbReachable = true` and never 503s on a DB outage. `_staleConfigFlag` is also never supplied by `Program.cs`, so the stale-config signal is inert too. `/healthz` degenerates to a pure liveness probe; operators get a false-healthy during a DB outage.
**Recommendation:** Wire a real config-DB probe (cheap cached `SELECT 1`) into `HealthEndpointsHost`, and register `StaleConfigFlag` in `Program.cs`. Or move DB health to `/readyz` and drop the misleading `configDbReachable` field.
**Resolution:** _(open)_
**Resolution:** Resolved 2026-05-22 — added `Func<bool>? configDbHealthy` parameter to `OpcUaApplicationHost` (defaults null, backward-compatible); `Program.cs` constructs a `DbHealthCache` that calls `CanConnectAsync` every 10 s and caches the result, then passes `() => dbHealthCache.IsHealthy`; `/healthz` now reflects real DB reachability and returns 503 on a DB outage (unless stale-config cache is warm).
### Server-008
| Field | Value |

View File

@@ -53,6 +53,7 @@ public sealed class OpcUaApplicationHost : IAsyncDisposable
// don't opt into the new server-side history routing or alarm-condition state machine.
private readonly IHistoryRouter? _historyRouter;
private readonly AlarmConditionService? _alarmConditionService;
private readonly Func<bool>? _configDbHealthy;
private readonly ILoggerFactory _loggerFactory;
private readonly ILogger<OpcUaApplicationHost> _logger;
@@ -73,7 +74,13 @@ public sealed class OpcUaApplicationHost : IAsyncDisposable
ZB.MOM.WW.OtOpcUa.Core.Abstractions.IReadable? virtualReadable = null,
ZB.MOM.WW.OtOpcUa.Core.Abstractions.IReadable? scriptedAlarmReadable = null,
IHistoryRouter? historyRouter = null,
AlarmConditionService? alarmConditionService = null)
AlarmConditionService? alarmConditionService = null,
// Server-007: optional cached probe for the configuration DB. When supplied,
// /healthz reflects real DB reachability and returns 503 on a DB outage (unless
// a stale-config cache is warm). When null, falls back to the prior always-true
// default so existing test construction sites and deployments without a DB probe
// are unaffected.
Func<bool>? configDbHealthy = null)
{
_options = options;
_driverHost = driverHost;
@@ -89,6 +96,7 @@ public sealed class OpcUaApplicationHost : IAsyncDisposable
_scriptedAlarmReadable = scriptedAlarmReadable;
_historyRouter = historyRouter;
_alarmConditionService = alarmConditionService;
_configDbHealthy = configDbHealthy;
_loggerFactory = loggerFactory;
_logger = logger;
}
@@ -179,6 +187,7 @@ public sealed class OpcUaApplicationHost : IAsyncDisposable
_healthHost = new HealthEndpointsHost(
_driverHost,
_loggerFactory.CreateLogger<HealthEndpointsHost>(),
configDbHealthy: _configDbHealthy,
usingStaleConfig: _staleConfigFlag is null ? null : () => _staleConfigFlag.IsStale,
prefix: _options.HealthEndpointsPrefix);
_healthHost.Start();

View File

@@ -197,6 +197,14 @@ if (wonderwareEnabled)
builder.Services.AddSingleton<OpcUaApplicationHost>(sp =>
{
var registry = sp.GetRequiredService<DriverEquipmentContentRegistry>();
// Server-007: wire a real config-DB probe so /healthz reflects true DB reachability.
// The probe runs CanConnectAsync and caches the result for 10 s to avoid a DB round-trip
// on every /healthz poll. The Func<bool> surface on HealthEndpointsHost is synchronous;
// the cached value is updated on a background timer rather than blocking the HTTP handler.
var dbFactory = sp.GetRequiredService<IDbContextFactory<OtOpcUaConfigDbContext>>();
var dbHealthCache = new DbHealthCache(dbFactory);
return new OpcUaApplicationHost(
sp.GetRequiredService<OpcUaServerOptions>(),
sp.GetRequiredService<DriverHost>(),
@@ -205,7 +213,8 @@ builder.Services.AddSingleton<OpcUaApplicationHost>(sp =>
sp.GetRequiredService<ILogger<OpcUaApplicationHost>>(),
equipmentContentLookup: registry.Get,
historyRouter: sp.GetRequiredService<IHistoryRouter>(),
alarmConditionService: sp.GetRequiredService<AlarmConditionService>());
alarmConditionService: sp.GetRequiredService<AlarmConditionService>(),
configDbHealthy: () => dbHealthCache.IsHealthy);
});
builder.Services.AddHostedService<OpcUaServerService>();
@@ -261,3 +270,40 @@ builder.Services.AddSingleton<Phase7Composer>();
var host = builder.Build();
await host.RunAsync();
// Server-007: lightweight cached config-DB health probe for /healthz.
// Runs CanConnectAsync once and caches the result for 10 s so the /healthz
// endpoint never blocks an HTTP handler thread on a DB round-trip while still
// surfacing real DB reachability rather than the previous always-true default.
internal sealed class DbHealthCache : IDisposable
{
private static readonly TimeSpan CacheTtl = TimeSpan.FromSeconds(10);
private readonly IDbContextFactory<OtOpcUaConfigDbContext> _factory;
private readonly Timer _timer;
private volatile bool _isHealthy = true; // optimistic until first probe completes
public DbHealthCache(IDbContextFactory<OtOpcUaConfigDbContext> factory)
{
_factory = factory;
// Fire immediately, then repeat every CacheTtl.
_timer = new Timer(_ => _ = RefreshAsync(), null, TimeSpan.Zero, CacheTtl);
}
public bool IsHealthy => _isHealthy;
private async Task RefreshAsync()
{
try
{
await using var ctx = await _factory.CreateDbContextAsync().ConfigureAwait(false);
_isHealthy = await ctx.Database.CanConnectAsync().ConfigureAwait(false);
}
catch
{
_isHealthy = false;
}
}
public void Dispose() => _timer.Dispose();
}