diff --git a/code-reviews/Host/findings.md b/code-reviews/Host/findings.md index 877f5c8..b13ecc2 100644 --- a/code-reviews/Host/findings.md +++ b/code-reviews/Host/findings.md @@ -8,7 +8,7 @@ | Last reviewed | 2026-05-16 | | Reviewer | claude-agent | | Commit reviewed | `9c60592` | -| Open findings | 11 | +| Open findings | 10 | ## Summary @@ -54,7 +54,7 @@ no safe workaround. |--|--| | Severity | High | | Category | Correctness & logic bugs | -| Status | Open | +| Status | Resolved | | Location | `src/ScadaLink.Host/Program.cs:135-145` | **Description** @@ -81,7 +81,18 @@ checks and filter by tag). Add a regression test asserting a non-leader node ret **Resolution** -_Unresolved._ +Resolved 2026-05-16 (commit ``). Root cause confirmed against +`Program.cs`: the `/health/ready` mapping had no `Predicate`, so it executed all +three registered checks including the leader-only `active-node` check, while +`ActiveNodeHealthCheck` returns `Unhealthy` on any non-leader node — making a fully +operational standby central node permanently report `503`. Fix: added +`Predicate = check => check.Name != "active-node"` to the `/health/ready` +`HealthCheckOptions`, so readiness now reflects cluster membership + DB connectivity +only (REQ-HOST-4a); leadership remains reported solely by `/health/active`. +Regression test `HealthCheckTests.HealthReady_Endpoint_ExcludesActiveNodeCheck` +asserts the `active-node` check name does not appear in the `/health/ready` +response body; it failed before the fix and passes after. Full Host suite green +(156 passed). ### Host-002 — Akka.Persistence required by REQ-HOST-6 is not configured and not used diff --git a/src/ScadaLink.Host/Program.cs b/src/ScadaLink.Host/Program.cs index 0ec5930..d4ca049 100644 --- a/src/ScadaLink.Host/Program.cs +++ b/src/ScadaLink.Host/Program.cs @@ -131,9 +131,14 @@ try app.UseAuthorization(); app.UseAntiforgery(); - // WP-12: Map readiness endpoint — returns 503 until all checks pass, 200 when ready + // WP-12: Map readiness endpoint — returns 503 until ready, 200 when ready. + // REQ-HOST-4a defines readiness as cluster membership + DB connectivity, + // explicitly NOT cluster leadership. The leader-only "active-node" check is + // excluded here so a fully operational standby central node reports ready; + // leadership is reported separately on /health/active. app.MapHealthChecks("/health/ready", new HealthCheckOptions { + Predicate = check => check.Name != "active-node", ResponseWriter = UIResponseWriter.WriteHealthCheckUIResponse }); diff --git a/tests/ScadaLink.Host.Tests/HealthCheckTests.cs b/tests/ScadaLink.Host.Tests/HealthCheckTests.cs index 0f7d770..33dfe49 100644 --- a/tests/ScadaLink.Host.Tests/HealthCheckTests.cs +++ b/tests/ScadaLink.Host.Tests/HealthCheckTests.cs @@ -110,6 +110,54 @@ public class HealthCheckTests : IDisposable } } + [Fact] + public async Task HealthReady_Endpoint_ExcludesActiveNodeCheck() + { + // Host-001 regression: /health/ready must reflect cluster membership + DB + // connectivity only (REQ-HOST-4a), NOT cluster leadership. The leader-only + // "active-node" check belongs solely to /health/active. If /health/ready + // included "active-node", a fully operational standby central node would + // permanently report 503, breaking load-balancer failover readiness. + var previousEnv = Environment.GetEnvironmentVariable("DOTNET_ENVIRONMENT"); + try + { + Environment.SetEnvironmentVariable("DOTNET_ENVIRONMENT", "Central"); + + var factory = new WebApplicationFactory() + .WithWebHostBuilder(builder => + { + builder.ConfigureAppConfiguration((context, config) => + { + config.AddInMemoryCollection(new Dictionary + { + ["ScadaLink:Node:NodeHostname"] = "localhost", + ["ScadaLink:Node:RemotingPort"] = "0", + ["ScadaLink:Cluster:SeedNodes:0"] = "akka.tcp://scadalink@localhost:2551", + ["ScadaLink:Cluster:SeedNodes:1"] = "akka.tcp://scadalink@localhost:2552", + ["ScadaLink:Database:SkipMigrations"] = "true", + }); + }); + builder.UseSetting("ScadaLink:Node:Role", "Central"); + builder.UseSetting("ScadaLink:Database:SkipMigrations", "true"); + }); + _disposables.Add(factory); + + var client = factory.CreateClient(); + _disposables.Add(client); + + var response = await client.GetAsync("/health/ready"); + var body = await response.Content.ReadAsStringAsync(); + + // The readiness body lists each executed check by name in its entries map. + // The leader-only "active-node" check must not be among them. + Assert.DoesNotContain("active-node", body); + } + finally + { + Environment.SetEnvironmentVariable("DOTNET_ENVIRONMENT", previousEnv); + } + } + [Fact] public async Task ActiveNodeHealthCheck_SystemNotStarted_ReturnsUnhealthy() {