fix(host): resolve Host-001 — exclude leader-only active-node check from /health/ready

This commit is contained in:
Joseph Doherty
2026-05-16 19:40:40 -04:00
parent 7d7214a4ca
commit a0e6a36e79
3 changed files with 68 additions and 4 deletions

View File

@@ -8,7 +8,7 @@
| Last reviewed | 2026-05-16 |
| Reviewer | claude-agent |
| Commit reviewed | `9c60592` |
| Open findings | 11 |
| Open findings | 10 |
## Summary
@@ -54,7 +54,7 @@ no safe workaround.
|--|--|
| Severity | High |
| Category | Correctness & logic bugs |
| Status | Open |
| Status | Resolved |
| Location | `src/ScadaLink.Host/Program.cs:135-145` |
**Description**
@@ -81,7 +81,18 @@ checks and filter by tag). Add a regression test asserting a non-leader node ret
**Resolution**
_Unresolved._
Resolved 2026-05-16 (commit `<pending>`). Root cause confirmed against
`Program.cs`: the `/health/ready` mapping had no `Predicate`, so it executed all
three registered checks including the leader-only `active-node` check, while
`ActiveNodeHealthCheck` returns `Unhealthy` on any non-leader node — making a fully
operational standby central node permanently report `503`. Fix: added
`Predicate = check => check.Name != "active-node"` to the `/health/ready`
`HealthCheckOptions`, so readiness now reflects cluster membership + DB connectivity
only (REQ-HOST-4a); leadership remains reported solely by `/health/active`.
Regression test `HealthCheckTests.HealthReady_Endpoint_ExcludesActiveNodeCheck`
asserts the `active-node` check name does not appear in the `/health/ready`
response body; it failed before the fix and passes after. Full Host suite green
(156 passed).
### Host-002 — Akka.Persistence required by REQ-HOST-6 is not configured and not used

View File

@@ -131,9 +131,14 @@ try
app.UseAuthorization();
app.UseAntiforgery();
// WP-12: Map readiness endpoint — returns 503 until all checks pass, 200 when ready
// WP-12: Map readiness endpoint — returns 503 until ready, 200 when ready.
// REQ-HOST-4a defines readiness as cluster membership + DB connectivity,
// explicitly NOT cluster leadership. The leader-only "active-node" check is
// excluded here so a fully operational standby central node reports ready;
// leadership is reported separately on /health/active.
app.MapHealthChecks("/health/ready", new HealthCheckOptions
{
Predicate = check => check.Name != "active-node",
ResponseWriter = UIResponseWriter.WriteHealthCheckUIResponse
});

View File

@@ -110,6 +110,54 @@ public class HealthCheckTests : IDisposable
}
}
[Fact]
public async Task HealthReady_Endpoint_ExcludesActiveNodeCheck()
{
// Host-001 regression: /health/ready must reflect cluster membership + DB
// connectivity only (REQ-HOST-4a), NOT cluster leadership. The leader-only
// "active-node" check belongs solely to /health/active. If /health/ready
// included "active-node", a fully operational standby central node would
// permanently report 503, breaking load-balancer failover readiness.
var previousEnv = Environment.GetEnvironmentVariable("DOTNET_ENVIRONMENT");
try
{
Environment.SetEnvironmentVariable("DOTNET_ENVIRONMENT", "Central");
var factory = new WebApplicationFactory<Program>()
.WithWebHostBuilder(builder =>
{
builder.ConfigureAppConfiguration((context, config) =>
{
config.AddInMemoryCollection(new Dictionary<string, string?>
{
["ScadaLink:Node:NodeHostname"] = "localhost",
["ScadaLink:Node:RemotingPort"] = "0",
["ScadaLink:Cluster:SeedNodes:0"] = "akka.tcp://scadalink@localhost:2551",
["ScadaLink:Cluster:SeedNodes:1"] = "akka.tcp://scadalink@localhost:2552",
["ScadaLink:Database:SkipMigrations"] = "true",
});
});
builder.UseSetting("ScadaLink:Node:Role", "Central");
builder.UseSetting("ScadaLink:Database:SkipMigrations", "true");
});
_disposables.Add(factory);
var client = factory.CreateClient();
_disposables.Add(client);
var response = await client.GetAsync("/health/ready");
var body = await response.Content.ReadAsStringAsync();
// The readiness body lists each executed check by name in its entries map.
// The leader-only "active-node" check must not be among them.
Assert.DoesNotContain("active-node", body);
}
finally
{
Environment.SetEnvironmentVariable("DOTNET_ENVIRONMENT", previousEnv);
}
}
[Fact]
public async Task ActiveNodeHealthCheck_SystemNotStarted_ReturnsUnhealthy()
{