fix(host): resolve Host-001 — exclude leader-only active-node check from /health/ready
This commit is contained in:
@@ -8,7 +8,7 @@
|
||||
| Last reviewed | 2026-05-16 |
|
||||
| Reviewer | claude-agent |
|
||||
| Commit reviewed | `9c60592` |
|
||||
| Open findings | 11 |
|
||||
| Open findings | 10 |
|
||||
|
||||
## Summary
|
||||
|
||||
@@ -54,7 +54,7 @@ no safe workaround.
|
||||
|--|--|
|
||||
| Severity | High |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.Host/Program.cs:135-145` |
|
||||
|
||||
**Description**
|
||||
@@ -81,7 +81,18 @@ checks and filter by tag). Add a regression test asserting a non-leader node ret
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit `<pending>`). Root cause confirmed against
|
||||
`Program.cs`: the `/health/ready` mapping had no `Predicate`, so it executed all
|
||||
three registered checks including the leader-only `active-node` check, while
|
||||
`ActiveNodeHealthCheck` returns `Unhealthy` on any non-leader node — making a fully
|
||||
operational standby central node permanently report `503`. Fix: added
|
||||
`Predicate = check => check.Name != "active-node"` to the `/health/ready`
|
||||
`HealthCheckOptions`, so readiness now reflects cluster membership + DB connectivity
|
||||
only (REQ-HOST-4a); leadership remains reported solely by `/health/active`.
|
||||
Regression test `HealthCheckTests.HealthReady_Endpoint_ExcludesActiveNodeCheck`
|
||||
asserts the `active-node` check name does not appear in the `/health/ready`
|
||||
response body; it failed before the fix and passes after. Full Host suite green
|
||||
(156 passed).
|
||||
|
||||
### Host-002 — Akka.Persistence required by REQ-HOST-6 is not configured and not used
|
||||
|
||||
|
||||
@@ -131,9 +131,14 @@ try
|
||||
app.UseAuthorization();
|
||||
app.UseAntiforgery();
|
||||
|
||||
// WP-12: Map readiness endpoint — returns 503 until all checks pass, 200 when ready
|
||||
// WP-12: Map readiness endpoint — returns 503 until ready, 200 when ready.
|
||||
// REQ-HOST-4a defines readiness as cluster membership + DB connectivity,
|
||||
// explicitly NOT cluster leadership. The leader-only "active-node" check is
|
||||
// excluded here so a fully operational standby central node reports ready;
|
||||
// leadership is reported separately on /health/active.
|
||||
app.MapHealthChecks("/health/ready", new HealthCheckOptions
|
||||
{
|
||||
Predicate = check => check.Name != "active-node",
|
||||
ResponseWriter = UIResponseWriter.WriteHealthCheckUIResponse
|
||||
});
|
||||
|
||||
|
||||
@@ -110,6 +110,54 @@ public class HealthCheckTests : IDisposable
|
||||
}
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task HealthReady_Endpoint_ExcludesActiveNodeCheck()
|
||||
{
|
||||
// Host-001 regression: /health/ready must reflect cluster membership + DB
|
||||
// connectivity only (REQ-HOST-4a), NOT cluster leadership. The leader-only
|
||||
// "active-node" check belongs solely to /health/active. If /health/ready
|
||||
// included "active-node", a fully operational standby central node would
|
||||
// permanently report 503, breaking load-balancer failover readiness.
|
||||
var previousEnv = Environment.GetEnvironmentVariable("DOTNET_ENVIRONMENT");
|
||||
try
|
||||
{
|
||||
Environment.SetEnvironmentVariable("DOTNET_ENVIRONMENT", "Central");
|
||||
|
||||
var factory = new WebApplicationFactory<Program>()
|
||||
.WithWebHostBuilder(builder =>
|
||||
{
|
||||
builder.ConfigureAppConfiguration((context, config) =>
|
||||
{
|
||||
config.AddInMemoryCollection(new Dictionary<string, string?>
|
||||
{
|
||||
["ScadaLink:Node:NodeHostname"] = "localhost",
|
||||
["ScadaLink:Node:RemotingPort"] = "0",
|
||||
["ScadaLink:Cluster:SeedNodes:0"] = "akka.tcp://scadalink@localhost:2551",
|
||||
["ScadaLink:Cluster:SeedNodes:1"] = "akka.tcp://scadalink@localhost:2552",
|
||||
["ScadaLink:Database:SkipMigrations"] = "true",
|
||||
});
|
||||
});
|
||||
builder.UseSetting("ScadaLink:Node:Role", "Central");
|
||||
builder.UseSetting("ScadaLink:Database:SkipMigrations", "true");
|
||||
});
|
||||
_disposables.Add(factory);
|
||||
|
||||
var client = factory.CreateClient();
|
||||
_disposables.Add(client);
|
||||
|
||||
var response = await client.GetAsync("/health/ready");
|
||||
var body = await response.Content.ReadAsStringAsync();
|
||||
|
||||
// The readiness body lists each executed check by name in its entries map.
|
||||
// The leader-only "active-node" check must not be among them.
|
||||
Assert.DoesNotContain("active-node", body);
|
||||
}
|
||||
finally
|
||||
{
|
||||
Environment.SetEnvironmentVariable("DOTNET_ENVIRONMENT", previousEnv);
|
||||
}
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task ActiveNodeHealthCheck_SystemNotStarted_ReturnsUnhealthy()
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user