fix(host): resolve Host-001 — exclude leader-only active-node check from /health/ready
This commit is contained in:
@@ -8,7 +8,7 @@
|
|||||||
| Last reviewed | 2026-05-16 |
|
| Last reviewed | 2026-05-16 |
|
||||||
| Reviewer | claude-agent |
|
| Reviewer | claude-agent |
|
||||||
| Commit reviewed | `9c60592` |
|
| Commit reviewed | `9c60592` |
|
||||||
| Open findings | 11 |
|
| Open findings | 10 |
|
||||||
|
|
||||||
## Summary
|
## Summary
|
||||||
|
|
||||||
@@ -54,7 +54,7 @@ no safe workaround.
|
|||||||
|--|--|
|
|--|--|
|
||||||
| Severity | High |
|
| Severity | High |
|
||||||
| Category | Correctness & logic bugs |
|
| Category | Correctness & logic bugs |
|
||||||
| Status | Open |
|
| Status | Resolved |
|
||||||
| Location | `src/ScadaLink.Host/Program.cs:135-145` |
|
| Location | `src/ScadaLink.Host/Program.cs:135-145` |
|
||||||
|
|
||||||
**Description**
|
**Description**
|
||||||
@@ -81,7 +81,18 @@ checks and filter by tag). Add a regression test asserting a non-leader node ret
|
|||||||
|
|
||||||
**Resolution**
|
**Resolution**
|
||||||
|
|
||||||
_Unresolved._
|
Resolved 2026-05-16 (commit `<pending>`). Root cause confirmed against
|
||||||
|
`Program.cs`: the `/health/ready` mapping had no `Predicate`, so it executed all
|
||||||
|
three registered checks including the leader-only `active-node` check, while
|
||||||
|
`ActiveNodeHealthCheck` returns `Unhealthy` on any non-leader node — making a fully
|
||||||
|
operational standby central node permanently report `503`. Fix: added
|
||||||
|
`Predicate = check => check.Name != "active-node"` to the `/health/ready`
|
||||||
|
`HealthCheckOptions`, so readiness now reflects cluster membership + DB connectivity
|
||||||
|
only (REQ-HOST-4a); leadership remains reported solely by `/health/active`.
|
||||||
|
Regression test `HealthCheckTests.HealthReady_Endpoint_ExcludesActiveNodeCheck`
|
||||||
|
asserts the `active-node` check name does not appear in the `/health/ready`
|
||||||
|
response body; it failed before the fix and passes after. Full Host suite green
|
||||||
|
(156 passed).
|
||||||
|
|
||||||
### Host-002 — Akka.Persistence required by REQ-HOST-6 is not configured and not used
|
### Host-002 — Akka.Persistence required by REQ-HOST-6 is not configured and not used
|
||||||
|
|
||||||
|
|||||||
@@ -131,9 +131,14 @@ try
|
|||||||
app.UseAuthorization();
|
app.UseAuthorization();
|
||||||
app.UseAntiforgery();
|
app.UseAntiforgery();
|
||||||
|
|
||||||
// WP-12: Map readiness endpoint — returns 503 until all checks pass, 200 when ready
|
// WP-12: Map readiness endpoint — returns 503 until the node is ready, then 200.
|
||||||
|
// REQ-HOST-4a defines readiness as cluster membership + DB connectivity,
|
||||||
|
// explicitly NOT cluster leadership. The leader-only "active-node" check is
|
||||||
|
// excluded here so a fully operational standby central node reports ready;
|
||||||
|
// leadership is reported separately on /health/active.
|
||||||
app.MapHealthChecks("/health/ready", new HealthCheckOptions
|
app.MapHealthChecks("/health/ready", new HealthCheckOptions
|
||||||
{
|
{
|
||||||
|
Predicate = check => check.Name != "active-node",
|
||||||
ResponseWriter = UIResponseWriter.WriteHealthCheckUIResponse
|
ResponseWriter = UIResponseWriter.WriteHealthCheckUIResponse
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -110,6 +110,54 @@ public class HealthCheckTests : IDisposable
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Host-001 regression: the leader-only "active-node" health check must not be
/// executed by the <c>/health/ready</c> endpoint.
/// </summary>
/// <remarks>
/// /health/ready must reflect cluster membership + DB connectivity only
/// (REQ-HOST-4a), NOT cluster leadership. The "active-node" check belongs solely
/// to /health/active. If /health/ready included it, a fully operational standby
/// central node would permanently report 503, breaking load-balancer failover
/// readiness.
/// </remarks>
[Fact]
public async Task HealthReady_Endpoint_ExcludesActiveNodeCheck()
{
    // DOTNET_ENVIRONMENT is changed for the duration of the test and restored in
    // the finally block so later tests see the original value.
    var previousEnv = Environment.GetEnvironmentVariable("DOTNET_ENVIRONMENT");
    try
    {
        Environment.SetEnvironmentVariable("DOTNET_ENVIRONMENT", "Central");

        var factory = new WebApplicationFactory<Program>()
            .WithWebHostBuilder(builder =>
            {
                builder.ConfigureAppConfiguration((context, config) =>
                {
                    config.AddInMemoryCollection(new Dictionary<string, string?>
                    {
                        ["ScadaLink:Node:NodeHostname"] = "localhost",
                        ["ScadaLink:Node:RemotingPort"] = "0",
                        ["ScadaLink:Cluster:SeedNodes:0"] = "akka.tcp://scadalink@localhost:2551",
                        ["ScadaLink:Cluster:SeedNodes:1"] = "akka.tcp://scadalink@localhost:2552",
                        ["ScadaLink:Database:SkipMigrations"] = "true",
                    });
                });
                builder.UseSetting("ScadaLink:Node:Role", "Central");
                builder.UseSetting("ScadaLink:Database:SkipMigrations", "true");
            });
        _disposables.Add(factory);

        var client = factory.CreateClient();
        _disposables.Add(client);

        var response = await client.GetAsync("/health/ready");
        var body = await response.Content.ReadAsStringAsync();

        // Guard against a vacuous pass: an empty body, a 404, or a 500 error page
        // would also "not contain" the check name. Require an actual health report
        // first — the health endpoint answers 200 (ready) or 503 (not ready), and
        // the report lists the executed checks in its entries map.
        Assert.True(
            response.StatusCode == System.Net.HttpStatusCode.OK
                || response.StatusCode == System.Net.HttpStatusCode.ServiceUnavailable,
            $"Expected a health report (200 or 503) from /health/ready but got {(int)response.StatusCode}: {body}");
        Assert.Contains("entries", body, StringComparison.OrdinalIgnoreCase);

        // The readiness body lists each executed check by name in its entries map.
        // The leader-only "active-node" check must not be among them.
        Assert.DoesNotContain("active-node", body);
    }
    finally
    {
        Environment.SetEnvironmentVariable("DOTNET_ENVIRONMENT", previousEnv);
    }
}
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
public async Task ActiveNodeHealthCheck_SystemNotStarted_ReturnsUnhealthy()
|
public async Task ActiveNodeHealthCheck_SystemNotStarted_ReturnsUnhealthy()
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user