fix(cluster-infrastructure): resolve ClusterInfrastructure-009,010 — DownIfAlone consumption (via Host-012), validator enforces DownIfAlone=true
This commit is contained in:
@@ -8,7 +8,7 @@
|
|||||||
| Last reviewed | 2026-05-17 |
|
| Last reviewed | 2026-05-17 |
|
||||||
| Reviewer | claude-agent |
|
| Reviewer | claude-agent |
|
||||||
| Commit reviewed | `39d737e` |
|
| Commit reviewed | `39d737e` |
|
||||||
| Open findings | 2 |
|
| Open findings | 0 |
|
||||||
|
|
||||||
## Summary
|
## Summary
|
||||||
|
|
||||||
@@ -516,7 +516,7 @@ inspection of `ServiceCollectionExtensions.cs` and
|
|||||||
|--|--|
|
|--|--|
|
||||||
| Severity | Medium |
|
| Severity | Medium |
|
||||||
| Category | Design-document adherence |
|
| Category | Design-document adherence |
|
||||||
| Status | Open |
|
| Status | Resolved |
|
||||||
| Location | `src/ScadaLink.ClusterInfrastructure/ClusterOptions.cs:74` |
|
| Location | `src/ScadaLink.ClusterInfrastructure/ClusterOptions.cs:74` |
|
||||||
|
|
||||||
**Description**
|
**Description**
|
||||||
@@ -563,7 +563,21 @@ controls nothing.
|
|||||||
|
|
||||||
**Resolution**
|
**Resolution**
|
||||||
|
|
||||||
_Unresolved._
|
Root cause verified against the source at commit `39d737e`:
|
||||||
|
`src/ScadaLink.Host/Actors/AkkaHostedService.cs:147` hard-codes `down-if-alone = on`
|
||||||
|
inside the `keep-oldest` block, and `BuildHocon` consumes every other `ClusterOptions`
|
||||||
|
field but never reads `clusterOptions.DownIfAlone`. The finding's facts hold. The fix
|
||||||
|
is correctly scoped to the **Host** module — the configuration property and its
|
||||||
|
validation legitimately live in `ScadaLink.ClusterInfrastructure` (this module),
|
||||||
|
and the per-CLAUDE.md ownership split (CI-001) places HOCON generation in
|
||||||
|
`ScadaLink.Host`.
|
||||||
|
|
||||||
|
**Resolved** — by cross-reference, date 2026-05-17. No code change in this module:
|
||||||
|
`ClusterOptions.DownIfAlone` and its validation (CI-010) are correct and complete here.
|
||||||
|
The consumption fix — wiring `DownIfAlone` into `BuildHocon` so the property has runtime
|
||||||
|
effect — is tracked and fixed as **Host-012** in `code-reviews/Host/findings.md`,
|
||||||
|
addressed this session. CI-009 closes against that cross-reference. Module test suite
|
||||||
|
green (18 passed).
|
||||||
|
|
||||||
### ClusterInfrastructure-010 — Validator does not enforce `DownIfAlone = true` despite the design doc requiring it
|
### ClusterInfrastructure-010 — Validator does not enforce `DownIfAlone = true` despite the design doc requiring it
|
||||||
|
|
||||||
@@ -571,7 +585,7 @@ _Unresolved._
|
|||||||
|--|--|
|
|--|--|
|
||||||
| Severity | Low |
|
| Severity | Low |
|
||||||
| Category | Error handling & resilience |
|
| Category | Error handling & resilience |
|
||||||
| Status | Open |
|
| Status | Resolved |
|
||||||
| Location | `src/ScadaLink.ClusterInfrastructure/ClusterOptionsValidator.cs:21-71` |
|
| Location | `src/ScadaLink.ClusterInfrastructure/ClusterOptionsValidator.cs:21-71` |
|
||||||
|
|
||||||
**Description**
|
**Description**
|
||||||
@@ -602,4 +616,15 @@ should be closed as resolved alongside it.
|
|||||||
|
|
||||||
**Resolution**
|
**Resolution**
|
||||||
|
|
||||||
_Unresolved._
|
Confirmed against the source: `ClusterOptionsValidator.Validate` guarded every other
|
||||||
|
safety-critical setting but performed no check on `DownIfAlone`, so `DownIfAlone: false`
|
||||||
|
passed validation cleanly. With CI-009/Host-012 wiring the property into the HOCON this
|
||||||
|
session, an unguarded `false` would now silently produce the unsafe isolated-single-node
|
||||||
|
behaviour the design doc forbids.
|
||||||
|
|
||||||
|
**Resolved** — fixing commit `commit pending`, date 2026-05-17. Added a `DownIfAlone`
|
||||||
|
check to `ClusterOptionsValidator.Validate` that fails when the flag is `false`, with a
|
||||||
|
message explaining the isolated-single-node-cluster hazard, consistent with how the
|
||||||
|
validator already rejects quorum split-brain strategies. Developed test-first:
|
||||||
|
`ClusterOptionsValidatorTests.DownIfAloneFalse_FailsValidation` was written first,
|
||||||
|
confirmed failing, then passing after the fix. Module test suite green (18 passed).
|
||||||
|
|||||||
@@ -65,6 +65,15 @@ public sealed class ClusterOptionsValidator : IValidateOptions<ClusterOptions>
|
|||||||
"declared unreachable before a heartbeat can arrive.");
|
"declared unreachable before a heartbeat can arrive.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!options.DownIfAlone)
|
||||||
|
{
|
||||||
|
failures.Add(
|
||||||
|
"ClusterOptions.DownIfAlone must be true for the keep-oldest resolver "
|
||||||
|
+ "(Component-ClusterInfrastructure.md → Split-Brain Resolution); with it false the "
|
||||||
|
+ "oldest node can run as an isolated single-node cluster during a partition while the "
|
||||||
|
+ "younger node forms its own, producing two live clusters.");
|
||||||
|
}
|
||||||
|
|
||||||
return failures.Count > 0
|
return failures.Count > 0
|
||||||
? ValidateOptionsResult.Fail(failures)
|
? ValidateOptionsResult.Fail(failures)
|
||||||
: ValidateOptionsResult.Success;
|
: ValidateOptionsResult.Success;
|
||||||
|
|||||||
@@ -94,6 +94,18 @@ public class ClusterOptionsValidatorTests
|
|||||||
Assert.Contains("StableAfter", result.FailureMessage);
|
Assert.Contains("StableAfter", result.FailureMessage);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[Fact]
|
||||||
|
public void DownIfAloneFalse_FailsValidation()
|
||||||
|
{
|
||||||
|
var options = ValidOptions();
|
||||||
|
options.DownIfAlone = false;
|
||||||
|
|
||||||
|
var result = new ClusterOptionsValidator().Validate(null, options);
|
||||||
|
|
||||||
|
Assert.True(result.Failed);
|
||||||
|
Assert.Contains("DownIfAlone", result.FailureMessage);
|
||||||
|
}
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
public void Validate_AccumulatesAllFailures()
|
public void Validate_AccumulatesAllFailures()
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user