diff --git a/code-reviews/ControlPlane/findings.md b/code-reviews/ControlPlane/findings.md
new file mode 100644
index 00000000..a9d6ff6d
--- /dev/null
+++ b/code-reviews/ControlPlane/findings.md
@@ -0,0 +1,109 @@
+# Code Review — ControlPlane
+
+
+
+| Field | Value |
+|---|---|
+| Module | `src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane` |
+| Reviewer | Claude Code |
+| Review date | 2026-06-19 |
+| Commit reviewed | `7286d320` |
+| Status | Reviewed |
+| Open findings | 0 |
+
+## Checklist coverage
+
+A comprehensive review completes every category, recording "No issues found" where
+a category produced nothing rather than leaving it blank.
+
+| # | Category | Result |
+|---|---|---|
+| 1 | Correctness & logic bugs | ControlPlane-001 (Resolved) |
+| 2 | OtOpcUa conventions | No issues found |
+| 3 | Concurrency & thread safety | No issues found |
+| 4 | Error handling & resilience | No issues found |
+| 5 | Security | No issues found |
+| 6 | Performance & resource management | No issues found |
+| 7 | Design-document adherence | No issues found |
+| 8 | Code organization & conventions | ControlPlane-002 (Resolved), ControlPlane-003 (Resolved) |
+| 9 | Testing coverage | No issues found |
+| 10 | Documentation & comments | No issues found |
+
+## Findings
+
+
+
+### ControlPlane-001
+
+| Field | Value |
+|---|---|
+| Severity | Medium |
+| Category | Correctness & logic bugs |
+| Location | `src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/Coordinators/ConfigPublishCoordinator.cs:154` |
+| Status | Resolved |
+
+**Description:** `HandleAck` accepts `ApplyAck` messages from any node, including nodes that were
+not in `_expectedAcks` at dispatch time. When an unexpected node's ack is added to `_acks`,
+`_acks.Count` can reach `_expectedAcks.Count` before every expected node has actually responded.
+The count-based gate on line 166 (`if (_acks.Count < _expectedAcks.Count) return;`) then fires
+prematurely, causing `SealDeployment()` to run while one or more expected nodes have not yet
+applied the deployment. In production, unexpected acks are unlikely (only `DriverHostActor`
+publishes to the `deployment-acks` topic) but the guard is missing.
+
+**Recommendation:** Add a membership check at the start of the processing path in `HandleAck`
+to discard any ack whose `NodeId` is not in `_expectedAcks`, logging it at Debug level for
+diagnostics.
+
+**Resolution:** Fixed 2026-06-19 (commit SHA pending). Added `if (!_expectedAcks.Contains(msg.NodeId))` guard at the top of the processing path in `HandleAck`, discarding unexpected-node acks before they are counted. Regression test `ApplyAck_from_unexpected_node_is_discarded_and_does_not_seal_prematurely` added to `ConfigPublishCoordinatorTests` (proven failing before fix, green after).
+
+---
+
+### ControlPlane-002
+
+| Field | Value |
+|---|---|
+| Severity | Low |
+| Category | Code organization & conventions |
+| Location | `src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/Fleet/FleetStatusBroadcaster.cs:139` |
+| Status | Resolved |
+
+**Description:** `OnTick` uses a `bool changed` flag but passes it to the debug-log message
+via `changed ? 1 : 0`. The `{Count}` placeholder in the message implies a numeric count of
+flipped nodes, but the expression always yields `1` (never the true count) whenever any node
+flips. Operators reading the log will see "flipped 1 nodes to Degraded" even when multiple
+nodes flipped simultaneously during a cluster partition recovery.
+
+**Recommendation:** Replace the `bool changed` flag with an `int flippedCount` counter
+incremented inside the loop, and log `flippedCount` directly.
+
+**Resolution:** Fixed 2026-06-19 (commit SHA pending). Replaced `bool changed` with `int flippedCount`, incremented per flipped node, and logged directly. All 60 tests pass.
+
+---
+
+### ControlPlane-003
+
+| Field | Value |
+|---|---|
+| Severity | Low |
+| Category | Code organization & conventions |
+| Location | `src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/Audit/AuditOutcomeMapper.cs:29` |
+| Status | Resolved |
+
+**Description:** `FromAction` lists ten explicit `AuditOutcome.Success` arms that are all
+shadowed by the `_ => AuditOutcome.Success` default. The explicit arms add no behavioural
+differentiation and create an ongoing maintenance obligation: any new config-write verb added
+to the codebase silently falls through to the correct default anyway, making the explicit list
+misleading as a whitelist. The comment block documenting the known verbs is a better vehicle
+for the enumeration than code.
+
+**Recommendation:** Remove the redundant explicit Success arms and document the known success
+verbs in a comment on the default arm instead. The two Denied arms must stay.
+
+**Resolution:** Fixed 2026-06-19 (commit SHA pending). Removed the ten redundant explicit
+Success arms; replaced with an explanatory comment listing the known success verbs on the
+default `_ => AuditOutcome.Success` arm. All existing AuditOutcomeMapper tests still pass
+(the `Outcome_is_derived_from_the_action_vocabulary` theory covers every listed verb).
diff --git a/src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/Audit/AuditOutcomeMapper.cs b/src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/Audit/AuditOutcomeMapper.cs
index c3e15072..fd4367be 100644
--- a/src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/Audit/AuditOutcomeMapper.cs
+++ b/src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/Audit/AuditOutcomeMapper.cs
@@ -29,16 +29,10 @@ public static class AuditOutcomeMapper
public static AuditOutcome FromAction(string action) => action switch
{
"OpcUaAccessDenied" or "CrossClusterNamespaceAttempt" => AuditOutcome.Denied,
- "DraftCreated"
- or "DraftEdited"
- or "Published"
- or "RolledBack"
- or "NodeApplied"
- or "ClusterCreated"
- or "NodeAdded"
- or "CredentialAdded"
- or "CredentialDisabled"
- or "ExternalIdReleased" => AuditOutcome.Success,
+ // All other known config-write verbs (DraftCreated, DraftEdited, Published, RolledBack,
+ // NodeApplied, ClusterCreated, NodeAdded, CredentialAdded, CredentialDisabled,
+ // ExternalIdReleased) and any future verbs default to Success — config writes are the
+ // overwhelming majority and the only non-success cases are the two Denied entries above.
_ => AuditOutcome.Success,
};
}
diff --git a/src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/Coordinators/ConfigPublishCoordinator.cs b/src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/Coordinators/ConfigPublishCoordinator.cs
index c9c36104..79438ca6 100644
--- a/src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/Coordinators/ConfigPublishCoordinator.cs
+++ b/src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/Coordinators/ConfigPublishCoordinator.cs
@@ -160,6 +160,16 @@ public sealed class ConfigPublishCoordinator : ReceiveActor, IWithTimers
return;
}
+ // Discard acks from nodes that were not in the expected-ack set at dispatch time.
+ // Without this guard an unexpected-node ack inflates _acks.Count and can trigger a
+ // premature seal/fail before every expected node has actually applied the deployment.
+ if (!_expectedAcks.Contains(msg.NodeId))
+ {
+ _log.Debug("Discarding ApplyAck from unexpected node {Node} for {Id} (not in expected set)",
+ msg.NodeId, msg.DeploymentId);
+ return;
+ }
+
_acks[msg.NodeId] = msg.Outcome;
PersistNodeAck(msg);
diff --git a/src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/Fleet/FleetStatusBroadcaster.cs b/src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/Fleet/FleetStatusBroadcaster.cs
index 182305c4..1ca85343 100644
--- a/src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/Fleet/FleetStatusBroadcaster.cs
+++ b/src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/Fleet/FleetStatusBroadcaster.cs
@@ -127,16 +127,16 @@ public sealed class FleetStatusBroadcaster : ReceiveActor, IWithTimers
private void OnTick()
{
var stale = DateTime.UtcNow - HeartbeatTimeout;
- var changed = false;
+ var flippedCount = 0;
foreach (var kv in _nodes.ToList())
{
if (kv.Value.LastSeenUtc < stale && kv.Value.Health == FleetNodeHealth.Healthy)
{
_nodes[kv.Key] = kv.Value with { Health = FleetNodeHealth.Degraded };
- changed = true;
+ flippedCount++;
}
}
- if (changed) _log.Debug("FleetStatusBroadcaster flipped {Count} nodes to Degraded", changed ? 1 : 0);
+ if (flippedCount > 0) _log.Debug("FleetStatusBroadcaster flipped {Count} nodes to Degraded", flippedCount);
PublishSnapshot();
}
diff --git a/tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests/ConfigPublishCoordinatorTests.cs b/tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests/ConfigPublishCoordinatorTests.cs
index e32dcb29..fc16f0b5 100644
--- a/tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests/ConfigPublishCoordinatorTests.cs
+++ b/tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests/ConfigPublishCoordinatorTests.cs
@@ -60,6 +60,61 @@ public sealed class ConfigPublishCoordinatorTests : ControlPlaneActorTestBase
db.Deployments.Single().Status.ShouldBe(DeploymentStatus.Sealed);
}
+ ///
+ /// Regression guard for ControlPlane-001: an ApplyAck from a node that was NOT in the expected-ack
+ /// set (i.e. not a driver-role member when DispatchDeployment ran) must be discarded.
+ /// Without the fix, an unexpected-node ack inflates _acks.Count and can cause the
+ /// coordinator to seal a deployment before every expected node has responded.
+ ///
+ /// Scenario: dispatch with zero expected nodes seals immediately (baseline). A truly-unexpected
+ /// node later sends an ack for a fresh deployment that HAS one expected node — the ack from the
+ /// unexpected node must be ignored so the deployment waits for the real node.
+ ///
+ [Fact]
+ public void ApplyAck_from_unexpected_node_is_discarded_and_does_not_seal_prematurely()
+ {
+ var dbFactory = NewInMemoryDbFactory();
+ var deploymentId = SeedDispatchingDeployment(dbFactory);
+
+ // Seed a NodeDeploymentState row for "expected-driver" so the coordinator sees 1 expected ack.
+ using (var db = dbFactory.CreateDbContext())
+ {
+ db.NodeDeploymentStates.Add(new Configuration.Entities.NodeDeploymentState
+ {
+ NodeId = "expected-driver",
+ DeploymentId = deploymentId.Value,
+ Status = Configuration.Enums.NodeDeploymentStatus.Applying,
+ });
+ db.SaveChanges();
+ }
+
+ // Long deadline so time does not confound the test.
+ var actor = Sys.ActorOf(ConfigPublishCoordinator.Props(dbFactory, TimeSpan.FromMinutes(5)));
+
+ // Drive the coordinator into the AwaitingApplyAcks state via DispatchDeployment.
+ // The coordinator seeds expected-acks from _cluster.State.Members (filtered to driver role).
+ // In this test harness the cluster has no driver-role members, so _expectedAcks is empty
+ // and the coordinator seals immediately on DispatchDeployment.
+ //
+ // To test the discard logic we use the PreStart recovery path instead: start the coordinator
+ // WITHOUT dispatching so it recovers the in-flight deployment from the DB (the NodeDeploymentState
+ // row seeds _expectedAcks = {"expected-driver"}), then send an ack from an UNEXPECTED node.
+ // If the bug is present the count check fires and seals; with the fix the ack is discarded.
+
+ // Send ack from a rogue node NOT in _expectedAcks.
+ actor.Tell(new ApplyAck(deploymentId, NodeId.Parse("rogue-node"),
+ ApplyAckOutcome.Applied, null, CorrelationId.NewId()));
+
+ // Give it time to process and potentially seal if buggy.
+ ExpectNoMsg(TimeSpan.FromMilliseconds(400));
+
+ using var db2 = dbFactory.CreateDbContext();
+ var status = db2.Deployments.Single().Status;
+ // Deployment must still be AwaitingApplyAcks (or Dispatching from seed) — NOT Sealed or PartiallyFailed.
+ status.ShouldNotBe(DeploymentStatus.Sealed);
+ status.ShouldNotBe(DeploymentStatus.PartiallyFailed);
+ }
+
private static DeploymentId SeedDispatchingDeployment(
Microsoft.EntityFrameworkCore.IDbContextFactory dbFactory)
{