feat(adminui): FleetDiagnosticsClient real Akka ActorSelection round-trip (F17)
v2-ci / unit-tests (tests/Core/ZB.MOM.WW.OtOpcUa.Cluster.Tests) (push) Has been cancelled
v2-ci / build (push) Has been cancelled
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests) (push) Has been cancelled
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests) (push) Has been cancelled
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests) (push) Has been cancelled
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Security.Tests) (push) Has been cancelled
v2-ci / integration (push) Has been cancelled

- New Commons.Messages.Fleet.GetDiagnostics request record.
- DriverHostActor handles GetDiagnostics in all three states (Steady, Applying,
  Stale); replies with a NodeDiagnosticsSnapshot built from _currentRevision
  + the local NodeId. Drivers list is empty until F7 wires the per-instance
  children.
- FleetDiagnosticsClient now resolves the target via ActorSelection at
  akka.tcp://{system}@{nodeId}/user/driver-host and Asks with a 3s timeout.
  On timeout/peer-down it returns an empty snapshot so the UI degrades
  gracefully rather than throwing.

Two new integration tests in Host.IntegrationTests:
- GetDiagnostics_returns_snapshot_with_target_NodeId verifies the
  cross-node Ask/Reply works.
- GetDiagnostics_after_deploy_reports_current_revision exercises the
  end-to-end path: AdminOps starts a deployment, both DriverHostActors
  apply, then diagnostics reports the new revision on both nodes.

All 98 v2 tests pass (was 96 + 2 new).
This commit is contained in:
Joseph Doherty
2026-05-26 06:58:11 -04:00
parent 337a691629
commit 8f32b89fb9
4 changed files with 139 additions and 17 deletions
@@ -2,7 +2,9 @@ using Akka.Actor;
using Akka.Cluster.Tools.PublishSubscribe;
using Akka.Event;
using Microsoft.EntityFrameworkCore;
using ZB.MOM.WW.OtOpcUa.Commons.Interfaces;
using ZB.MOM.WW.OtOpcUa.Commons.Messages.Deploy;
using ZB.MOM.WW.OtOpcUa.Commons.Messages.Fleet;
using ZB.MOM.WW.OtOpcUa.Commons.Types;
using ZB.MOM.WW.OtOpcUa.Configuration;
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
@@ -134,6 +136,7 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
private void Steady()
{
Receive<DispatchDeployment>(HandleDispatchFromSteady);
Receive<GetDiagnostics>(HandleGetDiagnostics);
Receive<SubscribeAck>(_ => { /* PubSub ack */ });
}
@@ -151,6 +154,7 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
_localNode, msg.DeploymentId, _applyingDeploymentId);
Self.Forward(msg); // re-deliver after we transition back
});
Receive<GetDiagnostics>(HandleGetDiagnostics);
Receive<SubscribeAck>(_ => { /* PubSub ack */ });
}
@@ -160,11 +164,24 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
{
_log.Warning("DriverHost {Node}: ignoring DispatchDeployment while Stale (DB unreachable)", _localNode);
});
Receive<GetDiagnostics>(HandleGetDiagnostics);
Receive<RetryConfigDbConnection>(_ => TryRecoverFromStale());
Receive<SubscribeAck>(_ => { /* PubSub ack */ });
Timers.StartPeriodicTimer("retry-db", RetryConfigDbConnection.Instance, ReconnectInterval);
}
private void HandleGetDiagnostics(GetDiagnostics msg)
{
// Driver-instance children aren't spawned yet (F7); the snapshot reports an empty driver
// list. CurrentRevision is real — it's what the host believes is its applied revision.
var snapshot = new NodeDiagnosticsSnapshot(
NodeId: _localNode,
CurrentRevision: _currentRevision,
Drivers: Array.Empty<DriverInstanceDiagnostics>(),
AsOfUtc: DateTime.UtcNow);
Sender.Tell(snapshot);
}
private void HandleDispatchFromSteady(DispatchDeployment msg)
{
if (_currentRevision is { } cur && cur == msg.RevisionHash)