feat(adminui): Reconnect/Restart on DriverStatusPanel (DriverOperator-gated)

- RestartDriver / ReconnectDriver messages + AdminOperationsActor
  handlers (broadcast via driver-control DPS topic; audited via
  ConfigEdits).
- DriverHostActor subscribes to driver-control; locates the
  matching child DriverInstanceActor and stops+respawns it
  (Restart) or sends it a ForceReconnect internal message
  (Reconnect — re-enters Reconnecting state without full stop).
  DriverInstanceSpec constructor call uses named args to handle
  the full 6-parameter signature.
- New DriverOperator authorization policy mapped to DriverOperator
  or FleetAdmin role; documented in docs/security.md. Map LDAP
  group via GroupToRole (e.g. "ot-driver-operator": "DriverOperator").
- DriverStatusPanel renders Reconnect + Restart buttons when the
  user holds the DriverOperator policy (hidden otherwise). Restart
  requires an in-page Razor confirm block (no JS confirm, keeps
  SignalR event loop unblocked). Both buttons show a spinner and are
  disabled while a request is in flight; the result chip auto-clears after 8s.
  Username sourced from AuthenticationStateProvider.

Reconnect resolves to "ForceReconnect" (re-enter Reconnecting,
not full stop+respawn) — transport drops and retries while actor
and in-memory state are preserved. All DriverInstanceActor states
handle ForceReconnect safely (no-op when already in transition).
This commit is contained in:
Joseph Doherty
2026-05-28 11:14:04 -04:00
parent 4b374fd177
commit ffcc8d1065
8 changed files with 333 additions and 2 deletions
@@ -4,6 +4,7 @@ using Akka.Cluster.Tools.PublishSubscribe;
using Akka.Event;
using Microsoft.EntityFrameworkCore;
using ZB.MOM.WW.OtOpcUa.Commons.Interfaces;
using ZB.MOM.WW.OtOpcUa.Commons.Messages.Admin;
using ZB.MOM.WW.OtOpcUa.Commons.Messages.Deploy;
using ZB.MOM.WW.OtOpcUa.Commons.Messages.Fleet;
using ZB.MOM.WW.OtOpcUa.Commons.Observability;
@@ -36,6 +37,7 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
{
/// <summary>Pub/sub topic this actor subscribes to so the coordinator's deployment broadcasts reach it.</summary>
public const string DeploymentsTopic = "deployments";
/// <summary>Pub/sub topic name for deployment acknowledgements.</summary>
// NOTE(review): the publisher/subscriber of this topic is not visible in this excerpt — confirm usage.
public const string DeploymentAcksTopic = "deployment-acks";
/// <summary>Pub/sub topic this actor subscribes to so AdminUI Reconnect/Restart commands reach it.</summary>
public const string DriverControlTopic = "driver-control";
/// <summary>Period of the "retry-db" timer that delivers <c>RetryConfigDbConnection</c> to this actor.</summary>
public static readonly TimeSpan ReconnectInterval = TimeSpan.FromSeconds(30);
private readonly IDbContextFactory<OtOpcUaConfigDbContext> _dbFactory;
@@ -123,6 +125,8 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
{
// Subscribe to deployments topic so the coordinator's broadcast lands here.
DistributedPubSub.Get(Context.System).Mediator.Tell(new Subscribe(DeploymentsTopic, Self));
// Subscribe to driver-control topic so AdminUI Reconnect/Restart commands land here.
DistributedPubSub.Get(Context.System).Mediator.Tell(new Subscribe(DriverControlTopic, Self));
Bootstrap();
}
@@ -187,6 +191,8 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
Receive<DispatchDeployment>(HandleDispatchFromSteady);
Receive<GetDiagnostics>(HandleGetDiagnostics);
Receive<DriverInstanceActor.AttributeValuePublished>(ForwardToMux);
Receive<RestartDriver>(HandleRestartDriver);
Receive<ReconnectDriver>(HandleReconnectDriver);
Receive<SubscribeAck>(_ => { /* PubSub ack */ });
}
@@ -206,6 +212,8 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
});
Receive<GetDiagnostics>(HandleGetDiagnostics);
Receive<DriverInstanceActor.AttributeValuePublished>(ForwardToMux);
Receive<RestartDriver>(HandleRestartDriver);
Receive<ReconnectDriver>(HandleReconnectDriver);
Receive<SubscribeAck>(_ => { /* PubSub ack */ });
}
@@ -225,6 +233,8 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
});
Receive<GetDiagnostics>(HandleGetDiagnostics);
Receive<RetryConfigDbConnection>(_ => TryRecoverFromStale());
Receive<RestartDriver>(HandleRestartDriver);
Receive<ReconnectDriver>(HandleReconnectDriver);
Receive<SubscribeAck>(_ => { /* PubSub ack */ });
Timers.StartPeriodicTimer("retry-db", RetryConfigDbConnection.Instance, ReconnectInterval);
}
@@ -444,6 +454,42 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
public Task FlushOptionalCachesAsync(CancellationToken cancellationToken) => Task.CompletedTask;
}
/// <summary>
/// Handles a <see cref="RestartDriver"/> request: stops the child actor registered for the
/// requested driver instance and spawns a replacement.
/// </summary>
/// <param name="msg">Restart request carrying the target driver instance id and the requesting user name.</param>
private void HandleRestartDriver(RestartDriver msg)
{
    var instanceId = msg.DriverInstanceId;

    // The request is a broadcast; nodes that do not host this instance ignore it.
    if (!_children.TryGetValue(instanceId, out var current))
    {
        return;
    }

    _log.Info("DriverHost {Node}: restarting driver {Id} by request of {User}",
        _localNode, instanceId, msg.ActorByUserName);

    // Tear down the running child and forget it before creating the replacement.
    // NOTE(review): Context.Stop is asynchronous. If SpawnChild gives the new actor the same
    // name as the one being stopped, creation could fail until the old child has terminated —
    // confirm how SpawnChild names its children.
    Context.Stop(current.Actor);
    _children.Remove(instanceId);

    // The replacement spec is rebuilt from the cached entry: only the driver type and the last
    // config JSON are carried over. The row id is Guid.Empty, Name is set to the instance id,
    // and Enabled is forced to true.
    var respawnSpec = new DriverInstanceSpec(
        DriverInstanceRowId: Guid.Empty,
        DriverInstanceId: instanceId,
        Name: instanceId,
        DriverType: current.DriverType,
        Enabled: true,
        DriverConfig: current.LastConfigJson);
    SpawnChild(respawnSpec);
}
/// <summary>
/// Handles a <see cref="ReconnectDriver"/> request by forwarding a
/// <see cref="DriverInstanceActor.ForceReconnect"/> message to the child actor registered
/// for the requested driver instance. The child actor itself is not stopped.
/// </summary>
/// <param name="msg">Reconnect request carrying the target driver instance id and the requesting user name.</param>
private void HandleReconnectDriver(ReconnectDriver msg)
{
    var instanceId = msg.DriverInstanceId;

    // The request is a broadcast; only the node that hosts this instance reacts to it.
    if (_children.TryGetValue(instanceId, out var child))
    {
        _log.Info("DriverHost {Node}: reconnecting driver {Id} by request of {User}",
            _localNode, instanceId, msg.ActorByUserName);

        // Ask the child to drop its transport and re-enter its Reconnecting state.
        child.Actor.Tell(new DriverInstanceActor.ForceReconnect());
    }
}
private void TryRecoverFromStale()
{
try