Files
scadalink-design/src/ScadaLink.Host/Actors/AkkaHostedService.cs
Joseph Doherty eb8ead58d2 feat: wire SQLite replication between site nodes and fix ConfigurationDatabase tests
Add SiteReplicationActor (runs on every site node) to replicate deployed
configs and store-and-forward buffer operations to the standby peer via
cluster member discovery and fire-and-forget Tell. Wire ReplicationService
handler and pass replication actor to DeploymentManagerActor singleton.

Fix 5 pre-existing ConfigurationDatabase test failures: RowVersion NOT NULL
on SQLite, stale migration name assertion, and seed data count mismatch.
2026-03-18 08:28:02 -04:00

323 lines
14 KiB
C#

using System.Collections.Immutable;
using Akka.Actor;
using Akka.Cluster;
using Akka.Cluster.Tools.Client;
using Akka.Cluster.Tools.Singleton;
using Akka.Configuration;
using Microsoft.Extensions.Options;
using ScadaLink.ClusterInfrastructure;
using ScadaLink.Communication;
using ScadaLink.Communication.Actors;
using ScadaLink.Host.Actors;
using ScadaLink.SiteRuntime;
using ScadaLink.SiteRuntime.Actors;
using ScadaLink.SiteRuntime.Messages;
using ScadaLink.SiteRuntime.Persistence;
using ScadaLink.SiteRuntime.Scripts;
using ScadaLink.SiteRuntime.Streaming;
using ScadaLink.StoreAndForward;
namespace ScadaLink.Host.Actors;
/// <summary>
/// Hosted service that manages the Akka.NET actor system lifecycle.
/// Creates the actor system on start, registers actors, and triggers
/// CoordinatedShutdown on stop.
///
/// WP-3: Transport heartbeat is explicitly configured in HOCON from CommunicationOptions.
/// </summary>
public class AkkaHostedService : IHostedService
{
// Dependencies captured at construction time; option objects are unwrapped once.
private readonly IServiceProvider _serviceProvider;
private readonly NodeOptions _nodeOptions;
private readonly ClusterOptions _clusterOptions;
private readonly CommunicationOptions _communicationOptions;
private readonly ILogger<AkkaHostedService> _logger;

// Assigned by StartAsync once the actor system has been created.
private ActorSystem? _actorSystem;

/// <summary>
/// Creates the hosted service and snapshots the node, cluster and communication options.
/// </summary>
/// <param name="serviceProvider">Provider used later to resolve services handed to actors.</param>
/// <param name="nodeOptions">Node settings (role, hostname, remoting port, site id).</param>
/// <param name="clusterOptions">Cluster settings (seed nodes, failure detection, split-brain resolver).</param>
/// <param name="communicationOptions">Communication settings (transport heartbeat, central contact points).</param>
/// <param name="logger">Logger for lifecycle messages.</param>
/// <exception cref="ArgumentNullException">Any argument is null.</exception>
public AkkaHostedService(
    IServiceProvider serviceProvider,
    IOptions<NodeOptions> nodeOptions,
    IOptions<ClusterOptions> clusterOptions,
    IOptions<CommunicationOptions> communicationOptions,
    ILogger<AkkaHostedService> logger)
{
    // Fail at construction rather than with a NullReferenceException during start-up.
    ArgumentNullException.ThrowIfNull(serviceProvider);
    ArgumentNullException.ThrowIfNull(nodeOptions);
    ArgumentNullException.ThrowIfNull(clusterOptions);
    ArgumentNullException.ThrowIfNull(communicationOptions);
    ArgumentNullException.ThrowIfNull(logger);

    _serviceProvider = serviceProvider;
    _nodeOptions = nodeOptions.Value;
    _clusterOptions = clusterOptions.Value;
    _communicationOptions = communicationOptions.Value;
    _logger = logger;
}
/// <summary>
/// The actor system owned by this service. Null until StartAsync has created it.
/// </summary>
public ActorSystem? ActorSystem
{
    get { return _actorSystem; }
}
/// <summary>
/// Creates the "scadalink" actor system from generated HOCON, registers the dead
/// letter monitor, and then registers the actors matching this node's role
/// ("Central" or "Site", compared case-insensitively).
/// </summary>
/// <param name="cancellationToken">Not observed; all work here is synchronous.</param>
/// <returns>An already-completed task.</returns>
public Task StartAsync(CancellationToken cancellationToken)
{
    // For site nodes, include a site-specific role (e.g., "site-SiteA") alongside the base role
    var roles = BuildRoles();

    // WP-3: Transport heartbeat explicitly configured from CommunicationOptions (not framework defaults)
    var transportHeartbeatSec = _communicationOptions.TransportHeartbeatInterval.TotalSeconds;
    var transportFailureSec = _communicationOptions.TransportFailureThreshold.TotalSeconds;

    var config = ConfigurationFactory.ParseString(BuildHocon(roles));
    _actorSystem = ActorSystem.Create("scadalink", config);

    _logger.LogInformation(
        "Akka.NET actor system 'scadalink' started. Role={Role}, Roles={Roles}, Hostname={Hostname}, Port={Port}, " +
        "TransportHeartbeat={TransportHeartbeat}s, TransportFailure={TransportFailure}s",
        _nodeOptions.Role,
        string.Join(", ", roles),
        _nodeOptions.NodeHostname,
        _nodeOptions.RemotingPort,
        transportHeartbeatSec,
        transportFailureSec);

    // Register the dead letter monitor actor
    var loggerFactory = _serviceProvider.GetRequiredService<ILoggerFactory>();
    var dlmLogger = loggerFactory.CreateLogger<DeadLetterMonitorActor>();
    var dlmHealthCollector = _serviceProvider.GetService<ScadaLink.HealthMonitoring.ISiteHealthCollector>();
    _actorSystem.ActorOf(
        Props.Create(() => new DeadLetterMonitorActor(dlmLogger, dlmHealthCollector)),
        "dead-letter-monitor");

    // Register role-specific actors
    if (_nodeOptions.Role.Equals("Central", StringComparison.OrdinalIgnoreCase))
    {
        RegisterCentralActors();
    }
    else if (_nodeOptions.Role.Equals("Site", StringComparison.OrdinalIgnoreCase))
    {
        RegisterSiteActors();
    }
    else
    {
        // Previously this case fell through without any trace in the logs.
        _logger.LogWarning(
            "Node role '{Role}' is neither Central nor Site; no role-specific actors were registered.",
            _nodeOptions.Role);
    }

    return Task.CompletedTask;
}

/// <summary>
/// Renders the HOCON text for the actor system from the configured options.
/// The text is formatted with the invariant culture so it does not depend on the
/// thread's current culture.
/// </summary>
/// <param name="roles">Cluster roles to advertise for this node.</param>
/// <returns>The HOCON configuration text.</returns>
private string BuildHocon(IEnumerable<string> roles)
{
    var seedNodesStr = string.Join(",",
        _clusterOptions.SeedNodes.Select(s => $"\"{s}\""));
    var rolesStr = string.Join(",", roles.Select(r => $"\"{r}\""));

    return FormattableString.Invariant($@"
akka {{
actor {{
provider = cluster
}}
remote {{
dot-netty.tcp {{
hostname = ""{_nodeOptions.NodeHostname}""
port = {_nodeOptions.RemotingPort}
}}
transport-failure-detector {{
heartbeat-interval = {ToHoconDuration(_communicationOptions.TransportHeartbeatInterval)}
acceptable-heartbeat-pause = {ToHoconDuration(_communicationOptions.TransportFailureThreshold)}
}}
}}
cluster {{
seed-nodes = [{seedNodesStr}]
roles = [{rolesStr}]
min-nr-of-members = {_clusterOptions.MinNrOfMembers}
split-brain-resolver {{
active-strategy = {_clusterOptions.SplitBrainResolverStrategy}
stable-after = {ToHoconDuration(_clusterOptions.StableAfter)}
keep-oldest {{
down-if-alone = on
}}
}}
failure-detector {{
heartbeat-interval = {ToHoconDuration(_clusterOptions.HeartbeatInterval)}
acceptable-heartbeat-pause = {ToHoconDuration(_clusterOptions.FailureDetectionThreshold)}
}}
run-coordinated-shutdown-when-down = on
}}
coordinated-shutdown {{
run-by-clr-shutdown-hook = on
}}
}}");
}

/// <summary>
/// Formats a duration as a HOCON value in whole milliseconds (e.g. "500ms").
/// Using milliseconds instead of whole seconds keeps sub-second settings intact.
/// </summary>
/// <param name="value">The duration to render.</param>
/// <returns>The duration as a millisecond-suffixed HOCON literal.</returns>
private static string ToHoconDuration(TimeSpan value)
{
    var milliseconds = value.TotalMilliseconds
        .ToString("F0", System.Globalization.CultureInfo.InvariantCulture);
    return milliseconds + "ms";
}
/// <summary>
/// Runs CoordinatedShutdown (with the CLR-exit reason) on the actor system if one
/// was created; does nothing otherwise.
/// </summary>
/// <param name="cancellationToken">Not observed by this method.</param>
public async Task StopAsync(CancellationToken cancellationToken)
{
    var system = _actorSystem;
    if (system == null)
    {
        // Nothing was started, so there is nothing to shut down.
        return;
    }

    _logger.LogInformation("Shutting down Akka.NET actor system via CoordinatedShutdown...");

    var coordinator = Akka.Actor.CoordinatedShutdown.Get(system);
    var reason = Akka.Actor.CoordinatedShutdown.ClrExitReason.Instance;
    await coordinator.Run(reason);

    _logger.LogInformation("Akka.NET actor system shutdown complete.");
}
/// <summary>
/// Produces the cluster roles advertised by this node. The configured role is
/// always first; a node whose role is "Site" (case-insensitive) and that has a
/// non-empty site id additionally gets "site-{SiteId}", the role the deployment
/// manager singleton is scoped to.
/// </summary>
/// <returns>A new list containing one or two role names.</returns>
private List<string> BuildRoles()
{
    var result = new List<string>();
    result.Add(_nodeOptions.Role);

    var isSiteNode = _nodeOptions.Role.Equals("Site", StringComparison.OrdinalIgnoreCase);
    if (!isSiteNode || string.IsNullOrEmpty(_nodeOptions.SiteId))
    {
        // Only the base role applies.
        return result;
    }

    result.Add($"site-{_nodeOptions.SiteId}");
    return result;
}
/// <summary>
/// Creates the central-side actors: the CentralCommunicationActor and the
/// ManagementActor. Both are registered with the ClusterClientReceptionist, and
/// the CommunicationService (when one is registered in DI) is given the
/// communication actor reference.
/// WP-4: Central communication actor routes all 8 message patterns to sites.
/// </summary>
private void RegisterCentralActors()
{
    var system = _actorSystem!;

    // Communication actor, reachable by site ClusterClients through the receptionist.
    var clientFactory = new DefaultSiteClientFactory();
    var communicationProps = Props.Create(
        () => new CentralCommunicationActor(_serviceProvider, clientFactory));
    var communicationActor = system.ActorOf(communicationProps, "central-communication");

    var receptionist = ClusterClientReceptionist.Get(system);
    receptionist.RegisterService(communicationActor);
    _logger.LogInformation("CentralCommunicationActor registered with ClusterClientReceptionist");

    // Hand the actor reference to the CommunicationService if it is available.
    var communicationService = _serviceProvider.GetService<CommunicationService>();
    if (communicationService != null)
    {
        communicationService.SetCommunicationActor(communicationActor);
    }

    // Management Service — accessible via ClusterClient
    var loggerFactory = _serviceProvider.GetRequiredService<ILoggerFactory>();
    var managementLogger = loggerFactory.CreateLogger<ScadaLink.ManagementService.ManagementActor>();
    var managementProps = Props.Create(
        () => new ScadaLink.ManagementService.ManagementActor(_serviceProvider, managementLogger));
    var managementActor = system.ActorOf(managementProps, "management");

    receptionist.RegisterService(managementActor);
    _logger.LogInformation("ManagementActor registered with ClusterClientReceptionist");

    _logger.LogInformation("Central actors registered. CentralCommunicationActor created.");
}
/// <summary>
/// Registers site-specific actors: the optional Data Connection Layer manager,
/// the per-node SiteReplicationActor, the Deployment Manager cluster singleton
/// (plus its proxy), the SiteCommunicationActor, and — when central contact
/// points are configured — a ClusterClient to central.
/// The singleton is scoped to the site-specific cluster role so it runs on exactly
/// one node within this site's cluster.
/// </summary>
/// <exception cref="InvalidOperationException">
/// The actor system has not been created, or no site id is configured. Without a
/// site id BuildRoles does not add the "site-…" role, so the singleton scoped to
/// that role could not be placed on this node.
/// </exception>
private void RegisterSiteActors()
{
    var system = _actorSystem
        ?? throw new InvalidOperationException(
            "The actor system must be created before site actors can be registered.");

    var siteId = _nodeOptions.SiteId;
    if (string.IsNullOrEmpty(siteId))
    {
        throw new InvalidOperationException(
            "A site node requires a non-empty SiteId to scope its cluster role and actors.");
    }

    var siteRole = $"site-{siteId}";
    var storage = _serviceProvider.GetRequiredService<SiteStorageService>();
    var compilationService = _serviceProvider.GetRequiredService<ScriptCompilationService>();
    var sharedScriptLibrary = _serviceProvider.GetRequiredService<SharedScriptLibrary>();
    var streamManager = _serviceProvider.GetService<SiteStreamManager>();
    var siteRuntimeOptionsValue = _serviceProvider.GetService<IOptions<SiteRuntimeOptions>>()?.Value
        ?? new SiteRuntimeOptions();
    var loggerFactory = _serviceProvider.GetRequiredService<ILoggerFactory>();
    var dmLogger = loggerFactory.CreateLogger<DeploymentManagerActor>();

    // WP-34: Create DCL Manager Actor for tag subscriptions
    var dclFactory = _serviceProvider.GetService<ScadaLink.DataConnectionLayer.IDataConnectionFactory>();
    var dclOptions = _serviceProvider.GetService<IOptions<ScadaLink.DataConnectionLayer.DataConnectionOptions>>()?.Value
        ?? new ScadaLink.DataConnectionLayer.DataConnectionOptions();
    IActorRef? dclManager = null;
    if (dclFactory != null)
    {
        var healthCollector = _serviceProvider.GetRequiredService<ScadaLink.HealthMonitoring.ISiteHealthCollector>();
        dclManager = system.ActorOf(
            Props.Create(() => new ScadaLink.DataConnectionLayer.Actors.DataConnectionManagerActor(
                dclFactory, dclOptions, healthCollector)),
            "dcl-manager");
        _logger.LogInformation("Data Connection Layer manager actor created");
    }

    // Resolve the health collector for the Deployment Manager
    var siteHealthCollector = _serviceProvider.GetService<ScadaLink.HealthMonitoring.ISiteHealthCollector>();

    // Create SiteReplicationActor on every node (not a singleton)
    var sfStorage = _serviceProvider.GetRequiredService<StoreAndForwardStorage>();
    var replicationService = _serviceProvider.GetRequiredService<ReplicationService>();
    var replicationLogger = loggerFactory.CreateLogger<SiteReplicationActor>();
    var replicationActor = system.ActorOf(
        Props.Create(() => new SiteReplicationActor(
            storage, sfStorage, replicationService, siteRole, replicationLogger)),
        "site-replication");

    // Wire S&F replication handler to forward operations via the replication actor
    replicationService.SetReplicationHandler(op =>
    {
        replicationActor.Tell(new ReplicateStoreAndForward(op));
        return Task.CompletedTask;
    });
    _logger.LogInformation("SiteReplicationActor created and S&F replication handler wired");

    // Create the Deployment Manager as a cluster singleton
    var singletonProps = ClusterSingletonManager.Props(
        singletonProps: Props.Create(() => new DeploymentManagerActor(
            storage,
            compilationService,
            sharedScriptLibrary,
            streamManager,
            siteRuntimeOptionsValue,
            dmLogger,
            dclManager,
            replicationActor,
            siteHealthCollector,
            _serviceProvider)),
        terminationMessage: PoisonPill.Instance,
        settings: ClusterSingletonManagerSettings.Create(system)
            .WithRole(siteRole)
            .WithSingletonName("deployment-manager"));
    system.ActorOf(singletonProps, "deployment-manager-singleton");

    // Create a proxy for other actors to communicate with the singleton
    var proxyProps = ClusterSingletonProxy.Props(
        singletonManagerPath: "/user/deployment-manager-singleton",
        settings: ClusterSingletonProxySettings.Create(system)
            .WithRole(siteRole)
            .WithSingletonName("deployment-manager"));
    var dmProxy = system.ActorOf(proxyProps, "deployment-manager-proxy");

    // WP-4: Create SiteCommunicationActor for receiving messages from central
    var siteCommActor = system.ActorOf(
        Props.Create(() => new SiteCommunicationActor(
            siteId,
            _communicationOptions,
            dmProxy)),
        "site-communication");

    // Register local handlers with SiteCommunicationActor
    siteCommActor.Tell(new RegisterLocalHandler(LocalHandlerType.Artifacts, dmProxy));

    // Register SiteCommunicationActor with ClusterClientReceptionist so central ClusterClients can reach it
    ClusterClientReceptionist.Get(system).RegisterService(siteCommActor);
    _logger.LogInformation(
        "Site actors registered. DeploymentManager singleton scoped to role={SiteRole}, SiteCommunicationActor created.",
        siteRole);

    // Create ClusterClient to central if contact points are configured
    if (_communicationOptions.CentralContactPoints.Count > 0)
    {
        var contacts = _communicationOptions.CentralContactPoints
            .Select(cp => ActorPath.Parse($"{cp}/system/receptionist"))
            .ToImmutableHashSet();
        var clientSettings = ClusterClientSettings.Create(system)
            .WithInitialContacts(contacts);
        var centralClient = system.ActorOf(
            ClusterClient.Props(clientSettings), "central-cluster-client");

        // The actor reference is already in hand, so send to it directly instead of
        // resolving "/user/site-communication" through an ActorSelection.
        siteCommActor.Tell(new RegisterCentralClient(centralClient));

        _logger.LogInformation(
            "Created ClusterClient to central with {Count} contact point(s) for site {SiteId}",
            contacts.Count, _nodeOptions.SiteId);
    }
}
}