using System.Collections.Immutable;
using Akka.Actor;
using Akka.Cluster;
using Akka.Cluster.Tools.Client;
using Akka.Cluster.Tools.Singleton;
using Akka.Configuration;
using Microsoft.Extensions.Options;
using ScadaLink.ClusterInfrastructure;
using ScadaLink.Communication;
using ScadaLink.Communication.Actors;
using ScadaLink.Host.Actors;
using ScadaLink.SiteRuntime;
using ScadaLink.SiteRuntime.Actors;
using ScadaLink.SiteRuntime.Messages;
using ScadaLink.SiteRuntime.Persistence;
using ScadaLink.SiteRuntime.Scripts;
using ScadaLink.SiteRuntime.Streaming;
using ScadaLink.StoreAndForward;

namespace ScadaLink.Host.Actors;

/// <summary>
/// Hosted service that manages the Akka.NET actor system lifecycle.
/// Creates the actor system on start, registers actors, and triggers
/// CoordinatedShutdown on stop.
///
/// WP-3: Transport heartbeat is explicitly configured in HOCON from CommunicationOptions.
/// </summary>
public class AkkaHostedService : IHostedService
{
    private const string CentralRoleName = "Central";
    private const string SiteRoleName = "Site";

    private readonly IServiceProvider _serviceProvider;
    private readonly NodeOptions _nodeOptions;
    private readonly ClusterOptions _clusterOptions;
    private readonly CommunicationOptions _communicationOptions;
    private readonly ILogger<AkkaHostedService> _logger;
    private ActorSystem? _actorSystem;

    /// <summary>
    /// Captures the service provider, the resolved option values and the logger.
    /// No actor system is created until <see cref="StartAsync"/> runs.
    /// </summary>
    public AkkaHostedService(
        IServiceProvider serviceProvider,
        IOptions<NodeOptions> nodeOptions,
        IOptions<ClusterOptions> clusterOptions,
        IOptions<CommunicationOptions> communicationOptions,
        ILogger<AkkaHostedService> logger)
    {
        _serviceProvider = serviceProvider;
        _nodeOptions = nodeOptions.Value;
        _clusterOptions = clusterOptions.Value;
        _communicationOptions = communicationOptions.Value;
        _logger = logger;
    }

    /// <summary>
    /// Gets the actor system once started. Null before StartAsync completes.
    /// </summary>
    public ActorSystem? ActorSystem => _actorSystem;

    /// <summary>
    /// Builds the HOCON configuration from the node, cluster and communication options,
    /// creates the 'scadalink' actor system, and registers the dead letter monitor plus
    /// the actors for this node's role.
    /// </summary>
    /// <param name="cancellationToken">Not observed; startup is synchronous.</param>
    /// <returns>A completed task.</returns>
    /// <exception cref="InvalidOperationException">
    /// The node has the Site role but no SiteId is configured.
    /// </exception>
    public Task StartAsync(CancellationToken cancellationToken)
    {
        // Validate before the actor system exists so a misconfigured site node fails fast
        // instead of starting a singleton scoped to a role ("site-") that no member carries.
        string? siteId = null;
        if (IsRole(SiteRoleName))
        {
            siteId = _nodeOptions.SiteId;
            if (string.IsNullOrEmpty(siteId))
            {
                throw new InvalidOperationException(
                    "NodeOptions.SiteId must be configured for nodes with the Site role.");
            }
        }

        var seedNodesStr = string.Join(",", _clusterOptions.SeedNodes.Select(s => $"\"{s}\""));

        // For site nodes, include a site-specific role (e.g., "site-SiteA") alongside the base role
        var roles = BuildRoles();
        var rolesStr = string.Join(",", roles.Select(r => $"\"{r}\""));

        // WP-3: Transport heartbeat explicitly configured from CommunicationOptions (not framework defaults)
        var transportHeartbeatSec = _communicationOptions.TransportHeartbeatInterval.TotalSeconds;
        var transportFailureSec = _communicationOptions.TransportFailureThreshold.TotalSeconds;

        // Durations are emitted in milliseconds so sub-second settings are not rounded away.
        var transportHeartbeat = ToHoconDuration(_communicationOptions.TransportHeartbeatInterval);
        var transportFailure = ToHoconDuration(_communicationOptions.TransportFailureThreshold);
        var stableAfter = ToHoconDuration(_clusterOptions.StableAfter);
        var clusterHeartbeat = ToHoconDuration(_clusterOptions.HeartbeatInterval);
        var clusterFailure = ToHoconDuration(_clusterOptions.FailureDetectionThreshold);

        var hocon = $@"
akka {{
  actor {{
    provider = cluster
  }}
  remote {{
    dot-netty.tcp {{
      hostname = ""{_nodeOptions.NodeHostname}""
      port = {_nodeOptions.RemotingPort}
    }}
    transport-failure-detector {{
      heartbeat-interval = {transportHeartbeat}
      acceptable-heartbeat-pause = {transportFailure}
    }}
  }}
  cluster {{
    seed-nodes = [{seedNodesStr}]
    roles = [{rolesStr}]
    min-nr-of-members = {_clusterOptions.MinNrOfMembers}
    split-brain-resolver {{
      active-strategy = {_clusterOptions.SplitBrainResolverStrategy}
      stable-after = {stableAfter}
      keep-oldest {{
        down-if-alone = on
      }}
    }}
    failure-detector {{
      heartbeat-interval = {clusterHeartbeat}
      acceptable-heartbeat-pause = {clusterFailure}
    }}
    run-coordinated-shutdown-when-down = on
  }}
  coordinated-shutdown {{
    run-by-clr-shutdown-hook = on
  }}
}}";

        var config = ConfigurationFactory.ParseString(hocon);
        _actorSystem = ActorSystem.Create("scadalink", config);

        _logger.LogInformation(
            "Akka.NET actor system 'scadalink' started. Role={Role}, Roles={Roles}, Hostname={Hostname}, Port={Port}, " +
            "TransportHeartbeat={TransportHeartbeat}s, TransportFailure={TransportFailure}s",
            _nodeOptions.Role,
            string.Join(", ", roles),
            _nodeOptions.NodeHostname,
            _nodeOptions.RemotingPort,
            transportHeartbeatSec,
            transportFailureSec);

        // Register the dead letter monitor actor
        var loggerFactory = _serviceProvider.GetRequiredService<ILoggerFactory>();
        var dlmLogger = loggerFactory.CreateLogger<DeadLetterMonitorActor>();
        // NOTE(review): health collector service type reconstructed from the variable name — confirm.
        var dlmHealthCollector = _serviceProvider.GetService<ScadaLink.HealthMonitoring.ISiteHealthCollector>();
        _actorSystem.ActorOf(
            Props.Create(() => new DeadLetterMonitorActor(dlmLogger, dlmHealthCollector)),
            "dead-letter-monitor");

        // Register role-specific actors
        if (IsRole(CentralRoleName))
        {
            RegisterCentralActors();
        }
        else if (siteId is not null)
        {
            // siteId is only assigned (and validated) when the node has the Site role.
            RegisterSiteActors(siteId);
        }

        return Task.CompletedTask;
    }

    /// <summary>
    /// Runs Akka CoordinatedShutdown for the actor system, if one was created.
    /// </summary>
    /// <param name="cancellationToken">Not observed; the shutdown task is awaited to completion.</param>
    public async Task StopAsync(CancellationToken cancellationToken)
    {
        if (_actorSystem is null)
        {
            return;
        }

        _logger.LogInformation("Shutting down Akka.NET actor system via CoordinatedShutdown...");
        var shutdown = Akka.Actor.CoordinatedShutdown.Get(_actorSystem);
        await shutdown.Run(Akka.Actor.CoordinatedShutdown.ClrExitReason.Instance);
        _logger.LogInformation("Akka.NET actor system shutdown complete.");
    }

    /// <summary>
    /// Returns true when the configured node role equals <paramref name="role"/>, ignoring case.
    /// </summary>
    private bool IsRole(string role) =>
        _nodeOptions.Role.Equals(role, StringComparison.OrdinalIgnoreCase);

    /// <summary>
    /// Formats a duration as a whole number of milliseconds with the HOCON "ms" unit,
    /// using the invariant culture so the output does not depend on the host locale.
    /// </summary>
    private static string ToHoconDuration(TimeSpan value) =>
        ((long)Math.Round(value.TotalMilliseconds))
            .ToString(System.Globalization.CultureInfo.InvariantCulture) + "ms";

    /// <summary>
    /// Builds the list of cluster roles for this node. Site nodes get both "Site"
    /// and a site-specific role (e.g., "site-SiteA") to scope singleton placement.
    /// </summary>
    private List<string> BuildRoles()
    {
        var roles = new List<string> { _nodeOptions.Role };

        if (IsRole(SiteRoleName) && !string.IsNullOrEmpty(_nodeOptions.SiteId))
        {
            roles.Add($"site-{_nodeOptions.SiteId}");
        }

        return roles;
    }

    /// <summary>
    /// Registers central-side actors including the CentralCommunicationActor.
    /// WP-4: Central communication actor routes all 8 message patterns to sites.
    /// </summary>
    private void RegisterCentralActors()
    {
        var system = _actorSystem!;

        var siteClientFactory = new DefaultSiteClientFactory();
        var centralCommActor = system.ActorOf(
            Props.Create(() => new CentralCommunicationActor(_serviceProvider, siteClientFactory)),
            "central-communication");

        // Register CentralCommunicationActor with ClusterClientReceptionist so site ClusterClients can reach it
        var receptionist = ClusterClientReceptionist.Get(system);
        receptionist.RegisterService(centralCommActor);
        _logger.LogInformation("CentralCommunicationActor registered with ClusterClientReceptionist");

        // Wire up the CommunicationService with the actor reference
        var commService = _serviceProvider.GetService<CommunicationService>();
        commService?.SetCommunicationActor(centralCommActor);

        // Management Service — accessible via ClusterClient
        var mgmtLogger = _serviceProvider.GetRequiredService<ILoggerFactory>()
            .CreateLogger<ScadaLink.ManagementService.ManagementActor>();
        var mgmtActor = system.ActorOf(
            Props.Create(() => new ScadaLink.ManagementService.ManagementActor(_serviceProvider, mgmtLogger)),
            "management");
        receptionist.RegisterService(mgmtActor);
        _logger.LogInformation("ManagementActor registered with ClusterClientReceptionist");

        _logger.LogInformation("Central actors registered. CentralCommunicationActor created.");
    }

    /// <summary>
    /// Registers site-specific actors including the Deployment Manager cluster singleton
    /// and the SiteCommunicationActor.
    /// The singleton is scoped to the site-specific cluster role so it runs on exactly
    /// one node within this site's cluster.
    /// </summary>
    /// <param name="siteId">Non-empty site identifier; validated by <see cref="StartAsync"/>.</param>
    private void RegisterSiteActors(string siteId)
    {
        var system = _actorSystem!;
        var siteRole = $"site-{siteId}";

        // NOTE(review): the service type arguments in this method (storage, compilation,
        // script library, stream manager, DCL factory, health collector, S&F storage and
        // replication service) were reconstructed from variable names and imported
        // namespaces — confirm each against the DI registrations.
        var storage = _serviceProvider.GetRequiredService<SiteStorageService>();
        var compilationService = _serviceProvider.GetRequiredService<ScriptCompilationService>();
        var sharedScriptLibrary = _serviceProvider.GetRequiredService<SharedScriptLibrary>();
        var streamManager = _serviceProvider.GetService<SiteStreamManager>();
        var siteRuntimeOptionsValue = _serviceProvider.GetService<IOptions<SiteRuntimeOptions>>()?.Value
            ?? new SiteRuntimeOptions();
        var loggerFactory = _serviceProvider.GetRequiredService<ILoggerFactory>();
        var dmLogger = loggerFactory.CreateLogger<DeploymentManagerActor>();

        // WP-34: Create DCL Manager Actor for tag subscriptions
        var dclFactory = _serviceProvider.GetService<ScadaLink.DataConnectionLayer.IDataConnectionFactory>();
        var dclOptions = _serviceProvider
            .GetService<IOptions<ScadaLink.DataConnectionLayer.DataConnectionOptions>>()?.Value
            ?? new ScadaLink.DataConnectionLayer.DataConnectionOptions();
        IActorRef? dclManager = null;
        if (dclFactory != null)
        {
            var healthCollector =
                _serviceProvider.GetRequiredService<ScadaLink.HealthMonitoring.ISiteHealthCollector>();
            dclManager = system.ActorOf(
                Props.Create(() => new ScadaLink.DataConnectionLayer.Actors.DataConnectionManagerActor(
                    dclFactory, dclOptions, healthCollector)),
                "dcl-manager");
            _logger.LogInformation("Data Connection Layer manager actor created");
        }

        // Resolve the health collector for the Deployment Manager
        var siteHealthCollector = _serviceProvider.GetService<ScadaLink.HealthMonitoring.ISiteHealthCollector>();

        // Create SiteReplicationActor on every node (not a singleton)
        var sfStorage = _serviceProvider.GetRequiredService<StoreAndForwardStorage>();
        var replicationService = _serviceProvider.GetRequiredService<ReplicationService>();
        var replicationLogger = loggerFactory.CreateLogger<SiteReplicationActor>();
        var replicationActor = system.ActorOf(
            Props.Create(() => new SiteReplicationActor(
                storage, sfStorage, replicationService, siteRole, replicationLogger)),
            "site-replication");

        // Wire S&F replication handler to forward operations via the replication actor
        replicationService.SetReplicationHandler(op =>
        {
            replicationActor.Tell(new ReplicateStoreAndForward(op));
            return Task.CompletedTask;
        });
        _logger.LogInformation("SiteReplicationActor created and S&F replication handler wired");

        // Create the Deployment Manager as a cluster singleton
        var singletonProps = ClusterSingletonManager.Props(
            singletonProps: Props.Create(() => new DeploymentManagerActor(
                storage,
                compilationService,
                sharedScriptLibrary,
                streamManager,
                siteRuntimeOptionsValue,
                dmLogger,
                dclManager,
                replicationActor,
                siteHealthCollector,
                _serviceProvider)),
            terminationMessage: PoisonPill.Instance,
            settings: ClusterSingletonManagerSettings.Create(system)
                .WithRole(siteRole)
                .WithSingletonName("deployment-manager"));
        system.ActorOf(singletonProps, "deployment-manager-singleton");

        // Create a proxy for other actors to communicate with the singleton
        var proxyProps = ClusterSingletonProxy.Props(
            singletonManagerPath: "/user/deployment-manager-singleton",
            settings: ClusterSingletonProxySettings.Create(system)
                .WithRole(siteRole)
                .WithSingletonName("deployment-manager"));
        var dmProxy = system.ActorOf(proxyProps, "deployment-manager-proxy");

        // WP-4: Create SiteCommunicationActor for receiving messages from central
        var siteCommActor = system.ActorOf(
            Props.Create(() => new SiteCommunicationActor(
                siteId, _communicationOptions, dmProxy)),
            "site-communication");

        // Register local handlers with SiteCommunicationActor
        siteCommActor.Tell(new RegisterLocalHandler(LocalHandlerType.Artifacts, dmProxy));

        // Register SiteCommunicationActor with ClusterClientReceptionist so central ClusterClients can reach it
        ClusterClientReceptionist.Get(system).RegisterService(siteCommActor);

        _logger.LogInformation(
            "Site actors registered. DeploymentManager singleton scoped to role={SiteRole}, SiteCommunicationActor created.",
            siteRole);

        // Create ClusterClient to central if contact points are configured
        if (_communicationOptions.CentralContactPoints.Count > 0)
        {
            var contacts = _communicationOptions.CentralContactPoints
                .Select(cp => ActorPath.Parse($"{cp}/system/receptionist"))
                .ToImmutableHashSet();
            var clientSettings = ClusterClientSettings.Create(system)
                .WithInitialContacts(contacts);
            var centralClient = system.ActorOf(
                ClusterClient.Props(clientSettings), "central-cluster-client");

            // The actor reference is already in scope, so tell it directly rather than
            // resolving "/user/site-communication" through an ActorSelection.
            siteCommActor.Tell(new RegisterCentralClient(centralClient));

            _logger.LogInformation(
                "Created ClusterClient to central with {Count} contact point(s) for site {SiteId}",
                contacts.Count, siteId);
        }
    }
}