feat: replace site registration with database-driven site addressing

Central now resolves site Akka remoting addresses from the Sites DB table
(NodeAAddress/NodeBAddress) instead of relying on runtime RegisterSite
messages. Eliminates the race condition where sites starting before central
had their registration dead-lettered. Addresses are cached in
CentralCommunicationActor with 60s periodic refresh and on-demand refresh
when sites are added/edited/deleted via UI or CLI.
This commit is contained in:
Joseph Doherty
2026-03-17 23:13:10 -04:00
parent eb8d5ca2c0
commit 9e97c1acd2
21 changed files with 1641 additions and 92 deletions

View File

@@ -12,6 +12,7 @@ public static class SiteCommands
command.Add(BuildList(contactPointsOption, formatOption));
command.Add(BuildCreate(contactPointsOption, formatOption));
command.Add(BuildUpdate(contactPointsOption, formatOption));
command.Add(BuildDelete(contactPointsOption, formatOption));
command.Add(BuildDeployArtifacts(contactPointsOption, formatOption));
@@ -34,19 +35,53 @@ public static class SiteCommands
var nameOption = new Option<string>("--name") { Description = "Site name", Required = true };
var identifierOption = new Option<string>("--identifier") { Description = "Site identifier", Required = true };
var descOption = new Option<string?>("--description") { Description = "Site description" };
var nodeAOption = new Option<string?>("--node-a-address") { Description = "Akka address for Node A" };
var nodeBOption = new Option<string?>("--node-b-address") { Description = "Akka address for Node B" };
var cmd = new Command("create") { Description = "Create a new site" };
cmd.Add(nameOption);
cmd.Add(identifierOption);
cmd.Add(descOption);
cmd.Add(nodeAOption);
cmd.Add(nodeBOption);
cmd.SetAction(async (ParseResult result) =>
{
var name = result.GetValue(nameOption)!;
var identifier = result.GetValue(identifierOption)!;
var desc = result.GetValue(descOption);
var nodeA = result.GetValue(nodeAOption);
var nodeB = result.GetValue(nodeBOption);
return await CommandHelpers.ExecuteCommandAsync(
result, contactPointsOption, formatOption,
new CreateSiteCommand(name, identifier, desc));
new CreateSiteCommand(name, identifier, desc, nodeA, nodeB));
});
return cmd;
}
/// <summary>
/// Builds the <c>update</c> subcommand, which sends an <c>UpdateSiteCommand</c> for an existing site.
/// </summary>
/// <param name="contactPointsOption">Shared contact-points option, forwarded to the command helper.</param>
/// <param name="formatOption">Shared output-format option, forwarded to the command helper.</param>
/// <returns>The configured <c>update</c> command.</returns>
private static Command BuildUpdate(Option<string> contactPointsOption, Option<string> formatOption)
{
    var siteId = new Option<int>("--id") { Description = "Site ID", Required = true };
    var siteName = new Option<string>("--name") { Description = "Site name", Required = true };
    var description = new Option<string?>("--description") { Description = "Site description" };
    var nodeAAddress = new Option<string?>("--node-a-address") { Description = "Akka address for Node A" };
    var nodeBAddress = new Option<string?>("--node-b-address") { Description = "Akka address for Node B" };

    var update = new Command("update") { Description = "Update an existing site" };

    // Options are registered in the same order they are declared above.
    update.Add(siteId);
    update.Add(siteName);
    update.Add(description);
    update.Add(nodeAAddress);
    update.Add(nodeBAddress);

    update.SetAction(async (ParseResult parsed) =>
    {
        // Gather every parsed value into the management message before dispatching it.
        var message = new UpdateSiteCommand(
            parsed.GetValue(siteId),
            parsed.GetValue(siteName)!,
            parsed.GetValue(description),
            parsed.GetValue(nodeAAddress),
            parsed.GetValue(nodeBAddress));

        return await CommandHelpers.ExecuteCommandAsync(
            parsed, contactPointsOption, formatOption, message);
    });

    return update;
}

View File

@@ -2,10 +2,12 @@
@using ScadaLink.Security
@using ScadaLink.Commons.Entities.Sites
@using ScadaLink.Commons.Interfaces.Repositories
@using ScadaLink.Communication
@using ScadaLink.DeploymentManager
@attribute [Authorize(Policy = AuthorizationPolicies.RequireAdmin)]
@inject ISiteRepository SiteRepository
@inject ArtifactDeploymentService ArtifactDeploymentService
@inject CommunicationService CommunicationService
@inject AuthenticationStateProvider AuthStateProvider
<div class="container-fluid mt-3">
@@ -61,6 +63,18 @@
<button class="btn btn-outline-secondary btn-sm" @onclick="CancelForm">Cancel</button>
</div>
</div>
<div class="row g-2 align-items-end mt-1">
<div class="col-md-6">
<label class="form-label small">Node A Address</label>
<input type="text" class="form-control form-control-sm" @bind="_formNodeAAddress"
placeholder="akka.tcp://scadalink@host:port/user/site-communication" />
</div>
<div class="col-md-6">
<label class="form-label small">Node B Address (optional)</label>
<input type="text" class="form-control form-control-sm" @bind="_formNodeBAddress"
placeholder="akka.tcp://scadalink@host:port/user/site-communication" />
</div>
</div>
@if (_formError != null)
{
<div class="text-danger small mt-1">@_formError</div>
@@ -76,6 +90,8 @@
<th>Name</th>
<th>Identifier</th>
<th>Description</th>
<th>Node A</th>
<th>Node B</th>
<th>Data Connections</th>
<th style="width: 260px;">Actions</th>
</tr>
@@ -84,7 +100,7 @@
@if (_sites.Count == 0)
{
<tr>
<td colspan="6" class="text-muted text-center">No sites configured.</td>
<td colspan="8" class="text-muted text-center">No sites configured.</td>
</tr>
}
@foreach (var site in _sites)
@@ -94,6 +110,8 @@
<td>@site.Name</td>
<td><code>@site.SiteIdentifier</code></td>
<td class="text-muted small">@(site.Description ?? "—")</td>
<td class="small text-truncate" style="max-width: 200px;" title="@site.NodeAAddress">@(site.NodeAAddress ?? "—")</td>
<td class="small text-truncate" style="max-width: 200px;" title="@site.NodeBAddress">@(site.NodeBAddress ?? "—")</td>
<td>
@{
var conns = _siteConnections.GetValueOrDefault(site.Id);
@@ -143,6 +161,8 @@
private string _formName = string.Empty;
private string _formIdentifier = string.Empty;
private string? _formDescription;
private string? _formNodeAAddress;
private string? _formNodeBAddress;
private string? _formError;
private bool _deploying;
@@ -185,6 +205,8 @@
_formName = string.Empty;
_formIdentifier = string.Empty;
_formDescription = null;
_formNodeAAddress = null;
_formNodeBAddress = null;
_formError = null;
_showForm = true;
}
@@ -195,6 +217,8 @@
_formName = site.Name;
_formIdentifier = site.SiteIdentifier;
_formDescription = site.Description;
_formNodeAAddress = site.NodeAAddress;
_formNodeBAddress = site.NodeBAddress;
_formError = null;
_showForm = true;
}
@@ -222,6 +246,8 @@
{
_editingSite.Name = _formName.Trim();
_editingSite.Description = _formDescription?.Trim();
_editingSite.NodeAAddress = _formNodeAAddress?.Trim();
_editingSite.NodeBAddress = _formNodeBAddress?.Trim();
await SiteRepository.UpdateSiteAsync(_editingSite);
}
else
@@ -233,12 +259,15 @@
}
var site = new Site(_formName.Trim(), _formIdentifier.Trim())
{
Description = _formDescription?.Trim()
Description = _formDescription?.Trim(),
NodeAAddress = _formNodeAAddress?.Trim(),
NodeBAddress = _formNodeBAddress?.Trim()
};
await SiteRepository.AddSiteAsync(site);
}
await SiteRepository.SaveChangesAsync();
CommunicationService.RefreshSiteAddresses();
_showForm = false;
// Pick the toast text while _editingSite still says whether this was a create or an edit;
// clearing it first would make every save report "Site created.".
var successMessage = _editingSite == null ? "Site created." : "Site updated.";
_editingSite = null;
_toast.ShowSuccess(successMessage);
@@ -262,6 +291,7 @@
{
await SiteRepository.DeleteSiteAsync(site.Id);
await SiteRepository.SaveChangesAsync();
CommunicationService.RefreshSiteAddresses();
_toast.ShowSuccess($"Site '{site.Name}' deleted.");
await LoadDataAsync();
}

View File

@@ -6,6 +6,8 @@ public class Site
public string Name { get; set; }
public string SiteIdentifier { get; set; }
public string? Description { get; set; }
public string? NodeAAddress { get; set; }
public string? NodeBAddress { get; set; }
public Site(string name, string siteIdentifier)
{

View File

@@ -2,8 +2,8 @@ namespace ScadaLink.Commons.Messages.Management;
public record ListSitesCommand;
public record GetSiteCommand(int SiteId);
public record CreateSiteCommand(string Name, string SiteIdentifier, string? Description);
public record UpdateSiteCommand(int SiteId, string Name, string? Description);
public record CreateSiteCommand(string Name, string SiteIdentifier, string? Description, string? NodeAAddress = null, string? NodeBAddress = null);
public record UpdateSiteCommand(int SiteId, string Name, string? Description, string? NodeAAddress = null, string? NodeBAddress = null);
public record DeleteSiteCommand(int SiteId);
public record ListAreasCommand(int SiteId);
public record CreateAreaCommand(int SiteId, string Name, int? ParentAreaId);

View File

@@ -1,5 +1,7 @@
using Akka.Actor;
using Akka.Event;
using Microsoft.Extensions.DependencyInjection;
using ScadaLink.Commons.Interfaces.Repositories;
using ScadaLink.Commons.Messages.Communication;
using ScadaLink.Commons.Messages.Health;
@@ -7,7 +9,7 @@ namespace ScadaLink.Communication.Actors;
/// <summary>
/// Central-side actor that routes messages from central to site clusters via Akka remoting.
/// Maintains a registry of known site actor paths (learned from heartbeats/connection events).
/// Resolves site addresses from the database on a periodic refresh cycle.
///
/// WP-4: All 8 message patterns routed through this actor.
/// WP-5: Ask timeout on connection drop (no central buffering). Debug streams killed on interruption.
@@ -15,12 +17,14 @@ namespace ScadaLink.Communication.Actors;
public class CentralCommunicationActor : ReceiveActor
{
private readonly ILoggingAdapter _log = Context.GetLogger();
private readonly IServiceProvider _serviceProvider;
/// <summary>
/// Maps SiteId → remote SiteCommunicationActor selection.
/// Updated when heartbeats arrive or connection state changes.
/// Cached site address entries loaded from the database.
/// Maps SiteIdentifier → (NodeA selection, NodeB selection).
/// Refreshed periodically via RefreshSiteAddresses.
/// </summary>
private readonly Dictionary<string, ActorSelection> _siteSelections = new();
private Dictionary<string, (ActorSelection? NodeA, ActorSelection? NodeB)> _siteAddressCache = new();
/// <summary>
/// Tracks active debug view subscriptions: correlationId → (siteId, subscriber).
@@ -34,31 +38,30 @@ public class CentralCommunicationActor : ReceiveActor
/// </summary>
private readonly Dictionary<string, string> _inProgressDeployments = new();
public CentralCommunicationActor()
private ICancelable? _refreshSchedule;
public CentralCommunicationActor(IServiceProvider serviceProvider)
{
_serviceProvider = serviceProvider;
// Site address cache loaded from database
Receive<SiteAddressCacheLoaded>(HandleSiteAddressCacheLoaded);
// Periodic refresh trigger
Receive<RefreshSiteAddresses>(_ => LoadSiteAddressesFromDb());
// Site registration via heartbeats
Receive<HeartbeatMessage>(HandleHeartbeat);
// Connection state changes
Receive<ConnectionStateChanged>(HandleConnectionStateChanged);
// Site registration command (manual or from discovery)
Receive<RegisterSite>(HandleRegisterSite);
// Route enveloped messages to sites
Receive<SiteEnvelope>(HandleSiteEnvelope);
}
private void HandleHeartbeat(HeartbeatMessage heartbeat)
{
// Heartbeats arrive from sites — forward to any interested central actors
// The sender's path tells us the site's communication actor address
if (!_siteSelections.ContainsKey(heartbeat.SiteId))
{
var senderPath = Sender.Path.ToString();
_log.Info("Learned site {0} from heartbeat at path {1}", heartbeat.SiteId, senderPath);
}
// Forward heartbeat to parent/subscribers (central health monitoring)
Context.Parent.Tell(heartbeat);
}
@@ -94,7 +97,7 @@ public class CentralCommunicationActor : ReceiveActor
_inProgressDeployments.Remove(deploymentId);
}
_siteSelections.Remove(msg.SiteId);
// Note: Do NOT remove from _siteAddressCache — addresses are persistent in the database
}
else
{
@@ -102,30 +105,71 @@ public class CentralCommunicationActor : ReceiveActor
}
}
private void HandleRegisterSite(RegisterSite msg)
{
var selection = Context.ActorSelection(msg.RemoteActorPath);
_siteSelections[msg.SiteId] = selection;
_log.Info("Registered site {0} at path {1}", msg.SiteId, msg.RemoteActorPath);
}
private void HandleSiteEnvelope(SiteEnvelope envelope)
{
if (!_siteSelections.TryGetValue(envelope.SiteId, out var siteSelection))
if (!_siteAddressCache.TryGetValue(envelope.SiteId, out var entry))
{
_log.Warning("No known path for site {0}, cannot route message {1}",
_log.Warning("No known address for site {0}, cannot route message {1}",
envelope.SiteId, envelope.Message.GetType().Name);
// The Ask will timeout on the caller side — no central buffering (WP-5)
return;
}
// Prefer NodeA, fall back to NodeB
var selection = entry.NodeA ?? entry.NodeB;
if (selection == null)
{
_log.Warning("Site {0} has no configured node addresses, cannot route message {1}",
envelope.SiteId, envelope.Message.GetType().Name);
return;
}
// Track debug subscriptions for cleanup on disconnect
TrackMessageForCleanup(envelope);
// Forward the inner message to the site, preserving the original sender
// so the site can reply directly to the caller (completing the Ask pattern)
siteSelection.Tell(envelope.Message, Sender);
selection.Tell(envelope.Message, Sender);
}
/// <summary>
/// Starts a background load of all sites from <see cref="ISiteRepository"/> and pipes the result
/// back to this actor as a <see cref="SiteAddressCacheLoaded"/> message.
/// Sites with neither a NodeA nor a NodeB address are left out of the payload.
/// </summary>
/// <remarks>
/// A failed load is logged and then rethrown, so the piped task still faults as it did before
/// and no <see cref="SiteAddressCacheLoaded"/> message is produced for that attempt.
/// </remarks>
private void LoadSiteAddressesFromDb()
{
    // Capture what the background task needs before leaving the actor's message handler.
    var self = Self;
    var log = _log;
    var serviceProvider = _serviceProvider;

    Task.Run(async () =>
    {
        try
        {
            // The repository is resolved from a fresh scope that is disposed when the load finishes.
            using var scope = serviceProvider.CreateScope();
            var repo = scope.ServiceProvider.GetRequiredService<ISiteRepository>();
            var sites = await repo.GetAllSitesAsync();

            var cache = new Dictionary<string, (string? NodeAAddress, string? NodeBAddress)>();
            foreach (var site in sites)
            {
                // A site with no address at all cannot be routed to, so it is not cached.
                if (string.IsNullOrWhiteSpace(site.NodeAAddress) && string.IsNullOrWhiteSpace(site.NodeBAddress))
                {
                    continue;
                }

                cache[site.SiteIdentifier] = (site.NodeAAddress, site.NodeBAddress);
            }

            return new SiteAddressCacheLoaded(cache);
        }
        catch (Exception ex)
        {
            // Without this, a failed refresh only surfaces as a piped failure message,
            // which this actor registers no handler for.
            log.Error(ex, "Failed to load site addresses from the database");
            throw;
        }
    }).PipeTo(self);
}
/// <summary>
/// Rebuilds the site address cache from a freshly loaded set of address strings.
/// Each non-blank address becomes an <see cref="ActorSelection"/>; blank or missing addresses
/// are stored as <c>null</c>. The previous cache is replaced as a whole.
/// </summary>
/// <param name="msg">The loaded NodeA/NodeB address strings, one entry per site.</param>
private void HandleSiteAddressCacheLoaded(SiteAddressCacheLoaded msg)
{
    // Turns one raw address string into a selection, or null when nothing usable is configured.
    ActorSelection? Resolve(string? address)
    {
        if (string.IsNullOrWhiteSpace(address))
        {
            return null;
        }

        return Context.ActorSelection(address);
    }

    var rebuilt = new Dictionary<string, (ActorSelection? NodeA, ActorSelection? NodeB)>();
    foreach (var entry in msg.Addresses)
    {
        var addresses = entry.Value;
        rebuilt[entry.Key] = (Resolve(addresses.NodeAAddress), Resolve(addresses.NodeBAddress));
    }

    _siteAddressCache = rebuilt;
    _log.Info("Site address cache refreshed with {0} site(s)", _siteAddressCache.Count);
}
private void TrackMessageForCleanup(SiteEnvelope envelope)
@@ -149,11 +193,20 @@ public class CentralCommunicationActor : ReceiveActor
/// <summary>
/// Logs actor startup and arms the repeating <see cref="RefreshSiteAddresses"/> timer.
/// The first tick is scheduled with zero delay; later ticks follow every 60 seconds.
/// </summary>
protected override void PreStart()
{
    _log.Info("CentralCommunicationActor started");

    var initialDelay = TimeSpan.Zero;
    var refreshInterval = TimeSpan.FromSeconds(60);

    // The handle is kept in _refreshSchedule so the timer can be cancelled later.
    _refreshSchedule = Context.System.Scheduler.ScheduleTellRepeatedlyCancelable(
        initialDelay,
        refreshInterval,
        Self,
        new RefreshSiteAddresses(),
        ActorRefs.NoSender);
}
protected override void PostStop()
{
_log.Info("CentralCommunicationActor stopped. In-progress deployments treated as failed (WP-5).");
_refreshSchedule?.Cancel();
// On central failover, all in-progress deployments are failed
_inProgressDeployments.Clear();
_debugSubscriptions.Clear();
@@ -161,9 +214,15 @@ public class CentralCommunicationActor : ReceiveActor
}
/// <summary>
/// Command to register a site's remote communication actor path.
/// Command to trigger a refresh of site addresses from the database.
/// </summary>
public record RegisterSite(string SiteId, string RemoteActorPath);
public record RefreshSiteAddresses;
/// <summary>
/// Internal message carrying the loaded site address data from the database.
/// It holds plain address strings only; ActorSelection creation happens on the actor thread
/// in HandleSiteAddressCacheLoaded.
/// </summary>
/// <param name="Addresses">
/// Raw NodeA/NodeB address strings per site; either element of a pair may be null.
/// NOTE(review): this is a mutable Dictionary inside a message — treat it as read-only once sent.
/// </param>
internal record SiteAddressCacheLoaded(Dictionary<string, (string? NodeAAddress, string? NodeBAddress)> Addresses);
/// <summary>
/// Notification sent to debug view subscribers when the stream is terminated

View File

@@ -9,6 +9,7 @@ using ScadaLink.Commons.Messages.InboundApi;
using ScadaLink.Commons.Messages.Integration;
using ScadaLink.Commons.Messages.Lifecycle;
using ScadaLink.Commons.Messages.RemoteQuery;
using ScadaLink.Communication.Actors;
namespace ScadaLink.Communication;
@@ -39,6 +40,14 @@ public class CommunicationService
_centralCommunicationActor = centralCommunicationActor;
}
/// <summary>
/// Requests an immediate reload of the site address cache from the database.
/// The request is sent to the central communication actor with Tell; no reply is awaited.
/// </summary>
public void RefreshSiteAddresses()
{
    var centralActor = GetActor();
    var refreshRequest = new RefreshSiteAddresses();
    centralActor.Tell(refreshRequest);
}
private IActorRef GetActor()
{
return _centralCommunicationActor

View File

@@ -21,6 +21,9 @@ public class SiteConfiguration : IEntityTypeConfiguration<Site>
builder.Property(s => s.Description)
.HasMaxLength(2000);
builder.Property(s => s.NodeAAddress).HasMaxLength(500);
builder.Property(s => s.NodeBAddress).HasMaxLength(500);
builder.HasIndex(s => s.Name).IsUnique();
builder.HasIndex(s => s.SiteIdentifier).IsUnique();
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,42 @@
using Microsoft.EntityFrameworkCore.Migrations;
#nullable disable
#pragma warning disable CA1814 // Prefer jagged arrays over multidimensional
namespace ScadaLink.ConfigurationDatabase.Migrations
{
// Schema migration for the Sites table: adds the NodeAAddress and NodeBAddress columns.
/// <inheritdoc />
public partial class AddSiteNodeAddresses : Migration
{
// Forward migration: adds both address columns as nullable nvarchar(500).
/// <inheritdoc />
protected override void Up(MigrationBuilder migrationBuilder)
{
migrationBuilder.AddColumn<string>(
name: "NodeAAddress",
table: "Sites",
type: "nvarchar(500)",
maxLength: 500,
nullable: true);
migrationBuilder.AddColumn<string>(
name: "NodeBAddress",
table: "Sites",
type: "nvarchar(500)",
maxLength: 500,
nullable: true);
}
// Rollback: drops both address columns, discarding any addresses stored in them.
/// <inheritdoc />
protected override void Down(MigrationBuilder migrationBuilder)
{
migrationBuilder.DropColumn(
name: "NodeAAddress",
table: "Sites");
migrationBuilder.DropColumn(
name: "NodeBAddress",
table: "Sites");
}
}
}

View File

@@ -699,6 +699,24 @@ namespace ScadaLink.ConfigurationDatabase.Migrations
Id = 1,
LdapGroupName = "SCADA-Admins",
Role = "Admin"
},
new
{
Id = 2,
LdapGroupName = "SCADA-Designers",
Role = "Design"
},
new
{
Id = 3,
LdapGroupName = "SCADA-Deploy-All",
Role = "Deployment"
},
new
{
Id = 4,
LdapGroupName = "SCADA-Deploy-SiteA",
Role = "Deployment"
});
});
@@ -773,6 +791,14 @@ namespace ScadaLink.ConfigurationDatabase.Migrations
.HasMaxLength(200)
.HasColumnType("nvarchar(200)");
b.Property<string>("NodeAAddress")
.HasMaxLength(500)
.HasColumnType("nvarchar(500)");
b.Property<string>("NodeBAddress")
.HasMaxLength(500)
.HasColumnType("nvarchar(500)");
b.Property<string>("SiteIdentifier")
.IsRequired()
.HasMaxLength(100)

View File

@@ -169,7 +169,7 @@ akka {{
private void RegisterCentralActors()
{
var centralCommActor = _actorSystem!.ActorOf(
Props.Create(() => new CentralCommunicationActor()),
Props.Create(() => new CentralCommunicationActor(_serviceProvider)),
"central-communication");
// Wire up the CommunicationService with the actor reference
@@ -264,13 +264,8 @@ akka {{
var siteCommActor = _actorSystem.ActorSelection("/user/site-communication");
siteCommActor.Tell(new RegisterCentralPath(_communicationOptions.CentralActorPath));
// Also register this site with Central so it knows our address
var centralSelection = _actorSystem.ActorSelection(_communicationOptions.CentralActorPath);
var localSiteCommPath = $"akka.tcp://scadalink@{_nodeOptions.NodeHostname}:{_nodeOptions.RemotingPort}/user/site-communication";
centralSelection.Tell(new RegisterSite(_nodeOptions.SiteId!, localSiteCommPath));
_logger.LogInformation(
"Registered with Central at {CentralPath} as site {SiteId}",
"Configured central heartbeat path at {CentralPath} for site {SiteId}",
_communicationOptions.CentralActorPath, _nodeOptions.SiteId);
}
}

View File

@@ -83,11 +83,14 @@
<script src="/_framework/blazor.web.js"></script>
<script>
// Reconnection overlay for failover behavior
if (Blazor) {
Blazor.addEventListener('enhancedload', () => {
document.getElementById('reconnect-modal').style.display = 'none';
});
}
// Blazor object is available after blazor.web.js initializes
document.addEventListener('DOMContentLoaded', () => {
if (typeof Blazor !== 'undefined') {
Blazor.addEventListener('enhancedload', () => {
document.getElementById('reconnect-modal').style.display = 'none';
});
}
});
</script>
<script src="/lib/bootstrap/js/bootstrap.bundle.min.js"></script>
</body>

View File

@@ -130,6 +130,7 @@ try
ResponseWriter = UIResponseWriter.WriteHealthCheckUIResponse
});
app.MapStaticAssets();
app.MapCentralUI<ScadaLink.Host.Components.App>();
app.MapInboundAPI();
await app.RunAsync();

View File

@@ -13,6 +13,7 @@ using ScadaLink.Commons.Interfaces.Repositories;
using ScadaLink.Commons.Messages.Management;
using ScadaLink.DeploymentManager;
using ScadaLink.HealthMonitoring;
using ScadaLink.Communication;
using ScadaLink.TemplateEngine;
using ScadaLink.TemplateEngine.Services;
@@ -320,9 +321,16 @@ public class ManagementActor : ReceiveActor
private static async Task<object?> HandleCreateSite(IServiceProvider sp, CreateSiteCommand cmd)
{
var repo = sp.GetRequiredService<ISiteRepository>();
var site = new Site(cmd.Name, cmd.SiteIdentifier) { Description = cmd.Description };
var site = new Site(cmd.Name, cmd.SiteIdentifier)
{
Description = cmd.Description,
NodeAAddress = cmd.NodeAAddress,
NodeBAddress = cmd.NodeBAddress
};
await repo.AddSiteAsync(site);
await repo.SaveChangesAsync();
var commService = sp.GetService<CommunicationService>();
commService?.RefreshSiteAddresses();
return site;
}
@@ -333,8 +341,12 @@ public class ManagementActor : ReceiveActor
?? throw new InvalidOperationException($"Site with ID {cmd.SiteId} not found.");
site.Name = cmd.Name;
site.Description = cmd.Description;
site.NodeAAddress = cmd.NodeAAddress;
site.NodeBAddress = cmd.NodeBAddress;
await repo.UpdateSiteAsync(site);
await repo.SaveChangesAsync();
var commService = sp.GetService<CommunicationService>();
commService?.RefreshSiteAddresses();
return site;
}
@@ -348,6 +360,8 @@ public class ManagementActor : ReceiveActor
$"Cannot delete site {cmd.SiteId}: it has {instances.Count} instance(s).");
await repo.DeleteSiteAsync(cmd.SiteId);
await repo.SaveChangesAsync();
var commService = sp.GetService<CommunicationService>();
commService?.RefreshSiteAddresses();
return true;
}