fix(site-runtime): resolve SiteRuntime-004..011 — deploy-after-persist, remove reflection, deterministic IDs, non-blocking startup, dedicated script scheduler, config-change detection, semantic trust-model check

This commit is contained in:
Joseph Doherty
2026-05-16 21:44:10 -04:00
parent 24a4a2d165
commit a88bec9376
17 changed files with 1112 additions and 150 deletions

View File

@@ -99,6 +99,7 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
// Internal startup messages
Receive<StartupConfigsLoaded>(HandleStartupConfigsLoaded);
Receive<SharedScriptsLoaded>(HandleSharedScriptsLoaded);
Receive<StartNextBatch>(HandleStartNextBatch);
// Internal enable result
@@ -156,7 +157,14 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
}
/// <summary>
/// Processes the loaded configs from SQLite and begins staggered Instance Actor creation.
/// Processes the loaded configs from SQLite.
///
/// SiteRuntime-008: shared scripts must be compiled before Instance Actors are
/// created, but the SQLite read and Roslyn compilation must not block the
/// singleton's mailbox. The compilation is run on a background task and a
/// <see cref="SharedScriptsLoaded"/> message is piped back; only then does
/// staggered Instance Actor creation begin. The deployed configs are stashed on the
/// actor field in the meantime.
/// </summary>
private void HandleStartupConfigsLoaded(StartupConfigsLoaded msg)
{
@@ -166,9 +174,6 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
return;
}
// Load and compile shared scripts from SQLite before creating Instance Actors
LoadSharedScriptsFromStorage();
var enabledConfigs = msg.Configs.Where(c => c.IsEnabled).ToList();
_totalDeployedCount = msg.Configs.Count;
_logger.LogInformation(
@@ -176,11 +181,25 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
msg.Configs.Count, enabledConfigs.Count);
UpdateInstanceCounts();
if (enabledConfigs.Count == 0)
// Load and compile shared scripts off the actor thread, then resume startup.
LoadSharedScriptsFromStorage(enabledConfigs);
}
/// <summary>
/// SiteRuntime-008: once shared scripts have been compiled off-thread, begins
/// staggered Instance Actor creation for the enabled configs captured at startup.
/// </summary>
private void HandleSharedScriptsLoaded(SharedScriptsLoaded msg)
{
_logger.LogInformation(
"Loaded {Compiled}/{Total} shared scripts from SQLite",
msg.CompiledCount, msg.TotalCount);
if (msg.EnabledConfigs.Count == 0)
return;
// Start the first batch immediately
var batchState = new BatchState(enabledConfigs, 0);
var batchState = new BatchState(msg.EnabledConfigs, 0);
Self.Tell(new StartNextBatch(batchState));
}
@@ -275,6 +294,13 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
/// Creates the Instance Actor, persists the config, and replies to the deployer.
/// A redeployment is an update of an existing instance, so the deployed-instance
/// counter is only incremented for genuinely new deployments.
///
/// SiteRuntime-005: the deployer is <b>not</b> told <see cref="DeploymentStatus.Success"/>
/// until SQLite persistence has committed. The site's deployed-config store is the
/// durable source of truth — a config that was never persisted would be silently lost
/// on the next restart/failover, so reporting Success before the row is committed is
/// incorrect. The reply is sent from <see cref="HandleDeployPersistenceResult"/> once
/// the persistence outcome is known.
/// </summary>
private void ApplyDeployment(DeployInstanceCommand command, IActorRef sender, bool isRedeploy)
{
@@ -307,33 +333,56 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
instanceName, command.FlattenedConfigurationJson,
command.DeploymentId, command.RevisionHash, true));
return new DeployPersistenceResult(command.DeploymentId, instanceName, true, null, sender);
return new DeployPersistenceResult(
command.DeploymentId, instanceName, true, null, sender, isRedeploy);
}).ContinueWith(t =>
{
if (t.IsCompletedSuccessfully)
return t.Result;
return new DeployPersistenceResult(
command.DeploymentId, instanceName, false,
t.Exception?.GetBaseException().Message, sender);
t.Exception?.GetBaseException().Message, sender, isRedeploy);
}).PipeTo(Self);
// Reply immediately — deployment is applied (actor is running)
sender.Tell(new DeploymentStatusResponse(
command.DeploymentId,
instanceName,
DeploymentStatus.Success,
null,
DateTimeOffset.UtcNow));
}
/// <summary>
/// SiteRuntime-005: reports the deployment outcome to central only after the
/// persistence result is known. On a persistence failure the Instance Actor that was
/// created optimistically is stopped and the deployed-instance counter rolled back,
/// so the in-memory state stays consistent with durable storage, and central is told
/// the deployment <see cref="DeploymentStatus.Failed"/>.
/// </summary>
private void HandleDeployPersistenceResult(DeployPersistenceResult result)
{
if (!result.Success)
if (result.Success)
{
_logger.LogError(
"Failed to persist deployment {DeploymentId} for {Instance}: {Error}",
result.DeploymentId, result.InstanceName, result.Error);
result.OriginalSender.Tell(new DeploymentStatusResponse(
result.DeploymentId,
result.InstanceName,
DeploymentStatus.Success,
null,
DateTimeOffset.UtcNow));
return;
}
_logger.LogError(
"Failed to persist deployment {DeploymentId} for {Instance}: {Error}",
result.DeploymentId, result.InstanceName, result.Error);
// Persistence failed — undo the optimistic actor creation and counter bump so
// the site does not advertise an instance it cannot durably recover.
if (_instanceActors.Remove(result.InstanceName, out var orphan))
Context.Stop(orphan);
if (!result.IsRedeploy)
_totalDeployedCount = Math.Max(0, _totalDeployedCount - 1);
UpdateInstanceCounts();
result.OriginalSender.Tell(new DeploymentStatusResponse(
result.DeploymentId,
result.InstanceName,
DeploymentStatus.Failed,
result.Error ?? "Deployment persistence failed",
DateTimeOffset.UtcNow));
}
/// <summary>
@@ -492,10 +541,20 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
// ── DCL connection management ──
private readonly HashSet<string> _createdConnections = new();
/// <summary>
/// Tracks the configuration last sent to the DCL for each connection name, keyed by
/// a hash of the connection's protocol/endpoints/credentials/failover count
/// (SiteRuntime-010). A name whose hash is unchanged is skipped; a name whose config
/// changed re-issues a <c>CreateConnectionCommand</c> so the DCL adopts the new
/// configuration instead of keeping a stale connection after a redeployment.
/// </summary>
private readonly Dictionary<string, string> _createdConnections = new();
/// <summary>
/// Sets up DCL connections from the flattened config (idempotent: tracks created connections).
/// Sets up DCL connections from the flattened config. Idempotent on unchanged
/// configuration, but re-issues the create command when a connection's endpoint,
/// credentials, backup endpoint, or failover retry count has changed since it was
/// last sent (SiteRuntime-010).
/// </summary>
private void EnsureDclConnections(string configJson)
{
@@ -508,7 +567,8 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
foreach (var (name, connConfig) in config.Connections)
{
if (_createdConnections.Contains(name))
var configHash = ComputeConnectionConfigHash(connConfig);
if (_createdConnections.TryGetValue(name, out var lastHash) && lastHash == configHash)
continue;
var primaryDetails = FlattenConnectionConfig(connConfig.Protocol, connConfig.ConfigurationJson);
@@ -519,10 +579,11 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
_dclManager.Tell(new Commons.Messages.DataConnection.CreateConnectionCommand(
name, connConfig.Protocol, primaryDetails, backupDetails, connConfig.FailoverRetryCount));
_createdConnections.Add(name);
var changed = _createdConnections.ContainsKey(name);
_createdConnections[name] = configHash;
_logger.LogInformation(
"Created DCL connection {Connection} (protocol={Protocol})",
name, connConfig.Protocol);
"{Action} DCL connection {Connection} (protocol={Protocol})",
changed ? "Updated" : "Created", name, connConfig.Protocol);
}
}
catch (Exception ex)
@@ -531,6 +592,26 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
}
}
/// <summary>
/// Computes a stable hash over the configuration fields that affect how the DCL
/// connects, so a changed endpoint/credential/backup/failover count is detected
/// (SiteRuntime-010).
/// </summary>
private static string ComputeConnectionConfigHash(
Commons.Types.Flattening.ConnectionConfig connConfig)
{
var material = string.Join(
"",
connConfig.Protocol,
connConfig.ConfigurationJson ?? string.Empty,
connConfig.BackupConfigurationJson ?? string.Empty,
connConfig.FailoverRetryCount.ToString());
var bytes = System.Security.Cryptography.SHA256.HashData(
System.Text.Encoding.UTF8.GetBytes(material));
return Convert.ToHexString(bytes);
}
private static IDictionary<string, string> FlattenConnectionConfig(string protocol, string? json)
{
if (string.IsNullOrEmpty(json))
@@ -559,25 +640,35 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
// ── Shared Script Loading ──
private void LoadSharedScriptsFromStorage()
/// <summary>
/// SiteRuntime-008: reads and compiles all shared scripts on a background task so the
/// SQLite read and Roslyn compilation never block the singleton's mailbox thread. The
/// result is piped back as a <see cref="SharedScriptsLoaded"/> message, carrying the
/// enabled configs to resume staggered Instance Actor creation on the actor thread.
/// </summary>
private void LoadSharedScriptsFromStorage(List<DeployedInstance> enabledConfigs)
{
try
Task.Run(async () =>
{
var scripts = _storage.GetAllSharedScriptsAsync().GetAwaiter().GetResult();
var scripts = await _storage.GetAllSharedScriptsAsync();
var compiled = 0;
foreach (var script in scripts)
{
if (_sharedScriptLibrary.CompileAndRegister(script.Name, script.Code))
compiled++;
}
_logger.LogInformation(
"Loaded {Compiled}/{Total} shared scripts from SQLite",
compiled, scripts.Count);
}
catch (Exception ex)
return new SharedScriptsLoaded(enabledConfigs, compiled, scripts.Count);
}).ContinueWith(t =>
{
_logger.LogError(ex, "Failed to load shared scripts from SQLite");
}
if (t.IsCompletedSuccessfully)
return t.Result;
_logger.LogError(
t.Exception?.GetBaseException(), "Failed to load shared scripts from SQLite");
// A shared-script load failure must not abandon startup — proceed with
// Instance Actor creation; scripts that need a missing shared script fail
// at execution time and are recorded to the site event log.
return new SharedScriptsLoaded(enabledConfigs, 0, 0);
}).PipeTo(Self);
}
// ── Debug View routing ──
@@ -891,12 +982,22 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
// ── Internal messages ──
internal record StartupConfigsLoaded(List<DeployedInstance> Configs, string? Error);
/// <summary>
/// Internal message piped back once shared scripts have been compiled off-thread
/// (SiteRuntime-008). Carries the enabled configs so staggered Instance Actor
/// creation resumes on the actor thread.
/// </summary>
internal record SharedScriptsLoaded(
List<DeployedInstance> EnabledConfigs, int CompiledCount, int TotalCount);
internal record StartNextBatch(BatchState State);
internal record BatchState(List<DeployedInstance> Configs, int NextIndex);
internal record EnableResult(
EnableInstanceCommand Command, DeployedInstance? Config, string? Error, IActorRef OriginalSender);
internal record DeployPersistenceResult(
string DeploymentId, string InstanceName, bool Success, string? Error, IActorRef OriginalSender);
string DeploymentId, string InstanceName, bool Success, string? Error,
IActorRef OriginalSender, bool IsRedeploy);
/// <summary>
/// A redeployment command buffered until the previous Instance Actor terminates.