fix(site-runtime): resolve SiteRuntime-004..011 — deploy-after-persist, remove reflection, deterministic IDs, non-blocking startup, dedicated script scheduler, config-change detection, semantic trust-model check
This commit is contained in:
@@ -99,6 +99,7 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
|
||||
|
||||
// Internal startup messages
|
||||
Receive<StartupConfigsLoaded>(HandleStartupConfigsLoaded);
|
||||
Receive<SharedScriptsLoaded>(HandleSharedScriptsLoaded);
|
||||
Receive<StartNextBatch>(HandleStartNextBatch);
|
||||
|
||||
// Internal enable result
|
||||
@@ -156,7 +157,14 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Processes the loaded configs from SQLite and begins staggered Instance Actor creation.
|
||||
/// Processes the loaded configs from SQLite.
|
||||
///
|
||||
/// SiteRuntime-008: shared scripts must be compiled before Instance Actors are
|
||||
/// created, but the SQLite read and Roslyn compilation must not block the
|
||||
/// singleton's mailbox. The compilation is run on a background task and a
|
||||
/// <see cref="SharedScriptsLoaded"/> message is piped back; only then does
|
||||
/// staggered Instance Actor creation begin. The deployed configs are stashed on the
|
||||
/// actor field in the meantime.
|
||||
/// </summary>
|
||||
private void HandleStartupConfigsLoaded(StartupConfigsLoaded msg)
|
||||
{
|
||||
@@ -166,9 +174,6 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
|
||||
return;
|
||||
}
|
||||
|
||||
// Load and compile shared scripts from SQLite before creating Instance Actors
|
||||
LoadSharedScriptsFromStorage();
|
||||
|
||||
var enabledConfigs = msg.Configs.Where(c => c.IsEnabled).ToList();
|
||||
_totalDeployedCount = msg.Configs.Count;
|
||||
_logger.LogInformation(
|
||||
@@ -176,11 +181,25 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
|
||||
msg.Configs.Count, enabledConfigs.Count);
|
||||
UpdateInstanceCounts();
|
||||
|
||||
if (enabledConfigs.Count == 0)
|
||||
// Load and compile shared scripts off the actor thread, then resume startup.
|
||||
LoadSharedScriptsFromStorage(enabledConfigs);
|
||||
}
|
||||
|
||||
/// <summary>
/// SiteRuntime-008: once shared scripts have been compiled off-thread, begins
/// staggered Instance Actor creation for the enabled configs captured at startup.
/// </summary>
/// <param name="msg">
/// Compilation outcome piped back to this actor; carries the enabled configs plus the
/// compiled/total shared-script counts that are logged here.
/// </param>
private void HandleSharedScriptsLoaded(SharedScriptsLoaded msg)
{
    _logger.LogInformation(
        "Loaded {Compiled}/{Total} shared scripts from SQLite",
        msg.CompiledCount, msg.TotalCount);

    // No enabled instances: there is no batch to schedule.
    if (msg.EnabledConfigs.Count == 0)
        return;

    // Start the first batch immediately. The batch is built from the configs carried
    // on the message; there is no local "enabledConfigs" in this handler.
    var batchState = new BatchState(msg.EnabledConfigs, 0);
    Self.Tell(new StartNextBatch(batchState));
}
|
||||
|
||||
@@ -275,6 +294,13 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
|
||||
/// Creates the Instance Actor, persists the config, and replies to the deployer.
|
||||
/// A redeployment is an update of an existing instance, so the deployed-instance
|
||||
/// counter is only incremented for genuinely new deployments.
|
||||
///
|
||||
/// SiteRuntime-005: the deployer is <b>not</b> told <see cref="DeploymentStatus.Success"/>
|
||||
/// until SQLite persistence has committed. The site's deployed-config store is the
|
||||
/// durable source of truth — a config that was never persisted would be silently lost
|
||||
/// on the next restart/failover, so reporting Success before the row is committed is
|
||||
/// incorrect. The reply is sent from <see cref="HandleDeployPersistenceResult"/> once
|
||||
/// the persistence outcome is known.
|
||||
/// </summary>
|
||||
private void ApplyDeployment(DeployInstanceCommand command, IActorRef sender, bool isRedeploy)
|
||||
{
|
||||
@@ -307,33 +333,56 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
|
||||
instanceName, command.FlattenedConfigurationJson,
|
||||
command.DeploymentId, command.RevisionHash, true));
|
||||
|
||||
return new DeployPersistenceResult(command.DeploymentId, instanceName, true, null, sender);
|
||||
return new DeployPersistenceResult(
|
||||
command.DeploymentId, instanceName, true, null, sender, isRedeploy);
|
||||
}).ContinueWith(t =>
|
||||
{
|
||||
if (t.IsCompletedSuccessfully)
|
||||
return t.Result;
|
||||
return new DeployPersistenceResult(
|
||||
command.DeploymentId, instanceName, false,
|
||||
t.Exception?.GetBaseException().Message, sender);
|
||||
t.Exception?.GetBaseException().Message, sender, isRedeploy);
|
||||
}).PipeTo(Self);
|
||||
|
||||
// Reply immediately — deployment is applied (actor is running)
|
||||
sender.Tell(new DeploymentStatusResponse(
|
||||
command.DeploymentId,
|
||||
instanceName,
|
||||
DeploymentStatus.Success,
|
||||
null,
|
||||
DateTimeOffset.UtcNow));
|
||||
}
|
||||
|
||||
/// <summary>
/// SiteRuntime-005: reports the deployment outcome to central only after the
/// persistence result is known. On a persistence failure the Instance Actor that was
/// created optimistically is stopped and the deployed-instance counter rolled back,
/// so the in-memory state stays consistent with durable storage, and central is told
/// the deployment <see cref="DeploymentStatus.Failed"/>.
/// </summary>
/// <param name="result">
/// Persistence outcome piped back to this actor, including the original sender to
/// reply to and whether the deployment was a redeploy of an existing instance.
/// </param>
private void HandleDeployPersistenceResult(DeployPersistenceResult result)
{
    if (result.Success)
    {
        // The row is committed: only now is it correct to report Success.
        result.OriginalSender.Tell(new DeploymentStatusResponse(
            result.DeploymentId,
            result.InstanceName,
            DeploymentStatus.Success,
            null,
            DateTimeOffset.UtcNow));
        return;
    }

    _logger.LogError(
        "Failed to persist deployment {DeploymentId} for {Instance}: {Error}",
        result.DeploymentId, result.InstanceName, result.Error);

    // Persistence failed — undo the optimistic actor creation and counter bump so
    // the site does not advertise an instance it cannot durably recover.
    // NOTE(review): this removes whatever actor is currently registered under the
    // instance name; confirm a newer deployment cannot have replaced it in between.
    if (_instanceActors.Remove(result.InstanceName, out var orphan))
        Context.Stop(orphan);

    // A redeploy did not increment the counter, so it must not decrement it either.
    if (!result.IsRedeploy)
        _totalDeployedCount = Math.Max(0, _totalDeployedCount - 1);
    UpdateInstanceCounts();

    result.OriginalSender.Tell(new DeploymentStatusResponse(
        result.DeploymentId,
        result.InstanceName,
        DeploymentStatus.Failed,
        result.Error ?? "Deployment persistence failed",
        DateTimeOffset.UtcNow));
}
|
||||
|
||||
/// <summary>
|
||||
@@ -492,10 +541,20 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
|
||||
|
||||
// ── DCL connection management ──
|
||||
|
||||
/// <summary>
/// Tracks the configuration last sent to the DCL for each connection name: the key is
/// the connection name and the value is a hash of the connection's
/// protocol/endpoints/credentials/failover count (SiteRuntime-010). A name whose hash
/// is unchanged is skipped; a name whose config changed re-issues a
/// <c>CreateConnectionCommand</c> so the DCL adopts the new configuration instead of
/// keeping a stale connection after a redeployment.
/// </summary>
private readonly Dictionary<string, string> _createdConnections = new();
|
||||
|
||||
/// <summary>
|
||||
/// Sets up DCL connections from the flattened config (idempotent: tracks created connections).
|
||||
/// Sets up DCL connections from the flattened config. Idempotent on unchanged
|
||||
/// configuration, but re-issues the create command when a connection's endpoint,
|
||||
/// credentials, backup endpoint, or failover retry count has changed since it was
|
||||
/// last sent (SiteRuntime-010).
|
||||
/// </summary>
|
||||
private void EnsureDclConnections(string configJson)
|
||||
{
|
||||
@@ -508,7 +567,8 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
|
||||
|
||||
foreach (var (name, connConfig) in config.Connections)
|
||||
{
|
||||
if (_createdConnections.Contains(name))
|
||||
var configHash = ComputeConnectionConfigHash(connConfig);
|
||||
if (_createdConnections.TryGetValue(name, out var lastHash) && lastHash == configHash)
|
||||
continue;
|
||||
|
||||
var primaryDetails = FlattenConnectionConfig(connConfig.Protocol, connConfig.ConfigurationJson);
|
||||
@@ -519,10 +579,11 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
|
||||
_dclManager.Tell(new Commons.Messages.DataConnection.CreateConnectionCommand(
|
||||
name, connConfig.Protocol, primaryDetails, backupDetails, connConfig.FailoverRetryCount));
|
||||
|
||||
_createdConnections.Add(name);
|
||||
var changed = _createdConnections.ContainsKey(name);
|
||||
_createdConnections[name] = configHash;
|
||||
_logger.LogInformation(
|
||||
"Created DCL connection {Connection} (protocol={Protocol})",
|
||||
name, connConfig.Protocol);
|
||||
"{Action} DCL connection {Connection} (protocol={Protocol})",
|
||||
changed ? "Updated" : "Created", name, connConfig.Protocol);
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
@@ -531,6 +592,26 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Computes a stable hash over the configuration fields that affect how the DCL
/// connects, so a changed endpoint/credential/backup/failover count is detected
/// (SiteRuntime-010).
/// </summary>
/// <param name="connConfig">Connection configuration whose fields are hashed.</param>
/// <returns>Upper-case hex encoding of the SHA-256 digest of the hashed fields.</returns>
/// <remarks>
/// Each field is written as <c>length:value</c>. Plain concatenation is ambiguous:
/// moving text from one field to the next (for example, a config JSON moving from
/// primary to backup) would produce the same input and the change would be missed.
/// A null field is hashed the same as an empty one.
/// </remarks>
private static string ComputeConnectionConfigHash(
    Commons.Types.Flattening.ConnectionConfig connConfig)
{
    var material = new System.Text.StringBuilder();
    AppendField(connConfig.Protocol);
    AppendField(connConfig.ConfigurationJson);
    AppendField(connConfig.BackupConfigurationJson);
    AppendField(connConfig.FailoverRetryCount.ToString());

    var bytes = System.Security.Cryptography.SHA256.HashData(
        System.Text.Encoding.UTF8.GetBytes(material.ToString()));
    return Convert.ToHexString(bytes);

    // Length-prefixes the field so field boundaries are unambiguous in the hash input.
    void AppendField(string? value)
    {
        var text = value ?? string.Empty;
        material.Append(text.Length).Append(':').Append(text);
    }
}
|
||||
|
||||
private static IDictionary<string, string> FlattenConnectionConfig(string protocol, string? json)
|
||||
{
|
||||
if (string.IsNullOrEmpty(json))
|
||||
@@ -559,25 +640,35 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
|
||||
|
||||
// ── Shared Script Loading ──
|
||||
|
||||
/// <summary>
/// SiteRuntime-008: reads and compiles all shared scripts on a background task so the
/// SQLite read and Roslyn compilation never block the singleton's mailbox thread. The
/// result is piped back as a <see cref="SharedScriptsLoaded"/> message, carrying the
/// enabled configs to resume staggered Instance Actor creation on the actor thread.
/// </summary>
/// <param name="enabledConfigs">
/// Enabled configs captured at startup; passed through untouched on the piped message.
/// </param>
private void LoadSharedScriptsFromStorage(List<DeployedInstance> enabledConfigs)
{
    Task.Run(async () =>
    {
        // Awaited rather than blocked on: no thread is parked while storage is read.
        var scripts = await _storage.GetAllSharedScriptsAsync();
        var compiled = 0;
        foreach (var script in scripts)
        {
            // NOTE(review): this runs off the actor thread — confirm the shared
            // script library tolerates registration from a thread-pool thread.
            if (_sharedScriptLibrary.CompileAndRegister(script.Name, script.Code))
                compiled++;
        }

        // The compiled/total counts are logged by the handler of this message.
        return new SharedScriptsLoaded(enabledConfigs, compiled, scripts.Count);
    }).ContinueWith(t =>
    {
        if (t.IsCompletedSuccessfully)
            return t.Result;

        _logger.LogError(
            t.Exception?.GetBaseException(), "Failed to load shared scripts from SQLite");

        // A shared-script load failure must not abandon startup — proceed with
        // Instance Actor creation; scripts that need a missing shared script fail
        // at execution time and are recorded to the site event log.
        return new SharedScriptsLoaded(enabledConfigs, 0, 0);
    }).PipeTo(Self);
}
|
||||
|
||||
// ── Debug View routing ──
|
||||
@@ -891,12 +982,22 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
|
||||
// ── Internal messages ──
|
||||
|
||||
/// <summary>
/// Internal startup message carrying the deployed configs loaded from SQLite.
/// NOTE(review): <c>Error</c> presumably holds the load failure text when the read
/// failed — confirm against the producer of this message.
/// </summary>
internal record StartupConfigsLoaded(List<DeployedInstance> Configs, string? Error);

/// <summary>
/// Internal message piped back once shared scripts have been compiled off-thread
/// (SiteRuntime-008). Carries the enabled configs so staggered Instance Actor
/// creation resumes on the actor thread.
/// </summary>
internal record SharedScriptsLoaded(
    List<DeployedInstance> EnabledConfigs, int CompiledCount, int TotalCount);

/// <summary>Self-addressed message that triggers creation of the next startup batch.</summary>
internal record StartNextBatch(BatchState State);

/// <summary>The configs being started in batches and the index of the next one to process.</summary>
internal record BatchState(List<DeployedInstance> Configs, int NextIndex);

/// <summary>
/// Internal result of an enable request, carrying the originating command and the
/// sender to reply to.
/// </summary>
internal record EnableResult(
    EnableInstanceCommand Command, DeployedInstance? Config, string? Error, IActorRef OriginalSender);

/// <summary>
/// Internal result of persisting a deployment. <c>IsRedeploy</c> lets the failure path
/// roll back the deployed-instance counter only for genuinely new deployments.
/// </summary>
internal record DeployPersistenceResult(
    string DeploymentId, string InstanceName, bool Success, string? Error,
    IActorRef OriginalSender, bool IsRedeploy);
|
||||
|
||||
/// <summary>
|
||||
/// A redeployment command buffered until the previous Instance Actor terminates.
|
||||
|
||||
Reference in New Issue
Block a user