fix(site-runtime): resolve SiteRuntime-001/002/003 — route data-sourced writes to DCL, real per-attribute API results, race-free redeploy

This commit is contained in:
Joseph Doherty
2026-05-16 19:57:28 -04:00
parent 1ae11d1135
commit 09b4bd5dfa
9 changed files with 575 additions and 52 deletions

View File

@@ -39,6 +39,12 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
private readonly ISiteHealthCollector? _healthCollector;
private readonly IServiceProvider? _serviceProvider;
private readonly Dictionary<string, IActorRef> _instanceActors = new();
/// <summary>
/// Tracks Instance Actors that are terminating as part of a redeployment, keyed by
/// the terminating actor ref. Each value holds the buffered deploy command together
/// with the sender that issued it. An entry is added when a redeployment stops the
/// running actor and is removed once <see cref="Terminated"/> confirms the child has
/// fully stopped, at which point the buffered command is applied (SiteRuntime-003).
/// </summary>
private readonly Dictionary<IActorRef, PendingRedeploy> _pendingRedeploys = new();
private int _totalDeployedCount;
public ITimerScheduler Timers { get; set; } = null!;
@@ -94,6 +100,10 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
// Internal deploy persistence result
Receive<DeployPersistenceResult>(HandleDeployPersistenceResult);
// Terminated signal — drains a buffered redeployment once the previous
// Instance Actor has fully stopped (SiteRuntime-003).
Receive<Terminated>(HandleTerminated);
}
protected override void PreStart()
@@ -211,6 +221,13 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
/// <summary>
/// Handles a new deployment: stores config in SQLite, clears previous static overrides,
/// and creates or replaces the Instance Actor.
///
/// Redeployment of an already-running instance must wait for the previous Instance
/// Actor to fully terminate (including PostStop on its descendants) before the
/// replacement is created — otherwise <see cref="Context.ActorOf"/> can collide on
/// the still-registered child name. Instead of guessing with a fixed timer, the
/// terminating child is watched and the in-flight command is buffered until the
/// <see cref="Terminated"/> signal arrives.
/// </summary>
private void HandleDeploy(DeployInstanceCommand command)
{
@@ -219,28 +236,54 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
"Deploying instance {Instance}, deploymentId={DeploymentId}",
instanceName, command.DeploymentId);
// Stop existing actor if present (redeployment replaces)
// Redeployment replaces a running instance. Watch + stop the existing actor
// and buffer this command until its Terminated signal confirms the child
// (and its whole subtree) has fully stopped and freed its actor name.
if (_instanceActors.TryGetValue(instanceName, out var existing))
{
Context.Stop(existing);
_instanceActors.Remove(instanceName);
// Wait for the child to be removed from the children collection
// by yielding and retrying — Context.Stop is processed before the next message
Context.System.Scheduler.ScheduleTellOnce(
TimeSpan.FromMilliseconds(500), Self, command, Sender);
_pendingRedeploys[existing] = new PendingRedeploy(command, Sender);
Context.Watch(existing);
Context.Stop(existing);
UpdateInstanceCounts();
return;
}
// Fresh deployment — no existing actor to replace.
ApplyDeployment(command, Sender, isRedeploy: false);
}
/// <summary>
/// Completes a redeployment after the replaced Instance Actor has stopped.
/// If the terminated actor has a buffered <see cref="DeployInstanceCommand"/>, the
/// entry is removed and the command is applied as a redeploy on behalf of the
/// original sender. Terminated signals for actors with no buffered command are ignored.
/// </summary>
/// <param name="terminated">The death-watch notification for a stopped actor.</param>
private void HandleTerminated(Terminated terminated)
{
    var stoppedActor = terminated.ActorRef;

    if (_pendingRedeploys.TryGetValue(stoppedActor, out var buffered))
    {
        // Drop the entry before applying so the buffered command is consumed exactly once.
        _pendingRedeploys.Remove(stoppedActor);
        ApplyDeployment(buffered.Command, buffered.OriginalSender, isRedeploy: true);
    }
}
/// <summary>
/// Creates the Instance Actor, persists the config, and replies to the deployer.
/// A redeployment is an update of an existing instance, so the deployed-instance
/// counter is only incremented for genuinely new deployments.
/// </summary>
private void ApplyDeployment(DeployInstanceCommand command, IActorRef sender, bool isRedeploy)
{
var instanceName = command.InstanceUniqueName;
// Ensure DCL connections exist for any data-sourced attributes
EnsureDclConnections(command.FlattenedConfigurationJson);
// Create the Instance Actor immediately (no existing actor to replace)
// Create the Instance Actor immediately
CreateInstanceActor(instanceName, command.FlattenedConfigurationJson);
_totalDeployedCount++;
if (!isRedeploy)
_totalDeployedCount++;
UpdateInstanceCounts();
// Persist to SQLite and clear static overrides asynchronously
var sender = Sender;
Task.Run(async () =>
{
await _storage.StoreDeployedConfigAsync(
@@ -614,9 +657,11 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
/// <summary>
/// Writes attribute values on a deployed instance for a Route.To().SetAttribute(s)
/// call (or a central Test Run bound to the instance). Writes are Tell'd to the
/// Instance Actor — serialized through its mailbox — and acknowledged optimistically,
/// matching the fire-and-forget semantics of Instance.SetAttribute.
/// call (or a central Test Run bound to the instance). Each write is Ask'd to the
/// Instance Actor, which routes data-sourced attributes through the DCL and static
/// attributes to a persisted override. The response reflects the real per-attribute
/// outcome (a non-existent attribute or a failed device write reports failure),
/// rather than an unconditional optimistic ack.
/// </summary>
private void RouteInboundApiSetAttributes(RouteToSetAttributesRequest request)
{
@@ -629,14 +674,33 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
return;
}
foreach (var (name, value) in request.AttributeValues)
{
instanceActor.Tell(new SetStaticAttributeCommand(
request.CorrelationId, request.InstanceUniqueName, name, value, DateTimeOffset.UtcNow));
}
var sender = Sender;
var correlationId = request.CorrelationId;
var asks = request.AttributeValues
.Select(kvp => instanceActor.Ask<SetStaticAttributeResponse>(
new SetStaticAttributeCommand(
correlationId, request.InstanceUniqueName, kvp.Key, kvp.Value, DateTimeOffset.UtcNow),
TimeSpan.FromSeconds(30)))
.ToArray();
Sender.Tell(new RouteToSetAttributesResponse(
request.CorrelationId, true, null, DateTimeOffset.UtcNow));
Task.WhenAll(asks).ContinueWith(t =>
{
if (!t.IsCompletedSuccessfully)
return new RouteToSetAttributesResponse(
correlationId, false,
t.Exception?.GetBaseException().Message ?? "Attribute write timed out",
DateTimeOffset.UtcNow);
var failures = t.Result
.Where(r => !r.Success)
.Select(r => $"{r.AttributeName}: {r.ErrorMessage}")
.ToArray();
return failures.Length == 0
? new RouteToSetAttributesResponse(correlationId, true, null, DateTimeOffset.UtcNow)
: new RouteToSetAttributesResponse(
correlationId, false, string.Join("; ", failures), DateTimeOffset.UtcNow);
}).PipeTo(sender);
}
/// <summary>
@@ -789,4 +853,9 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
EnableInstanceCommand Command, DeployedInstance? Config, string? Error, IActorRef OriginalSender);
internal record DeployPersistenceResult(
string DeploymentId, string InstanceName, bool Success, string? Error, IActorRef OriginalSender);
/// <summary>
/// A redeployment command buffered until the previous Instance Actor terminates.
/// </summary>
/// <param name="Command">The deploy command to apply once the previous actor has stopped.</param>
/// <param name="OriginalSender">
/// The sender captured when the command was first received; it is handed to the
/// deployment step when the buffered command is finally applied.
/// </param>
internal record PendingRedeploy(DeployInstanceCommand Command, IActorRef OriginalSender);
}