feat(deployment-manager): resolve DeploymentManager-006 — query site deployment state before redeploy and reconcile

Adds DeploymentStateQuery request/response contracts (Commons), a site-side
handler (SiteRuntime), a CommunicationService query method (Communication), and
reconciliation in DeploymentService. When a prior record is InProgress or
Failed-on-timeout, central queries the site: if the site already holds the
target revision hash, the record is marked Success without re-sending; if the
query fails, it falls through to a normal deploy (site-side stale-rejection is
the safety net).
This commit is contained in:
Joseph Doherty
2026-05-16 20:12:24 -04:00
parent cac8aebe9f
commit bc548e1447
13 changed files with 662 additions and 19 deletions

View File

@@ -78,6 +78,12 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
Receive<EnableInstanceCommand>(HandleEnable);
Receive<DeleteInstanceCommand>(HandleDelete);
// DeploymentManager-006: query-the-site-before-redeploy idempotency.
// Central asks for the instance's currently-applied deployment identity
// before re-sending a deployment whose prior record is stuck InProgress
// or Failed due to a timeout.
Receive<DeploymentStateQueryRequest>(HandleDeploymentStateQuery);
// WP-33: Handle system-wide artifact deployment
Receive<DeployArtifactsCommand>(HandleDeployArtifacts);
@@ -446,6 +452,44 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
_logger.LogInformation("Instance {Instance} deleted", instanceName);
}
/// <summary>
/// DeploymentManager-006: replies to a central
/// <see cref="DeploymentStateQueryRequest"/> with the deployment identity
/// currently stored for the requested instance.
/// </summary>
/// <remarks>
/// The answer is read from the site's deployed-config store (SQLite), which is
/// the authoritative record: it covers enabled and disabled instances alike and
/// survives node restart/failover. When no config is stored for the instance,
/// or the store cannot be read, the reply carries <c>IsDeployed = false</c>
/// with a null identity so central falls through to a normal deploy.
/// </remarks>
/// <param name="request">
/// The query; supplies the instance unique name to look up and the correlation
/// id that is echoed back in the response.
/// </param>
private void HandleDeploymentStateQuery(DeploymentStateQueryRequest request)
{
    // Capture the sender up front; the reply is piped to it once the
    // asynchronous store read has finished.
    var replyTo = Sender;
    var targetInstance = request.InstanceUniqueName;

    // Builds the "nothing deployed / unknown" reply shared by the
    // not-found and read-failure paths.
    DeploymentStateQueryResponse NotDeployed() =>
        new DeploymentStateQueryResponse(
            request.CorrelationId, targetInstance, false, null, null, DateTimeOffset.UtcNow);

    _storage.GetAllDeployedConfigsAsync().ContinueWith(readTask =>
    {
        if (readTask.IsCompletedSuccessfully)
        {
            var stored = readTask.Result.FirstOrDefault(
                c => c.InstanceUniqueName == targetInstance);

            if (stored == null)
            {
                return NotDeployed();
            }

            return new DeploymentStateQueryResponse(
                request.CorrelationId, targetInstance, true,
                stored.DeploymentId, stored.RevisionHash, DateTimeOffset.UtcNow);
        }

        _logger.LogError(
            readTask.Exception?.GetBaseException(),
            "Failed to read deployed configs for deployment state query of {Instance}",
            targetInstance);

        // A storage read failure is reported as "unknown": central falls
        // through to a normal deploy and relies on site-side
        // stale-rejection as the safety net.
        return NotDeployed();
    }).PipeTo(replyTo);
}
// ── DCL connection management ──
private readonly HashSet<string> _createdConnections = new();