feat(deployment-manager): resolve DeploymentManager-006 — query site deployment state before redeploy and reconcile

Adds DeploymentStateQuery request/response contracts (Commons), a site-side
handler (SiteRuntime), a CommunicationService query method (Communication), and
reconciliation in DeploymentService: when a prior record is InProgress or
Failed-on-timeout, query the site; if it already holds the target revision hash,
mark the record Success without re-sending; if the query fails, fall through to a
normal deploy (site-side stale-rejection is the safety net).
This commit is contained in:
Joseph Doherty
2026-05-16 20:12:24 -04:00
parent cac8aebe9f
commit bc548e1447
13 changed files with 662 additions and 19 deletions

View File

@@ -76,6 +76,11 @@ public class SiteCommunicationActor : ReceiveActor, IWithTimers
Receive<EnableInstanceCommand>(msg => _deploymentManagerProxy.Forward(msg));
Receive<DeleteInstanceCommand>(msg => _deploymentManagerProxy.Forward(msg));
// DeploymentManager-006: query-the-site-before-redeploy — forward to
// the Deployment Manager, which owns the deployed-config store and
// answers with the instance's currently-applied deployment identity.
Receive<DeploymentStateQueryRequest>(msg => _deploymentManagerProxy.Forward(msg));
// Pattern 3: Artifact Deployment — forward to artifact handler if registered
Receive<DeployArtifactsCommand>(msg =>
{

View File

@@ -73,6 +73,26 @@ public class CommunicationService
envelope, _options.DeploymentTimeout, cancellationToken);
}
/// <summary>
/// DeploymentManager-006: asks a site which deployment identity it currently
/// has applied for one instance, so the Deployment Manager can reconcile
/// against the site's actual state before re-deploying. The request travels
/// over the existing ClusterClient command/control transport. Nothing is
/// buffered centrally: if the site is unreachable the Ask simply times out,
/// and the caller is expected to fall through to a normal deploy.
/// </summary>
/// <param name="siteId">Identifier of the site the query is addressed to.</param>
/// <param name="request">The query; its instance name and correlation id are logged before sending.</param>
/// <param name="cancellationToken">Token handed to the underlying Ask.</param>
/// <returns>The <see cref="DeploymentStateQueryResponse"/> received in reply to the Ask.</returns>
public async Task<DeploymentStateQueryResponse> QueryDeploymentStateAsync(
    string siteId, DeploymentStateQueryRequest request, CancellationToken cancellationToken = default)
{
    _logger.LogDebug(
        "Sending DeploymentStateQueryRequest to site {SiteId}, instance={Instance}, correlationId={CorrelationId}",
        siteId, request.InstanceUniqueName, request.CorrelationId);

    // Wrap the query with its destination site before handing it to the actor.
    var addressedQuery = new SiteEnvelope(siteId, request);
    var target = GetActor();

    // Bounded by the query timeout option, not the deployment timeout.
    var siteReply = await target.Ask<DeploymentStateQueryResponse>(
        addressedQuery, _options.QueryTimeout, cancellationToken);
    return siteReply;
}
// ── Pattern 2: Lifecycle ──
public async Task<InstanceLifecycleResponse> DisableInstanceAsync(