using Akka.Actor;
using Akka.TestKit.Xunit2;
using Microsoft.Extensions.Logging.Abstractions;
using Microsoft.Extensions.Options;
using NSubstitute;
using ScadaLink.Commons.Entities.Deployment;
using ScadaLink.Commons.Entities.Instances;
using ScadaLink.Commons.Entities.Sites;
using ScadaLink.Commons.Interfaces.Repositories;
using ScadaLink.Commons.Interfaces.Services;
using ScadaLink.Commons.Messages.Deployment;
using ScadaLink.Commons.Messages.Lifecycle;
using ScadaLink.Commons.Types;
using ScadaLink.Commons.Types.Enums;
using ScadaLink.Commons.Types.Flattening;
using ScadaLink.Communication;
using ScadaLink.TemplateEngine.Flattening;

namespace ScadaLink.DeploymentManager.Tests;

/// <summary>
/// WP-1/2/4/5/6/8/16: Tests for central-side DeploymentService.
/// </summary>
public class DeploymentServiceTests : TestKit
{
    private readonly IDeploymentManagerRepository _repo;
    private readonly IFlatteningPipeline _pipeline;
    private readonly CommunicationService _comms;
    private readonly OperationLockManager _lockManager;
    private readonly IAuditService _audit;
    private readonly DeploymentService _service;

    /// <summary>
    /// Wires a DeploymentService over substitute repositories and a
    /// CommunicationService that has NO communication actor set. Tests that
    /// need a reachable site build their own service through
    /// <see cref="CreateServiceWithCommActor"/>.
    /// </summary>
    public DeploymentServiceTests()
    {
        _repo = Substitute.For<IDeploymentManagerRepository>();
        _pipeline = Substitute.For<IFlatteningPipeline>();
        _comms = new CommunicationService(
            Options.Create(new CommunicationOptions()),
            NullLogger<CommunicationService>.Instance);
        _lockManager = new OperationLockManager();
        _audit = Substitute.For<IAuditService>();

        var options = Options.Create(new DeploymentManagerOptions
        {
            OperationLockTimeout = TimeSpan.FromSeconds(5)
        });

        var siteRepo = CreateSiteRepoStub();
        _service = new DeploymentService(
            _repo,
            siteRepo,
            _pipeline,
            _comms,
            _lockManager,
            _audit,
            new DiffService(),
            new DeploymentStatusNotifier(NullLogger<DeploymentStatusNotifier>.Instance),
            options,
            NullLogger<DeploymentService>.Instance);
    }

    // ── WP-1: Deployment flow ──

    [Fact]
    public async Task DeployInstanceAsync_InstanceNotFound_ReturnsFailure()
    {
        _repo.GetInstanceByIdAsync(1).Returns((Instance?)null);

        var result = await _service.DeployInstanceAsync(1, "admin");

        Assert.True(result.IsFailure);
        Assert.Contains("not found", result.Error);
    }

    [Fact]
    public async Task DeployInstanceAsync_ValidationFails_ReturnsFailure()
    {
        var instance = new Instance("TestInst") { Id = 1, SiteId = 1, State = InstanceState.NotDeployed };
        _repo.GetInstanceByIdAsync(1).Returns(instance);

        // The pipeline itself succeeds, but the validation result it carries
        // contains an error entry.
        var validationResult = new ValidationResult
        {
            Errors = [ValidationEntry.Error(ValidationCategory.ScriptCompilation, "Compile error")]
        };
        _pipeline.FlattenAndValidateAsync(1, Arg.Any<CancellationToken>())
            .Returns(Result<FlatteningPipelineResult>.Success(
                new FlatteningPipelineResult(new FlattenedConfiguration(), "hash1", validationResult)));

        var result = await _service.DeployInstanceAsync(1, "admin");

        Assert.True(result.IsFailure);
        Assert.Contains("validation failed", result.Error);
    }

    [Fact]
    public async Task DeployInstanceAsync_FlatteningFails_ReturnsFailure()
    {
        var instance = new Instance("TestInst") { Id = 1, SiteId = 1, State = InstanceState.NotDeployed };
        _repo.GetInstanceByIdAsync(1).Returns(instance);
        _pipeline.FlattenAndValidateAsync(1, Arg.Any<CancellationToken>())
            .Returns(Result<FlatteningPipelineResult>.Failure("Template chain empty"));

        var result = await _service.DeployInstanceAsync(1, "admin");

        Assert.True(result.IsFailure);
        Assert.Contains("Validation failed", result.Error);
    }

    // ── DeploymentManager-021: missing Site row -> hard failure, no silent fabrication ──

    [Fact]
    public async Task DeployInstanceAsync_SiteRowMissing_FailsLoudlyInsteadOfSilentlySubstituting()
    {
        // DeploymentManager-021 regression: previously ResolveSiteIdentifierAsync
        // silently returned the numeric siteId rendered as a string when the
        // site row was missing (FK was deleted, race with admin delete, DB
        // inconsistency). That bogus identifier then surfaced downstream as a
        // confusing "unknown site" routing error that hid the real cause.
        //
        // After the fix the resolver throws InvalidOperationException naming
        // the unresolved id; on the deploy path the existing try/catch turns
        // it into a Failed deployment record whose error message reflects the
        // actual problem.
        var instance = new Instance("OrphanInst") { Id = 99, SiteId = 42, State = InstanceState.NotDeployed };
        _repo.GetInstanceByIdAsync(99, Arg.Any<CancellationToken>()).Returns(instance);
        SetupValidPipeline(99, "OrphanInst", "sha256:abc");

        // Build a fresh service whose siteRepo explicitly returns null for the
        // instance's SiteId (the helper above seeds every id, so we shadow it
        // for SiteId=42 only).
        var siteRepo = CreateSiteRepoStub();
        siteRepo.GetSiteByIdAsync(42, Arg.Any<CancellationToken>()).Returns((Site?)null);

        var service = new DeploymentService(
            _repo,
            siteRepo,
            _pipeline,
            _comms,
            _lockManager,
            _audit,
            new DiffService(),
            new DeploymentStatusNotifier(NullLogger<DeploymentStatusNotifier>.Instance),
            Options.Create(new DeploymentManagerOptions { OperationLockTimeout = TimeSpan.FromSeconds(5) }),
            NullLogger<DeploymentService>.Instance);

        var result = await service.DeployInstanceAsync(99, "admin");

        Assert.True(result.IsFailure);
        // The descriptive message names the unresolved id so the operator sees
        // the actual problem (missing site row), not a downstream routing error.
        Assert.Contains("42", result.Error);
        Assert.Contains("not found", result.Error, StringComparison.OrdinalIgnoreCase);
    }

    // ── DeploymentManager-022: no transient Pending -> single InProgress insert ──

    [Fact]
    public async Task DeployInstanceAsync_NoTransientPendingWrite_RecordCreatedDirectlyInProgress()
    {
        // DeploymentManager-022 regression: previously the deploy path wrote
        // the record as Pending, then immediately updated it to InProgress
        // with no work in between — an extra SaveChangesAsync round-trip, an
        // extra notifier push, and a Pending->InProgress flicker in the
        // CentralUI deployment-status page. After the fix the record is
        // inserted directly in InProgress (one Add + one notify); no
        // intermediate Pending row is ever persisted or notified.
        var instance = new Instance("DirectInProgressInst") { Id = 200, SiteId = 1, State = InstanceState.NotDeployed };
        _repo.GetInstanceByIdAsync(200, Arg.Any<CancellationToken>()).Returns(instance);
        SetupValidPipeline(200, "DirectInProgressInst", "sha256:dp22");

        // The catch path later flips the same record reference to Failed, so
        // snapshot the Status at insert time rather than reading the live
        // reference at assertion time.
        DeploymentStatus? statusAtInsert = null;
        await _repo.AddDeploymentRecordAsync(
            Arg.Do<DeploymentRecord>(r => statusAtInsert = r.Status),
            Arg.Any<CancellationToken>());

        // The communication actor is unset so the call throws after the insert;
        // we only care about the status the insert was made with.
        await _service.DeployInstanceAsync(200, "admin");

        // The single Add happens with the record already in InProgress.
        Assert.NotNull(statusAtInsert);
        Assert.Equal(DeploymentStatus.InProgress, statusAtInsert!.Value);

        // No Pending update was issued — the resolver never wrote the
        // intermediate Pending row.
        await _repo.DidNotReceive().UpdateDeploymentRecordAsync(
            Arg.Is<DeploymentRecord>(r => r.Status == DeploymentStatus.Pending),
            Arg.Any<CancellationToken>());
    }

    // ── WP-2: Deployment identity ──

    [Fact]
    public async Task DeployInstanceAsync_CreatesUniqueDeploymentId()
    {
        var instance = new Instance("TestInst") { Id = 1, SiteId = 1, State = InstanceState.NotDeployed };
        _repo.GetInstanceByIdAsync(1).Returns(instance);

        // Pipeline succeeds
        var config = new FlattenedConfiguration { InstanceUniqueName = "TestInst" };
        var validResult = ValidationResult.Success();
        _pipeline.FlattenAndValidateAsync(1, Arg.Any<CancellationToken>())
            .Returns(Result<FlatteningPipelineResult>.Success(
                new FlatteningPipelineResult(config, "sha256:abc", validResult)));

        // Capture the deployment record
        DeploymentRecord? captured = null;
        await _repo.AddDeploymentRecordAsync(
            Arg.Do<DeploymentRecord>(r => captured = r),
            Arg.Any<CancellationToken>());

        // The communication actor is not set, so the deploy fails after the
        // record has been inserted -- this tests the flow up to that point.
        // The failure comes back as a failed result (see the
        // DeploymentManager-001 test below), so the call is deliberately NOT
        // wrapped in a try/catch: an escaping exception would be a regression
        // and must fail this test rather than be swallowed.
        await _service.DeployInstanceAsync(1, "admin");

        Assert.NotNull(captured);
        Assert.False(string.IsNullOrEmpty(captured!.DeploymentId));
        Assert.Equal(32, captured.DeploymentId.Length); // GUID without hyphens
        Assert.Equal("sha256:abc", captured.RevisionHash);
    }

    // ── WP-4: State transition validation ──

    [Fact]
    public async Task DeployInstanceAsync_EnabledInstance_AllowsDeploy()
    {
        var instance = new Instance("TestInst") { Id = 1, SiteId = 1, State = InstanceState.Enabled };
        _repo.GetInstanceByIdAsync(1).Returns(instance);

        var config = new FlattenedConfiguration { InstanceUniqueName = "TestInst" };
        _pipeline.FlattenAndValidateAsync(1, Arg.Any<CancellationToken>())
            .Returns(Result<FlatteningPipelineResult>.Success(
                new FlatteningPipelineResult(config, "hash", ValidationResult.Success())));

        // Will fail at communication layer, but passes state validation. The
        // communication failure is returned as a failed result, not thrown,
        // so no exception is swallowed here.
        await _service.DeployInstanceAsync(1, "admin");

        // If we got past state validation, the deployment record was created
        await _repo.Received().AddDeploymentRecordAsync(
            Arg.Any<DeploymentRecord>(),
            Arg.Any<CancellationToken>());
    }

    // ── DeploymentManager-001: unexpected exception must not leave record InProgress ──

    [Fact]
    public async Task DeployInstanceAsync_CommunicationThrowsUnexpectedException_RecordMarkedFailed()
    {
        var instance = new Instance("TestInst") { Id = 1, SiteId = 1, State = InstanceState.NotDeployed };
        _repo.GetInstanceByIdAsync(1).Returns(instance);

        var config = new FlattenedConfiguration { InstanceUniqueName = "TestInst" };
        _pipeline.FlattenAndValidateAsync(1, Arg.Any<CancellationToken>())
            .Returns(Result<FlatteningPipelineResult>.Success(
                new FlatteningPipelineResult(config, "sha256:abc", ValidationResult.Success())));

        // Capture the deployment record so we can inspect its final state.
        DeploymentRecord? captured = null;
        await _repo.AddDeploymentRecordAsync(
            Arg.Do<DeploymentRecord>(r => captured = r),
            Arg.Any<CancellationToken>());

        // _comms has no actor set, so DeployInstanceAsync throws
        // InvalidOperationException -- a non-timeout, non-cancellation exception.
        var result = await _service.DeployInstanceAsync(1, "admin");

        // The exception must be handled, not escape.
        Assert.True(result.IsFailure);
        Assert.Contains("Deployment failed", result.Error);

        // The record must not be left stuck in InProgress.
        Assert.NotNull(captured);
        Assert.Equal(DeploymentStatus.Failed, captured!.Status);
        Assert.NotNull(captured.ErrorMessage);
        Assert.NotNull(captured.CompletedAt);
    }

    // ── DeploymentManager-002: failure write must not use a cancelled token ──

    [Fact]
    public async Task DeployInstanceAsync_FailureWrite_UsesNonCancellableToken()
    {
        var instance = new Instance("TestInst") { Id = 1, SiteId = 1, State = InstanceState.NotDeployed };
        _repo.GetInstanceByIdAsync(Arg.Any<int>(), Arg.Any<CancellationToken>()).Returns(instance);

        var config = new FlattenedConfiguration { InstanceUniqueName = "TestInst" };
        _pipeline.FlattenAndValidateAsync(Arg.Any<int>(), Arg.Any<CancellationToken>())
            .Returns(Result<FlatteningPipelineResult>.Success(
                new FlatteningPipelineResult(config, "sha256:abc", ValidationResult.Success())));

        DeploymentRecord? captured = null;
        await _repo.AddDeploymentRecordAsync(
            Arg.Do<DeploymentRecord>(r => captured = r),
            Arg.Any<CancellationToken>());

        // Simulate a repository that rejects already-cancelled tokens (the
        // real EF Core behaviour when the operation token is cancelled). If the
        // catch block passes the operation's cancelled token, the Failed-status
        // write throws and the record stays InProgress -- the exact bug.
        _repo.UpdateDeploymentRecordAsync(
                Arg.Is<DeploymentRecord>(r => r.Status == DeploymentStatus.Failed),
                Arg.Is<CancellationToken>(ct => ct.IsCancellationRequested))
            .Returns(_ => throw new OperationCanceledException());
        // NOTE(review): the SaveChangesAsync result type is assumed to be
        // Task<int> -- confirm against IDeploymentManagerRepository.
        _repo.SaveChangesAsync(Arg.Is<CancellationToken>(ct => ct.IsCancellationRequested))
            .Returns<Task<int>>(_ => throw new OperationCanceledException());

        // The communication call fails (no actor set). The catch block must
        // persist the Failed status with a non-cancellable token, so cleanup
        // succeeds even when the caller's token is cancelled.
        // NOTE(review): no cancelled token is handed to DeployInstanceAsync
        // here, so the cancelled-token stubs above only fire if the service
        // itself forwards a cancelled token -- confirm this still exercises
        // the intended scenario.
        var result = await _service.DeployInstanceAsync(1, "admin");

        Assert.True(result.IsFailure);
        Assert.NotNull(captured);
        Assert.Equal(DeploymentStatus.Failed, captured!.Status);

        // The Failed-status write happened with a non-cancelled token.
        await _repo.Received().UpdateDeploymentRecordAsync(
            Arg.Is<DeploymentRecord>(r => r.Status == DeploymentStatus.Failed),
            Arg.Is<CancellationToken>(ct => !ct.IsCancellationRequested));
    }

    // ── WP-6: Lifecycle commands ──

    [Fact]
    public async Task DisableInstanceAsync_InstanceNotFound_ReturnsFailure()
    {
        _repo.GetInstanceByIdAsync(1).Returns((Instance?)null);

        var result = await _service.DisableInstanceAsync(1, "admin");

        Assert.True(result.IsFailure);
        Assert.Contains("not found", result.Error);
    }

    [Fact]
    public async Task DisableInstanceAsync_WhenDisabled_ReturnsTransitionError()
    {
        var instance = new Instance("TestInst") { Id = 1, SiteId = 1, State = InstanceState.Disabled };
        _repo.GetInstanceByIdAsync(1).Returns(instance);

        var result = await _service.DisableInstanceAsync(1, "admin");

        Assert.True(result.IsFailure);
        Assert.Contains("not allowed", result.Error);
    }

    [Fact]
    public async Task EnableInstanceAsync_WhenEnabled_ReturnsTransitionError()
    {
        var instance = new Instance("TestInst") { Id = 1, SiteId = 1, State = InstanceState.Enabled };
        _repo.GetInstanceByIdAsync(1).Returns(instance);

        var result = await _service.EnableInstanceAsync(1, "admin");

        Assert.True(result.IsFailure);
        Assert.Contains("not allowed", result.Error);
    }

    [Fact]
    public async Task DeleteInstanceAsync_InstanceNotFound_ReturnsFailure()
    {
        _repo.GetInstanceByIdAsync(1).Returns((Instance?)null);

        var result = await _service.DeleteInstanceAsync(1, "admin");

        Assert.True(result.IsFailure);
        Assert.Contains("not found", result.Error);
    }

    // ── DeploymentManager-004: site-success but central-delete-failure must not escape uncaught ──

    [Fact]
    public async Task DeleteInstanceAsync_SiteSucceeds_CentralDeleteFails_ReturnsDistinctFailure()
    {
        // The site destroys the Instance Actor and removes its config (response
        // Success), but the central record removal throws. The exception must
        // NOT propagate uncaught -- it must be surfaced as a distinct failure so
        // an operator can reconcile the orphaned central record.
        var instance = new Instance("OrphanInst") { Id = 30, SiteId = 1, State = InstanceState.Enabled };
        _repo.GetInstanceByIdAsync(30, Arg.Any<CancellationToken>()).Returns(instance);
        _repo.DeleteInstanceAsync(30, Arg.Any<CancellationToken>())
            .Returns(_ => throw new InvalidOperationException("db unavailable"));

        var counters = new ReconcileProbeCounters();
        var commActor = Sys.ActorOf(Props.Create(() =>
            new ReconcileProbeActor(counters, siteHash: "sha256:x", failQuery: false)));
        var service = CreateServiceWithCommActor(commActor);

        var result = await service.DeleteInstanceAsync(30, "admin");

        // The failure is surfaced (not thrown) and clearly says the site
        // succeeded but the central record could not be removed.
        Assert.True(result.IsFailure);
        Assert.Contains("site", result.Error, StringComparison.OrdinalIgnoreCase);
        Assert.Contains("central", result.Error, StringComparison.OrdinalIgnoreCase);
    }

    // ── WP-8: Deployment comparison ──

    [Fact]
    public async Task GetDeploymentComparisonAsync_NoSnapshot_ReturnsFailure()
    {
        _repo.GetDeployedSnapshotByInstanceIdAsync(1).Returns((DeployedConfigSnapshot?)null);

        var result = await _service.GetDeploymentComparisonAsync(1);

        Assert.True(result.IsFailure);
        Assert.Contains("No deployed snapshot", result.Error);
    }

    [Fact]
    public async Task GetDeploymentComparisonAsync_SameHash_NotStale()
    {
        var snapshot = new DeployedConfigSnapshot("dep1", "sha256:abc", "{}")
        {
            InstanceId = 1,
            DeployedAt = DateTimeOffset.UtcNow
        };
        _repo.GetDeployedSnapshotByInstanceIdAsync(1).Returns(snapshot);

        // The pipeline reports the same revision hash as the stored snapshot.
        var config = new FlattenedConfiguration { InstanceUniqueName = "TestInst" };
        _pipeline.FlattenAndValidateAsync(1, Arg.Any<CancellationToken>())
            .Returns(Result<FlatteningPipelineResult>.Success(
                new FlatteningPipelineResult(config, "sha256:abc", ValidationResult.Success())));

        var result = await _service.GetDeploymentComparisonAsync(1);

        Assert.True(result.IsSuccess);
        Assert.False(result.Value.IsStale);
    }

    [Fact]
    public async Task GetDeploymentComparisonAsync_DifferentHash_IsStale()
    {
        var snapshot = new DeployedConfigSnapshot("dep1", "sha256:abc", "{}")
        {
            InstanceId = 1,
            DeployedAt = DateTimeOffset.UtcNow
        };
        _repo.GetDeployedSnapshotByInstanceIdAsync(1).Returns(snapshot);

        // The pipeline reports a different revision hash from the snapshot.
        var config = new FlattenedConfiguration { InstanceUniqueName = "TestInst" };
        _pipeline.FlattenAndValidateAsync(1, Arg.Any<CancellationToken>())
            .Returns(Result<FlatteningPipelineResult>.Success(
                new FlatteningPipelineResult(config, "sha256:xyz", ValidationResult.Success())));

        var result = await _service.GetDeploymentComparisonAsync(1);

        Assert.True(result.IsSuccess);
        Assert.True(result.Value.IsStale);
    }

    // ── DeploymentManager-007: comparison must produce a structured diff ──

    [Fact]
    public async Task GetDeploymentComparisonAsync_ProducesStructuredDiff()
    {
        // The deployed snapshot has one attribute; the current template-derived
        // config has a different attribute. The comparison must surface a real
        // Added/Removed diff via the TemplateEngine DiffService, not just a
        // boolean staleness flag.
        var deployedConfig = new FlattenedConfiguration
        {
            InstanceUniqueName = "DiffInst",
            Attributes = [new ResolvedAttribute { CanonicalName = "OldAttr", DataType = "Int" }]
        };
        var snapshot = new DeployedConfigSnapshot(
            "dep1",
            "sha256:old",
            System.Text.Json.JsonSerializer.Serialize(deployedConfig))
        {
            InstanceId = 40,
            DeployedAt = DateTimeOffset.UtcNow
        };
        _repo.GetDeployedSnapshotByInstanceIdAsync(40, Arg.Any<CancellationToken>()).Returns(snapshot);

        var currentConfig = new FlattenedConfiguration
        {
            InstanceUniqueName = "DiffInst",
            Attributes = [new ResolvedAttribute { CanonicalName = "NewAttr", DataType = "Int" }]
        };
        _pipeline.FlattenAndValidateAsync(40, Arg.Any<CancellationToken>())
            .Returns(Result<FlatteningPipelineResult>.Success(
                new FlatteningPipelineResult(currentConfig, "sha256:new", ValidationResult.Success())));

        var result = await _service.GetDeploymentComparisonAsync(40);

        Assert.True(result.IsSuccess);
        Assert.True(result.Value.IsStale);

        // A structured diff is present with the added and removed attributes.
        Assert.NotNull(result.Value.Diff);
        Assert.True(result.Value.Diff!.HasChanges);
        Assert.Contains(
            result.Value.Diff.AttributeChanges,
            c => c.CanonicalName == "NewAttr" && c.ChangeType == DiffChangeType.Added);
        Assert.Contains(
            result.Value.Diff.AttributeChanges,
            c => c.CanonicalName == "OldAttr" && c.ChangeType == DiffChangeType.Removed);
    }

    // ── WP-2: GetDeploymentStatusAsync ──

    [Fact]
    public async Task GetDeploymentStatusAsync_ReturnsRecordByDeploymentId()
    {
        var record = new DeploymentRecord("dep1", "admin") { Status = DeploymentStatus.Success };
        _repo.GetDeploymentByDeploymentIdAsync("dep1").Returns(record);

        var result = await _service.GetDeploymentStatusAsync("dep1");

        Assert.NotNull(result);
        Assert.Equal("dep1", result!.DeploymentId);
        Assert.Equal(DeploymentStatus.Success, result.Status);
    }

    // ── Audit logging ──

    [Fact]
    public async Task DeployInstanceAsync_FlatteningFails_DoesNotReachAudit()
    {
        // DeploymentManager-011: this test previously asserted nothing. A
        // flatten failure returns before any site communication, so no audit
        // entry is written.
        var instance = new Instance("TestInst") { Id = 1, SiteId = 1, State = InstanceState.NotDeployed };
        _repo.GetInstanceByIdAsync(1).Returns(instance);
        _pipeline.FlattenAndValidateAsync(1, Arg.Any<CancellationToken>())
            .Returns(Result<FlatteningPipelineResult>.Failure("Error"));

        await _service.DeployInstanceAsync(1, "admin");

        // NOTE(review): the first five LogAsync parameters are strings (see the
        // literal arguments in the success-path tests); the sixth is assumed
        // to be object? and the seventh a CancellationToken -- confirm against
        // IAuditService.
        await _audit.DidNotReceive().LogAsync(
            Arg.Any<string>(),
            Arg.Any<string>(),
            Arg.Any<string>(),
            Arg.Any<string>(),
            Arg.Any<string>(),
            Arg.Any<object?>(),
            Arg.Any<CancellationToken>());
    }

    [Fact]
    public async Task DeployInstanceAsync_SiteSucceeds_WritesDeployAuditEntry()
    {
        // DeploymentManager-011: a successful deployment must write a "Deploy"
        // audit entry referencing the deployed instance.
        var instance = new Instance("AuditInst") { Id = 50, SiteId = 1, State = InstanceState.NotDeployed };
        _repo.GetInstanceByIdAsync(50, Arg.Any<CancellationToken>()).Returns(instance);
        SetupValidPipeline(50, "AuditInst", "sha256:target");
        _repo.GetCurrentDeploymentStatusAsync(50, Arg.Any<CancellationToken>())
            .Returns((DeploymentRecord?)null);

        var counters = new ReconcileProbeCounters();
        var commActor = Sys.ActorOf(Props.Create(() =>
            new ReconcileProbeActor(counters, siteHash: "sha256:target", failQuery: false)));
        var service = CreateServiceWithCommActor(commActor);

        var result = await service.DeployInstanceAsync(50, "admin");

        Assert.True(result.IsSuccess);
        await _audit.Received().LogAsync(
            "admin", "Deploy", "Instance", "50", "AuditInst",
            Arg.Any<object?>(), Arg.Any<CancellationToken>());
    }

    // ── DeploymentManager-011: lifecycle success paths ──

    [Fact]
    public async Task DisableInstanceAsync_SiteSucceeds_SetsDisabledStateAndAudits()
    {
        var instance = new Instance("DisInst") { Id = 51, SiteId = 1, State = InstanceState.Enabled };
        _repo.GetInstanceByIdAsync(51, Arg.Any<CancellationToken>()).Returns(instance);

        var counters = new ReconcileProbeCounters();
        var commActor = Sys.ActorOf(Props.Create(() =>
            new ReconcileProbeActor(counters, siteHash: "x", failQuery: false)));
        var service = CreateServiceWithCommActor(commActor);

        var result = await service.DisableInstanceAsync(51, "admin");

        Assert.True(result.IsSuccess);
        Assert.Equal(InstanceState.Disabled, instance.State);
        await _repo.Received().UpdateInstanceAsync(instance, Arg.Any<CancellationToken>());
        await _audit.Received().LogAsync(
            "admin", "Disable", "Instance", "51", "DisInst",
            Arg.Any<object?>(), Arg.Any<CancellationToken>());
    }

    [Fact]
    public async Task EnableInstanceAsync_SiteSucceeds_SetsEnabledStateAndAudits()
    {
        var instance = new Instance("EnInst") { Id = 52, SiteId = 1, State = InstanceState.Disabled };
        _repo.GetInstanceByIdAsync(52, Arg.Any<CancellationToken>()).Returns(instance);

        var counters = new ReconcileProbeCounters();
        var commActor = Sys.ActorOf(Props.Create(() =>
            new ReconcileProbeActor(counters, siteHash: "x", failQuery: false)));
        var service = CreateServiceWithCommActor(commActor);

        var result = await service.EnableInstanceAsync(52, "admin");

        Assert.True(result.IsSuccess);
        Assert.Equal(InstanceState.Enabled, instance.State);
        await _repo.Received().UpdateInstanceAsync(instance, Arg.Any<CancellationToken>());
        await _audit.Received().LogAsync(
            "admin", "Enable", "Instance", "52", "EnInst",
            Arg.Any<object?>(), Arg.Any<CancellationToken>());
    }

    [Fact]
    public async Task DeleteInstanceAsync_SiteSucceeds_RemovesRecordAndAudits()
    {
        var instance = new Instance("DelInst") { Id = 53, SiteId = 1, State = InstanceState.Enabled };
        _repo.GetInstanceByIdAsync(53, Arg.Any<CancellationToken>()).Returns(instance);

        var counters = new ReconcileProbeCounters();
        var commActor = Sys.ActorOf(Props.Create(() =>
            new ReconcileProbeActor(counters, siteHash: "x", failQuery: false)));
        var service = CreateServiceWithCommActor(commActor);

        var result = await service.DeleteInstanceAsync(53, "admin");

        Assert.True(result.IsSuccess);
        await _repo.Received().DeleteInstanceAsync(53, Arg.Any<CancellationToken>());
        await _audit.Received().LogAsync(
            "admin", "Delete", "Instance", "53", "DelInst",
            Arg.Any<object?>(), Arg.Any<CancellationToken>());
    }

    [Fact]
    public async Task DeployInstanceAsync_SameInstance_OperationLockSerializesConcurrentDeploys()
    {
        // DeploymentManager-011: two concurrent deploys of the SAME instance
        // must be serialized by the per-instance operation lock — the site sees
        // them one at a time, never overlapping.
        var instance = new Instance("LockInst") { Id = 54, SiteId = 1, State = InstanceState.Enabled };
        _repo.GetInstanceByIdAsync(54, Arg.Any<CancellationToken>()).Returns(instance);
        SetupValidPipeline(54, "LockInst", "sha256:target");
        _repo.GetCurrentDeploymentStatusAsync(54, Arg.Any<CancellationToken>())
            .Returns((DeploymentRecord?)null);

        var serializationCounters = new SerializationProbeCounters();
        var commActor = Sys.ActorOf(Props.Create(() =>
            new SerializationProbeActor(serializationCounters)));
        var service = CreateServiceWithCommActor(commActor);

        var deploy1 = service.DeployInstanceAsync(54, "admin");
        var deploy2 = service.DeployInstanceAsync(54, "admin");
        var results = await Task.WhenAll(deploy1, deploy2);

        Assert.True(results[0].IsSuccess);
        Assert.True(results[1].IsSuccess);
        // The probe records the maximum concurrency observed; the lock must
        // keep it at 1 for a single instance.
        Assert.Equal(1, serializationCounters.MaxConcurrent);
    }

    // ── DeploymentManager-006: query-the-site-before-redeploy idempotency ──

    /// <summary>
    /// Builds a DeploymentService whose CommunicationService is backed by the
    /// supplied actor, so the site query and deploy commands can be observed.
    /// </summary>
    private DeploymentService CreateServiceWithCommActor(IActorRef commActor)
    {
        var comms = new CommunicationService(
            Options.Create(new CommunicationOptions
            {
                QueryTimeout = TimeSpan.FromSeconds(5),
                DeploymentTimeout = TimeSpan.FromSeconds(5)
            }),
            NullLogger<CommunicationService>.Instance);
        comms.SetCommunicationActor(commActor);

        var siteRepo = CreateSiteRepoStub();
        return new DeploymentService(
            _repo,
            siteRepo,
            _pipeline,
            comms,
            _lockManager,
            _audit,
            new DiffService(),
            new DeploymentStatusNotifier(NullLogger<DeploymentStatusNotifier>.Instance),
            Options.Create(new DeploymentManagerOptions { OperationLockTimeout = TimeSpan.FromSeconds(5) }),
            NullLogger<DeploymentService>.Instance);
    }

    /// <summary>
    /// Stubs the flattening pipeline so that flattening the given instance id
    /// succeeds with a configuration named <paramref name="instanceName"/>,
    /// the supplied revision hash, and a successful validation result.
    /// </summary>
    private void SetupValidPipeline(int instanceId, string instanceName, string revisionHash)
    {
        var config = new FlattenedConfiguration { InstanceUniqueName = instanceName };
        _pipeline.FlattenAndValidateAsync(instanceId, Arg.Any<CancellationToken>())
            .Returns(Result<FlatteningPipelineResult>.Success(
                new FlatteningPipelineResult(config, revisionHash, ValidationResult.Success())));
    }

    /// <summary>
    /// DeploymentManager-021 test helper: returns an <see cref="ISiteRepository"/>
    /// substitute that resolves <c>GetSiteByIdAsync</c>
    /// for ANY integer id to a stub <see cref="Site"/> whose
    /// SiteIdentifier is <c>"site-{id}"</c>. Prior to the
    /// DeploymentManager-021 fix the production `ResolveSiteIdentifierAsync`
    /// silently substituted the numeric id when the site row was missing, so
    /// these tests passed without seeding any Sites. After the fix a missing
    /// site throws — every test that drives a deploy/lifecycle path needs a
    /// real-shaped <see cref="Site"/> back, and this helper centralises that
    /// arrangement so individual tests don't repeat the boilerplate.
    /// </summary>
    private static ISiteRepository CreateSiteRepoStub()
    {
        var siteRepo = Substitute.For<ISiteRepository>();
        siteRepo.GetSiteByIdAsync(Arg.Any<int>(), Arg.Any<CancellationToken>())
            .Returns(callInfo =>
            {
                var id = callInfo.ArgAt<int>(0);
                return new Site($"Test Site {id}", $"site-{id}") { Id = id };
            });
        return siteRepo;
    }

    [Fact]
    public async Task DeployInstanceAsync_PriorInProgressRecord_SiteHasTargetHash_MarksSuccessWithoutRedeploy()
    {
        // Prior record stuck InProgress -> site is queried. The site reports it
        // already has the TARGET revision hash, so the prior record is marked
        // Success and NO new DeployInstanceCommand is sent.
        var instance = new Instance("RedeployInst") { Id = 7, SiteId = 1, State = InstanceState.Enabled };
        _repo.GetInstanceByIdAsync(7, Arg.Any<CancellationToken>()).Returns(instance);
        SetupValidPipeline(7, "RedeployInst", "sha256:target");

        var prior = new DeploymentRecord("dep-prior", "admin")
        {
            InstanceId = 7,
            Status = DeploymentStatus.InProgress,
            RevisionHash = "sha256:target"
        };
        _repo.GetCurrentDeploymentStatusAsync(7, Arg.Any<CancellationToken>()).Returns(prior);

        var counters = new ReconcileProbeCounters();
        var commActor = Sys.ActorOf(Props.Create(() =>
            new ReconcileProbeActor(counters, siteHash: "sha256:target", failQuery: false)));
        var service = CreateServiceWithCommActor(commActor);

        var result = await service.DeployInstanceAsync(7, "admin");

        Assert.True(result.IsSuccess);
        Assert.Equal(DeploymentStatus.Success, prior.Status);
        // The site query was issued, but no new deploy command was sent.
        Assert.Equal(1, counters.QueryCount);
        Assert.Equal(0, counters.DeployCount);
        // No new deployment record was created — the prior one was reconciled.
        await _repo.DidNotReceive().AddDeploymentRecordAsync(
            Arg.Any<DeploymentRecord>(),
            Arg.Any<CancellationToken>());
    }

    [Fact]
    public async Task DeployInstanceAsync_PriorInProgressRecord_SiteHasDifferentHash_ProceedsWithDeploy()
    {
        // Prior record stuck InProgress -> site is queried. The site has a
        // DIFFERENT revision hash, so the normal deploy proceeds.
        var instance = new Instance("RedeployInst2") { Id = 8, SiteId = 1, State = InstanceState.Enabled };
        _repo.GetInstanceByIdAsync(8, Arg.Any<CancellationToken>()).Returns(instance);
        SetupValidPipeline(8, "RedeployInst2", "sha256:target");

        var prior = new DeploymentRecord("dep-prior2", "admin")
        {
            InstanceId = 8,
            Status = DeploymentStatus.InProgress,
            RevisionHash = "sha256:old"
        };
        _repo.GetCurrentDeploymentStatusAsync(8, Arg.Any<CancellationToken>()).Returns(prior);

        var counters = new ReconcileProbeCounters();
        var commActor = Sys.ActorOf(Props.Create(() =>
            new ReconcileProbeActor(counters, siteHash: "sha256:old", failQuery: false)));
        var service = CreateServiceWithCommActor(commActor);

        var result = await service.DeployInstanceAsync(8, "admin");

        Assert.True(result.IsSuccess);
        Assert.Equal(1, counters.QueryCount);
        // The normal deploy proceeded — a new command was sent.
        Assert.Equal(1, counters.DeployCount);
        await _repo.Received().AddDeploymentRecordAsync(
            Arg.Any<DeploymentRecord>(),
            Arg.Any<CancellationToken>());
    }

    [Fact]
    public async Task DeployInstanceAsync_PriorFailedTimeoutRecord_QueriesSite()
    {
        // A prior record Failed due to a timeout also triggers the site query.
        var instance = new Instance("TimedOutInst") { Id = 9, SiteId = 1, State = InstanceState.Enabled };
        _repo.GetInstanceByIdAsync(9, Arg.Any<CancellationToken>()).Returns(instance);
        SetupValidPipeline(9, "TimedOutInst", "sha256:target");

        var prior = new DeploymentRecord("dep-prior3", "admin")
        {
            InstanceId = 9,
            Status = DeploymentStatus.Failed,
            RevisionHash = "sha256:target",
            ErrorMessage = "Communication failure: deployment Ask timed out"
        };
        _repo.GetCurrentDeploymentStatusAsync(9, Arg.Any<CancellationToken>()).Returns(prior);

        var counters = new ReconcileProbeCounters();
        var commActor = Sys.ActorOf(Props.Create(() =>
            new ReconcileProbeActor(counters, siteHash: "sha256:target", failQuery: false)));
        var service = CreateServiceWithCommActor(commActor);

        var result = await service.DeployInstanceAsync(9, "admin");

        Assert.True(result.IsSuccess);
        Assert.Equal(1, counters.QueryCount);
        Assert.Equal(0, counters.DeployCount);
        Assert.Equal(DeploymentStatus.Success, prior.Status);
    }

    [Fact]
    public async Task DeployInstanceAsync_PriorSuccessRecord_SkipsSiteQuery()
    {
        // A clean prior Success record must NOT trigger the extra round-trip.
        var instance = new Instance("CleanInst") { Id = 10, SiteId = 1, State = InstanceState.Enabled };
        _repo.GetInstanceByIdAsync(10, Arg.Any<CancellationToken>()).Returns(instance);
        SetupValidPipeline(10, "CleanInst", "sha256:target");

        var prior = new DeploymentRecord("dep-clean", "admin")
        {
            InstanceId = 10,
            Status = DeploymentStatus.Success,
            RevisionHash = "sha256:old"
        };
        _repo.GetCurrentDeploymentStatusAsync(10, Arg.Any<CancellationToken>()).Returns(prior);

        var counters = new ReconcileProbeCounters();
        var commActor = Sys.ActorOf(Props.Create(() =>
            new ReconcileProbeActor(counters, siteHash: "sha256:target", failQuery: false)));
        var service = CreateServiceWithCommActor(commActor);

        var result = await service.DeployInstanceAsync(10, "admin");

        Assert.True(result.IsSuccess);
        // No site query — the prior deploy completed cleanly.
        Assert.Equal(0, counters.QueryCount);
        Assert.Equal(1, counters.DeployCount);
    }

    [Fact]
    public async Task DeployInstanceAsync_FreshFirstTimeDeploy_SkipsSiteQuery()
    {
        // No prior record at all -> fresh deploy, no extra round-trip.
        var instance = new Instance("FreshInst") { Id = 11, SiteId = 1, State = InstanceState.NotDeployed };
        _repo.GetInstanceByIdAsync(11, Arg.Any<CancellationToken>()).Returns(instance);
        SetupValidPipeline(11, "FreshInst", "sha256:target");
        _repo.GetCurrentDeploymentStatusAsync(11, Arg.Any<CancellationToken>())
            .Returns((DeploymentRecord?)null);

        var counters = new ReconcileProbeCounters();
        var commActor = Sys.ActorOf(Props.Create(() =>
            new ReconcileProbeActor(counters, siteHash: "sha256:target", failQuery: false)));
        var service = CreateServiceWithCommActor(commActor);

        var result = await service.DeployInstanceAsync(11, "admin");

        Assert.True(result.IsSuccess);
        Assert.Equal(0, counters.QueryCount);
        Assert.Equal(1, counters.DeployCount);
    }

    [Fact]
    public async Task DeployInstanceAsync_PriorInProgressRecord_QueryFails_FallsThroughToDeploy()
    {
        // The site query fails (unreachable / times out). The deploy must NOT
        // abort — it falls through to a normal deploy and relies on site-side
        // stale-rejection as the safety net.
        var instance = new Instance("UnreachableInst") { Id = 12, SiteId = 1, State = InstanceState.Enabled };
        _repo.GetInstanceByIdAsync(12, Arg.Any<CancellationToken>()).Returns(instance);
        SetupValidPipeline(12, "UnreachableInst", "sha256:target");

        var prior = new DeploymentRecord("dep-prior5", "admin")
        {
            InstanceId = 12,
            Status = DeploymentStatus.InProgress,
            RevisionHash = "sha256:target"
        };
        _repo.GetCurrentDeploymentStatusAsync(12, Arg.Any<CancellationToken>()).Returns(prior);

        // The probe drops the query (no reply) -> the Ask times out.
        var counters = new ReconcileProbeCounters();
        var commActor = Sys.ActorOf(Props.Create(() =>
            new ReconcileProbeActor(counters, siteHash: "sha256:target", failQuery: true)));
        var service = CreateServiceWithCommActor(commActor);

        var result = await service.DeployInstanceAsync(12, "admin");

        // Did not abort — the deploy proceeded after the failed query.
        Assert.True(result.IsSuccess);
        Assert.Equal(1, counters.QueryCount);
        Assert.Equal(1, counters.DeployCount);
    }

    // ── DeploymentManager-015: reconciliation must perform the normal success side effects ──

    [Fact]
    public async Task DeployInstanceAsync_Reconciled_SetsInstanceEnabledAndStoresSnapshot()
    {
        // A prior deploy timed out; the site actually applied the target
        // revision. Reconciliation marks the prior record Success WITHOUT
        // re-sending -- but it must still perform the same side effects as the
        // normal success path: set the instance State to Enabled and store the
        // deployed-config snapshot. Otherwise central and site diverge.
        var instance = new Instance("ReconcileSideEffects") { Id = 70, SiteId = 1, State = InstanceState.NotDeployed };
        _repo.GetInstanceByIdAsync(70, Arg.Any<CancellationToken>()).Returns(instance);
        SetupValidPipeline(70, "ReconcileSideEffects", "sha256:target");

        var prior = new DeploymentRecord("dep-prior-70", "admin")
        {
            InstanceId = 70,
            Status = DeploymentStatus.InProgress,
            RevisionHash = "sha256:target"
        };
        _repo.GetCurrentDeploymentStatusAsync(70, Arg.Any<CancellationToken>()).Returns(prior);
        _repo.GetDeployedSnapshotByInstanceIdAsync(70, Arg.Any<CancellationToken>())
            .Returns((DeployedConfigSnapshot?)null);

        DeployedConfigSnapshot? storedSnapshot = null;
        await _repo.AddDeployedSnapshotAsync(
            Arg.Do<DeployedConfigSnapshot>(s => storedSnapshot = s),
            Arg.Any<CancellationToken>());

        var counters = new ReconcileProbeCounters();
        var commActor = Sys.ActorOf(Props.Create(() =>
            new ReconcileProbeActor(counters, siteHash: "sha256:target", failQuery: false)));
        var service = CreateServiceWithCommActor(commActor);

        var result = await service.DeployInstanceAsync(70, "admin");

        Assert.True(result.IsSuccess);
        // No re-deploy was sent -- this was reconciled.
        Assert.Equal(1, counters.QueryCount);
        Assert.Equal(0, counters.DeployCount);

        // DeploymentManager-015: the instance State must reflect the deployed
        // config the site is actually running.
        Assert.Equal(InstanceState.Enabled, instance.State);
        await _repo.Received().UpdateInstanceAsync(instance, Arg.Any<CancellationToken>());

        // DeploymentManager-015: a deployed-config snapshot must be stored so
        // GetDeploymentComparisonAsync has something to compare against.
        Assert.NotNull(storedSnapshot);
        Assert.Equal(70, storedSnapshot!.InstanceId);
        Assert.Equal("sha256:target", storedSnapshot.RevisionHash);
    }

    // ── DeploymentManager-018: reconciliation must preserve an intentional Disabled state ──

    [Fact]
    public async Task DeployInstanceAsync_Reconciled_DisabledInstance_PreservesDisabledState()
    {
        // DeploymentManager-018: after a central failover, the in-memory
        // OperationLockManager is lost (by design — in-progress treated as
        // failed). The prior deployment record remains InProgress in the DB.
        // The operator can legitimately invoke Disable on the instance between
        // the timed-out deploy and the redeploy. Disable does not change the
        // deployed config, so the site still reports the target revision hash.
        // When the operator retries the deploy, the reconciliation branch must
        // NOT silently overwrite Instance.State back to Enabled — that would
        // undo the explicit operator action with no audit trail.
var instance = new Instance("ReconcileDisabled") { Id = 72, SiteId = 1, State = InstanceState.Disabled }; _repo.GetInstanceByIdAsync(72, Arg.Any()).Returns(instance); SetupValidPipeline(72, "ReconcileDisabled", "sha256:target"); var prior = new DeploymentRecord("dep-prior-72", "admin") { InstanceId = 72, Status = DeploymentStatus.InProgress, RevisionHash = "sha256:target" }; _repo.GetCurrentDeploymentStatusAsync(72, Arg.Any()).Returns(prior); _repo.GetDeployedSnapshotByInstanceIdAsync(72, Arg.Any()) .Returns((DeployedConfigSnapshot?)null); var counters = new ReconcileProbeCounters(); var commActor = Sys.ActorOf(Props.Create(() => new ReconcileProbeActor(counters, siteHash: "sha256:target", failQuery: false))); var service = CreateServiceWithCommActor(commActor); var result = await service.DeployInstanceAsync(72, "admin"); // The reconciliation still succeeds and the prior record is marked // Success — central and site agree on the applied config. Assert.True(result.IsSuccess); Assert.Equal(DeploymentStatus.Success, prior.Status); Assert.Equal(1, counters.QueryCount); Assert.Equal(0, counters.DeployCount); // DeploymentManager-018: the operator's explicit Disable must survive // the reconciliation — Instance.State stays Disabled, not silently // flipped to Enabled. Assert.Equal(InstanceState.Disabled, instance.State); } // ── DeploymentManager-016: reconciled record must carry the target revision hash ── [Fact] public async Task DeployInstanceAsync_Reconciled_PriorRecordRevisionHashUpdatedToTarget() { // The prior record carries a stale revision hash (R1), but the site // reports it has the freshly-flattened target revision (R2). After // reconciliation the prior record's RevisionHash must agree with the // site's applied revision -- not keep the stale R1. 
var instance = new Instance("ReconcileStaleHash") { Id = 71, SiteId = 1, State = InstanceState.Enabled }; _repo.GetInstanceByIdAsync(71, Arg.Any()).Returns(instance); SetupValidPipeline(71, "ReconcileStaleHash", "sha256:target"); var prior = new DeploymentRecord("dep-prior-71", "admin") { InstanceId = 71, Status = DeploymentStatus.InProgress, RevisionHash = "sha256:stale-R1" }; _repo.GetCurrentDeploymentStatusAsync(71, Arg.Any()).Returns(prior); _repo.GetDeployedSnapshotByInstanceIdAsync(71, Arg.Any()) .Returns((DeployedConfigSnapshot?)null); var counters = new ReconcileProbeCounters(); var commActor = Sys.ActorOf(Props.Create(() => new ReconcileProbeActor(counters, siteHash: "sha256:target", failQuery: false))); var service = CreateServiceWithCommActor(commActor); var result = await service.DeployInstanceAsync(71, "admin"); Assert.True(result.IsSuccess); Assert.Equal(DeploymentStatus.Success, prior.Status); // DeploymentManager-016: the persisted record's RevisionHash must agree // with the site's applied revision, not keep the stale R1. Assert.Equal("sha256:target", prior.RevisionHash); } // ── DeploymentManager-020: reconciliation audit attributes to the CURRENT user, not the prior deployer ── [Fact] public async Task DeployInstanceAsync_Reconciled_AuditAttributesCurrentUserNotPriorDeployer() { // DeploymentManager-020: a redeploy that reconciles a timed-out prior // record must be audited as the action of the user driving THIS // redeploy — not the user who originally issued the now-reconciled // deployment. The prior deployer is preserved in the detail object so // forensics still see who started the rescued run. 
var instance = new Instance("ReconcileAuditUser") { Id = 73, SiteId = 1, State = InstanceState.NotDeployed }; _repo.GetInstanceByIdAsync(73, Arg.Any()).Returns(instance); SetupValidPipeline(73, "ReconcileAuditUser", "sha256:target"); var prior = new DeploymentRecord("dep-prior-73", "originalUser") { InstanceId = 73, Status = DeploymentStatus.InProgress, RevisionHash = "sha256:target" }; _repo.GetCurrentDeploymentStatusAsync(73, Arg.Any()).Returns(prior); _repo.GetDeployedSnapshotByInstanceIdAsync(73, Arg.Any()) .Returns((DeployedConfigSnapshot?)null); var counters = new ReconcileProbeCounters(); var commActor = Sys.ActorOf(Props.Create(() => new ReconcileProbeActor(counters, siteHash: "sha256:target", failQuery: false))); var service = CreateServiceWithCommActor(commActor); var result = await service.DeployInstanceAsync(73, "currentUser"); Assert.True(result.IsSuccess); // DeploymentManager-020: audit row's actor is the current user. await _audit.Received().LogAsync( "currentUser", "DeployReconciled", "Instance", "73", "ReconcileAuditUser", Arg.Any(), Arg.Any()); // And the prior deployer was NOT used as the actor. await _audit.DidNotReceive().LogAsync( "originalUser", "DeployReconciled", "Instance", "73", "ReconcileAuditUser", Arg.Any(), Arg.Any()); } // ── DeploymentManager-012: LifecycleCommandTimeout must actually bound lifecycle commands ── [Fact] public async Task DisableInstanceAsync_SiteUnresponsive_LifecycleCommandTimeoutBoundsTheWait() { // The site never replies to the DisableInstanceCommand. A short // LifecycleCommandTimeout must abort the wait quickly -- if the option // is dead code the call would instead hang until CommunicationOptions // .LifecycleTimeout (much longer) elapses. var instance = new Instance("StuckInst") { Id = 60, SiteId = 1, State = InstanceState.Enabled }; _repo.GetInstanceByIdAsync(60, Arg.Any()).Returns(instance); // Probe drops every message -> no reply ever arrives. 
var commActor = Sys.ActorOf(Props.Create(() => new SilentProbeActor())); var comms = new CommunicationService( Options.Create(new CommunicationOptions { // Long communication-layer timeout: if LifecycleCommandTimeout // were dead, the test would wait this long. LifecycleTimeout = TimeSpan.FromSeconds(30) }), NullLogger.Instance); comms.SetCommunicationActor(commActor); var siteRepo = CreateSiteRepoStub(); var service = new DeploymentService( _repo, siteRepo, _pipeline, comms, _lockManager, _audit, new DiffService(), new DeploymentStatusNotifier(NullLogger.Instance), Options.Create(new DeploymentManagerOptions { OperationLockTimeout = TimeSpan.FromSeconds(5), LifecycleCommandTimeout = TimeSpan.FromMilliseconds(300) }), NullLogger.Instance); var sw = System.Diagnostics.Stopwatch.StartNew(); var result = await service.DisableInstanceAsync(60, "admin"); sw.Stop(); Assert.True(result.IsFailure); // The 300ms LifecycleCommandTimeout bounded the wait well under the // 30s communication-layer timeout. Assert.True(sw.Elapsed < TimeSpan.FromSeconds(10), $"Lifecycle command was not bounded by LifecycleCommandTimeout (took {sw.Elapsed})."); } /// Stand-in actor that never replies to anything. private class SilentProbeActor : ReceiveActor { public SilentProbeActor() => ReceiveAny(_ => { }); } // ── DeploymentManager-019: lifecycle timeouts must write an audit entry ── /// /// DeploymentManager-019: when a Disable times out at the site, the /// operator's attempted action must still be recorded in the audit log /// with the documented DisableTimedOut action — pre-fix nothing /// was written and the audit trail was silent about the attempt. 
///
[Fact]
public async Task DisableInstanceAsync_LifecycleTimeout_WritesDisableTimedOutAuditEntry()
{
    var instance = new Instance("TimeoutAuditInst")
    {
        Id = 61,
        SiteId = 1,
        State = InstanceState.Enabled,
    };
    _repo.GetInstanceByIdAsync(61, Arg.Any()).Returns(instance);

    // The silent probe never replies, so the Disable command cannot complete normally.
    var commActor = Sys.ActorOf(Props.Create(() => new SilentProbeActor()));
    var comms = new CommunicationService(
        Options.Create(new CommunicationOptions
        {
            LifecycleTimeout = TimeSpan.FromSeconds(30)
        }),
        NullLogger.Instance);
    comms.SetCommunicationActor(commActor);

    var siteRepo = CreateSiteRepoStub();
    // Short service-level deadline, far below the 30s communication-layer timeout above.
    var deadline = TimeSpan.FromMilliseconds(300);
    var service = new DeploymentService(
        _repo, siteRepo, _pipeline, comms, _lockManager, _audit,
        new DiffService(),
        new DeploymentStatusNotifier(NullLogger.Instance),
        Options.Create(new DeploymentManagerOptions
        {
            OperationLockTimeout = TimeSpan.FromSeconds(5),
            LifecycleCommandTimeout = deadline,
        }),
        NullLogger.Instance);

    var result = await service.DisableInstanceAsync(61, "operator-jane");

    Assert.True(result.IsFailure);

    // The DisableTimedOut audit entry must have been written. Pre-fix the
    // catch block returned without calling _auditService at all.
    await _audit.Received(1).LogAsync(
        "operator-jane",
        "DisableTimedOut",
        "Instance",
        "61",
        instance.UniqueName,
        Arg.Any(),
        Arg.Any());
}

///
/// DeploymentManager-019: same audit guarantee for the Enable path.
///
[Fact]
public async Task EnableInstanceAsync_LifecycleTimeout_WritesEnableTimedOutAuditEntry()
{
    // The instance starts Disabled, the state from which Enable is invoked below.
    var instance = new Instance("EnableTimeoutInst")
    {
        Id = 62,
        SiteId = 1,
        State = InstanceState.Disabled,
    };
    _repo.GetInstanceByIdAsync(62, Arg.Any()).Returns(instance);

    // The silent probe never replies, so the Enable command cannot complete normally.
    var commActor = Sys.ActorOf(Props.Create(() => new SilentProbeActor()));
    var comms = new CommunicationService(
        Options.Create(new CommunicationOptions
        {
            LifecycleTimeout = TimeSpan.FromSeconds(30)
        }),
        NullLogger.Instance);
    comms.SetCommunicationActor(commActor);

    var siteRepo = CreateSiteRepoStub();
    var service = new DeploymentService(
        _repo, siteRepo, _pipeline, comms, _lockManager, _audit,
        new DiffService(),
        new DeploymentStatusNotifier(NullLogger.Instance),
        Options.Create(new DeploymentManagerOptions
        {
            OperationLockTimeout = TimeSpan.FromSeconds(5),
            // Short service-level deadline, far below the 30s communication-layer timeout above.
            LifecycleCommandTimeout = TimeSpan.FromMilliseconds(300),
        }),
        NullLogger.Instance);

    var result = await service.EnableInstanceAsync(62, "operator-jane");

    Assert.True(result.IsFailure);

    // Exactly one EnableTimedOut audit entry, attributed to the calling user.
    await _audit.Received(1).LogAsync(
        "operator-jane",
        "EnableTimedOut",
        "Instance",
        "62",
        instance.UniqueName,
        Arg.Any(),
        Arg.Any());
}

///
/// DeploymentManager-019: same audit guarantee for the Delete path.
///
[Fact]
public async Task DeleteInstanceAsync_LifecycleTimeout_WritesDeleteTimedOutAuditEntry()
{
    var instance = new Instance("DeleteTimeoutInst")
    {
        Id = 63,
        SiteId = 1,
        State = InstanceState.Enabled,
    };
    _repo.GetInstanceByIdAsync(63, Arg.Any()).Returns(instance);

    // The silent probe never replies, so the Delete command cannot complete normally.
    var commActor = Sys.ActorOf(Props.Create(() => new SilentProbeActor()));
    var comms = new CommunicationService(
        Options.Create(new CommunicationOptions
        {
            LifecycleTimeout = TimeSpan.FromSeconds(30)
        }),
        NullLogger.Instance);
    comms.SetCommunicationActor(commActor);

    var siteRepo = CreateSiteRepoStub();
    var service = new DeploymentService(
        _repo, siteRepo, _pipeline, comms, _lockManager, _audit,
        new DiffService(),
        new DeploymentStatusNotifier(NullLogger.Instance),
        Options.Create(new DeploymentManagerOptions
        {
            OperationLockTimeout = TimeSpan.FromSeconds(5),
            // Short service-level deadline, far below the 30s communication-layer timeout above.
            LifecycleCommandTimeout = TimeSpan.FromMilliseconds(300),
        }),
        NullLogger.Instance);

    var result = await service.DeleteInstanceAsync(63, "operator-jane");

    Assert.True(result.IsFailure);

    // Exactly one DeleteTimedOut audit entry, attributed to the calling user.
    await _audit.Received(1).LogAsync(
        "operator-jane",
        "DeleteTimedOut",
        "Instance",
        "63",
        instance.UniqueName,
        Arg.Any(),
        Arg.Any());
}

// ── DeploymentManager-003: post-success persistence must commit the Success status ──

[Fact]
public async Task DeployInstanceAsync_SiteSucceeds_SnapshotWriteFails_RecordStillCommittedSuccess()
{
    // The site applies the deployment (response Success), but storing the
    // deployed-config snapshot afterwards throws. The deployment record's
    // Success status MUST still be durably committed -- otherwise central
    // and site diverge: the site runs the new config while central shows a
    // non-Success record forever.
    var instance = new Instance("SnapFailInst") { Id = 20, SiteId = 1, State = InstanceState.NotDeployed };
    _repo.GetInstanceByIdAsync(20, Arg.Any()).Returns(instance);
    SetupValidPipeline(20, "SnapFailInst", "sha256:target");
    _repo.GetCurrentDeploymentStatusAsync(20, Arg.Any())
        .Returns((DeploymentRecord?)null);

    // Capture the deployment record the service creates so its final status can be inspected.
    DeploymentRecord? captured = null;
    await _repo.AddDeploymentRecordAsync(
        Arg.Do(r => captured = r),
        Arg.Any());

    // The snapshot store throws.
    _repo.GetDeployedSnapshotByInstanceIdAsync(20, Arg.Any())
        .Returns((DeployedConfigSnapshot?)null);
    _repo.AddDeployedSnapshotAsync(Arg.Any(), Arg.Any())
        .Returns(_ => throw new InvalidOperationException("snapshot store unavailable"));

    var counters = new ReconcileProbeCounters();
    var commActor = Sys.ActorOf(Props.Create(() =>
        new ReconcileProbeActor(counters, siteHash: "sha256:target", failQuery: false)));
    var service = CreateServiceWithCommActor(commActor);

    var result = await service.DeployInstanceAsync(20, "admin");

    // The site succeeded -> the deployment is reported successful.
    Assert.True(result.IsSuccess);
    Assert.NotNull(captured);
    Assert.Equal(DeploymentStatus.Success, captured!.Status);

    // The Success status was committed (a SaveChanges happened with the
    // record in Success state) BEFORE the snapshot write was attempted.
    // NOTE(review): Received() only proves such an update call happened; it
    // does not by itself assert ordering relative to the snapshot write.
    await _repo.Received().UpdateDeploymentRecordAsync(
        Arg.Is(r => r.Status == DeploymentStatus.Success),
        Arg.Any());
}

/// <summary>
/// Per-test counters for <see cref="SerializationProbeActor"/>. DeploymentManager-024:
/// each test owns its own instance, passed into the actor's constructor, so
/// counters are no longer shared static state that races under parallel
/// test execution.
/// </summary>
private sealed class SerializationProbeCounters
{
    // Highest value Current has reached so far.
    public int MaxConcurrent;

    // Number of deploys whose deferred reply has not been sent yet.
    public int Current;

    // Lock object guarding both counters.
    public readonly object Gate = new();
}

///
/// Stand-in CentralCommunicationActor that measures deploy concurrency. It
/// defers each deploy reply via the scheduler, so if two deploys for the
/// same instance were NOT serialized by the operation lock their windows
/// would overlap and MaxConcurrent would exceed 1.
///
private class SerializationProbeActor : ReceiveActor, IWithTimers
{
    // Required by IWithTimers; initialised with null! here and used below to defer replies.
    public ITimerScheduler Timers { get; set; } = null!;

    public SerializationProbeActor(SerializationProbeCounters counters)
    {
        Receive(env =>
        {
            if (env.Message is DeployInstanceCommand d)
            {
                // A deploy window opens: bump the in-flight count and track its peak.
                lock (counters.Gate)
                {
                    counters.Current++;
                    if (counters.Current > counters.MaxConcurrent)
                        counters.MaxConcurrent = counters.Current;
                }

                // Capture the sender now; the reply is sent later from the
                // CompleteDeploy handler below.
                var replyTo = Sender;
                // Defer the reply so the deploy "window" stays open long
                // enough for a non-serialized second deploy to overlap.
                Timers.StartSingleTimer(
                    d.DeploymentId,
                    new CompleteDeploy(d, replyTo),
                    TimeSpan.FromMilliseconds(150));
            }
            else if (env.Message is DeploymentStateQueryRequest q)
            {
                // State queries are answered immediately with a fixed
                // false / null / null payload.
                Sender.Tell(new DeploymentStateQueryResponse(
                    q.CorrelationId, q.InstanceUniqueName, false, null, null, DateTimeOffset.UtcNow));
            }
        });

        Receive(c =>
        {
            // The deferred window closes: drop the in-flight count, then reply Success.
            lock (counters.Gate) { counters.Current--; }
            c.ReplyTo.Tell(new DeploymentStatusResponse(
                c.Command.DeploymentId,
                c.Command.InstanceUniqueName,
                DeploymentStatus.Success,
                null,
                DateTimeOffset.UtcNow));
        });
    }

    // Timer payload pairing the deploy command with the actor to reply to.
    private sealed record CompleteDeploy(DeployInstanceCommand Command, IActorRef ReplyTo);
}

/// <summary>
/// Per-test counters for <see cref="ReconcileProbeActor"/>. DeploymentManager-024:
/// each test owns its own instance so counter assertions cannot race across
/// tests running in parallel.
/// </summary>
private sealed class ReconcileProbeCounters
{
    // Number of DeploymentStateQueryRequest messages the probe received.
    public int QueryCount;

    // Number of DeployInstanceCommand messages the probe received.
    public int DeployCount;
}

///
/// Stand-in CentralCommunicationActor for reconciliation tests. Counts the
/// site queries and deploy commands it receives (into a per-test
/// <see cref="ReconcileProbeCounters"/> instance), answers queries with a
/// configurable applied revision hash, and (optionally) drops the query to
/// simulate an unreachable site so the central Ask times out.
///
private class ReconcileProbeActor : ReceiveActor
{
    // counters:  per-test tallies incremented for each query / deploy received.
    // siteHash:  revision hash placed in the reply to every state query.
    // failQuery: when true, state queries are counted but never answered.
    public ReconcileProbeActor(ReconcileProbeCounters counters, string siteHash, bool failQuery)
    {
        Receive(env =>
        {
            switch (env.Message)
            {
                case DeploymentStateQueryRequest q:
                    // Counted even when the reply is dropped.
                    Interlocked.Increment(ref counters.QueryCount);
                    if (!failQuery)
                    {
                        Sender.Tell(new DeploymentStateQueryResponse(
                            q.CorrelationId, q.InstanceUniqueName, true,
                            "dep-applied", siteHash, DateTimeOffset.UtcNow));
                    }
                    // failQuery: drop the message -> caller's Ask times out.
                    break;

                case DeployInstanceCommand d:
                    // Every deploy command is counted and answered with Success.
                    Interlocked.Increment(ref counters.DeployCount);
                    Sender.Tell(new DeploymentStatusResponse(
                        d.DeploymentId, d.InstanceUniqueName,
                        DeploymentStatus.Success, null, DateTimeOffset.UtcNow));
                    break;

                // The three lifecycle commands get the same immediate reply
                // shape (true, null) and are not counted.
                case DisableInstanceCommand dis:
                    Sender.Tell(new InstanceLifecycleResponse(
                        dis.CommandId, dis.InstanceUniqueName, true, null, DateTimeOffset.UtcNow));
                    break;

                case EnableInstanceCommand en:
                    Sender.Tell(new InstanceLifecycleResponse(
                        en.CommandId, en.InstanceUniqueName, true, null, DateTimeOffset.UtcNow));
                    break;

                case DeleteInstanceCommand del:
                    Sender.Tell(new InstanceLifecycleResponse(
                        del.CommandId, del.InstanceUniqueName, true, null, DateTimeOffset.UtcNow));
                    break;
            }
        });
    }
}
} // end of DeploymentServiceTests