Phase 8: Production readiness — failover tests, security hardening, sandboxing, deployment docs
- WP-1-3: Central/site failover + dual-node recovery tests (17 tests)
- WP-4: Performance testing framework for target scale (7 tests)
- WP-5: Security hardening (LDAPS, JWT key length, no secrets in logs) (11 tests)
- WP-6: Script sandboxing adversarial tests (28 tests, all forbidden APIs)
- WP-7: Recovery drill test scaffolds (5 tests)
- WP-8: Observability validation (structured logs, correlation IDs, metrics) (6 tests)
- WP-9: Message contract compatibility (forward/backward compat) (18 tests)
- WP-10: Deployment packaging (installation guide, production checklist, topology)
- WP-11: Operational runbooks (failover, troubleshooting, maintenance)

92 new tests, all passing. Zero warnings.
This commit is contained in:
191
tests/ScadaLink.IntegrationTests/RecoveryDrillTests.cs
Normal file
191
tests/ScadaLink.IntegrationTests/RecoveryDrillTests.cs
Normal file
@@ -0,0 +1,191 @@
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using ScadaLink.Commons.Messages.Deployment;
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
using ScadaLink.StoreAndForward;
|
||||
|
||||
namespace ScadaLink.IntegrationTests;
|
||||
|
||||
/// <summary>
/// WP-7 (Phase 8): Recovery drill test scaffolds.
/// Covers mid-deploy failover, communication drops, and site restart with a persisted
/// store-and-forward buffer. The failover and idempotency tests only construct the message
/// contracts and assert on their fields; they do not drive a running cluster.
/// </summary>
public class RecoveryDrillTests
{
    /// <summary>
    /// Walks through the message sequence of a mid-deploy central failover:
    /// status in progress, status re-queried, redeploy command re-issued, status success.
    /// </summary>
    [Trait("Category", "Integration")]
    [Fact]
    public void MidDeployFailover_SiteStateQuery_ThenRedeploy()
    {
        // Scenario: Deployment in progress, central node fails over.
        // New central node queries site for current deployment state, then re-issues deploy.

        // Step 1: Deployment started
        var initialStatus = new DeploymentStatusResponse(
            "dep-1", "pump-station-1", DeploymentStatus.InProgress,
            null, DateTimeOffset.UtcNow);

        Assert.Equal(DeploymentStatus.InProgress, initialStatus.Status);

        // Step 2: Central failover — new node queries site state
        // Site reports current status (InProgress or whatever it actually is)
        var queriedStatus = new DeploymentStatusResponse(
            "dep-1", "pump-station-1", DeploymentStatus.InProgress,
            null, DateTimeOffset.UtcNow.AddSeconds(5));

        Assert.Equal(DeploymentStatus.InProgress, queriedStatus.Status);

        // Step 3: Central re-deploys with same deployment ID + revision hash
        // Idempotent: same deploymentId + revisionHash = no-op if already applied
        var redeployCommand = new DeployInstanceCommand(
            "dep-1", "pump-station-1", "abc123",
            """{"attributes":[],"scripts":[],"alarms":[]}""",
            "admin", DateTimeOffset.UtcNow.AddSeconds(10));

        Assert.Equal("dep-1", redeployCommand.DeploymentId);
        Assert.Equal("abc123", redeployCommand.RevisionHash);

        // Step 4: Site applies (idempotent — revision hash matches)
        var completedStatus = new DeploymentStatusResponse(
            "dep-1", "pump-station-1", DeploymentStatus.Success,
            null, DateTimeOffset.UtcNow.AddSeconds(15));

        Assert.Equal(DeploymentStatus.Success, completedStatus.Status);
    }

    /// <summary>
    /// Enqueues a message whose delivery handler always throws and asserts that the
    /// store-and-forward service accepts it, reports it as buffered, invokes the handler
    /// exactly once, and shows a buffer depth of one for the category.
    /// </summary>
    [Trait("Category", "Integration")]
    [Fact]
    public async Task CommunicationDrop_DuringArtifactDeployment_BuffersForRetry()
    {
        // Scenario: Communication drops while deploying system-wide artifacts.
        // The deployment command is buffered by S&F and retried when connection restores.
        var dbPath = Path.Combine(Path.GetTempPath(), $"sf_commdrop_{Guid.NewGuid():N}.db");
        var connStr = $"Data Source={dbPath}";

        try
        {
            var storage = new StoreAndForwardStorage(connStr, NullLogger<StoreAndForwardStorage>.Instance);
            await storage.InitializeAsync();

            var options = new StoreAndForwardOptions
            {
                DefaultRetryInterval = TimeSpan.FromSeconds(5),
                DefaultMaxRetries = 100,
            };
            var service = new StoreAndForwardService(storage, options, NullLogger<StoreAndForwardService>.Instance);
            await service.StartAsync();

            try
            {
                // Register a handler that simulates communication failure
                var callCount = 0;
                service.RegisterDeliveryHandler(StoreAndForwardCategory.ExternalSystem,
                    _ =>
                    {
                        callCount++;
                        throw new InvalidOperationException("Connection to site lost");
                    });

                // Attempt delivery — should fail and buffer
                var result = await service.EnqueueAsync(
                    StoreAndForwardCategory.ExternalSystem,
                    "site-01/artifacts",
                    """{"deploymentId":"dep-1","artifacts":["shared-script-v2"]}""");

                Assert.True(result.Accepted);
                Assert.True(result.WasBuffered);
                Assert.Equal(1, callCount);

                // Verify the message is in the buffer (single lookup instead of ContainsKey + indexer)
                var depths = await service.GetBufferDepthAsync();
                Assert.True(depths.TryGetValue(StoreAndForwardCategory.ExternalSystem, out var bufferedCount));
                Assert.Equal(1, bufferedCount);
            }
            finally
            {
                // Stop the service even when an assertion above fails, so a started service
                // is never left running while its database file is deleted below.
                await service.StopAsync();
            }
        }
        finally
        {
            if (File.Exists(dbPath))
            {
                File.Delete(dbPath);
            }
        }
    }

    /// <summary>
    /// Writes three pending store-and-forward messages through one storage instance, then
    /// opens a second storage instance on the same SQLite file and asserts that all three
    /// messages are returned for retry with their origin instance names intact.
    /// </summary>
    [Trait("Category", "Integration")]
    [Fact]
    public async Task SiteRestart_WithPersistedConfigs_RebuildFromSQLite()
    {
        // Scenario: Site restarts. State persisted in SQLite must be readable by the
        // restarted process. This scaffold covers the store-and-forward buffer only;
        // it does not exercise the Deployment Manager Actor or Instance Actor re-creation.
        var dbPath = Path.Combine(Path.GetTempPath(), $"sf_restart_{Guid.NewGuid():N}.db");
        var connStr = $"Data Source={dbPath}";

        try
        {
            // Pre-restart: S&F messages in buffer
            var storage = new StoreAndForwardStorage(connStr, NullLogger<StoreAndForwardStorage>.Instance);
            await storage.InitializeAsync();

            for (var i = 0; i < 3; i++)
            {
                await storage.EnqueueAsync(new StoreAndForwardMessage
                {
                    Id = $"msg-{i}",
                    Category = StoreAndForwardCategory.ExternalSystem,
                    Target = "api-endpoint",
                    PayloadJson = $$"""{"instanceName":"machine-{{i}}","value":42}""",
                    MaxRetries = 50,
                    RetryIntervalMs = 30000,
                    CreatedAt = DateTimeOffset.UtcNow,
                    Status = StoreAndForwardMessageStatus.Pending,
                    OriginInstanceName = $"machine-{i}"
                });
            }

            // Post-restart: new storage instance reads same DB
            var restartedStorage = new StoreAndForwardStorage(connStr, NullLogger<StoreAndForwardStorage>.Instance);
            await restartedStorage.InitializeAsync();

            var pending = await restartedStorage.GetMessagesForRetryAsync();
            Assert.Equal(3, pending.Count);

            // Verify each message retains its origin instance
            Assert.Contains(pending, m => m.OriginInstanceName == "machine-0");
            Assert.Contains(pending, m => m.OriginInstanceName == "machine-1");
            Assert.Contains(pending, m => m.OriginInstanceName == "machine-2");
        }
        finally
        {
            if (File.Exists(dbPath))
            {
                File.Delete(dbPath);
            }
        }
    }

    /// <summary>
    /// Builds two deploy commands that differ only in timestamp and asserts that their
    /// deployment ID, revision hash, and instance name match.
    /// </summary>
    // NOTE(review): unlike the tests above, this one carries no Category trait — confirm whether intentional.
    [Fact]
    public void DeploymentIdempotency_SameRevisionHash_NoOp()
    {
        // Verify the deployment model supports idempotency via revision hash.
        // Two deploy commands with the same deploymentId + revisionHash should
        // produce the same result (site can detect the duplicate and skip).
        var cmd1 = new DeployInstanceCommand(
            "dep-1", "pump-1", "rev-abc123",
            """{"attributes":[]}""", "admin", DateTimeOffset.UtcNow);

        var cmd2 = new DeployInstanceCommand(
            "dep-1", "pump-1", "rev-abc123",
            """{"attributes":[]}""", "admin", DateTimeOffset.UtcNow.AddSeconds(30));

        Assert.Equal(cmd1.DeploymentId, cmd2.DeploymentId);
        Assert.Equal(cmd1.RevisionHash, cmd2.RevisionHash);
        Assert.Equal(cmd1.InstanceUniqueName, cmd2.InstanceUniqueName);
    }

    /// <summary>
    /// Asserts that a flattened configuration snapshot exposes the revision hash it was
    /// constructed with.
    /// </summary>
    // NOTE(review): unlike the first three tests, this one carries no Category trait — confirm whether intentional.
    [Fact]
    public void FlattenedConfigSnapshot_ContainsRevisionHash()
    {
        // The FlattenedConfigurationSnapshot includes a revision hash for staleness detection.
        var snapshot = new FlattenedConfigurationSnapshot(
            "inst-1", "rev-abc123",
            """{"attributes":[],"scripts":[],"alarms":[]}""",
            DateTimeOffset.UtcNow);

        Assert.Equal("rev-abc123", snapshot.RevisionHash);
    }
}
|
||||
Reference in New Issue
Block a user