- WP-1-3: Central/site failover + dual-node recovery tests (17 tests) - WP-4: Performance testing framework for target scale (7 tests) - WP-5: Security hardening (LDAPS, JWT key length, no secrets in logs) (11 tests) - WP-6: Script sandboxing adversarial tests (28 tests, all forbidden APIs) - WP-7: Recovery drill test scaffolds (5 tests) - WP-8: Observability validation (structured logs, correlation IDs, metrics) (6 tests) - WP-9: Message contract compatibility (forward/backward compat) (18 tests) - WP-10: Deployment packaging (installation guide, production checklist, topology) - WP-11: Operational runbooks (failover, troubleshooting, maintenance) 92 new tests, all passing. Zero warnings.
192 lines
7.7 KiB
C#
192 lines
7.7 KiB
C#
using System.Text.Json;
|
|
using Microsoft.Extensions.Logging.Abstractions;
|
|
using ScadaLink.Commons.Messages.Deployment;
|
|
using ScadaLink.Commons.Types.Enums;
|
|
using ScadaLink.StoreAndForward;
|
|
|
|
namespace ScadaLink.IntegrationTests;
|
|
|
|
/// <summary>
/// WP-7 (Phase 8): Recovery drill test scaffolds.
/// Mid-deploy failover, communication drops, and site restart with persisted configs.
/// </summary>
public class RecoveryDrillTests
{
    /// <summary>
    /// Walks through the message sequence of a mid-deploy central failover:
    /// status query, re-issued deploy command, and final success status.
    /// Only the message contracts are constructed and inspected here; no
    /// actors or transport are involved.
    /// </summary>
    [Trait("Category", "Integration")]
    [Fact]
    public void MidDeployFailover_SiteStateQuery_ThenRedeploy()
    {
        // Scenario: Deployment in progress, central node fails over.
        // New central node queries site for current deployment state, then re-issues deploy.

        // Step 1: Deployment started
        var initialStatus = new DeploymentStatusResponse(
            "dep-1", "pump-station-1", DeploymentStatus.InProgress,
            null, DateTimeOffset.UtcNow);

        Assert.Equal(DeploymentStatus.InProgress, initialStatus.Status);

        // Step 2: Central failover — new node queries site state
        // Site reports current status (InProgress or whatever it actually is)
        var queriedStatus = new DeploymentStatusResponse(
            "dep-1", "pump-station-1", DeploymentStatus.InProgress,
            null, DateTimeOffset.UtcNow.AddSeconds(5));

        Assert.Equal(DeploymentStatus.InProgress, queriedStatus.Status);

        // Step 3: Central re-deploys with same deployment ID + revision hash
        // Idempotent: same deploymentId + revisionHash = no-op if already applied
        var redeployCommand = new DeployInstanceCommand(
            "dep-1", "pump-station-1", "abc123",
            """{"attributes":[],"scripts":[],"alarms":[]}""",
            "admin", DateTimeOffset.UtcNow.AddSeconds(10));

        Assert.Equal("dep-1", redeployCommand.DeploymentId);
        Assert.Equal("abc123", redeployCommand.RevisionHash);

        // Step 4: Site applies (idempotent — revision hash matches)
        var completedStatus = new DeploymentStatusResponse(
            "dep-1", "pump-station-1", DeploymentStatus.Success,
            null, DateTimeOffset.UtcNow.AddSeconds(15));

        Assert.Equal(DeploymentStatus.Success, completedStatus.Status);
    }

    /// <summary>
    /// Registers a delivery handler that always throws, enqueues one message,
    /// and asserts that the message was accepted, reported as buffered, handed
    /// to the handler exactly once, and counted in the buffer depth.
    /// </summary>
    [Trait("Category", "Integration")]
    [Fact]
    public async Task CommunicationDrop_DuringArtifactDeployment_BuffersForRetry()
    {
        // Scenario: Communication drops while deploying system-wide artifacts.
        // The deployment command is buffered by S&F and retried when connection restores.
        var dbPath = Path.Combine(Path.GetTempPath(), $"sf_commdrop_{Guid.NewGuid():N}.db");
        var connStr = $"Data Source={dbPath}";

        try
        {
            var storage = new StoreAndForwardStorage(connStr, NullLogger<StoreAndForwardStorage>.Instance);
            await storage.InitializeAsync();

            var options = new StoreAndForwardOptions
            {
                DefaultRetryInterval = TimeSpan.FromSeconds(5),
                DefaultMaxRetries = 100,
            };
            var service = new StoreAndForwardService(storage, options, NullLogger<StoreAndForwardService>.Instance);
            await service.StartAsync();

            // Once started, the service must be stopped even when an assertion
            // below fails; otherwise it is left running while the outer finally
            // deletes the database file it was given.
            try
            {
                // Register a handler that simulates communication failure
                var callCount = 0;
                service.RegisterDeliveryHandler(StoreAndForwardCategory.ExternalSystem,
                    _ =>
                    {
                        callCount++;
                        throw new InvalidOperationException("Connection to site lost");
                    });

                // Attempt delivery — should fail and buffer
                var result = await service.EnqueueAsync(
                    StoreAndForwardCategory.ExternalSystem,
                    "site-01/artifacts",
                    """{"deploymentId":"dep-1","artifacts":["shared-script-v2"]}""");

                Assert.True(result.Accepted);
                Assert.True(result.WasBuffered);
                Assert.Equal(1, callCount);

                // Verify the message is in the buffer (single lookup instead of
                // ContainsKey followed by the indexer)
                var depths = await service.GetBufferDepthAsync();
                Assert.True(depths.TryGetValue(StoreAndForwardCategory.ExternalSystem, out var externalDepth));
                Assert.Equal(1, externalDepth);
            }
            finally
            {
                await service.StopAsync();
            }
        }
        finally
        {
            DeleteIfExists(dbPath);
        }
    }

    /// <summary>
    /// Writes three pending store-and-forward messages through one storage
    /// instance, then opens a second storage instance on the same database
    /// file and asserts all three are returned for retry with their origin
    /// instance names intact.
    /// </summary>
    [Trait("Category", "Integration")]
    [Fact]
    public async Task SiteRestart_WithPersistedConfigs_RebuildFromSQLite()
    {
        // Scenario: Site restarts. Deployed instance configs are persisted in SQLite.
        // On startup, the Deployment Manager Actor reads configs from SQLite and
        // recreates Instance Actors.
        // NOTE(review): this scaffold only exercises persistence of S&F messages
        // across storage instances; no Deployment Manager or Instance Actors are
        // created here.
        var dbPath = Path.Combine(Path.GetTempPath(), $"sf_restart_{Guid.NewGuid():N}.db");
        var connStr = $"Data Source={dbPath}";

        try
        {
            // Pre-restart: S&F messages in buffer
            var storage = new StoreAndForwardStorage(connStr, NullLogger<StoreAndForwardStorage>.Instance);
            await storage.InitializeAsync();

            for (var i = 0; i < 3; i++)
            {
                await storage.EnqueueAsync(new StoreAndForwardMessage
                {
                    Id = $"msg-{i}",
                    Category = StoreAndForwardCategory.ExternalSystem,
                    Target = "api-endpoint",
                    PayloadJson = $$"""{"instanceName":"machine-{{i}}","value":42}""",
                    MaxRetries = 50,
                    RetryIntervalMs = 30000,
                    CreatedAt = DateTimeOffset.UtcNow,
                    Status = StoreAndForwardMessageStatus.Pending,
                    OriginInstanceName = $"machine-{i}"
                });
            }

            // Post-restart: new storage instance reads same DB
            var restartedStorage = new StoreAndForwardStorage(connStr, NullLogger<StoreAndForwardStorage>.Instance);
            await restartedStorage.InitializeAsync();

            var pending = await restartedStorage.GetMessagesForRetryAsync();
            Assert.Equal(3, pending.Count);

            // Verify each message retains its origin instance
            Assert.Contains(pending, m => m.OriginInstanceName == "machine-0");
            Assert.Contains(pending, m => m.OriginInstanceName == "machine-1");
            Assert.Contains(pending, m => m.OriginInstanceName == "machine-2");
        }
        finally
        {
            DeleteIfExists(dbPath);
        }
    }

    /// <summary>
    /// Builds two deploy commands that differ only in timestamp and asserts
    /// that their deployment ID, revision hash, and instance name match.
    /// The site-side duplicate detection itself is not exercised here.
    /// </summary>
    [Fact]
    public void DeploymentIdempotency_SameRevisionHash_NoOp()
    {
        // Verify the deployment model supports idempotency via revision hash.
        // Two deploy commands with the same deploymentId + revisionHash should
        // produce the same result (site can detect the duplicate and skip).
        var cmd1 = new DeployInstanceCommand(
            "dep-1", "pump-1", "rev-abc123",
            """{"attributes":[]}""", "admin", DateTimeOffset.UtcNow);

        var cmd2 = new DeployInstanceCommand(
            "dep-1", "pump-1", "rev-abc123",
            """{"attributes":[]}""", "admin", DateTimeOffset.UtcNow.AddSeconds(30));

        Assert.Equal(cmd1.DeploymentId, cmd2.DeploymentId);
        Assert.Equal(cmd1.RevisionHash, cmd2.RevisionHash);
        Assert.Equal(cmd1.InstanceUniqueName, cmd2.InstanceUniqueName);
    }

    /// <summary>
    /// Asserts that a <c>FlattenedConfigurationSnapshot</c> exposes the
    /// revision hash it was constructed with.
    /// </summary>
    [Fact]
    public void FlattenedConfigSnapshot_ContainsRevisionHash()
    {
        // The FlattenedConfigurationSnapshot includes a revision hash for staleness detection.
        var snapshot = new FlattenedConfigurationSnapshot(
            "inst-1", "rev-abc123",
            """{"attributes":[],"scripts":[],"alarms":[]}""",
            DateTimeOffset.UtcNow);

        Assert.Equal("rev-abc123", snapshot.RevisionHash);
    }

    /// <summary>
    /// Removes the temporary database file created by a test, if it is present.
    /// </summary>
    private static void DeleteIfExists(string path)
    {
        if (File.Exists(path))
        {
            File.Delete(path);
        }
    }
}
|