feat(controlplane): ConfigPublishCoordinator deadline timeout + failover PreStart recovery
This commit is contained in:
@@ -0,0 +1,120 @@
|
||||
using Akka.Actor;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Shouldly;
|
||||
using Xunit;
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Messages.Deploy;
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Types;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
|
||||
using ZB.MOM.WW.OtOpcUa.ControlPlane.Coordinators;
|
||||
using ZB.MOM.WW.OtOpcUa.ControlPlane.Tests.Harness;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.ControlPlane.Tests;
|
||||
|
||||
/// <summary>
/// Tests for <see cref="ConfigPublishCoordinator"/> covering the <c>DeadlineElapsed</c> message
/// and the recovery of an in-flight deployment when a fresh coordinator instance is started.
/// </summary>
/// <remarks>
/// Every test writes all of its database rows before the coordinator actor is created, so
/// anything the actor reads while starting up cannot race the inserts.
/// </remarks>
public sealed class ConfigPublishCoordinatorTimeoutTests : ControlPlaneActorTestBase
{
    private static readonly RevisionHash TestRevision = RevisionHash.Parse(new string('b', 64));

    /// <summary>Upper bound for polling the database until the expected status shows up.</summary>
    private static readonly TimeSpan AssertTimeout = TimeSpan.FromSeconds(3);

    /// <summary>
    /// Sending <c>DeadlineElapsed</c> for the seeded deployment must move it to
    /// <see cref="DeploymentStatus.TimedOut"/>.
    /// </summary>
    [Fact]
    public void DeadlineElapsed_for_current_deployment_marks_TimedOut()
    {
        var dbFactory = NewInMemoryDbFactory();
        var deploymentId = SeedDispatchingDeployment(dbFactory);

        // Seed a NodeDeploymentState row so the coordinator doesn't see "zero expected acks"
        // and short-circuit to Sealed. We pretend a driver node exists in the cluster.
        // The row is written BEFORE the actor is created (same order as
        // PreStart_recovers_inflight_deployment_state) so the actor's startup can never
        // observe the deployment without its pending node.
        SeedNodeStates(dbFactory, deploymentId, ("phantom-driver", NodeDeploymentStatus.Applying));

        var actor = Sys.ActorOf(ConfigPublishCoordinator.Props(dbFactory, TimeSpan.FromMilliseconds(150)));

        // Drive the deadline ourselves rather than depending on the configured 150 ms value:
        // tell the actor the deadline elapsed for this id.
        actor.Tell(new ConfigPublishCoordinator.DeadlineElapsed(deploymentId));

        AwaitDeploymentStatus(dbFactory, DeploymentStatus.TimedOut);
    }

    /// <summary>
    /// A <c>DeadlineElapsed</c> that names a different deployment must not time out the seeded one.
    /// </summary>
    [Fact]
    public void Stale_DeadlineElapsed_for_other_deployment_is_ignored()
    {
        var dbFactory = NewInMemoryDbFactory();
        SeedDispatchingDeployment(dbFactory);
        var actor = Sys.ActorOf(ConfigPublishCoordinator.Props(dbFactory, TimeSpan.FromMinutes(1)));

        // Tell the actor a deadline elapsed for a completely different deployment id.
        actor.Tell(new ConfigPublishCoordinator.DeadlineElapsed(DeploymentId.NewId()));

        // Nothing is expected on the test actor; this window simply gives the coordinator
        // time to process the message before the database is inspected.
        ExpectNoMsg(TimeSpan.FromMilliseconds(250));

        using var db = dbFactory.CreateDbContext();
        var status = db.Deployments.Single().Status;

        // Targeted check first so a regression reports "should not be TimedOut" directly;
        // the second check then pins the set of states the test accepts.
        status.ShouldNotBe(DeploymentStatus.TimedOut);
        status.ShouldBeOneOf(DeploymentStatus.Dispatching, DeploymentStatus.AwaitingApplyAcks, DeploymentStatus.Sealed);
    }

    /// <summary>
    /// A coordinator started against a deployment that is already awaiting acks, with one node
    /// Applied and one still Applying, must seal the deployment once the missing ack arrives.
    /// </summary>
    [Fact]
    public void PreStart_recovers_inflight_deployment_state()
    {
        var dbFactory = NewInMemoryDbFactory();
        var deploymentId = SeedDispatchingDeployment(dbFactory, status: DeploymentStatus.AwaitingApplyAcks);

        // Seed two NodeDeploymentState rows — one already Applied, one still Applying.
        SeedNodeStates(
            dbFactory,
            deploymentId,
            ("driver-a", NodeDeploymentStatus.Applied),
            ("driver-b", NodeDeploymentStatus.Applying));

        // Start a fresh coordinator — simulates singleton failover to this node.
        var actor = Sys.ActorOf(ConfigPublishCoordinator.Props(dbFactory, TimeSpan.FromMinutes(5)));

        // Send the missing ACK; the recovered state should expect exactly that node, and the
        // deployment should now seal (both nodes acked Applied).
        actor.Tell(new ApplyAck(deploymentId, NodeId.Parse("driver-b"),
            ApplyAckOutcome.Applied, null, CorrelationId.NewId()));

        AwaitDeploymentStatus(dbFactory, DeploymentStatus.Sealed);
    }

    /// <summary>
    /// Polls the database until the single seeded deployment reports <paramref name="expected"/>,
    /// failing the test if that does not happen within <see cref="AssertTimeout"/>.
    /// </summary>
    /// <param name="dbFactory">Factory for the database shared with the coordinator under test.</param>
    /// <param name="expected">Status the deployment row must reach.</param>
    private void AwaitDeploymentStatus(
        IDbContextFactory<OtOpcUaConfigDbContext> dbFactory,
        DeploymentStatus expected)
    {
        AwaitAssert(() =>
        {
            using var db = dbFactory.CreateDbContext();
            db.Deployments.Single().Status.ShouldBe(expected);
        }, duration: AssertTimeout);
    }

    /// <summary>
    /// Inserts one NodeDeploymentState row per entry in <paramref name="nodes"/> for the given
    /// deployment and saves them in a single <c>SaveChanges</c> call.
    /// </summary>
    /// <param name="dbFactory">Factory for the database shared with the coordinator under test.</param>
    /// <param name="deploymentId">Deployment the rows belong to.</param>
    /// <param name="nodes">Node id and per-node status for each row to insert.</param>
    private static void SeedNodeStates(
        IDbContextFactory<OtOpcUaConfigDbContext> dbFactory,
        DeploymentId deploymentId,
        params (string Node, NodeDeploymentStatus Status)[] nodes)
    {
        using var db = dbFactory.CreateDbContext();
        foreach (var node in nodes)
        {
            db.NodeDeploymentStates.Add(new Configuration.Entities.NodeDeploymentState
            {
                NodeId = node.Node,
                DeploymentId = deploymentId.Value,
                Status = node.Status,
            });
        }

        db.SaveChanges();
    }

    /// <summary>
    /// Inserts a single Deployment row with a fresh id, the shared test revision hash and the
    /// requested status, and returns the id that was generated.
    /// </summary>
    /// <param name="dbFactory">Factory for the database shared with the coordinator under test.</param>
    /// <param name="status">Initial status of the row; defaults to Dispatching.</param>
    /// <returns>The id of the deployment that was inserted.</returns>
    private static DeploymentId SeedDispatchingDeployment(
        IDbContextFactory<OtOpcUaConfigDbContext> dbFactory,
        DeploymentStatus status = DeploymentStatus.Dispatching)
    {
        var id = DeploymentId.NewId();
        using var db = dbFactory.CreateDbContext();
        db.Deployments.Add(new Configuration.Entities.Deployment
        {
            DeploymentId = id.Value,
            RevisionHash = TestRevision.Value,
            Status = status,
            CreatedBy = "test",
        });
        db.SaveChanges();
        return id;
    }
}
|
||||
Reference in New Issue
Block a user