fix(drivers): adopt corrected config via ApplyDelta while (re)connecting (#7)

A DriverInstanceActor stuck Reconnecting/Connecting now adopts a config delivered via ApplyDelta and
re-initialises with it, instead of dead-lettering and retrying the stale config forever. A monotonic
init generation supersedes the in-flight init so the corrected config always wins.
This commit is contained in:
Joseph Doherty
2026-06-14 17:15:28 -04:00
parent 806a252f25
commit 751786ec8c
2 changed files with 105 additions and 8 deletions
@@ -33,8 +33,8 @@ public sealed class DriverInstanceActor : ReceiveActor, IWithTimers
/// <summary>Default reconnect interval: 10 seconds.</summary>
public static readonly TimeSpan DefaultReconnectInterval = TimeSpan.FromSeconds(10);

/// <summary>Requests initialisation of the driver with the given config JSON.</summary>
public sealed record InitializeRequested(string DriverConfigJson);

/// <summary>
/// Result message for an init attempt that completed without throwing. <paramref name="Generation"/> is
/// the init-generation token of the attempt that produced it; handlers compare it against the actor's
/// latest generation and drop the message when it does not match.
/// </summary>
public sealed record InitializeSucceeded(int Generation);

/// <summary>
/// Result message for an init attempt that threw. <paramref name="Reason"/> is the exception message;
/// <paramref name="Generation"/> is the init-generation token of the attempt that produced it, used to
/// drop results from superseded attempts.
/// </summary>
public sealed record InitializeFailed(string Reason, int Generation);

/// <summary>Signals that a disconnect was observed, with a textual reason.</summary>
public sealed record DisconnectObserved(string Reason);

/// <summary>Delivers a new driver config JSON; <paramref name="Correlation"/> is echoed back in the
/// matching <see cref="ApplyResult"/>.</summary>
public sealed record ApplyDelta(string DriverConfigJson, CorrelationId Correlation);

/// <summary>Reply to an <see cref="ApplyDelta"/>: success flag, optional reason text, and the
/// correlation id copied from the request.</summary>
public sealed record ApplyResult(bool Success, string? Reason, CorrelationId Correlation);
@@ -86,6 +86,12 @@ public sealed class DriverInstanceActor : ReceiveActor, IWithTimers
private readonly ILoggingAdapter _log = Context.GetLogger();
private string? _currentConfigJson;
/// <summary>Monotonic token tagging each <see cref="InitializeAsync"/> attempt. An init result is
/// honoured only when its generation matches the latest; an older result is from a superseded attempt
/// (e.g. an <see cref="ApplyDelta"/> adopted a new config mid-(re)connect) and is dropped. Touched only
/// on the actor thread, so no lock is needed.</summary>
private int _initGeneration;
/// <summary>
/// Timestamps of recent Faulted-state transitions; used to compute the 5-minute error count.
/// No lock needed — every read/write site runs inside an Akka message handler, which is
@@ -222,8 +228,10 @@ public sealed class DriverInstanceActor : ReceiveActor, IWithTimers
// "write timeout". Synchronous Receive: Sender.Tell on the actor thread is safe (#4a-instance).
Receive<WriteAttribute>(_ =>
Sender.Tell(new WriteAttributeResult(false, "driver not connected")));
Receive<InitializeSucceeded>(_ =>
Receive<ApplyDelta>(AdoptConfigDuringInit);
Receive<InitializeSucceeded>(msg =>
{
if (msg.Generation != _initGeneration) return;
_log.Info("DriverInstance {Id}: connected", _driverInstanceId);
Become(Connected);
PublishHealthSnapshot();
@@ -232,6 +240,7 @@ public sealed class DriverInstanceActor : ReceiveActor, IWithTimers
});
Receive<InitializeFailed>(msg =>
{
if (msg.Generation != _initGeneration) return;
_log.Warning("DriverInstance {Id}: initialize failed: {Reason}", _driverInstanceId, msg.Reason);
RecordFault();
Become(Reconnecting);
@@ -306,8 +315,10 @@ public sealed class DriverInstanceActor : ReceiveActor, IWithTimers
// timeout on an inbound write to a transiently-down driver). Synchronous Receive (#4a-instance).
Receive<WriteAttribute>(_ =>
Sender.Tell(new WriteAttributeResult(false, "driver not connected")));
Receive<InitializeSucceeded>(_ =>
Receive<ApplyDelta>(AdoptConfigDuringInit);
Receive<InitializeSucceeded>(msg =>
{
if (msg.Generation != _initGeneration) return;
Timers.Cancel("retry-connect");
_log.Info("DriverInstance {Id}: reconnected", _driverInstanceId);
Become(Connected);
@@ -338,21 +349,36 @@ public sealed class DriverInstanceActor : ReceiveActor, IWithTimers
/// <summary>
/// Starts a new driver initialisation attempt with <paramref name="driverConfigJson"/> and returns
/// immediately; the outcome arrives later as an <see cref="InitializeSucceeded"/> or
/// <see cref="InitializeFailed"/> message sent to this actor.
/// </summary>
/// <param name="driverConfigJson">Config JSON to initialise the driver with. It also becomes the
/// actor's current config.</param>
/// <remarks>
/// Every call bumps the init generation and tags its result message with that value, so a result from
/// an earlier, superseded attempt can be recognised and dropped by the receiving handler.
/// </remarks>
private void InitializeAsync(string driverConfigJson)
{
    _currentConfigJson = driverConfigJson;

    // Take the token before the work is launched so the result carries the generation of *this*
    // attempt, not whatever the counter has become by the time the task finishes.
    var generation = ++_initGeneration;

    // Captured up front so the background task does not reach into actor context.
    var self = Self;

    // Fire-and-forget: the result is reported back exclusively through a message to self.
    _ = Task.Run(async () =>
    {
        try
        {
            await _driver.InitializeAsync(driverConfigJson, CancellationToken.None);
            self.Tell(new InitializeSucceeded(generation));
        }
        catch (Exception ex)
        {
            // Any failure is converted into a message; only the exception text is forwarded.
            self.Tell(new InitializeFailed(ex.Message, generation));
        }
    });
}
/// <summary>
/// Handles an <see cref="ApplyDelta"/> received while the actor is Connecting or Reconnecting by
/// kicking off a fresh init with the delivered config straight away.
/// </summary>
/// <param name="msg">The delta carrying the new config JSON and the correlation id to echo back.</param>
/// <remarks>
/// <see cref="InitializeAsync"/> replaces <c>_currentConfigJson</c> and advances the init generation,
/// which supersedes the attempt already in flight so its result is discarded on arrival. No state
/// change happens here; the next transition is driven by the result of the new attempt. While
/// Reconnecting, the retry timer is deliberately not cancelled: should this immediate attempt fail,
/// retries continue with the new config, and any overlapping attempt is deduplicated by the
/// generation guard. The sender is answered immediately with a successful <see cref="ApplyResult"/>.
/// </remarks>
private void AdoptConfigDuringInit(ApplyDelta msg)
{
    var requester = Sender;

    _log.Info(
        "DriverInstance {Id}: ApplyDelta during (re)connect — adopting new config, re-initialising now",
        _driverInstanceId);

    // Re-init first, acknowledge second — same ordering as before.
    InitializeAsync(msg.DriverConfigJson);

    var acknowledgement = new ApplyResult(true, "config adopted; reinitializing", msg.Correlation);
    requester.Tell(acknowledgement);
}
private async Task HandleApplyDeltaAsync(ApplyDelta msg)
{
var replyTo = Sender;