fix(runtime): fast-fail RouteNodeWrite while Stale + micro-opts + raw-blob routing test

This commit is contained in:
Joseph Doherty
2026-06-14 00:16:47 -04:00
parent 02e37a6d68
commit 4cda275b8d
2 changed files with 142 additions and 7 deletions
@@ -92,10 +92,11 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
/// NodeId(s)</c>. Rebuilt every apply by <see cref="PushDesiredSubscriptions"/> from the
/// composition's <c>EquipmentTags</c> (mirroring <c>VirtualTagHostActor._nodeIdByVtag</c>), and
/// resolved in <see cref="ForwardToMux"/> so a driver value published by wire-ref FullName lands
/// on the variable's actual folder-scoped NodeId. A list because the same driver ref can back
/// several equipment variables (e.g. identical machines sharing a register).
/// on the variable's actual folder-scoped NodeId. A set because the same driver ref can back
/// several equipment variables (e.g. identical machines sharing a register), and the per-apply
/// rebuild dedups by NodeId.
/// </summary>
private readonly Dictionary<(string DriverInstanceId, string FullName), List<string>> _nodeIdByDriverRef = new();
private readonly Dictionary<(string DriverInstanceId, string FullName), HashSet<string>> _nodeIdByDriverRef = new();
/// <summary>
/// Inverse of <see cref="_nodeIdByDriverRef"/>: <c>folder-scoped equipment NodeId →
@@ -516,7 +517,7 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
? new NodeWriteResult(t.Result.Success, t.Result.Reason)
: new NodeWriteResult(false, "write timeout"),
CancellationToken.None,
TaskContinuationOptions.ExecuteSynchronously,
TaskContinuationOptions.None,
TaskScheduler.Default)
.PipeTo(replyTo);
}
@@ -545,6 +546,10 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
Receive<RestartDriver>(HandleRestartDriver);
Receive<ReconnectDriver>(HandleReconnectDriver);
Receive<RedundancyStateChanged>(OnRedundancyStateChanged);
// An inbound operator write can't be serviced while the config DB is unreachable — fast-fail so the
// node-manager's bounded Ask gets an immediate clear status instead of dead-lettering into a timeout.
Receive<RouteNodeWrite>(_ =>
Sender.Tell(new NodeWriteResult(false, "driver host stale (config DB unreachable)")));
Receive<SubscribeAck>(_ => { /* PubSub ack */ });
Timers.StartPeriodicTimer("retry-db", RetryConfigDbConnection.Instance, ReconnectInterval);
}
@@ -751,9 +756,9 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
{
var key = (t.DriverInstanceId, t.FullName);
var nodeId = EquipmentNodeIds.Variable(t.EquipmentId, t.FolderPath, t.Name);
if (!_nodeIdByDriverRef.TryGetValue(key, out var list))
_nodeIdByDriverRef[key] = list = new List<string>();
if (!list.Contains(nodeId)) list.Add(nodeId);
if (!_nodeIdByDriverRef.TryGetValue(key, out var set))
_nodeIdByDriverRef[key] = set = new HashSet<string>(StringComparer.Ordinal);
set.Add(nodeId);
_driverRefByNodeId[nodeId] = key;
}