fix(communication): resolve Communication-004..008 — Resume supervision, gRPC option wiring, address-load logging, sync dispose, flap detection
This commit is contained in:
@@ -84,6 +84,15 @@ public class CentralCommunicationActor : ReceiveActor
|
||||
// Periodic refresh trigger
|
||||
Receive<RefreshSiteAddresses>(_ => LoadSiteAddressesFromDb());
|
||||
|
||||
// Communication-006: a faulted LoadSiteAddressesFromDb task is piped here as a
|
||||
// Status.Failure. Without this handler the failure was an unhandled message
|
||||
// (debug-level only) and the refresh failed silently — operators could not
|
||||
// distinguish "no sites configured" from "database is down". Log at Warning.
|
||||
Receive<Status.Failure>(failure =>
|
||||
_log.Warning(failure.Cause,
|
||||
"Failed to load site addresses from the database; the site ClusterClient "
|
||||
+ "cache was not refreshed and may be stale or empty"));
|
||||
|
||||
// Health monitoring: heartbeats and health reports from sites
|
||||
Receive<HeartbeatMessage>(HandleHeartbeat);
|
||||
Receive<SiteHealthReport>(HandleSiteHealthReport);
|
||||
@@ -296,6 +305,25 @@ public class CentralCommunicationActor : ReceiveActor
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Supervision strategy for this actor's children (CLAUDE.md: "Resume for coordinator actors").
/// Builds a one-for-one strategy with no retry limit (<c>-1</c>) and an infinite time
/// window whose decider answers every exception with <see cref="Directive.Resume"/>.
/// CentralCommunicationActor is a long-lived coordinator that owns the per-site
/// ClusterClient map, so a faulted child (e.g. a ClusterClient child) is resumed in
/// order to keep its connection state instead of having it wiped by a Restart.
/// </summary>
/// <returns>The one-for-one, always-resume strategy described above.</returns>
protected override SupervisorStrategy SupervisorStrategy()
{
    // Every child fault is logged at Warning with its exception, then resumed.
    var alwaysResume = Decider.From(cause =>
    {
        _log.Warning(cause, "Child actor of CentralCommunicationActor faulted, resuming (state preserved)");
        return Directive.Resume;
    });

    // Named arguments keep the (maxNrOfRetries, withinTimeRange, decider) overload explicit.
    return new OneForOneStrategy(
        maxNrOfRetries: -1,
        withinTimeRange: Timeout.InfiniteTimeSpan,
        decider: alwaysResume);
}
|
||||
|
||||
protected override void PreStart()
|
||||
{
|
||||
_log.Info("CentralCommunicationActor started");
|
||||
|
||||
Reference in New Issue
Block a user