06f2df4f89
Notify-and-fetch follow-ups: - PendingDeploymentPurgeActor: a central cluster singleton (not readiness-gated, best-effort) that sweeps expired PendingDeployment staging rows on CommunicationOptions.PendingDeploymentPurgeInterval (default 1h). Modeled on the kpi-history-recorder pattern: self-scheduling timer, per-tick DI scope -> IDeploymentManagerRepository, continue-on-error. Wired in AkkaHostedService.RegisterCentralActors (manager + proxy + drain); resolves the deferred TODO in DeploymentService. Correctness never depends on it (supersession bounds rows to <=1/instance; the fetch endpoint enforces the TTL), so it is deliberately absent from RequiredSingletonsHealthCheck. - SQL Server integration test for StagePendingIfAbsentAsync re-staging an instance's OWN DeploymentId over an expired row against the real UNIQUE index on DeploymentId — confirms EF orders DELETE before INSERT in one SaveChanges (SQLite's constraint timing differs from SQL Server's). Plus a same-instance supersession variant on real SQL Server. Tests: 2 TestKit actor tests + 2 SQL Server integration tests (both ran green against the infra MSSQL container); 235 Communication + 15 PendingDeployment tests pass; Host builds 0 warnings.
84 lines
4.3 KiB
C#
84 lines
4.3 KiB
C#
namespace ZB.MOM.WW.ScadaBridge.Communication;
|
|
|
|
/// <summary>
/// Settings that govern communication with the central site: one timeout per
/// command pattern, gRPC streaming limits, transport heartbeat tuning, and the
/// notify-and-fetch deployment staging parameters.
/// </summary>
public class CommunicationOptions
{
    /// <summary>
    /// Maximum wait for a deployment command. Typically the longest of the
    /// timeouts because of the apply logic.
    /// </summary>
    public TimeSpan DeploymentTimeout { get; set; } = TimeSpan.FromMinutes(2);

    /// <summary>Maximum wait for a lifecycle command (disable, enable, delete).</summary>
    public TimeSpan LifecycleTimeout { get; set; } = TimeSpan.FromSeconds(30);

    /// <summary>Maximum wait for an artifact deployment command.</summary>
    public TimeSpan ArtifactDeploymentTimeout { get; set; } = TimeSpan.FromMinutes(1);

    /// <summary>Maximum wait for a remote query request (event logs, parked messages).</summary>
    public TimeSpan QueryTimeout { get; set; } = TimeSpan.FromSeconds(30);

    /// <summary>Maximum wait when routing an integration call.</summary>
    public TimeSpan IntegrationTimeout { get; set; } = TimeSpan.FromSeconds(30);

    /// <summary>Maximum wait for the debug view subscribe/unsubscribe handshake.</summary>
    public TimeSpan DebugViewTimeout { get; set; } = TimeSpan.FromSeconds(10);

    /// <summary>
    /// Maximum wait for a health report acknowledgement. The report is
    /// fire-and-forget, but the wait is still bounded.
    /// </summary>
    public TimeSpan HealthReportTimeout { get; set; } = TimeSpan.FromSeconds(10);

    /// <summary>
    /// Notification Outbox: maximum wait when forwarding a buffered notification to
    /// central and awaiting its <c>NotificationSubmitAck</c>. Hitting this timeout
    /// counts as a transient failure: the Store-and-Forward engine keeps the message
    /// buffered and retries the forward at the fixed retry interval.
    /// </summary>
    public TimeSpan NotificationForwardTimeout { get; set; } = TimeSpan.FromSeconds(30);

    /// <summary>
    /// Addresses of the central cluster's contact points, for example
    /// "akka.tcp://scadabridge@central-a:8081". Site nodes use them to create the
    /// ClusterClient that reaches central.
    /// </summary>
    public List<string> CentralContactPoints { get; set; } = new List<string>();

    /// <summary>Interval between gRPC keepalive pings on streaming connections.</summary>
    public TimeSpan GrpcKeepAlivePingDelay { get; set; } = TimeSpan.FromSeconds(15);

    /// <summary>
    /// gRPC keepalive ping timeout. A stream that does not respond within this
    /// period is considered dead.
    /// </summary>
    public TimeSpan GrpcKeepAlivePingTimeout { get; set; } = TimeSpan.FromSeconds(10);

    /// <summary>
    /// Longest a single gRPC stream may live before the server forces it to be
    /// re-established.
    /// </summary>
    public TimeSpan GrpcMaxStreamLifetime { get; set; } = TimeSpan.FromHours(4);

    /// <summary>Upper bound on concurrent gRPC streaming subscriptions per site node.</summary>
    public int GrpcMaxConcurrentStreams { get; set; } = 100;

    /// <summary>Heartbeat interval of the Akka.Remote transport.</summary>
    public TimeSpan TransportHeartbeatInterval { get; set; } = TimeSpan.FromSeconds(5);

    /// <summary>Failure detection threshold of the Akka.Remote transport.</summary>
    public TimeSpan TransportFailureThreshold { get; set; } = TimeSpan.FromSeconds(15);

    /// <summary>
    /// Base URL (Traefik/LB) from which the SITE fetches deploy configs from central,
    /// for example "https://central.example:9000". It travels inside
    /// RefreshDeploymentCommand, so sites need no new standing config. Leaving it
    /// empty disables the notify-and-fetch fallback.
    /// </summary>
    public string CentralFetchBaseUrl { get; set; } = string.Empty;

    /// <summary>
    /// Validity window of a staged PendingDeployment and its fetch token. It must
    /// comfortably cover the fetches of both site nodes within one deploy window.
    /// </summary>
    public TimeSpan PendingDeploymentTtl { get; set; } = TimeSpan.FromMinutes(5);

    /// <summary>
    /// Period at which the central <c>PendingDeploymentPurgeActor</c> singleton
    /// reclaims expired (TTL-elapsed) PendingDeployment staging rows. This is
    /// best-effort hygiene only: supersession already bounds pending rows to at most
    /// one per instance and the config-fetch endpoint already enforces the TTL, so the
    /// purge merely sweeps rows left behind by instances that are deployed once and
    /// never re-deployed. The 1 hour default is far larger than
    /// <see cref="PendingDeploymentTtl"/>.
    /// </summary>
    public TimeSpan PendingDeploymentPurgeInterval { get; set; } = TimeSpan.FromHours(1);
}
|