fix(communication): resolve Communication-004..008 — Resume supervision, gRPC option wiring, address-load logging, sync dispose, flap detection
This commit is contained in:
@@ -13,21 +13,45 @@ namespace ScadaLink.Communication.Grpc;
|
||||
/// SiteStreamGrpcServer. The central-side DebugStreamBridgeActor uses this
|
||||
/// to open server-streaming calls for individual instances.
|
||||
/// </summary>
|
||||
public class SiteStreamGrpcClient : IAsyncDisposable
|
||||
public class SiteStreamGrpcClient : IAsyncDisposable, IDisposable
|
||||
{
|
||||
private readonly GrpcChannel? _channel;
|
||||
private readonly SiteStreamService.SiteStreamServiceClient? _client;
|
||||
private readonly ILogger? _logger;
|
||||
private readonly ConcurrentDictionary<string, CancellationTokenSource> _subscriptions = new();
|
||||
|
||||
/// <summary>
|
||||
/// The HTTP/2 keepalive ping delay actually applied to this client's channel.
|
||||
/// Exposed for tests verifying that <see cref="CommunicationOptions"/> is honoured.
|
||||
/// </summary>
|
||||
internal TimeSpan KeepAlivePingDelay { get; }
|
||||
|
||||
/// <summary>
|
||||
/// The HTTP/2 keepalive ping timeout actually applied to this client's channel.
|
||||
/// Exposed for tests verifying that <see cref="CommunicationOptions"/> is honoured.
|
||||
/// </summary>
|
||||
internal TimeSpan KeepAlivePingTimeout { get; }
|
||||
|
||||
/// <summary>
/// Creates a client for <paramref name="endpoint"/> by delegating to the
/// three-argument constructor with a default-constructed
/// <see cref="CommunicationOptions"/>.
/// </summary>
public SiteStreamGrpcClient(string endpoint, ILogger logger)
    : this(endpoint: endpoint, logger: logger, options: new CommunicationOptions()) { }
|
||||
|
||||
/// <summary>
|
||||
/// Creates a client whose HTTP/2 keepalive is taken from <see cref="CommunicationOptions"/>
|
||||
/// rather than hard-coded, satisfying the design doc's "gRPC Connection Keepalive"
|
||||
/// section which states these values are configurable.
|
||||
/// </summary>
|
||||
public SiteStreamGrpcClient(string endpoint, ILogger logger, CommunicationOptions options)
|
||||
{
|
||||
KeepAlivePingDelay = options.GrpcKeepAlivePingDelay;
|
||||
KeepAlivePingTimeout = options.GrpcKeepAlivePingTimeout;
|
||||
_channel = GrpcChannel.ForAddress(endpoint, new GrpcChannelOptions
|
||||
{
|
||||
HttpHandler = new SocketsHttpHandler
|
||||
{
|
||||
KeepAlivePingDelay = TimeSpan.FromSeconds(15),
|
||||
KeepAlivePingTimeout = TimeSpan.FromSeconds(10),
|
||||
KeepAlivePingDelay = options.GrpcKeepAlivePingDelay,
|
||||
KeepAlivePingTimeout = options.GrpcKeepAlivePingTimeout,
|
||||
KeepAlivePingPolicy = HttpKeepAlivePingPolicy.Always
|
||||
}
|
||||
});
|
||||
@@ -205,7 +229,13 @@ public class SiteStreamGrpcClient : IAsyncDisposable
|
||||
_ => AlarmLevel.None
|
||||
};
|
||||
|
||||
public async ValueTask DisposeAsync()
|
||||
/// <summary>
|
||||
/// Releases all subscription CancellationTokenSources and the underlying
|
||||
/// gRPC channel. All teardown here is synchronous (CTS disposal and
|
||||
/// <see cref="GrpcChannel.Dispose"/>), so a synchronous <see cref="Dispose"/>
|
||||
/// can release everything without sync-over-async blocking.
|
||||
/// </summary>
|
||||
private void ReleaseResources()
|
||||
{
|
||||
foreach (var cts in _subscriptions.Values)
|
||||
{
|
||||
@@ -214,9 +244,22 @@ public class SiteStreamGrpcClient : IAsyncDisposable
|
||||
}
|
||||
_subscriptions.Clear();
|
||||
|
||||
if (_channel is not null)
|
||||
_channel.Dispose();
|
||||
_channel?.Dispose();
|
||||
}
|
||||
|
||||
await ValueTask.CompletedTask;
|
||||
/// <summary>
/// Asynchronous disposal. The teardown itself is delegated to
/// <see cref="ReleaseResources"/>, which runs synchronously, so the returned
/// <see cref="ValueTask"/> is already completed.
/// </summary>
/// <returns>A completed <see cref="ValueTask"/>.</returns>
public virtual ValueTask DisposeAsync()
{
    ReleaseResources();

    // This type is unsealed: suppress finalization so a derived type that
    // introduces a finalizer does not run it after a successful dispose (CA1816).
    GC.SuppressFinalize(this);

    return ValueTask.CompletedTask;
}
|
||||
|
||||
/// <summary>
/// Synchronous disposal. All resources held by this client are released
/// synchronously, so callers (e.g. <see cref="SiteStreamGrpcClientFactory.Dispose"/>)
/// need not block on the async disposal path.
/// </summary>
public virtual void Dispose()
{
    ReleaseResources();

    // This type is unsealed: suppress finalization so a derived type that
    // introduces a finalizer does not run it after a successful dispose (CA1816).
    GC.SuppressFinalize(this);
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
using System.Collections.Concurrent;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
|
||||
namespace ScadaLink.Communication.Grpc;
|
||||
|
||||
@@ -12,22 +13,43 @@ public class SiteStreamGrpcClientFactory : IAsyncDisposable, IDisposable
|
||||
{
|
||||
private readonly ConcurrentDictionary<string, SiteStreamGrpcClient> _clients = new();
|
||||
private readonly ILoggerFactory _loggerFactory;
|
||||
private readonly CommunicationOptions _options;
|
||||
|
||||
/// <summary>
/// Convenience constructor that delegates to the options-aware constructor
/// with a default-constructed <see cref="CommunicationOptions"/>.
/// </summary>
public SiteStreamGrpcClientFactory(ILoggerFactory loggerFactory)
    : this(loggerFactory, Options.Create(new CommunicationOptions()))
{
    // Intentionally empty: the chained constructor already assigns
    // _loggerFactory and _options, so nothing is re-assigned here.
}
|
||||
|
||||
/// <summary>
/// DI constructor — flows <see cref="CommunicationOptions"/> into every created
/// <see cref="SiteStreamGrpcClient"/> so the configured gRPC keepalive settings
/// are applied rather than hard-coded defaults.
/// </summary>
/// <param name="loggerFactory">Factory used to create a logger for each client.</param>
/// <param name="options">Options wrapper whose <c>Value</c> is captured once here.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="loggerFactory"/> or <paramref name="options"/> is <c>null</c>.
/// </exception>
public SiteStreamGrpcClientFactory(ILoggerFactory loggerFactory, IOptions<CommunicationOptions> options)
{
    // Fail at construction with a named argument instead of a bare
    // NullReferenceException later (options.Value here, CreateLogger in CreateClient).
    ArgumentNullException.ThrowIfNull(loggerFactory);
    ArgumentNullException.ThrowIfNull(options);

    _loggerFactory = loggerFactory;
    _options = options.Value;
}
|
||||
|
||||
/// <summary>
/// Returns an existing client for the site or creates a new one. The new
/// client is created via <see cref="CreateClient"/> and tracked so the
/// factory's <see cref="Dispose"/> / <see cref="DisposeAsync"/> release it.
/// </summary>
/// <param name="siteIdentifier">Cache key; one client is tracked per identifier.</param>
/// <param name="grpcEndpoint">
/// Endpoint passed to <see cref="CreateClient"/>. Only used when no client is
/// cached yet for <paramref name="siteIdentifier"/>.
/// </param>
/// <returns>The cached or newly created client.</returns>
public virtual SiteStreamGrpcClient GetOrCreate(string siteIdentifier, string grpcEndpoint)
{
    // Always go through CreateClient so the configured options are applied and
    // subclasses can substitute the client. Constructing the client inline here
    // would bypass both.
    // NOTE(review): ConcurrentDictionary.GetOrAdd may invoke the factory delegate
    // more than once under contention; a client built by a losing call is not
    // tracked by this factory — confirm whether that matters for callers.
    return _clients.GetOrAdd(siteIdentifier, _ => CreateClient(grpcEndpoint));
}
|
||||
|
||||
/// <summary>
/// Builds one <see cref="SiteStreamGrpcClient"/> for the given endpoint, passing
/// it a logger from the factory's <see cref="ILoggerFactory"/> and the factory's
/// <see cref="CommunicationOptions"/>. Virtual so tests can substitute a tracking
/// client while still exercising the factory's real caching and disposal machinery.
/// </summary>
protected virtual SiteStreamGrpcClient CreateClient(string grpcEndpoint) =>
    new SiteStreamGrpcClient(
        grpcEndpoint,
        _loggerFactory.CreateLogger<SiteStreamGrpcClient>(),
        _options);
|
||||
|
||||
/// <summary>
|
||||
@@ -50,8 +72,19 @@ public class SiteStreamGrpcClientFactory : IAsyncDisposable, IDisposable
|
||||
_clients.Clear();
|
||||
}
|
||||
|
||||
/// <summary>
/// Synchronous disposal. Communication-007: this used to block on
/// <c>DisposeAsync().AsTask().GetAwaiter().GetResult()</c> (sync-over-async,
/// a stall/deadlock risk during host shutdown). Each
/// <see cref="SiteStreamGrpcClient"/> releases all of its resources
/// synchronously, so we dispose them directly with no async path.
/// </summary>
public void Dispose()
{
    // Dispose every tracked client directly; the async path is deliberately
    // not invoked from here.
    foreach (var client in _clients.Values)
    {
        client.Dispose();
    }

    _clients.Clear();

    // This type is unsealed: suppress finalization so a derived type that
    // introduces a finalizer does not run it after a successful dispose (CA1816).
    GC.SuppressFinalize(this);
}
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ using System.Threading.Channels;
|
||||
using Akka.Actor;
|
||||
using Grpc.Core;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using GrpcStatus = Grpc.Core.Status;
|
||||
|
||||
namespace ScadaLink.Communication.Grpc;
|
||||
@@ -19,6 +20,7 @@ public class SiteStreamGrpcServer : SiteStreamService.SiteStreamServiceBase
|
||||
private readonly ILogger<SiteStreamGrpcServer> _logger;
|
||||
private readonly ConcurrentDictionary<string, StreamEntry> _activeStreams = new();
|
||||
private readonly int _maxConcurrentStreams;
|
||||
private readonly TimeSpan _maxStreamLifetime;
|
||||
private volatile bool _ready;
|
||||
private long _actorCounter;
|
||||
|
||||
@@ -26,10 +28,36 @@ public class SiteStreamGrpcServer : SiteStreamService.SiteStreamServiceBase
|
||||
ISiteStreamSubscriber streamSubscriber,
|
||||
ILogger<SiteStreamGrpcServer> logger,
|
||||
int maxConcurrentStreams = 100)
|
||||
: this(streamSubscriber, logger, maxConcurrentStreams, TimeSpan.FromHours(4))
|
||||
{
|
||||
}
|
||||
|
||||
/// <summary>
/// DI constructor — binds <see cref="CommunicationOptions.GrpcMaxConcurrentStreams"/>
/// and <see cref="CommunicationOptions.GrpcMaxStreamLifetime"/> so the documented
/// concurrency limit and the 4-hour zombie-stream session timeout are honoured
/// rather than hard-coded.
/// </summary>
/// <exception cref="ArgumentNullException"><paramref name="options"/> is <c>null</c>.</exception>
// NOTE(review): this type exposes another public three-parameter constructor whose
// third parameter is an optional int. Confirm the DI/activation path selects this
// overload unambiguously (e.g. via an explicit factory registration).
public SiteStreamGrpcServer(
    ISiteStreamSubscriber streamSubscriber,
    ILogger<SiteStreamGrpcServer> logger,
    IOptions<CommunicationOptions> options)
    : this(
        streamSubscriber,
        logger,
        // Guard inside the initializer: it runs before the constructor body, so
        // this is the earliest point at which a null 'options' can be reported.
        (options ?? throw new ArgumentNullException(nameof(options))).Value.GrpcMaxConcurrentStreams,
        options.Value.GrpcMaxStreamLifetime)
{
}
|
||||
|
||||
/// <summary>
/// Common constructor that both public constructors chain to; stores the
/// subscriber, logger, concurrent-stream limit and per-stream lifetime.
/// </summary>
private SiteStreamGrpcServer(
    ISiteStreamSubscriber streamSubscriber,
    ILogger<SiteStreamGrpcServer> logger,
    int maxConcurrentStreams,
    TimeSpan maxStreamLifetime)
{
    (_streamSubscriber, _logger, _maxConcurrentStreams, _maxStreamLifetime) =
        (streamSubscriber, logger, maxConcurrentStreams, maxStreamLifetime);
}
|
||||
|
||||
/// <summary>
|
||||
@@ -49,6 +77,12 @@ public class SiteStreamGrpcServer : SiteStreamService.SiteStreamServiceBase
|
||||
/// </summary>
|
||||
public int ActiveStreamCount => _activeStreams.Count;
|
||||
|
||||
/// <summary>
/// The concurrent-stream limit this server was constructed with. Exposed for tests.
/// </summary>
internal int MaxConcurrentStreams
{
    get => _maxConcurrentStreams;
}
|
||||
|
||||
/// <summary>
/// The per-stream session lifetime this server was constructed with. Exposed for tests.
/// </summary>
internal TimeSpan MaxStreamLifetime
{
    get => _maxStreamLifetime;
}
|
||||
|
||||
public override async Task SubscribeInstance(
|
||||
InstanceStreamRequest request,
|
||||
IServerStreamWriter<SiteStreamEvent> responseStream,
|
||||
@@ -69,6 +103,11 @@ public class SiteStreamGrpcServer : SiteStreamService.SiteStreamServiceBase
|
||||
throw new RpcException(new GrpcStatus(StatusCode.ResourceExhausted, "Max concurrent streams reached"));
|
||||
|
||||
using var streamCts = CancellationTokenSource.CreateLinkedTokenSource(context.CancellationToken);
|
||||
// Session timeout (design doc "gRPC Connection Keepalive": 4-hour third layer
|
||||
// of dead-client detection) — forces a long-lived zombie stream to terminate
|
||||
// even if keepalive PINGs never detect the loss.
|
||||
if (_maxStreamLifetime > TimeSpan.Zero && _maxStreamLifetime != Timeout.InfiniteTimeSpan)
|
||||
streamCts.CancelAfter(_maxStreamLifetime);
|
||||
var entry = new StreamEntry(streamCts);
|
||||
_activeStreams[request.CorrelationId] = entry;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user