258 lines
11 KiB
C#
258 lines
11 KiB
C#
using System.Collections.Concurrent;
|
|
using Google.Protobuf.WellKnownTypes;
|
|
using Grpc.Core;
|
|
using Grpc.Net.Client;
|
|
using Microsoft.Extensions.Logging;
|
|
using ZB.MOM.WW.ScadaBridge.Communication;
|
|
using ZB.MOM.WW.ScadaBridge.Communication.Grpc;
|
|
using ProtoPullRequest = ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullAuditEventsRequest;
|
|
using ProtoPullResponse = ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullAuditEventsResponse;
|
|
using PullAuditEventsResponse = ZB.MOM.WW.ScadaBridge.Commons.Messages.Integration.PullAuditEventsResponse;
|
|
|
|
namespace ZB.MOM.WW.ScadaBridge.AuditLog.Central;
|
|
|
|
/// <summary>
/// Production <see cref="IPullAuditEventsClient"/> (Audit Log #23, M6) that the
/// central <see cref="SiteAuditReconciliationActor"/> uses to pull the next
/// reconciliation batch from a site over the <c>PullAuditEvents</c> unary gRPC
/// RPC served by <c>SiteStreamGrpcServer</c>.
/// </summary>
/// <remarks>
/// <para>
/// <b>Endpoint resolution.</b> The actor passes only a <c>siteId</c>; this
/// client resolves it to a gRPC authority via <see cref="ISiteEnumerator"/>
/// (<see cref="SiteEntry.GrpcEndpoint"/>) on every call so a NodeA→NodeB
/// failover flip or an edited site address takes effect on the next tick — the
/// same liveness guarantee <c>SiteStreamGrpcClientFactory</c> gives the
/// real-time stream. A site with no registered endpoint yields an empty
/// response (no dial); reconciliation simply has nothing to pull from it.
/// Faults raised by the enumerator itself are not caught here and propagate
/// to the caller.
/// </para>
/// <para>
/// <b>Fault tolerance.</b> Per the <see cref="IPullAuditEventsClient"/>
/// contract, tolerable transport faults (connection refused / site offline =
/// <see cref="StatusCode.Unavailable"/>, slow site = <see cref="StatusCode.DeadlineExceeded"/>,
/// shutdown = <see cref="StatusCode.Cancelled"/>, plus bare
/// <see cref="HttpRequestException"/> / <c>SocketException</c> before a gRPC
/// status is established) are caught and collapsed to an empty response — one
/// offline site must never sink the rest of the reconciliation tick. Any other
/// fault raised by the call, and any failure while mapping the reply to
/// canonical events (e.g. a malformed reply that fails DTO mapping), is also
/// swallowed to empty: audit reconciliation is best-effort and a throw would
/// only get re-caught by the actor's own per-site guard.
/// </para>
/// <para>
/// <b>Testability.</b> The unary call is reached through the
/// <see cref="IPullAuditEventsInvoker"/> seam. Production binds
/// <see cref="GrpcPullAuditEventsInvoker"/> (one cached <see cref="GrpcChannel"/>
/// per endpoint, keepalive from <see cref="CommunicationOptions"/>); unit tests
/// inject a fake invoker so no real HTTP/2 endpoint is required.
/// </para>
/// </remarks>
public sealed class GrpcPullAuditEventsClient : IPullAuditEventsClient
{
    /// <summary>Shared "nothing to pull" response: no events, no further pages.</summary>
    private static readonly PullAuditEventsResponse Empty =
        new(Array.Empty<ZB.MOM.WW.Audit.AuditEvent>(), MoreAvailable: false);

    private readonly ISiteEnumerator _sites;
    private readonly IPullAuditEventsInvoker _invoker;
    private readonly ILogger<GrpcPullAuditEventsClient> _logger;

    /// <summary>
    /// Creates the client over the given site enumerator and unary-call invoker.
    /// </summary>
    /// <param name="sites">Resolves a <c>siteId</c> to its gRPC endpoint.</param>
    /// <param name="invoker">Seam that issues the <c>PullAuditEvents</c> unary RPC against a resolved endpoint.</param>
    /// <param name="logger">Logger for transport-fault diagnostics.</param>
    /// <exception cref="ArgumentNullException">Any argument is <see langword="null"/>.</exception>
    public GrpcPullAuditEventsClient(
        ISiteEnumerator sites,
        IPullAuditEventsInvoker invoker,
        ILogger<GrpcPullAuditEventsClient> logger)
    {
        _sites = sites ?? throw new ArgumentNullException(nameof(sites));
        _invoker = invoker ?? throw new ArgumentNullException(nameof(invoker));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    public async Task<PullAuditEventsResponse> PullAsync(
        string siteId,
        DateTime sinceUtc,
        int batchSize,
        CancellationToken ct)
    {
        var endpoint = await ResolveEndpointAsync(siteId, ct).ConfigureAwait(false);
        if (endpoint is null)
        {
            // No gRPC address registered for the site — absence of an address is
            // a configuration decision (mirrors ISiteEnumerator's own contract),
            // not a runtime error, so there is simply nothing to pull.
            _logger.LogDebug(
                "PullAuditEvents skipped: no gRPC endpoint registered for site {SiteId}.", siteId);
            return Empty;
        }

        var request = new ProtoPullRequest
        {
            // ReadPendingSinceAsync treats DateTime.MinValue as "from the start";
            // EnsureUtc keeps Timestamp.FromDateTime happy (it requires UTC kind)
            // without shifting an Unspecified-kind cursor by the local offset.
            SinceUtc = Timestamp.FromDateTime(EnsureUtc(sinceUtc)),
            BatchSize = batchSize,
        };

        ProtoPullResponse reply;
        try
        {
            reply = await _invoker.InvokeAsync(endpoint, request, ct).ConfigureAwait(false);
        }
        catch (RpcException ex) when (IsTolerable(ex.StatusCode))
        {
            _logger.LogDebug(ex,
                "PullAuditEvents tolerable transport fault for site {SiteId} ({Endpoint}): {Status}. Returning empty batch.",
                siteId, endpoint, ex.StatusCode);
            return Empty;
        }
        catch (Exception ex) when (ex is HttpRequestException or System.Net.Sockets.SocketException)
        {
            _logger.LogDebug(ex,
                "PullAuditEvents connection-layer fault for site {SiteId} ({Endpoint}). Returning empty batch.",
                siteId, endpoint);
            return Empty;
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // Reconciliation tick was cancelled (host shutdown / scope dispose).
            return Empty;
        }
        catch (Exception ex)
        {
            // Anything else the invoker raised (a non-tolerable RpcException, a
            // non-RpcException transport wrapper, ...). Audit reconciliation is
            // best-effort; swallow to empty rather than throw — the actor's
            // per-site guard would only re-catch it.
            _logger.LogWarning(ex,
                "PullAuditEvents unexpected fault for site {SiteId} ({Endpoint}). Returning empty batch.",
                siteId, endpoint);
            return Empty;
        }

        try
        {
            // Map proto DTOs to canonical AuditEvent records and order oldest-first
            // (the wire is already ordered by the site queue, but the
            // IPullAuditEventsClient contract is explicit, so sort defensively).
            var events = reply.Events
                .Select(AuditEventDtoMapper.FromDto)
                .OrderBy(e => e.OccurredAtUtc)
                .ToList();

            return new PullAuditEventsResponse(events, reply.MoreAvailable);
        }
        catch (Exception ex)
        {
            // A reply that cannot be mapped is treated like any other
            // unexpected fault: log it and report an empty batch so one bad
            // site cannot fail the whole reconciliation tick.
            _logger.LogWarning(ex,
                "PullAuditEvents reply from site {SiteId} ({Endpoint}) could not be mapped. Returning empty batch.",
                siteId, endpoint);
            return Empty;
        }
    }

    /// <summary>
    /// Looks up the gRPC endpoint for <paramref name="siteId"/> among the
    /// enumerated sites. Site ids are compared ordinally; entries whose
    /// endpoint is null or whitespace are skipped.
    /// </summary>
    /// <param name="siteId">The site identifier to resolve.</param>
    /// <param name="ct">Cancellation token passed to the enumerator.</param>
    /// <returns>The first matching non-blank endpoint, or <see langword="null"/> when none exists.</returns>
    private async Task<string?> ResolveEndpointAsync(string siteId, CancellationToken ct)
    {
        var sites = await _sites.EnumerateAsync(ct).ConfigureAwait(false);
        foreach (var site in sites)
        {
            if (string.Equals(site.SiteId, siteId, StringComparison.Ordinal) &&
                !string.IsNullOrWhiteSpace(site.GrpcEndpoint))
            {
                return site.GrpcEndpoint;
            }
        }

        return null;
    }

    /// <summary>
    /// Returns <see langword="true"/> for the gRPC status codes that are
    /// collapsed to an empty response at debug level:
    /// <see cref="StatusCode.Unavailable"/>, <see cref="StatusCode.DeadlineExceeded"/>
    /// and <see cref="StatusCode.Cancelled"/>.
    /// </summary>
    private static bool IsTolerable(StatusCode code) => code is
        StatusCode.Unavailable or
        StatusCode.DeadlineExceeded or
        StatusCode.Cancelled;

    /// <summary>
    /// Normalises <paramref name="value"/> to <see cref="DateTimeKind.Utc"/>.
    /// UTC values pass through, local values are converted, and
    /// <see cref="DateTimeKind.Unspecified"/> values are stamped as UTC with
    /// their ticks unchanged: the caller's parameter is a UTC cursor, and
    /// <see cref="DateTime.ToUniversalTime"/> would instead treat an
    /// unspecified value as local time and shift it by the host's offset.
    /// </summary>
    private static DateTime EnsureUtc(DateTime value) => value.Kind switch
    {
        DateTimeKind.Utc => value,
        DateTimeKind.Local => value.ToUniversalTime(),
        _ => DateTime.SpecifyKind(value, DateTimeKind.Utc),
    };

    /// <summary>
    /// Seam over the <c>PullAuditEvents</c> unary gRPC call against a resolved
    /// site endpoint. Extracted so <see cref="GrpcPullAuditEventsClient"/> can
    /// be unit-tested without a real <see cref="GrpcChannel"/>. Production binds
    /// <see cref="GrpcPullAuditEventsInvoker"/>.
    /// </summary>
    public interface IPullAuditEventsInvoker
    {
        /// <summary>
        /// Issues the <c>PullAuditEvents</c> unary RPC against <paramref name="endpoint"/>.
        /// May throw <see cref="RpcException"/> / <see cref="HttpRequestException"/>
        /// on transport faults — the caller classifies and swallows tolerable ones.
        /// </summary>
        /// <param name="endpoint">The site gRPC authority (e.g. <c>http://site-a:8083</c>).</param>
        /// <param name="request">The wire-format pull request.</param>
        /// <param name="ct">Cancellation token.</param>
        /// <returns>The wire-format pull response.</returns>
        Task<ProtoPullResponse> InvokeAsync(string endpoint, ProtoPullRequest request, CancellationToken ct);
    }
}
|
|
|
|
/// <summary>
/// Production <see cref="GrpcPullAuditEventsClient.IPullAuditEventsInvoker"/>:
/// caches one <see cref="GrpcChannel"/> per endpoint (keepalive from
/// <see cref="CommunicationOptions"/>, mirroring <c>SiteStreamGrpcClient</c>)
/// and issues the unary <c>PullAuditEventsAsync</c> call.
/// </summary>
/// <remarks>
/// The cache is keyed by the endpoint string (ordinal comparison), so when a
/// site is re-keyed to a different address (NodeA→NodeB failover / address
/// edit) the next call simply opens a channel for the new endpoint. Channels
/// for endpoints that are no longer requested are not evicted; they stay
/// cached until <see cref="Dispose"/> is called.
/// </remarks>
public sealed class GrpcPullAuditEventsInvoker
    : GrpcPullAuditEventsClient.IPullAuditEventsInvoker, IDisposable
{
    private readonly ConcurrentDictionary<string, GrpcChannel> _channels = new(StringComparer.Ordinal);
    private readonly CommunicationOptions _options;

    /// <summary>
    /// Creates the invoker using default <see cref="CommunicationOptions"/>.
    /// </summary>
    public GrpcPullAuditEventsInvoker()
        : this(new CommunicationOptions())
    {
    }

    /// <summary>
    /// Creates the invoker, applying the configured gRPC keepalive settings to
    /// every channel it opens.
    /// </summary>
    /// <param name="options">Communication options supplying gRPC keepalive timings.</param>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is <see langword="null"/>.</exception>
    public GrpcPullAuditEventsInvoker(CommunicationOptions options)
    {
        _options = options ?? throw new ArgumentNullException(nameof(options));
    }

    /// <inheritdoc />
    public async Task<ProtoPullResponse> InvokeAsync(
        string endpoint, ProtoPullRequest request, CancellationToken ct)
    {
        var channel = GetOrCreateChannel(endpoint);
        var client = new SiteStreamService.SiteStreamServiceClient(channel);
        using var call = client.PullAuditEventsAsync(request, cancellationToken: ct);
        return await call.ResponseAsync.ConfigureAwait(false);
    }

    /// <summary>
    /// Returns the cached channel for <paramref name="endpoint"/>, creating it
    /// on first use. If two callers race to create the same endpoint's
    /// channel, the one that loses the insert disposes its own instance so no
    /// channel is left orphaned outside the cache.
    /// </summary>
    private GrpcChannel GetOrCreateChannel(string endpoint)
    {
        if (_channels.TryGetValue(endpoint, out var cached))
        {
            return cached;
        }

        // GetOrAdd with a factory delegate may run the factory more than once
        // under contention; build the channel up front and discard it if
        // another thread published one first.
        var created = CreateChannel(endpoint);
        var published = _channels.GetOrAdd(endpoint, created);
        if (!ReferenceEquals(published, created))
        {
            created.Dispose();
        }

        return published;
    }

    /// <summary>
    /// Opens a channel to <paramref name="endpoint"/> over a dedicated
    /// <see cref="SocketsHttpHandler"/> configured with the keepalive ping
    /// delay and timeout from <see cref="CommunicationOptions"/>.
    /// </summary>
    private GrpcChannel CreateChannel(string endpoint) =>
        GrpcChannel.ForAddress(endpoint, new GrpcChannelOptions
        {
            HttpHandler = new SocketsHttpHandler
            {
                KeepAlivePingDelay = _options.GrpcKeepAlivePingDelay,
                KeepAlivePingTimeout = _options.GrpcKeepAlivePingTimeout,
                KeepAlivePingPolicy = HttpKeepAlivePingPolicy.Always,
            },

            // The handler above is owned exclusively by this channel; without
            // this flag a caller-supplied handler is left undisposed when the
            // channel is disposed.
            DisposeHttpClient = true,
        });

    /// <summary>Disposes all cached channels.</summary>
    public void Dispose()
    {
        // Remove-then-dispose per key so a channel is only ever dropped from
        // the cache by the code path that also disposes it.
        foreach (var endpoint in _channels.Keys)
        {
            if (_channels.TryRemove(endpoint, out var channel))
            {
                channel.Dispose();
            }
        }
    }
}
|