fix(high-severity): close 8 of 10 open High findings across 8 modules

Comm-016: delete dead HandleConnectionStateChanged + _debugSubscriptions /
_inProgressDeployments tracking + ConnectionStateChanged message record.
Disconnect detection is owned by the transport layers (gRPC keepalive PING
~25s; Ask-timeout at CommunicationService). Updates the
Component-Communication.md design doc to make that explicit.

SnF-018: NotificationForwarder.DeliverAsync now discards a corrupt buffered
payload (Warning log + return true) instead of returning false and parking
the row — honoring the design's "notifications do not park" invariant.

DM-018: reconciliation no longer force-sets Enabled, preserving an
intentional Disabled state after central failover.

ESG-018: DeliverBufferedAsync (both ExternalSystemClient + DatabaseGateway)
catches JsonException and returns false, turning a corrupt buffered row
into a parked operation instead of a retry-forever poison message.

InboundAPI-022: register ActiveNodeGate as IActiveNodeGate in the Central
DI branch so standby-node gating is actually wired up in production.

NS-019: remove orphaned NotificationDeliveryService /
INotificationDeliveryService / NotificationResult; central notification
delivery now lives entirely in NotificationOutbox.

SEL-016: normalise From/To filters to UTC before ISO-string compare so
non-UTC DateTimeOffset clients no longer get spuriously excluded events.

TE-017: include Description on attributes/alarms and a HashableConnections
projection (protocol, endpoint JSON, failover count) in the revision hash
and DiffService; staleness detection now catches description-only and
connection-endpoint edits.

Transport-001 and Transport-002 (also High) remain Open — they're being
handled in a follow-up batch because both touch BundleImporter.cs and
must serialise.
This commit is contained in:
Joseph Doherty
2026-05-28 05:40:15 -04:00
parent f936f55f51
commit ac96b83b08
38 changed files with 852 additions and 1729 deletions
@@ -1,5 +1,7 @@
using System.Text.Json;
using Akka.Actor;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
using ScadaLink.Commons.Messages.Notification;
namespace ScadaLink.StoreAndForward;
@@ -31,6 +33,7 @@ public sealed class NotificationForwarder
private readonly IActorRef _siteCommunicationActor;
private readonly string _sourceSiteId;
private readonly TimeSpan _forwardTimeout;
private readonly ILogger<NotificationForwarder> _logger;
/// <param name="siteCommunicationActor">
/// The site communication actor. It forwards a <see cref="NotificationSubmit"/> to
@@ -42,14 +45,21 @@ public sealed class NotificationForwarder
/// How long to wait for central's ack before treating the forward as a transient
/// failure. Sourced from host configuration.
/// </param>
/// <param name="logger">
/// Optional logger. StoreAndForward-018: a corrupt buffered payload is logged at
/// Warning before being discarded so an operator has a forensic trail of the row
/// that vanished from the buffer.
/// </param>
public NotificationForwarder(
IActorRef siteCommunicationActor,
string sourceSiteId,
TimeSpan forwardTimeout)
TimeSpan forwardTimeout,
ILogger<NotificationForwarder>? logger = null)
{
_siteCommunicationActor = siteCommunicationActor;
_sourceSiteId = sourceSiteId;
_forwardTimeout = forwardTimeout;
_logger = logger ?? NullLogger<NotificationForwarder>.Instance;
}
/// <summary>
@@ -61,11 +71,26 @@ public sealed class NotificationForwarder
/// <param name="message">The buffered store-and-forward message to deliver to central.</param>
public async Task<bool> DeliverAsync(StoreAndForwardMessage message)
{
// An unreadable payload cannot be fixed by retrying — park it (return false),
// mirroring how the former SMTP handler treated a corrupt buffered payload.
// StoreAndForward-018: an unreadable payload cannot be fixed by retrying.
// The design doc explicitly forbids parking notifications ("notifications do
// not park — they are retried at the fixed forward interval until central
// acks"; Component-StoreAndForward.md). The earlier behaviour returned false
// here, which the S&F engine interprets as a permanent failure and parks
// the row — contradicting the invariant and surfacing the row in the
// central UI's parked-message list. The correct outcome for a corrupt-payload
// notification is to DISCARD: log a Warning with the buffered row id +
// payload preview for forensics, then return true so the engine clears the
// buffer via its standard success-path cleanup. The buffered row is
// unrecoverable; retrying or parking would both make the queue worse, not
// better.
if (!TryBuildSubmit(message, out var submit))
{
return false;
_logger.LogWarning(
"Discarding corrupt buffered notification {NotificationId} (payload is not deserialisable as NotificationSubmit). " +
"Payload preview: {PayloadPreview}",
message.Id,
PreviewPayload(message.PayloadJson));
return true;
}
// The reply may legitimately be a non-accepted ack, so it is not requested as
@@ -140,6 +165,25 @@ public sealed class NotificationForwarder
};
return true;
}
/// <summary>
/// Maximum number of UTF-16 code units of a corrupt payload that are echoed into
/// the Warning log line emitted on discard.
/// </summary>
private const int CorruptPayloadPreviewMaxLength = 200;

/// <summary>
/// Returns a length-capped preview of a corrupt buffered payload for the Warning
/// log line emitted on discard. The full payload may be megabytes and is not
/// suitable for the structured log; the preview retains the leading characters,
/// which is what an operator typically uses to identify the producing script.
/// </summary>
/// <param name="payloadJson">The raw buffered payload text; may be null or empty.</param>
/// <returns>
/// <c>"&lt;empty&gt;"</c> for a null or empty payload; the payload unchanged when it is
/// at most <see cref="CorruptPayloadPreviewMaxLength"/> code units long; otherwise
/// its leading code units followed by an ellipsis character.
/// </returns>
private static string PreviewPayload(string? payloadJson)
{
    if (string.IsNullOrEmpty(payloadJson))
    {
        return "<empty>";
    }

    if (payloadJson.Length <= CorruptPayloadPreviewMaxLength)
    {
        return payloadJson;
    }

    // Cutting at a fixed UTF-16 index can land between the two halves of a
    // surrogate pair, which would leave a lone high surrogate (invalid Unicode)
    // at the end of the preview. Back the cut up by one code unit in that case.
    var cut = CorruptPayloadPreviewMaxLength;
    if (char.IsHighSurrogate(payloadJson[cut - 1]))
    {
        cut--;
    }

    return payloadJson.Substring(0, cut) + "…";
}
}
/// <summary>