feat(siteeventlog): emit store_and_forward + notification events (M1.7)

StoreAndForwardService gains an optional ISiteEventLogger? ctor param (default
null so the many direct-construction tests still compile) and, when wired,
mirrors its own buffer/retry/park activity onto site operational events via the
existing OnActivity hook (which already isolates a throwing subscriber, so a
failing event log can never be misclassified as a transient delivery failure):

- store_and_forward (ExternalSystem / CachedDbWrite): queued/retried/delivered/
  parked. Warning on buffer/retry, Error on park, Info on retry-recovery; an
  immediate-success delivery is the hot path and is not logged.
- notification (the site forward-to-central path): logged ONLY on forward
  FAILURE (buffered after the immediate forward threw) and on park, per the
  Component-SiteEventLogging spec — routine enqueue and forward-success are
  deliberately not logged (central's Notifications table is the audit record).

Wired through AddStoreAndForward (resolves ISiteEventLogger optionally from DI);
StoreAndForward project now references SiteEventLogging (acyclic: SiteEventLogging
references only Commons). Also documents the 'notification' category on the
ISiteEventLogger.LogEventAsync eventType param (folds in M1.8 doc fix).
This commit is contained in:
Joseph Doherty
2026-06-15 12:31:04 -04:00
parent 09b9e8f259
commit d8b5dbb386
5 changed files with 281 additions and 3 deletions
@@ -11,7 +11,7 @@ public interface ISiteEventLogger
/// <see cref="Task"/> completes once the event is durably persisted and faults if
/// the write fails, so callers that <c>await</c> it observe success or failure.
/// </summary>
/// <param name="eventType">Category: script, alarm, deployment, connection, store_and_forward, instance_lifecycle</param>
/// <param name="eventType">Category: script, alarm, deployment, connection, store_and_forward, instance_lifecycle, notification</param>
/// <param name="severity">Info, Warning, or Error</param>
/// <param name="instanceId">Optional instance ID associated with the event</param>
/// <param name="source">Source identifier, e.g., "ScriptActor:MonitorSpeed"</param>
@@ -2,6 +2,7 @@ using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Services;
using ZB.MOM.WW.ScadaBridge.SiteEventLogging;
namespace ZB.MOM.WW.ScadaBridge.StoreAndForward;
@@ -49,13 +50,19 @@ public static class ServiceCollectionExtensions
// observable in the central audit log instead of producing a
// silent empty-string SourceSite.
var siteId = siteContext?.SiteId ?? string.Empty;
// M1.7: optional site operational-event log. Resolved through
// GetService so a host (or test) that has not called
// AddSiteEventLogging simply gets null and the S&F activity stays
// a no-op for site-event purposes.
var siteEventLogger = sp.GetService<ISiteEventLogger>();
return new StoreAndForwardService(
storage,
options,
logger,
replication,
cachedCallObserver,
siteId);
siteId,
siteEventLogger);
});
services.AddSingleton<ReplicationService>(sp =>
@@ -3,6 +3,7 @@ using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Services;
using ZB.MOM.WW.ScadaBridge.Commons.Observability;
using ZB.MOM.WW.ScadaBridge.Commons.Types;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
using ZB.MOM.WW.ScadaBridge.SiteEventLogging;
namespace ZB.MOM.WW.ScadaBridge.StoreAndForward;
@@ -44,6 +45,15 @@ public class StoreAndForwardService
/// </summary>
private readonly ICachedCallLifecycleObserver? _cachedCallObserver;
/// <summary>
/// M1.7: optional site operational-event log. When non-null the service maps
/// its own buffer/retry/park activity (the same activity that drives
/// <see cref="OnActivity"/>) onto site events — <c>store_and_forward</c> for the
/// cached-call categories and <c>notification</c> for the site's
/// forward-to-central notification path. Best-effort and fire-and-forget so a
/// failing logger never affects delivery bookkeeping.
/// </summary>
private readonly ISiteEventLogger? _siteEventLogger;
/// <summary>
/// Audit Log #23 (M3 Bundle E — Task E4): site id stamped onto the
/// cached-call attempt context so the audit bridge can build the
/// <see cref="SiteCallOperational"/> half of the telemetry packet.
@@ -173,13 +183,20 @@ public class StoreAndForwardService
/// <param name="replication">Optional replication service for standby synchronization.</param>
/// <param name="cachedCallObserver">Optional observer for cached call lifecycle events.</param>
/// <param name="siteId">The site identifier this service belongs to.</param>
/// <param name="siteEventLogger">
/// M1.7: optional site operational-event log. When non-null, buffer/retry/park
/// activity is mirrored to site events (<c>store_and_forward</c> /
/// <c>notification</c> by category). Optional with a <c>null</c> default so the
/// many direct-construction tests still compile unchanged.
/// </param>
public StoreAndForwardService(
StoreAndForwardStorage storage,
StoreAndForwardOptions options,
ILogger<StoreAndForwardService> logger,
ReplicationService? replication = null,
ICachedCallLifecycleObserver? cachedCallObserver = null,
string siteId = "")
string siteId = "",
ISiteEventLogger? siteEventLogger = null)
{
_storage = storage;
_options = options;
@@ -191,6 +208,91 @@ public class StoreAndForwardService
// audit pipeline keying off SourceSite) never see an empty string and
// a misconfigured host is recognisable in the central log.
_siteId = string.IsNullOrWhiteSpace(siteId) ? UnknownSiteSentinel : siteId;
_siteEventLogger = siteEventLogger;
// M1.7: ride the existing activity hook to emit site operational events.
// RaiseActivity already isolates a throwing subscriber, so a failing
// event log can never be misclassified as a transient delivery failure
// (StoreAndForward-009). Only subscribe when a logger is wired so the
// legacy (test/central) construction path stays a no-op.
if (_siteEventLogger != null)
{
OnActivity += EmitSiteEvent;
}
}
/// <summary>
/// M1.7: maps one store-and-forward activity to a site operational event,
/// following the Site Event Logging spec's per-category scope
/// (Component-SiteEventLogging.md §"Events Logged"):
/// <list type="bullet">
/// <item><description>Cached-call categories
/// (<see cref="StoreAndForwardCategory.ExternalSystem"/> /
/// <see cref="StoreAndForwardCategory.CachedDbWrite"/>) log under
/// <c>store_and_forward</c> for queued / retried / parked / retry-delivered
/// activity.</description></item>
/// <item><description>The site's notification forward-to-central path
/// (<see cref="StoreAndForwardCategory.Notification"/>) logs under
/// <c>notification</c> ONLY on a forward FAILURE (buffered after the
/// immediate forward threw) or a park (long-buffered / retries exhausted).
/// Routine enqueue and forward-success are deliberately NOT logged — central's
/// <c>Notifications</c> table is the record of audit; the site only fills the
/// in-transit blind spot when central is unreachable.</description></item>
/// </list>
/// A successful immediate cached-call <c>Delivered</c> is the normal hot path and
/// is not logged.
/// </summary>
private void EmitSiteEvent(string action, StoreAndForwardCategory category, string detail)
{
var logger = _siteEventLogger;
if (logger == null)
{
return;
}
// An immediate-delivery success is the normal hot path, not an
// operational event. A retry-loop success (detail "Delivered to … after
// N retries") IS logged for cached calls — it records a recovery.
if (action == "Delivered" && detail.StartsWith("Immediate", StringComparison.Ordinal))
{
return;
}
if (category == StoreAndForwardCategory.Notification)
{
// Spec: log only forward-failure (the immediate forward threw and the
// notification was buffered for retry — detail "Buffered for retry:")
// and park. A routine "No handler registered, buffered" enqueue and a
// forward-success "Delivered" are deliberately NOT logged.
var isForwardFailure = action == "Queued"
&& detail.StartsWith("Buffered for retry", StringComparison.Ordinal);
if (!isForwardFailure && action != "Parked")
{
return;
}
var notifSeverity = action == "Parked" ? "Error" : "Warning";
_ = logger.LogEventAsync(
"notification", notifSeverity, instanceId: null,
source: "StoreAndForwardService",
message: $"Notification {action.ToLowerInvariant()}: {detail}");
return;
}
// Cached-call categories: queued / retried / parked / retry-delivered.
// Severity: parking is an Error (delivery abandoned for retry purposes);
// queue/retry/requeue are Warning; a retry-loop Delivered is Info.
var severity = action switch
{
"Parked" => "Error",
"Delivered" => "Info",
_ => "Warning",
};
_ = logger.LogEventAsync(
"store_and_forward", severity, instanceId: null,
source: "StoreAndForwardService",
message: $"Operation {action.ToLowerInvariant()}: {detail}");
}
/// <summary>
@@ -17,6 +17,7 @@
<ItemGroup>
<ProjectReference Include="../ZB.MOM.WW.ScadaBridge.Commons/ZB.MOM.WW.ScadaBridge.Commons.csproj" />
<ProjectReference Include="../ZB.MOM.WW.ScadaBridge.SiteEventLogging/ZB.MOM.WW.ScadaBridge.SiteEventLogging.csproj" />
</ItemGroup>
<ItemGroup>
@@ -0,0 +1,168 @@
using System.Collections.Concurrent;
using Microsoft.Data.Sqlite;
using Microsoft.Extensions.Logging.Abstractions;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
using ZB.MOM.WW.ScadaBridge.SiteEventLogging;
namespace ZB.MOM.WW.ScadaBridge.StoreAndForward.Tests;
/// <summary>
/// M1.7: the StoreAndForwardService emits site operational events for its own
/// buffer/park activity — <c>store_and_forward</c> for cached-call categories
/// (ExternalSystem / CachedDbWrite) and <c>notification</c> for the site's
/// notification forward-to-central path. Emission rides the existing
/// <c>OnActivity</c> hook and is best-effort (a failing logger never affects
/// delivery bookkeeping).
/// </summary>
public class StoreAndForwardSiteEventTests : IAsyncLifetime, IDisposable
{
private sealed record Entry(string EventType, string Severity, string Source, string Message);
private sealed class FakeSiteEventLogger : ISiteEventLogger
{
private readonly ConcurrentQueue<Entry> _entries = new();
public IReadOnlyList<Entry> Entries => _entries.ToArray();
public IReadOnlyList<Entry> OfType(string t) => _entries.Where(e => e.EventType == t).ToArray();
public Task LogEventAsync(string eventType, string severity, string? instanceId,
string source, string message, string? details = null)
{
_entries.Enqueue(new Entry(eventType, severity, source, message));
return Task.CompletedTask;
}
public long FailedWriteCount => 0;
}
private readonly SqliteConnection _keepAlive;
private readonly StoreAndForwardStorage _storage;
private readonly StoreAndForwardOptions _options;
private readonly FakeSiteEventLogger _siteLog = new();
private readonly StoreAndForwardService _service;
public StoreAndForwardSiteEventTests()
{
var dbName = $"SiteEvt_{Guid.NewGuid():N}";
var connStr = $"Data Source={dbName};Mode=Memory;Cache=Shared";
_keepAlive = new SqliteConnection(connStr);
_keepAlive.Open();
_storage = new StoreAndForwardStorage(connStr, NullLogger<StoreAndForwardStorage>.Instance);
_options = new StoreAndForwardOptions
{
DefaultRetryInterval = TimeSpan.Zero,
DefaultMaxRetries = 1,
RetryTimerInterval = TimeSpan.FromMinutes(10)
};
_service = new StoreAndForwardService(
_storage, _options, NullLogger<StoreAndForwardService>.Instance,
replication: null, cachedCallObserver: null, siteId: "site-a",
siteEventLogger: _siteLog);
}
public async Task InitializeAsync() => await _storage.InitializeAsync();
public Task DisposeAsync() => Task.CompletedTask;
public void Dispose() => _keepAlive.Dispose();
[Fact]
public async Task BufferForRetry_ExternalSystem_EmitsStoreAndForwardSiteEvent()
{
_service.RegisterDeliveryHandler(StoreAndForwardCategory.ExternalSystem,
_ => throw new HttpRequestException("transient"));
await _service.EnqueueAsync(StoreAndForwardCategory.ExternalSystem, "api.example.com", """{}""", "Pump1");
var rows = _siteLog.OfType("store_and_forward");
Assert.Contains(rows, r => r.Severity == "Warning" &&
r.Source == "StoreAndForwardService" &&
r.Message.Contains("queued", StringComparison.OrdinalIgnoreCase));
// The cached-call categories must NOT surface as notification events.
Assert.Empty(_siteLog.OfType("notification"));
}
[Fact]
public async Task ForwardFailure_Notification_EmitsNotificationSiteEvent()
{
// The site's notification role is forward-to-central. When the immediate
// forward to central throws (central unreachable), the notification is
// buffered for retry — a forward FAILURE, which the spec says to log as a
// `notification` site event (filling the in-transit blind spot).
_service.RegisterDeliveryHandler(StoreAndForwardCategory.Notification,
_ => throw new HttpRequestException("central unreachable"));
await _service.EnqueueAsync(StoreAndForwardCategory.Notification, "list-a", """{}""", "Pump1");
var rows = _siteLog.OfType("notification");
Assert.Contains(rows, r => r.Severity == "Warning" &&
r.Source == "StoreAndForwardService" &&
r.Message.Contains("queued", StringComparison.OrdinalIgnoreCase));
// A notification forward-failure is not a store_and_forward (cached-call) event.
Assert.Empty(_siteLog.OfType("store_and_forward"));
}
[Fact]
public async Task RoutineEnqueue_Notification_DoesNotEmitSiteEvent()
{
// Spec: routine enqueue / forward-success on the notification path are
// deliberately NOT logged — central's Notifications table is the audit
// record of record. A successful immediate forward emits no site event.
_service.RegisterDeliveryHandler(StoreAndForwardCategory.Notification,
_ => Task.FromResult(true));
await _service.EnqueueAsync(StoreAndForwardCategory.Notification, "list-a", """{}""", "Pump1");
Assert.Empty(_siteLog.OfType("notification"));
}
[Fact]
public async Task Park_Notification_EmitsErrorNotificationSiteEvent()
{
// A long-buffered notification that exhausts retries is parked — the spec
// logs this as a `notification` event (Error severity).
_service.RegisterDeliveryHandler(StoreAndForwardCategory.Notification,
_ => throw new HttpRequestException("central unreachable"));
await _service.EnqueueAsync(
StoreAndForwardCategory.Notification, "list-a", """{}""", "Pump1",
attemptImmediateDelivery: false, maxRetries: 1);
await _service.RetryPendingMessagesAsync();
var rows = _siteLog.OfType("notification");
Assert.Contains(rows, r => r.Severity == "Error" &&
r.Message.Contains("parked", StringComparison.OrdinalIgnoreCase));
}
[Fact]
public async Task Park_ExternalSystem_EmitsErrorStoreAndForwardSiteEvent()
{
// MaxRetries = 1 → the first sweep retry parks the message.
_service.RegisterDeliveryHandler(StoreAndForwardCategory.ExternalSystem,
_ => throw new HttpRequestException("transient"));
await _service.EnqueueAsync(
StoreAndForwardCategory.ExternalSystem, "api.example.com", """{}""", "Pump1",
attemptImmediateDelivery: false, maxRetries: 1);
await _service.RetryPendingMessagesAsync();
var rows = _siteLog.OfType("store_and_forward");
Assert.Contains(rows, r => r.Severity == "Error" &&
r.Message.Contains("parked", StringComparison.OrdinalIgnoreCase));
}
[Fact]
public async Task DeliveredImmediately_DoesNotEmitSiteEvent()
{
// A successful immediate delivery is the normal hot path — it is not a
// store-and-forward buffering event, so no operational event is logged.
_service.RegisterDeliveryHandler(StoreAndForwardCategory.ExternalSystem,
_ => Task.FromResult(true));
await _service.EnqueueAsync(StoreAndForwardCategory.ExternalSystem, "api", """{}""", "Pump1");
Assert.Empty(_siteLog.OfType("store_and_forward"));
Assert.Empty(_siteLog.OfType("notification"));
}
}