d33617d65d
Per-probe health-check child scopes were disposing the AddTransient-bridged ActorSystem (IDisposable), terminating the live cluster node ~4s after boot and leaving every singleton-proxy Ask to hang the full 30s QueryTimeout — the central report pages (/notifications, /site-calls, /monitoring/health) loaded in ~30s. Bridge it as a singleton via a new lazy AkkaHostedService.GetOrCreateActorSystem() so child-scope disposal never touches it. Verified: 0 post-startup terminates, healthy active/standby, report pages ~0.05s, Playwright 68 passed / 0 failed.
153 lines
9.2 KiB
C#
153 lines
9.2 KiB
C#
using ZB.MOM.WW.ScadaBridge.AuditLog;
|
|
using ZB.MOM.WW.ScadaBridge.ClusterInfrastructure;
|
|
using ZB.MOM.WW.ScadaBridge.Communication;
|
|
using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Services;
|
|
using ZB.MOM.WW.ScadaBridge.Commons.Observability;
|
|
using ZB.MOM.WW.ScadaBridge.DataConnectionLayer;
|
|
using ZB.MOM.WW.ScadaBridge.ExternalSystemGateway;
|
|
using ZB.MOM.WW.ScadaBridge.HealthMonitoring;
|
|
using ZB.MOM.WW.ScadaBridge.Host.Actors;
|
|
using ZB.MOM.WW.ScadaBridge.Host.Health;
|
|
using ZB.MOM.WW.ScadaBridge.NotificationService;
|
|
using ZB.MOM.WW.ScadaBridge.SiteEventLogging;
|
|
using ZB.MOM.WW.ScadaBridge.SiteRuntime;
|
|
using ZB.MOM.WW.ScadaBridge.StoreAndForward;
|
|
using ZB.MOM.WW.Telemetry;
|
|
|
|
namespace ZB.MOM.WW.ScadaBridge.Host;
|
|
|
|
/// <summary>
/// Extracted site-role DI registrations so both Program.cs and tests
/// use the same composition root.
/// </summary>
public static class SiteServiceRegistration
{
    /// <summary>SQLite file used when <c>ScadaBridge:Database:SiteDbPath</c> is missing or blank.</summary>
    private const string DefaultSiteDbPath = "site.db";

    /// <summary>Registers all DI services required for the site role.</summary>
    /// <remarks>
    /// Registration ORDER is significant in several places (see the inline comments);
    /// do not reorder the calls without re-checking those constraints.
    /// </remarks>
    /// <param name="services">The service collection to register into.</param>
    /// <param name="config">Application configuration for options binding.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="services"/> or <paramref name="config"/> is <see langword="null"/>.
    /// </exception>
    public static void Configure(IServiceCollection services, IConfiguration config)
    {
        // Fail at the entry point with the offending parameter name instead of a
        // NullReferenceException somewhere inside the registration sequence.
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(config);

        // Shared components
        services.AddClusterInfrastructure();
        services.AddCommunication();
        services.AddSiteHealthMonitoring();
        services.AddExternalSystemGateway();
        // AddNotificationService() is intentionally NOT registered on the site path.
        // Sites no longer deliver notifications over SMTP — a buffered notification is
        // forwarded to the central cluster (via NotificationForwarder / SiteCommunicationActor),
        // and central owns SMTP delivery through the Notification Outbox. The SMTP machinery
        // (OAuth2TokenService, ISmtpClientWrapper) has no consumer on a site node.

        // Health report transport: sends SiteHealthReport to SiteCommunicationActor via Akka
        services.AddSingleton<ISiteIdentityProvider, SiteIdentityProvider>();
        services.AddSingleton<IHealthReportTransport, AkkaHealthReportTransport>();

        // Site-only components — AddSiteRuntime registers SiteStorageService with SQLite path
        // and site-local repository implementations (IExternalSystemRepository, INotificationRepository)
        //
        // A blank value (e.g. an empty environment-variable override) is treated the same as a
        // missing one, mirroring the IsNullOrWhiteSpace handling of the OTLP endpoint in
        // BindSharedOptions. Otherwise the connection string would be "Data Source=" with no
        // file. NOTE(review): with Microsoft.Data.Sqlite an empty Data Source opens a throwaway
        // temporary database — presumably the provider behind AddSiteRuntime; confirm.
        var configuredDbPath = config["ScadaBridge:Database:SiteDbPath"];
        var siteDbPath = string.IsNullOrWhiteSpace(configuredDbPath)
            ? DefaultSiteDbPath
            : configuredDbPath;
        services.AddSiteRuntime($"Data Source={siteDbPath}");
        services.AddDataConnectionLayer();
        // Audit Log #23 (M3 Bundle F): adapter that surfaces the site id to
        // StoreAndForwardService through DI WITHOUT introducing a
        // StoreAndForward → HealthMonitoring project-reference cycle. Must be
        // registered BEFORE AddStoreAndForward so the S&F factory resolves a
        // non-empty SiteId at construction time (otherwise the S&F service is
        // a singleton and the empty-string value would be cached for the
        // lifetime of the process).
        services.AddSingleton<ZB.MOM.WW.ScadaBridge.StoreAndForward.IStoreAndForwardSiteContext, StoreAndForwardSiteContext>();
        services.AddStoreAndForward();
        services.AddSiteEventLogging();

        // Audit Log (#23) — site-side hot-path writer + telemetry collaborators.
        // The SiteAuditTelemetryActor itself is registered by AkkaHostedService
        // in the site-role block; this call wires every DI dependency it (and
        // ScriptRuntimeContext, when Bundle F lands) reaches for.
        services.AddAuditLog(config);

        // Audit Log (#23) M2 Bundle G — bridge FallbackAuditWriter primary
        // failures into the site health report payload as
        // SiteAuditWriteFailures. Must come AFTER both AddSiteHealthMonitoring
        // (registers ISiteHealthCollector) and AddAuditLog (registers the
        // NoOp default this call replaces).
        services.AddAuditLogHealthMetricsBridge();

        // WP-13: Akka.NET bootstrap via hosted service. The service is registered as a
        // singleton first and the hosted-service registration resolves that same instance,
        // so the factories below share it with the host.
        services.AddSingleton<AkkaHostedService>();
        services.AddHostedService(sp => sp.GetRequiredService<AkkaHostedService>());

        // HOST-021: bridge the AkkaHostedService-owned ActorSystem to DI as a SINGLETON via
        // GetOrCreateActorSystem(). The shared ZB.MOM.WW.Health Akka checks resolve ActorSystem
        // from DI, per probe, inside a child scope. ActorSystem is IDisposable, so a TRANSIENT
        // (or scoped) bridge is captured-and-disposed by each probe's scope — disposing the live
        // system mid-flight (CoordinatedShutdown/ActorSystemTerminateReason) and tearing down the
        // node. A singleton is resolved from the root and never disposed by a child scope; routing
        // through GetOrCreateActorSystem (instead of a plain singleton factory over .ActorSystem)
        // means the first resolve CREATES the system rather than caching a null if a probe wins
        // the startup race.
        services.AddSingleton<Akka.Actor.ActorSystem>(sp =>
            sp.GetRequiredService<AkkaHostedService>().GetOrCreateActorSystem());

        // Cluster node status provider for health reports
        services.AddSingleton<IClusterNodeProvider>(sp =>
        {
            var akkaService = sp.GetRequiredService<AkkaHostedService>();
            var nodeOptions = sp.GetRequiredService<Microsoft.Extensions.Options.IOptions<NodeOptions>>().Value;
            var siteRole = $"site-{nodeOptions.SiteId}";
            return new AkkaClusterNodeProvider(akkaService, siteRole);
        });

        // Options binding
        BindSharedOptions(services, config);
        services.Configure<SiteRuntimeOptions>(config.GetSection("ScadaBridge:SiteRuntime"));
        services.Configure<DataConnectionOptions>(config.GetSection("ScadaBridge:DataConnection"));
        services.Configure<StoreAndForwardOptions>(config.GetSection("ScadaBridge:StoreAndForward"));
        services.Configure<SiteEventLogOptions>(config.GetSection("ScadaBridge:SiteEventLog"));
    }

    /// <summary>Binds shared options sections (Node, Cluster, Database, Communication, etc.) used by both site and central roles.</summary>
    /// <remarks>
    /// Also registers <see cref="INodeIdentityProvider"/> and the shared telemetry setup,
    /// because every node needs them regardless of role.
    /// </remarks>
    /// <param name="services">The service collection to bind options into.</param>
    /// <param name="config">Application configuration supplying the option values.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="services"/> or <paramref name="config"/> is <see langword="null"/>.
    /// </exception>
    public static void BindSharedOptions(IServiceCollection services, IConfiguration config)
    {
        // This method is public and callable on its own, so it guards its own arguments.
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(config);

        services.Configure<NodeOptions>(config.GetSection("ScadaBridge:Node"));
        // Bind + eagerly validate: ClusterOptionsValidator is registered (TryAddEnumerable)
        // by the ClusterInfrastructure module, so chaining ValidateOnStart() here makes a bad
        // ScadaBridge:Cluster section fail fast at host build instead of lazily on first resolve.
        services.AddOptions<ClusterOptions>().Bind(config.GetSection("ScadaBridge:Cluster")).ValidateOnStart();
        services.Configure<DatabaseOptions>(config.GetSection("ScadaBridge:Database"));
        services.Configure<CommunicationOptions>(config.GetSection("ScadaBridge:Communication"));
        // Bind + eagerly validate: HealthMonitoringOptionsValidator is registered (TryAddEnumerable)
        // by the HealthMonitoring module, so chaining ValidateOnStart() here makes a bad
        // ScadaBridge:HealthMonitoring section fail fast at host build instead of lazily on first resolve.
        services.AddOptions<HealthMonitoringOptions>().Bind(config.GetSection("ScadaBridge:HealthMonitoring")).ValidateOnStart();
        services.Configure<NotificationOptions>(config.GetSection("ScadaBridge:Notification"));
        services.Configure<LoggingOptions>(config.GetSection("ScadaBridge:Logging"));

        // Audit Log (#23) — exposes ScadaBridge:Node:NodeName to downstream audit
        // writers so they can stamp the SourceNode column. Registered here in
        // shared bootstrap because every node (central + site) needs it.
        services.AddSingleton<INodeIdentityProvider, NodeIdentityProvider>();

        // Observability — shared ZB.MOM.WW.Telemetry. Registered in shared bootstrap so
        // BOTH the central and site composition roots wire the OTel Resource (the
        // service.name/site.id/node.role identity triple) + standard instrumentation +
        // the always-on Prometheus exporter. Mount the /metrics scrape endpoint per role
        // with app.MapZbMetrics(). The same `?? "central"` SiteId default Program.cs uses
        // is applied here so the Resource attribute matches the log-enricher value.
        // The application meter is named so OTel observes its instruments; emit points are
        // wired by follow-on tasks (the instruments are no-op until a listener attaches).
        services.AddZbTelemetry(o =>
        {
            o.ServiceName = "scadabridge";
            o.SiteId = config["ScadaBridge:Node:SiteId"] ?? "central";
            o.NodeRole = config["ScadaBridge:Node:Role"];
            o.Meters = [ScadaBridgeTelemetry.MeterName];

            // Only override the exporter when the configured value parses; a missing or
            // unrecognised name leaves whatever default the options object already carries.
            // NOTE(review): Enum.TryParse also accepts numeric strings, including values that
            // are not defined members of ZbExporter — confirm whether that needs rejecting.
            if (Enum.TryParse<ZbExporter>(config["ScadaBridge:Telemetry:Exporter"], ignoreCase: true, out var exporter))
            {
                o.Exporter = exporter;
            }

            // A blank endpoint is treated as "not configured" and leaves the default in place.
            var otlp = config["ScadaBridge:Telemetry:OtlpEndpoint"];
            if (!string.IsNullOrWhiteSpace(otlp))
            {
                o.OtlpEndpoint = otlp;
            }
        });
    }
}
|