perf: close Theme 6 — 11 allocation / N+1 / lock-contention findings

Well-localised perf fixes across 8 modules.

Lock decoupling / SQL streaming:
- AuditLog-005: SqliteAuditWriter gains dedicated read-only _readConnection
  (+ _readLock) backed by WAL journal mode. GetBacklogStatsAsync,
  ReadPendingAsync, ReadPendingSinceAsync, ReadForwardedAsync no longer
  contend with the hot-path INSERT lock — backlog probes on a 30s timer
  can't stall the writer under multi-hundred-K Pending backlog.
- SEL-022: dropped Cache=Shared from SiteEventLogger's default connection
  string (single-connection logger; mode was dormant config).

Memory / streaming:
- CLI-019: bundle export streams base64 in 1 MB-aligned chunks via
  Convert.TryFromBase64Chars straight into the FileStream — no more
  full-bundle byte[] allocation.
- CentralUI-031: TransportImport now stages the upload to a per-session
  temp file under Path.GetTempPath() (replaces in-memory byte[] field);
  page implements IDisposable to delete the temp file on reset / new
  upload / dispose. Per-circuit working set drops from ~100 MB to ~80 KB.

N+1 hoisting:
- Transport-008: added ITemplateEngineRepository.GetTemplatesWithChildrenAsync
  bulk method; BundleImporter.PreviewAsync calls it once instead of per-
  template-name. Single query with .Include(...).AsSplitQuery().
- DM-023: BuildDeployArtifactsCommandAsync's per-site loop now references
  a pre-fetched GlobalArtifactSnapshot (shared scripts, external systems,
  DB connections, notification lists, SMTP) instead of re-querying per site.
- MgmtSvc-023: HandleQueryDeployments unfiltered branch uses one
  GetAllInstancesAsync bulk load + Dictionary<int,int?> lookup (was a
  GetInstanceByIdAsync per record).

Small allocations / per-tick rebuilds:
- InboundAPI-019: AuditWriteMiddleware gates EnableBuffering() on
  RequestHasBody() so GET/HEAD/DELETE/TRACE/OPTIONS and Content-Length:0
  requests skip the FileBufferingReadStream allocation.
- NotifOutbox-006: ResolveAdapters dictionary now cached on
  _adaptersCache (built lazily on first sweep) + actor-lifetime
  _adaptersScope; ResolveAdapters no longer rebuilds per dispatch tick.

Verify-only:
- Comm-017: Confirmed _inProgressDeployments was deleted by Comm-016 in
  commit ac96b83 — marked Resolved with that attribution. No code change.

Doc-correction:
- NS-022: Updated MailKitSmtpClientWrapper XML doc to spell out single-
  connection / per-delivery-factory contract (option (b) — transient
  client per Send — rejected because it re-handshakes TLS per email).

10+ new regression tests across 8 test projects. Build clean; affected
suites all green. README regenerated: 54 open (was 65).
This commit is contained in:
Joseph Doherty
2026-05-28 07:47:24 -04:00
parent 2ed5c6c379
commit 55f46e7c92
34 changed files with 1131 additions and 149 deletions
@@ -40,6 +40,18 @@ public class SqliteAuditWriter : IAuditWriter, ISiteAuditQueue, IAsyncDisposable
private const int SqliteErrorConstraint = 19;
private readonly SqliteConnection _connection;
// AuditLog-005: dedicated read-only connection used by GetBacklogStatsAsync,
// ReadPendingAsync, ReadPendingSinceAsync, and ReadForwardedAsync so a slow
// backlog scan (COUNT(*) over hundreds of thousands of Pending rows under a
// central outage) never parks the hot-path writer behind _writeLock.
// SQLite-with-WAL allows a second connection on the same file to read
// concurrently with the writer; the writer's WAL pragma is set in
// InitializeSchema before this connection is opened. The reader connection
// has its own _readLock because SqliteConnection itself is not thread-safe
// even in read-only mode — multiple read callers can otherwise interleave
// commands on the shared connection.
private readonly SqliteConnection _readConnection;
private readonly object _readLock = new();
private readonly SqliteAuditWriterOptions _options;
private readonly ILogger<SqliteAuditWriter> _logger;
private readonly INodeIdentityProvider _nodeIdentity;
@@ -74,6 +86,17 @@ public class SqliteAuditWriter : IAuditWriter, ISiteAuditQueue, IAsyncDisposable
InitializeSchema();
// AuditLog-005: open a second connection for read-only callers
// (GetBacklogStatsAsync, ReadPendingAsync, ReadPendingSinceAsync,
// ReadForwardedAsync). InitializeSchema set journal_mode=WAL on the
// writer connection, which is a database-level setting that persists
// for the file — subsequent connections to the same file see WAL and
// can read concurrently with the writer without taking _writeLock.
// Reuse the same connection string so the read connection sees the
// same Data Source / Cache settings as the writer.
_readConnection = new SqliteConnection(connectionString);
_readConnection.Open();
_writeQueue = Channel.CreateBounded<PendingAuditEvent>(
new BoundedChannelOptions(_options.ChannelCapacity)
{
@@ -100,6 +123,23 @@ public class SqliteAuditWriter : IAuditWriter, ISiteAuditQueue, IAsyncDisposable
pragmaCmd.ExecuteNonQuery();
}
// AuditLog-005: enable WAL so a second connection on the same file can
// serve read-only callers (GetBacklogStatsAsync, ReadPendingAsync,
// ReadPendingSinceAsync, ReadForwardedAsync) concurrently with the
// batched writer, decoupling those reads from _writeLock. WAL is a
// database-level setting persisted in the file header; setting it on
// the writer connection means every connection opened to the file
// afterwards inherits WAL behaviour. PRAGMA journal_mode returns the
// mode actually adopted ("memory" for ":memory:" / shared-cache memory
// mode, "wal" for file-backed) — we don't error if WAL was rejected
// because the read connection's correctness does not depend on WAL
// itself, only its concurrency advantage does.
using (var pragmaCmd = _connection.CreateCommand())
{
pragmaCmd.CommandText = "PRAGMA journal_mode = WAL";
pragmaCmd.ExecuteNonQuery();
}
using var cmd = _connection.CreateCommand();
cmd.CommandText = """
CREATE TABLE IF NOT EXISTS AuditLog (
@@ -392,14 +432,18 @@ public class SqliteAuditWriter : IAuditWriter, ISiteAuditQueue, IAsyncDisposable
throw new ArgumentOutOfRangeException(nameof(limit), "limit must be > 0.");
}
// SqliteConnection is not thread-safe so we go through the same write
// lock the batch INSERTer uses. The actor caller is single-threaded,
// so contention is bounded.
lock (_writeLock)
// AuditLog-005: read via the dedicated _readConnection so this scan
// (which can be expensive when the backlog grows under a central
// outage) does not block the batched writer on _writeLock. WAL mode
// gives us a stable snapshot of the table while writes proceed on the
// writer connection. _readLock serialises this connection across
// multiple concurrent read callers since SqliteConnection itself is
// not thread-safe.
lock (_readLock)
{
ObjectDisposedException.ThrowIf(_disposed, this);
using var cmd = _connection.CreateCommand();
using var cmd = _readConnection.CreateCommand();
cmd.CommandText = """
SELECT EventId, OccurredAtUtc, Channel, Kind, CorrelationId,
SourceSiteId, SourceNode, SourceInstanceId, SourceScript, Actor, Target,
@@ -445,12 +489,14 @@ public class SqliteAuditWriter : IAuditWriter, ISiteAuditQueue, IAsyncDisposable
throw new ArgumentOutOfRangeException(nameof(limit), "limit must be > 0.");
}
// Mirror ReadPendingAsync: the write lock guards the single connection.
lock (_writeLock)
// AuditLog-005: mirror ReadPendingAsync — read via _readConnection /
// _readLock so this query never contends with the batched writer on
// _writeLock.
lock (_readLock)
{
ObjectDisposedException.ThrowIf(_disposed, this);
using var cmd = _connection.CreateCommand();
using var cmd = _readConnection.CreateCommand();
cmd.CommandText = """
SELECT EventId, OccurredAtUtc, Channel, Kind, CorrelationId,
SourceSiteId, SourceNode, SourceInstanceId, SourceScript, Actor, Target,
@@ -520,12 +566,13 @@ public class SqliteAuditWriter : IAuditWriter, ISiteAuditQueue, IAsyncDisposable
throw new ArgumentOutOfRangeException(nameof(batchSize), "batchSize must be > 0.");
}
// Mirror ReadPendingAsync: the write lock guards the single connection.
lock (_writeLock)
// AuditLog-005: read via _readConnection / _readLock — same lock-
// decoupling as ReadPendingAsync.
lock (_readLock)
{
ObjectDisposedException.ThrowIf(_disposed, this);
using var cmd = _connection.CreateCommand();
using var cmd = _readConnection.CreateCommand();
cmd.CommandText = """
SELECT EventId, OccurredAtUtc, Channel, Kind, CorrelationId,
SourceSiteId, SourceNode, SourceInstanceId, SourceScript, Actor, Target,
@@ -599,7 +646,13 @@ public class SqliteAuditWriter : IAuditWriter, ISiteAuditQueue, IAsyncDisposable
int pendingCount;
DateTime? oldestPending;
lock (_writeLock)
// AuditLog-005: read via the dedicated _readConnection (under
// _readLock) so this probe — polled every 30 s by SiteAuditBacklogReporter
// — never blocks the batched hot-path writer on _writeLock. Under a
// central outage the Pending backlog can grow to hundreds of thousands
// of rows and the COUNT(*) scan correspondingly stretches; that no
// longer adds tail latency to user-facing audit writes.
lock (_readLock)
{
ObjectDisposedException.ThrowIf(_disposed, this);
@@ -607,7 +660,7 @@ public class SqliteAuditWriter : IAuditWriter, ISiteAuditQueue, IAsyncDisposable
// index range avoids a second scan. The IX_SiteAuditLog_ForwardState_Occurred
// index makes both aggregates cheap (count is a covering scan, min
// is the first key).
using var cmd = _connection.CreateCommand();
using var cmd = _readConnection.CreateCommand();
cmd.CommandText = """
SELECT COUNT(*), MIN(OccurredAtUtc)
FROM AuditLog
@@ -758,6 +811,15 @@ public class SqliteAuditWriter : IAuditWriter, ISiteAuditQueue, IAsyncDisposable
_disposed = true;
_connection.Dispose();
}
// AuditLog-005: dispose the dedicated read connection after the writer
// is fully drained and closed. _readLock is taken to fence out any
// in-flight read caller that grabbed the lock before _disposed flipped
// — they observe ObjectDisposedException on the next attempt.
lock (_readLock)
{
_readConnection.Dispose();
}
}
/// <summary>An audit event awaiting persistence by the background writer.</summary>
+64 -3
View File
@@ -121,9 +121,15 @@ public static class BundleCommands
using var doc = JsonDocument.Parse(jsonOk);
var base64 = doc.RootElement.GetProperty("base64Bundle").GetString()!;
var byteCount = doc.RootElement.GetProperty("byteCount").GetInt32();
var bytes = Convert.FromBase64String(base64);
File.WriteAllBytes(output, bytes);
Console.WriteLine($"Wrote {bytes.Length:N0} bytes to {output} (server reported {byteCount:N0}).");
// CLI-019: stream the base64 → file write so a 100 MB bundle
// doesn't double-buffer through Convert.FromBase64String's
// ~100 MB byte[] on the LOH plus a synchronous File.WriteAllBytes.
// The management envelope's body is still buffered into the
// jsonOk string (wire-format limit), but the decode + write
// are now chunked, so peak working-set drops from
// ~base64+byte[]+envelope to ~base64+small-chunk.
var written = StreamBase64ToFile(base64, output);
Console.WriteLine($"Wrote {written:N0} bytes to {output} (server reported {byteCount:N0}).");
return 0;
});
});
@@ -250,6 +256,61 @@ public static class BundleCommands
// the longer BundleCommandTimeout and a per-command success handler, so the
// exit-code contract is unified across every command group.
// CLI-019: chunked base64 → file streaming. The management envelope's
// success body is a single buffered JSON string (the wire format does not
// currently support response-body streaming), so we cannot remove the
// ~base64-string + ~envelope-string allocation. What we CAN — and do —
// remove is the intermediate ~bytecount-sized byte[] that
// Convert.FromBase64String allocates plus the synchronous File.WriteAllBytes:
// the base64 string is decoded window by window into ONE reusable buffer
// that is written straight to the output FileStream.
//
// The decode buffer is (Base64StreamChunkChars / 4) * 3 bytes (768 KiB with
// the default below). That is above the 85,000-byte large-object threshold,
// but it is allocated exactly once per export and reused for every window,
// so peak memory no longer scales with the bundle size.
internal const int Base64StreamChunkChars = 1024 * 1024; // 1 Mi base64 chars ≈ 768 KiB decoded

/// <summary>
/// Decodes <paramref name="base64"/> to <paramref name="outputPath"/> in bounded
/// windows, creating or truncating the file.
/// </summary>
/// <param name="base64">Base64 text. Embedded spaces, tabs, CR and LF are tolerated.</param>
/// <param name="outputPath">Destination file path; an existing file is overwritten.</param>
/// <returns>The total number of decoded bytes written to the file.</returns>
/// <exception cref="ArgumentNullException"><paramref name="base64"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="outputPath"/> is null or empty.</exception>
/// <exception cref="FormatException">
/// A window is not valid base64; the message carries the character offset at which
/// the failing window starts.
/// </exception>
internal static long StreamBase64ToFile(string base64, string outputPath)
{
    if (base64 is null) throw new ArgumentNullException(nameof(base64));
    if (string.IsNullOrEmpty(outputPath)) throw new ArgumentException("Output path required.", nameof(outputPath));

    // Four base64 chars decode to three bytes, so the nominal window is rounded
    // down to a multiple of 4 and the decode buffer is sized to match.
    var chunkChars = Base64StreamChunkChars - (Base64StreamChunkChars % 4);
    var totalChars = base64.Length;
    var totalWritten = 0L;

    using var fileStream = new FileStream(
        outputPath, FileMode.Create, FileAccess.Write, FileShare.None,
        bufferSize: 81920, useAsync: false);

    var byteBuffer = new byte[(chunkChars / 4) * 3];

    var offset = 0;
    while (offset < totalChars)
    {
        // Cutting at a fixed character offset is only safe when the text has no
        // whitespace. Convert.TryFromBase64Chars skips whitespace INSIDE a slice,
        // but whitespace shifts where the 4-char groups fall, so the window end is
        // aligned on significant characters instead of raw positions.
        var take = AlignBase64Window(base64.AsSpan(offset), chunkChars);
        var slice = base64.AsSpan(offset, take);

        // The final window may be shorter than chunkChars and may carry trailing
        // '=' padding; TryFromBase64Chars handles that.
        if (!Convert.TryFromBase64Chars(slice, byteBuffer, out var written))
        {
            throw new FormatException(
                $"Bundle response contained invalid base64 at character offset {offset}.");
        }

        fileStream.Write(byteBuffer, 0, written);
        totalWritten += written;
        offset += take;
    }

    return totalWritten;
}

// Returns how many chars of `remaining` the next decode window should cover: the
// nominal window, extended just far enough that it holds a whole number of 4-char
// base64 groups once whitespace is ignored. The extension never pushes the
// significant-character count past windowChars (itself a multiple of 4), so the
// decode buffer sized from windowChars is always large enough.
private static int AlignBase64Window(ReadOnlySpan<char> remaining, int windowChars)
{
    if (remaining.Length <= windowChars)
    {
        return remaining.Length; // final window: decode whatever is left
    }

    var significant = 0;
    for (var i = 0; i < windowChars; i++)
    {
        if (!IsBase64Whitespace(remaining[i]))
        {
            significant++;
        }
    }

    var take = windowChars;
    while (significant % 4 != 0 && take < remaining.Length)
    {
        if (!IsBase64Whitespace(remaining[take]))
        {
            significant++;
        }

        take++;
    }

    return take;
}

// The whitespace characters the Convert base64 decoders skip.
private static bool IsBase64Whitespace(char c) =>
    c == ' ' || c == '\t' || c == '\r' || c == '\n';
private static Option<IReadOnlyList<string>?> NameListOption(string name, string description)
{
var opt = new Option<IReadOnlyList<string>?>(name)
@@ -103,7 +103,7 @@
<div class="text-muted small fst-italic">Reading bundle…</div>
}
@if (_bundleBytes is not null && _errorMessage is null)
@if (_bundleTempPath is not null && _errorMessage is null)
{
@if (_session is not null)
{
@@ -39,11 +39,17 @@ namespace ScadaLink.CentralUI.Components.Pages.Design;
///
/// Cached bundle bytes: because <see cref="IBundleImporter.LoadAsync"/> currently
/// peeks the manifest by attempting decryption, encrypted bundles require two
/// LoadAsync invocations. We cache the raw bytes in <c>_bundleBytes</c> after the
/// first read so the user does not need to re-select the file before entering the
/// passphrase. The bytes are cleared on Done / Back-to-Upload.
/// LoadAsync invocations. CentralUI-031: we previously cached the raw bytes in a
/// <c>byte[] _bundleBytes</c> field, which buffered the full upload (default cap
/// 100 MB) in the component's per-circuit state — multiplied across concurrent
/// operator sessions, that produced real central-node memory pressure. The
/// bytes are now streamed once to a per-session temp file under
/// <c>Path.GetTempPath()/scadalink-transport-staging/</c> and only the path is
/// retained on the component. The file is deleted on Back-to-Upload / Reset /
/// successful Apply / component Dispose, so an abandoned wizard does not leak
/// staged bundle plaintext beyond circuit teardown.
/// </summary>
public partial class TransportImport : ComponentBase
public partial class TransportImport : ComponentBase, IDisposable
{
public enum ImportWizardStep
{
@@ -66,13 +72,23 @@ public partial class TransportImport : ComponentBase
private ImportWizardStep _step = ImportWizardStep.Upload;
private string? _errorMessage;
// ---- Session + cached bytes ----
// Bundle bytes are cached so the same file can be re-attempted with a
// passphrase without forcing the user to re-pick it. Cleared in ResetAll.
private byte[]? _bundleBytes;
// ---- Session + cached bundle path ----
// CentralUI-031: the upload is streamed to a per-session temp file and only
// the path is retained on the component, so we don't hold an entire bundle
// (up to MaxBundleSizeMb, default 100 MB) in per-circuit memory across the
// wizard's lifetime. The file is deleted on every wizard reset path and on
// component disposal so an abandoned wizard cannot leak staged plaintext
// beyond circuit teardown.
private string? _bundleTempPath;
private BundleSession? _session;
private bool _uploadInProgress;
// Staging directory for in-flight bundle uploads. Lives under the system
// temp directory rather than wwwroot/ because the file is never served to
// a browser — it is only read by the in-process IBundleImporter.
private static readonly string StagingDir =
Path.Combine(Path.GetTempPath(), "scadalink-transport-staging");
// ---- Step 2: passphrase ----
private string _passphrase = string.Empty;
private int _failedUnlockAttempts;
@@ -106,7 +122,7 @@ public partial class TransportImport : ComponentBase
_errorMessage = null;
_uploadInProgress = true;
_session = null;
_bundleBytes = null;
DeleteBundleTempFile();
try
{
var maxBytes = Options.Value.MaxBundleSizeMb * 1024L * 1024L;
@@ -116,13 +132,21 @@ public partial class TransportImport : ComponentBase
return;
}
// CentralUI-031: stream the upload directly to a per-session temp
// file so the central node's working set is bounded by the
// FileStream buffer (~80 KB) rather than the full bundle bytes.
// OpenReadStream's MaxAllowedSize defaults to 500_000 bytes — bump
// it to the configured cap so the read doesn't throw before we get
// to the importer's own length check.
using var fileStream = e.File.OpenReadStream(maxBytes);
using var ms = new MemoryStream();
await fileStream.CopyToAsync(ms);
_bundleBytes = ms.ToArray();
Directory.CreateDirectory(StagingDir);
_bundleTempPath = Path.Combine(StagingDir, $"{Guid.NewGuid():N}.scadabundle");
using (var fileStream = e.File.OpenReadStream(maxBytes))
await using (var dest = new FileStream(
_bundleTempPath, FileMode.CreateNew, FileAccess.Write, FileShare.None,
bufferSize: 81920, useAsync: true))
{
await fileStream.CopyToAsync(dest);
}
await TryLoadAsync(passphrase: null);
}
@@ -150,14 +174,19 @@ public partial class TransportImport : ComponentBase
/// </summary>
private async Task TryLoadAsync(string? passphrase)
{
if (_bundleBytes is null)
if (_bundleTempPath is null || !File.Exists(_bundleTempPath))
{
_errorMessage = "No bundle bytes cached — please re-select the file.";
_errorMessage = "No bundle staged — please re-select the file.";
return;
}
try
{
using var stream = new MemoryStream(_bundleBytes);
// CentralUI-031: read the staged bundle straight off disk; the
// importer's LoadAsync only walks the stream forward, so a plain
// FileStream is sufficient (no need to buffer it back into memory).
using var stream = new FileStream(
_bundleTempPath, FileMode.Open, FileAccess.Read, FileShare.Read,
bufferSize: 81920, useAsync: true);
_session = await BundleImporter.LoadAsync(stream, passphrase, CancellationToken.None);
_errorMessage = null;
}
@@ -528,7 +557,7 @@ public partial class TransportImport : ComponentBase
private void ResetSessionState()
{
_session = null;
_bundleBytes = null;
DeleteBundleTempFile();
_preview = null;
_resolutions = null;
_passphrase = string.Empty;
@@ -536,4 +565,44 @@ public partial class TransportImport : ComponentBase
_result = null;
_validationErrors = null;
}
/// <summary>
/// CentralUI-031: removes the staged bundle temp file, if one is tracked, and
/// forgets its path. Cleanup is best-effort: IO and access failures are
/// swallowed so a stuck temp file can never block the wizard.
/// </summary>
private void DeleteBundleTempFile()
{
    // Detach the path first so the field is cleared even when deletion fails.
    var stagedPath = _bundleTempPath;
    _bundleTempPath = null;

    if (stagedPath is not null)
    {
        try
        {
            if (File.Exists(stagedPath))
            {
                File.Delete(stagedPath);
            }
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // Another handle may still be open (e.g. an in-flight LoadAsync
            // read), or the file may not be deletable by this process. Leave
            // it behind; the OS temp dir is reaped on its own schedule.
            // Audit-failure-style: never block the user-facing action.
        }
    }
}
/// <summary>
/// CentralUI-031: component teardown hook. Deletes the staged temp file so an
/// abandoned wizard does not leave it behind once the component is disposed
/// (navigation away or circuit end).
/// </summary>
public void Dispose() => DeleteBundleTempFile();
}
@@ -15,6 +15,18 @@ public interface ITemplateEngineRepository
/// <param name="id">The template ID.</param>
/// <param name="cancellationToken">Cancellation token.</param>
Task<Template?> GetTemplateWithChildrenAsync(int id, CancellationToken cancellationToken = default);
/// <summary>
/// Bulk variant of <see cref="GetTemplateWithChildrenAsync(int, CancellationToken)"/>
/// that fetches every template whose <see cref="Template.Name"/> matches one of
/// <paramref name="names"/> in a single repository call, eager-loading
/// Attributes / Alarms / Scripts / Compositions. The number of SQL statements is
/// therefore independent of how many names are passed (an implementation that uses
/// EF split-query loading issues one statement per included collection, not one
/// per template). Resolves the Transport-008 N+1 in
/// <c>BundleImporter.PreviewAsync</c> — names that don't match an existing
/// template are omitted from the result rather than producing a null entry, so
/// callers should look up by name into the returned list.
/// </summary>
/// <param name="names">Template names to load. Duplicate / null / empty names are filtered out.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>
/// The matching templates with their child collections loaded; empty when no name
/// matches. No ordering is specified.
/// </returns>
Task<IReadOnlyList<Template>> GetTemplatesWithChildrenAsync(IEnumerable<string> names, CancellationToken cancellationToken = default);
/// <summary>Retrieves all templates.</summary>
/// <param name="cancellationToken">Cancellation token.</param>
Task<IReadOnlyList<Template>> GetAllTemplatesAsync(CancellationToken cancellationToken = default);
@@ -39,6 +39,30 @@ public class TemplateEngineRepository : ITemplateEngineRepository
return await GetTemplateByIdAsync(id, cancellationToken);
}
/// <inheritdoc />
public async Task<IReadOnlyList<Template>> GetTemplatesWithChildrenAsync(
    IEnumerable<string> names, CancellationToken cancellationToken = default)
{
    // Transport-008: one bulk lookup instead of the per-name N+1 in
    // BundleImporter.PreviewAsync.
    if (names is null)
    {
        return Array.Empty<Template>();
    }

    // Drop null / empty entries and exact (ordinal) duplicates up front so the
    // generated IN clause carries each requested name once.
    var requestedNames = names
        .Where(candidate => !string.IsNullOrEmpty(candidate))
        .Distinct(StringComparer.Ordinal)
        .ToArray();

    if (requestedNames.Length == 0)
    {
        return Array.Empty<Template>();
    }

    // Split-query mode loads each child collection with its own statement
    // rather than one wide join across all four collections.
    var templatesWithChildren = _context.Templates
        .Where(template => requestedNames.Contains(template.Name))
        .Include(template => template.Attributes)
        .Include(template => template.Alarms)
        .Include(template => template.Scripts)
        .Include(template => template.Compositions)
        .AsSplitQuery();

    return await templatesWithChildren.ToListAsync(cancellationToken);
}
/// <inheritdoc />
public async Task<IReadOnlyList<Template>> GetAllTemplatesAsync(CancellationToken cancellationToken = default)
{
@@ -79,16 +79,75 @@ public class ArtifactDeploymentService
/// single-site retries).
/// </param>
/// <returns>A deployment artifacts command for the site.</returns>
/// <remarks>
/// DeploymentManager-023: this convenience overload runs the global artifact queries
/// for a single site (used by <see cref="RetryForSiteAsync"/>). The multi-site
/// <see cref="DeployToAllSitesAsync"/> path hoists the global queries OUT of the
/// per-site loop and calls the prefetched-globals overload to avoid the N+1
/// re-query of every system-wide artifact set per site.
/// </remarks>
public async Task<DeployArtifactsCommand> BuildDeployArtifactsCommandAsync(
    int siteId,
    CancellationToken cancellationToken = default,
    string? deploymentId = null)
{
    // Single-site path: load the global artifact sets for this one call, then
    // hand off to the prefetched-globals overload that adds the per-site data.
    var snapshot = await FetchGlobalArtifactsAsync(cancellationToken);
    var command = await BuildDeployArtifactsCommandAsync(siteId, snapshot, cancellationToken, deploymentId);
    return command;
}
/// <summary>
/// Assembles the <see cref="DeployArtifactsCommand"/> for one site from an
/// already-fetched snapshot of the global artifact sets (shared scripts, external
/// systems + methods, DB connections, notification lists, SMTP configurations).
/// The only query issued here is the per-site data-connection lookup.
/// </summary>
/// <remarks>
/// DeploymentManager-023: keeping the global fetch separate from the per-site build
/// lets <see cref="DeployToAllSitesAsync"/> run the global queries exactly once for
/// the whole multi-site sweep instead of re-querying shared scripts, external
/// systems, methods, DB connections, notification lists, and SMTP configurations
/// for every site.
/// </remarks>
private async Task<DeployArtifactsCommand> BuildDeployArtifactsCommandAsync(
    int siteId,
    GlobalArtifactSnapshot globals,
    CancellationToken cancellationToken,
    string? deploymentId)
{
    var siteConnections = await _siteRepo.GetDataConnectionsBySiteIdAsync(siteId, cancellationToken);

    // Project the site's data connections into their artifact form.
    var siteConnectionArtifacts =
        (from connection in siteConnections
         select new DataConnectionArtifact(
             connection.Name,
             connection.Protocol,
             connection.PrimaryConfiguration,
             connection.BackupConfiguration,
             connection.FailoverRetryCount)).ToList();

    // A caller-supplied deployment id wins; otherwise mint a fresh one.
    var effectiveDeploymentId = deploymentId ?? Guid.NewGuid().ToString("N");

    return new DeployArtifactsCommand(
        effectiveDeploymentId,
        globals.SharedScripts,
        globals.ExternalSystems,
        globals.DatabaseConnections,
        globals.NotificationLists,
        siteConnectionArtifacts,
        globals.SmtpConfigurations,
        DateTimeOffset.UtcNow);
}
/// <summary>
/// Fetches the system-wide artifact sets that are identical across every site —
/// shared scripts, external systems (with their methods serialized in), database
/// connections, notification lists, and SMTP configurations. Used by
/// <see cref="DeployToAllSitesAsync"/> to pre-load once before the per-site loop.
/// </summary>
/// <remarks>
/// DeploymentManager-023: the per-site artifact build path previously re-issued
/// every one of these queries per site (≈ 5·N + M·N round trips for N sites
/// and M external systems). Hoisting them here drops that to a single fetch.
/// </remarks>
private async Task<GlobalArtifactSnapshot> FetchGlobalArtifactsAsync(CancellationToken cancellationToken)
{
var sharedScripts = await _templateRepo.GetAllSharedScriptsAsync(cancellationToken);
var externalSystems = await _externalSystemRepo.GetAllExternalSystemsAsync(cancellationToken);
var dbConnections = await _externalSystemRepo.GetAllDatabaseConnectionsAsync(cancellationToken);
var notificationLists = await _notificationRepo.GetAllNotificationListsAsync(cancellationToken);
var dataConnections = await _siteRepo.GetDataConnectionsBySiteIdAsync(siteId, cancellationToken);
var smtpConfigurations = await _notificationRepo.GetAllSmtpConfigurationsAsync(cancellationToken);
// Map shared scripts
@@ -122,27 +181,32 @@ public class ArtifactDeploymentService
var notificationListArtifacts = notificationLists.Select(nl =>
new NotificationListArtifact(nl.Name, nl.Recipients.Select(r => r.EmailAddress).ToList())).ToList();
// Map data connections
var dataConnectionArtifacts = dataConnections.Select(dc =>
new DataConnectionArtifact(dc.Name, dc.Protocol, dc.PrimaryConfiguration, dc.BackupConfiguration, dc.FailoverRetryCount)).ToList();
// Map SMTP configurations — use Host as the artifact name (matches SQLite PK on site)
var smtpArtifacts = smtpConfigurations.Select(smtp =>
new SmtpConfigurationArtifact(
$"{smtp.Host}:{smtp.Port}", smtp.Host, smtp.Port, smtp.AuthType, smtp.FromAddress,
smtp.Credentials, null, smtp.TlsMode)).ToList();
return new DeployArtifactsCommand(
deploymentId ?? Guid.NewGuid().ToString("N"),
return new GlobalArtifactSnapshot(
scriptArtifacts,
externalSystemArtifacts,
dbConnectionArtifacts,
notificationListArtifacts,
dataConnectionArtifacts,
smtpArtifacts,
DateTimeOffset.UtcNow);
smtpArtifacts);
}
/// <summary>
/// Bag of the global artifact sets that do not vary per site
/// (DeploymentManager-023). Built by <c>FetchGlobalArtifactsAsync</c>: once at the
/// start of <see cref="DeployToAllSitesAsync"/>, where it is reused for every
/// per-site command build, and once per call by the single-site
/// <c>BuildDeployArtifactsCommandAsync</c> overload. Per-site data connections are
/// deliberately not part of the snapshot.
/// </summary>
/// <param name="SharedScripts">Shared-script artifacts.</param>
/// <param name="ExternalSystems">External-system artifacts.</param>
/// <param name="DatabaseConnections">Database-connection artifacts.</param>
/// <param name="NotificationLists">Notification-list artifacts.</param>
/// <param name="SmtpConfigurations">SMTP-configuration artifacts.</param>
private sealed record GlobalArtifactSnapshot(
IReadOnlyList<SharedScriptArtifact> SharedScripts,
IReadOnlyList<ExternalSystemArtifact> ExternalSystems,
IReadOnlyList<DatabaseConnectionArtifact> DatabaseConnections,
IReadOnlyList<NotificationListArtifact> NotificationLists,
IReadOnlyList<SmtpConfigurationArtifact> SmtpConfigurations);
/// <summary>
/// Deploys artifacts to all sites. Builds a per-site command with that site's data connections.
/// Returns per-site result matrix.
@@ -161,6 +225,12 @@ public class ArtifactDeploymentService
var deploymentId = Guid.NewGuid().ToString("N");
var perSiteResults = new Dictionary<string, SiteArtifactResult>();
// DeploymentManager-023: hoist the system-wide artifact queries (shared scripts,
// external systems + methods, DB connections, notification lists, SMTP configs)
// OUT of the per-site loop so they run ONCE instead of once per site. Only
// data connections legitimately vary per site, so they stay inside the loop.
var globals = await FetchGlobalArtifactsAsync(cancellationToken);
// Build per-site commands sequentially (DbContext is not thread-safe).
// DeploymentManager-010: every per-site command carries the SAME logical
// deploymentId, so the per-site commands, audit log, persisted record,
@@ -169,7 +239,7 @@ public class ArtifactDeploymentService
foreach (var site in sites)
{
siteCommands[site.Id] = await BuildDeployArtifactsCommandAsync(
site.Id, cancellationToken, deploymentId);
site.Id, globals, cancellationToken, deploymentId);
}
// Deploy to each site in parallel with per-site timeout
@@ -138,9 +138,24 @@ public sealed class AuditWriteMiddleware
// stream for a seekable wrapper that the framework rewinds at the end
// of the pipeline for us — but we also rewind to position 0 after our
// own read so the very next reader starts from the top.
ctx.Request.EnableBuffering();
var (requestBody, requestTruncated) =
await ReadBufferedRequestBodyAsync(ctx.Request, cap).ConfigureAwait(false);
//
// InboundAPI-019: skip EnableBuffering for bodyless requests (a known
// empty Content-Length or a method that conventionally carries no body —
// GET / HEAD / DELETE / TRACE / OPTIONS). The FileBufferingReadStream
// wrapper EnableBuffering installs allocates an internal buffer regardless
// of whether the request actually has a body; bodyless inbound traffic
// (e.g. GET /api/audit/query, health probes) no longer pays that cost.
// ReadBufferedRequestBodyAsync's own ContentLength is 0 short-circuit
// returns (null, false) for the bodyless case anyway, so the audit row
// is unchanged.
var requestBody = (string?)null;
var requestTruncated = false;
if (RequestHasBody(ctx.Request))
{
ctx.Request.EnableBuffering();
(requestBody, requestTruncated) =
await ReadBufferedRequestBodyAsync(ctx.Request, cap).ConfigureAwait(false);
}
// Response body — wrap Response.Body in a forwarding stream that mirrors
// every write to the original sink (transparent to the real client)
@@ -301,6 +316,31 @@ public sealed class AuditWriteMiddleware
TaskScheduler.Default);
}
/// <summary>
/// InboundAPI-019: decides whether the request may carry a body, so the caller can
/// skip <see cref="HttpRequestRewindExtensions.EnableBuffering(HttpRequest)"/>
/// (and the associated <c>FileBufferingReadStream</c> allocation) on requests that
/// definitely won't have one. Returns <c>true</c> when any of the following holds:
/// <list type="bullet">
/// <item><see cref="HttpRequest.ContentLength"/> is positive;</item>
/// <item>the server's <c>IHttpRequestBodyDetectionFeature</c> reports that the
/// request can have a body — this covers a body sent WITHOUT a Content-Length on
/// any method (chunked transfer-encoding, HTTP/2 DATA frames), e.g. a chunked
/// DELETE, which a method-only heuristic would drop from the audit copy;</item>
/// <item>the HTTP method is one that conventionally carries a body
/// (POST / PUT / PATCH), kept as the fallback for hosts that do not expose the
/// detection feature.</item>
/// </list>
/// Bodyless methods (GET / HEAD / DELETE / TRACE / OPTIONS) with an absent or zero
/// Content-Length and no server-detected body return <c>false</c> — those are the
/// requests that previously paid the buffering allocation for no benefit.
/// </summary>
/// <param name="request">The inbound request being audited.</param>
/// <returns><c>true</c> when the request body should be buffered and captured.</returns>
private static bool RequestHasBody(HttpRequest request)
{
    if (request.ContentLength is > 0)
    {
        return true;
    }

    // Ask the server rather than guessing from the verb: it knows whether a body
    // was announced. Only a positive answer is trusted here, so every request the
    // verb-based rule below would buffer is still buffered.
    var bodyDetection = request.HttpContext.Features
        .Get<Microsoft.AspNetCore.Http.Features.IHttpRequestBodyDetectionFeature>();
    if (bodyDetection is { CanHaveBody: true })
    {
        return true;
    }

    var method = request.Method;
    return HttpMethods.IsPost(method)
        || HttpMethods.IsPut(method)
        || HttpMethods.IsPatch(method);
}
/// <summary>
/// Reads the buffered request body up to <paramref name="capBytes"/> bytes
/// into a string for the audit copy and rewinds the stream so the
@@ -1287,18 +1287,28 @@ public class ManagementActor : ReceiveActor
var permittedIds = new HashSet<string>(user.PermittedSiteIds);
var templateRepo = sp.GetRequiredService<ITemplateEngineRepository>();
var instanceSiteCache = new Dictionary<int, int?>();
// ManagementService-023: pre-load all instances ONCE via the repository's
// bulk method and build an InstanceId -> SiteId? lookup, instead of issuing
// GetInstanceByIdAsync per distinct record.InstanceId (textbook N+1). The
// unfiltered branch now hits the configuration database exactly twice
// (deployment records + instances) regardless of fleet size.
var allInstances = await templateRepo.GetAllInstancesAsync();
var instanceSiteLookup = new Dictionary<int, int?>(allInstances.Count);
foreach (var instance in allInstances)
{
instanceSiteLookup[instance.Id] = instance.SiteId;
}
var scoped = new List<DeploymentRecord>();
foreach (var record in records)
{
if (!instanceSiteCache.TryGetValue(record.InstanceId, out var siteId))
if (instanceSiteLookup.TryGetValue(record.InstanceId, out var siteId)
&& siteId.HasValue
&& permittedIds.Contains(siteId.Value.ToString()))
{
var instance = await templateRepo.GetInstanceByIdAsync(record.InstanceId);
siteId = instance?.SiteId;
instanceSiteCache[record.InstanceId] = siteId;
}
if (siteId.HasValue && permittedIds.Contains(siteId.Value.ToString()))
scoped.Add(record);
}
}
return scoped;
}
@@ -49,6 +49,34 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
/// </summary>
private bool _dispatching;
/// <summary>
/// NotificationOutbox-006: cached <see cref="NotificationType"/> → adapter lookup, built
/// lazily on the first dispatch sweep and reused for the lifetime of the actor. The
/// adapter registration is decided at startup by <c>AddNotificationOutbox</c> (the set is
/// keyed by <see cref="NotificationType"/> and is static per process lifetime), so
/// rebuilding this dictionary on every sweep was pure allocation waste.
/// </summary>
/// <remarks>
/// The cache is paired with <see cref="_adaptersScope"/>, an actor-lifetime
/// <see cref="IServiceScope"/> created on first use so the cached scoped adapter
/// instances and their dependencies live as long as the cache itself. The scope is
/// disposed in <see cref="PostStop"/>. The adapters are stateless wrappers that
/// resolve their per-call collaborators (e.g. <see cref="INotificationRepository"/>'s
/// underlying DbContext) through their own injected dependencies; holding them for
/// the actor's lifetime is consistent with the actor's own singleton lifetime on the
/// active central node.
/// </remarks>
private IReadOnlyDictionary<NotificationType, INotificationDeliveryAdapter>? _adaptersCache;
/// <summary>
/// NotificationOutbox-006: actor-lifetime DI scope that owns the cached
/// <see cref="_adaptersCache"/> adapter instances. Created lazily on the first
/// dispatch sweep that needs adapters; disposed in <see cref="PostStop"/> so the
/// scoped adapter graph (and any disposable dependencies it transitively holds) is
/// torn down with the actor.
/// </summary>
private IServiceScope? _adaptersScope;
/// <summary>
/// NO-003: lifecycle-scoped cancellation source, cancelled in <see cref="PostStop"/> so
/// any in-flight dispatch sweep — including a long-running SMTP send via the channel
@@ -125,6 +153,14 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
_shutdownCts?.Dispose();
_shutdownCts = null;
// NotificationOutbox-006: dispose the actor-lifetime adapter scope so the cached
// scoped adapter instances and their disposable dependencies are torn down with
// the actor (e.g. on a CoordinatedShutdown / failover that stops the singleton).
_adaptersScope?.Dispose();
_adaptersScope = null;
_adaptersCache = null;
base.PostStop();
}
@@ -224,10 +260,12 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
/// and retry-policy resolution can all throw, and a faulted task would otherwise leave
/// the dispatcher's in-flight guard stuck and wedge the loop permanently.
///
/// The channel delivery adapters are resolved from the per-sweep scope, not held in a
/// field: <see cref="EmailNotificationDeliveryAdapter"/> takes a scoped
/// <see cref="INotificationRepository"/> directly, so a long-lived adapter reference on
/// this singleton actor would be a captive dependency over a disposed DbContext.
/// The per-sweep DI scope still owns the repository graph
/// (<see cref="INotificationOutboxRepository"/> + <see cref="INotificationRepository"/>),
/// which is correct because those services back a fresh DbContext per sweep. The
/// channel delivery adapters, however, are cached for the actor's lifetime via
/// <see cref="ResolveAdapters"/> — see <see cref="_adaptersCache"/> for the
/// NotificationOutbox-006 rationale.
/// </summary>
private async Task RunDispatchPass(DateTimeOffset now, CancellationToken cancellationToken)
{
@@ -236,7 +274,7 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
using var scope = _serviceProvider.CreateScope();
var outboxRepository = scope.ServiceProvider.GetRequiredService<INotificationOutboxRepository>();
var notificationRepository = scope.ServiceProvider.GetRequiredService<INotificationRepository>();
var adapters = ResolveAdapters(scope.ServiceProvider);
var adapters = ResolveAdapters();
IReadOnlyList<Notification> due;
try
@@ -348,21 +386,37 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
}
/// <summary>
/// Builds the <see cref="NotificationType"/> → adapter lookup for a dispatch sweep from
/// the registered <see cref="INotificationDeliveryAdapter"/> services in the supplied
/// scope. The last adapter registered for a given type wins, mirroring DI's last-wins
/// resolution semantics.
/// Returns the <see cref="NotificationType"/> → adapter lookup, building it lazily on
/// the first call and caching it on <see cref="_adaptersCache"/> for the actor's
/// lifetime. The last adapter registered for a given type wins, mirroring DI's
/// last-wins resolution semantics.
/// </summary>
private static IReadOnlyDictionary<NotificationType, INotificationDeliveryAdapter> ResolveAdapters(
IServiceProvider scopedServices)
/// <remarks>
/// NotificationOutbox-006: the lookup used to be rebuilt on every dispatch sweep
/// from the per-sweep DI scope. Adapter registration is static per process
/// lifetime, so the dict is now built ONCE — on the first sweep that needs it —
/// and reused. To respect each adapter's scoped lifetime
/// (<see cref="EmailNotificationDeliveryAdapter"/> takes a scoped
/// <see cref="INotificationRepository"/>), the cache is paired with
/// <see cref="_adaptersScope"/>, an actor-lifetime <see cref="IServiceScope"/> that
/// owns the cached adapter instances and is disposed in <see cref="PostStop"/>.
/// </remarks>
private IReadOnlyDictionary<NotificationType, INotificationDeliveryAdapter> ResolveAdapters()
{
if (_adaptersCache is not null)
{
return _adaptersCache;
}
_adaptersScope = _serviceProvider.CreateScope();
var adapters = new Dictionary<NotificationType, INotificationDeliveryAdapter>();
foreach (var adapter in scopedServices.GetServices<INotificationDeliveryAdapter>())
foreach (var adapter in _adaptersScope.ServiceProvider.GetServices<INotificationDeliveryAdapter>())
{
adapters[adapter.Type] = adapter;
}
return adapters;
_adaptersCache = adapters;
return _adaptersCache;
}
/// <summary>
@@ -9,8 +9,39 @@ namespace ScadaLink.NotificationService;
/// Supports OAuth2 Client Credentials (M365) and Basic Auth.
/// BCC delivery, plain text.
/// </summary>
/// <remarks>
/// <para>
/// <b>Lifetime — one wrapper, one delivery (NS-022).</b>
/// This wrapper owns a SINGLE underlying <see cref="MailKit.Net.Smtp.SmtpClient"/>
/// — it is NOT a connection pool. MailKit's <c>SmtpClient</c> is a single TCP/TLS
/// connection holder and is NOT thread-safe; reusing one across concurrent or
/// back-to-back deliveries without external synchronization is unsafe.
/// </para>
/// <para>
/// The DI registration (<c>AddSingleton&lt;Func&lt;ISmtpClientWrapper&gt;&gt;</c>)
/// is therefore a per-delivery FACTORY, not a singleton wrapper: callers
/// (<see cref="ScadaLink.NotificationOutbox.Delivery.EmailNotificationDeliveryAdapter"/>)
/// invoke the factory at the top of every <c>DeliverAsync</c>, run the
/// connect/authenticate/send/disconnect sequence on the fresh wrapper, and
/// dispose it at the end of the send. Each delivery pays a full TCP+TLS
/// handshake; this is the deliberate, documented cost of avoiding shared
/// connection state. The factory shape exists specifically so a future
/// pooled/synchronized implementation can be slotted in without changing
/// callers — but the current implementation deliberately does NOT pool.
/// </para>
/// <para>
/// Do not reuse one wrapper across deliveries. <see cref="ConnectAsync"/>
/// mutates <c>_client.Timeout</c> per call (NS-007), and the underlying
/// <c>SmtpClient</c> rejects concurrent send calls — both are latent footguns
/// for any caller tempted to "fix" the factory into a true singleton.
/// </para>
/// </remarks>
public class MailKitSmtpClientWrapper : ISmtpClientWrapper, IDisposable
{
// NS-022: ONE SmtpClient per wrapper — see class-level remarks. This is NOT a
// connection pool. MailKit's SmtpClient holds a single TCP/TLS connection and
// is not thread-safe; the wrapper is meant for a single connect/auth/send/
// disconnect cycle per instance, after which it MUST be disposed.
private readonly SmtpClient _client = new();
/// <inheritdoc />
@@ -52,8 +52,15 @@ public class SiteEventLogger : ISiteEventLogger, IDisposable
{
_logger = logger;
// SiteEventLogging-022: Cache=Shared is a cross-connection optimisation
// that lets multiple SqliteConnections share an in-process page cache.
// This logger owns exactly one SqliteConnection and serialises all
// access through _writeLock, so the mode is dormant — at best dead
// configuration, at worst a small future foot-gun for any second
// connection opened to the same file. A test path that genuinely
// needs Cache=Shared can still inject it via connectionStringOverride.
var connectionString = connectionStringOverride
?? $"Data Source={options.Value.DatabasePath};Cache=Shared";
?? $"Data Source={options.Value.DatabasePath}";
_connection = new SqliteConnection(connectionString);
_connection.Open();
@@ -350,24 +350,16 @@ public sealed class BundleImporter : IBundleImporter
}
// ---- Templates ----
// Repos only expose GetTemplateByIdAsync / GetAllTemplatesAsync — no
// by-name lookup. Pull all once and index by name for the diff loop.
var allTemplates = await _templateRepo.GetAllTemplatesAsync(ct).ConfigureAwait(false);
var hydratedByName = new Dictionary<string, Template>(StringComparer.Ordinal);
foreach (var stub in allTemplates)
{
// GetAllTemplatesAsync may not eager-load children — fetch the
// children-loaded variant for any name that matches an incoming DTO
// so the per-child diff loop sees the full collection.
if (content.Templates.Any(t => string.Equals(t.Name, stub.Name, StringComparison.Ordinal)))
{
var hydrated = await _templateRepo.GetTemplateWithChildrenAsync(stub.Id, ct).ConfigureAwait(false);
if (hydrated is not null)
{
hydratedByName[stub.Name] = hydrated;
}
}
}
// Transport-008: previously this loop iterated GetAllTemplatesAsync()
// and called GetTemplateWithChildrenAsync(stub.Id) once per matching
// name (classic N+1). The bulk variant fetches every matching template
// with children eager-loaded in a single round-trip.
var bundleTemplateNames = content.Templates.Select(t => t.Name);
var hydratedTemplates = await _templateRepo
.GetTemplatesWithChildrenAsync(bundleTemplateNames, ct)
.ConfigureAwait(false);
var hydratedByName = hydratedTemplates
.ToDictionary(t => t.Name, t => t, StringComparer.Ordinal);
foreach (var tDto in content.Templates)
{
hydratedByName.TryGetValue(tDto.Name, out var existing);