From 5fc6790c36943f86c3fe9b6083bb5e82fbac61e6 Mon Sep 17 00:00:00 2001
From: Joseph Doherty
Date: Sun, 24 May 2026 04:37:02 -0400
Subject: [PATCH] feat(transport): BundleImporter.LoadAsync with manifest validation

---
 .../Import/BundleImporter.cs                  | 221 +++++++++++++
 .../ServiceCollectionExtensions.cs            |   1 +
 .../Import/BundleImporterLoadTests.cs         | 307 ++++++++++++++++++
 3 files changed, 529 insertions(+)
 create mode 100644 src/ScadaLink.Transport/Import/BundleImporter.cs
 create mode 100644 tests/ScadaLink.Transport.Tests/Import/BundleImporterLoadTests.cs

diff --git a/src/ScadaLink.Transport/Import/BundleImporter.cs b/src/ScadaLink.Transport/Import/BundleImporter.cs
new file mode 100644
index 0000000..bd91c0f
--- /dev/null
+++ b/src/ScadaLink.Transport/Import/BundleImporter.cs
@@ -0,0 +1,221 @@
+using System.Security.Cryptography;
+using Microsoft.Extensions.Options;
+using ScadaLink.Commons.Interfaces.Repositories;
+using ScadaLink.Commons.Interfaces.Services;
+using ScadaLink.Commons.Interfaces.Transport;
+using ScadaLink.Commons.Types.Transport;
+using ScadaLink.ConfigurationDatabase;
+using ScadaLink.Transport.Encryption;
+using ScadaLink.Transport.Serialization;
+
+namespace ScadaLink.Transport.Import;
+
+/// <summary>
+/// Three-phase bundle importer: <see cref="LoadAsync"/> validates the
+/// bundle envelope (manifest + content hash + decryption) and opens a
+/// session; <see cref="PreviewAsync"/> diffs the bundle's DTOs against the
+/// current target database; <see cref="ApplyAsync"/> writes the chosen
+/// resolutions through the audited repositories. Only LoadAsync is
+/// implemented in this slice — the other two are wired into DI now so
+/// follow-up tasks can fill them in without churning the constructor.
+/// </summary>
+public sealed class BundleImporter : IBundleImporter
+{
+    /// <summary>Chunk size, in bytes, used while copying the upload into memory.</summary>
+    private const int CopyBufferSize = 81920;
+
+    private readonly BundleSerializer _bundleSerializer;
+    private readonly ManifestValidator _manifestValidator;
+    private readonly BundleSecretEncryptor _encryptor;
+#pragma warning disable IDE0052 // wired-in dependencies for T16/T17.
+    private readonly EntitySerializer _entitySerializer;
+    private readonly ITemplateEngineRepository _templateRepo;
+    private readonly IExternalSystemRepository _externalRepo;
+    private readonly INotificationRepository _notificationRepo;
+    private readonly IInboundApiRepository _inboundApiRepo;
+    private readonly IAuditService _auditService;
+    private readonly IAuditCorrelationContext _correlationContext;
+    private readonly ScadaLinkDbContext _dbContext;
+#pragma warning restore IDE0052
+    private readonly IBundleSessionStore _sessionStore;
+    private readonly IOptions _options;
+    private readonly TimeProvider _timeProvider;
+
+    /// <summary>
+    /// Captures every collaborator the three import phases need, rejecting
+    /// null for each one. LoadAsync itself only uses the serializer,
+    /// validator, encryptor, session store, options and clock.
+    /// </summary>
+    public BundleImporter(
+        BundleSerializer bundleSerializer,
+        ManifestValidator manifestValidator,
+        BundleSecretEncryptor encryptor,
+        EntitySerializer entitySerializer,
+        IBundleSessionStore sessionStore,
+        IOptions options,
+        TimeProvider timeProvider,
+        ITemplateEngineRepository templateRepo,
+        IExternalSystemRepository externalRepo,
+        INotificationRepository notificationRepo,
+        IInboundApiRepository inboundApiRepo,
+        IAuditService auditService,
+        IAuditCorrelationContext correlationContext,
+        ScadaLinkDbContext dbContext)
+    {
+        _bundleSerializer = bundleSerializer ?? throw new ArgumentNullException(nameof(bundleSerializer));
+        _manifestValidator = manifestValidator ?? throw new ArgumentNullException(nameof(manifestValidator));
+        _encryptor = encryptor ?? throw new ArgumentNullException(nameof(encryptor));
+        _entitySerializer = entitySerializer ?? throw new ArgumentNullException(nameof(entitySerializer));
+        _sessionStore = sessionStore ?? throw new ArgumentNullException(nameof(sessionStore));
+        _options = options ?? throw new ArgumentNullException(nameof(options));
+        _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
+        _templateRepo = templateRepo ?? throw new ArgumentNullException(nameof(templateRepo));
+        _externalRepo = externalRepo ?? throw new ArgumentNullException(nameof(externalRepo));
+        _notificationRepo = notificationRepo ?? throw new ArgumentNullException(nameof(notificationRepo));
+        _inboundApiRepo = inboundApiRepo ?? throw new ArgumentNullException(nameof(inboundApiRepo));
+        _auditService = auditService ?? throw new ArgumentNullException(nameof(auditService));
+        _correlationContext = correlationContext ?? throw new ArgumentNullException(nameof(correlationContext));
+        _dbContext = dbContext ?? throw new ArgumentNullException(nameof(dbContext));
+    }
+
+    /// <summary>
+    /// Validates the bundle envelope and opens a session keyed by a fresh GUID.
+    /// Wrong-passphrase failures surface as <see cref="CryptographicException"/>
+    /// so the caller (UI / API endpoint) can count the failed unlock attempt —
+    /// this method does not mutate FailedUnlockAttempts itself because no
+    /// session exists yet at the point of failure.
+    /// </summary>
+    /// <param name="bundleStream">Bundle zip; read from its current position and not disposed here.</param>
+    /// <param name="passphrase">Must be non-empty when the manifest carries encryption metadata.</param>
+    /// <param name="ct">Cancels the copy of <paramref name="bundleStream"/> into memory.</param>
+    /// <returns>The session handed back by the session store.</returns>
+    /// <exception cref="InvalidOperationException">The bundle is larger than MaxBundleSizeMb.</exception>
+    /// <exception cref="InvalidDataException">The manifest is missing or malformed, or the content hash does not match.</exception>
+    /// <exception cref="NotSupportedException">The bundle format version is not supported.</exception>
+    /// <exception cref="ArgumentException">The bundle is encrypted and no passphrase was supplied.</exception>
+    public async Task LoadAsync(Stream bundleStream, string? passphrase, CancellationToken ct = default)
+    {
+        ArgumentNullException.ThrowIfNull(bundleStream);
+
+        // Size cap is in MB; multiply in long arithmetic so the comparison
+        // doesn't overflow at the int boundary for large MaxBundleSizeMb.
+        var maxBytes = _options.Value.MaxBundleSizeMb * 1024L * 1024L;
+
+        // Copy to a seekable buffer — manifest + content readers each open a
+        // fresh ZipArchive over the same bytes, so the upstream stream needs to
+        // be seekable. A caller-supplied FileStream is seekable but a Kestrel
+        // request stream is not, so we always normalise to MemoryStream. The
+        // cap is enforced while copying so an oversized upload is rejected
+        // after at most one extra chunk instead of being buffered in full.
+        using var ms = new MemoryStream();
+        var chunk = new byte[CopyBufferSize];
+        while (true)
+        {
+            if (ms.Length > maxBytes)
+            {
+                throw new InvalidOperationException(
+                    $"Bundle exceeds maximum allowed size of {_options.Value.MaxBundleSizeMb} MB.");
+            }
+
+            var read = await bundleStream.ReadAsync(chunk.AsMemory(), ct).ConfigureAwait(false);
+            if (read == 0)
+            {
+                break;
+            }
+
+            ms.Write(chunk, 0, read);
+        }
+
+        BundleManifest manifest;
+        try
+        {
+            ms.Position = 0;
+            manifest = _bundleSerializer.ReadManifest(ms);
+        }
+        catch (InvalidDataException)
+        {
+            // Preserve the serializer's specific "manifest missing/null" message
+            // — the caller wants to surface a precise diagnostic to the operator.
+            throw;
+        }
+        catch (Exception ex)
+        {
+            throw new InvalidDataException("Bundle is missing or has a malformed manifest.json.", ex);
+        }
+
+        ms.Position = 0;
+        var contentBytes = _bundleSerializer.ReadContentBytes(ms, manifest);
+
+        // Validate format version + content-hash + manifest shape. Reject paths
+        // surface as distinct exceptions so the UI can disambiguate the cause.
+        var validation = _manifestValidator.Validate(manifest, contentBytes);
+        switch (validation)
+        {
+            case ManifestValidationResult.UnsupportedFormatVersion:
+                throw new NotSupportedException(
+                    $"Bundle format version {manifest.BundleFormatVersion} is not supported by this cluster.");
+            case ManifestValidationResult.ContentHashMismatch:
+                throw new InvalidDataException(
+                    "Bundle content hash does not match manifest — file may be corrupt.");
+            case ManifestValidationResult.MalformedManifest:
+                throw new InvalidDataException("Bundle manifest is malformed.");
+            case ManifestValidationResult.Ok:
+                break;
+            default:
+                throw new InvalidDataException($"Unrecognised manifest validation result: {validation}.");
+        }
+
+        // Decrypt when the manifest carries EncryptionMetadata. AES-GCM tag
+        // mismatch surfaces as a CryptographicException (or its
+        // AuthenticationTagMismatchException subclass on .NET 10+) — bubble it
+        // unchanged so the caller can detect wrong-passphrase via type check
+        // and count the failed attempt. The session is not opened on the
+        // failure path.
+        byte[] decryptedContent;
+        if (manifest.Encryption is not null)
+        {
+            if (string.IsNullOrEmpty(passphrase))
+            {
+                throw new ArgumentException(
+                    "Passphrase required for encrypted bundle.", nameof(passphrase));
+            }
+
+            decryptedContent = _encryptor.Decrypt(contentBytes, manifest.Encryption, passphrase);
+        }
+        else
+        {
+            decryptedContent = contentBytes;
+        }
+
+        var ttl = TimeSpan.FromMinutes(_options.Value.BundleSessionTtlMinutes);
+        var session = new BundleSession
+        {
+            SessionId = Guid.NewGuid(),
+            Manifest = manifest,
+            DecryptedContent = decryptedContent,
+            ExpiresAt = _timeProvider.GetUtcNow() + ttl,
+        };
+        return _sessionStore.Open(session);
+    }
+
+    /// <summary>Not implemented in this slice; always throws.</summary>
+    /// <exception cref="NotImplementedException">Always.</exception>
+    public Task PreviewAsync(Guid sessionId, CancellationToken ct = default)
+    {
+        // Filled in by T16. Throwing NotImplementedException here keeps the
+        // interface contract honest while letting LoadAsync ship in isolation.
+        throw new NotImplementedException("PreviewAsync is implemented by task T16.");
+    }
+
+    /// <summary>Not implemented in this slice; always throws.</summary>
+    /// <exception cref="NotImplementedException">Always.</exception>
+    public Task ApplyAsync(
+        Guid sessionId,
+        IReadOnlyList resolutions,
+        string user,
+        CancellationToken ct = default)
+    {
+        // Filled in by T17.
+        throw new NotImplementedException("ApplyAsync is implemented by task T17.");
+    }
+}
diff --git a/src/ScadaLink.Transport/ServiceCollectionExtensions.cs b/src/ScadaLink.Transport/ServiceCollectionExtensions.cs
index 50fee11..af88309 100644
--- a/src/ScadaLink.Transport/ServiceCollectionExtensions.cs
+++ b/src/ScadaLink.Transport/ServiceCollectionExtensions.cs
@@ -30,6 +30,7 @@ public static class ServiceCollectionExtensions
         services.AddScoped();
         services.AddScoped();
         services.AddSingleton();
+        services.AddScoped();
         // Remaining concrete services added in later tasks.
         return services;
     }
diff --git a/tests/ScadaLink.Transport.Tests/Import/BundleImporterLoadTests.cs b/tests/ScadaLink.Transport.Tests/Import/BundleImporterLoadTests.cs
new file mode 100644
index 0000000..24ce25e
--- /dev/null
+++ b/tests/ScadaLink.Transport.Tests/Import/BundleImporterLoadTests.cs
@@ -0,0 +1,307 @@
+using System.IO.Compression;
+using System.Security.Cryptography;
+using System.Text.Json;
+using System.Text.Json.Serialization;
+using Microsoft.EntityFrameworkCore;
+using Microsoft.Extensions.Options;
+using NSubstitute;
+using ScadaLink.Commons.Interfaces.Repositories;
+using ScadaLink.Commons.Interfaces.Services;
+using ScadaLink.Commons.Interfaces.Transport;
+using ScadaLink.Commons.Types.Transport;
+using ScadaLink.ConfigurationDatabase;
+using ScadaLink.Transport.Encryption;
+using ScadaLink.Transport.Import;
+using ScadaLink.Transport.Serialization;
+
+namespace ScadaLink.Transport.Tests.Import;
+
+/// <summary>
+/// Unit tests for BundleImporter.LoadAsync. The serializer, manifest
+/// validator/builder, secret encryptor and session store are the real
+/// implementations — they're stateless / in-memory and easier to drive
+/// than mocks. Repositories + audit are mocked and the DbContext is built
+/// without a provider because LoadAsync does not exercise them (they're
+/// injected so the constructor stays stable across T15/T16/T17).
+/// </summary>
+///
+public sealed class BundleImporterLoadTests
+{
+    /// <summary>JSON options used to hand-write manifests and to read decrypted content back in these tests.</summary>
+    private static readonly JsonSerializerOptions BundleJsonOptions = new()
+    {
+        WriteIndented = false,
+        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
+        Converters = { new JsonStringEnumConverter() },
+    };
+
+    /// <summary>Bundle content with every collection empty.</summary>
+    private static BundleContentDto EmptyContent() => new(
+        TemplateFolders: Array.Empty(),
+        Templates: Array.Empty(),
+        SharedScripts: Array.Empty(),
+        ExternalSystems: Array.Empty(),
+        DatabaseConnections: Array.Empty(),
+        NotificationLists: Array.Empty(),
+        SmtpConfigs: Array.Empty(),
+        ApiKeys: Array.Empty(),
+        ApiMethods: Array.Empty());
+
+    /// <summary>Bundle content holding a single template named "Pump"; every other collection is empty.</summary>
+    private static BundleContentDto SmallContent() => new(
+        TemplateFolders: Array.Empty(),
+        Templates: new[]
+        {
+            new TemplateDto(
+                Name: "Pump",
+                FolderName: null,
+                BaseTemplateName: null,
+                Description: "the one and only",
+                Attributes: Array.Empty(),
+                Alarms: Array.Empty(),
+                Scripts: Array.Empty(),
+                Compositions: Array.Empty()),
+        },
+        SharedScripts: Array.Empty(),
+        ExternalSystems: Array.Empty(),
+        DatabaseConnections: Array.Empty(),
+        NotificationLists: Array.Empty(),
+        SmtpConfigs: Array.Empty(),
+        ApiKeys: Array.Empty(),
+        ApiMethods: Array.Empty());
+
+    /// <summary>Clock frozen at the instant passed to the constructor.</summary>
+    private sealed class TestTimeProvider : TimeProvider
+    {
+        private readonly DateTimeOffset _now;
+        public TestTimeProvider(DateTimeOffset start) { _now = start; }
+        public override DateTimeOffset GetUtcNow() => _now;
+    }
+
+    private sealed record TestRig(
+        BundleImporter Importer,
+        BundleSerializer Serializer,
+        ManifestBuilder ManifestBuilder,
+        BundleSecretEncryptor Encryptor,
+        BundleSessionStore SessionStore,
+        TransportOptions Options);
+
+    /// <summary>Builds an importer over real transport services and substituted repositories; <paramref name="configure"/> can adjust the options first.</summary>
+    private static TestRig BuildRig(Action? configure = null)
+    {
+        var opts = new TransportOptions();
+        configure?.Invoke(opts);
+        var iOpts = Options.Create(opts);
+        var clock = new TestTimeProvider(DateTimeOffset.UtcNow);
+        var serializer = new BundleSerializer();
+        var validator = new ManifestValidator();
+        var encryptor = new BundleSecretEncryptor();
+        var entitySerializer = new EntitySerializer();
+        var manifestBuilder = new ManifestBuilder();
+        var store = new BundleSessionStore(iOpts, clock);
+
+        var importer = new BundleImporter(
+            bundleSerializer: serializer,
+            manifestValidator: validator,
+            encryptor: encryptor,
+            entitySerializer: entitySerializer,
+            sessionStore: store,
+            options: iOpts,
+            timeProvider: clock,
+            templateRepo: Substitute.For(),
+            externalRepo: Substitute.For(),
+            notificationRepo: Substitute.For(),
+            inboundApiRepo: Substitute.For(),
+            auditService: Substitute.For(),
+            correlationContext: Substitute.For(),
+            // LoadAsync never touches the DbContext — Preview/Apply do. Build
+            // a no-provider DbContext so the importer's null check passes;
+            // the in-memory provider isn't worth pulling in for unit tests.
+            dbContext: new ScadaLinkDbContext(new DbContextOptionsBuilder().Options));
+
+        return new TestRig(importer, serializer, manifestBuilder, encryptor, store, opts);
+    }
+
+    /// <summary>Packs <paramref name="content"/> into an unencrypted bundle stream; the caller disposes it.</summary>
+    private static Stream PackPlainBundle(BundleSerializer serializer, ManifestBuilder builder, BundleContentDto content)
+    {
+        var contentBytes = serializer.SerializeContentBytes(content);
+        var manifest = builder.Build(
+            sourceEnvironment: "dev",
+            exportedBy: "alice",
+            scadaLinkVersion: "1.0.0",
+            encryption: null,
+            summary: new BundleSummary(content.Templates.Count, 0, 0, 0, 0, 0, 0, 0, 0),
+            contents: Array.Empty(),
+            contentBytes: contentBytes);
+        return serializer.Pack(content, manifest, passphrase: null, encryptor: null);
+    }
+
+    /// <summary>Packs <paramref name="content"/> into a bundle encrypted with <paramref name="passphrase"/>; the caller disposes it.</summary>
+    private static Stream PackEncryptedBundle(
+        BundleSerializer serializer,
+        ManifestBuilder builder,
+        BundleSecretEncryptor encryptor,
+        BundleContentDto content,
+        string passphrase)
+    {
+        var contentBytes = serializer.SerializeContentBytes(content);
+        // Pack re-stamps salt/iv/hash from the ciphertext it actually writes,
+        // so the seed values here are placeholders.
+        var seed = new EncryptionMetadata("AES-256-GCM", "PBKDF2-SHA256", 600_000, string.Empty, string.Empty);
+        var manifest = builder.Build(
+            sourceEnvironment: "dev",
+            exportedBy: "alice",
+            scadaLinkVersion: "1.0.0",
+            encryption: seed,
+            summary: new BundleSummary(content.Templates.Count, 0, 0, 0, 0, 0, 0, 0, 0),
+            contents: Array.Empty(),
+            contentBytes: contentBytes);
+        return serializer.Pack(content, manifest, passphrase, encryptor);
+    }
+
+    [Fact]
+    public async Task LoadAsync_returns_session_for_unencrypted_bundle()
+    {
+        var rig = BuildRig();
+        var content = SmallContent();
+        using var stream = PackPlainBundle(rig.Serializer, rig.ManifestBuilder, content);
+
+        var session = await rig.Importer.LoadAsync(stream, passphrase: null);
+
+        Assert.NotNull(session);
+        Assert.NotEqual(Guid.Empty, session.SessionId);
+        Assert.Equal("dev", session.Manifest.SourceEnvironment);
+        Assert.Equal("alice", session.Manifest.ExportedBy);
+        Assert.Null(session.Manifest.Encryption);
+        Assert.NotNull(rig.SessionStore.Get(session.SessionId));
+    }
+
+    [Fact]
+    public async Task LoadAsync_returns_session_for_encrypted_bundle_with_correct_passphrase()
+    {
+        var rig = BuildRig();
+        var content = SmallContent();
+        using var stream = PackEncryptedBundle(rig.Serializer, rig.ManifestBuilder, rig.Encryptor, content, "secret123");
+
+        var session = await rig.Importer.LoadAsync(stream, passphrase: "secret123");
+
+        Assert.NotNull(session);
+        Assert.NotNull(session.Manifest.Encryption);
+        Assert.NotEmpty(session.DecryptedContent);
+        // The decrypted payload must round-trip back to the original DTO so the
+        // PreviewAsync phase can deserialize it directly from the session.
+        var roundTripped = JsonSerializer.Deserialize(session.DecryptedContent, BundleJsonOptions);
+        Assert.NotNull(roundTripped);
+        Assert.Single(roundTripped!.Templates);
+        Assert.Equal("Pump", roundTripped.Templates[0].Name);
+    }
+
+    [Fact]
+    public async Task LoadAsync_throws_when_passphrase_wrong()
+    {
+        var rig = BuildRig();
+        using var stream = PackEncryptedBundle(rig.Serializer, rig.ManifestBuilder, rig.Encryptor, EmptyContent(), "correct");
+
+        // AES-GCM raises AuthenticationTagMismatchException, a CryptographicException
+        // subclass on .NET 10 — ThrowsAny is the right match.
+        await Assert.ThrowsAnyAsync(() => rig.Importer.LoadAsync(stream, passphrase: "wrong"));
+    }
+
+    [Fact]
+    public async Task LoadAsync_throws_NotSupportedException_when_bundleFormatVersion_unsupported()
+    {
+        var rig = BuildRig();
+
+        // Synthesize a zip by hand whose manifest carries an unsupported format
+        // version. The validator looks at BundleFormatVersion first thing after
+        // null-checks, so the content hash doesn't need to be correct for this
+        // path — we just need a structurally valid manifest record.
+        var content = EmptyContent();
+        var contentBytes = rig.Serializer.SerializeContentBytes(content);
+        var forwardManifest = new BundleManifest(
+            BundleFormatVersion: 999,
+            SchemaVersion: "1.0",
+            CreatedAtUtc: DateTimeOffset.UtcNow,
+            SourceEnvironment: "dev",
+            ExportedBy: "alice",
+            ScadaLinkVersion: "1.0.0",
+            ContentHash: "sha256:" + Convert.ToHexString(SHA256.HashData(contentBytes)).ToLowerInvariant(),
+            Encryption: null,
+            Summary: new BundleSummary(0, 0, 0, 0, 0, 0, 0, 0, 0),
+            Contents: Array.Empty());
+
+        using var bundleStream = HandCraftZip(forwardManifest, contentBytes, encrypted: false);
+
+        await Assert.ThrowsAsync(() => rig.Importer.LoadAsync(bundleStream, passphrase: null));
+    }
+
+    [Fact]
+    public async Task LoadAsync_throws_InvalidDataException_when_content_hash_mismatch()
+    {
+        var rig = BuildRig();
+
+        // Build a normal bundle, then corrupt content.json's bytes after the
+        // manifest is stamped — the manifest still references the original hash.
+        var content = SmallContent();
+        var originalContentBytes = rig.Serializer.SerializeContentBytes(content);
+        var manifest = rig.ManifestBuilder.Build(
+            sourceEnvironment: "dev",
+            exportedBy: "alice",
+            scadaLinkVersion: "1.0.0",
+            encryption: null,
+            summary: new BundleSummary(1, 0, 0, 0, 0, 0, 0, 0, 0),
+            contents: Array.Empty(),
+            contentBytes: originalContentBytes);
+
+        // Corrupt the bytes so the validator's recomputed hash diverges from
+        // the manifest's frozen hash.
+        var corrupted = (byte[])originalContentBytes.Clone();
+        corrupted[0] ^= 0xFF;
+
+        using var bundleStream = HandCraftZip(manifest, corrupted, encrypted: false);
+
+        var ex = await Assert.ThrowsAsync(() => rig.Importer.LoadAsync(bundleStream, passphrase: null));
+        Assert.Contains("hash", ex.Message, StringComparison.OrdinalIgnoreCase);
+    }
+
+    [Fact]
+    public async Task LoadAsync_throws_InvalidOperationException_when_bundle_exceeds_size_cap()
+    {
+        // MaxBundleSizeMb is a whole number of MB, so tripping a realistic cap
+        // would need a payload larger than 1 MB. Capping at 0 MB keeps the
+        // fixture small instead — the importer rejects once the buffered byte
+        // count is `> maxBytes`, so any non-empty bundle exceeds a 0 MB cap.
+        var rig = BuildRig(opts => opts.MaxBundleSizeMb = 0);
+        using var stream = PackPlainBundle(rig.Serializer, rig.ManifestBuilder, SmallContent());
+
+        await Assert.ThrowsAsync(() => rig.Importer.LoadAsync(stream, passphrase: null));
+    }
+
+    /// <summary>
+    /// Builds a zip directly so the test can write a manifest whose
+    /// <c>ContentHash</c> or <c>BundleFormatVersion</c> intentionally
+    /// disagrees with the content bytes — paths the high-level
+    /// <see cref="BundleSerializer.Pack"/> won't produce because it always
+    /// re-stamps the hash itself.
+    /// </summary>
+    private static Stream HandCraftZip(BundleManifest manifest, byte[] contentBytes, bool encrypted)
+    {
+        var manifestBytes = JsonSerializer.SerializeToUtf8Bytes(manifest, BundleJsonOptions);
+        var ms = new MemoryStream();
+        using (var archive = new ZipArchive(ms, ZipArchiveMode.Create, leaveOpen: true))
+        {
+            WriteEntry(archive, "manifest.json", manifestBytes);
+            WriteEntry(archive, encrypted ? "content.enc" : "content.json", contentBytes);
+        }
+        ms.Position = 0;
+        return ms;
+    }
+
+    private static void WriteEntry(ZipArchive archive, string name, byte[] payload)
+    {
+        var entry = archive.CreateEntry(name, CompressionLevel.Optimal);
+        using var es = entry.Open();
+        es.Write(payload, 0, payload.Length);
+    }
+}