feat(transport): BundleImporter.LoadAsync with manifest validation

This commit is contained in:
Joseph Doherty
2026-05-24 04:37:02 -04:00
parent 7c70ce0dbf
commit 5fc6790c36
3 changed files with 496 additions and 0 deletions

View File

@@ -0,0 +1,188 @@
using System.Security.Cryptography;
using Microsoft.Extensions.Options;
using ScadaLink.Commons.Interfaces.Repositories;
using ScadaLink.Commons.Interfaces.Services;
using ScadaLink.Commons.Interfaces.Transport;
using ScadaLink.Commons.Types.Transport;
using ScadaLink.ConfigurationDatabase;
using ScadaLink.Transport.Encryption;
using ScadaLink.Transport.Serialization;
namespace ScadaLink.Transport.Import;
/// <summary>
/// Three-phase bundle importer: <see cref="LoadAsync"/> validates the
/// bundle envelope (manifest + content hash + decryption) and opens a
/// session; <see cref="PreviewAsync"/> diffs the bundle's DTOs against the
/// current target database; <see cref="ApplyAsync"/> writes the chosen
/// resolutions through the audited repositories. Only LoadAsync is
/// implemented in this slice — the other two are wired into DI now so
/// follow-up tasks can fill them in without churning the constructor.
/// </summary>
public sealed class BundleImporter : IBundleImporter
{
// Collaborators used by LoadAsync to read, validate and decrypt the bundle envelope.
private readonly BundleSerializer _bundleSerializer;
private readonly ManifestValidator _manifestValidator;
private readonly BundleSecretEncryptor _encryptor;
// The fields inside the pragma region are assigned by the constructor but not
// read anywhere in this class yet; IDE0052 (unread private member) is
// suppressed because they are reserved for the Preview/Apply phases.
#pragma warning disable IDE0052 // wired-in dependencies for T16/T17.
private readonly EntitySerializer _entitySerializer;
private readonly ITemplateEngineRepository _templateRepo;
private readonly IExternalSystemRepository _externalRepo;
private readonly INotificationRepository _notificationRepo;
private readonly IInboundApiRepository _inboundApiRepo;
private readonly IAuditService _auditService;
private readonly IAuditCorrelationContext _correlationContext;
private readonly ScadaLinkDbContext _dbContext;
#pragma warning restore IDE0052
// Session store that LoadAsync hands the freshly built session to.
private readonly IBundleSessionStore _sessionStore;
// Supplies MaxBundleSizeMb and BundleSessionTtlMinutes to LoadAsync.
private readonly IOptions<TransportOptions> _options;
// Clock used to stamp the session's ExpiresAt.
private readonly TimeProvider _timeProvider;
/// <summary>
/// Captures every collaborator of the importer. Each argument is
/// null-checked before any field is assigned, so a missing registration
/// fails fast with an <see cref="ArgumentNullException"/> naming the
/// offending parameter.
/// </summary>
/// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
public BundleImporter(
    BundleSerializer bundleSerializer,
    ManifestValidator manifestValidator,
    BundleSecretEncryptor encryptor,
    EntitySerializer entitySerializer,
    IBundleSessionStore sessionStore,
    IOptions<TransportOptions> options,
    TimeProvider timeProvider,
    ITemplateEngineRepository templateRepo,
    IExternalSystemRepository externalRepo,
    INotificationRepository notificationRepo,
    IInboundApiRepository inboundApiRepo,
    IAuditService auditService,
    IAuditCorrelationContext correlationContext,
    ScadaLinkDbContext dbContext)
{
    // Guards run in field-assignment order so the first null argument
    // reported is the same one an assign-or-throw chain would report.
    ArgumentNullException.ThrowIfNull(bundleSerializer);
    ArgumentNullException.ThrowIfNull(manifestValidator);
    ArgumentNullException.ThrowIfNull(encryptor);
    ArgumentNullException.ThrowIfNull(entitySerializer);
    ArgumentNullException.ThrowIfNull(sessionStore);
    ArgumentNullException.ThrowIfNull(options);
    ArgumentNullException.ThrowIfNull(timeProvider);
    ArgumentNullException.ThrowIfNull(templateRepo);
    ArgumentNullException.ThrowIfNull(externalRepo);
    ArgumentNullException.ThrowIfNull(notificationRepo);
    ArgumentNullException.ThrowIfNull(inboundApiRepo);
    ArgumentNullException.ThrowIfNull(auditService);
    ArgumentNullException.ThrowIfNull(correlationContext);
    ArgumentNullException.ThrowIfNull(dbContext);

    _bundleSerializer = bundleSerializer;
    _manifestValidator = manifestValidator;
    _encryptor = encryptor;
    _entitySerializer = entitySerializer;
    _sessionStore = sessionStore;
    _options = options;
    _timeProvider = timeProvider;
    _templateRepo = templateRepo;
    _externalRepo = externalRepo;
    _notificationRepo = notificationRepo;
    _inboundApiRepo = inboundApiRepo;
    _auditService = auditService;
    _correlationContext = correlationContext;
    _dbContext = dbContext;
}
/// <summary>
/// Buffers the bundle, validates its envelope (manifest, content hash,
/// decryption) and opens a session keyed by a fresh GUID.
/// </summary>
/// <remarks>
/// The upload is buffered chunk by chunk and rejected as soon as it crosses
/// <c>MaxBundleSizeMb</c>, so an oversized stream is never held in memory in
/// full. Wrong-passphrase failures surface as
/// <see cref="CryptographicException"/> so the caller (UI / API endpoint) can
/// detect them by type and apply its own lockout policy. This method does not
/// mutate <c>FailedUnlockAttempts</c> itself because no session exists yet at
/// the point of failure.
/// </remarks>
/// <param name="bundleStream">Bundle bytes; does not need to be seekable.</param>
/// <param name="passphrase">Required when the manifest carries encryption metadata; ignored otherwise.</param>
/// <param name="ct">Observed while buffering <paramref name="bundleStream"/>.</param>
/// <returns>The session as returned by the session store.</returns>
/// <exception cref="ArgumentNullException"><paramref name="bundleStream"/> is <c>null</c>.</exception>
/// <exception cref="InvalidOperationException">The bundle is larger than the configured size cap.</exception>
/// <exception cref="InvalidDataException">The manifest is missing or malformed, or the content hash does not match.</exception>
/// <exception cref="NotSupportedException">The bundle format version is not supported.</exception>
/// <exception cref="ArgumentException">The bundle is encrypted and no passphrase was supplied.</exception>
/// <exception cref="CryptographicException">Decryption failed (for example, a wrong passphrase).</exception>
public async Task<BundleSession> LoadAsync(Stream bundleStream, string? passphrase, CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(bundleStream);

    // Size cap is in MB; multiply in long arithmetic so the comparison
    // doesn't overflow at the int boundary for large MaxBundleSizeMb.
    var maxBundleSizeMb = _options.Value.MaxBundleSizeMb;
    var maxBytes = maxBundleSizeMb * 1024L * 1024L;

    // Copy to a seekable buffer — manifest + content readers each rewind and
    // re-read the same bytes, and a request stream may not be seekable. The
    // copy is done in chunks with the cap checked before every read, so an
    // oversized upload is rejected after at most one chunk of overshoot
    // instead of after the whole stream has been buffered.
    using var ms = new MemoryStream();
    var chunk = new byte[81920];
    while (true)
    {
        if (ms.Length > maxBytes)
        {
            throw new InvalidOperationException(
                $"Bundle exceeds maximum allowed size of {maxBundleSizeMb} MB.");
        }

        var bytesRead = await bundleStream.ReadAsync(chunk.AsMemory(), ct).ConfigureAwait(false);
        if (bytesRead == 0)
        {
            break;
        }

        ms.Write(chunk, 0, bytesRead);
    }

    BundleManifest manifest;
    try
    {
        ms.Position = 0;
        manifest = _bundleSerializer.ReadManifest(ms);
    }
    catch (Exception ex) when (ex is not InvalidDataException)
    {
        // InvalidDataException is deliberately let through untouched so the
        // serializer's specific "manifest missing/null" message reaches the
        // operator; everything else is normalised to one diagnostic.
        throw new InvalidDataException("Bundle is missing or has a malformed manifest.json.", ex);
    }

    ms.Position = 0;
    var contentBytes = _bundleSerializer.ReadContentBytes(ms, manifest);

    // Validate format version + content-hash + manifest shape. Reject paths
    // surface as distinct exceptions so the UI can disambiguate the cause.
    var validation = _manifestValidator.Validate(manifest, contentBytes);
    switch (validation)
    {
        case ManifestValidationResult.UnsupportedFormatVersion:
            throw new NotSupportedException(
                $"Bundle format version {manifest.BundleFormatVersion} is not supported by this cluster.");
        case ManifestValidationResult.ContentHashMismatch:
            throw new InvalidDataException(
                "Bundle content hash does not match manifest — file may be corrupt.");
        case ManifestValidationResult.MalformedManifest:
            throw new InvalidDataException("Bundle manifest is malformed.");
        case ManifestValidationResult.Ok:
            break;
        default:
            throw new InvalidDataException($"Unrecognised manifest validation result: {validation}.");
    }

    // Decrypt when the manifest carries EncryptionMetadata. A decryption
    // failure (CryptographicException, or its AuthenticationTagMismatchException
    // subclass on newer runtimes) is left to bubble unchanged so the caller
    // can detect a wrong passphrase via type check. No session is opened on
    // that path.
    byte[] decryptedContent;
    if (manifest.Encryption is not null)
    {
        if (string.IsNullOrEmpty(passphrase))
        {
            throw new ArgumentException(
                "Passphrase required for encrypted bundle.", nameof(passphrase));
        }

        decryptedContent = _encryptor.Decrypt(contentBytes, manifest.Encryption, passphrase);
    }
    else
    {
        decryptedContent = contentBytes;
    }

    var ttl = TimeSpan.FromMinutes(_options.Value.BundleSessionTtlMinutes);
    var session = new BundleSession
    {
        SessionId = Guid.NewGuid(),
        Manifest = manifest,
        DecryptedContent = decryptedContent,
        ExpiresAt = _timeProvider.GetUtcNow() + ttl,
    };
    return _sessionStore.Open(session);
}
/// <summary>
/// Placeholder for the preview phase, to be filled in by T16. It throws
/// <see cref="NotImplementedException"/> so the interface contract stays
/// honest while LoadAsync ships in isolation.
/// </summary>
/// <exception cref="NotImplementedException">Always.</exception>
public Task<ImportPreview> PreviewAsync(Guid sessionId, CancellationToken ct = default) =>
    throw new NotImplementedException("PreviewAsync is implemented by task T16.");
/// <summary>
/// Placeholder for the apply phase, to be filled in by T17.
/// </summary>
/// <exception cref="NotImplementedException">Always.</exception>
public Task<ImportResult> ApplyAsync(
    Guid sessionId,
    IReadOnlyList<ImportResolution> resolutions,
    string user,
    CancellationToken ct = default) =>
    throw new NotImplementedException("ApplyAsync is implemented by task T17.");
}

View File

@@ -30,6 +30,7 @@ public static class ServiceCollectionExtensions
services.AddScoped<DependencyResolver>();
services.AddScoped<IBundleExporter, BundleExporter>();
services.AddSingleton<IBundleSessionStore, BundleSessionStore>();
services.AddScoped<IBundleImporter, BundleImporter>();
// Remaining concrete services added in later tasks.
return services;
}

View File

@@ -0,0 +1,307 @@
using System.IO.Compression;
using System.Security.Cryptography;
using System.Text.Json;
using System.Text.Json.Serialization;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Options;
using NSubstitute;
using ScadaLink.Commons.Interfaces.Repositories;
using ScadaLink.Commons.Interfaces.Services;
using ScadaLink.Commons.Interfaces.Transport;
using ScadaLink.Commons.Types.Transport;
using ScadaLink.ConfigurationDatabase;
using ScadaLink.Transport.Encryption;
using ScadaLink.Transport.Import;
using ScadaLink.Transport.Serialization;
namespace ScadaLink.Transport.Tests.Import;
/// <summary>
/// Unit tests for <see cref="BundleImporter.LoadAsync"/>. Uses the real
/// <see cref="BundleSerializer"/>, <see cref="ManifestValidator"/>,
/// <see cref="BundleSecretEncryptor"/>, <see cref="EntitySerializer"/> and
/// <see cref="BundleSessionStore"/> — they're stateless / in-memory and easier
/// to drive than mocks. Repositories + audit + DbContext are mocked because
/// LoadAsync does not exercise them (they're injected so the constructor stays
/// stable across T15/T16/T17).
/// </summary>
public sealed class BundleImporterLoadTests
{
// JSON settings these tests use to hand-serialize manifests (HandCraftZip)
// and to deserialize a session's decrypted content. Compact output, nulls
// omitted, enums written as strings.
// NOTE(review): presumably mirrors the options BundleSerializer uses
// internally — confirm if the serializer's settings change.
private static readonly JsonSerializerOptions BundleJsonOptions = new()
{
WriteIndented = false,
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
Converters = { new JsonStringEnumConverter() },
};
/// <summary>Builds a bundle payload in which every collection is empty.</summary>
private static BundleContentDto EmptyContent()
{
    return new BundleContentDto(
        TemplateFolders: Array.Empty<TemplateFolderDto>(),
        Templates: Array.Empty<TemplateDto>(),
        SharedScripts: Array.Empty<SharedScriptDto>(),
        ExternalSystems: Array.Empty<ExternalSystemDto>(),
        DatabaseConnections: Array.Empty<DatabaseConnectionDto>(),
        NotificationLists: Array.Empty<NotificationListDto>(),
        SmtpConfigs: Array.Empty<SmtpConfigDto>(),
        ApiKeys: Array.Empty<ApiKeyDto>(),
        ApiMethods: Array.Empty<ApiMethodDto>());
}
/// <summary>
/// Builds a bundle payload holding exactly one template named "Pump";
/// every other collection is empty.
/// </summary>
private static BundleContentDto SmallContent()
{
    var pump = new TemplateDto(
        Name: "Pump",
        FolderName: null,
        BaseTemplateName: null,
        Description: "the one and only",
        Attributes: Array.Empty<TemplateAttributeDto>(),
        Alarms: Array.Empty<TemplateAlarmDto>(),
        Scripts: Array.Empty<TemplateScriptDto>(),
        Compositions: Array.Empty<TemplateCompositionDto>());

    return new BundleContentDto(
        TemplateFolders: Array.Empty<TemplateFolderDto>(),
        Templates: new[] { pump },
        SharedScripts: Array.Empty<SharedScriptDto>(),
        ExternalSystems: Array.Empty<ExternalSystemDto>(),
        DatabaseConnections: Array.Empty<DatabaseConnectionDto>(),
        NotificationLists: Array.Empty<NotificationListDto>(),
        SmtpConfigs: Array.Empty<SmtpConfigDto>(),
        ApiKeys: Array.Empty<ApiKeyDto>(),
        ApiMethods: Array.Empty<ApiMethodDto>());
}
/// <summary>
/// Clock that always reports the instant it was constructed with, keeping
/// session expiry calculations deterministic within a test.
/// </summary>
private sealed class TestTimeProvider : TimeProvider
{
    private readonly DateTimeOffset _fixedNow;

    public TestTimeProvider(DateTimeOffset start) => _fixedNow = start;

    public override DateTimeOffset GetUtcNow() => _fixedNow;
}
/// <summary>
/// Bundle of the importer under test plus the real collaborators and
/// options it was built with, as returned by <c>BuildRig</c>, so tests can
/// pack bundles and inspect the session store using the same instances.
/// </summary>
private sealed record TestRig(
BundleImporter Importer,
BundleSerializer Serializer,
ManifestBuilder ManifestBuilder,
BundleSecretEncryptor Encryptor,
BundleSessionStore SessionStore,
TransportOptions Options);
/// <summary>
/// Assembles a <see cref="BundleImporter"/> around real transport
/// components and substituted repositories/audit services.
/// </summary>
/// <param name="configure">Optional hook to adjust the transport options before they are wrapped.</param>
private static TestRig BuildRig(Action<TransportOptions>? configure = null)
{
    var transportOptions = new TransportOptions();
    if (configure is not null)
    {
        configure(transportOptions);
    }

    var wrappedOptions = Options.Create(transportOptions);
    var frozenClock = new TestTimeProvider(DateTimeOffset.UtcNow);

    var bundleSerializer = new BundleSerializer();
    var secretEncryptor = new BundleSecretEncryptor();
    var builder = new ManifestBuilder();
    var sessions = new BundleSessionStore(wrappedOptions, frozenClock);

    // LoadAsync never touches the DbContext — Preview/Apply do. Build
    // a no-provider DbContext so the importer's null check passes;
    // the in-memory provider isn't worth pulling in for unit tests.
    var unusedDbContext = new ScadaLinkDbContext(
        new DbContextOptionsBuilder<ScadaLinkDbContext>().Options);

    var sut = new BundleImporter(
        bundleSerializer: bundleSerializer,
        manifestValidator: new ManifestValidator(),
        encryptor: secretEncryptor,
        entitySerializer: new EntitySerializer(),
        sessionStore: sessions,
        options: wrappedOptions,
        timeProvider: frozenClock,
        templateRepo: Substitute.For<ITemplateEngineRepository>(),
        externalRepo: Substitute.For<IExternalSystemRepository>(),
        notificationRepo: Substitute.For<INotificationRepository>(),
        inboundApiRepo: Substitute.For<IInboundApiRepository>(),
        auditService: Substitute.For<IAuditService>(),
        correlationContext: Substitute.For<IAuditCorrelationContext>(),
        dbContext: unusedDbContext);

    return new TestRig(sut, bundleSerializer, builder, secretEncryptor, sessions, transportOptions);
}
/// <summary>
/// Packs <paramref name="content"/> into an unencrypted bundle stream whose
/// manifest is built from the serialized content bytes.
/// </summary>
private static Stream PackPlainBundle(BundleSerializer serializer, ManifestBuilder builder, BundleContentDto content)
{
    var serialized = serializer.SerializeContentBytes(content);
    var templateOnlySummary = new BundleSummary(content.Templates.Count, 0, 0, 0, 0, 0, 0, 0, 0);

    var plainManifest = builder.Build(
        sourceEnvironment: "dev",
        exportedBy: "alice",
        scadaLinkVersion: "1.0.0",
        encryption: null,
        summary: templateOnlySummary,
        contents: Array.Empty<ManifestContentEntry>(),
        contentBytes: serialized);

    return serializer.Pack(content, plainManifest, passphrase: null, encryptor: null);
}
/// <summary>
/// Packs <paramref name="content"/> into a bundle stream encrypted with
/// <paramref name="passphrase"/>.
/// </summary>
private static Stream PackEncryptedBundle(
    BundleSerializer serializer,
    ManifestBuilder builder,
    BundleSecretEncryptor encryptor,
    BundleContentDto content,
    string passphrase)
{
    var serialized = serializer.SerializeContentBytes(content);
    var templateOnlySummary = new BundleSummary(content.Templates.Count, 0, 0, 0, 0, 0, 0, 0, 0);

    // Pack re-stamps salt/iv/hash from the ciphertext it actually writes,
    // so the empty salt/iv strings here are placeholders only.
    var placeholderEncryption = new EncryptionMetadata(
        "AES-256-GCM", "PBKDF2-SHA256", 600_000, string.Empty, string.Empty);

    var encryptedManifest = builder.Build(
        sourceEnvironment: "dev",
        exportedBy: "alice",
        scadaLinkVersion: "1.0.0",
        encryption: placeholderEncryption,
        summary: templateOnlySummary,
        contents: Array.Empty<ManifestContentEntry>(),
        contentBytes: serialized);

    return serializer.Pack(content, encryptedManifest, passphrase, encryptor);
}
/// <summary>
/// A plain bundle loads without a passphrase and the resulting session is
/// retrievable from the session store.
/// </summary>
[Fact]
public async Task LoadAsync_returns_session_for_unencrypted_bundle()
{
    // Arrange
    var (importer, serializer, manifestBuilder, _, sessionStore, _) = BuildRig();
    var bundle = PackPlainBundle(serializer, manifestBuilder, SmallContent());

    // Act
    var opened = await importer.LoadAsync(bundle, passphrase: null);

    // Assert
    Assert.NotNull(opened);
    Assert.NotEqual(Guid.Empty, opened.SessionId);
    Assert.Equal("dev", opened.Manifest.SourceEnvironment);
    Assert.Equal("alice", opened.Manifest.ExportedBy);
    Assert.Null(opened.Manifest.Encryption);
    Assert.NotNull(sessionStore.Get(opened.SessionId));
}
/// <summary>
/// An encrypted bundle loaded with the matching passphrase yields a session
/// whose decrypted payload deserializes back to the original content.
/// </summary>
[Fact]
public async Task LoadAsync_returns_session_for_encrypted_bundle_with_correct_passphrase()
{
    // Arrange
    var (importer, serializer, manifestBuilder, encryptor, _, _) = BuildRig();
    var bundle = PackEncryptedBundle(serializer, manifestBuilder, encryptor, SmallContent(), "secret123");

    // Act
    var opened = await importer.LoadAsync(bundle, passphrase: "secret123");

    // Assert
    Assert.NotNull(opened);
    Assert.NotNull(opened.Manifest.Encryption);
    Assert.NotEmpty(opened.DecryptedContent);

    // The decrypted payload must round-trip back to the original DTO so the
    // PreviewAsync phase can deserialize it directly from the session.
    var decoded = JsonSerializer.Deserialize<BundleContentDto>(
        opened.DecryptedContent, BundleJsonOptions);
    Assert.NotNull(decoded);
    var onlyTemplate = Assert.Single(decoded!.Templates);
    Assert.Equal("Pump", onlyTemplate.Name);
}
/// <summary>
/// Loading an encrypted bundle with the wrong passphrase surfaces a
/// <see cref="CryptographicException"/> (or a subclass of it).
/// </summary>
[Fact]
public async Task LoadAsync_throws_when_passphrase_wrong()
{
    // Arrange
    var (importer, serializer, manifestBuilder, encryptor, _, _) = BuildRig();
    var bundle = PackEncryptedBundle(serializer, manifestBuilder, encryptor, EmptyContent(), "correct");

    // Act
    Task<BundleSession> LoadWithWrongPassphrase() => importer.LoadAsync(bundle, passphrase: "wrong");

    // Assert — AES-GCM raises AuthenticationTagMismatchException, a
    // CryptographicException subclass on .NET 10, so ThrowsAny is the
    // right match.
    await Assert.ThrowsAnyAsync<CryptographicException>(LoadWithWrongPassphrase);
}
/// <summary>
/// A bundle whose manifest declares format version 999 is rejected with
/// <see cref="NotSupportedException"/>.
/// </summary>
[Fact]
public async Task LoadAsync_throws_NotSupportedException_when_bundleFormatVersion_unsupported()
{
    // Arrange — the zip is synthesized by hand so the manifest can carry a
    // format version of 999. The hash stamped here matches the content,
    // leaving the version as the only thing wrong with the bundle.
    var (importer, serializer, _, _, _, _) = BuildRig();
    var payload = serializer.SerializeContentBytes(EmptyContent());
    var payloadHash = "sha256:" + Convert.ToHexString(SHA256.HashData(payload)).ToLowerInvariant();

    var futureManifest = new BundleManifest(
        BundleFormatVersion: 999,
        SchemaVersion: "1.0",
        CreatedAtUtc: DateTimeOffset.UtcNow,
        SourceEnvironment: "dev",
        ExportedBy: "alice",
        ScadaLinkVersion: "1.0.0",
        ContentHash: payloadHash,
        Encryption: null,
        Summary: new BundleSummary(0, 0, 0, 0, 0, 0, 0, 0, 0),
        Contents: Array.Empty<ManifestContentEntry>());
    var bundle = HandCraftZip(futureManifest, payload, encrypted: false);

    // Act + Assert
    await Assert.ThrowsAsync<NotSupportedException>(
        () => importer.LoadAsync(bundle, passphrase: null));
}
/// <summary>
/// A bundle whose content bytes were altered after the manifest hash was
/// stamped is rejected with an <see cref="InvalidDataException"/> whose
/// message mentions the hash.
/// </summary>
[Fact]
public async Task LoadAsync_throws_InvalidDataException_when_content_hash_mismatch()
{
    // Arrange — stamp the manifest from the pristine bytes...
    var (importer, serializer, manifestBuilder, _, _, _) = BuildRig();
    var pristine = serializer.SerializeContentBytes(SmallContent());
    var stampedManifest = manifestBuilder.Build(
        sourceEnvironment: "dev",
        exportedBy: "alice",
        scadaLinkVersion: "1.0.0",
        encryption: null,
        summary: new BundleSummary(1, 0, 0, 0, 0, 0, 0, 0, 0),
        contents: Array.Empty<ManifestContentEntry>(),
        contentBytes: pristine);

    // ...then flip every bit of the first byte in a copy, so the hash the
    // validator recomputes diverges from the one frozen in the manifest.
    var tampered = pristine.AsSpan().ToArray();
    tampered[0] = (byte)~tampered[0];
    var bundle = HandCraftZip(stampedManifest, tampered, encrypted: false);

    // Act
    var failure = await Assert.ThrowsAsync<InvalidDataException>(
        () => importer.LoadAsync(bundle, passphrase: null));

    // Assert
    Assert.Contains("hash", failure.Message, StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// A bundle larger than the configured <c>MaxBundleSizeMb</c> is rejected
/// with the importer's size-cap <see cref="InvalidOperationException"/>.
/// </summary>
[Fact]
public async Task LoadAsync_throws_InvalidOperationException_when_bundle_exceeds_size_cap()
{
    // MaxBundleSizeMb is expressed in whole megabytes, so instead of padding
    // a bundle past 1 MB the cap is set to 0: the importer compares with
    // `> maxBytes`, so any non-empty bundle exceeds a 0 MB cap.
    var rig = BuildRig(opts => opts.MaxBundleSizeMb = 0);
    var stream = PackPlainBundle(rig.Serializer, rig.ManifestBuilder, SmallContent());

    var ex = await Assert.ThrowsAsync<InvalidOperationException>(
        () => rig.Importer.LoadAsync(stream, passphrase: null));

    // InvalidOperationException is a broad type that unrelated failures can
    // also raise; pin the message so only the size-cap rejection passes.
    Assert.Contains("maximum allowed size", ex.Message, StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Writes a two-entry zip (<c>manifest.json</c> plus <c>content.json</c> or
/// <c>content.enc</c>) straight from the supplied manifest and bytes. Tests
/// use it to produce bundles whose <c>ContentHash</c> or
/// <c>BundleFormatVersion</c> intentionally disagrees with the content —
/// paths the high-level <see cref="BundleSerializer.Pack"/> won't produce
/// because it always re-stamps the hash itself.
/// </summary>
/// <returns>A stream rewound to position 0.</returns>
private static Stream HandCraftZip(BundleManifest manifest, byte[] contentBytes, bool encrypted)
{
    string contentEntryName;
    if (encrypted)
    {
        contentEntryName = "content.enc";
    }
    else
    {
        contentEntryName = "content.json";
    }

    var serializedManifest = JsonSerializer.SerializeToUtf8Bytes(manifest, BundleJsonOptions);
    var zipBuffer = new MemoryStream();

    // The archive has to be disposed before the buffer is rewound; leaveOpen
    // keeps the underlying MemoryStream usable afterwards.
    using (var zip = new ZipArchive(zipBuffer, ZipArchiveMode.Create, leaveOpen: true))
    {
        WriteEntry(zip, "manifest.json", serializedManifest);
        WriteEntry(zip, contentEntryName, contentBytes);
    }

    zipBuffer.Position = 0;
    return zipBuffer;
}
/// <summary>
/// Adds an entry called <paramref name="name"/> to <paramref name="archive"/>
/// and writes all of <paramref name="payload"/> into it.
/// </summary>
private static void WriteEntry(ZipArchive archive, string name, byte[] payload)
{
    var created = archive.CreateEntry(name, CompressionLevel.Optimal);
    using (var entryStream = created.Open())
    {
        entryStream.Write(payload.AsSpan());
    }
}
}