feat(m9/T32b): JSON Schema $ref resolver (lib seam, cycle/depth-guarded) + deploy-time dangling-ref block

This commit is contained in:
Joseph Doherty
2026-06-18 11:54:19 -04:00
parent 16cb078cd2
commit b3d99248fa
9 changed files with 755 additions and 14 deletions
@@ -86,5 +86,12 @@ public enum ValidationCategory
CrossCallViolation,
MissingMetadata,
ConnectionConfig,
NativeAlarmSourceInvalid
NativeAlarmSourceInvalid,
/// <summary>
/// M9-T32b: a script/method parameter or return JSON Schema contains a
/// <c>{"$ref":"lib:Name"}</c> reference that could not be resolved against the
/// shared-schema library — dangling, cyclic, or over-depth. Deploy-blocking.
/// </summary>
SchemaReference
}
@@ -60,29 +60,131 @@ public sealed class InboundApiSchema
/// <param name="json">The definition JSON; null/whitespace yields <c>null</c>.</param>
/// <returns>The parsed schema, or <c>null</c> when the input is empty.</returns>
/// <exception cref="JsonException">The input is non-empty but not valid JSON, is a JSON scalar/null at the root, or the schema nesting exceeds <see cref="MaxDepth"/>.</exception>
public static InboundApiSchema? Parse(string? json)
public static InboundApiSchema? Parse(string? json) => Parse(json, resolveRef: null);
/// <summary>
/// Throwing variant of the <c>$ref</c>-aware parse (M9-T32b): parses a stored
/// definition string and resolves every <c>{"$ref":"lib:Name"}</c> node through
/// <paramref name="resolveRef"/>, failing hard if any reference is left unresolved.
///
/// <para>
/// All parsing and resolution is delegated to <see cref="ParseWithRefs"/>; this
/// overload only converts a non-empty unresolved-reference list into a
/// <see cref="JsonException"/> whose message lists every unresolved entry,
/// comma-separated. Callers that need to collect the unresolved references rather
/// than abort should call <see cref="ParseWithRefs"/> directly.
/// </para>
///
/// <para>
/// Pointer convention: an object node carrying a string <c>$ref</c> that starts with
/// <c>lib:</c> refers to the library entry named by the remainder of the value. The
/// resolver receives that name (without the prefix) and returns the referenced schema
/// JSON, or <c>null</c> when no such entry exists.
/// </para>
/// </summary>
/// <param name="json">The definition JSON; null/whitespace yields <c>null</c>.</param>
/// <param name="resolveRef">
/// Optional resolver mapping a library entry name to its schema JSON (or <c>null</c>
/// when the entry is missing). Passing <c>null</c> means no resolver is available, so
/// any <c>$ref</c> encountered is reported as unresolved and this method throws.
/// </param>
/// <returns>The parsed, reference-resolved schema, or <c>null</c> when the input is empty.</returns>
/// <exception cref="JsonException">
/// At least one <c>$ref</c> could not be resolved, or <see cref="ParseWithRefs"/> itself
/// rejected the input (invalid JSON, a scalar/null root, or nesting beyond <see cref="MaxDepth"/>).
/// </exception>
public static InboundApiSchema? Parse(string? json, Func<string, string?>? resolveRef)
{
    var outcome = ParseWithRefs(json, resolveRef);

    // Happy path: nothing dangling, hand back whatever was parsed (null for empty input).
    if (outcome.UnresolvedRefs.Count == 0)
    {
        return outcome.Schema;
    }

    var unresolvedList = string.Join(", ", outcome.UnresolvedRefs);
    throw new JsonException($"Schema contains unresolved $ref(s): {unresolvedList}.");
}
/// <summary>
/// Parses a stored definition string into an <see cref="InboundApiSchema"/>,
/// resolving <c>{"$ref":"lib:Name"}</c> library references through the
/// caller-supplied <paramref name="resolveRef"/> seam, and COLLECTING (rather than
/// throwing on) any references that cannot be resolved (M9-T32b).
///
/// <para>
/// This is the deploy-time entry point: a dangling, cyclic, or over-depth
/// <c>$ref</c> is reported in <see cref="SchemaParseResult.UnresolvedRefs"/> so the
/// validation layer can surface it as a deploy-blocking error naming the missing
/// reference, instead of aborting the whole parse. See <see cref="Parse(string?, Func{string, string?})"/>
/// for the throwing variant and for the <c>lib:Name</c> pointer convention.
/// </para>
/// </summary>
/// <param name="json">The definition JSON; null/whitespace yields a result with a <c>null</c> schema and no unresolved refs.</param>
/// <param name="resolveRef">
/// Optional reference-resolution seam (see <see cref="Parse(string?, Func{string, string?})"/>).
/// <c>null</c> means no resolver: any <c>$ref</c> is reported as unresolved.
/// </param>
/// <returns>The parsed schema (refs resolved where possible) plus the names of any references that could not be resolved.</returns>
/// <exception cref="JsonException">The input is invalid JSON, a root scalar/null, or exceeds the structural <see cref="MaxDepth"/> for non-ref nesting.</exception>
public static SchemaParseResult ParseWithRefs(string? json, Func<string, string?>? resolveRef)
{
if (string.IsNullOrWhiteSpace(json))
{
return null;
return new SchemaParseResult(null, []);
}
var unresolved = new List<string>();
// The active-ref set tracks the refs being resolved on the CURRENT path so a
// cycle (A→B→A) is detected and reported instead of recursing forever.
var ctx = new RefResolutionContext(resolveRef, unresolved, new HashSet<string>(StringComparer.Ordinal));
using var doc = JsonDocument.Parse(json, DocOptions);
return doc.RootElement.ValueKind switch
var schema = doc.RootElement.ValueKind switch
{
JsonValueKind.Object => ParseSchema(doc.RootElement, depth: 0),
JsonValueKind.Object => ParseSchema(doc.RootElement, depth: 0, ctx),
JsonValueKind.Array => ParseLegacyArray(doc.RootElement),
_ => throw new JsonException("Type definition must be a JSON object (JSON Schema) or legacy parameter array."),
};
return new SchemaParseResult(schema, unresolved);
}
private static InboundApiSchema ParseSchema(JsonElement el, int depth)
/// <summary>
/// Carries the <c>$ref</c> resolution state threaded through the recursive parse:
/// the caller-supplied resolver seam, the accumulator for unresolved references,
/// and the set of references active on the current resolution path (the cycle
/// guard). A <c>null</c> <see cref="Resolver"/> means no resolver was supplied —
/// every <c>$ref</c> then dangles.
/// </summary>
/// <param name="Resolver">
/// Maps a library entry name (the text after the <c>lib:</c> prefix) to the referenced
/// schema JSON; a <c>null</c> or whitespace result is treated as "not found".
/// </param>
/// <param name="Unresolved">
/// Mutable accumulator that receives one entry per reference that could not be
/// resolved. The same list instance is shared by every recursion level of one parse.
/// </param>
/// <param name="ActiveRefs">
/// Names currently being resolved on the active path. A name is added before its
/// referenced schema is parsed and removed afterwards, so meeting a name that is
/// already present indicates a cycle.
/// </param>
private sealed record RefResolutionContext(
Func<string, string?>? Resolver,
List<string> Unresolved,
HashSet<string> ActiveRefs);
private static InboundApiSchema ParseSchema(JsonElement el, int depth, RefResolutionContext ctx)
{
if (depth > MaxDepth)
{
throw new JsonException($"Schema nesting exceeds the maximum allowed depth of {MaxDepth}.");
}
// $ref resolution (M9-T32b): a {"$ref":"lib:Name"} node is replaced by the
// referenced schema, resolved through the caller-supplied seam. Dangling,
// cyclic, and over-depth refs are recorded as unresolved (the caller decides
// whether to throw or collect) and parse continues with a shape-only schema.
if (TryReadLibRef(el, out var refName))
{
return ResolveLibRef(refName, depth, ctx);
}
var type = el.TryGetProperty("type", out var t) && t.ValueKind == JsonValueKind.String
? NormalizeType(t.GetString())
: "string";
@@ -92,7 +194,7 @@ public sealed class InboundApiSchema
InboundApiSchema? items = null;
if (el.TryGetProperty("items", out var itemsEl) && itemsEl.ValueKind == JsonValueKind.Object)
{
items = ParseSchema(itemsEl, depth + 1);
items = ParseSchema(itemsEl, depth + 1, ctx);
}
return new InboundApiSchema { Type = "array", Items = items };
@@ -122,7 +224,7 @@ public sealed class InboundApiSchema
foreach (var prop in props.EnumerateObject())
{
var schema = prop.Value.ValueKind == JsonValueKind.Object
? ParseSchema(prop.Value, depth + 1)
? ParseSchema(prop.Value, depth + 1, ctx)
: new InboundApiSchema { Type = "string" };
fields.Add(new InboundApiSchemaField(prop.Name, requiredSet.Contains(prop.Name), schema));
}
@@ -134,6 +236,103 @@ public sealed class InboundApiSchema
return new InboundApiSchema { Type = type };
}
/// <summary>The <c>lib:</c> scheme prefix on a <c>$ref</c> value identifying a library reference.</summary>
private const string LibRefScheme = "lib:";
/// <summary>The placeholder type for an unresolvable <c>$ref</c> node (dangling, cyclic, or over-depth).</summary>
private const string UnresolvedRefType = "ref";
/// <summary>
/// Tests whether a schema node is a library reference of the form
/// <c>{"$ref":"lib:Name"}</c> and, if so, yields the target name with the
/// <c>lib:</c> prefix removed and surrounding whitespace trimmed.
/// </summary>
/// <remarks>
/// The node is NOT treated as a library reference when the <c>$ref</c> property is
/// absent, is not a JSON string, is empty, does not start with the <c>lib:</c> prefix
/// (ordinal comparison), or names nothing after the prefix once trimmed.
/// </remarks>
/// <param name="el">The schema node to inspect.</param>
/// <param name="refName">The target name when the method returns <c>true</c>; otherwise the empty string.</param>
/// <returns><c>true</c> only for a <c>lib:</c> reference with a non-empty target name.</returns>
private static bool TryReadLibRef(JsonElement el, out string refName)
{
    refName = string.Empty;

    var hasStringRef = el.TryGetProperty("$ref", out var pointer)
        && pointer.ValueKind == JsonValueKind.String;
    if (!hasStringRef)
    {
        return false;
    }

    // Only non-empty values carrying the lib: scheme are library references.
    if (pointer.GetString() is not { Length: > 0 } target
        || !target.StartsWith(LibRefScheme, StringComparison.Ordinal))
    {
        return false;
    }

    var candidate = target[LibRefScheme.Length..].Trim();
    if (candidate.Length == 0)
    {
        return false;
    }

    refName = candidate;
    return true;
}
/// <summary>
/// Resolves a <c>lib:Name</c> reference through the resolver seam and parses the
/// referenced schema, which may itself contain further <c>$ref</c>s.
///
/// <para>
/// A reference that cannot be turned into a schema is never fatal here: an entry is
/// appended to the context's unresolved list and a placeholder schema of type
/// <c>ref</c> is returned so the surrounding parse can continue. That covers:
/// over-depth references, cyclic references (the name is already active on the current
/// path), dangling references (no resolver, or the resolver returned null/whitespace),
/// and BROKEN library entries (the referenced text cannot be parsed as JSON, or its
/// root is neither an object nor an array).
/// </para>
///
/// <para>
/// Broken library entries are collected rather than thrown because the deploy-time
/// caller swallows <see cref="JsonException"/> from the parse; letting the exception
/// escape would make a script that references a corrupt library entry pass the
/// reference check silently. Exceptions raised while parsing the CONTENT of a
/// well-formed referenced schema (for example structural nesting beyond
/// <see cref="MaxDepth"/>) still propagate unchanged.
/// </para>
/// </summary>
/// <param name="refName">The library entry name to resolve (without the <c>lib:</c> prefix).</param>
/// <param name="depth">The current structural depth, shared with the <see cref="MaxDepth"/> guard.</param>
/// <param name="ctx">The active resolution context (resolver, unresolved accumulator, cycle guard).</param>
/// <returns>The resolved schema, or a placeholder <c>ref</c>-typed schema when the reference is unresolvable.</returns>
private static InboundApiSchema ResolveLibRef(string refName, int depth, RefResolutionContext ctx)
{
    // Depth guard: fires at `>= MaxDepth`, one level before ParseSchema's own
    // `> MaxDepth` throw, so an over-depth ref is collected instead of aborting the parse.
    if (depth >= MaxDepth)
    {
        return Unresolvable($"{refName} (ref nesting exceeds depth {MaxDepth})");
    }

    // Cycle guard: this name is already being resolved further up the current path.
    if (ctx.ActiveRefs.Contains(refName))
    {
        return Unresolvable($"{refName} (cyclic reference)");
    }

    var referenced = ctx.Resolver?.Invoke(refName);
    if (string.IsNullOrWhiteSpace(referenced))
    {
        // Dangling: the seam cannot resolve it, or no seam was supplied.
        return Unresolvable(refName);
    }

    JsonDocument doc;
    try
    {
        doc = JsonDocument.Parse(referenced, DocOptions);
    }
    catch (JsonException)
    {
        // The library entry exists but its text is not parseable JSON: report it
        // against the reference instead of throwing past the collector.
        return Unresolvable($"{refName} (referenced schema could not be parsed as JSON)");
    }

    using (doc)
    {
        var root = doc.RootElement;
        if (root.ValueKind is not (JsonValueKind.Object or JsonValueKind.Array))
        {
            return Unresolvable(
                $"{refName} (referenced schema must be a JSON object or legacy parameter array)");
        }

        ctx.ActiveRefs.Add(refName);
        try
        {
            return root.ValueKind == JsonValueKind.Object
                ? ParseSchema(root, depth + 1, ctx)
                : ParseLegacyArray(root);
        }
        finally
        {
            // Pop only after the subtree is resolved so sibling refs to the same name
            // are allowed (a diamond is not a cycle) while a revisit on the path is caught.
            ctx.ActiveRefs.Remove(refName);
        }
    }

    // Records the unresolved entry and produces the shape-only placeholder node.
    InboundApiSchema Unresolvable(string entry)
    {
        ctx.Unresolved.Add(entry);
        return new InboundApiSchema { Type = UnresolvedRefType };
    }
}
private static InboundApiSchema ParseLegacyArray(JsonElement arr)
{
var fields = new List<InboundApiSchemaField>();
@@ -391,3 +590,20 @@ public sealed class InboundApiSchema
/// <param name="Required">Whether the field must be present.</param>
/// <param name="Schema">The recursive type schema the field's value must satisfy.</param>
public sealed record InboundApiSchemaField(string Name, bool Required, InboundApiSchema Schema);
/// <summary>
/// The outcome of <see cref="InboundApiSchema.ParseWithRefs"/> (M9-T32b): the parsed
/// schema (with <c>{"$ref":"lib:Name"}</c> references resolved where possible) plus the
/// names of any references that could NOT be resolved — dangling (the seam returned
/// <c>null</c> or no seam was supplied), cyclic, or over-depth. A non-empty
/// <see cref="UnresolvedRefs"/> is the deploy-blocking signal the validation layer acts on.
/// </summary>
/// <remarks>
/// Unresolved nodes are still present in <see cref="Schema"/> as placeholder nodes, so a
/// non-null schema does not by itself mean every reference resolved — check
/// <see cref="UnresolvedRefs"/>.
/// </remarks>
/// <param name="Schema">The parsed schema, or <c>null</c> when the input was empty.</param>
/// <param name="UnresolvedRefs">
/// The reference targets that could not be resolved, each annotated with the reason for
/// cyclic/over-depth cases (e.g. <c>"Foo (cyclic reference)"</c>). Empty when every
/// reference resolved. Entries are names without the <c>lib:</c> prefix; a target that
/// fails to resolve at several places in the schema appears once per occurrence.
/// </param>
public sealed record SchemaParseResult(
InboundApiSchema? Schema,
IReadOnlyList<string> UnresolvedRefs);
@@ -22,6 +22,7 @@ public class FlatteningPipeline : IFlatteningPipeline
private readonly FlatteningService _flatteningService;
private readonly ValidationService _validationService;
private readonly RevisionHashService _revisionHashService;
private readonly ISharedSchemaRepository _sharedSchemaRepo;
/// <summary>Initializes a new <see cref="FlatteningPipeline"/> with the required template engine and site repositories and services.</summary>
/// <param name="templateRepo">Repository for loading templates and instance data.</param>
@@ -29,18 +30,25 @@ public class FlatteningPipeline : IFlatteningPipeline
/// <param name="flatteningService">Service that flattens the template inheritance chain into a resolved config.</param>
/// <param name="validationService">Service that performs semantic validation on the flattened config.</param>
/// <param name="revisionHashService">Service that computes the revision hash for staleness detection.</param>
/// <param name="sharedSchemaRepo">
/// M9-T32b: repository backing the JSON-Schema <c>$ref</c> resolution seam. Used to
/// look up <c>lib:Name</c> library references so a dangling reference in any validated
/// script schema becomes a deploy-blocking error.
/// </param>
public FlatteningPipeline(
ITemplateEngineRepository templateRepo,
ISiteRepository siteRepo,
FlatteningService flatteningService,
ValidationService validationService,
RevisionHashService revisionHashService)
RevisionHashService revisionHashService,
ISharedSchemaRepository sharedSchemaRepo)
{
_templateRepo = templateRepo;
_siteRepo = siteRepo;
_flatteningService = flatteningService;
_validationService = validationService;
_revisionHashService = revisionHashService;
_sharedSchemaRepo = sharedSchemaRepo;
}
/// <inheritdoc />
@@ -135,6 +143,15 @@ public class FlatteningPipeline : IFlatteningPipeline
.Select(c => c.Name)
.ToHashSet(StringComparer.Ordinal);
// M9-T32b: build the JSON-Schema $ref resolution seam from the shared-schema
// library. The seam ValidationService consumes is synchronous, so the library is
// pre-loaded once into a name→JSON map here (avoiding sync-over-async) and the
// seam is a pure in-memory lookup. An unresolved {"$ref":"lib:Name"} in any
// validated script schema then becomes a deploy-blocking SchemaReference error.
var sharedSchemas = await _sharedSchemaRepo.ListAsync(cancellationToken);
var schemaLibrary = sharedSchemas.ToDictionary(s => s.Name, s => s.SchemaJson, StringComparer.Ordinal);
Func<string, string?> resolveSchemaRef = name => schemaLibrary.GetValueOrDefault(name);
// Validate. This is the deploy-gating path, so connection-binding completeness
// is enforced as an Error (enforceConnectionBindings: true): a data-sourced
// attribute with no binding — or one bound to a connection that no longer exists
@@ -146,7 +163,8 @@ public class FlatteningPipeline : IFlatteningPipeline
resolvedSharedScripts,
alarmCapableConnectionNames,
enforceConnectionBindings: true,
siteConnectionNames: siteConnectionNames);
siteConnectionNames: siteConnectionNames,
resolveSchemaRef: resolveSchemaRef);
// Compute revision hash
var hash = _revisionHashService.ComputeHash(config);
@@ -571,9 +571,18 @@ public class ManagementActor : ReceiveActor
}).ToList()
};
// M9-T32b: supply the JSON-Schema $ref resolution seam from the shared-schema
// library so a dangling {"$ref":"lib:Name"} in a template script schema is flagged
// here (design-time validate) consistently with the deploy path. The library is
// pre-loaded into a name→JSON map (the seam ValidationService consumes is sync).
var sharedSchemaRepo = sp.GetRequiredService<ISharedSchemaRepository>();
var sharedSchemas = await sharedSchemaRepo.ListAsync();
var schemaLibrary = sharedSchemas.ToDictionary(s => s.Name, s => s.SchemaJson, StringComparer.Ordinal);
Func<string, string?> resolveSchemaRef = name => schemaLibrary.GetValueOrDefault(name);
// Run full validation pipeline (collisions, script compilation, trigger refs, bindings)
var validationService = new TemplateEngine.Validation.ValidationService();
var validationResult = validationService.Validate(flatConfig);
var validationResult = validationService.Validate(flatConfig, resolveSchemaRef: resolveSchemaRef);
// Also detect naming collisions across the inheritance/composition graph
var svc = sp.GetRequiredService<TemplateService>();
@@ -1,5 +1,6 @@
using System.Text.Json;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Flattening;
using ZB.MOM.WW.ScadaBridge.Commons.Types.InboundApi;
using ZB.MOM.WW.ScadaBridge.ScriptAnalysis;
namespace ZB.MOM.WW.ScadaBridge.TemplateEngine.Validation;
@@ -83,13 +84,25 @@ public class ValidationService
/// connection is checked against this set so a binding to a phantom/stale connection
/// is caught. <c>null</c> skips the "exists at site" half (it stays inert).
/// </param>
/// <param name="resolveSchemaRef">
/// M9-T32b: optional JSON-Schema <c>$ref</c> resolution seam mapping a
/// <c>lib:Name</c> reference target's name to the referenced schema JSON (or
/// <c>null</c> when the library entry does not exist). Supplied on the deploy path
/// (backed by <c>ISharedSchemaRepository</c>) so a dangling/cyclic/over-depth
/// <c>$ref</c> in any validated script parameter/return schema becomes a
/// deploy-blocking <see cref="ValidationCategory.SchemaReference"/> error naming the
/// missing reference. When <c>null</c> the seam is absent: schemas with no
/// <c>$ref</c> are unaffected (behavior unchanged), and a <c>$ref</c> with no
/// resolver is treated as dangling (the safe option).
/// </param>
/// <returns>A merged <see cref="ValidationResult"/> aggregating all pipeline stage outcomes.</returns>
public ValidationResult Validate(
FlattenedConfiguration configuration,
IReadOnlyList<ResolvedScript>? sharedScripts = null,
IReadOnlySet<string>? alarmCapableConnectionNames = null,
bool enforceConnectionBindings = false,
IReadOnlySet<string>? siteConnectionNames = null)
IReadOnlySet<string>? siteConnectionNames = null,
Func<string, string?>? resolveSchemaRef = null)
{
ArgumentNullException.ThrowIfNull(configuration);
@@ -102,12 +115,86 @@ public class ValidationService
ValidateScriptTriggerReferences(configuration),
ValidateExpressionTriggers(configuration),
ValidateConnectionBindingCompleteness(configuration, enforceConnectionBindings, siteConnectionNames),
ValidateSchemaReferences(configuration, sharedScripts, resolveSchemaRef),
_semanticValidator.Validate(configuration, sharedScripts, alarmCapableConnectionNames)
};
return ValidationResult.Merge(results.ToArray());
}
/// <summary>
/// M9-T32b — verifies that every <c>{"$ref":"lib:Name"}</c> reference inside a script's
/// parameter or return schema can be resolved. Both the scripts of
/// <paramref name="configuration"/> and the optional <paramref name="sharedScripts"/>
/// are inspected; each schema is parsed with
/// <see cref="InboundApiSchema.ParseWithRefs"/> using <paramref name="resolveSchemaRef"/>.
///
/// <para>
/// Every unresolved reference reported by the parse becomes one
/// <see cref="ValidationCategory.SchemaReference"/> error that names the owning script,
/// says which schema (parameters or return) is affected, and quotes the reference.
/// Errors are emitted in scan order: configuration scripts first, then shared scripts,
/// parameters before return for each script.
/// </para>
///
/// <para>
/// Empty/whitespace schemas are skipped. Any <see cref="JsonException"/> thrown by the
/// parse is deliberately swallowed and yields no entry: this stage reports missing
/// references only, and structural schema problems are intended to be reported by
/// other validation stages (not visible from this method), so they are not
/// double-reported here.
/// </para>
/// </summary>
/// <param name="configuration">The flattened configuration whose scripts' schemas are checked.</param>
/// <param name="sharedScripts">Optional shared scripts whose schemas are also checked; <c>null</c> is treated as none.</param>
/// <param name="resolveSchemaRef">The <c>$ref</c> resolver, or <c>null</c> (no resolver, so every <c>$ref</c> is unresolved).</param>
/// <returns>A result carrying one error per unresolved reference, or a success result when there are none.</returns>
internal static ValidationResult ValidateSchemaReferences(
    FlattenedConfiguration configuration,
    IReadOnlyList<ResolvedScript>? sharedScripts,
    Func<string, string?>? resolveSchemaRef)
{
    var findings = new List<ValidationEntry>();

    // Parses one schema definition and appends an error for each reference left unresolved.
    void Inspect(string owner, string label, string? definition)
    {
        if (string.IsNullOrWhiteSpace(definition))
        {
            return;
        }

        IReadOnlyList<string> unresolved;
        try
        {
            unresolved = InboundApiSchema.ParseWithRefs(definition, resolveSchemaRef).UnresolvedRefs;
        }
        catch (JsonException)
        {
            // Not a missing-reference concern; stay silent so the problem is not double-reported.
            return;
        }

        foreach (var target in unresolved)
        {
            findings.Add(ValidationEntry.Error(
                ValidationCategory.SchemaReference,
                $"Script '{owner}' {label} references schema 'lib:{target}' which could not be resolved.",
                owner));
        }
    }

    foreach (var script in configuration.Scripts)
    {
        Inspect(script.CanonicalName, "parameter definitions", script.ParameterDefinitions);
        Inspect(script.CanonicalName, "return definition", script.ReturnDefinition);
    }

    if (sharedScripts is not null)
    {
        foreach (var shared in sharedScripts)
        {
            Inspect(shared.CanonicalName, "parameter definitions", shared.ParameterDefinitions);
            Inspect(shared.CanonicalName, "return definition", shared.ReturnDefinition);
        }
    }

    if (findings.Count == 0)
    {
        return ValidationResult.Success();
    }

    return new ValidationResult { Errors = findings };
}
/// <summary>
/// Validates that flattening produced a non-empty configuration.
/// </summary>