feat: add SubjectTransform compiled engine for subject mapping

Port Go server/subject_transform.go to .NET. Implements a compiled transform engine that parses source patterns with wildcards and destination templates with function tokens at Create() time, then evaluates them efficiently at Apply() time without runtime regex. Supports all 9 transform functions: wildcard/$N, partition (FNV-1a), split, splitFromLeft, splitFromRight, sliceFromLeft, sliceFromRight, left, and right. Used for stream mirroring, account imports/exports, and subject routing.
2026-02-23 04:27:36 -05:00
parent 67a3881c7c
commit 46116400d2
2 changed files with 1104 additions and 0 deletions
--- a/src/NATS.Server/Subscriptions/SubjectTransform.cs
+++ b/src/NATS.Server/Subscriptions/SubjectTransform.cs
@@ -0,0 +1,708 @@
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace NATS.Server.Subscriptions;
+
+/// <summary>
+/// Compiled subject transform engine that maps subjects from a source pattern to a destination template.
+/// Reference: Go server/subject_transform.go
+/// </summary>
+public sealed partial class SubjectTransform
+{
+    private readonly string _source;          // source pattern
+    private readonly string _dest;            // destination template
+    private readonly string[] _sourceTokens;  // tokens of the source pattern
+    private readonly string[] _destTokens;    // tokens of the destination template
+    private readonly TransformOp[] _ops;      // compiled operations evaluated by TransformTokenized
+
+    // Private on purpose: the only call site visible in this file is Create, which validates and compiles first.
+    private SubjectTransform(string source, string dest, string[] sourceTokens, string[] destTokens, TransformOp[] ops)
+    {
+        // The references are stored as given; nothing is copied.
+        (_source, _dest) = (source, dest);
+        (_sourceTokens, _destTokens) = (sourceTokens, destTokens);
+        _ops = ops;
+    }
+
+    /// <summary>
+    /// Compiles a subject transform from a source pattern and a destination template; a null or empty source is treated as ">".
+    /// Returns null when the destination is null or empty, either subject fails validation, the destination contains a "*" token, only one of the two contains ">", a destination token fails to parse, a wildcard index exceeds the number of "*" tokens in the source, or a function other than partition is used while the source has no wildcards.
+    /// </summary>
+    public static SubjectTransform? Create(string source, string destination)
+    {
+        if (string.IsNullOrEmpty(destination))
+            return null;
+
+        if (string.IsNullOrEmpty(source))
+            source = ">";
+
+        // Validate source and destination as subjects
+        var (srcValid, srcTokens, srcPwcCount, srcHasFwc) = SubjectInfo(source);
+        var (destValid, destTokens, destPwcCount, destHasFwc) = SubjectInfo(destination);
+
+        // Both must be valid, dest must have no pwcs ("*"), and fwc (">") must be present in both or in neither
+        if (!srcValid || !destValid || destPwcCount > 0 || srcHasFwc != destHasFwc)
+            return null;
+
+        var ops = new TransformOp[destTokens.Length]; // one op per destination token
+
+        if (srcPwcCount > 0 || srcHasFwc)
+        {
+            // Build map from 1-based wildcard index to source token position (only "*" tokens are numbered)
+            var wildcardPositions = new Dictionary<int, int>();
+            int wildcardNum = 0;
+            for (int i = 0; i < srcTokens.Length; i++)
+            {
+                if (srcTokens[i] == "*")
+                {
+                    wildcardNum++;
+                    wildcardPositions[wildcardNum] = i;
+                }
+            }
+
+            for (int i = 0; i < destTokens.Length; i++)
+            {
+                var parsed = ParseDestToken(destTokens[i]);
+                if (parsed == null)
+                    return null; // Parse error (bad function, etc.)
+
+                if (parsed.Type == TransformType.None)
+                {
+                    ops[i] = new TransformOp(TransformType.None);
+                    continue;
+                }
+
+                // Resolve wildcard indexes to source token positions
+                var srcPositions = new int[parsed.WildcardIndexes.Length];
+                for (int j = 0; j < parsed.WildcardIndexes.Length; j++)
+                {
+                    int wcIdx = parsed.WildcardIndexes[j];
+                    if (wcIdx > srcPwcCount)
+                        return null; // Out of range
+
+                    // Match Go behavior: missing map key returns zero-value (0)
+                    // This happens for any index below 1 (e.g. partition with index 0, which Go silently allows), so such an index refers to source token 0.
+                    if (!wildcardPositions.TryGetValue(wcIdx, out int pos))
+                        pos = 0;
+
+                    srcPositions[j] = pos;
+                }
+
+                ops[i] = new TransformOp(parsed.Type, srcPositions, parsed.IntArg, parsed.StringArg);
+            }
+        }
+        else
+        {
+            // No wildcards in source: this branch accepts only literal tokens (None) and Partition
+            for (int i = 0; i < destTokens.Length; i++)
+            {
+                var parsed = ParseDestToken(destTokens[i]);
+                if (parsed == null)
+                    return null;
+
+                if (parsed.Type == TransformType.None)
+                {
+                    ops[i] = new TransformOp(TransformType.None);
+                }
+                else if (parsed.Type == TransformType.Partition)
+                {
+                    ops[i] = new TransformOp(TransformType.Partition, [], parsed.IntArg, parsed.StringArg); // any parsed wildcard indexes are dropped: no source positions
+                }
+                else
+                {
+                    // Other functions not allowed without wildcards in source
+                    return null;
+                }
+            }
+        }
+
+        return new SubjectTransform(source, destination, srcTokens, destTokens, ops);
+    }
+
+    /// <summary>
+    /// Maps <paramref name="subject"/> through this transform: the subject is checked against the source
+    /// pattern and, on a match, rewritten per the destination template. Returns null for a null/empty or non-matching subject.
+    /// </summary>
+    public string? Apply(string subject)
+    {
+        if (string.IsNullOrEmpty(subject))
+            return null;
+
+        // Pure passthrough: both sides are the bare full wildcard (or empty), so the subject is returned untouched.
+        bool sourceIsAny = _source is ">" or "";
+        bool destIsAny = _dest is ">" or "";
+        if (sourceIsAny && destIsAny)
+            return subject;
+
+        var parts = subject.Split('.');
+
+        // A bare ">" source accepts every subject; any other source has to match token by token.
+        bool matches = _source == ">" || MatchTokens(parts, _sourceTokens);
+        return matches ? TransformTokenized(parts) : null;
+    }
+
+    private string TransformTokenized(string[] tokens) // tokens: the subject split on '.', as passed by Apply
+    {
+        if (_ops.Length == 0)
+            return _dest;
+        // Build the output by evaluating one op per destination token, writing '.' between tokens.
+        var sb = new StringBuilder();
+        int lastIndex = _ops.Length - 1;
+
+        for (int i = 0; i < _ops.Length; i++)
+        {
+            var op = _ops[i];
+
+            if (op.Type == TransformType.None)
+            {
+                // A ">" dest token stops the loop before its own separator; the '.' written after the previous token already precedes the tail appended below
+                if (_destTokens[i] == ">")
+                    break;
+
+                sb.Append(_destTokens[i]);
+            }
+            else
+            {
+                switch (op.Type)
+                {
+                    case TransformType.Wildcard:
+                        if (op.SourcePositions.Length > 0 && op.SourcePositions[0] < tokens.Length) // otherwise nothing is appended for this token
+                            sb.Append(tokens[op.SourcePositions[0]]);
+                        break;
+
+                    case TransformType.Partition:
+                        sb.Append(ComputePartition(tokens, op));
+                        break;
+
+                    case TransformType.Split:
+                        ApplySplit(sb, tokens, op);
+                        break;
+
+                    case TransformType.SplitFromLeft:
+                        ApplySplitFromLeft(sb, tokens, op);
+                        break;
+
+                    case TransformType.SplitFromRight:
+                        ApplySplitFromRight(sb, tokens, op);
+                        break;
+
+                    case TransformType.SliceFromLeft:
+                        ApplySliceFromLeft(sb, tokens, op);
+                        break;
+
+                    case TransformType.SliceFromRight:
+                        ApplySliceFromRight(sb, tokens, op);
+                        break;
+
+                    case TransformType.Left:
+                        ApplyLeft(sb, tokens, op);
+                        break;
+
+                    case TransformType.Right:
+                        ApplyRight(sb, tokens, op);
+                        break;
+                }
+            }
+            // Separator after every destination token except the last one.
+            if (i < lastIndex)
+                sb.Append('.');
+        }
+
+        // Handle trailing fwc: append the subject tokens from the source's last token position onward, joined by '.'
+        if (_destTokens[^1] == ">")
+        {
+            int srcFwcPos = _sourceTokens.Length - 1; // index of the last source token, i.e. the position of > in source
+            for (int i = srcFwcPos; i < tokens.Length; i++)
+            {
+                sb.Append(tokens[i]);
+                if (i < tokens.Length - 1)
+                    sb.Append('.');
+            }
+        }
+
+        return sb.ToString();
+    }
+
+    /// <summary>
+    /// Returns the partition number for a subject as decimal text: the FNV-1a hash of the key modulo
+    /// <c>op.IntArg</c> buckets ("0" when there are zero buckets). The key is the concatenation of the
+    /// tokens at <c>op.SourcePositions</c>, or the whole dot-joined subject when no positions are given.
+    /// </summary>
+    private static string ComputePartition(string[] tokens, TransformOp op)
+    {
+        int numBuckets = op.IntArg;
+        if (numBuckets == 0)
+            return "0";
+
+        string key;
+        if (op.SourcePositions.Length > 0)
+        {
+            var keyBuilder = new StringBuilder();
+            foreach (int pos in op.SourcePositions)
+            {
+                if (pos < tokens.Length) // positions beyond the subject contribute nothing to the key
+                    keyBuilder.Append(tokens[pos]);
+            }
+            key = keyBuilder.ToString();
+        }
+        else
+            key = string.Join(".", tokens);
+
+        // Hash the UTF-8 bytes: ASCII encoding turns every non-ASCII char into '?', which collapses distinct keys.
+        return (Fnv1A32(Encoding.UTF8.GetBytes(key)) % (uint)numBuckets).ToString();
+    }
+
+    /// <summary>
+    /// FNV-1a 32-bit hash of <paramref name="data"/>. Offset basis: 2166136261, prime: 16777619.
+    /// </summary>
+    private static uint Fnv1A32(byte[] data)
+    {
+        const uint offsetBasis = 2166136261;
+        const uint prime = 16777619;
+
+        uint hash = offsetBasis;
+        foreach (byte b in data)
+        {
+            // Wraps modulo 2^32 by design; 'unchecked' keeps that true under CheckForOverflowUnderflow.
+            hash = unchecked((hash ^ b) * prime);
+        }
+
+        return hash;
+    }
+
+    /// <summary>
+    /// Splits the captured token on <c>op.StringArg</c> and appends the non-empty pieces to
+    /// <paramref name="sb"/>, separated by '.'. Appends nothing when the op has no source position.
+    /// </summary>
+    private static void ApplySplit(StringBuilder sb, string[] tokens, TransformOp op)
+    {
+        if (op.SourcePositions.Length == 0)
+            return;
+
+        var captured = tokens[op.SourcePositions[0]];
+        var separator = op.StringArg ?? string.Empty;
+        bool needSeparator = false;
+        // Empty pieces (leading, trailing or doubled delimiters) are skipped so no empty subject token is written.
+        foreach (var piece in captured.Split(separator))
+        {
+            if (piece == string.Empty)
+                continue;
+            if (needSeparator)
+                sb.Append('.');
+            sb.Append(piece);
+            needSeparator = true;
+        }
+    }
+
+    /// <summary>
+    /// Appends the captured token cut in two <c>op.IntArg</c> characters from its left end, with '.'
+    /// between the halves. A cut position outside 1..length-1 appends the token unchanged.
+    /// </summary>
+    private static void ApplySplitFromLeft(StringBuilder sb, string[] tokens, TransformOp op)
+    {
+        var captured = tokens[op.SourcePositions[0]];
+        var cut = op.IntArg;
+
+        if (cut <= 0 || cut >= captured.Length)
+        {
+            sb.Append(captured);
+            return;
+        }
+        sb.Append(captured.AsSpan(0, cut)).Append('.').Append(captured.AsSpan(cut));
+    }
+
+    /// <summary>
+    /// Appends the captured token cut in two <c>op.IntArg</c> characters from its right end, with '.'
+    /// between the halves. A cut position outside 1..length-1 appends the token unchanged.
+    /// </summary>
+    private static void ApplySplitFromRight(StringBuilder sb, string[] tokens, TransformOp op)
+    {
+        var captured = tokens[op.SourcePositions[0]];
+        var fromRight = op.IntArg;
+        if (fromRight <= 0 || fromRight >= captured.Length)
+        {
+            sb.Append(captured);
+            return;
+        }
+        int cut = captured.Length - fromRight; // index where the right-hand part begins
+        sb.Append(captured.AsSpan(0, cut)).Append('.').Append(captured.AsSpan(cut));
+    }
+
+    /// <summary>
+    /// Appends the captured token cut into consecutive pieces of <c>op.IntArg</c> characters, starting
+    /// from the left and joined by '.'; any shorter remainder becomes the last piece. A size outside
+    /// 1..length-1 appends the token unchanged.
+    /// </summary>
+    private static void ApplySliceFromLeft(StringBuilder sb, string[] tokens, TransformOp op)
+    {
+        var captured = tokens[op.SourcePositions[0]];
+        var size = op.IntArg;
+
+        if (size <= 0 || size >= captured.Length)
+        {
+            sb.Append(captured);
+            return;
+        }
+
+        int start = 0;
+        while (start < captured.Length)
+        {
+            if (start > 0)
+                sb.Append('.');
+            // Take a full piece, or whatever is left at the end of the token.
+            int remaining = captured.Length - start;
+            int take = remaining < size ? remaining : size;
+            sb.Append(captured.AsSpan(start, take));
+            start += take;
+        }
+    }
+
+    /// <summary>
+    /// Appends the captured token cut into pieces of <c>op.IntArg</c> characters aligned to its right end and joined by '.';
+    /// when the length is not a multiple of the size, the shorter piece comes first. A size outside 1..length-1 appends the token unchanged.
+    /// </summary>
+    private static void ApplySliceFromRight(StringBuilder sb, string[] tokens, TransformOp op)
+    {
+        var captured = tokens[op.SourcePositions[0]];
+        var size = op.IntArg;
+
+        if (size <= 0 || size >= captured.Length)
+        {
+            sb.Append(captured);
+            return;
+        }
+
+        // The first piece absorbs the remainder; the loop iterator then switches 'take' to the full size.
+        int take = captured.Length % size;
+        if (take == 0)
+            take = size;
+        for (int start = 0; start < captured.Length; start += take, take = size)
+        {
+            if (start > 0)
+                sb.Append('.');
+            sb.Append(captured.AsSpan(start, take));
+        }
+    }
+
+    /// <summary>
+    /// Appends the first <c>op.IntArg</c> characters of the captured token, or the whole token when
+    /// that count is outside 1..length-1.
+    /// </summary>
+    private static void ApplyLeft(StringBuilder sb, string[] tokens, TransformOp op)
+    {
+        var captured = tokens[op.SourcePositions[0]];
+        var keep = op.IntArg;
+
+        if (keep <= 0 || keep >= captured.Length)
+            sb.Append(captured);
+        else
+            sb.Append(captured.AsSpan(0, keep));
+    }
+
+    /// <summary>
+    /// Appends the last <c>op.IntArg</c> characters of the captured token, or the whole token when
+    /// that count is outside 1..length-1.
+    /// </summary>
+    private static void ApplyRight(StringBuilder sb, string[] tokens, TransformOp op)
+    {
+        var captured = tokens[op.SourcePositions[0]];
+        var keep = op.IntArg;
+
+        if (keep <= 0 || keep >= captured.Length)
+            sb.Append(captured);
+        else
+            sb.Append(captured.AsSpan(captured.Length - keep));
+    }
+
+    /// <summary>
+    /// Tests, token by token, whether a subject satisfies a pattern: "*" accepts any single token,
+    /// ">" accepts the rest of the subject (at least one token), and any other pattern token must be
+    /// equal to the subject token. The subject tokens are compared as literals.
+    /// </summary>
+    private static bool MatchTokens(string[] subjectTokens, string[] patternTokens)
+    {
+        int index = 0;
+
+        foreach (string expected in patternTokens)
+        {
+            // The pattern still has tokens but the subject has run out.
+            if (index >= subjectTokens.Length)
+                return false;
+
+            // Full wildcard: everything from here on matches.
+            if (expected == ">")
+                return true;
+
+            // A literal must equal the subject token; "*" accepts whatever is there.
+            if (expected != "*" && expected != subjectTokens[index])
+                return false;
+
+            index++;
+        }
+
+        // The pattern is used up, so the subject has to be used up as well.
+        return index == subjectTokens.Length;
+    }
+
+    /// <summary>
+    /// Splits a subject on '.' and validates it, returning (valid, tokens, pwcCount, hasFwc): pwcCount is
+    /// the number of "*" tokens and hasFwc tells whether a ">" token is present. A null/empty subject, an
+    /// empty token, or any token following ">" yields (false, [], 0, false).
+    /// Reference: Go subject_transform.go subjectInfo()
+    /// </summary>
+    private static (bool Valid, string[] Tokens, int PwcCount, bool HasFwc) SubjectInfo(string subject)
+    {
+        if (string.IsNullOrEmpty(subject))
+            return (false, [], 0, false);
+
+        var parts = subject.Split('.');
+        int stars = 0;
+        bool sawFullWildcard = false;
+
+        for (int i = 0; i < parts.Length; i++)
+        {
+            string part = parts[i];
+            // Reject empty tokens, and anything that comes after a ">".
+            if (sawFullWildcard || part.Length == 0)
+                return (false, [], 0, false);
+            // Only the exact one-character tokens count as wildcards.
+            if (part == ">")
+            {
+                sawFullWildcard = true;
+            }
+            else if (part == "*")
+            {
+                stars++;
+            }
+        }
+
+        return (true, parts, stars, sawFullWildcard);
+    }
+
+    /// <summary>
+    /// Classifies one destination token: "$N" (N an integer) is a wildcard reference, a token of more than
+    /// four characters wrapped in "{{" and "}}" is handed to ParseMustacheToken, and everything else —
+    /// including tokens of length 0 or 1 and "$" followed by a non-integer — is a literal (TransformType.None).
+    /// Returns null only when ParseMustacheToken does.
+    /// </summary>
+    private static ParsedToken? ParseDestToken(string token)
+    {
+        // Fresh descriptor for a token that is copied to the output as written.
+        static ParsedToken Literal() => new(TransformType.None, [], -1, string.Empty);
+
+        if (token.Length <= 1)
+            return Literal();
+
+        if (token[0] == '$')
+        {
+            // Other things rely on tokens starting with $ so a non-numeric suffix is not an error
+            return int.TryParse(token.AsSpan(1), out int wildcard)
+                ? new ParsedToken(TransformType.Wildcard, [wildcard], -1, string.Empty)
+                : Literal();
+        }
+
+        // Mustache-style {{function(args)}}
+        bool opens = token.Length > 4 && token[0] == '{' && token[1] == '{';
+        bool wrapped = opens && token[^2] == '}' && token[^1] == '}';
+        return wrapped ? ParseMustacheToken(token) : Literal();
+    }
+
+    private static ParsedToken? ParseMustacheToken(string token)
+    {
+        // Each function pattern is tried in turn and the first that matches decides the result. First: wildcard(n)
+        var args = GetFunctionArgs(WildcardRegex(), token);
+        if (args != null)
+        {
+            if (args.Length == 1 && args[0] == string.Empty)
+                return null; // Not enough args
+
+            if (args.Length == 1)
+            {
+                if (!int.TryParse(args[0].Trim(), out int idx))
+                    return null;
+                return new ParsedToken(TransformType.Wildcard, [idx], -1, string.Empty);
+            }
+
+            return null; // Too many args
+        }
+
+        // partition(num, tokens...): bucket count first, then optional wildcard indexes
+        args = GetFunctionArgs(PartitionRegex(), token);
+        if (args != null)
+        {
+            if (args.Length < 1)
+                return null;
+
+            if (args.Length == 1)
+            {
+                if (!TryParseInt32(args[0].Trim(), out int numBuckets))
+                    return null;
+                return new ParsedToken(TransformType.Partition, [], numBuckets, string.Empty);
+            }
+
+            // partition(num, tok1, tok2, ...)
+            if (!TryParseInt32(args[0].Trim(), out int buckets))
+                return null;
+
+            var indexes = new int[args.Length - 1];
+            for (int i = 1; i < args.Length; i++)
+            {
+                if (!int.TryParse(args[i].Trim(), out indexes[i - 1]))
+                    return null;
+            }
+
+            return new ParsedToken(TransformType.Partition, indexes, buckets, string.Empty);
+        }
+
+        // splitFromLeft(token, position)
+        args = GetFunctionArgs(SplitFromLeftRegex(), token);
+        if (args != null)
+            return ParseIndexIntArgs(args, TransformType.SplitFromLeft);
+
+        // splitFromRight(token, position)
+        args = GetFunctionArgs(SplitFromRightRegex(), token);
+        if (args != null)
+            return ParseIndexIntArgs(args, TransformType.SplitFromRight);
+
+        // sliceFromLeft(token, size)
+        args = GetFunctionArgs(SliceFromLeftRegex(), token);
+        if (args != null)
+            return ParseIndexIntArgs(args, TransformType.SliceFromLeft);
+
+        // sliceFromRight(token, size)
+        args = GetFunctionArgs(SliceFromRightRegex(), token);
+        if (args != null)
+            return ParseIndexIntArgs(args, TransformType.SliceFromRight);
+
+        // right(token, length)
+        args = GetFunctionArgs(RightRegex(), token);
+        if (args != null)
+            return ParseIndexIntArgs(args, TransformType.Right);
+
+        // left(token, length)
+        args = GetFunctionArgs(LeftRegex(), token);
+        if (args != null)
+            return ParseIndexIntArgs(args, TransformType.Left);
+
+        // split(token, delimiter): exactly two arguments; the delimiter is used untrimmed
+        args = GetFunctionArgs(SplitRegex(), token);
+        if (args != null)
+        {
+            if (args.Length < 2)
+                return null;
+            if (args.Length > 2)
+                return null;
+
+            if (!int.TryParse(args[0].Trim(), out int idx))
+                return null;
+
+            string delimiter = args[1];
+            if (delimiter.Contains(' ') || delimiter.Contains('.')) // a delimiter containing a space or '.' is rejected
+                return null;
+
+            return new ParsedToken(TransformType.Split, [idx], -1, delimiter);
+        }
+
+        // Unknown function
+        return null;
+    }
+
+    /// <summary>
+    /// Shared parser for the functions whose arguments are (wildcard index, number): requires exactly two
+    /// arguments, trims both, and returns null when the count or either number is unacceptable.
+    /// </summary>
+    private static ParsedToken? ParseIndexIntArgs(string[] args, TransformType type)
+    {
+        if (args.Length != 2)
+            return null;
+
+        // First the wildcard index, then the numeric parameter validated by TryParseInt32.
+        if (int.TryParse(args[0].Trim(), out int wildcard) && TryParseInt32(args[1].Trim(), out int amount))
+            return new ParsedToken(type, [wildcard], amount, string.Empty);
+
+        return null;
+    }
+
+    /// <summary>
+    /// Parses <paramref name="s"/> as an integer in the range 0..int.MaxValue. On success stores it in
+    /// <paramref name="result"/> and returns true; otherwise stores -1 and returns false.
+    /// </summary>
+    private static bool TryParseInt32(string s, out int result)
+    {
+        // Parsed as long first so that values above int.MaxValue are caught by the range test.
+        bool accepted = long.TryParse(s, out long wide) && wide >= 0 && wide <= int.MaxValue;
+
+        result = accepted ? (int)wide : -1;
+        return accepted;
+    }
+
+    /// <summary>
+    /// Matches <paramref name="token"/> against <paramref name="regex"/>; on a match returns the first capture group split by CommaSeparatorRegex, otherwise null.
+    /// </summary>
+    private static string[]? GetFunctionArgs(Regex regex, string token)
+    {
+        var found = regex.Match(token);
+        if (!found.Success || found.Groups.Count <= 1)
+            return null;
+
+        return CommaSeparatorRegex().Split(found.Groups[1].Value);
+    }
+
+    // Regex patterns matching the Go reference implementation. Only the first letter of each word in a function name may be either case; the patterns are unanchored and capture everything between the first "(" and the last ")" before "}}".
+    [GeneratedRegex(@"\{\{\s*[wW]ildcard\s*\((.*)\)\s*\}\}")]
+    private static partial Regex WildcardRegex();
+
+    [GeneratedRegex(@"\{\{\s*[pP]artition\s*\((.*)\)\s*\}\}")]
+    private static partial Regex PartitionRegex();
+
+    [GeneratedRegex(@"\{\{\s*[sS]plit[fF]rom[lL]eft\s*\((.*)\)\s*\}\}")]
+    private static partial Regex SplitFromLeftRegex();
+
+    [GeneratedRegex(@"\{\{\s*[sS]plit[fF]rom[rR]ight\s*\((.*)\)\s*\}\}")]
+    private static partial Regex SplitFromRightRegex();
+
+    [GeneratedRegex(@"\{\{\s*[sS]lice[fF]rom[lL]eft\s*\((.*)\)\s*\}\}")]
+    private static partial Regex SliceFromLeftRegex();
+
+    [GeneratedRegex(@"\{\{\s*[sS]lice[fF]rom[rR]ight\s*\((.*)\)\s*\}\}")]
+    private static partial Regex SliceFromRightRegex();
+
+    [GeneratedRegex(@"\{\{\s*[sS]plit\s*\((.*)\)\s*\}\}")]
+    private static partial Regex SplitRegex();
+
+    [GeneratedRegex(@"\{\{\s*[lL]eft\s*\((.*)\)\s*\}\}")]
+    private static partial Regex LeftRegex();
+
+    [GeneratedRegex(@"\{\{\s*[rR]ight\s*\((.*)\)\s*\}\}")]
+    private static partial Regex RightRegex();
+
+    [GeneratedRegex(@",\s*")] // argument separator: a comma plus any whitespace that follows it
+    private static partial Regex CommaSeparatorRegex();
+
+    private enum TransformType // kind of operation compiled for one destination token
+    {
+        None,           // literal destination token, copied as written (a ">" token ends the loop instead)
+        Wildcard,       // copies one captured source token
+        Partition,      // hash bucket number computed by ComputePartition
+        Split,          // captured token split on a delimiter string
+        SplitFromLeft,  // captured token cut in two at an offset from the left
+        SplitFromRight, // captured token cut in two at an offset from the right
+        SliceFromLeft,  // captured token cut into fixed-size pieces starting from the left
+        SliceFromRight, // captured token cut into fixed-size pieces aligned to the right
+        Left,           // leading characters of the captured token
+        Right,          // trailing characters of the captured token
+    }
+
+    private sealed record ParsedToken(TransformType Type, int[] WildcardIndexes, int IntArg, string StringArg); // parse result for one destination token; the parsers pass -1 / empty for unused arguments
+    // Compiled form of one destination token, as stored in _ops and evaluated by TransformTokenized.
+    private readonly record struct TransformOp(
+        TransformType Type,
+        int[] SourcePositions, // positions within the source tokens, resolved from wildcard indexes by Create
+        int IntArg,            // numeric function argument (bucket count, offset, size or length)
+        string? StringArg)     // delimiter for Split
+    {
+        public TransformOp(TransformType type) : this(type, [], -1, null) // op without arguments; Create uses it for None
+        {
+        }
+    }
+}