feat: add SubjectTransform compiled engine for subject mapping

Port Go server/subject_transform.go to .NET. Implements a compiled
transform engine that parses source patterns with wildcards and
destination templates with function tokens at Create() time, then
evaluates them efficiently at Apply() time without runtime regex.

Supports all 9 transform functions: wildcard/$N, partition (FNV-1a),
split, splitFromLeft, splitFromRight, sliceFromLeft, sliceFromRight,
left, and right. Used for stream mirroring, account imports/exports,
and subject routing.
This commit is contained in:
Joseph Doherty
2026-02-23 04:27:36 -05:00
parent 67a3881c7c
commit 46116400d2
2 changed files with 1104 additions and 0 deletions

View File

@@ -0,0 +1,708 @@
using System.Text;
using System.Text.RegularExpressions;
namespace NATS.Server.Subscriptions;
/// <summary>
/// Compiled subject transform engine that maps subjects from a source pattern to a destination template.
/// Reference: Go server/subject_transform.go
/// </summary>
public sealed partial class SubjectTransform
{
// Source pattern this transform was compiled from.
private readonly string _source;

// Destination template this transform was compiled from.
private readonly string _dest;

// Source pattern split on '.'.
private readonly string[] _sourceTokens;

// Destination template split on '.'.
private readonly string[] _destTokens;

// Compiled operations; TransformTokenized reads _ops[i] together with _destTokens[i].
private readonly TransformOp[] _ops;

/// <summary>
/// Stores the already-validated parts of a compiled transform.
/// The constructor is private; it performs no validation of its own.
/// </summary>
private SubjectTransform(string source, string dest, string[] sourceTokens, string[] destTokens, TransformOp[] ops)
    => (_source, _dest, _sourceTokens, _destTokens, _ops) = (source, dest, sourceTokens, destTokens, ops);
/// <summary>
/// Compiles a source pattern and a destination template into a reusable transform.
/// </summary>
/// <param name="source">
/// Source subject pattern; may contain '*' tokens and a trailing '>'. Null or empty is treated as ">".
/// </param>
/// <param name="destination">
/// Destination template made of literal tokens, $N references and {{function(...)}} tokens.
/// </param>
/// <returns>
/// The compiled transform, or null when the destination is null or empty, either side is not a
/// valid subject, the destination contains a '*' token, only one of the two sides ends in '>',
/// a destination token fails to parse, a referenced wildcard index is greater than the number of
/// '*' tokens in the source, or a function other than partition is used while the source has
/// no wildcards.
/// </returns>
public static SubjectTransform? Create(string source, string destination)
{
    if (string.IsNullOrEmpty(destination))
    {
        return null;
    }

    // An omitted source means "match everything".
    string pattern = string.IsNullOrEmpty(source) ? ">" : source;

    var src = SubjectInfo(pattern);
    var dst = SubjectInfo(destination);

    // Both sides must be valid, the destination may not contain '*', and '>' must appear on both sides or neither.
    bool compatible = src.Valid && dst.Valid && dst.PwcCount <= 0 && src.HasFwc == dst.HasFwc;
    if (!compatible)
    {
        return null;
    }

    bool sourceHasWildcards = src.PwcCount > 0 || src.HasFwc;

    // Maps the 1-based ordinal of each '*' to its position within the source tokens.
    var starPositions = new Dictionary<int, int>();
    if (sourceHasWildcards)
    {
        for (int pos = 0; pos < src.Tokens.Length; pos++)
        {
            if (src.Tokens[pos] == "*")
            {
                starPositions[starPositions.Count + 1] = pos;
            }
        }
    }

    var ops = new TransformOp[dst.Tokens.Length];
    for (int d = 0; d < ops.Length; d++)
    {
        if (ParseDestToken(dst.Tokens[d]) is not { } parsed)
        {
            return null; // Parse error (bad function, bad arguments, ...)
        }

        TransformOp? compiled;
        if (parsed.Type == TransformType.None)
        {
            compiled = new TransformOp(TransformType.None);
        }
        else if (sourceHasWildcards)
        {
            compiled = BindToSourceWildcards(parsed);
        }
        else
        {
            compiled = BindWithoutWildcards(parsed);
        }

        if (compiled is not { } op)
        {
            return null;
        }

        ops[d] = op;
    }

    return new SubjectTransform(pattern, destination, src.Tokens, dst.Tokens, ops);

    // Resolves each referenced wildcard ordinal to a source token position.
    TransformOp? BindToSourceWildcards(ParsedToken token)
    {
        var positions = new int[token.WildcardIndexes.Length];
        for (int k = 0; k < positions.Length; k++)
        {
            int ordinal = token.WildcardIndexes[k];
            if (ordinal > src.PwcCount)
            {
                return null; // Out of range
            }

            // An ordinal with no matching '*' (for example 0) falls back to position 0,
            // mirroring a Go map lookup that yields the zero value for a missing key.
            positions[k] = starPositions.TryGetValue(ordinal, out int found) ? found : 0;
        }

        return new TransformOp(token.Type, positions, token.IntArg, token.StringArg);
    }

    // Without wildcards in the source, partition is the only function accepted;
    // it is compiled without token positions regardless of any indexes supplied.
    static TransformOp? BindWithoutWildcards(ParsedToken token)
        => token.Type == TransformType.Partition
            ? new TransformOp(TransformType.Partition, [], token.IntArg, token.StringArg)
            : null;
}
/// <summary>
/// Matches a literal subject against the source pattern and, on a match, produces the
/// destination subject by evaluating the compiled destination template.
/// </summary>
/// <param name="subject">The literal subject to transform.</param>
/// <returns>
/// The transformed subject, or null when <paramref name="subject"/> is null or empty, is not a
/// literal subject (it contains an empty token or a bare '*' / '>' token), or does not match
/// the source pattern.
/// </returns>
public string? Apply(string subject)
{
    if (string.IsNullOrEmpty(subject))
    {
        return null;
    }

    // Special case: source is > (match everything) and dest is > (passthrough)
    if ((_source == ">" || _source == string.Empty) && (_dest == ">" || _dest == string.Empty))
    {
        return subject;
    }

    string[] subjectTokens = subject.Split('.');

    // Token matching assumes a literal subject. Without this check a subject such as
    // "foo.." would match "foo.*.*" and yield a destination with empty tokens, and a
    // subject containing wildcard tokens would be treated as if it were concrete.
    foreach (string token in subjectTokens)
    {
        if (token.Length == 0 || token == "*" || token == ">")
        {
            return null;
        }
    }

    // A bare ">" source matches every subject, so token matching is skipped for it.
    if (_source != ">" && !MatchTokens(subjectTokens, _sourceTokens))
    {
        return null;
    }

    return TransformTokenized(subjectTokens);
}
/// <summary>
/// Builds the destination subject by running the compiled operation for each destination token.
/// </summary>
/// <param name="tokens">The incoming subject split on '.'.</param>
/// <returns>The transformed subject.</returns>
private string TransformTokenized(string[] tokens)
{
    if (_ops.Length == 0)
    {
        return _dest;
    }

    var output = new StringBuilder();
    for (int d = 0; d < _ops.Length; d++)
    {
        TransformOp op = _ops[d];
        bool reachedFwc = false;

        switch (op.Type)
        {
            case TransformType.None when _destTokens[d] == ">":
                // The trailing '>' is expanded after the loop.
                reachedFwc = true;
                break;

            case TransformType.None:
                output.Append(_destTokens[d]);
                break;

            case TransformType.Wildcard:
                if (op.SourcePositions.Length > 0)
                {
                    int position = op.SourcePositions[0];
                    if (position < tokens.Length)
                    {
                        output.Append(tokens[position]);
                    }
                }

                break;

            case TransformType.Partition:
                output.Append(ComputePartition(tokens, op));
                break;

            case TransformType.Split:
                ApplySplit(output, tokens, op);
                break;

            case TransformType.SplitFromLeft:
                ApplySplitFromLeft(output, tokens, op);
                break;

            case TransformType.SplitFromRight:
                ApplySplitFromRight(output, tokens, op);
                break;

            case TransformType.SliceFromLeft:
                ApplySliceFromLeft(output, tokens, op);
                break;

            case TransformType.SliceFromRight:
                ApplySliceFromRight(output, tokens, op);
                break;

            case TransformType.Left:
                ApplyLeft(output, tokens, op);
                break;

            case TransformType.Right:
                ApplyRight(output, tokens, op);
                break;
        }

        if (reachedFwc)
        {
            break;
        }

        // Separator between destination tokens; none after the last one.
        if (d != _ops.Length - 1)
        {
            output.Append('.');
        }
    }

    // Trailing '>' in the destination: copy every subject token from the position of
    // the source's last token (its '>') onwards.
    if (_destTokens[^1] == ">")
    {
        bool needSeparator = false;
        for (int t = _sourceTokens.Length - 1; t < tokens.Length; t++)
        {
            if (needSeparator)
            {
                output.Append('.');
            }

            output.Append(tokens[t]);
            needSeparator = true;
        }
    }

    return output.ToString();
}
/// <summary>
/// Computes the partition number for a subject: the FNV-1a 32-bit hash of the key,
/// modulo the configured number of buckets.
/// </summary>
/// <param name="tokens">The incoming subject split on '.'.</param>
/// <param name="op">
/// The partition operation. <c>IntArg</c> is the bucket count; <c>SourcePositions</c> selects
/// the subject tokens that form the key. When no positions are given the whole subject is the key.
/// </param>
/// <returns>The bucket number as a decimal string; "0" when the bucket count is zero.</returns>
private static string ComputePartition(string[] tokens, TransformOp op)
{
    int numBuckets = op.IntArg;
    if (numBuckets == 0)
    {
        return "0"; // Avoids a modulo-by-zero below.
    }

    string key;
    if (op.SourcePositions.Length > 0)
    {
        // Key is the concatenation (no separator) of the selected source tokens.
        var keyBuilder = new StringBuilder();
        foreach (int pos in op.SourcePositions)
        {
            if (pos < tokens.Length)
            {
                keyBuilder.Append(tokens[pos]);
            }
        }

        key = keyBuilder.ToString();
    }
    else
    {
        // Key is the full subject (all tokens joined with '.').
        key = string.Join(".", tokens);
    }

    // Hash the UTF-8 bytes. ASCII encoding would replace every non-ASCII character with '?',
    // so different non-ASCII tokens would collapse to the same key and the result would
    // differ from an implementation that hashes the raw UTF-8 bytes of the subject.
    byte[] keyBytes = Encoding.UTF8.GetBytes(key);

    uint hash = Fnv1A32(keyBytes);
    return (hash % (uint)numBuckets).ToString(System.Globalization.CultureInfo.InvariantCulture);
}
/// <summary>
/// FNV-1a 32-bit hash. Offset basis: 2166136261, prime: 16777619.
/// </summary>
/// <param name="data">Bytes to hash; an empty array yields the offset basis.</param>
/// <returns>The 32-bit hash value.</returns>
private static uint Fnv1A32(byte[] data)
{
    const uint offsetBasis = 2166136261;
    const uint prime = 16777619;

    uint hash = offsetBasis;
    foreach (byte b in data)
    {
        hash ^= b;

        // The multiplication is meant to wrap modulo 2^32. Marking it unchecked keeps the
        // hash working when the project is compiled with overflow checking enabled.
        hash = unchecked(hash * prime);
    }

    return hash;
}
/// <summary>
/// Splits the selected subject token on the configured delimiter and appends the non-empty
/// pieces, separated by '.'. Appends nothing when the operation has no source position.
/// </summary>
/// <param name="sb">Builder receiving the output.</param>
/// <param name="tokens">The incoming subject split on '.'.</param>
/// <param name="op">Operation whose first source position selects the token and whose <c>StringArg</c> is the delimiter.</param>
private static void ApplySplit(StringBuilder sb, string[] tokens, TransformOp op)
{
    if (op.SourcePositions.Length == 0)
    {
        return;
    }

    // Empty pieces (leading, trailing or repeated delimiters) are dropped.
    string[] pieces = tokens[op.SourcePositions[0]]
        .Split(op.StringArg ?? string.Empty, StringSplitOptions.RemoveEmptyEntries);

    sb.AppendJoin('.', pieces);
}
/// <summary>
/// Appends the selected token split into two tokens at <c>IntArg</c> characters from its start.
/// The token is appended unchanged when that position is not strictly inside it.
/// </summary>
/// <param name="sb">Builder receiving the output.</param>
/// <param name="tokens">The incoming subject split on '.'.</param>
/// <param name="op">Operation whose first source position selects the token.</param>
private static void ApplySplitFromLeft(StringBuilder sb, string[] tokens, TransformOp op)
{
    ReadOnlySpan<char> source = tokens[op.SourcePositions[0]];
    int cut = op.IntArg;

    bool splittable = cut > 0 && cut < source.Length;
    if (!splittable)
    {
        sb.Append(source);
        return;
    }

    sb.Append(source[..cut]).Append('.').Append(source[cut..]);
}
/// <summary>
/// Appends the selected token split into two tokens at <c>IntArg</c> characters from its end.
/// The token is appended unchanged when that position is not strictly inside it.
/// </summary>
/// <param name="sb">Builder receiving the output.</param>
/// <param name="tokens">The incoming subject split on '.'.</param>
/// <param name="op">Operation whose first source position selects the token.</param>
private static void ApplySplitFromRight(StringBuilder sb, string[] tokens, TransformOp op)
{
    ReadOnlySpan<char> source = tokens[op.SourcePositions[0]];
    int fromEnd = op.IntArg;

    bool splittable = fromEnd > 0 && fromEnd < source.Length;
    if (!splittable)
    {
        sb.Append(source);
        return;
    }

    int cut = source.Length - fromEnd;
    sb.Append(source[..cut]).Append('.').Append(source[cut..]);
}
/// <summary>
/// Appends the selected token cut into '.'-separated slices of <c>IntArg</c> characters,
/// counted from the left; a shorter final slice holds any remainder.
/// The token is appended unchanged when the slice size is not strictly between 0 and its length.
/// </summary>
/// <param name="sb">Builder receiving the output.</param>
/// <param name="tokens">The incoming subject split on '.'.</param>
/// <param name="op">Operation whose first source position selects the token.</param>
private static void ApplySliceFromLeft(StringBuilder sb, string[] tokens, TransformOp op)
{
    ReadOnlySpan<char> source = tokens[op.SourcePositions[0]];
    int size = op.IntArg;

    if (size <= 0 || size >= source.Length)
    {
        sb.Append(source);
        return;
    }

    for (int start = 0; start < source.Length; start += size)
    {
        if (start > 0)
        {
            sb.Append('.');
        }

        // The last slice may be shorter than the requested size.
        int take = Math.Min(size, source.Length - start);
        sb.Append(source.Slice(start, take));
    }
}
/// <summary>
/// Appends the selected token cut into '.'-separated slices of <c>IntArg</c> characters,
/// counted from the right; a shorter first slice holds any remainder.
/// The token is appended unchanged when the slice size is not strictly between 0 and its length.
/// </summary>
/// <param name="sb">Builder receiving the output.</param>
/// <param name="tokens">The incoming subject split on '.'.</param>
/// <param name="op">Operation whose first source position selects the token.</param>
private static void ApplySliceFromRight(StringBuilder sb, string[] tokens, TransformOp op)
{
    ReadOnlySpan<char> source = tokens[op.SourcePositions[0]];
    int size = op.IntArg;

    if (size <= 0 || size >= source.Length)
    {
        sb.Append(source);
        return;
    }

    // The first slice takes the remainder (if any); every later slice is full-sized.
    int take = source.Length % size;
    if (take == 0)
    {
        take = size;
    }

    int start = 0;
    while (start < source.Length)
    {
        if (start > 0)
        {
            sb.Append('.');
        }

        sb.Append(source.Slice(start, take));
        start += take;
        take = size;
    }
}
/// <summary>
/// Appends the first <c>IntArg</c> characters of the selected token, or the whole token when
/// that length is not strictly between 0 and the token length.
/// </summary>
/// <param name="sb">Builder receiving the output.</param>
/// <param name="tokens">The incoming subject split on '.'.</param>
/// <param name="op">Operation whose first source position selects the token.</param>
private static void ApplyLeft(StringBuilder sb, string[] tokens, TransformOp op)
{
    ReadOnlySpan<char> source = tokens[op.SourcePositions[0]];
    int keep = op.IntArg;

    bool truncate = keep > 0 && keep < source.Length;
    sb.Append(truncate ? source[..keep] : source);
}
/// <summary>
/// Appends the last <c>IntArg</c> characters of the selected token, or the whole token when
/// that length is not strictly between 0 and the token length.
/// </summary>
/// <param name="sb">Builder receiving the output.</param>
/// <param name="tokens">The incoming subject split on '.'.</param>
/// <param name="op">Operation whose first source position selects the token.</param>
private static void ApplyRight(StringBuilder sb, string[] tokens, TransformOp op)
{
    ReadOnlySpan<char> source = tokens[op.SourcePositions[0]];
    int keep = op.IntArg;

    bool truncate = keep > 0 && keep < source.Length;
    sb.Append(truncate ? source[^keep..] : source);
}
/// <summary>
/// Tests whether tokenized subject tokens match a tokenized pattern.
/// '*' in the pattern matches exactly one token; '>' matches one or more remaining tokens.
/// The subject tokens are compared as-is; wildcards inside the subject get no special treatment.
/// </summary>
/// <param name="subjectTokens">The subject split on '.'.</param>
/// <param name="patternTokens">The pattern split on '.'.</param>
/// <returns>True when the subject matches the pattern.</returns>
private static bool MatchTokens(string[] subjectTokens, string[] patternTokens)
{
    int index = 0;
    foreach (string expected in patternTokens)
    {
        // Pattern still has tokens but the subject has run out.
        if (index >= subjectTokens.Length)
        {
            return false;
        }

        // Full wildcard swallows everything that is left.
        if (expected == ">")
        {
            return true;
        }

        // Anything other than '*' has to match literally.
        if (expected != "*" && expected != subjectTokens[index])
        {
            return false;
        }

        index++;
    }

    // Pattern exhausted without '>': the subject must be exhausted as well.
    return index == subjectTokens.Length;
}
/// <summary>
/// Tokenizes a subject on '.' and validates it.
/// Reference: Go subject_transform.go subjectInfo()
/// </summary>
/// <param name="subject">The subject or pattern to inspect.</param>
/// <returns>
/// Valid: false when the subject is null or empty, has an empty token, or has any token after '>'.
/// Tokens: the '.'-separated tokens (empty when invalid).
/// PwcCount: number of '*' tokens. HasFwc: whether a '>' token is present.
/// </returns>
private static (bool Valid, string[] Tokens, int PwcCount, bool HasFwc) SubjectInfo(string subject)
{
    if (string.IsNullOrEmpty(subject))
    {
        return (false, [], 0, false);
    }

    string[] parts = subject.Split('.');
    int starCount = 0;
    bool sawFwc = false;

    for (int i = 0; i < parts.Length; i++)
    {
        string part = parts[i];

        // Nothing may follow '>', and tokens may not be empty.
        if (sawFwc || part.Length == 0)
        {
            return (false, [], 0, false);
        }

        switch (part)
        {
            case ">":
                sawFwc = true;
                break;
            case "*":
                starCount++;
                break;
        }
    }

    return (true, parts, starCount, sawFwc);
}
/// <summary>
/// Classifies one destination token: a literal, a $N wildcard reference, or a
/// {{function(args)}} token (delegated to ParseMustacheToken).
/// </summary>
/// <param name="token">A single '.'-separated token of the destination template.</param>
/// <returns>The parsed descriptor, or null when a {{...}} token fails to parse.</returns>
private static ParsedToken? ParseDestToken(string token)
{
    if (token.Length <= 1)
    {
        return Literal();
    }

    // $N shorthand for wildcard(N). A '$' token that is not followed by an integer is
    // kept as a literal, because other things rely on tokens starting with '$'.
    if (token[0] == '$')
    {
        return int.TryParse(token.AsSpan(1), out int ordinal)
            ? new ParsedToken(TransformType.Wildcard, [ordinal], -1, string.Empty)
            : Literal();
    }

    // Mustache-style {{function(args)}}
    bool mustache = token.Length > 4
        && token.StartsWith("{{", StringComparison.Ordinal)
        && token.EndsWith("}}", StringComparison.Ordinal);

    return mustache ? ParseMustacheToken(token) : Literal();

    static ParsedToken Literal() => new(TransformType.None, [], -1, string.Empty);
}
/// <summary>
/// Parses a {{function(args)}} destination token by trying each known function pattern in turn.
/// </summary>
/// <param name="token">A destination token that starts with "{{" and ends with "}}".</param>
/// <returns>
/// The parsed descriptor, or null when the function is unknown or its arguments are
/// missing, surplus or malformed.
/// </returns>
private static ParsedToken? ParseMustacheToken(string token)
{
    // wildcard(n): exactly one non-empty integer argument.
    if (GetFunctionArgs(WildcardRegex(), token) is { } wildcardArgs)
    {
        if (wildcardArgs.Length != 1 || wildcardArgs[0] == string.Empty)
        {
            return null;
        }

        return int.TryParse(wildcardArgs[0].Trim(), out int wildcardIndex)
            ? new ParsedToken(TransformType.Wildcard, [wildcardIndex], -1, string.Empty)
            : null;
    }

    // partition(num, tokens...): bucket count followed by zero or more wildcard indexes.
    if (GetFunctionArgs(PartitionRegex(), token) is { } partitionArgs)
    {
        if (partitionArgs.Length < 1)
        {
            return null;
        }

        if (!TryParseInt32(partitionArgs[0].Trim(), out int bucketCount))
        {
            return null;
        }

        var tokenIndexes = new int[partitionArgs.Length - 1];
        for (int k = 0; k < tokenIndexes.Length; k++)
        {
            if (!int.TryParse(partitionArgs[k + 1].Trim(), out tokenIndexes[k]))
            {
                return null;
            }
        }

        return new ParsedToken(TransformType.Partition, tokenIndexes, bucketCount, string.Empty);
    }

    // Functions that share the (wildcard index, integer) argument shape, tried in this order.
    (Regex Pattern, TransformType Kind)[] indexAndIntFunctions =
    [
        (SplitFromLeftRegex(), TransformType.SplitFromLeft),
        (SplitFromRightRegex(), TransformType.SplitFromRight),
        (SliceFromLeftRegex(), TransformType.SliceFromLeft),
        (SliceFromRightRegex(), TransformType.SliceFromRight),
        (RightRegex(), TransformType.Right),
        (LeftRegex(), TransformType.Left),
    ];

    foreach (var (pattern, kind) in indexAndIntFunctions)
    {
        if (GetFunctionArgs(pattern, token) is { } functionArgs)
        {
            return ParseIndexIntArgs(functionArgs, kind);
        }
    }

    // split(token, delimiter): the delimiter may not contain a space or a '.'.
    if (GetFunctionArgs(SplitRegex(), token) is { } splitArgs)
    {
        if (splitArgs.Length != 2)
        {
            return null;
        }

        if (!int.TryParse(splitArgs[0].Trim(), out int splitIndex))
        {
            return null;
        }

        string separator = splitArgs[1];
        bool separatorInvalid = separator.Contains(' ') || separator.Contains('.');
        return separatorInvalid
            ? null
            : new ParsedToken(TransformType.Split, [splitIndex], -1, separator);
    }

    // Unknown function
    return null;
}
/// <summary>
/// Parses the argument list shared by the functions that take a wildcard index and an integer.
/// </summary>
/// <param name="args">Raw argument strings; exactly two are required.</param>
/// <param name="type">Transform type to assign to the result.</param>
/// <returns>
/// The parsed descriptor, or null when the argument count is not two, the first argument is
/// not an integer, or the second is not an integer between 0 and int.MaxValue.
/// </returns>
private static ParsedToken? ParseIndexIntArgs(string[] args, TransformType type)
{
    return args is [var indexText, var valueText]
        && int.TryParse(indexText.Trim(), out int tokenIndex)
        && TryParseInt32(valueText.Trim(), out int value)
            ? new ParsedToken(type, [tokenIndex], value, string.Empty)
            : null;
}
/// <summary>
/// Parses a non-negative integer that fits in an <see cref="int"/>.
/// </summary>
/// <param name="s">Text to parse.</param>
/// <param name="result">The parsed value on success; -1 on failure.</param>
/// <returns>True when <paramref name="s"/> is an integer between 0 and int.MaxValue inclusive.</returns>
private static bool TryParseInt32(string s, out int result)
{
    // Parsing as long lets values just beyond the int range be rejected by the range check.
    bool inRange = long.TryParse(s, out long parsed) && parsed is >= 0 and <= int.MaxValue;
    result = inRange ? (int)parsed : -1;
    return inRange;
}
/// <summary>
/// Extracts the raw arguments of a function token.
/// </summary>
/// <param name="regex">Function pattern whose first capture group holds the argument text.</param>
/// <param name="token">The destination token to test.</param>
/// <returns>
/// The captured argument text split on commas (each comma may be followed by whitespace),
/// or null when the token does not match the pattern.
/// </returns>
private static string[]? GetFunctionArgs(Regex regex, string token)
{
    Match found = regex.Match(token);
    if (!found.Success || found.Groups.Count <= 1)
    {
        return null;
    }

    return CommaSeparatorRegex().Split(found.Groups[1].Value);
}
// Patterns that recognise the mustache-style destination functions, e.g. {{wildcard(1)}}.
// Each one allows optional whitespace around the function name and captures the raw text
// between the parentheses in group 1. The patterns are NOT fully case-insensitive: only the
// first letter of each word in a function name may be upper or lower case (for example
// "splitFromLeft", "SplitFromLeft" and "splitfromleft" match, "SPLITFROMLEFT" does not).
// Intended to mirror the patterns of the Go reference implementation.

// {{wildcard(...)}}
[GeneratedRegex(@"\{\{\s*[wW]ildcard\s*\((.*)\)\s*\}\}")]
private static partial Regex WildcardRegex();
// {{partition(...)}}
[GeneratedRegex(@"\{\{\s*[pP]artition\s*\((.*)\)\s*\}\}")]
private static partial Regex PartitionRegex();
// {{splitFromLeft(...)}}
[GeneratedRegex(@"\{\{\s*[sS]plit[fF]rom[lL]eft\s*\((.*)\)\s*\}\}")]
private static partial Regex SplitFromLeftRegex();
// {{splitFromRight(...)}}
[GeneratedRegex(@"\{\{\s*[sS]plit[fF]rom[rR]ight\s*\((.*)\)\s*\}\}")]
private static partial Regex SplitFromRightRegex();
// {{sliceFromLeft(...)}}
[GeneratedRegex(@"\{\{\s*[sS]lice[fF]rom[lL]eft\s*\((.*)\)\s*\}\}")]
private static partial Regex SliceFromLeftRegex();
// {{sliceFromRight(...)}}
[GeneratedRegex(@"\{\{\s*[sS]lice[fF]rom[rR]ight\s*\((.*)\)\s*\}\}")]
private static partial Regex SliceFromRightRegex();
// {{split(...)}}
[GeneratedRegex(@"\{\{\s*[sS]plit\s*\((.*)\)\s*\}\}")]
private static partial Regex SplitRegex();
// {{left(...)}}
[GeneratedRegex(@"\{\{\s*[lL]eft\s*\((.*)\)\s*\}\}")]
private static partial Regex LeftRegex();
// {{right(...)}}
[GeneratedRegex(@"\{\{\s*[rR]ight\s*\((.*)\)\s*\}\}")]
private static partial Regex RightRegex();
// Argument separator: a comma followed by optional whitespace.
[GeneratedRegex(@",\s*")]
private static partial Regex CommaSeparatorRegex();
/// <summary>
/// The kind of operation compiled for one destination token.
/// </summary>
private enum TransformType
{
    // No function: the destination token is emitted literally (a trailing '>' is expanded separately).
    None,
    // $N or {{wildcard(N)}}: emits the subject token at the resolved source position.
    Wildcard,
    // {{partition(buckets, ...)}}: emits an FNV-1a hash of the key modulo the bucket count.
    Partition,
    // {{split(N, delimiter)}}: splits the token on the delimiter into '.'-separated tokens.
    Split,
    // {{splitFromLeft(N, position)}}: splits the token in two, position counted from the start.
    SplitFromLeft,
    // {{splitFromRight(N, position)}}: splits the token in two, position counted from the end.
    SplitFromRight,
    // {{sliceFromLeft(N, size)}}: cuts the token into fixed-size slices starting from the left.
    SliceFromLeft,
    // {{sliceFromRight(N, size)}}: cuts the token into fixed-size slices aligned to the right.
    SliceFromRight,
    // {{left(N, length)}}: keeps the first characters of the token.
    Left,
    // {{right(N, length)}}: keeps the last characters of the token.
    Right,
}
/// <summary>
/// Result of parsing one destination token, before wildcard indexes are resolved to
/// source token positions. WildcardIndexes holds the indexes as written in the template;
/// IntArg is the function's integer argument (-1 when it has none); StringArg is the split
/// delimiter (empty for every other function).
/// </summary>
private sealed record ParsedToken(TransformType Type, int[] WildcardIndexes, int IntArg, string StringArg);
/// <summary>
/// Compiled operation for one destination token. SourcePositions are positions into the
/// tokenized subject; IntArg and StringArg carry the function's integer and string arguments.
/// </summary>
private readonly record struct TransformOp(
TransformType Type,
int[] SourcePositions,
int IntArg,
string? StringArg)
{
// Convenience constructor for operations without arguments: no positions, IntArg -1, no string.
public TransformOp(TransformType type) : this(type, [], -1, null)
{
}
}
}