From 9fff5709c4badda7f7aad200337c79abe0067f24 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 23 Feb 2026 04:20:56 -0500 Subject: [PATCH] feat: add NATS config file lexer (port of Go conf/lex.go) Port the NATS configuration file lexer from Go's conf/lex.go to C#. The lexer is a state-machine tokenizer that supports the NATS config format: key-value pairs with =, :, or whitespace separators; nested maps {}; arrays []; single and double quoted strings with escape sequences; block strings (); variables $VAR; include directives; comments (# and //); booleans; integers with size suffixes (kb, mb, gb); floats; ISO8601 datetimes; and IP addresses. --- .../Configuration/NatsConfLexer.cs | 1492 +++++++++++++++++ .../Configuration/NatsConfToken.cs | 24 + tests/NATS.Server.Tests/NatsConfLexerTests.cs | 221 +++ 3 files changed, 1737 insertions(+) create mode 100644 src/NATS.Server/Configuration/NatsConfLexer.cs create mode 100644 src/NATS.Server/Configuration/NatsConfToken.cs create mode 100644 tests/NATS.Server.Tests/NatsConfLexerTests.cs diff --git a/src/NATS.Server/Configuration/NatsConfLexer.cs b/src/NATS.Server/Configuration/NatsConfLexer.cs new file mode 100644 index 0000000..8f878fb --- /dev/null +++ b/src/NATS.Server/Configuration/NatsConfLexer.cs @@ -0,0 +1,1492 @@ +// Port of Go conf/lex.go — state-machine tokenizer for NATS config files. 
+// Reference: golang/nats-server/conf/lex.go + +namespace NATS.Server.Configuration; + +public sealed class NatsConfLexer +{ + private const char Eof = '\0'; + private const char MapStartChar = '{'; + private const char MapEndChar = '}'; + private const char KeySepEqual = '='; + private const char KeySepColon = ':'; + private const char ArrayStartChar = '['; + private const char ArrayEndChar = ']'; + private const char ArrayValTerm = ','; + private const char MapValTerm = ','; + private const char CommentHashStart = '#'; + private const char CommentSlashStart = '/'; + private const char DqStringStart = '"'; + private const char DqStringEnd = '"'; + private const char SqStringStart = '\''; + private const char SqStringEnd = '\''; + private const char OptValTerm = ';'; + private const char TopOptStart = '{'; + private const char TopOptTerm = '}'; + private const char TopOptValTerm = ','; + private const char BlockStartChar = '('; + private const char BlockEndChar = ')'; + + private delegate LexState? LexState(NatsConfLexer lx); + + private readonly string _input; + private int _start; + private int _pos; + private int _width; + private int _line; + private readonly List<Token> _items; + private readonly Stack<LexState> _stack; + private readonly List<string> _stringParts; + private LexState? _stringStateFn; + + // Start position of the current line (after newline character). + private int _lstart; + + // Start position of the line from the current item. + private int _ilstart; + + private NatsConfLexer(string input) + { + _input = input; + _start = 0; + _pos = 0; + _width = 0; + _line = 1; + _items = []; + _stack = new Stack<LexState>(); + _stringParts = []; + _lstart = 0; + _ilstart = 0; + } + + public static IReadOnlyList<Token> Tokenize(string input) + { + var lx = new NatsConfLexer(input); + LexState? state = LexTop; + while (state is not null) + { + state = state(lx); + } + + return lx._items; + } + + private void Push(LexState state) => _stack.Push(state); + + private LexState? 
Pop() + { + if (_stack.Count == 0) + { + return Errorf("BUG in lexer: no states to pop."); + } + + return _stack.Pop(); + } + + private void Emit(TokenType type) + { + var val = string.Concat(_stringParts) + _input[_start.._pos]; + var pos = _pos - _ilstart - val.Length; + _items.Add(new Token(type, val, _line, pos)); + _start = _pos; + _ilstart = _lstart; + } + + private void EmitString() + { + string finalString; + if (_stringParts.Count > 0) + { + finalString = string.Concat(_stringParts) + _input[_start.._pos]; + _stringParts.Clear(); + } + else + { + finalString = _input[_start.._pos]; + } + + var pos = _pos - _ilstart - finalString.Length; + _items.Add(new Token(TokenType.String, finalString, _line, pos)); + _start = _pos; + _ilstart = _lstart; + } + + private void AddCurrentStringPart(int offset) + { + _stringParts.Add(_input[_start..(_pos - offset)]); + _start = _pos; + } + + private LexState? AddStringPart(string s) + { + _stringParts.Add(s); + _start = _pos; + return _stringStateFn; + } + + private bool HasEscapedParts() => _stringParts.Count > 0; + + private char Next() + { + if (_pos >= _input.Length) + { + _width = 0; + return Eof; + } + + if (_input[_pos] == '\n') + { + _line++; + _lstart = _pos; + } + + var c = _input[_pos]; + _width = 1; + _pos += _width; + return c; + } + + private void Ignore() + { + _start = _pos; + _ilstart = _lstart; + } + + private void Backup() + { + _pos -= _width; + if (_pos < _input.Length && _input[_pos] == '\n') + { + _line--; + } + } + + private char Peek() + { + var r = Next(); + Backup(); + return r; + } + + private LexState? 
Errorf(string message) + { + var pos = _pos - _lstart; + _items.Add(new Token(TokenType.Error, message, _line, pos)); + return null; + } + + // --- Helper methods --- + + private static bool IsWhitespace(char c) => c is '\t' or ' '; + + private static bool IsNL(char c) => c is '\n' or '\r'; + + private static bool IsKeySeparator(char c) => c is KeySepEqual or KeySepColon; + + private static bool IsNumberSuffix(char c) => + c is 'k' or 'K' or 'm' or 'M' or 'g' or 'G' or 't' or 'T' or 'p' or 'P' or 'e' or 'E'; + + // --- State functions --- + + private static LexState? LexTop(NatsConfLexer lx) + { + var r = lx.Next(); + if (char.IsWhiteSpace(r)) + { + return LexSkip(lx, LexTop); + } + + switch (r) + { + case TopOptStart: + lx.Push(LexTop); + return LexSkip(lx, LexBlockStart); + case CommentHashStart: + lx.Push(LexTop); + return LexCommentStart; + case CommentSlashStart: + { + var rn = lx.Next(); + if (rn == CommentSlashStart) + { + lx.Push(LexTop); + return LexCommentStart; + } + + lx.Backup(); + goto case Eof; + } + + case Eof: + if (lx._pos > lx._start) + { + return lx.Errorf("Unexpected EOF."); + } + + lx.Emit(TokenType.Eof); + return null; + } + + // Back up and let the key lexer handle it. + lx.Backup(); + lx.Push(LexTopValueEnd); + return LexKeyStart; + } + + private static LexState? 
LexTopValueEnd(NatsConfLexer lx) + { + var r = lx.Next(); + switch (r) + { + case CommentHashStart: + lx.Push(LexTop); + return LexCommentStart; + case CommentSlashStart: + { + var rn = lx.Next(); + if (rn == CommentSlashStart) + { + lx.Push(LexTop); + return LexCommentStart; + } + + lx.Backup(); + if (IsWhitespace(r)) + { + return LexTopValueEnd; + } + + break; + } + + default: + if (IsWhitespace(r)) + { + return LexTopValueEnd; + } + + break; + } + + if (IsNL(r) || r == Eof || r == OptValTerm || r == TopOptValTerm || r == TopOptTerm) + { + lx.Ignore(); + return LexTop; + } + + return lx.Errorf($"Expected a top-level value to end with a new line, comment or EOF, but got '{EscapeSpecial(r)}' instead."); + } + + private static LexState? LexBlockStart(NatsConfLexer lx) + { + var r = lx.Next(); + if (char.IsWhiteSpace(r)) + { + return LexSkip(lx, LexBlockStart); + } + + switch (r) + { + case TopOptStart: + lx.Push(LexBlockEnd); + return LexSkip(lx, LexBlockStart); + case TopOptTerm: + lx.Ignore(); + return lx.Pop(); + case CommentHashStart: + lx.Push(LexBlockStart); + return LexCommentStart; + case CommentSlashStart: + { + var rn = lx.Next(); + if (rn == CommentSlashStart) + { + lx.Push(LexBlockStart); + return LexCommentStart; + } + + lx.Backup(); + goto case Eof; + } + + case Eof: + if (lx._pos > lx._start) + { + return lx.Errorf("Unexpected EOF."); + } + + lx.Emit(TokenType.Eof); + return null; + } + + lx.Backup(); + lx.Push(LexBlockValueEnd); + return LexKeyStart; + } + + private static LexState? 
LexBlockValueEnd(NatsConfLexer lx) + { + var r = lx.Next(); + switch (r) + { + case CommentHashStart: + lx.Push(LexBlockValueEnd); + return LexCommentStart; + case CommentSlashStart: + { + var rn = lx.Next(); + if (rn == CommentSlashStart) + { + lx.Push(LexBlockValueEnd); + return LexCommentStart; + } + + lx.Backup(); + if (IsWhitespace(r)) + { + return LexBlockValueEnd; + } + + break; + } + + default: + if (IsWhitespace(r)) + { + return LexBlockValueEnd; + } + + break; + } + + if (IsNL(r) || r == OptValTerm || r == TopOptValTerm) + { + lx.Ignore(); + return LexBlockStart; + } + + if (r == TopOptTerm) + { + lx.Backup(); + return LexBlockEnd; + } + + return lx.Errorf($"Expected a block-level value to end with a new line, comment or EOF, but got '{EscapeSpecial(r)}' instead."); + } + + private static LexState? LexBlockEnd(NatsConfLexer lx) + { + var r = lx.Next(); + switch (r) + { + case CommentHashStart: + lx.Push(LexBlockStart); + return LexCommentStart; + case CommentSlashStart: + { + var rn = lx.Next(); + if (rn == CommentSlashStart) + { + lx.Push(LexBlockStart); + return LexCommentStart; + } + + lx.Backup(); + if (IsNL(r) || IsWhitespace(r)) + { + return LexBlockEnd; + } + + break; + } + + default: + if (IsNL(r) || IsWhitespace(r)) + { + return LexBlockEnd; + } + + break; + } + + if (r == OptValTerm || r == TopOptValTerm) + { + lx.Ignore(); + return LexBlockStart; + } + + if (r == TopOptTerm) + { + lx.Ignore(); + return lx.Pop(); + } + + return lx.Errorf($"Expected a block-level to end with a '}}', but got '{EscapeSpecial(r)}' instead."); + } + + private static LexState? 
LexKeyStart(NatsConfLexer lx) + { + var r = lx.Peek(); + if (IsKeySeparator(r)) + { + return lx.Errorf($"Unexpected key separator '{r}'"); + } + + if (char.IsWhiteSpace(r)) + { + lx.Next(); + return LexSkip(lx, LexKeyStart); + } + + if (r == DqStringStart) + { + lx.Next(); + return LexSkip(lx, LexDubQuotedKey); + } + + if (r == SqStringStart) + { + lx.Next(); + return LexSkip(lx, LexQuotedKey); + } + + lx.Ignore(); + lx.Next(); + return LexKey; + } + + private static LexState? LexDubQuotedKey(NatsConfLexer lx) + { + var r = lx.Peek(); + if (r == DqStringEnd) + { + lx.Emit(TokenType.Key); + lx.Next(); + return LexSkip(lx, LexKeyEnd); + } + + if (r == Eof) + { + if (lx._pos > lx._start) + { + return lx.Errorf("Unexpected EOF."); + } + + lx.Emit(TokenType.Eof); + return null; + } + + lx.Next(); + return LexDubQuotedKey; + } + + private static LexState? LexQuotedKey(NatsConfLexer lx) + { + var r = lx.Peek(); + if (r == SqStringEnd) + { + lx.Emit(TokenType.Key); + lx.Next(); + return LexSkip(lx, LexKeyEnd); + } + + if (r == Eof) + { + if (lx._pos > lx._start) + { + return lx.Errorf("Unexpected EOF."); + } + + lx.Emit(TokenType.Eof); + return null; + } + + lx.Next(); + return LexQuotedKey; + } + + private LexState? KeyCheckKeyword(LexState fallThrough, LexState? push) + { + var key = _input[_start.._pos].ToLowerInvariant(); + if (key == "include") + { + Ignore(); + if (push is not null) + { + Push(push); + } + + return LexIncludeStart; + } + + Emit(TokenType.Key); + return fallThrough; + } + + private static LexState? LexIncludeStart(NatsConfLexer lx) + { + var r = lx.Next(); + if (IsWhitespace(r)) + { + return LexSkip(lx, LexIncludeStart); + } + + lx.Backup(); + return LexInclude; + } + + private static LexState? 
LexIncludeQuotedString(NatsConfLexer lx) + { + var r = lx.Next(); + if (r == SqStringEnd) + { + lx.Backup(); + lx.Emit(TokenType.Include); + lx.Next(); + lx.Ignore(); + return lx.Pop(); + } + + if (r == Eof) + { + return lx.Errorf("Unexpected EOF in quoted include"); + } + + return LexIncludeQuotedString; + } + + private static LexState? LexIncludeDubQuotedString(NatsConfLexer lx) + { + var r = lx.Next(); + if (r == DqStringEnd) + { + lx.Backup(); + lx.Emit(TokenType.Include); + lx.Next(); + lx.Ignore(); + return lx.Pop(); + } + + if (r == Eof) + { + return lx.Errorf("Unexpected EOF in double quoted include"); + } + + return LexIncludeDubQuotedString; + } + + private static LexState? LexIncludeString(NatsConfLexer lx) + { + var r = lx.Next(); + if (IsNL(r) || r == Eof || r == OptValTerm || r == MapEndChar || IsWhitespace(r)) + { + lx.Backup(); + lx.Emit(TokenType.Include); + return lx.Pop(); + } + + if (r == SqStringEnd) + { + lx.Backup(); + lx.Emit(TokenType.Include); + lx.Next(); + lx.Ignore(); + return lx.Pop(); + } + + return LexIncludeString; + } + + private static LexState? LexInclude(NatsConfLexer lx) + { + var r = lx.Next(); + switch (r) + { + case SqStringStart: + lx.Ignore(); + return LexIncludeQuotedString; + case DqStringStart: + lx.Ignore(); + return LexIncludeDubQuotedString; + case ArrayStartChar: + return lx.Errorf("Expected include value but found start of an array"); + case MapStartChar: + return lx.Errorf("Expected include value but found start of a map"); + case BlockStartChar: + return lx.Errorf("Expected include value but found start of a block"); + case '\\': + return lx.Errorf("Expected include value but found escape sequence"); + } + + if (char.IsDigit(r) || r == '-') + { + return lx.Errorf("Expected include value but found start of a number"); + } + + if (IsNL(r)) + { + return lx.Errorf("Expected include value but found new line"); + } + + lx.Backup(); + return LexIncludeString; + } + + private static LexState? 
LexKey(NatsConfLexer lx) + { + var r = lx.Peek(); + if (char.IsWhiteSpace(r) && !IsNL(r)) + { + // Spaces signal we could be looking at a keyword, e.g. include. + return lx.KeyCheckKeyword(LexKeyEnd, null); + } + + if (IsKeySeparator(r) || r == Eof) + { + lx.Emit(TokenType.Key); + return LexKeyEnd; + } + + if (IsNL(r)) + { + // Newline after key with no separator — check for keyword. + return lx.KeyCheckKeyword(LexKeyEnd, null); + } + + lx.Next(); + return LexKey; + } + + private static LexState? LexKeyEnd(NatsConfLexer lx) + { + var r = lx.Next(); + if (char.IsWhiteSpace(r) && !IsNL(r)) + { + return LexSkip(lx, LexKeyEnd); + } + + if (IsKeySeparator(r)) + { + return LexSkip(lx, LexValue); + } + + if (r == Eof) + { + lx.Emit(TokenType.Eof); + return null; + } + + // We start the value here. + lx.Backup(); + return LexValue; + } + + private static LexState? LexValue(NatsConfLexer lx) + { + var r = lx.Next(); + if (IsWhitespace(r)) + { + return LexSkip(lx, LexValue); + } + + switch (r) + { + case ArrayStartChar: + lx.Ignore(); + lx.Emit(TokenType.ArrayStart); + return LexArrayValue; + case MapStartChar: + lx.Ignore(); + lx.Emit(TokenType.MapStart); + return LexMapKeyStart; + case SqStringStart: + lx.Ignore(); + return LexQuotedString; + case DqStringStart: + lx.Ignore(); + lx._stringStateFn = LexDubQuotedString; + return LexDubQuotedString; + case '-': + return LexNegNumberStart; + case BlockStartChar: + lx.Ignore(); + return LexBlock; + case '.': + return lx.Errorf("Floats must start with a digit"); + } + + if (char.IsDigit(r)) + { + lx.Backup(); + return LexNumberOrDateOrStringOrIPStart; + } + + if (IsNL(r)) + { + return lx.Errorf("Expected value but found new line"); + } + + lx.Backup(); + lx._stringStateFn = LexString; + return LexString; + } + + private static LexState? 
LexArrayValue(NatsConfLexer lx) + { + var r = lx.Next(); + if (char.IsWhiteSpace(r)) + { + return LexSkip(lx, LexArrayValue); + } + + switch (r) + { + case CommentHashStart: + lx.Push(LexArrayValue); + return LexCommentStart; + case CommentSlashStart: + { + var rn = lx.Next(); + if (rn == CommentSlashStart) + { + lx.Push(LexArrayValue); + return LexCommentStart; + } + + lx.Backup(); + // fallthrough to ArrayValTerm check + if (r == ArrayValTerm) + { + return lx.Errorf($"Unexpected array value terminator '{ArrayValTerm}'."); + } + + break; + } + + case ArrayValTerm: + return lx.Errorf($"Unexpected array value terminator '{ArrayValTerm}'."); + case ArrayEndChar: + return LexArrayEnd; + } + + lx.Backup(); + lx.Push(LexArrayValueEnd); + return LexValue; + } + + private static LexState? LexArrayValueEnd(NatsConfLexer lx) + { + var r = lx.Next(); + if (IsWhitespace(r)) + { + return LexSkip(lx, LexArrayValueEnd); + } + + switch (r) + { + case CommentHashStart: + lx.Push(LexArrayValueEnd); + return LexCommentStart; + case CommentSlashStart: + { + var rn = lx.Next(); + if (rn == CommentSlashStart) + { + lx.Push(LexArrayValueEnd); + return LexCommentStart; + } + + lx.Backup(); + // fallthrough + if (r == ArrayValTerm || IsNL(r)) + { + return LexSkip(lx, LexArrayValue); + } + + break; + } + + case ArrayEndChar: + return LexArrayEnd; + } + + if (r == ArrayValTerm || IsNL(r)) + { + return LexSkip(lx, LexArrayValue); + } + + return lx.Errorf($"Expected an array value terminator ',' or an array terminator ']', but got '{EscapeSpecial(r)}' instead."); + } + + private static LexState? LexArrayEnd(NatsConfLexer lx) + { + lx.Ignore(); + lx.Emit(TokenType.ArrayEnd); + return lx.Pop(); + } + + private static LexState? 
LexMapKeyStart(NatsConfLexer lx) + { + var r = lx.Peek(); + if (IsKeySeparator(r)) + { + return lx.Errorf($"Unexpected key separator '{r}'."); + } + + if (r == ArrayEndChar) + { + return lx.Errorf($"Unexpected array end '{r}' processing map."); + } + + if (char.IsWhiteSpace(r)) + { + lx.Next(); + return LexSkip(lx, LexMapKeyStart); + } + + if (r == MapEndChar) + { + lx.Next(); + return LexSkip(lx, LexMapEnd); + } + + if (r == CommentHashStart) + { + lx.Next(); + lx.Push(LexMapKeyStart); + return LexCommentStart; + } + + if (r == CommentSlashStart) + { + lx.Next(); + var rn = lx.Next(); + if (rn == CommentSlashStart) + { + lx.Push(LexMapKeyStart); + return LexCommentStart; + } + + lx.Backup(); + } + + if (r == SqStringStart) + { + lx.Next(); + return LexSkip(lx, LexMapQuotedKey); + } + + if (r == DqStringStart) + { + lx.Next(); + return LexSkip(lx, LexMapDubQuotedKey); + } + + if (r == Eof) + { + return lx.Errorf("Unexpected EOF processing map."); + } + + lx.Ignore(); + lx.Next(); + return LexMapKey; + } + + private static LexState? LexMapQuotedKey(NatsConfLexer lx) + { + var r = lx.Peek(); + if (r == Eof) + { + return lx.Errorf("Unexpected EOF processing quoted map key."); + } + + if (r == SqStringEnd) + { + lx.Emit(TokenType.Key); + lx.Next(); + return LexSkip(lx, LexMapKeyEnd); + } + + lx.Next(); + return LexMapQuotedKey; + } + + private static LexState? LexMapDubQuotedKey(NatsConfLexer lx) + { + var r = lx.Peek(); + if (r == Eof) + { + return lx.Errorf("Unexpected EOF processing double quoted map key."); + } + + if (r == DqStringEnd) + { + lx.Emit(TokenType.Key); + lx.Next(); + return LexSkip(lx, LexMapKeyEnd); + } + + lx.Next(); + return LexMapDubQuotedKey; + } + + private static LexState? 
LexMapKey(NatsConfLexer lx) + { + var r = lx.Peek(); + if (r == Eof) + { + return lx.Errorf("Unexpected EOF processing map key."); + } + + if (char.IsWhiteSpace(r) && !IsNL(r)) + { + return lx.KeyCheckKeyword(LexMapKeyEnd, LexMapValueEnd); + } + + if (IsNL(r)) + { + return lx.KeyCheckKeyword(LexMapKeyEnd, LexMapValueEnd); + } + + if (IsKeySeparator(r)) + { + lx.Emit(TokenType.Key); + return LexMapKeyEnd; + } + + lx.Next(); + return LexMapKey; + } + + private static LexState? LexMapKeyEnd(NatsConfLexer lx) + { + var r = lx.Next(); + if (char.IsWhiteSpace(r) && !IsNL(r)) + { + return LexSkip(lx, LexMapKeyEnd); + } + + if (IsKeySeparator(r)) + { + return LexSkip(lx, LexMapValue); + } + + // We start the value here. + lx.Backup(); + return LexMapValue; + } + + private static LexState? LexMapValue(NatsConfLexer lx) + { + var r = lx.Next(); + if (char.IsWhiteSpace(r)) + { + return LexSkip(lx, LexMapValue); + } + + if (r == MapValTerm) + { + return lx.Errorf($"Unexpected map value terminator '{MapValTerm}'."); + } + + if (r == MapEndChar) + { + return LexSkip(lx, LexMapEnd); + } + + lx.Backup(); + lx.Push(LexMapValueEnd); + return LexValue; + } + + private static LexState? 
LexMapValueEnd(NatsConfLexer lx) + { + var r = lx.Next(); + if (IsWhitespace(r)) + { + return LexSkip(lx, LexMapValueEnd); + } + + switch (r) + { + case CommentHashStart: + lx.Push(LexMapValueEnd); + return LexCommentStart; + case CommentSlashStart: + { + var rn = lx.Next(); + if (rn == CommentSlashStart) + { + lx.Push(LexMapValueEnd); + return LexCommentStart; + } + + lx.Backup(); + // fallthrough + if (r == OptValTerm || r == MapValTerm || IsNL(r)) + { + return LexSkip(lx, LexMapKeyStart); + } + + break; + } + } + + if (r == OptValTerm || r == MapValTerm || IsNL(r)) + { + return LexSkip(lx, LexMapKeyStart); + } + + if (r == MapEndChar) + { + return LexSkip(lx, LexMapEnd); + } + + return lx.Errorf($"Expected a map value terminator ',' or a map terminator '}}', but got '{EscapeSpecial(r)}' instead."); + } + + private static LexState? LexMapEnd(NatsConfLexer lx) + { + lx.Ignore(); + lx.Emit(TokenType.MapEnd); + return lx.Pop(); + } + + private bool IsBool() + { + var str = _input[_start.._pos].ToLowerInvariant(); + return str is "true" or "false" or "on" or "off" or "yes" or "no"; + } + + private bool IsVariable() + { + if (_start >= _input.Length) + { + return false; + } + + if (_input[_start] == '$') + { + _start += 1; + return true; + } + + return false; + } + + private static LexState? LexQuotedString(NatsConfLexer lx) + { + var r = lx.Next(); + if (r == SqStringEnd) + { + lx.Backup(); + lx.Emit(TokenType.String); + lx.Next(); + lx.Ignore(); + return lx.Pop(); + } + + if (r == Eof) + { + if (lx._pos > lx._start) + { + return lx.Errorf("Unexpected EOF."); + } + + lx.Emit(TokenType.Eof); + return null; + } + + return LexQuotedString; + } + + private static LexState? 
LexDubQuotedString(NatsConfLexer lx) + { + var r = lx.Next(); + if (r == '\\') + { + lx.AddCurrentStringPart(1); + return LexStringEscape; + } + + if (r == DqStringEnd) + { + lx.Backup(); + lx.EmitString(); + lx.Next(); + lx.Ignore(); + return lx.Pop(); + } + + if (r == Eof) + { + if (lx._pos > lx._start) + { + return lx.Errorf("Unexpected EOF."); + } + + lx.Emit(TokenType.Eof); + return null; + } + + return LexDubQuotedString; + } + + private static LexState? LexString(NatsConfLexer lx) + { + var r = lx.Next(); + if (r == '\\') + { + lx.AddCurrentStringPart(1); + return LexStringEscape; + } + + // Termination of non-quoted strings. + if (IsNL(r) || r == Eof || r == OptValTerm || + r == ArrayValTerm || r == ArrayEndChar || r == MapEndChar || + IsWhitespace(r)) + { + lx.Backup(); + if (lx.HasEscapedParts()) + { + lx.EmitString(); + } + else if (lx.IsBool()) + { + lx.Emit(TokenType.Bool); + } + else if (lx.IsVariable()) + { + lx.Emit(TokenType.Variable); + } + else + { + lx.EmitString(); + } + + return lx.Pop(); + } + + if (r == SqStringEnd) + { + lx.Backup(); + lx.EmitString(); + lx.Next(); + lx.Ignore(); + return lx.Pop(); + } + + return LexString; + } + + private static LexState? LexBlock(NatsConfLexer lx) + { + var r = lx.Next(); + if (r == BlockEndChar) + { + lx.Backup(); + lx.Backup(); + + // Looking for a ')' character on a line by itself. + // If the previous character isn't a newline, keep processing. + if (lx.Next() != '\n') + { + lx.Next(); + return LexBlock; + } + + lx.Next(); + + // Make sure the next character is a newline or EOF. + var next = lx.Next(); + if (next is '\n' or Eof) + { + lx.Backup(); + lx.Backup(); + lx.Emit(TokenType.String); + lx.Next(); + lx.Ignore(); + return lx.Pop(); + } + + lx.Backup(); + return LexBlock; + } + + if (r == Eof) + { + return lx.Errorf("Unexpected EOF processing block."); + } + + return LexBlock; + } + + private static LexState? 
LexStringEscape(NatsConfLexer lx) + { + var r = lx.Next(); + return r switch + { + 'x' => LexStringBinary(lx), + 't' => lx.AddStringPart("\t"), + 'n' => lx.AddStringPart("\n"), + 'r' => lx.AddStringPart("\r"), + '"' => lx.AddStringPart("\""), + '\\' => lx.AddStringPart("\\"), + _ => lx.Errorf($"Invalid escape character '{EscapeSpecial(r)}'. Only the following escape characters are allowed: \\xXX, \\t, \\n, \\r, \\\", \\\\."), + }; + } + + private static LexState? LexStringBinary(NatsConfLexer lx) + { + var r1 = lx.Next(); + if (IsNL(r1)) + { + return lx.Errorf("Expected two hexadecimal digits after '\\x', but hit end of line"); + } + + var r2 = lx.Next(); + if (IsNL(r2)) + { + return lx.Errorf("Expected two hexadecimal digits after '\\x', but hit end of line"); + } + + var hexStr = lx._input[(lx._pos - 2)..lx._pos]; + try + { + var bytes = Convert.FromHexString(hexStr); + return lx.AddStringPart(System.Text.Encoding.Latin1.GetString(bytes)); + } + catch (FormatException) + { + return lx.Errorf($"Expected two hexadecimal digits after '\\x', but got '{hexStr}'"); + } + } + + private static LexState? LexNumberOrDateOrStringOrIPStart(NatsConfLexer lx) + { + var r = lx.Next(); + if (!char.IsDigit(r)) + { + if (r == '.') + { + return lx.Errorf("Floats must start with a digit, not '.'."); + } + + return lx.Errorf($"Expected a digit but got '{EscapeSpecial(r)}'."); + } + + return LexNumberOrDateOrStringOrIP; + } + + private static LexState? LexNumberOrDateOrStringOrIP(NatsConfLexer lx) + { + var r = lx.Next(); + if (r == '-') + { + if (lx._pos - lx._start != 5) + { + return lx.Errorf("All ISO8601 dates must be in full Zulu form."); + } + + return LexDateAfterYear; + } + + if (char.IsDigit(r)) + { + return LexNumberOrDateOrStringOrIP; + } + + if (r == '.') + { + return LexFloatStart; + } + + if (IsNumberSuffix(r)) + { + return LexConvenientNumber; + } + + // Check if this is a terminator or a string character. 
+ if (!(IsNL(r) || r == Eof || r == MapEndChar || r == OptValTerm || r == MapValTerm || IsWhitespace(r) || r == ArrayValTerm || r == ArrayEndChar)) + { + // Treat it as a string value. + lx._stringStateFn = LexString; + return LexString; + } + + lx.Backup(); + lx.Emit(TokenType.Integer); + return lx.Pop(); + } + + private static LexState? LexConvenientNumber(NatsConfLexer lx) + { + var r = lx.Next(); + if (r is 'b' or 'B' or 'i' or 'I') + { + return LexConvenientNumber; + } + + lx.Backup(); + if (IsNL(r) || r == Eof || r == MapEndChar || r == OptValTerm || r == MapValTerm || + IsWhitespace(r) || char.IsDigit(r) || r == ArrayValTerm || r == ArrayEndChar) + { + lx.Emit(TokenType.Integer); + return lx.Pop(); + } + + // This is not a number, treat as string. + lx._stringStateFn = LexString; + return LexString; + } + + private static LexState? LexDateAfterYear(NatsConfLexer lx) + { + // Expected: MM-DDTHH:MM:SSZ + char[] formats = + [ + '0', '0', '-', '0', '0', + 'T', + '0', '0', ':', '0', '0', ':', '0', '0', + 'Z', + ]; + + foreach (var f in formats) + { + var r = lx.Next(); + if (f == '0') + { + if (!char.IsDigit(r)) + { + return lx.Errorf($"Expected digit in ISO8601 datetime, but found '{EscapeSpecial(r)}' instead."); + } + } + else if (f != r) + { + return lx.Errorf($"Expected '{f}' in ISO8601 datetime, but found '{EscapeSpecial(r)}' instead."); + } + } + + lx.Emit(TokenType.DateTime); + return lx.Pop(); + } + + private static LexState? LexNegNumberStart(NatsConfLexer lx) + { + var r = lx.Next(); + if (!char.IsDigit(r)) + { + if (r == '.') + { + return lx.Errorf("Floats must start with a digit, not '.'."); + } + + return lx.Errorf($"Expected a digit but got '{EscapeSpecial(r)}'."); + } + + return LexNegNumber; + } + + private static LexState? 
LexNegNumber(NatsConfLexer lx) + { + var r = lx.Next(); + if (char.IsDigit(r)) + { + return LexNegNumber; + } + + if (r == '.') + { + return LexFloatStart; + } + + if (IsNumberSuffix(r)) + { + return LexConvenientNumber; + } + + lx.Backup(); + lx.Emit(TokenType.Integer); + return lx.Pop(); + } + + private static LexState? LexFloatStart(NatsConfLexer lx) + { + var r = lx.Next(); + if (!char.IsDigit(r)) + { + return lx.Errorf($"Floats must have a digit after the '.', but got '{EscapeSpecial(r)}' instead."); + } + + return LexFloat; + } + + private static LexState? LexFloat(NatsConfLexer lx) + { + var r = lx.Next(); + if (char.IsDigit(r)) + { + return LexFloat; + } + + // Not a digit; if it's another '.', this might be an IP address. + if (r == '.') + { + return LexIPAddr; + } + + lx.Backup(); + lx.Emit(TokenType.Float); + return lx.Pop(); + } + + private static LexState? LexIPAddr(NatsConfLexer lx) + { + var r = lx.Next(); + if (char.IsDigit(r) || r is '.' or ':' or '-') + { + return LexIPAddr; + } + + lx.Backup(); + lx.Emit(TokenType.String); + return lx.Pop(); + } + + private static LexState? LexCommentStart(NatsConfLexer lx) + { + lx.Ignore(); + lx.Emit(TokenType.Comment); + return LexComment; + } + + private static LexState? LexComment(NatsConfLexer lx) + { + var r = lx.Peek(); + if (IsNL(r) || r == Eof) + { + // Consume the comment text but don't emit it as a user-visible token. + // Just ignore it and pop back. 
+ lx.Ignore(); + return lx.Pop(); + } + + lx.Next(); + return LexComment; + } + + private static LexState LexSkip(NatsConfLexer lx, LexState nextState) + { + lx.Ignore(); + return nextState; + } + + private static string EscapeSpecial(char c) => c switch + { + '\n' => "\\n", + '\r' => "\\r", + '\t' => "\\t", + Eof => "EOF", + _ => c.ToString(), + }; +} diff --git a/src/NATS.Server/Configuration/NatsConfToken.cs b/src/NATS.Server/Configuration/NatsConfToken.cs new file mode 100644 index 0000000..8054239 --- /dev/null +++ b/src/NATS.Server/Configuration/NatsConfToken.cs @@ -0,0 +1,24 @@ +// Port of Go conf/lex.go token types. + +namespace NATS.Server.Configuration; + +public enum TokenType +{ + Error, + Eof, + Key, + String, + Bool, + Integer, + Float, + DateTime, + ArrayStart, + ArrayEnd, + MapStart, + MapEnd, + Variable, + Include, + Comment, +} + +public readonly record struct Token(TokenType Type, string Value, int Line, int Position); diff --git a/tests/NATS.Server.Tests/NatsConfLexerTests.cs b/tests/NATS.Server.Tests/NatsConfLexerTests.cs new file mode 100644 index 0000000..d664fc1 --- /dev/null +++ b/tests/NATS.Server.Tests/NatsConfLexerTests.cs @@ -0,0 +1,221 @@ +using NATS.Server.Configuration; + +namespace NATS.Server.Tests; + +public class NatsConfLexerTests +{ + [Fact] + public void Lex_SimpleKeyStringValue_ReturnsKeyAndString() + { + var tokens = NatsConfLexer.Tokenize("foo = \"bar\"").ToList(); + tokens[0].Type.ShouldBe(TokenType.Key); + tokens[0].Value.ShouldBe("foo"); + tokens[1].Type.ShouldBe(TokenType.String); + tokens[1].Value.ShouldBe("bar"); + tokens[2].Type.ShouldBe(TokenType.Eof); + } + + [Fact] + public void Lex_SingleQuotedString_ReturnsString() + { + var tokens = NatsConfLexer.Tokenize("foo = 'bar'").ToList(); + tokens[1].Type.ShouldBe(TokenType.String); + tokens[1].Value.ShouldBe("bar"); + } + + [Fact] + public void Lex_IntegerValue_ReturnsInteger() + { + var tokens = NatsConfLexer.Tokenize("port = 4222").ToList(); + 
tokens[0].Type.ShouldBe(TokenType.Key); + tokens[0].Value.ShouldBe("port"); + tokens[1].Type.ShouldBe(TokenType.Integer); + tokens[1].Value.ShouldBe("4222"); + } + + [Fact] + public void Lex_IntegerWithSuffix_ReturnsInteger() + { + var tokens = NatsConfLexer.Tokenize("size = 64mb").ToList(); + tokens[1].Type.ShouldBe(TokenType.Integer); + tokens[1].Value.ShouldBe("64mb"); + } + + [Fact] + public void Lex_BooleanValues_ReturnsBool() + { + foreach (var val in new[] { "true", "false", "yes", "no", "on", "off" }) + { + var tokens = NatsConfLexer.Tokenize($"flag = {val}").ToList(); + tokens[1].Type.ShouldBe(TokenType.Bool); + } + } + + [Fact] + public void Lex_FloatValue_ReturnsFloat() + { + var tokens = NatsConfLexer.Tokenize("rate = 2.5").ToList(); + tokens[1].Type.ShouldBe(TokenType.Float); + tokens[1].Value.ShouldBe("2.5"); + } + + [Fact] + public void Lex_NegativeNumber_ReturnsInteger() + { + var tokens = NatsConfLexer.Tokenize("offset = -10").ToList(); + tokens[1].Type.ShouldBe(TokenType.Integer); + tokens[1].Value.ShouldBe("-10"); + } + + [Fact] + public void Lex_DatetimeValue_ReturnsDatetime() + { + var tokens = NatsConfLexer.Tokenize("ts = 2024-01-15T10:30:00Z").ToList(); + tokens[1].Type.ShouldBe(TokenType.DateTime); + } + + [Fact] + public void Lex_HashComment_IsIgnored() + { + var tokens = NatsConfLexer.Tokenize("# this is a comment\nfoo = 1").ToList(); + var keys = tokens.Where(t => t.Type == TokenType.Key).ToList(); + keys.Count.ShouldBe(1); + keys[0].Value.ShouldBe("foo"); + } + + [Fact] + public void Lex_SlashComment_IsIgnored() + { + var tokens = NatsConfLexer.Tokenize("// comment\nfoo = 1").ToList(); + var keys = tokens.Where(t => t.Type == TokenType.Key).ToList(); + keys.Count.ShouldBe(1); + } + + [Fact] + public void Lex_MapBlock_ReturnsMapStartEnd() + { + var tokens = NatsConfLexer.Tokenize("auth { user: admin }").ToList(); + tokens[0].Type.ShouldBe(TokenType.Key); + tokens[0].Value.ShouldBe("auth"); + tokens[1].Type.ShouldBe(TokenType.MapStart); + 
tokens[2].Type.ShouldBe(TokenType.Key); + tokens[2].Value.ShouldBe("user"); + tokens[3].Type.ShouldBe(TokenType.String); + tokens[3].Value.ShouldBe("admin"); + tokens[4].Type.ShouldBe(TokenType.MapEnd); + } + + [Fact] + public void Lex_Array_ReturnsArrayStartEnd() + { + var tokens = NatsConfLexer.Tokenize("items = [1, 2, 3]").ToList(); + tokens[1].Type.ShouldBe(TokenType.ArrayStart); + tokens[2].Type.ShouldBe(TokenType.Integer); + tokens[2].Value.ShouldBe("1"); + tokens[5].Type.ShouldBe(TokenType.ArrayEnd); + } + + [Fact] + public void Lex_Variable_ReturnsVariable() + { + var tokens = NatsConfLexer.Tokenize("secret = $MY_VAR").ToList(); + tokens[1].Type.ShouldBe(TokenType.Variable); + tokens[1].Value.ShouldBe("MY_VAR"); + } + + [Fact] + public void Lex_Include_ReturnsInclude() + { + var tokens = NatsConfLexer.Tokenize("include \"auth.conf\"").ToList(); + tokens[0].Type.ShouldBe(TokenType.Include); + tokens[0].Value.ShouldBe("auth.conf"); + } + + [Fact] + public void Lex_EscapeSequences_AreProcessed() + { + var tokens = NatsConfLexer.Tokenize("msg = \"hello\\tworld\\n\"").ToList(); + tokens[1].Type.ShouldBe(TokenType.String); + tokens[1].Value.ShouldBe("hello\tworld\n"); + } + + [Fact] + public void Lex_HexEscape_IsProcessed() + { + var tokens = NatsConfLexer.Tokenize("val = \"\\x41\\x42\"").ToList(); + tokens[1].Value.ShouldBe("AB"); + } + + [Fact] + public void Lex_ColonSeparator_Works() + { + var tokens = NatsConfLexer.Tokenize("foo: bar").ToList(); + tokens[0].Type.ShouldBe(TokenType.Key); + tokens[1].Type.ShouldBe(TokenType.String); + } + + [Fact] + public void Lex_WhitespaceSeparator_Works() + { + var tokens = NatsConfLexer.Tokenize("foo bar").ToList(); + tokens[0].Type.ShouldBe(TokenType.Key); + tokens[1].Type.ShouldBe(TokenType.String); + } + + [Fact] + public void Lex_SemicolonTerminator_IsHandled() + { + var tokens = NatsConfLexer.Tokenize("foo = 1; bar = 2").ToList(); + var keys = tokens.Where(t => t.Type == TokenType.Key).ToList(); + 
keys.Count.ShouldBe(2); + } + + [Fact] + public void Lex_EmptyInput_ReturnsEof() + { + var tokens = NatsConfLexer.Tokenize("").ToList(); + tokens.Count.ShouldBe(1); + tokens[0].Type.ShouldBe(TokenType.Eof); + } + + [Fact] + public void Lex_BlockString_ReturnsString() + { + var input = "desc (\nthis is\na block\n)\n"; + var tokens = NatsConfLexer.Tokenize(input).ToList(); + tokens[0].Type.ShouldBe(TokenType.Key); + tokens[1].Type.ShouldBe(TokenType.String); + } + + [Fact] + public void Lex_IPAddress_ReturnsString() + { + var tokens = NatsConfLexer.Tokenize("host = 127.0.0.1").ToList(); + tokens[1].Type.ShouldBe(TokenType.String); + tokens[1].Value.ShouldBe("127.0.0.1"); + } + + [Fact] + public void Lex_TrackLineNumbers() + { + var tokens = NatsConfLexer.Tokenize("a = 1\nb = 2\nc = 3").ToList(); + tokens[0].Line.ShouldBe(1); // a + tokens[2].Line.ShouldBe(2); // b + tokens[4].Line.ShouldBe(3); // c + } + + [Fact] + public void Lex_UnterminatedString_ReturnsError() + { + var tokens = NatsConfLexer.Tokenize("foo = \"unterminated").ToList(); + tokens.ShouldContain(t => t.Type == TokenType.Error); + } + + [Fact] + public void Lex_StringStartingWithDigit_TreatedAsString() + { + var tokens = NatsConfLexer.Tokenize("foo = 3xyz").ToList(); + tokens[1].Type.ShouldBe(TokenType.String); + tokens[1].Value.ShouldBe("3xyz"); + } +}