feat(commons): quote-aware OverrideCsvParser (T16 CSV)

This commit is contained in:
Joseph Doherty
2026-06-18 02:01:38 -04:00
parent a87bf8a459
commit 77a31ba994
2 changed files with 369 additions and 0 deletions
@@ -0,0 +1,197 @@
namespace ZB.MOM.WW.ScadaBridge.Commons.Types;
/// <summary>
/// A single data row read from an instance-attribute-override CSV.
/// </summary>
/// <param name="AttributeName">Name of the attribute the override targets.</param>
/// <param name="Value">
/// Canonical-string value of the override; <c>null</c> means "clear the override".
/// </param>
/// <param name="ElementType">Optional list element type; <c>null</c> when none was given.</param>
/// <param name="LineNumber">
/// 1-based physical line of the row in the source text (a header on the first
/// line is line 1), so downstream errors can point back at the operator's file.
/// </param>
public sealed record OverrideCsvRow(
    string AttributeName,
    string? Value,
    string? ElementType,
    int LineNumber);
/// <summary>
/// Result of parsing an override CSV. Parsing never throws: malformed rows are
/// reported in <see cref="Errors"/> and left out of <see cref="Rows"/>, while
/// well-formed rows still flow through. The parser is purely syntactic; checking
/// names and types against the instance schema is left to downstream callers.
/// </summary>
/// <param name="Rows">Rows that parsed successfully.</param>
/// <param name="Errors">Human-readable error messages for the input that was rejected.</param>
public sealed record OverrideCsvParseResult(
    IReadOnlyList<OverrideCsvRow> Rows,
    IReadOnlyList<string> Errors);
/// <summary>
/// Pure, dependency-free, quote-aware parser turning instance-attribute-override
/// CSV text into structured rows plus per-line errors. Callers supply the text
/// (no file I/O).
/// </summary>
/// <remarks>
/// <para>
/// The header row is required and case-insensitive
/// (<c>AttributeName,Value,ElementType</c>); the <c>ElementType</c> column is
/// optional. A leading byte-order mark (U+FEFF) is ignored.
/// </para>
/// <para>
/// Fields follow RFC-4180-style quoting: a double-quoted section may embed commas
/// and doubled quotes (<c>""</c> → <c>"</c>). Whitespace inside quotes is kept
/// verbatim; whitespace outside quotes at either edge of a field is trimmed. An
/// empty field (quoted or not) in the Value/ElementType column yields <c>null</c>.
/// </para>
/// <para>
/// Input is split into physical lines before field splitting, so a quoted field
/// cannot span lines; a line that ends inside an open quote is reported as an error.
/// </para>
/// </remarks>
public static class OverrideCsvParser
{
    private const string HeaderError =
        "Missing or invalid header row. Expected 'AttributeName,Value,ElementType' " +
        "(ElementType column optional).";

    /// <summary>U+FEFF; left at the start of the text by some encoders/decoders and not removed by <c>Trim()</c>.</summary>
    private const char ByteOrderMark = '\uFEFF';

    /// <summary>
    /// Parses override CSV <paramref name="csvText"/>. Returns parsed rows and any
    /// per-line errors; never throws. On a missing/unrecognized header returns zero
    /// rows and a single header error.
    /// </summary>
    /// <param name="csvText">Full CSV text; <c>null</c> is treated as empty input.</param>
    /// <returns>The rows that parsed cleanly plus one message per rejected line.</returns>
    public static OverrideCsvParseResult Parse(string csvText)
    {
        var rows = new List<OverrideCsvRow>();
        var errors = new List<string>();

        // A byte-order mark would otherwise become part of the first header cell
        // and make a perfectly valid file fail the header check.
        var text = csvText ?? string.Empty;
        if (text.Length > 0 && text[0] == ByteOrderMark)
            text = text.Substring(1);

        // Split into physical lines; \r\n and \r are normalized to \n boundaries.
        var lines = text.Replace("\r\n", "\n").Replace('\r', '\n').Split('\n');

        var headerSeen = false;
        var hasElementTypeColumn = false;

        for (var i = 0; i < lines.Length; i++)
        {
            var lineNumber = i + 1;
            var rawLine = lines[i];

            // Skip fully-blank lines (whitespace-only included) without error.
            if (string.IsNullOrWhiteSpace(rawLine))
                continue;

            var fields = SplitFields(rawLine, out var hasUnterminatedQuote);

            if (!headerSeen)
            {
                // The first non-blank line must be the header; anything else aborts the parse.
                if (hasUnterminatedQuote || !TryMatchHeader(fields, out hasElementTypeColumn))
                {
                    errors.Add(HeaderError);
                    return new OverrideCsvParseResult(rows, errors);
                }

                headerSeen = true;
                continue;
            }

            // A line that ends inside an open quote is malformed: the field split is
            // unreliable (commas after the quote were swallowed), so reject the row
            // instead of guessing.
            if (hasUnterminatedQuote)
            {
                errors.Add($"Line {lineNumber}: quoted field is missing its closing double quote.");
                continue;
            }

            var expectedColumns = hasElementTypeColumn ? 3 : 2;
            if (fields.Count != expectedColumns)
            {
                errors.Add(
                    $"Line {lineNumber}: expected {expectedColumns} columns but found {fields.Count}.");
                continue;
            }

            var attributeName = fields[0];
            if (string.IsNullOrWhiteSpace(attributeName))
            {
                errors.Add($"Line {lineNumber}: AttributeName must not be blank.");
                continue;
            }

            var value = NullIfEmpty(fields[1]);
            var elementType = hasElementTypeColumn ? NullIfEmpty(fields[2]) : null;
            rows.Add(new OverrideCsvRow(attributeName, value, elementType, lineNumber));
        }

        if (!headerSeen)
            errors.Add(HeaderError);

        return new OverrideCsvParseResult(rows, errors);
    }

    /// <summary>
    /// Matches the required header (case-insensitive); reports whether the optional
    /// ElementType column is present.
    /// </summary>
    /// <param name="fields">Already split and trimmed cells of the candidate header line.</param>
    /// <param name="hasElementTypeColumn"><c>true</c> only when a valid three-column header matched.</param>
    /// <returns><c>true</c> when the cells form a valid two- or three-column header.</returns>
    private static bool TryMatchHeader(IReadOnlyList<string> fields, out bool hasElementTypeColumn)
    {
        hasElementTypeColumn = false;

        if (fields.Count < 2 || fields.Count > 3)
            return false;

        if (!HeaderEquals(fields[0], "AttributeName") || !HeaderEquals(fields[1], "Value"))
            return false;

        if (fields.Count == 2)
            return true;

        hasElementTypeColumn = HeaderEquals(fields[2], "ElementType");
        return hasElementTypeColumn;
    }

    /// <summary>Case-insensitive ordinal comparison of a header cell against its expected name.</summary>
    private static bool HeaderEquals(string field, string expected) =>
        string.Equals(field, expected, StringComparison.OrdinalIgnoreCase);

    /// <summary>Maps an empty cell to <c>null</c>; any other text is returned unchanged.</summary>
    private static string? NullIfEmpty(string field) => field.Length == 0 ? null : field;

    /// <summary>
    /// RFC-4180-ish field splitter for a single physical line: quoted sections may
    /// embed commas and doubled quotes (<c>""</c> → <c>"</c>). Whitespace inside
    /// quotes is preserved; whitespace outside quotes at the field edges is trimmed.
    /// </summary>
    /// <param name="line">One physical line with no line terminator.</param>
    /// <param name="hasUnterminatedQuote">
    /// <c>true</c> when the line ends while a quoted section is still open; the
    /// returned fields are then unreliable and the caller should reject the line.
    /// </param>
    /// <returns>The cells of the line, in order; always at least one entry.</returns>
    private static List<string> SplitFields(string line, out bool hasUnterminatedQuote)
    {
        var fields = new List<string>();
        var field = new System.Text.StringBuilder();
        var inQuotes = false;

        // Offsets into `field` delimiting the text that came from inside quotes
        // (first opening quote .. last closing quote); -1 while the field is unquoted.
        var quotedStart = -1;
        var quotedEnd = -1;

        for (var i = 0; i < line.Length; i++)
        {
            var c = line[i];

            if (inQuotes)
            {
                if (c != '"')
                {
                    field.Append(c);
                }
                else if (i + 1 < line.Length && line[i + 1] == '"')
                {
                    // Doubled quote inside a quoted section → a single literal quote.
                    field.Append('"');
                    i++;
                }
                else
                {
                    inQuotes = false;
                    quotedEnd = field.Length;
                }

                continue;
            }

            switch (c)
            {
                case '"':
                    inQuotes = true;
                    if (quotedStart < 0)
                        quotedStart = field.Length;
                    break;
                case ',':
                    fields.Add(FinishField(field, quotedStart, quotedEnd));
                    field.Clear();
                    quotedStart = -1;
                    quotedEnd = -1;
                    break;
                default:
                    field.Append(c);
                    break;
            }
        }

        hasUnterminatedQuote = inQuotes;
        if (inQuotes)
            quotedEnd = field.Length; // keep quotedStart <= quotedEnd for FinishField

        fields.Add(FinishField(field, quotedStart, quotedEnd));
        return fields;
    }

    /// <summary>
    /// Produces the final text of one cell. An unquoted cell is trimmed on both
    /// sides; a cell with a quoted section is trimmed only outside that section, so
    /// whitespace the author put inside the quotes survives.
    /// </summary>
    /// <param name="field">Accumulated characters of the cell (quotes already removed).</param>
    /// <param name="quotedStart">Offset where quoted text begins, or -1 if the cell had no quotes.</param>
    /// <param name="quotedEnd">Offset just past the last quoted character; ignored when <paramref name="quotedStart"/> is -1.</param>
    private static string FinishField(System.Text.StringBuilder field, int quotedStart, int quotedEnd)
    {
        var text = field.ToString();
        if (quotedStart < 0)
            return text.Trim();

        var start = 0;
        while (start < quotedStart && char.IsWhiteSpace(text[start]))
            start++;

        var end = text.Length;
        while (end > quotedEnd && char.IsWhiteSpace(text[end - 1]))
            end--;

        return text.Substring(start, end - start);
    }
}
@@ -0,0 +1,172 @@
using ZB.MOM.WW.ScadaBridge.Commons.Types;
namespace ZB.MOM.WW.ScadaBridge.Commons.Tests;
/// <summary>
/// Tests for <see cref="OverrideCsvParser.Parse"/>: header handling, quoting,
/// whitespace trimming, blank lines, line endings and per-line error reporting.
/// </summary>
public class OverrideCsvParserTests
{
    [Fact]
    public void Parse_SimpleThreeColumnFile_ReturnsTwoRowsNoErrors()
    {
        const string csv = "AttributeName,Value,ElementType\nSetpoint,42,Int32\nName,Pump A,\n";

        var result = OverrideCsvParser.Parse(csv);

        Assert.Empty(result.Errors);
        Assert.Equal(2, result.Rows.Count);
        Assert.Equal("Setpoint", result.Rows[0].AttributeName);
        Assert.Equal("42", result.Rows[0].Value);
        Assert.Equal("Int32", result.Rows[0].ElementType);
        Assert.Equal(2, result.Rows[0].LineNumber);
        Assert.Equal("Name", result.Rows[1].AttributeName);
        Assert.Equal("Pump A", result.Rows[1].Value);
        Assert.Null(result.Rows[1].ElementType);
        Assert.Equal(3, result.Rows[1].LineNumber);
    }

    [Fact]
    public void Parse_TwoColumnFileWithoutElementType_RowsHaveNullElementType()
    {
        const string csv = "AttributeName,Value\nSetpoint,42\nName,Pump A\n";

        var result = OverrideCsvParser.Parse(csv);

        Assert.Empty(result.Errors);
        Assert.Equal(2, result.Rows.Count);
        Assert.All(result.Rows, r => Assert.Null(r.ElementType));
        Assert.Equal("42", result.Rows[0].Value);
        Assert.Equal("Pump A", result.Rows[1].Value);
    }

    [Fact]
    public void Parse_QuotedValueWithComma_PreservesEmbeddedComma()
    {
        const string csv = "AttributeName,Value,ElementType\nName,\"a,b,c\",\n";

        var result = OverrideCsvParser.Parse(csv);

        Assert.Empty(result.Errors);
        var row = Assert.Single(result.Rows);
        Assert.Equal("Name", row.AttributeName);
        Assert.Equal("a,b,c", row.Value);
        Assert.Null(row.ElementType);
    }

    [Fact]
    public void Parse_DoubledQuoteEscape_UnescapesToSingleQuote()
    {
        const string csv = "AttributeName,Value,ElementType\nName,\"he said \"\"hi\"\"\",\n";

        var result = OverrideCsvParser.Parse(csv);

        Assert.Empty(result.Errors);
        var row = Assert.Single(result.Rows);
        Assert.Equal("he said \"hi\"", row.Value);
    }

    [Fact]
    public void Parse_EmptyValueField_YieldsNullValue()
    {
        const string csv = "AttributeName,Value,ElementType\nSetpoint,,\n";

        var result = OverrideCsvParser.Parse(csv);

        Assert.Empty(result.Errors);
        var row = Assert.Single(result.Rows);
        Assert.Equal("Setpoint", row.AttributeName);
        Assert.Null(row.Value);
        Assert.Null(row.ElementType);
    }

    [Fact]
    public void Parse_RowWithTooFewColumns_ProducesLineNumberedErrorAndExcludesRow()
    {
        const string csv = "AttributeName,Value,ElementType\nSetpoint\nName,Pump A,\n";

        var result = OverrideCsvParser.Parse(csv);

        // Bad row on line 2 excluded; good row on line 3 retained.
        var row = Assert.Single(result.Rows);
        Assert.Equal("Name", row.AttributeName);
        Assert.Equal(3, row.LineNumber);
        var error = Assert.Single(result.Errors);
        // Match the full "Line 2:" prefix; a bare "2" would also match e.g. "Line 12".
        Assert.Contains("Line 2:", error);
    }

    [Fact]
    public void Parse_RowWithTooManyColumns_ProducesLineNumberedErrorAndExcludesRow()
    {
        const string csv = "AttributeName,Value\nSetpoint,42,Int32\n";

        var result = OverrideCsvParser.Parse(csv);

        Assert.Empty(result.Rows);
        var error = Assert.Single(result.Errors);
        Assert.Contains("Line 2:", error);
    }

    [Fact]
    public void Parse_BlankAttributeName_ProducesLineNumberedError()
    {
        const string csv = "AttributeName,Value,ElementType\n,42,Int32\n";

        var result = OverrideCsvParser.Parse(csv);

        Assert.Empty(result.Rows);
        var error = Assert.Single(result.Errors);
        Assert.Contains("Line 2:", error);
        Assert.Contains("AttributeName", error);
    }

    [Fact]
    public void Parse_BlankLines_AreSkippedWithoutError()
    {
        const string csv = "AttributeName,Value,ElementType\n\nSetpoint,42,Int32\n \nName,Pump A,\n";

        var result = OverrideCsvParser.Parse(csv);

        Assert.Empty(result.Errors);
        Assert.Equal(2, result.Rows.Count);
        // LineNumber reflects the true source line (blank line 2 skipped).
        Assert.Equal(3, result.Rows[0].LineNumber);
        Assert.Equal(5, result.Rows[1].LineNumber);
    }

    [Fact]
    public void Parse_CarriageReturnLineEndings_AreTreatedAsLineBreaks()
    {
        // Mixes \r\n and a lone \r; both must count as exactly one line break.
        const string csv = "AttributeName,Value\r\nSetpoint,42\rName,Pump A";

        var result = OverrideCsvParser.Parse(csv);

        Assert.Empty(result.Errors);
        Assert.Equal(2, result.Rows.Count);
        Assert.Equal("42", result.Rows[0].Value);
        Assert.Equal(2, result.Rows[0].LineNumber);
        Assert.Equal("Pump A", result.Rows[1].Value);
        Assert.Equal(3, result.Rows[1].LineNumber);
    }

    [Fact]
    public void Parse_MissingHeader_ReturnsZeroRowsAndHeaderError()
    {
        const string csv = "Setpoint,42,Int32\nName,Pump A,\n";

        var result = OverrideCsvParser.Parse(csv);

        Assert.Empty(result.Rows);
        var error = Assert.Single(result.Errors);
        Assert.Contains("header", error, StringComparison.OrdinalIgnoreCase);
    }

    [Fact]
    public void Parse_HeaderIsCaseInsensitiveAndTrimsWhitespace()
    {
        const string csv = "attributename, value , elementtype\nSetpoint,42,Int32\n";

        var result = OverrideCsvParser.Parse(csv);

        Assert.Empty(result.Errors);
        var row = Assert.Single(result.Rows);
        Assert.Equal("Setpoint", row.AttributeName);
        Assert.Equal("42", row.Value);
        Assert.Equal("Int32", row.ElementType);
    }

    [Fact]
    public void Parse_HeaderOnly_ReturnsNoRowsAndNoErrors()
    {
        const string csv = "AttributeName,Value,ElementType\n";

        var result = OverrideCsvParser.Parse(csv);

        Assert.Empty(result.Rows);
        Assert.Empty(result.Errors);
    }

    [Fact]
    public void Parse_EmptyInput_ReturnsHeaderError()
    {
        var result = OverrideCsvParser.Parse(string.Empty);

        Assert.Empty(result.Rows);
        var error = Assert.Single(result.Errors);
        Assert.Contains("header", error, StringComparison.OrdinalIgnoreCase);
    }

    [Fact]
    public void Parse_UnquotedWhitespace_IsTrimmedButQuotedWhitespacePreserved()
    {
        const string csv = "AttributeName,Value,ElementType\n Setpoint , 42 ,Int32\nName,\" spaced \",\n";

        var result = OverrideCsvParser.Parse(csv);

        Assert.Empty(result.Errors);
        Assert.Equal(2, result.Rows.Count);
        Assert.Equal("Setpoint", result.Rows[0].AttributeName);
        Assert.Equal("42", result.Rows[0].Value);
        Assert.Equal(" spaced ", result.Rows[1].Value);
    }
}