diff --git a/src/ZB.MOM.WW.ScadaBridge.Commons/Types/OverrideCsvParser.cs b/src/ZB.MOM.WW.ScadaBridge.Commons/Types/OverrideCsvParser.cs index a847ced3..ed831f27 100644 --- a/src/ZB.MOM.WW.ScadaBridge.Commons/Types/OverrideCsvParser.cs +++ b/src/ZB.MOM.WW.ScadaBridge.Commons/Types/OverrideCsvParser.cs @@ -58,7 +58,11 @@ public static class OverrideCsvParser if (string.IsNullOrWhiteSpace(rawLine)) continue; - var fields = SplitFields(rawLine); + if (!SplitFields(rawLine, out var fields)) + { + errors.Add($"Line {lineNumber}: Unterminated quoted field."); + continue; + } if (!headerSeen) { @@ -130,16 +134,30 @@ public static class OverrideCsvParser private static string? NullIfEmpty(string field) => field.Length == 0 ? null : field; /// - /// RFC-4180-ish field splitter for a single physical line: quoted fields may - /// embed commas and doubled quotes ("""); unquoted fields are - /// whitespace-trimmed. + /// RFC-4180-ish field splitter for a single physical line. Quoting rules: + /// + /// A field is quoted only if its first non-whitespace char is a + /// " (leading whitespace before the opening quote is allowed and + /// ignored). Inside a quoted field, commas are literal and "" is an + /// escape for one literal " char; the closing " should be the last non-whitespace + /// char of the field (trailing whitespace after the close is allowed and + /// ignored; NOTE(review): other trailing chars are also dropped, not rejected). + /// A " appearing anywhere else in an unquoted field (i.e. after + /// non-whitespace content) is a literal character and is preserved. + /// Unquoted fields are whitespace-trimmed; quoted field values are kept + /// verbatim. + /// + /// Returns true with the split on success; + /// returns false when a quoted field is opened but never closed before + /// end-of-line (the caller emits a per-line "unterminated" error). 
/// - private static List SplitFields(string line) + private static bool SplitFields(string line, out List fields) { - var fields = new List(); + fields = new List(); var field = new System.Text.StringBuilder(); - var inQuotes = false; - var quoted = false; // this field was (at least partly) quoted → preserve whitespace + var inQuotes = false; // currently between an opening and closing quote + var quoted = false; // this field opened with a quote → keep value verbatim + var sawContent = false; // any non-whitespace char seen in the current field yet for (var i = 0; i < line.Length; i++) { @@ -157,7 +175,7 @@ public static class OverrideCsvParser } else { - inQuotes = false; + inQuotes = false; // closing quote; only trailing whitespace may follow } } else @@ -170,23 +188,40 @@ public static class OverrideCsvParser switch (c) { - case '"': - inQuotes = true; - quoted = true; - break; case ',': fields.Add(Finalize(field, quoted)); field.Clear(); + inQuotes = false; quoted = false; + sawContent = false; + break; + case '"' when !sawContent: + // Opening quote: first non-whitespace char of the field. Any + // leading whitespace seen so far is part of the (ignored) prefix. + field.Clear(); + inQuotes = true; + quoted = true; + sawContent = true; + break; default: + // After a quoted field has closed, every char up to the next delimiter + // is dropped, not appended. NOTE(review): that includes non-whitespace — confirm. + if (quoted) + break; + + // A '"' here (sawContent already true) falls through as a literal. 
+ if (!char.IsWhiteSpace(c)) + sawContent = true; field.Append(c); break; } } + if (inQuotes) + return false; // opened a quoted field that was never closed + fields.Add(Finalize(field, quoted)); - return fields; + return true; } private static string Finalize(System.Text.StringBuilder field, bool quoted) diff --git a/tests/ZB.MOM.WW.ScadaBridge.Commons.Tests/OverrideCsvParserTests.cs b/tests/ZB.MOM.WW.ScadaBridge.Commons.Tests/OverrideCsvParserTests.cs index 51e3c7dc..bacddcc4 100644 --- a/tests/ZB.MOM.WW.ScadaBridge.Commons.Tests/OverrideCsvParserTests.cs +++ b/tests/ZB.MOM.WW.ScadaBridge.Commons.Tests/OverrideCsvParserTests.cs @@ -169,4 +169,80 @@ public class OverrideCsvParserTests Assert.Equal("42", result.Rows[0].Value); Assert.Equal(" spaced ", result.Rows[1].Value); } + + [Fact] + public void Parse_MidFieldUnquotedQuote_IsPreservedAsLiteral() + { + // A '"' that does NOT open a field (it follows non-whitespace content in an + // unquoted field) is a literal character — 'va"lue' must survive intact. + const string csv = "AttributeName,Value,ElementType\nName,va\"lue,Type\n"; + + var result = OverrideCsvParser.Parse(csv); + + Assert.Empty(result.Errors); + var row = Assert.Single(result.Rows); + Assert.Equal("Name", row.AttributeName); + Assert.Equal("va\"lue", row.Value); + Assert.Equal("Type", row.ElementType); + } + + [Fact] + public void Parse_UnterminatedQuotedField_ProducesLineNumberedErrorAndExcludesRow() + { + // A quote opens the field but is never closed before end-of-line → malformed. 
+ const string csv = "AttributeName,Value\nName,\"unclosed\n"; + + var result = OverrideCsvParser.Parse(csv); + + Assert.Empty(result.Rows); + var error = Assert.Single(result.Errors); + Assert.Contains("2", error); + Assert.Contains("Unterminated", error, StringComparison.OrdinalIgnoreCase); + } + + [Fact] + public void Parse_WellFormedQuotedFieldWithComma_StillParses() + { + // Regression guard alongside the unterminated-quote fix: a properly closed + // quoted field embedding a comma must still round-trip. + const string csv = "AttributeName,Value,ElementType\nName,\"a,b\",Type\n"; + + var result = OverrideCsvParser.Parse(csv); + + Assert.Empty(result.Errors); + var row = Assert.Single(result.Rows); + Assert.Equal("Name", row.AttributeName); + Assert.Equal("a,b", row.Value); + Assert.Equal("Type", row.ElementType); + } + + [Fact] + public void Parse_EmptyQuotedField_YieldsNullValue() + { + // A bare "" is an empty quoted field; empty → null per the empty-field rule. + const string csv = "AttributeName,Value,ElementType\nName,\"\",Type\n"; + + var result = OverrideCsvParser.Parse(csv); + + Assert.Empty(result.Errors); + var row = Assert.Single(result.Rows); + Assert.Equal("Name", row.AttributeName); + Assert.Null(row.Value); + Assert.Equal("Type", row.ElementType); + } + + [Fact] + public void Parse_QuotedFieldWithTrailingWhitespaceAfterClose_IsAccepted() + { + // The closing quote may be followed by ignorable trailing whitespace before + // the delimiter; the field value itself is preserved verbatim. + const string csv = "AttributeName,Value,ElementType\nName,\"a,b\" ,Type\n"; + + var result = OverrideCsvParser.Parse(csv); + + Assert.Empty(result.Errors); + var row = Assert.Single(result.Rows); + Assert.Equal("a,b", row.Value); + Assert.Equal("Type", row.ElementType); + } }