diff --git a/src/ZB.MOM.WW.ScadaBridge.Transport/Import/LineDiffer.cs b/src/ZB.MOM.WW.ScadaBridge.Transport/Import/LineDiffer.cs
new file mode 100644
index 00000000..88a830ee
--- /dev/null
+++ b/src/ZB.MOM.WW.ScadaBridge.Transport/Import/LineDiffer.cs
@@ -0,0 +1,276 @@
+namespace ZB.MOM.WW.ScadaBridge.Transport.Import;
+
+/// <summary>
+/// The kind of edit a <see cref="LineDiffLine"/> represents within a per-line
+/// diff: an unchanged line common to both sides (<see cref="Context"/>), a line
+/// present only in the new text (<see cref="Add"/>), or a line present only in
+/// the old text (<see cref="Remove"/>).
+/// </summary>
+public enum LineDiffOp
+{
+    /// <summary>Unchanged line, present in both old and new text.</summary>
+    Context,
+
+    /// <summary>Line present only in the new text.</summary>
+    Add,
+
+    /// <summary>Line present only in the old text.</summary>
+    Remove,
+}
+
+/// <summary>
+/// One line of a per-line diff. <see cref="OldLineNo"/> and <see cref="NewLineNo"/>
+/// are 1-based. <see cref="LineDiffOp.Context"/> lines carry both;
+/// <see cref="LineDiffOp.Add"/> lines carry only <see cref="NewLineNo"/>;
+/// <see cref="LineDiffOp.Remove"/> lines carry only <see cref="OldLineNo"/>.
+/// </summary>
+public sealed record LineDiffLine(LineDiffOp Op, string Text, int? OldLineNo, int? NewLineNo);
+
+/// <summary>
+/// The result of a per-line diff. <see cref="Lines"/> is in source order and may
+/// be truncated (see <see cref="Truncated"/>); <see cref="AddedCount"/> and
+/// <see cref="RemovedCount"/> always report the full diff totals regardless of
+/// truncation.
+/// </summary>
+public sealed record LineDiffResult(
+    IReadOnlyList<LineDiffLine> Lines,
+    bool Truncated,
+    int AddedCount,
+    int RemovedCount);
+
+/// <summary>
+/// Pure, dependency-free per-line text diff using the Myers O(ND) shortest-edit-script
+/// algorithm. Line endings are normalized (<c>\r\n</c> and lone <c>\r</c> become <c>\n</c>)
+/// before splitting, so otherwise-identical text with mixed endings produces no changes.
+/// </summary>
+/// <remarks>
+/// Hand-rolled deliberately — the project forbids third-party diff libraries (mirroring
+/// the custom-SVG KpiTrendChart precedent). The classic Myers algorithm computes a
+/// minimal edit script; we record the search frontier per edit-distance <c>d</c> and
+/// backtrack to recover that script, then emit lines in source order.
+/// </remarks>
+public static class LineDiffer
+{
+    /// <summary>
+    /// Computes a minimal per-line diff between <paramref name="oldText"/> and
+    /// <paramref name="newText"/> (either may be <see langword="null"/>, treated as empty).
+    /// </summary>
+    /// <param name="oldText">The original text, or <see langword="null"/> (empty).</param>
+    /// <param name="newText">The new text, or <see langword="null"/> (empty).</param>
+    /// <param name="maxLines">
+    /// Maximum number of entries to retain in <see cref="LineDiffResult.Lines"/>. If the full
+    /// diff would emit more lines, <see cref="LineDiffResult.Lines"/> is truncated to this many
+    /// entries and <see cref="LineDiffResult.Truncated"/> is set; the add/remove totals are
+    /// unaffected. Values &lt;= 0 are treated as 0 and yield an empty
+    /// <see cref="LineDiffResult.Lines"/>.
+    /// </param>
+    /// <returns>The (possibly capped) diff lines, the truncation flag, and the full totals.</returns>
+    public static LineDiffResult Diff(string? oldText, string? newText, int maxLines = 400)
+    {
+        string[] oldLines = SplitLines(oldText);
+        string[] newLines = SplitLines(newText);
+
+        List<LineDiffLine> full = BuildDiff(oldLines, newLines, out int added, out int removed);
+
+        // Clamp before comparing: against a raw negative cap even an empty diff would be
+        // reported as truncated although no line was dropped.
+        int cap = maxLines > 0 ? maxLines : 0;
+        bool truncated = full.Count > cap;
+        IReadOnlyList<LineDiffLine> lines = truncated ? full.GetRange(0, cap) : full;
+
+        return new LineDiffResult(lines, truncated, added, removed);
+    }
+
+    /// <summary>
+    /// Normalizes line endings (<c>\r\n</c> and lone <c>\r</c> to <c>\n</c>) and splits on
+    /// <c>\n</c>. A <see langword="null"/> or empty input yields an empty array (no lines), so
+    /// the diff of two empties is empty rather than a single empty-string match.
+    /// </summary>
+    private static string[] SplitLines(string? text)
+    {
+        if (string.IsNullOrEmpty(text))
+        {
+            return [];
+        }
+
+        string normalized = text.Replace("\r\n", "\n").Replace('\r', '\n');
+        return normalized.Split('\n');
+    }
+
+    /// <summary>
+    /// Runs Myers O(ND), backtracks the recorded frontier into a source-ordered edit script,
+    /// and reports the full add/remove totals via the <see langword="out"/> parameters.
+    /// </summary>
+    private static List<LineDiffLine> BuildDiff(
+        string[] a,
+        string[] b,
+        out int added,
+        out int removed)
+    {
+        int n = a.Length;
+        int m = b.Length;
+        added = 0;
+        removed = 0;
+
+        // Fast path: at least one side is empty, so every line is a pure add or a pure remove.
+        if (n == 0 || m == 0)
+        {
+            var oneSided = new List<LineDiffLine>(n + m);
+            for (int j = 0; j < m; j++)
+            {
+                oneSided.Add(new LineDiffLine(LineDiffOp.Add, b[j], OldLineNo: null, NewLineNo: j + 1));
+            }
+
+            for (int i = 0; i < n; i++)
+            {
+                oneSided.Add(new LineDiffLine(LineDiffOp.Remove, a[i], OldLineNo: i + 1, NewLineNo: null));
+            }
+
+            added = m;
+            removed = n;
+            return oneSided;
+        }
+
+        // Myers shortest-edit-script: v[offset + k] is the furthest x reached on diagonal
+        // k = x - y. k ranges over [-d, d]; we offset by `max` to index a non-negative array.
+        int max = n + m;
+        int offset = max;
+        var v = new int[(2 * max) + 1];
+        var trace = new List<int[]>();
+
+        int foundD = -1;
+        for (int d = 0; d <= max && foundD < 0; d++)
+        {
+            // Snapshot V before this round's overwrites so backtracking is faithful. Backtracking
+            // round d only consults diagonals within [-d, d], so keep just that window (diagonal
+            // k at index d + k) instead of cloning the whole 2(n+m)+1 array every round.
+            trace.Add(v[(offset - d)..(offset + d + 1)]);
+
+            for (int k = -d; k <= d; k += 2)
+            {
+                int x;
+                if (k == -d || (k != d && v[offset + k - 1] < v[offset + k + 1]))
+                {
+                    // Down move (insertion): take the path from k+1.
+                    x = v[offset + k + 1];
+                }
+                else
+                {
+                    // Right move (deletion): extend the path from k-1.
+                    x = v[offset + k - 1] + 1;
+                }
+
+                int y = x - k;
+
+                // Follow the diagonal (matching lines) as far as possible.
+                while (x < n && y < m && a[x] == b[y])
+                {
+                    x++;
+                    y++;
+                }
+
+                v[offset + k] = x;
+
+                if (x >= n && y >= m)
+                {
+                    foundD = d;
+                    break;
+                }
+            }
+        }
+
+        // Backtrack the recorded frontier into a source-ordered edit script, then tally it.
+        List<LineDiffLine> script = Backtrack(a, b, trace, foundD, n, m);
+
+        foreach (LineDiffLine line in script)
+        {
+            if (line.Op == LineDiffOp.Add)
+            {
+                added++;
+            }
+            else if (line.Op == LineDiffOp.Remove)
+            {
+                removed++;
+            }
+        }
+
+        return script;
+    }
+
+    /// <summary>
+    /// Walks the per-distance V snapshots backwards from the end point, emitting matched
+    /// (context), removed, and added lines. The walk produces lines in reverse source order;
+    /// the returned list is reversed back into forward source order.
+    /// </summary>
+    /// <remarks>
+    /// <c>trace[d]</c> holds diagonals <c>[-d, d]</c> of V as they stood before round
+    /// <c>d</c> ran, with diagonal <c>k</c> stored at index <c>d + k</c>.
+    /// </remarks>
+    private static List<LineDiffLine> Backtrack(
+        string[] a,
+        string[] b,
+        List<int[]> trace,
+        int foundD,
+        int n,
+        int m)
+    {
+        var reverse = new List<LineDiffLine>(n + m);
+        int x = n;
+        int y = m;
+
+        for (int d = foundD; d > 0; d--)
+        {
+            int[] v = trace[d];
+            int k = x - y;
+
+            // Repeat the forward pass's choice to learn which diagonal this round came from.
+            int prevK;
+            if (k == -d || (k != d && v[d + k - 1] < v[d + k + 1]))
+            {
+                // Came from a down move (insertion).
+                prevK = k + 1;
+            }
+            else
+            {
+                // Came from a right move (deletion).
+                prevK = k - 1;
+            }
+
+            int prevX = v[d + prevK];
+            int prevY = prevX - prevK;
+
+            // Emit the diagonal (matched lines) that follows the snake's start.
+            while (x > prevX && y > prevY)
+            {
+                x--;
+                y--;
+                reverse.Add(new LineDiffLine(LineDiffOp.Context, a[x], OldLineNo: x + 1, NewLineNo: y + 1));
+            }
+
+            // Every round d > 0 begins with exactly one edit; x tells us which kind.
+            if (x == prevX)
+            {
+                // Insertion: a new-side line with no old counterpart.
+                y--;
+                reverse.Add(new LineDiffLine(LineDiffOp.Add, b[y], OldLineNo: null, NewLineNo: y + 1));
+            }
+            else
+            {
+                // Deletion: an old-side line with no new counterpart.
+                x--;
+                reverse.Add(new LineDiffLine(LineDiffOp.Remove, a[x], OldLineNo: x + 1, NewLineNo: null));
+            }
+        }
+
+        // d == 0 leg: any remaining leading diagonal is all matched context.
+        while (x > 0 && y > 0)
+        {
+            x--;
+            y--;
+            reverse.Add(new LineDiffLine(LineDiffOp.Context, a[x], OldLineNo: x + 1, NewLineNo: y + 1));
+        }
+
+        reverse.Reverse();
+        return reverse;
+    }
+}
diff --git a/tests/ZB.MOM.WW.ScadaBridge.Transport.Tests/Import/LineDifferTests.cs b/tests/ZB.MOM.WW.ScadaBridge.Transport.Tests/Import/LineDifferTests.cs
new file mode 100644
index 00000000..fb84625e
--- /dev/null
+++ b/tests/ZB.MOM.WW.ScadaBridge.Transport.Tests/Import/LineDifferTests.cs
@@ -0,0 +1,237 @@
+using ZB.MOM.WW.ScadaBridge.Transport.Import;
+
+namespace ZB.MOM.WW.ScadaBridge.Transport.Tests.Import;
+
+public sealed class LineDifferTests
+{
+    [Fact]
+    public void IdenticalText_NoAddsOrRemoves_NotTruncated()
+    {
+        var result = LineDiffer.Diff("a\nb\nc", "a\nb\nc");
+
+        Assert.Equal(0, result.AddedCount);
+        Assert.Equal(0, result.RemovedCount);
+        Assert.False(result.Truncated);
+        // All emitted lines (if any) must be Context.
+        Assert.All(result.Lines, l => Assert.Equal(LineDiffOp.Context, l.Op));
+    }
+
+    [Fact]
+    public void OldEmpty_NewThreeLines_ThreeAdds_NewLineNos1To3()
+    {
+        var result = LineDiffer.Diff("", "x\ny\nz");
+
+        var adds = result.Lines.Where(l => l.Op == LineDiffOp.Add).ToList();
+        Assert.Equal(3, result.AddedCount);
+        Assert.Equal(0, result.RemovedCount);
+        Assert.False(result.Truncated);
+        Assert.Equal(3, adds.Count);
+        Assert.Equal(new[] { 1, 2, 3 }, adds.Select(a => a.NewLineNo!.Value).ToArray());
+        Assert.Equal(new[] { "x", "y", "z" }, adds.Select(a => a.Text).ToArray());
+        // Add lines have no old line number.
+        Assert.All(adds, a => Assert.Null(a.OldLineNo));
+    }
+
+    [Fact]
+    public void NewEmpty_OldTwoLines_TwoRemoves_OldLineNos1To2()
+    {
+        var result = LineDiffer.Diff("p\nq", "");
+
+        var removes = result.Lines.Where(l => l.Op == LineDiffOp.Remove).ToList();
+        Assert.Equal(0, result.AddedCount);
+        Assert.Equal(2, result.RemovedCount);
+        Assert.False(result.Truncated);
+        Assert.Equal(2, removes.Count);
+        Assert.Equal(new[] { 1, 2 }, removes.Select(r => r.OldLineNo!.Value).ToArray());
+        Assert.Equal(new[] { "p", "q" }, removes.Select(r => r.Text).ToArray());
+        // Remove lines have no new line number.
+        Assert.All(removes, r => Assert.Null(r.NewLineNo));
+    }
+
+    [Fact]
+    public void SingleMiddleLineChange_OneRemoveOneAdd_ContextPreserved()
+    {
+        var result = LineDiffer.Diff("a\nB\nc", "a\nX\nc");
+
+        var removes = result.Lines.Where(l => l.Op == LineDiffOp.Remove).ToList();
+        var adds = result.Lines.Where(l => l.Op == LineDiffOp.Add).ToList();
+        var contexts = result.Lines.Where(l => l.Op == LineDiffOp.Context).ToList();
+
+        Assert.Equal(1, result.RemovedCount);
+        Assert.Equal(1, result.AddedCount);
+        Assert.False(result.Truncated);
+
+        var remove = Assert.Single(removes);
+        Assert.Equal("B", remove.Text);
+        Assert.Equal(2, remove.OldLineNo);
+        Assert.Null(remove.NewLineNo);
+
+        var add = Assert.Single(adds);
+        Assert.Equal("X", add.Text);
+        Assert.Equal(2, add.NewLineNo);
+        Assert.Null(add.OldLineNo);
+
+        // Context lines "a" and "c" carry both line numbers.
+        Assert.Equal(2, contexts.Count);
+        var ctxA = contexts.Single(c => c.Text == "a");
+        Assert.Equal(1, ctxA.OldLineNo);
+        Assert.Equal(1, ctxA.NewLineNo);
+        var ctxC = contexts.Single(c => c.Text == "c");
+        Assert.Equal(3, ctxC.OldLineNo);
+        Assert.Equal(3, ctxC.NewLineNo);
+    }
+
+    [Fact]
+    public void CrlfNormalization_NoChanges()
+    {
+        var result = LineDiffer.Diff("a\r\nb", "a\nb");
+
+        Assert.Equal(0, result.AddedCount);
+        Assert.Equal(0, result.RemovedCount);
+        Assert.False(result.Truncated);
+    }
+
+    [Fact]
+    public void LoneCarriageReturnNormalization_NoChanges()
+    {
+        var result = LineDiffer.Diff("a\rb", "a\nb");
+
+        Assert.Equal(0, result.AddedCount);
+        Assert.Equal(0, result.RemovedCount);
+        Assert.False(result.Truncated);
+    }
+
+    [Fact]
+    public void NullOld_NewSingleLine_OneAdd()
+    {
+        var result = LineDiffer.Diff(null, "x");
+
+        Assert.Equal(1, result.AddedCount);
+        Assert.Equal(0, result.RemovedCount);
+        var add = Assert.Single(result.Lines, l => l.Op == LineDiffOp.Add);
+        Assert.Equal("x", add.Text);
+        Assert.Equal(1, add.NewLineNo);
+    }
+
+    [Fact]
+    public void NullNew_OldSingleLine_OneRemove()
+    {
+        var result = LineDiffer.Diff("x", null);
+
+        Assert.Equal(0, result.AddedCount);
+        Assert.Equal(1, result.RemovedCount);
+        var remove = Assert.Single(result.Lines, l => l.Op == LineDiffOp.Remove);
+        Assert.Equal("x", remove.Text);
+        Assert.Equal(1, remove.OldLineNo);
+    }
+
+    [Fact]
+    public void BothNull_NoChanges()
+    {
+        var result = LineDiffer.Diff(null, null);
+
+        Assert.Equal(0, result.AddedCount);
+        Assert.Equal(0, result.RemovedCount);
+        Assert.False(result.Truncated);
+    }
+
+    [Fact]
+    public void SizeCap_OldEmpty_ThousandNewLines_TruncatedToMax()
+    {
+        var newText = string.Join("\n", Enumerable.Range(1, 1000).Select(i => $"line{i}"));
+
+        var result = LineDiffer.Diff("", newText, maxLines: 400);
+
+        // Exactly the first 400 lines are retained; the totals still cover all 1000.
+        Assert.Equal(400, result.Lines.Count);
+        Assert.Equal("line1", result.Lines[0].Text);
+        Assert.Equal("line400", result.Lines[^1].Text);
+        Assert.True(result.Truncated);
+        Assert.Equal(1000, result.AddedCount);
+        Assert.Equal(0, result.RemovedCount);
+    }
+
+    [Theory]
+    [InlineData(0)]
+    [InlineData(-1)]
+    public void NonPositiveMaxLines_EmptyDiff_NotTruncated(int maxLines)
+    {
+        // Nothing was dropped, so the result must not claim truncation.
+        var result = LineDiffer.Diff(null, null, maxLines);
+
+        Assert.Empty(result.Lines);
+        Assert.False(result.Truncated);
+    }
+
+    [Theory]
+    [InlineData(0)]
+    [InlineData(-1)]
+    public void NonPositiveMaxLines_NonEmptyDiff_TruncatedToEmpty(int maxLines)
+    {
+        var result = LineDiffer.Diff("a", "b", maxLines);
+
+        Assert.Empty(result.Lines);
+        Assert.True(result.Truncated);
+        Assert.Equal(1, result.AddedCount);
+        Assert.Equal(1, result.RemovedCount);
+    }
+
+    [Fact]
+    public void InterleavedCase_LcsContextPreserved()
+    {
+        // old: 1 2 3 4 new: 1 3 4 5 => remove 2, context 1/3/4, add 5
+        var result = LineDiffer.Diff("1\n2\n3\n4", "1\n3\n4\n5");
+
+        Assert.Equal(1, result.AddedCount);
+        Assert.Equal(1, result.RemovedCount);
+        Assert.False(result.Truncated);
+
+        var remove = Assert.Single(result.Lines, l => l.Op == LineDiffOp.Remove);
+        Assert.Equal("2", remove.Text);
+        Assert.Equal(2, remove.OldLineNo);
+
+        var add = Assert.Single(result.Lines, l => l.Op == LineDiffOp.Add);
+        Assert.Equal("5", add.Text);
+        Assert.Equal(4, add.NewLineNo);
+
+        var contextTexts = result.Lines
+            .Where(l => l.Op == LineDiffOp.Context)
+            .Select(l => l.Text)
+            .ToList();
+        Assert.Equal(new[] { "1", "3", "4" }, contextTexts);
+
+        // Verify ordering: the remove of "2" appears after context "1" and before context "3".
+        var ops = result.Lines.Select(l => (l.Op, l.Text)).ToList();
+        var idxCtx1 = ops.FindIndex(x => x is { Op: LineDiffOp.Context, Text: "1" });
+        var idxRem2 = ops.FindIndex(x => x is { Op: LineDiffOp.Remove, Text: "2" });
+        var idxCtx3 = ops.FindIndex(x => x is { Op: LineDiffOp.Context, Text: "3" });
+        Assert.True(idxCtx1 < idxRem2 && idxRem2 < idxCtx3);
+    }
+
+    [Fact]
+    public void MultiEditCase_ScriptReplaysBothSides()
+    {
+        // Myers' classic example: ABCABBA -> CBABAC has LCS length 4 (3 removes, 2 adds).
+        string[] oldLines = ["a", "b", "c", "a", "b", "b", "a"];
+        string[] newLines = ["c", "b", "a", "b", "a", "c"];
+
+        var result = LineDiffer.Diff(string.Join("\n", oldLines), string.Join("\n", newLines));
+
+        Assert.Equal(2, result.AddedCount);
+        Assert.Equal(3, result.RemovedCount);
+        // Dropping the adds must replay the old text; dropping the removes, the new text.
+        Assert.Equal(oldLines, result.Lines.Where(l => l.Op != LineDiffOp.Add).Select(l => l.Text).ToArray());
+        Assert.Equal(newLines, result.Lines.Where(l => l.Op != LineDiffOp.Remove).Select(l => l.Text).ToArray());
+    }
+
+    [Fact]
+    public void TrailingNewline_ProducesTrailingEmptyLine()
+    {
+        // "a\n" splits to ["a", ""] — both texts identical => no changes.
+        var result = LineDiffer.Diff("a\n", "a\n");
+
+        Assert.Equal(0, result.AddedCount);
+        Assert.Equal(0, result.RemovedCount);
+        Assert.Equal(new[] { "a", "" }, result.Lines.Select(l => l.Text).ToArray());
+    }
+}