feat(transport): pure Myers LineDiffer helper (M8 A3, T20)
This commit is contained in:
@@ -0,0 +1,299 @@
|
||||
namespace ZB.MOM.WW.ScadaBridge.Transport.Import;
|
||||
|
||||
/// <summary>
/// Classifies a single <see cref="LineDiffLine"/> of a per-line diff: the line is either
/// shared by both texts (<see cref="Context"/>), exists only on the new side
/// (<see cref="Add"/>), or exists only on the old side (<see cref="Remove"/>).
/// </summary>
public enum LineDiffOp
{
    /// <summary>The line is unchanged and appears in both the old and the new text.</summary>
    Context = 0,

    /// <summary>The line appears only in the new text.</summary>
    Add = 1,

    /// <summary>The line appears only in the old text.</summary>
    Remove = 2,
}
|
||||
|
||||
/// <summary>
/// A single entry of a per-line diff. Line numbers are 1-based; which of them is populated
/// depends on <paramref name="Op"/>: a <see cref="LineDiffOp.Context"/> entry has both,
/// a <see cref="LineDiffOp.Add"/> entry has only <paramref name="NewLineNo"/>, and a
/// <see cref="LineDiffOp.Remove"/> entry has only <paramref name="OldLineNo"/>.
/// </summary>
/// <param name="Op">The kind of edit this entry represents.</param>
/// <param name="Text">The line's text, without its line terminator.</param>
/// <param name="OldLineNo">1-based position in the old text, or <see langword="null"/> for added lines.</param>
/// <param name="NewLineNo">1-based position in the new text, or <see langword="null"/> for removed lines.</param>
public sealed record LineDiffLine(
    LineDiffOp Op,
    string Text,
    int? OldLineNo,
    int? NewLineNo);
|
||||
|
||||
/// <summary>
/// Outcome of a per-line diff: the (possibly capped) list of diff entries together with
/// the add/remove totals of the complete, uncapped diff.
/// </summary>
/// <param name="Lines">
/// Diff entries in source order. May hold fewer entries than the full diff when
/// <paramref name="Truncated"/> is <see langword="true"/>.
/// </param>
/// <param name="Truncated">
/// <see langword="true"/> when <paramref name="Lines"/> was cut short of the full diff.
/// </param>
/// <param name="AddedCount">Number of added lines in the full diff, regardless of truncation.</param>
/// <param name="RemovedCount">Number of removed lines in the full diff, regardless of truncation.</param>
public sealed record LineDiffResult(
    IReadOnlyList<LineDiffLine> Lines,
    bool Truncated,
    int AddedCount,
    int RemovedCount);
|
||||
|
||||
/// <summary>
/// Pure, dependency-free per-line text diff using the Myers O(ND) shortest-edit-script
/// algorithm. Line endings are normalized (<c>\r\n</c> and lone <c>\r</c> become <c>\n</c>)
/// before splitting, so otherwise-identical text with mixed endings produces no changes.
/// </summary>
/// <remarks>
/// Hand-rolled deliberately — the project forbids third-party diff libraries (mirroring
/// the custom-SVG <c>KpiTrendChart</c> precedent). The classic Myers algorithm computes a
/// minimal edit script; we record the part of the search frontier that each edit-distance
/// round <c>d</c> reads, then backtrack through those records to recover the script and
/// emit lines in source order.
/// </remarks>
public static class LineDiffer
{
    /// <summary>
    /// Computes a minimal per-line diff between <paramref name="oldText"/> and
    /// <paramref name="newText"/> (either may be <see langword="null"/>, treated as empty).
    /// </summary>
    /// <param name="oldText">The original text, or <see langword="null"/> (empty).</param>
    /// <param name="newText">The new text, or <see langword="null"/> (empty).</param>
    /// <param name="maxLines">
    /// Maximum number of entries to retain in <see cref="LineDiffResult.Lines"/>. If the full
    /// diff would emit more lines, <see cref="LineDiffResult.Lines"/> is truncated to this many
    /// entries and <see cref="LineDiffResult.Truncated"/> is set; the add/remove totals are
    /// unaffected. Values &lt;= 0 are treated as 0 and yield an empty
    /// <see cref="LineDiffResult.Lines"/>.
    /// </param>
    /// <returns>
    /// The diff entries in source order (capped at <paramref name="maxLines"/>), whether any
    /// entries were dropped by the cap, and the add/remove totals of the full diff.
    /// </returns>
    public static LineDiffResult Diff(string? oldText, string? newText, int maxLines = 400)
    {
        string[] oldLines = SplitLines(oldText);
        string[] newLines = SplitLines(newText);

        List<LineDiffLine> full = BuildDiff(oldLines, newLines, out int added, out int removed);

        // Clamp before comparing: a negative cap must behave exactly like zero. Comparing the
        // raw value would flag an empty diff as truncated (0 > -1) although nothing was dropped.
        int cap = maxLines > 0 ? maxLines : 0;

        if (full.Count <= cap)
        {
            return new LineDiffResult(full, Truncated: false, AddedCount: added, RemovedCount: removed);
        }

        return new LineDiffResult(full.GetRange(0, cap), Truncated: true, AddedCount: added, RemovedCount: removed);
    }

    /// <summary>
    /// Normalizes line endings (<c>\r\n</c> and lone <c>\r</c> to <c>\n</c>) and splits on <c>\n</c>.
    /// A <see langword="null"/> or empty input yields an empty array (no lines), so the diff of two
    /// empties is empty rather than a single empty-string match. Text ending in a line terminator
    /// yields a trailing empty-string line.
    /// </summary>
    private static string[] SplitLines(string? text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return [];
        }

        // Order matters: collapse CRLF pairs first so they do not become two line breaks.
        string normalized = text.Replace("\r\n", "\n").Replace('\r', '\n');
        return normalized.Split('\n');
    }

    /// <summary>
    /// Runs Myers O(ND), backtracks the recorded frontier into a source-ordered edit script,
    /// and reports the full add/remove totals via the out parameters.
    /// </summary>
    /// <param name="a">Lines of the old text.</param>
    /// <param name="b">Lines of the new text.</param>
    /// <param name="added">Receives the number of <see cref="LineDiffOp.Add"/> entries.</param>
    /// <param name="removed">Receives the number of <see cref="LineDiffOp.Remove"/> entries.</param>
    /// <returns>The complete edit script in source order.</returns>
    private static List<LineDiffLine> BuildDiff(
        string[] a,
        string[] b,
        out int added,
        out int removed)
    {
        int n = a.Length;
        int m = b.Length;
        added = 0;
        removed = 0;

        // Fast paths: with one side empty the script is all adds or all removes
        // (and empty when both sides are empty).
        if (n == 0)
        {
            var adds = new List<LineDiffLine>(m);
            for (int j = 0; j < m; j++)
            {
                adds.Add(new LineDiffLine(LineDiffOp.Add, b[j], OldLineNo: null, NewLineNo: j + 1));
            }

            added = m;
            return adds;
        }

        if (m == 0)
        {
            var removes = new List<LineDiffLine>(n);
            for (int i = 0; i < n; i++)
            {
                removes.Add(new LineDiffLine(LineDiffOp.Remove, a[i], OldLineNo: i + 1, NewLineNo: null));
            }

            removed = n;
            return removes;
        }

        // Myers shortest-edit-script. v[offset + k] holds the furthest x reached on diagonal
        // k = x - y; k ranges over [-d, d], so we offset by `max` to index a non-negative array.
        int max = n + m;
        int offset = max;
        var v = new int[(2 * max) + 1];
        var trace = new List<int[]>();

        int foundD = -1;
        for (int d = 0; d <= max && foundD < 0; d++)
        {
            // Record the frontier this round reads, before the round overwrites anything.
            // Round d only reads diagonals [-(d-1), d-1] (the k±1 neighbours of its own
            // diagonals), so that window is all the backtrack needs. Round 0 reads nothing
            // that is ever backtracked. trace[d][(d - 1) + k] == v[offset + k] at this point.
            trace.Add(d == 0 ? [] : v[(offset - d + 1)..(offset + d)]);

            for (int k = -d; k <= d; k += 2)
            {
                // Down move (insertion) continues from diagonal k+1 keeping x; right move
                // (deletion) continues from diagonal k-1 and advances x by one.
                bool down = k == -d || (k != d && v[offset + k - 1] < v[offset + k + 1]);
                int x = down ? v[offset + k + 1] : v[offset + k - 1] + 1;
                int y = x - k;

                // Follow the diagonal (matching lines) as far as possible.
                while (x < n && y < m && a[x] == b[y])
                {
                    x++;
                    y++;
                }

                v[offset + k] = x;

                if (x >= n && y >= m)
                {
                    foundD = d;
                    break;
                }
            }
        }

        List<LineDiffLine> script = Backtrack(a, b, trace, foundD);

        foreach (LineDiffLine line in script)
        {
            if (line.Op == LineDiffOp.Add)
            {
                added++;
            }
            else if (line.Op == LineDiffOp.Remove)
            {
                removed++;
            }
        }

        return script;
    }

    /// <summary>
    /// Walks the per-distance frontier windows backwards from the end point, emitting matched
    /// (context), removed, and added lines. The walk produces lines in reverse source order;
    /// the returned list is reversed back into forward source order.
    /// </summary>
    /// <param name="a">Lines of the old text.</param>
    /// <param name="b">Lines of the new text.</param>
    /// <param name="trace">
    /// For each round <c>d</c> &gt;= 1, the furthest-x values of diagonals
    /// <c>-(d-1)</c> through <c>d-1</c> as they were before the round ran; the value for
    /// diagonal <c>k</c> is at index <c>(d - 1) + k</c>.
    /// </param>
    /// <param name="foundD">The edit distance at which the forward pass reached the end point.</param>
    /// <returns>The edit script in forward source order.</returns>
    private static List<LineDiffLine> Backtrack(
        string[] a,
        string[] b,
        List<int[]> trace,
        int foundD)
    {
        var reverse = new List<LineDiffLine>(a.Length + b.Length);
        int x = a.Length;
        int y = b.Length;

        for (int d = foundD; d > 0; d--)
        {
            int[] window = trace[d];
            int shift = d - 1;
            int k = x - y;

            // Replay the forward pass's choice for diagonal k in round d.
            bool down = k == -d || (k != d && window[shift + k - 1] < window[shift + k + 1]);
            int prevK = down ? k + 1 : k - 1;
            int prevX = window[shift + prevK];
            int prevY = prevX - prevK;

            // Undo the snake: matched lines that followed this round's single edit.
            while (x > prevX && y > prevY)
            {
                x--;
                y--;
                reverse.Add(new LineDiffLine(LineDiffOp.Context, a[x], OldLineNo: x + 1, NewLineNo: y + 1));
            }

            if (x == prevX)
            {
                // Insertion: a new-side line with no old counterpart.
                y--;
                reverse.Add(new LineDiffLine(LineDiffOp.Add, b[y], OldLineNo: null, NewLineNo: y + 1));
            }
            else
            {
                // Deletion: an old-side line with no new counterpart.
                x--;
                reverse.Add(new LineDiffLine(LineDiffOp.Remove, a[x], OldLineNo: x + 1, NewLineNo: null));
            }
        }

        // d == 0 leg: any remaining leading diagonal is all matched context.
        while (x > 0 && y > 0)
        {
            x--;
            y--;
            reverse.Add(new LineDiffLine(LineDiffOp.Context, a[x], OldLineNo: x + 1, NewLineNo: y + 1));
        }

        reverse.Reverse();
        return reverse;
    }
}
|
||||
@@ -0,0 +1,192 @@
|
||||
using ZB.MOM.WW.ScadaBridge.Transport.Import;
|
||||
|
||||
namespace ZB.MOM.WW.ScadaBridge.Transport.Tests.Import;
|
||||
|
||||
/// <summary>
/// Unit tests for <see cref="LineDiffer.Diff"/>: empty/null inputs, line-ending
/// normalization, single and interleaved edits, line numbering, and the
/// <c>maxLines</c> cap.
/// </summary>
public sealed class LineDifferTests
{
    [Fact]
    public void IdenticalText_NoAddsOrRemoves_NotTruncated()
    {
        var result = LineDiffer.Diff("a\nb\nc", "a\nb\nc");

        Assert.Equal(0, result.AddedCount);
        Assert.Equal(0, result.RemovedCount);
        Assert.False(result.Truncated);

        // Every input line must come back as Context; an empty result would hide a regression.
        Assert.Equal(new[] { "a", "b", "c" }, result.Lines.Select(l => l.Text).ToArray());
        Assert.All(result.Lines, l => Assert.Equal(LineDiffOp.Context, l.Op));
    }

    [Fact]
    public void OldEmpty_NewThreeLines_ThreeAdds_NewLineNos1To3()
    {
        var result = LineDiffer.Diff("", "x\ny\nz");

        var adds = result.Lines.Where(l => l.Op == LineDiffOp.Add).ToList();
        Assert.Equal(3, result.AddedCount);
        Assert.Equal(0, result.RemovedCount);
        Assert.False(result.Truncated);
        Assert.Equal(3, result.Lines.Count);
        Assert.Equal(3, adds.Count);
        Assert.Equal(new[] { 1, 2, 3 }, adds.Select(a => a.NewLineNo!.Value).ToArray());
        Assert.Equal(new[] { "x", "y", "z" }, adds.Select(a => a.Text).ToArray());

        // Add lines have no old line number.
        Assert.All(adds, a => Assert.Null(a.OldLineNo));
    }

    [Fact]
    public void NewEmpty_OldTwoLines_TwoRemoves_OldLineNos1To2()
    {
        var result = LineDiffer.Diff("p\nq", "");

        var removes = result.Lines.Where(l => l.Op == LineDiffOp.Remove).ToList();
        Assert.Equal(0, result.AddedCount);
        Assert.Equal(2, result.RemovedCount);
        Assert.False(result.Truncated);
        Assert.Equal(2, result.Lines.Count);
        Assert.Equal(2, removes.Count);
        Assert.Equal(new[] { 1, 2 }, removes.Select(r => r.OldLineNo!.Value).ToArray());
        Assert.Equal(new[] { "p", "q" }, removes.Select(r => r.Text).ToArray());

        // Remove lines have no new line number.
        Assert.All(removes, r => Assert.Null(r.NewLineNo));
    }

    [Fact]
    public void SingleMiddleLineChange_OneRemoveOneAdd_ContextPreserved()
    {
        var result = LineDiffer.Diff("a\nB\nc", "a\nX\nc");

        var removes = result.Lines.Where(l => l.Op == LineDiffOp.Remove).ToList();
        var adds = result.Lines.Where(l => l.Op == LineDiffOp.Add).ToList();
        var contexts = result.Lines.Where(l => l.Op == LineDiffOp.Context).ToList();

        Assert.Equal(1, result.RemovedCount);
        Assert.Equal(1, result.AddedCount);
        Assert.False(result.Truncated);

        var remove = Assert.Single(removes);
        Assert.Equal("B", remove.Text);
        Assert.Equal(2, remove.OldLineNo);
        Assert.Null(remove.NewLineNo);

        var add = Assert.Single(adds);
        Assert.Equal("X", add.Text);
        Assert.Equal(2, add.NewLineNo);
        Assert.Null(add.OldLineNo);

        // Context lines "a" and "c" carry both line numbers.
        Assert.Equal(2, contexts.Count);
        var ctxA = contexts.Single(c => c.Text == "a");
        Assert.Equal(1, ctxA.OldLineNo);
        Assert.Equal(1, ctxA.NewLineNo);
        var ctxC = contexts.Single(c => c.Text == "c");
        Assert.Equal(3, ctxC.OldLineNo);
        Assert.Equal(3, ctxC.NewLineNo);
    }

    [Fact]
    public void CrlfNormalization_NoChanges()
    {
        var result = LineDiffer.Diff("a\r\nb", "a\nb");

        Assert.Equal(0, result.AddedCount);
        Assert.Equal(0, result.RemovedCount);
        Assert.False(result.Truncated);

        // "\r\n" must count as ONE line break: exactly two context lines, no stray empty line.
        Assert.Equal(new[] { "a", "b" }, result.Lines.Select(l => l.Text).ToArray());
        Assert.All(result.Lines, l => Assert.Equal(LineDiffOp.Context, l.Op));
    }

    [Fact]
    public void LoneCarriageReturnNormalization_NoChanges()
    {
        var result = LineDiffer.Diff("a\rb", "a\nb");

        Assert.Equal(0, result.AddedCount);
        Assert.Equal(0, result.RemovedCount);
        Assert.False(result.Truncated);
        Assert.Equal(new[] { "a", "b" }, result.Lines.Select(l => l.Text).ToArray());
        Assert.All(result.Lines, l => Assert.Equal(LineDiffOp.Context, l.Op));
    }

    [Fact]
    public void NullOld_NewSingleLine_OneAdd()
    {
        var result = LineDiffer.Diff(null, "x");

        Assert.Equal(1, result.AddedCount);
        Assert.Equal(0, result.RemovedCount);
        var add = Assert.Single(result.Lines, l => l.Op == LineDiffOp.Add);
        Assert.Equal("x", add.Text);
        Assert.Equal(1, add.NewLineNo);
    }

    [Fact]
    public void NullNew_OldSingleLine_OneRemove()
    {
        var result = LineDiffer.Diff("x", null);

        Assert.Equal(0, result.AddedCount);
        Assert.Equal(1, result.RemovedCount);
        var remove = Assert.Single(result.Lines, l => l.Op == LineDiffOp.Remove);
        Assert.Equal("x", remove.Text);
        Assert.Equal(1, remove.OldLineNo);
    }

    [Fact]
    public void BothNull_NoChanges()
    {
        var result = LineDiffer.Diff(null, null);

        Assert.Equal(0, result.AddedCount);
        Assert.Equal(0, result.RemovedCount);
        Assert.False(result.Truncated);
        Assert.Empty(result.Lines);
    }

    [Fact]
    public void SizeCap_OldEmpty_ThousandNewLines_TruncatedToMax()
    {
        var newText = string.Join("\n", Enumerable.Range(1, 1000).Select(i => $"line{i}"));

        var result = LineDiffer.Diff("", newText, maxLines: 400);

        // Exactly the first 400 entries are kept; "<= 400" would also accept an empty list.
        Assert.Equal(400, result.Lines.Count);
        Assert.Equal("line1", result.Lines[0].Text);
        Assert.Equal("line400", result.Lines[399].Text);
        Assert.Equal(400, result.Lines[399].NewLineNo);
        Assert.True(result.Truncated);

        // Totals describe the full diff, not the capped list.
        Assert.Equal(1000, result.AddedCount);
        Assert.Equal(0, result.RemovedCount);
    }

    [Fact]
    public void SizeCap_DiffExactlyAtCap_NotTruncated()
    {
        var result = LineDiffer.Diff("", "x\ny\nz", maxLines: 3);

        Assert.Equal(3, result.Lines.Count);
        Assert.False(result.Truncated);
        Assert.Equal(3, result.AddedCount);
    }

    [Theory]
    [InlineData(0)]
    [InlineData(-5)]
    public void SizeCap_NonPositiveMax_EmptyLinesButFullTotals(int maxLines)
    {
        var result = LineDiffer.Diff("a", "b", maxLines);

        Assert.Empty(result.Lines);
        Assert.True(result.Truncated);
        Assert.Equal(1, result.AddedCount);
        Assert.Equal(1, result.RemovedCount);
    }

    [Fact]
    public void InterleavedCase_LcsContextPreserved()
    {
        // old: 1 2 3 4   new: 1 3 4 5   =>   remove 2, context 1/3/4, add 5
        var result = LineDiffer.Diff("1\n2\n3\n4", "1\n3\n4\n5");

        Assert.Equal(1, result.AddedCount);
        Assert.Equal(1, result.RemovedCount);
        Assert.False(result.Truncated);

        var remove = Assert.Single(result.Lines, l => l.Op == LineDiffOp.Remove);
        Assert.Equal("2", remove.Text);
        Assert.Equal(2, remove.OldLineNo);

        var add = Assert.Single(result.Lines, l => l.Op == LineDiffOp.Add);
        Assert.Equal("5", add.Text);
        Assert.Equal(4, add.NewLineNo);

        var contextTexts = result.Lines
            .Where(l => l.Op == LineDiffOp.Context)
            .Select(l => l.Text)
            .ToList();
        Assert.Equal(new[] { "1", "3", "4" }, contextTexts);

        // Verify ordering: the remove of "2" appears after context "1" and before context "3".
        var ops = result.Lines.Select(l => (l.Op, l.Text)).ToList();
        var idxCtx1 = ops.FindIndex(x => x is { Op: LineDiffOp.Context, Text: "1" });
        var idxRem2 = ops.FindIndex(x => x is { Op: LineDiffOp.Remove, Text: "2" });
        var idxCtx3 = ops.FindIndex(x => x is { Op: LineDiffOp.Context, Text: "3" });
        Assert.True(idxCtx1 < idxRem2 && idxRem2 < idxCtx3);
    }

    [Fact]
    public void TrailingNewline_ProducesTrailingEmptyLine()
    {
        // "a\n" splits to ["a", ""] — both texts identical => no changes.
        var result = LineDiffer.Diff("a\n", "a\n");

        Assert.Equal(0, result.AddedCount);
        Assert.Equal(0, result.RemovedCount);

        // The trailing empty line must actually be present, as the test name promises.
        Assert.Equal(new[] { "a", "" }, result.Lines.Select(l => l.Text).ToArray());
        Assert.All(result.Lines, l => Assert.Equal(LineDiffOp.Context, l.Op));
    }
}
|
||||
Reference in New Issue
Block a user