feat(etl): implement ColumnDropTransformer
Add a data transformer that removes specified columns from the data stream. Columns are matched by name (case-insensitive), and a single transformer can drop multiple columns. Includes comprehensive tests that use NSubstitute to mock IDataReader.
This commit is contained in:
@@ -0,0 +1,66 @@
|
||||
using System.Data;
|
||||
|
||||
namespace JdeScoping.DataSync.Etl.Transformers;
|
||||
|
||||
/// <summary>
/// A data transformer that removes specified columns from the data stream.
/// Columns are matched by name (case-insensitive).
/// </summary>
/// <remarks>
/// The ordinal mapping is rebuilt every time <see cref="OnInitialize(IDataReader)"/> runs.
/// The accessor overrides dereference that mapping, so they rely on
/// <see cref="OnInitialize(IDataReader)"/> having been called first.
/// </remarks>
public class ColumnDropTransformer : DataTransformerBase
{
    // Names of the columns to hide; compared with OrdinalIgnoreCase.
    private readonly HashSet<string> _columnsToDrop;

    // Index = ordinal exposed by this transformer, value = ordinal in the source reader.
    private int[]? _ordinalMap;

    // Kept column name -> ordinal exposed by this transformer (case-insensitive keys).
    private Dictionary<string, int>? _nameToOrdinal;

    /// <inheritdoc />
    public override string TransformerName => $"DropColumns:{string.Join(",", _columnsToDrop)}";

    /// <summary>
    /// Creates a new ColumnDropTransformer that removes the specified columns.
    /// </summary>
    /// <param name="columnsToDrop">The names of columns to drop (case-insensitive).</param>
    /// <exception cref="ArgumentNullException"><paramref name="columnsToDrop"/> is <c>null</c>.</exception>
    public ColumnDropTransformer(params string[] columnsToDrop)
    {
        ArgumentNullException.ThrowIfNull(columnsToDrop);

        _columnsToDrop = new HashSet<string>(columnsToDrop, StringComparer.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Scans the columns of <paramref name="source"/> and builds the mapping from
    /// exposed ordinals to source ordinals, skipping every column whose name is in
    /// the drop set.
    /// </summary>
    /// <param name="source">The reader whose column layout is inspected.</param>
    protected override void OnInitialize(IDataReader source)
    {
        // Build into locals and publish at the end, so an exception thrown by the
        // source part-way through the scan cannot leave the two maps out of step.
        var keptOrdinals = new List<int>();
        var nameToOrdinal = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);

        for (int i = 0; i < source.FieldCount; i++)
        {
            var name = source.GetName(i);
            if (_columnsToDrop.Contains(name))
            {
                continue;
            }

            // When the source exposes the same name more than once (or names that
            // differ only by case), the first kept occurrence wins. Assigning through
            // the indexer would silently redirect GetOrdinal to the last occurrence.
            nameToOrdinal.TryAdd(name, keptOrdinals.Count);
            keptOrdinals.Add(i);
        }

        _ordinalMap = keptOrdinals.ToArray();
        _nameToOrdinal = nameToOrdinal;
    }

    /// <summary>Returns the number of columns that remain after dropping.</summary>
    public override int GetFieldCount(IDataReader source) => _ordinalMap!.Length;

    /// <summary>Returns the source column name for the exposed <paramref name="ordinal"/>.</summary>
    public override string GetName(int ordinal, IDataReader source) => source.GetName(_ordinalMap![ordinal]);

    /// <summary>Returns the source field type for the exposed <paramref name="ordinal"/>.</summary>
    public override Type GetFieldType(int ordinal, IDataReader source) => source.GetFieldType(_ordinalMap![ordinal]);

    /// <summary>Returns the source value for the exposed <paramref name="ordinal"/>.</summary>
    public override object GetValue(int ordinal, IDataReader source) => source.GetValue(_ordinalMap![ordinal]);

    /// <summary>
    /// Resolves a column name (case-insensitive) to its exposed ordinal.
    /// </summary>
    /// <param name="name">The column name to look up.</param>
    /// <param name="source">The underlying reader; not consulted by this lookup.</param>
    /// <returns>The ordinal of the column as exposed by this transformer.</returns>
    /// <exception cref="IndexOutOfRangeException">
    /// The column does not exist among the kept columns (it is unknown or was dropped).
    /// </exception>
    public override int GetOrdinal(string name, IDataReader source)
    {
        if (_nameToOrdinal!.TryGetValue(name, out var ordinal))
        {
            return ordinal;
        }

        throw new IndexOutOfRangeException($"Column '{name}' not found or was dropped.");
    }

    /// <summary>Reports whether the source value at the exposed <paramref name="ordinal"/> is DBNull.</summary>
    public override bool IsDBNull(int ordinal, IDataReader source) => source.IsDBNull(_ordinalMap![ordinal]);
}
|
||||
@@ -0,0 +1,87 @@
|
||||
using System.Data;
|
||||
using JdeScoping.DataSync.Etl.Transformers;
|
||||
using NSubstitute;
|
||||
|
||||
namespace JdeScoping.DataSync.Tests.Etl.Transformers;
|
||||
|
||||
/// <summary>
/// Unit tests for <see cref="ColumnDropTransformer"/>, run against an
/// NSubstitute-backed <see cref="IDataReader"/>.
/// </summary>
public class ColumnDropTransformerTests
{
    // Column layout shared by the single-drop tests; "DropMe" sits at source ordinal 2.
    private static readonly string[] FourColumns = { "Id", "Name", "DropMe", "Value" };

    [Fact]
    public void FieldCount_ExcludesDroppedColumns()
    {
        var mock = CreateMockReader(FourColumns);

        var reader = new ColumnDropTransformer("DropMe").Transform(mock);

        Assert.Equal(3, reader.FieldCount);
    }

    [Fact]
    public void GetName_SkipsDroppedColumns()
    {
        var mock = CreateMockReader(FourColumns);

        var reader = new ColumnDropTransformer("DropMe").Transform(mock);

        Assert.Equal("Id", reader.GetName(0));
        Assert.Equal("Name", reader.GetName(1));
        Assert.Equal("Value", reader.GetName(2));
    }

    [Fact]
    public void GetOrdinal_ReturnsRemappedOrdinal()
    {
        var mock = CreateMockReader(FourColumns);

        var reader = new ColumnDropTransformer("DropMe").Transform(mock);

        Assert.Equal(0, reader.GetOrdinal("Id"));
        Assert.Equal(1, reader.GetOrdinal("Name"));
        Assert.Equal(2, reader.GetOrdinal("Value"));
    }

    [Fact]
    public void GetOrdinal_DroppedColumn_ThrowsIndexOutOfRange()
    {
        var mock = CreateMockReader(FourColumns);

        var reader = new ColumnDropTransformer("DropMe").Transform(mock);

        Assert.Throws<IndexOutOfRangeException>(() => reader.GetOrdinal("DropMe"));
    }

    [Fact]
    public void GetValue_ReturnsCorrectValues()
    {
        var rowValues = new object[] { 1, "Test", "Dropped", 42 };
        var mock = CreateMockReader(FourColumns, rowValues);
        mock.Read().Returns(true);

        var reader = new ColumnDropTransformer("DropMe").Transform(mock);
        reader.Read();

        Assert.Equal(1, reader.GetValue(0));
        Assert.Equal("Test", reader.GetValue(1));
        Assert.Equal(42, reader.GetValue(2));
    }

    [Fact]
    public void MultipleDroppedColumns_AllExcluded()
    {
        var mock = CreateMockReader(new[] { "Id", "Drop1", "Name", "Drop2", "Value" });

        var reader = new ColumnDropTransformer("Drop1", "Drop2").Transform(mock);

        Assert.Equal(3, reader.FieldCount);
        Assert.Equal("Id", reader.GetName(0));
        Assert.Equal("Name", reader.GetName(1));
        Assert.Equal("Value", reader.GetName(2));
    }

    /// <summary>
    /// Builds a substitute <see cref="IDataReader"/> that reports the given columns.
    /// </summary>
    /// <param name="columns">Column names, in source ordinal order.</param>
    /// <param name="values">
    /// Optional per-column values returned by <c>GetValue</c>; when omitted,
    /// <c>GetValue</c> is left unconfigured.
    /// </param>
    /// <returns>The configured substitute reader.</returns>
    private static IDataReader CreateMockReader(string[] columns, object[]? values = null)
    {
        var mock = Substitute.For<IDataReader>();
        mock.FieldCount.Returns(columns.Length);

        for (var ordinal = 0; ordinal < columns.Length; ordinal++)
        {
            var columnName = columns[ordinal];

            mock.GetName(ordinal).Returns(columnName);
            mock.GetOrdinal(columnName).Returns(ordinal);

            if (values is not null)
            {
                mock.GetValue(ordinal).Returns(values[ordinal]);
            }
        }

        return mock;
    }
}
|
||||
Reference in New Issue
Block a user