feat(datasync): add RegexTransformer with Find & Replace mode
Initial implementation supporting:
- Find & Replace mode with regex pattern and replacement string
- Case-insensitive option
- NonMatchBehavior enum for handling non-matches
This commit is contained in:
@@ -0,0 +1,120 @@
|
|||||||
|
using System.Data;
|
||||||
|
using System.Text.RegularExpressions;
|
||||||
|
|
||||||
|
namespace JdeScoping.DataSync.Etl.Transformers;
|
||||||
|
|
||||||
|
/// <summary>
/// Selects the value a regex transformation produces when its pattern
/// does not match the input value.
/// </summary>
public enum NonMatchBehavior
{
    /// <summary>Keep the input value instead of substituting a placeholder.</summary>
    KeepOriginal = 0,

    /// <summary>Produce a database null (<see cref="System.DBNull"/>).</summary>
    ReturnNull = 1,

    /// <summary>Produce an empty string.</summary>
    ReturnEmpty = 2,
}
|
||||||
|
|
||||||
|
/// <summary>
/// A data transformer that applies a regular expression to the string form of
/// the values in a single column. All other columns pass through untouched.
/// </summary>
/// <remarks>
/// The mode is chosen by the <c>replacement</c> constructor argument:
/// <list type="bullet">
/// <item><description>
/// Find-and-replace (replacement is not null): every match is replaced. A value
/// that does not match comes back as its unchanged string form; the configured
/// <see cref="NonMatchBehavior"/> is not consulted in this mode.
/// </description></item>
/// <item><description>
/// Match-and-extract (replacement is null): the first capture group of the first
/// match is returned. If the pattern does not match, or it declares no capture
/// group, the configured <see cref="NonMatchBehavior"/> decides the result.
/// </description></item>
/// </list>
/// Null and <see cref="DBNull"/> inputs are always returned as <see cref="DBNull.Value"/>.
/// </remarks>
public class RegexTransformer : DataTransformerBase
{
    private readonly string _columnName;
    private readonly string _pattern;
    private readonly string? _replacement;
    private readonly bool _ignoreCase;
    private readonly NonMatchBehavior _nonMatchBehavior;

    // Both are populated by OnInitialize; -1 never equals a real column ordinal,
    // so before initialization every column is passed through unchanged.
    private Regex? _regex;
    private int _columnOrdinal = -1;

    /// <inheritdoc />
    public override string TransformerName => $"Regex:{_columnName}";

    /// <summary>
    /// Creates a new RegexTransformer.
    /// </summary>
    /// <param name="columnName">The column to transform.</param>
    /// <param name="pattern">The regex pattern.</param>
    /// <param name="replacement">
    /// Replacement string for find-and-replace mode, or null for match-and-extract mode.
    /// An empty string is a valid replacement (it deletes every match).
    /// </param>
    /// <param name="ignoreCase">Whether to use case-insensitive matching.</param>
    /// <param name="nonMatchBehavior">
    /// Result to produce in match-and-extract mode when nothing can be extracted.
    /// </param>
    /// <exception cref="ArgumentException">
    /// <paramref name="columnName"/> or <paramref name="pattern"/> is null, empty,
    /// or consists only of white space.
    /// </exception>
    public RegexTransformer(
        string columnName,
        string pattern,
        string? replacement = null,
        bool ignoreCase = false,
        NonMatchBehavior nonMatchBehavior = NonMatchBehavior.KeepOriginal)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(columnName);
        ArgumentException.ThrowIfNullOrWhiteSpace(pattern);

        _columnName = columnName;
        _pattern = pattern;
        _replacement = replacement;
        _ignoreCase = ignoreCase;
        _nonMatchBehavior = nonMatchBehavior;
    }

    /// <summary>
    /// Resolves the target column's ordinal in <paramref name="source"/> and builds
    /// the compiled regex (adding <see cref="RegexOptions.IgnoreCase"/> when requested).
    /// </summary>
    /// <param name="source">The reader whose column layout is being transformed.</param>
    protected override void OnInitialize(IDataReader source)
    {
        _columnOrdinal = source.GetOrdinal(_columnName);

        var options = RegexOptions.Compiled;
        if (_ignoreCase)
        {
            options |= RegexOptions.IgnoreCase;
        }

        _regex = new Regex(_pattern, options);
    }

    /// <summary>
    /// Returns the value at <paramref name="ordinal"/>, transformed when it belongs
    /// to the target column.
    /// </summary>
    /// <param name="ordinal">Zero-based column ordinal being read.</param>
    /// <param name="source">The underlying reader positioned on the current row.</param>
    /// <returns>
    /// The untouched source value for non-target columns; <see cref="DBNull.Value"/>
    /// for null input (or when <see cref="NonMatchBehavior.ReturnNull"/> applies);
    /// otherwise a <see cref="string"/>.
    /// </returns>
    /// <exception cref="InvalidOperationException">
    /// The target column is read before the regex has been built by <c>OnInitialize</c>.
    /// </exception>
    public override object GetValue(int ordinal, IDataReader source)
    {
        var value = source.GetValue(ordinal);

        // Only transform the target column.
        if (ordinal != _columnOrdinal)
        {
            return value;
        }

        // Pass through null/DBNull, normalised to DBNull.
        if (value is null or DBNull)
        {
            return DBNull.Value;
        }

        var regex = _regex ?? throw new InvalidOperationException(
            $"{TransformerName} has not been initialized; the regex is built in {nameof(OnInitialize)}.");

        var stringValue = value.ToString() ?? string.Empty;

        // Find-and-replace mode (replacement is not null).
        if (_replacement is not null)
        {
            return regex.Replace(stringValue, _replacement);
        }

        // Match-and-extract mode (replacement is null). Groups[0] is the whole
        // match, so Count > 1 means the pattern declares at least one capture group.
        var match = regex.Match(stringValue);
        if (match.Success && match.Groups.Count > 1)
        {
            return match.Groups[1].Value;
        }

        // Nothing to extract - apply NonMatchBehavior. KeepOriginal yields the
        // string form so the result always agrees with GetFieldType, which
        // reports string for this column.
        return _nonMatchBehavior switch
        {
            NonMatchBehavior.ReturnNull => (object)DBNull.Value,
            NonMatchBehavior.ReturnEmpty => string.Empty,
            _ => stringValue,
        };
    }

    /// <summary>
    /// Reports <see cref="string"/> for the target column and defers to
    /// <paramref name="source"/> for every other column.
    /// </summary>
    /// <param name="ordinal">Zero-based column ordinal.</param>
    /// <param name="source">The underlying reader.</param>
    /// <returns>The field type exposed for the column.</returns>
    public override Type GetFieldType(int ordinal, IDataReader source)
    {
        if (ordinal == _columnOrdinal)
        {
            return typeof(string);
        }

        return source.GetFieldType(ordinal);
    }
}
|
||||||
@@ -0,0 +1,47 @@
|
|||||||
|
using System.Data;
|
||||||
|
using JdeScoping.DataSync.Etl.Transformers;
|
||||||
|
using NSubstitute;
|
||||||
|
|
||||||
|
namespace JdeScoping.DataSync.Tests.Etl.Transformers;
|
||||||
|
|
||||||
|
/// <summary>
/// Unit tests for <see cref="RegexTransformer"/>.
/// </summary>
public class RegexTransformerTests
{
    /// <summary>
    /// Replacing the pattern <c>^IIS_</c> with an empty string strips that prefix
    /// from the target column and leaves the other column as it was.
    /// </summary>
    [Fact]
    public void FindReplace_RemovesPrefix()
    {
        // Arrange
        string[] columnNames = { "BatchID", "Name" };
        object[] rowValues = { "IIS_12345", "Test" };
        var source = CreateMockReader(columns: columnNames, values: rowValues);

        var transformer = new RegexTransformer(columnName: "BatchID", pattern: "^IIS_", replacement: "");

        // Act
        var reader = transformer.Transform(source);
        source.Read().Returns(true); // the substitute source reports that a row is available
        reader.Read();

        // Assert
        var batchId = reader.GetValue(0);
        var name = reader.GetValue(1);
        Assert.Equal("12345", batchId);
        Assert.Equal("Test", name); // Other column unchanged
    }

    /// <summary>
    /// Builds an NSubstitute <see cref="IDataReader"/> exposing a single row:
    /// column <c>i</c> is named <c>columns[i]</c> and holds <c>values[i]</c>.
    /// </summary>
    /// <param name="columns">Column names, in ordinal order.</param>
    /// <param name="values">Cell values, parallel to <paramref name="columns"/>.</param>
    /// <returns>The configured substitute reader.</returns>
    private static IDataReader CreateMockReader(string[] columns, object[] values)
    {
        var stub = Substitute.For<IDataReader>();
        stub.FieldCount.Returns(columns.Length);

        for (var ordinal = 0; ordinal < columns.Length; ordinal++)
        {
            var columnName = columns[ordinal];
            var cell = values[ordinal];

            // A null cell has no runtime type to report, so it is declared as object.
            var cellType = cell is null ? typeof(object) : cell.GetType();
            var cellIsNull = cell is null or DBNull;

            stub.GetName(ordinal).Returns(columnName);
            stub.GetOrdinal(columnName).Returns(ordinal);
            stub.GetFieldType(ordinal).Returns(cellType);
            stub.GetValue(ordinal).Returns(cell);
            stub.IsDBNull(ordinal).Returns(cellIsNull);
        }

        return stub;
    }
}
|
||||||
Reference in New Issue
Block a user