diff --git a/NEW/src/JdeScoping.DataSync/Etl/Transformers/RegexTransformer.cs b/NEW/src/JdeScoping.DataSync/Etl/Transformers/RegexTransformer.cs
new file mode 100644
index 0000000..0e14381
--- /dev/null
+++ b/NEW/src/JdeScoping.DataSync/Etl/Transformers/RegexTransformer.cs
@@ -0,0 +1,142 @@
+using System.Data;
+using System.Text.RegularExpressions;
+
+namespace JdeScoping.DataSync.Etl.Transformers;
+
+/// <summary>
+/// Specifies behavior when a regex pattern does not match the input value.
+/// </summary>
+public enum NonMatchBehavior
+{
+    /// <summary>Keep the original value (returned in its string form).</summary>
+    KeepOriginal,
+    /// <summary>Return null/DBNull.</summary>
+    ReturnNull,
+    /// <summary>Return an empty string.</summary>
+    ReturnEmpty
+}
+
+/// <summary>
+/// A data transformer that applies regex transformations to string values in a column.
+/// Supports two modes: Find &amp; Replace (when replacement is provided) and Match &amp; Extract
+/// (when replacement is null, extracts first capture group).
+/// </summary>
+/// <remarks>
+/// <see cref="NonMatchBehavior"/> is consulted only in Match &amp; Extract mode; in
+/// Find &amp; Replace mode a value the pattern does not match is returned as its unchanged string form.
+/// </remarks>
+public class RegexTransformer : DataTransformerBase
+{
+    private readonly string _columnName;
+    private readonly string _pattern;
+    private readonly string? _replacement;
+    private readonly bool _ignoreCase;
+    private readonly NonMatchBehavior _nonMatchBehavior;
+
+    private Regex? _regex;
+    private int _columnOrdinal = -1;
+
+    /// <inheritdoc />
+    public override string TransformerName => $"Regex:{_columnName}";
+
+    /// <summary>
+    /// Creates a new RegexTransformer.
+    /// </summary>
+    /// <param name="columnName">The column to transform.</param>
+    /// <param name="pattern">The regex pattern.</param>
+    /// <param name="replacement">Replacement string for Find &amp; Replace mode, or null for Match &amp; Extract mode.</param>
+    /// <param name="ignoreCase">Whether to use case-insensitive matching.</param>
+    /// <param name="nonMatchBehavior">Behavior when pattern does not match.</param>
+    /// <exception cref="ArgumentException">
+    /// <paramref name="columnName"/> or <paramref name="pattern"/> is null, empty, or whitespace.
+    /// </exception>
+    public RegexTransformer(
+        string columnName,
+        string pattern,
+        string? replacement = null,
+        bool ignoreCase = false,
+        NonMatchBehavior nonMatchBehavior = NonMatchBehavior.KeepOriginal)
+    {
+        ArgumentException.ThrowIfNullOrWhiteSpace(columnName);
+        ArgumentException.ThrowIfNullOrWhiteSpace(pattern);
+
+        _columnName = columnName;
+        _pattern = pattern;
+        _replacement = replacement;
+        _ignoreCase = ignoreCase;
+        _nonMatchBehavior = nonMatchBehavior;
+    }
+
+    /// <inheritdoc />
+    protected override void OnInitialize(IDataReader source)
+    {
+        _columnOrdinal = source.GetOrdinal(_columnName);
+
+        var options = RegexOptions.Compiled;
+        if (_ignoreCase)
+        {
+            // CultureInvariant keeps case folding independent of the host's current culture,
+            // so the same pattern matches the same data on every machine.
+            options |= RegexOptions.IgnoreCase | RegexOptions.CultureInvariant;
+        }
+
+        _regex = new Regex(_pattern, options);
+    }
+
+    /// <inheritdoc />
+    public override object GetValue(int ordinal, IDataReader source)
+    {
+        var value = source.GetValue(ordinal);
+
+        // Only transform the target column
+        if (ordinal != _columnOrdinal)
+        {
+            return value;
+        }
+
+        // Pass through null/DBNull
+        if (value is null or DBNull)
+        {
+            return DBNull.Value;
+        }
+
+        var regex = _regex
+            ?? throw new InvalidOperationException($"{TransformerName} has not been initialized.");
+        var stringValue = value.ToString() ?? string.Empty;
+
+        // Find & Replace mode (replacement is not null)
+        if (_replacement is not null)
+        {
+            return regex.Replace(stringValue, _replacement);
+        }
+
+        // Match & Extract mode (replacement is null)
+        var match = regex.Match(stringValue);
+        if (match.Success && match.Groups.Count > 1)
+        {
+            return match.Groups[1].Value;
+        }
+
+        // No match (or a pattern without a capture group) - apply NonMatchBehavior
+        return _nonMatchBehavior switch
+        {
+            NonMatchBehavior.ReturnNull => DBNull.Value,
+            NonMatchBehavior.ReturnEmpty => string.Empty,
+            // KeepOriginal: return the string form so the value agrees with GetFieldType,
+            // which reports string for this column.
+            _ => stringValue
+        };
+    }
+
+    /// <inheritdoc />
+    public override Type GetFieldType(int ordinal, IDataReader source)
+    {
+        // Target column always returns string
+        if (ordinal == _columnOrdinal)
+        {
+            return typeof(string);
+        }
+
+        return source.GetFieldType(ordinal);
+    }
+}
diff --git a/NEW/tests/JdeScoping.DataSync.Tests/Etl/Transformers/RegexTransformerTests.cs b/NEW/tests/JdeScoping.DataSync.Tests/Etl/Transformers/RegexTransformerTests.cs
new file mode 100644
index 0000000..234aa0e
--- /dev/null
+++ b/NEW/tests/JdeScoping.DataSync.Tests/Etl/Transformers/RegexTransformerTests.cs
@@ -0,0 +1,70 @@
+using System.Data;
+using JdeScoping.DataSync.Etl.Transformers;
+using NSubstitute;
+
+namespace JdeScoping.DataSync.Tests.Etl.Transformers;
+
+/// <summary>Unit tests for <see cref="RegexTransformer"/>.</summary>
+public class RegexTransformerTests
+{
+    [Fact]
+    public void FindReplace_RemovesPrefix()
+    {
+        // Arrange
+        var source = CreateMockReader(
+            columns: new[] { "BatchID", "Name" },
+            values: new object[] { "IIS_12345", "Test" });
+
+        var transformer = new RegexTransformer(
+            columnName: "BatchID",
+            pattern: "^IIS_",
+            replacement: "");
+
+        // Act
+        var reader = transformer.Transform(source);
+        source.Read().Returns(true);
+        reader.Read();
+
+        // Assert
+        Assert.Equal("12345", reader.GetValue(0));
+        Assert.Equal("Test", reader.GetValue(1)); // Other column unchanged
+    }
+
+    [Fact]
+    public void MatchExtract_NoMatchKeepOriginal_ReturnsStringForm()
+    {
+        // Arrange
+        var source = CreateMockReader(
+            columns: new[] { "BatchID", "Name" },
+            values: new object[] { 12345, "Test" });
+
+        var transformer = new RegexTransformer(
+            columnName: "BatchID",
+            pattern: "^IIS_(\\d+)$");
+
+        // Act
+        var reader = transformer.Transform(source);
+        source.Read().Returns(true);
+        reader.Read();
+
+        // Assert
+        Assert.Equal("12345", reader.GetValue(0)); // String form, not the boxed int
+    }
+
+    // Builds an NSubstitute IDataReader stub whose columns report the given names and values.
+    private static IDataReader CreateMockReader(string[] columns, object[] values)
+    {
+        var reader = Substitute.For<IDataReader>();
+        reader.FieldCount.Returns(columns.Length);
+        for (int i = 0; i < columns.Length; i++)
+        {
+            var index = i;
+            reader.GetName(index).Returns(columns[index]);
+            reader.GetOrdinal(columns[index]).Returns(index);
+            reader.GetFieldType(index).Returns(values[index]?.GetType() ?? typeof(object));
+            reader.GetValue(index).Returns(values[index]);
+            reader.IsDBNull(index).Returns(values[index] == null || values[index] == DBNull.Value);
+        }
+        return reader;
+    }
+}