fix(etl): address Codex MCP review findings for Phase 2
- Filter MERGE SQL columns to only include columns that exist in destination (allColumns and updateColumns were using unfiltered source columns) - Fix schema-qualified table names to use proper [schema].[table] format instead of wrapping entire name in single brackets - Add empty column mapping validation to throw early if no columns intersect - Add JdeDateTransformer output column collision detection in OnInitialize - Add TODO comment for WithCommandTimeout (stored but not yet passed to destinations) - Add tests for FormatQualifiedTableName and output column collision
This commit is contained in:
@@ -62,9 +62,10 @@ public class DbBulkImportDestination : IImportDestination
|
|||||||
await using var connection = await _connectionFactory.CreateLotFinderConnectionAsync(cancellationToken);
|
await using var connection = await _connectionFactory.CreateLotFinderConnectionAsync(cancellationToken);
|
||||||
|
|
||||||
// Truncate destination table
|
// Truncate destination table
|
||||||
|
var qualifiedName = CommonScripts.FormatQualifiedTableName(_tableName);
|
||||||
await using (var truncateCmd = connection.CreateCommand())
|
await using (var truncateCmd = connection.CreateCommand())
|
||||||
{
|
{
|
||||||
truncateCmd.CommandText = $"TRUNCATE TABLE [{_tableName}]";
|
truncateCmd.CommandText = $"TRUNCATE TABLE {qualifiedName}";
|
||||||
truncateCmd.CommandTimeout = _commandTimeoutSeconds;
|
truncateCmd.CommandTimeout = _commandTimeoutSeconds;
|
||||||
await truncateCmd.ExecuteNonQueryAsync(cancellationToken);
|
await truncateCmd.ExecuteNonQueryAsync(cancellationToken);
|
||||||
}
|
}
|
||||||
@@ -75,7 +76,7 @@ public class DbBulkImportDestination : IImportDestination
|
|||||||
// Bulk copy data
|
// Bulk copy data
|
||||||
using var bulkCopy = new SqlBulkCopy(connection)
|
using var bulkCopy = new SqlBulkCopy(connection)
|
||||||
{
|
{
|
||||||
DestinationTableName = $"[{_tableName}]",
|
DestinationTableName = qualifiedName,
|
||||||
BatchSize = _batchSize,
|
BatchSize = _batchSize,
|
||||||
BulkCopyTimeout = _commandTimeoutSeconds,
|
BulkCopyTimeout = _commandTimeoutSeconds,
|
||||||
EnableStreaming = true
|
EnableStreaming = true
|
||||||
@@ -91,6 +92,12 @@ public class DbBulkImportDestination : IImportDestination
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Validate that we have columns to work with
|
||||||
|
if (bulkCopy.ColumnMappings.Count == 0)
|
||||||
|
throw new InvalidOperationException(
|
||||||
|
$"No columns from source exist in destination table '{_tableName}'. " +
|
||||||
|
"Check column names match between source query and destination table.");
|
||||||
|
|
||||||
// Track rows via event
|
// Track rows via event
|
||||||
bulkCopy.NotifyAfter = _batchSize;
|
bulkCopy.NotifyAfter = _batchSize;
|
||||||
bulkCopy.SqlRowsCopied += (_, e) =>
|
bulkCopy.SqlRowsCopied += (_, e) =>
|
||||||
|
|||||||
@@ -84,14 +84,26 @@ public class DbBulkMergeDestination : IImportDestination
|
|||||||
// Get destination columns for column mapping
|
// Get destination columns for column mapping
|
||||||
var destColumns = await GetDestinationColumnsAsync(connection, cancellationToken);
|
var destColumns = await GetDestinationColumnsAsync(connection, cancellationToken);
|
||||||
|
|
||||||
// Get all column names from source
|
// Get all column names from source, filtered to only include columns that exist in destination
|
||||||
var allColumns = new List<string>();
|
var allColumns = new List<string>();
|
||||||
for (int i = 0; i < source.FieldCount; i++)
|
for (int i = 0; i < source.FieldCount; i++)
|
||||||
allColumns.Add(source.GetName(i));
|
{
|
||||||
|
var colName = source.GetName(i);
|
||||||
|
if (destColumns.Contains(colName))
|
||||||
|
allColumns.Add(colName);
|
||||||
|
}
|
||||||
|
|
||||||
// Determine update columns (all non-match columns if not specified)
|
// Validate that we have columns to work with
|
||||||
|
if (allColumns.Count == 0)
|
||||||
|
throw new InvalidOperationException(
|
||||||
|
$"No columns from source exist in destination table '{_tableName}'. " +
|
||||||
|
"Check column names match between source query and destination table.");
|
||||||
|
|
||||||
|
// Determine update columns (all non-match columns if not specified), filtered to destColumns
|
||||||
var matchSet = new HashSet<string>(_matchColumns, StringComparer.OrdinalIgnoreCase);
|
var matchSet = new HashSet<string>(_matchColumns, StringComparer.OrdinalIgnoreCase);
|
||||||
var updateCols = _updateColumns ?? allColumns.Where(c => !matchSet.Contains(c)).ToArray();
|
var updateCols = (_updateColumns ?? allColumns.Where(c => !matchSet.Contains(c)).ToArray())
|
||||||
|
.Where(c => destColumns.Contains(c))
|
||||||
|
.ToArray();
|
||||||
|
|
||||||
// Build MERGE SQL
|
// Build MERGE SQL
|
||||||
var mergeSql = BuildMergeSql(tempTableName, allColumns, updateCols);
|
var mergeSql = BuildMergeSql(tempTableName, allColumns, updateCols);
|
||||||
@@ -135,7 +147,8 @@ public class DbBulkMergeDestination : IImportDestination
|
|||||||
|
|
||||||
private async Task CreateTempTableAsync(SqlConnection connection, string tempTableName, CancellationToken ct)
|
private async Task CreateTempTableAsync(SqlConnection connection, string tempTableName, CancellationToken ct)
|
||||||
{
|
{
|
||||||
var sql = $"SELECT TOP 0 * INTO {tempTableName} FROM [{_tableName}]";
|
var qualifiedName = CommonScripts.FormatQualifiedTableName(_tableName);
|
||||||
|
var sql = $"SELECT TOP 0 * INTO {tempTableName} FROM {qualifiedName}";
|
||||||
await using var cmd = connection.CreateCommand();
|
await using var cmd = connection.CreateCommand();
|
||||||
cmd.CommandText = sql;
|
cmd.CommandText = sql;
|
||||||
cmd.CommandTimeout = _commandTimeoutSeconds;
|
cmd.CommandTimeout = _commandTimeoutSeconds;
|
||||||
@@ -199,8 +212,9 @@ public class DbBulkMergeDestination : IImportDestination
|
|||||||
|
|
||||||
private string BuildMergeSql(string tempTableName, IReadOnlyList<string> allColumns, IReadOnlyList<string> updateColumns)
|
private string BuildMergeSql(string tempTableName, IReadOnlyList<string> allColumns, IReadOnlyList<string> updateColumns)
|
||||||
{
|
{
|
||||||
|
var qualifiedName = CommonScripts.FormatQualifiedTableName(_tableName);
|
||||||
var sb = new StringBuilder();
|
var sb = new StringBuilder();
|
||||||
sb.AppendLine($"MERGE INTO [{_tableName}] AS target");
|
sb.AppendLine($"MERGE INTO {qualifiedName} AS target");
|
||||||
sb.AppendLine($"USING {tempTableName} AS source");
|
sb.AppendLine($"USING {tempTableName} AS source");
|
||||||
sb.Append("ON ");
|
sb.Append("ON ");
|
||||||
sb.AppendLine(string.Join(" AND ", _matchColumns.Select(c => $"target.[{c}] = source.[{c}]")));
|
sb.AppendLine(string.Join(" AND ", _matchColumns.Select(c => $"target.[{c}] = source.[{c}]")));
|
||||||
|
|||||||
@@ -60,6 +60,9 @@ public class EtlPipelineBuilder
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: Currently this timeout value is stored but not passed to destinations.
|
||||||
|
// In the future, the pipeline should pass this timeout to destinations that support it.
|
||||||
|
// For now, destinations use their own default timeout (600 seconds).
|
||||||
public EtlPipelineBuilder WithCommandTimeout(TimeSpan timeout)
|
public EtlPipelineBuilder WithCommandTimeout(TimeSpan timeout)
|
||||||
{
|
{
|
||||||
if (timeout < TimeSpan.Zero || timeout > TimeSpan.FromHours(24))
|
if (timeout < TimeSpan.Zero || timeout > TimeSpan.FromHours(24))
|
||||||
|
|||||||
@@ -18,6 +18,15 @@ public static class CommonScripts
|
|||||||
: ("dbo", parts[0]);
|
: ("dbo", parts[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Formats a table name as a properly quoted [schema].[table] identifier.
|
||||||
|
/// </summary>
|
||||||
|
public static string FormatQualifiedTableName(string tableName)
|
||||||
|
{
|
||||||
|
var (schema, table) = ParseTableName(tableName);
|
||||||
|
return $"[{schema}].[{table}]";
|
||||||
|
}
|
||||||
|
|
||||||
public static IScriptRunner DisableIndexes(
|
public static IScriptRunner DisableIndexes(
|
||||||
IDbConnectionFactory factory,
|
IDbConnectionFactory factory,
|
||||||
string tableName,
|
string tableName,
|
||||||
|
|||||||
@@ -56,6 +56,20 @@ public class JdeDateTransformer : DataTransformerBase
|
|||||||
_dateOrdinal = source.GetOrdinal(_dateColumn);
|
_dateOrdinal = source.GetOrdinal(_dateColumn);
|
||||||
_timeOrdinal = source.GetOrdinal(_timeColumn);
|
_timeOrdinal = source.GetOrdinal(_timeColumn);
|
||||||
|
|
||||||
|
// Validate output column name doesn't conflict with existing columns
|
||||||
|
// (excluding the date and time columns which will be replaced/removed)
|
||||||
|
for (int i = 0; i < source.FieldCount; i++)
|
||||||
|
{
|
||||||
|
if (i == _dateOrdinal || i == _timeOrdinal) continue;
|
||||||
|
var existingName = source.GetName(i);
|
||||||
|
if (string.Equals(existingName, _outputColumn, StringComparison.OrdinalIgnoreCase))
|
||||||
|
{
|
||||||
|
throw new InvalidOperationException(
|
||||||
|
$"Output column name '{_outputColumn}' conflicts with existing column '{existingName}'. " +
|
||||||
|
"Choose a different output column name.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
var ordinalList = new List<int>();
|
var ordinalList = new List<int>();
|
||||||
var nameList = new List<string>();
|
var nameList = new List<string>();
|
||||||
_nameToOrdinal = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);
|
_nameToOrdinal = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);
|
||||||
|
|||||||
@@ -98,4 +98,20 @@ public class CommonScriptsTests
|
|||||||
Assert.Equal("TimeoutScript", runner.ScriptName);
|
Assert.Equal("TimeoutScript", runner.ScriptName);
|
||||||
Assert.NotNull(runner);
|
Assert.NotNull(runner);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[Theory]
|
||||||
|
[InlineData("WorkOrder", "[dbo].[WorkOrder]")]
|
||||||
|
[InlineData("dbo.WorkOrder", "[dbo].[WorkOrder]")]
|
||||||
|
[InlineData("[dbo].[WorkOrder]", "[dbo].[WorkOrder]")]
|
||||||
|
[InlineData("Config.Settings", "[Config].[Settings]")]
|
||||||
|
[InlineData("[Config].[Settings]", "[Config].[Settings]")]
|
||||||
|
[InlineData("myschema.MyTable", "[myschema].[MyTable]")]
|
||||||
|
public void FormatQualifiedTableName_VariousFormats_FormatsCorrectly(string input, string expected)
|
||||||
|
{
|
||||||
|
// Act
|
||||||
|
var result = CommonScripts.FormatQualifiedTableName(input);
|
||||||
|
|
||||||
|
// Assert
|
||||||
|
Assert.Equal(expected, result);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -240,6 +240,46 @@ public class JdeDateTransformerTests
|
|||||||
Assert.Equal(sentinel, result);
|
Assert.Equal(sentinel, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[Fact]
|
||||||
|
public void OutputColumnConflictsWithExistingColumn_ThrowsInvalidOperationException()
|
||||||
|
{
|
||||||
|
// Arrange - "Name" column exists and we try to use it as output
|
||||||
|
var source = CreateMockReader(new[] { "Id", "UPMJ", "TDAY", "Name" }, new object[] { 1, 124001m, 120000m, "Test" });
|
||||||
|
var transformer = new JdeDateTransformer("UPMJ", "TDAY", "Name"); // "Name" conflicts
|
||||||
|
|
||||||
|
// Act & Assert
|
||||||
|
var ex = Assert.Throws<InvalidOperationException>(() => transformer.Transform(source));
|
||||||
|
Assert.Contains("conflicts with existing column", ex.Message);
|
||||||
|
}
|
||||||
|
|
||||||
|
[Fact]
|
||||||
|
public void OutputColumnMatchesDateColumn_Succeeds()
|
||||||
|
{
|
||||||
|
// Arrange - output column can match the date column name (it replaces it)
|
||||||
|
var source = CreateMockReader(new[] { "Id", "UPMJ", "TDAY", "Name" }, new object[] { 1, 124001m, 120000m, "Test" });
|
||||||
|
var transformer = new JdeDateTransformer("UPMJ", "TDAY", "UPMJ"); // Same as date column
|
||||||
|
|
||||||
|
// Act - should not throw
|
||||||
|
var reader = transformer.Transform(source);
|
||||||
|
|
||||||
|
// Assert
|
||||||
|
Assert.Equal("UPMJ", reader.GetName(1)); // Still named UPMJ
|
||||||
|
}
|
||||||
|
|
||||||
|
[Fact]
|
||||||
|
public void OutputColumnMatchesTimeColumn_Succeeds()
|
||||||
|
{
|
||||||
|
// Arrange - output column can match the time column name (it's removed anyway)
|
||||||
|
var source = CreateMockReader(new[] { "Id", "UPMJ", "TDAY", "Name" }, new object[] { 1, 124001m, 120000m, "Test" });
|
||||||
|
var transformer = new JdeDateTransformer("UPMJ", "TDAY", "TDAY"); // Same as time column
|
||||||
|
|
||||||
|
// Act - should not throw
|
||||||
|
var reader = transformer.Transform(source);
|
||||||
|
|
||||||
|
// Assert
|
||||||
|
Assert.Equal(3, reader.FieldCount); // One column removed
|
||||||
|
}
|
||||||
|
|
||||||
private static IDataReader CreateMockReader(string[] columns, object[] values)
|
private static IDataReader CreateMockReader(string[] columns, object[] values)
|
||||||
{
|
{
|
||||||
var reader = Substitute.For<IDataReader>();
|
var reader = Substitute.For<IDataReader>();
|
||||||
|
|||||||
Reference in New Issue
Block a user