feat(etl): add column mapping to destinations (intersect with dest schema)
This commit is contained in:
@@ -1,8 +1,10 @@
|
||||
using System.Data;
|
||||
using System.Diagnostics;
|
||||
using Dapper;
|
||||
using JdeScoping.DataAccess.Interfaces;
|
||||
using JdeScoping.DataSync.Etl.Contracts;
|
||||
using JdeScoping.DataSync.Etl.Results;
|
||||
using JdeScoping.DataSync.Etl.Scripts;
|
||||
using Microsoft.Data.SqlClient;
|
||||
|
||||
namespace JdeScoping.DataSync.Etl.Destinations;
|
||||
@@ -67,6 +69,9 @@ public class DbBulkImportDestination : IImportDestination
|
||||
await truncateCmd.ExecuteNonQueryAsync(cancellationToken);
|
||||
}
|
||||
|
||||
// Get destination columns for column mapping
|
||||
var destColumns = await GetDestinationColumnsAsync(connection, cancellationToken);
|
||||
|
||||
// Bulk copy data
|
||||
using var bulkCopy = new SqlBulkCopy(connection)
|
||||
{
|
||||
@@ -76,10 +81,14 @@ public class DbBulkImportDestination : IImportDestination
|
||||
EnableStreaming = true
|
||||
};
|
||||
|
||||
// Map columns by name
|
||||
// Map only columns that exist in destination
|
||||
for (int i = 0; i < source.FieldCount; i++)
|
||||
{
|
||||
bulkCopy.ColumnMappings.Add(source.GetName(i), source.GetName(i));
|
||||
var columnName = source.GetName(i);
|
||||
if (destColumns.Contains(columnName))
|
||||
{
|
||||
bulkCopy.ColumnMappings.Add(columnName, columnName);
|
||||
}
|
||||
}
|
||||
|
||||
// Track rows via event
|
||||
@@ -102,4 +111,17 @@ public class DbBulkImportDestination : IImportDestination
|
||||
stopwatch.Stop();
|
||||
return new DestinationResult(totalRows, batchCount, stopwatch.Elapsed);
|
||||
}
|
||||
|
||||
private async Task<HashSet<string>> GetDestinationColumnsAsync(
|
||||
SqlConnection connection,
|
||||
CancellationToken ct)
|
||||
{
|
||||
var (schema, table) = CommonScripts.ParseTableName(_tableName);
|
||||
var sql = @"SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS
|
||||
WHERE TABLE_NAME = @tableName AND TABLE_SCHEMA = @schemaName";
|
||||
var columns = await connection.QueryAsync<string>(
|
||||
new CommandDefinition(sql, new { tableName = table, schemaName = schema },
|
||||
commandTimeout: _commandTimeoutSeconds, cancellationToken: ct));
|
||||
return columns.ToHashSet(StringComparer.OrdinalIgnoreCase);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
using System.Data;
|
||||
using System.Diagnostics;
|
||||
using System.Text;
|
||||
using Dapper;
|
||||
using JdeScoping.DataAccess.Interfaces;
|
||||
using JdeScoping.DataSync.Etl.Contracts;
|
||||
using JdeScoping.DataSync.Etl.Results;
|
||||
using JdeScoping.DataSync.Etl.Scripts;
|
||||
using Microsoft.Data.SqlClient;
|
||||
|
||||
namespace JdeScoping.DataSync.Etl.Destinations;
|
||||
@@ -79,6 +81,9 @@ public class DbBulkMergeDestination : IImportDestination
|
||||
// Create temp table from destination schema
|
||||
await CreateTempTableAsync(connection, tempTableName, cancellationToken);
|
||||
|
||||
// Get destination columns for column mapping
|
||||
var destColumns = await GetDestinationColumnsAsync(connection, cancellationToken);
|
||||
|
||||
// Get all column names from source
|
||||
var allColumns = new List<string>();
|
||||
for (int i = 0; i < source.FieldCount; i++)
|
||||
@@ -105,7 +110,7 @@ public class DbBulkMergeDestination : IImportDestination
|
||||
if (batch.Rows.Count >= _batchSize)
|
||||
{
|
||||
batchCount++;
|
||||
await ProcessBatchAsync(connection, batch, tempTableName, mergeSql, cancellationToken);
|
||||
await ProcessBatchAsync(connection, batch, tempTableName, mergeSql, destColumns, cancellationToken);
|
||||
totalRows += batch.Rows.Count;
|
||||
batch.Clear();
|
||||
}
|
||||
@@ -115,7 +120,7 @@ public class DbBulkMergeDestination : IImportDestination
|
||||
if (batch.Rows.Count > 0)
|
||||
{
|
||||
batchCount++;
|
||||
await ProcessBatchAsync(connection, batch, tempTableName, mergeSql, cancellationToken);
|
||||
await ProcessBatchAsync(connection, batch, tempTableName, mergeSql, destColumns, cancellationToken);
|
||||
totalRows += batch.Rows.Count;
|
||||
}
|
||||
|
||||
@@ -158,6 +163,7 @@ public class DbBulkMergeDestination : IImportDestination
|
||||
DataTable batch,
|
||||
string tempTableName,
|
||||
string mergeSql,
|
||||
HashSet<string> destColumns,
|
||||
CancellationToken ct)
|
||||
{
|
||||
// Bulk copy to temp table
|
||||
@@ -167,6 +173,16 @@ public class DbBulkMergeDestination : IImportDestination
|
||||
BatchSize = batch.Rows.Count,
|
||||
BulkCopyTimeout = _commandTimeoutSeconds
|
||||
};
|
||||
|
||||
// Map only columns that exist in destination
|
||||
foreach (DataColumn col in batch.Columns)
|
||||
{
|
||||
if (destColumns.Contains(col.ColumnName))
|
||||
{
|
||||
bulkCopy.ColumnMappings.Add(col.ColumnName, col.ColumnName);
|
||||
}
|
||||
}
|
||||
|
||||
await bulkCopy.WriteToServerAsync(batch, ct);
|
||||
|
||||
// Execute MERGE
|
||||
@@ -211,4 +227,17 @@ public class DbBulkMergeDestination : IImportDestination
|
||||
table.Columns.Add(source.GetName(i), baseType);
|
||||
}
|
||||
}
|
||||
|
||||
private async Task<HashSet<string>> GetDestinationColumnsAsync(
|
||||
SqlConnection connection,
|
||||
CancellationToken ct)
|
||||
{
|
||||
var (schema, table) = CommonScripts.ParseTableName(_tableName);
|
||||
var sql = @"SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS
|
||||
WHERE TABLE_NAME = @tableName AND TABLE_SCHEMA = @schemaName";
|
||||
var columns = await connection.QueryAsync<string>(
|
||||
new CommandDefinition(sql, new { tableName = table, schemaName = schema },
|
||||
commandTimeout: _commandTimeoutSeconds, cancellationToken: ct));
|
||||
return columns.ToHashSet(StringComparer.OrdinalIgnoreCase);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
using System.Data;
|
||||
using JdeScoping.DataAccess.Interfaces;
|
||||
using JdeScoping.DataSync.Etl.Destinations;
|
||||
using NSubstitute;
|
||||
@@ -6,6 +7,28 @@ namespace JdeScoping.DataSync.Tests.Etl.Destinations;
|
||||
|
||||
public class DbBulkImportDestinationTests
|
||||
{
|
||||
/// <summary>
|
||||
/// This test documents that column mapping is applied to ignore extra source columns.
|
||||
/// The actual functionality requires a database connection and is an integration test concept.
|
||||
/// The implementation fetches destination columns from INFORMATION_SCHEMA.COLUMNS
|
||||
/// and only maps columns that exist in both source and destination.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public void WriteAsync_SourceHasExtraColumns_IgnoresExtraColumns_IntegrationTestConcept()
|
||||
{
|
||||
// This is an integration test concept -
|
||||
// The actual behavior verifies that column mappings are applied:
|
||||
// 1. GetDestinationColumnsAsync fetches columns from INFORMATION_SCHEMA.COLUMNS
|
||||
// 2. Column mappings only added for columns in destination (case-insensitive)
|
||||
// 3. Extra source columns are silently ignored during bulk copy
|
||||
//
|
||||
// To test this fully, an integration test with a real database is required.
|
||||
// The unit test here just verifies the component can be constructed.
|
||||
var factory = Substitute.For<IDbConnectionFactory>();
|
||||
var dest = new DbBulkImportDestination(factory, "TestTable");
|
||||
Assert.NotNull(dest);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Constructor_SetsDestinationName()
|
||||
{
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
using System.Data;
|
||||
using JdeScoping.DataAccess.Interfaces;
|
||||
using JdeScoping.DataSync.Etl.Destinations;
|
||||
using NSubstitute;
|
||||
@@ -6,6 +7,28 @@ namespace JdeScoping.DataSync.Tests.Etl.Destinations;
|
||||
|
||||
public class DbBulkMergeDestinationTests
|
||||
{
|
||||
/// <summary>
|
||||
/// This test documents that column mapping is applied to ignore extra source columns.
|
||||
/// The actual functionality requires a database connection and is an integration test concept.
|
||||
/// The implementation fetches destination columns from INFORMATION_SCHEMA.COLUMNS
|
||||
/// and only maps columns that exist in both source and destination.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public void WriteAsync_SourceHasExtraColumns_IgnoresExtraColumns_IntegrationTestConcept()
|
||||
{
|
||||
// This is an integration test concept -
|
||||
// The actual behavior verifies that column mappings are applied:
|
||||
// 1. GetDestinationColumnsAsync fetches columns from INFORMATION_SCHEMA.COLUMNS
|
||||
// 2. ProcessBatchAsync only adds column mappings for columns in destination
|
||||
// 3. Extra source columns are silently ignored during bulk copy
|
||||
//
|
||||
// To test this fully, an integration test with a real database is required.
|
||||
// The unit test here just verifies the component can be constructed.
|
||||
var factory = Substitute.For<IDbConnectionFactory>();
|
||||
var dest = new DbBulkMergeDestination(factory, "TestTable", new[] { "Id" });
|
||||
Assert.NotNull(dest);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Constructor_SetsDestinationName()
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user