feat(etl): implement DbBulkMergeDestination for incremental updates
This commit is contained in:
@@ -0,0 +1,204 @@
|
||||
using System.Data;
|
||||
using System.Diagnostics;
|
||||
using System.Text;
|
||||
using JdeScoping.DataAccess.Interfaces;
|
||||
using JdeScoping.DataSync.Etl.Contracts;
|
||||
using JdeScoping.DataSync.Etl.Results;
|
||||
using Microsoft.Data.SqlClient;
|
||||
|
||||
namespace JdeScoping.DataSync.Etl.Destinations;
|
||||
|
||||
/// <summary>
|
||||
/// Imports data into a SQL Server table using bulk copy to a temp table followed by MERGE.
|
||||
/// This approach supports incremental updates by matching on key columns and updating
|
||||
/// existing rows or inserting new ones.
|
||||
/// </summary>
|
||||
public class DbBulkMergeDestination : IImportDestination
|
||||
{
|
||||
private const int DefaultBatchSize = 10000;
|
||||
|
||||
private readonly IDbConnectionFactory _connectionFactory;
|
||||
private readonly string _tableName;
|
||||
private readonly string[] _matchColumns;
|
||||
private readonly string[]? _updateColumns;
|
||||
private readonly int _batchSize;
|
||||
|
||||
/// <inheritdoc />
|
||||
public string DestinationName => $"BulkMerge:{_tableName}";
|
||||
|
||||
/// <summary>
|
||||
/// Creates a new bulk merge destination for the specified table.
|
||||
/// </summary>
|
||||
/// <param name="connectionFactory">Factory to create database connections.</param>
|
||||
/// <param name="tableName">Name of the destination table.</param>
|
||||
/// <param name="matchColumns">Columns to match on for determining existing rows (key columns).</param>
|
||||
/// <param name="updateColumns">Columns to update when a row matches. If null, all non-match columns are updated.</param>
|
||||
/// <param name="batchSize">Number of rows per batch. 0 uses the default (10000).</param>
|
||||
public DbBulkMergeDestination(
|
||||
IDbConnectionFactory connectionFactory,
|
||||
string tableName,
|
||||
string[] matchColumns,
|
||||
string[]? updateColumns = null,
|
||||
int batchSize = 0)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(connectionFactory);
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(tableName);
|
||||
ArgumentNullException.ThrowIfNull(matchColumns);
|
||||
if (matchColumns.Length == 0)
|
||||
throw new ArgumentException("At least one match column is required.", nameof(matchColumns));
|
||||
|
||||
_connectionFactory = connectionFactory;
|
||||
_tableName = tableName;
|
||||
_matchColumns = matchColumns;
|
||||
_updateColumns = updateColumns;
|
||||
_batchSize = batchSize > 0 ? batchSize : DefaultBatchSize;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<DestinationResult> WriteAsync(
|
||||
IDataReader source,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(source);
|
||||
|
||||
var stopwatch = Stopwatch.StartNew();
|
||||
long totalRows = 0;
|
||||
int batchCount = 0;
|
||||
|
||||
var tempTableName = $"#ETL_{_tableName.Replace(".", "_").Replace("[", "").Replace("]", "")}";
|
||||
|
||||
await using var connection = await _connectionFactory.CreateLotFinderConnectionAsync(cancellationToken);
|
||||
|
||||
try
|
||||
{
|
||||
// Create temp table from destination schema
|
||||
await CreateTempTableAsync(connection, tempTableName, cancellationToken);
|
||||
|
||||
// Get all column names from source
|
||||
var allColumns = new List<string>();
|
||||
for (int i = 0; i < source.FieldCount; i++)
|
||||
allColumns.Add(source.GetName(i));
|
||||
|
||||
// Determine update columns (all non-match columns if not specified)
|
||||
var matchSet = new HashSet<string>(_matchColumns, StringComparer.OrdinalIgnoreCase);
|
||||
var updateCols = _updateColumns ?? allColumns.Where(c => !matchSet.Contains(c)).ToArray();
|
||||
|
||||
// Build MERGE SQL
|
||||
var mergeSql = BuildMergeSql(tempTableName, allColumns, updateCols);
|
||||
|
||||
// Process in batches using DataTable buffer
|
||||
var batch = new DataTable();
|
||||
SetupDataTable(batch, source);
|
||||
|
||||
while (source.Read())
|
||||
{
|
||||
var row = batch.NewRow();
|
||||
for (int i = 0; i < source.FieldCount; i++)
|
||||
row[i] = source.GetValue(i);
|
||||
batch.Rows.Add(row);
|
||||
|
||||
if (batch.Rows.Count >= _batchSize)
|
||||
{
|
||||
batchCount++;
|
||||
await ProcessBatchAsync(connection, batch, tempTableName, mergeSql, cancellationToken);
|
||||
totalRows += batch.Rows.Count;
|
||||
batch.Clear();
|
||||
}
|
||||
}
|
||||
|
||||
// Process remaining rows
|
||||
if (batch.Rows.Count > 0)
|
||||
{
|
||||
batchCount++;
|
||||
await ProcessBatchAsync(connection, batch, tempTableName, mergeSql, cancellationToken);
|
||||
totalRows += batch.Rows.Count;
|
||||
}
|
||||
|
||||
stopwatch.Stop();
|
||||
return new DestinationResult(totalRows, batchCount, stopwatch.Elapsed);
|
||||
}
|
||||
finally
|
||||
{
|
||||
await DropTempTableAsync(connection, tempTableName);
|
||||
}
|
||||
}
|
||||
|
||||
private async Task CreateTempTableAsync(SqlConnection connection, string tempTableName, CancellationToken ct)
|
||||
{
|
||||
var sql = $"SELECT TOP 0 * INTO {tempTableName} FROM [{_tableName}]";
|
||||
await using var cmd = connection.CreateCommand();
|
||||
cmd.CommandText = sql;
|
||||
await cmd.ExecuteNonQueryAsync(ct);
|
||||
}
|
||||
|
||||
private async Task DropTempTableAsync(SqlConnection connection, string tempTableName)
|
||||
{
|
||||
try
|
||||
{
|
||||
var sql = $"IF OBJECT_ID('tempdb..{tempTableName}') IS NOT NULL DROP TABLE {tempTableName}";
|
||||
await using var cmd = connection.CreateCommand();
|
||||
cmd.CommandText = sql;
|
||||
await cmd.ExecuteNonQueryAsync();
|
||||
}
|
||||
catch
|
||||
{
|
||||
// Ignore cleanup errors
|
||||
}
|
||||
}
|
||||
|
||||
private async Task ProcessBatchAsync(
|
||||
SqlConnection connection,
|
||||
DataTable batch,
|
||||
string tempTableName,
|
||||
string mergeSql,
|
||||
CancellationToken ct)
|
||||
{
|
||||
// Bulk copy to temp table
|
||||
using var bulkCopy = new SqlBulkCopy(connection)
|
||||
{
|
||||
DestinationTableName = tempTableName,
|
||||
BatchSize = batch.Rows.Count
|
||||
};
|
||||
await bulkCopy.WriteToServerAsync(batch, ct);
|
||||
|
||||
// Execute MERGE
|
||||
await using var cmd = connection.CreateCommand();
|
||||
cmd.CommandText = mergeSql;
|
||||
await cmd.ExecuteNonQueryAsync(ct);
|
||||
|
||||
// Truncate temp table for next batch
|
||||
cmd.CommandText = $"TRUNCATE TABLE {tempTableName}";
|
||||
await cmd.ExecuteNonQueryAsync(ct);
|
||||
}
|
||||
|
||||
private string BuildMergeSql(string tempTableName, IReadOnlyList<string> allColumns, IReadOnlyList<string> updateColumns)
|
||||
{
|
||||
var sb = new StringBuilder();
|
||||
sb.AppendLine($"MERGE INTO [{_tableName}] AS target");
|
||||
sb.AppendLine($"USING {tempTableName} AS source");
|
||||
sb.Append("ON ");
|
||||
sb.AppendLine(string.Join(" AND ", _matchColumns.Select(c => $"target.[{c}] = source.[{c}]")));
|
||||
|
||||
if (updateColumns.Count > 0)
|
||||
{
|
||||
sb.AppendLine("WHEN MATCHED THEN UPDATE SET");
|
||||
sb.AppendLine(string.Join(", ", updateColumns.Select(c => $"target.[{c}] = source.[{c}]")));
|
||||
}
|
||||
|
||||
sb.AppendLine("WHEN NOT MATCHED THEN INSERT");
|
||||
sb.AppendLine($"({string.Join(", ", allColumns.Select(c => $"[{c}]"))})");
|
||||
sb.AppendLine($"VALUES ({string.Join(", ", allColumns.Select(c => $"source.[{c}]"))});");
|
||||
|
||||
return sb.ToString();
|
||||
}
|
||||
|
||||
private static void SetupDataTable(DataTable table, IDataReader source)
|
||||
{
|
||||
for (int i = 0; i < source.FieldCount; i++)
|
||||
{
|
||||
var type = source.GetFieldType(i);
|
||||
var baseType = Nullable.GetUnderlyingType(type) ?? type;
|
||||
table.Columns.Add(source.GetName(i), baseType);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,65 @@
|
||||
using JdeScoping.DataAccess.Interfaces;
|
||||
using JdeScoping.DataSync.Etl.Destinations;
|
||||
using NSubstitute;
|
||||
|
||||
namespace JdeScoping.DataSync.Tests.Etl.Destinations;
|
||||
|
||||
public class DbBulkMergeDestinationTests
|
||||
{
|
||||
[Fact]
|
||||
public void Constructor_SetsDestinationName()
|
||||
{
|
||||
var factory = Substitute.For<IDbConnectionFactory>();
|
||||
var dest = new DbBulkMergeDestination(factory, "WorkOrder", new[] { "OrderNumber" });
|
||||
Assert.Equal("BulkMerge:WorkOrder", dest.DestinationName);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Constructor_NullFactory_ThrowsArgumentNullException()
|
||||
{
|
||||
Assert.Throws<ArgumentNullException>(() =>
|
||||
new DbBulkMergeDestination(null!, "WorkOrder", new[] { "Id" }));
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Constructor_NullTableName_ThrowsArgumentNullException()
|
||||
{
|
||||
var factory = Substitute.For<IDbConnectionFactory>();
|
||||
Assert.Throws<ArgumentNullException>(() =>
|
||||
new DbBulkMergeDestination(factory, null!, new[] { "Id" }));
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Constructor_EmptyTableName_ThrowsArgumentException()
|
||||
{
|
||||
var factory = Substitute.For<IDbConnectionFactory>();
|
||||
Assert.Throws<ArgumentException>(() =>
|
||||
new DbBulkMergeDestination(factory, "", new[] { "Id" }));
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Constructor_EmptyMatchColumns_ThrowsArgumentException()
|
||||
{
|
||||
var factory = Substitute.For<IDbConnectionFactory>();
|
||||
Assert.Throws<ArgumentException>(() =>
|
||||
new DbBulkMergeDestination(factory, "WorkOrder", Array.Empty<string>()));
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Constructor_NullMatchColumns_ThrowsArgumentNullException()
|
||||
{
|
||||
var factory = Substitute.For<IDbConnectionFactory>();
|
||||
Assert.Throws<ArgumentNullException>(() =>
|
||||
new DbBulkMergeDestination(factory, "WorkOrder", null!));
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Constructor_WithUpdateColumns_Succeeds()
|
||||
{
|
||||
var factory = Substitute.For<IDbConnectionFactory>();
|
||||
var dest = new DbBulkMergeDestination(factory, "WorkOrder",
|
||||
new[] { "OrderNumber" },
|
||||
updateColumns: new[] { "Status", "Description" });
|
||||
Assert.Equal("BulkMerge:WorkOrder", dest.DestinationName);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user