feat(etl): implement DbBulkImportDestination for full table refresh
Add bulk import destination that truncates and loads data using SqlBulkCopy with configurable batch sizes and streaming support.
This commit is contained in:
@@ -0,0 +1,99 @@
|
||||
using System.Data;
|
||||
using System.Diagnostics;
|
||||
using JdeScoping.DataAccess.Interfaces;
|
||||
using JdeScoping.DataSync.Etl.Contracts;
|
||||
using JdeScoping.DataSync.Etl.Results;
|
||||
using Microsoft.Data.SqlClient;
|
||||
|
||||
namespace JdeScoping.DataSync.Etl.Destinations;
|
||||
|
||||
/// <summary>
/// Imports data into a SQL Server table using bulk copy operations.
/// Performs a full table refresh by truncating the table before loading.
/// </summary>
/// <remarks>
/// The truncate and the bulk load are issued as two separate operations on the same
/// connection; this class does not itself start a transaction around them.
/// </remarks>
public class DbBulkImportDestination : IImportDestination
{
    /// <summary>Batch size used when the caller does not supply a positive value.</summary>
    private const int DefaultBatchSize = 10000;

    /// <summary>Value assigned to <c>SqlBulkCopy.BulkCopyTimeout</c> (seconds).</summary>
    private const int BulkCopyTimeoutSeconds = 3600;

    private readonly IDbConnectionFactory _connectionFactory;
    private readonly string _tableName;

    // Bracket-delimited form of _tableName with any ']' doubled, safe to embed in SQL text.
    private readonly string _quotedTableName;

    private readonly int _batchSize;

    /// <inheritdoc />
    public string DestinationName => $"BulkImport:{_tableName}";

    /// <summary>
    /// Creates a new bulk import destination for the specified table.
    /// </summary>
    /// <param name="connectionFactory">Factory to create database connections.</param>
    /// <param name="tableName">
    /// Name of the destination table. The whole value is treated as a single identifier
    /// and wrapped in one pair of square brackets.
    /// </param>
    /// <param name="batchSize">
    /// Number of rows per batch. Zero or a negative value uses the default (10000).
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="connectionFactory"/> or <paramref name="tableName"/> is <c>null</c>.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="tableName"/> is empty or consists only of white space.
    /// </exception>
    public DbBulkImportDestination(
        IDbConnectionFactory connectionFactory,
        string tableName,
        int batchSize = 0)
    {
        ArgumentNullException.ThrowIfNull(connectionFactory);
        ArgumentException.ThrowIfNullOrWhiteSpace(tableName);

        _connectionFactory = connectionFactory;
        _tableName = tableName;
        _quotedTableName = QuoteIdentifier(tableName);
        _batchSize = batchSize > 0 ? batchSize : DefaultBatchSize;
    }

    /// <inheritdoc />
    /// <remarks>
    /// Truncates the destination table, then streams every row of <paramref name="source"/>
    /// into it, mapping source columns to destination columns of the same name.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is <c>null</c>.</exception>
    public async Task<DestinationResult> WriteAsync(
        IDataReader source,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(source);

        var stopwatch = Stopwatch.StartNew();
        long totalRows = 0;
        int batchCount = 0;

        await using var connection = await _connectionFactory.CreateLotFinderConnectionAsync(cancellationToken);

        // Truncate destination table
        await using (var truncateCmd = connection.CreateCommand())
        {
            truncateCmd.CommandText = $"TRUNCATE TABLE {_quotedTableName}";
            await truncateCmd.ExecuteNonQueryAsync(cancellationToken);
        }

        // Bulk copy data
        using var bulkCopy = new SqlBulkCopy(connection)
        {
            DestinationTableName = _quotedTableName,
            BatchSize = _batchSize,
            BulkCopyTimeout = BulkCopyTimeoutSeconds,
            EnableStreaming = true
        };

        // Map columns by name (source column -> identically named destination column)
        for (int i = 0; i < source.FieldCount; i++)
        {
            string columnName = source.GetName(i);
            bulkCopy.ColumnMappings.Add(columnName, columnName);
        }

        // Track rows via event: one notification per full batch of _batchSize rows.
        bulkCopy.NotifyAfter = _batchSize;
        bulkCopy.SqlRowsCopied += (_, e) =>
        {
            totalRows = e.RowsCopied;
            batchCount++;
        };

        await bulkCopy.WriteToServerAsync(source, cancellationToken);

        // Final count - a trailing partial batch never triggers the notification,
        // so pick up the remainder from the RowsCopied property and count it as a batch.
        if (bulkCopy.RowsCopied > totalRows)
        {
            totalRows = bulkCopy.RowsCopied;
            batchCount++;
        }

        stopwatch.Stop();
        return new DestinationResult(totalRows, batchCount, stopwatch.Elapsed);
    }

    /// <summary>
    /// Wraps <paramref name="identifier"/> in square brackets, doubling any embedded
    /// <c>]</c> so the value cannot terminate the delimited identifier early.
    /// </summary>
    private static string QuoteIdentifier(string identifier)
    {
        return "[" + identifier.Replace("]", "]]", StringComparison.Ordinal) + "]";
    }
}
|
||||
Reference in New Issue
Block a user