7e36bb4225
Remove 9 unused types from Core (duplicate extension classes, TableSpec, ColumnSpec, LotLocation), move ComponentLotViewModel and OperatorViewModel from Client to Core, and refactor DataSync.Dev to use pipeline-based configuration. Fix Login.razor to use UserInfoDto directly.
121 lines
4.5 KiB
C#
121 lines
4.5 KiB
C#
using System.Collections.Concurrent;
|
|
using JdeScoping.DataSync.Dev.Contracts;
|
|
using JdeScoping.DataSync.Etl.Results;
|
|
using Microsoft.Extensions.Logging;
|
|
|
|
namespace JdeScoping.DataSync.Dev;
|
|
|
|
/// <summary>
/// Registry for development ETL pipelines that load from cached protobuf files.
/// Uses JSON configuration via IDevEtlPipelineFactory.
/// </summary>
public class DevEtlRegistry
{
    private readonly IDevEtlPipelineFactory _pipelineFactory;
    private readonly string _cacheDirectory;
    private readonly ILogger<DevEtlRegistry>? _logger;

    /// <summary>
    /// Creates a registry bound to a pipeline factory and a cache directory.
    /// </summary>
    /// <param name="pipelineFactory">Factory used to list tables and build their pipelines.</param>
    /// <param name="cacheDirectory">Directory passed to the factory when a pipeline is built; must already exist.</param>
    /// <param name="logger">Optional logger; when <c>null</c>, nothing is logged.</param>
    /// <exception cref="ArgumentNullException"><paramref name="pipelineFactory"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException"><paramref name="cacheDirectory"/> is <c>null</c>, empty, or whitespace.</exception>
    /// <exception cref="DirectoryNotFoundException"><paramref name="cacheDirectory"/> does not exist.</exception>
    public DevEtlRegistry(
        IDevEtlPipelineFactory pipelineFactory,
        string cacheDirectory,
        ILogger<DevEtlRegistry>? logger = null)
    {
        _pipelineFactory = pipelineFactory ?? throw new ArgumentNullException(nameof(pipelineFactory));

        if (string.IsNullOrWhiteSpace(cacheDirectory))
        {
            throw new ArgumentException("Cache directory is required.", nameof(cacheDirectory));
        }

        if (!Directory.Exists(cacheDirectory))
        {
            throw new DirectoryNotFoundException($"Cache directory not found: {cacheDirectory}");
        }

        _cacheDirectory = cacheDirectory;
        _logger = logger;
    }

    /// <summary>
    /// Returns the table names reported by the pipeline factory.
    /// </summary>
    public IEnumerable<string> GetAvailableTables() => _pipelineFactory.GetAvailableTables();

    /// <summary>
    /// Builds the pipeline for a single table, executes it, and logs the outcome.
    /// </summary>
    /// <param name="tableName">Name of the table whose pipeline should run.</param>
    /// <param name="cancellationToken">Token forwarded to the pipeline execution.</param>
    /// <returns>The result returned by the pipeline, whether it succeeded or failed.</returns>
    public async Task<PipelineResult> RunAsync(string tableName, CancellationToken cancellationToken = default)
    {
        _logger?.LogInformation("Running dev ETL for {TableName}", tableName);

        var pipeline = _pipelineFactory.GetPipeline(tableName, _cacheDirectory);
        var result = await pipeline.ExecuteAsync(cancellationToken);

        if (result.Success)
        {
            _logger?.LogInformation("Completed {TableName}: {Rows} rows in {Elapsed:g}",
                tableName, result.TotalRows, result.Elapsed);
        }
        else
        {
            _logger?.LogError(result.Error, "Failed {TableName}: {Error}",
                tableName, result.Error?.Message);
        }

        return result;
    }

    /// <summary>
    /// Runs every available table's pipeline one after another.
    /// </summary>
    /// <param name="cancellationToken">Checked before each table and forwarded to each run.</param>
    /// <returns>One result per table, in the order the tables were enumerated.</returns>
    public async Task<IReadOnlyList<PipelineResult>> RunAllAsync(CancellationToken cancellationToken = default)
    {
        var results = new List<PipelineResult>();

        foreach (var tableName in GetAvailableTables())
        {
            cancellationToken.ThrowIfCancellationRequested();
            results.Add(await RunAsync(tableName, cancellationToken));
        }

        return results;
    }

    /// <summary>
    /// Runs all dev ETL pipelines with parallelization.
    /// Small/medium tables run concurrently, very large tables run sequentially at the end.
    /// </summary>
    /// <param name="maxDegreeOfParallelism">Maximum concurrent table loads (default 4). Must be at least 1.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>
    /// Results for the small/medium tables in enumeration order, followed by the results
    /// for the very large tables in enumeration order.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="maxDegreeOfParallelism"/> is less than 1.
    /// </exception>
    public async Task<IReadOnlyList<PipelineResult>> RunAllParallelAsync(
        int maxDegreeOfParallelism = 4,
        CancellationToken cancellationToken = default)
    {
        // A semaphore created with a count of zero would never admit a worker,
        // so a value of 0 would hang forever; reject it up front.
        if (maxDegreeOfParallelism < 1)
        {
            throw new ArgumentOutOfRangeException(
                nameof(maxDegreeOfParallelism),
                maxDegreeOfParallelism,
                "Maximum degree of parallelism must be at least 1.");
        }

        // Separate tables by size - run very large ones sequentially at the end.
        // The table list is enumerated exactly once so both groups come from the same snapshot.
        var smallMediumTables = new List<string>();
        var veryLargeTables = new List<string>();
        foreach (var tableName in GetAvailableTables())
        {
            if (_pipelineFactory.IsVeryLargeTable(tableName))
            {
                veryLargeTables.Add(tableName);
            }
            else
            {
                smallMediumTables.Add(tableName);
            }
        }

        _logger?.LogInformation(
            "Running {ParallelCount} tables in parallel (max {MaxParallel}), then {SequentialCount} large tables sequentially",
            smallMediumTables.Count, maxDegreeOfParallelism, veryLargeTables.Count);

        using var semaphore = new SemaphoreSlim(maxDegreeOfParallelism);

        // Run small/medium tables in parallel, throttled by the semaphore.
        var parallelTasks = smallMediumTables
            .Select(async tableName =>
            {
                await semaphore.WaitAsync(cancellationToken);
                try
                {
                    return await RunAsync(tableName, cancellationToken);
                }
                finally
                {
                    semaphore.Release();
                }
            })
            .ToList();

        // Task.WhenAll yields results in task order, which keeps the output deterministic.
        var results = new List<PipelineResult>(smallMediumTables.Count + veryLargeTables.Count);
        results.AddRange(await Task.WhenAll(parallelTasks));

        // Run very large tables sequentially (IO-bound, would contend)
        foreach (var tableName in veryLargeTables)
        {
            cancellationToken.ThrowIfCancellationRequested();
            results.Add(await RunAsync(tableName, cancellationToken));
        }

        return results;
    }
}
|