refactor: address code review findings across all projects
Apply comprehensive fixes from code reviews, including: extract shared utilities (SqlFormatHelper, CellValueConverter, DbDestinationBase); add interface abstractions (IAuthenticationService, IDatabaseMigrator, IMisQueryBuilder); implement SecureStore for encrypted secrets storage; fix error handling with proper HTTP status codes and logging; eliminate double enumeration in DevEtlRegistry; add DataSync.Dev README for developer onboarding; extract filter panel base classes to reduce duplication; update code review docs to mark all issues as fixed.
This commit is contained in:
@@ -79,10 +79,11 @@ public class DevEtlRegistry
|
||||
using var semaphore = new SemaphoreSlim(maxDegreeOfParallelism);
|
||||
|
||||
// Separate tables by size - run very large ones sequentially at the end
|
||||
var smallMediumTables = GetAvailableTables()
|
||||
var allTables = GetAvailableTables().ToList();
|
||||
var smallMediumTables = allTables
|
||||
.Where(t => !_pipelineFactory.IsVeryLargeTable(t))
|
||||
.ToList();
|
||||
var veryLargeTables = GetAvailableTables()
|
||||
var veryLargeTables = allTables
|
||||
.Where(t => _pipelineFactory.IsVeryLargeTable(t))
|
||||
.ToList();
|
||||
|
||||
|
||||
@@ -0,0 +1,103 @@
|
||||
# JdeScoping.DataSync.Dev
|
||||
|
||||
Development-only ETL tooling for loading cached protobuf data into SQL Server. This project enables developers to work with production-like data locally without connecting to live JDE/CMS systems.
|
||||
|
||||
## Purpose
|
||||
|
||||
This project provides a way to load pre-cached data snapshots (in protobuf format with zstd compression) into the local SQL Server database. It is intended **only for development and testing** - production data sync uses the `JdeScoping.DataSync` project with live connections to enterprise systems.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
1. **Cache Directory**: A folder containing protobuf data files (`.pb.zstd` format)
|
||||
2. **SQL Server Database**: Local SQL Server instance with the JDE Scoping schema
|
||||
3. **Connection String**: A valid SQL Server connection string configured in `appsettings.json`
|
||||
|
||||
## Configuration
|
||||
|
||||
Pipeline configurations are stored in `Pipelines/dev-pipelines.json`. This file defines:
|
||||
|
||||
- **Size categories**: Tables are categorized as small, medium, large, or veryLarge
|
||||
- **Pipeline definitions**: Source file mappings to destination tables
|
||||
|
||||
### Size Categories
|
||||
|
||||
| Category | Tables | Parallelization |
|
||||
|----------|--------|-----------------|
|
||||
| Small | Branch, OrgHierarchy, WorkCenter, ProfitCenter | Parallel |
|
||||
| Medium | JdeUser, FunctionCode, Item, RouteMaster, MisData_Curr | Parallel |
|
||||
| Large | Lot, MisData_Hist, WorkOrder_Curr/Hist, LotUsage_Hist, WorkOrderComponent_Hist | Parallel |
|
||||
| VeryLarge | WorkOrderStep_*, WorkOrderComponent_Curr, WorkOrderRouting, LotUsage_Curr, WorkOrderTime_* | Sequential |
|
||||
|
||||
Very large tables run sequentially at the end to avoid I/O contention.
|
||||
|
||||
## Folder Structure
|
||||
|
||||
```
|
||||
JdeScoping.DataSync.Dev/
|
||||
├── Configuration/ # DTOs for JSON config deserialization
|
||||
├── Contracts/ # Interface definitions (IDevEtlPipelineFactory)
|
||||
├── Options/ # Options pattern classes
|
||||
├── Services/ # Implementation (DevEtlPipelineFactory)
|
||||
├── Sources/ # IImportSource implementations (ProtobufZstdFileSource)
|
||||
├── Pipelines/ # JSON configuration files
|
||||
└── DevEtlRegistry.cs # Main orchestrator class
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```csharp
|
||||
// Create the registry
|
||||
var factory = new DevEtlPipelineFactory(options, connectionString, logger);
|
||||
var registry = new DevEtlRegistry(factory, cacheDirectory, logger);
|
||||
|
||||
// List available tables
|
||||
foreach (var table in registry.GetAvailableTables())
|
||||
{
|
||||
Console.WriteLine(table);
|
||||
}
|
||||
```
|
||||
|
||||
### Run Single Table
|
||||
|
||||
```csharp
|
||||
var result = await registry.RunAsync("Branch");
|
||||
if (result.Success)
|
||||
{
|
||||
Console.WriteLine($"Loaded {result.TotalRows} rows in {result.Elapsed}");
|
||||
}
|
||||
```
|
||||
|
||||
### Run All Tables Sequentially
|
||||
|
||||
```csharp
|
||||
var results = await registry.RunAllAsync(cancellationToken);
|
||||
```
|
||||
|
||||
### Run All Tables with Parallelization
|
||||
|
||||
```csharp
|
||||
// Run small/medium/large tables in parallel (max 4 concurrent)
|
||||
// Very large tables run sequentially at the end
|
||||
var results = await registry.RunAllParallelAsync(
|
||||
maxDegreeOfParallelism: 4,
|
||||
cancellationToken);
|
||||
```
|
||||
|
||||
## Data Flow
|
||||
|
||||
1. **Source**: `ProtobufZstdFileSource` reads `.pb.zstd` files using protobuf-net-data
|
||||
2. **Transform**: Data passes through as `IDataReader` (no transformation)
|
||||
3. **Destination**: Uses `JdeScoping.DataSync` bulk import/merge destinations
|
||||
|
||||
## Dependencies
|
||||
|
||||
- **JdeScoping.DataSync**: Core ETL pipeline infrastructure
|
||||
- **protobuf-net-data**: Protobuf serialization with IDataReader support
|
||||
|
||||
## Testing
|
||||
|
||||
To support unit testing, the project exposes its internal types via `InternalsVisibleTo` to the following assemblies:
|
||||
- `JdeScoping.DataSync.Dev.Tests`
|
||||
- `DynamicProxyGenAssembly2` (for Moq)
|
||||
@@ -9,6 +9,12 @@ namespace JdeScoping.DataSync.Dev.Sources;
|
||||
/// Import source that reads from a zstd-compressed protobuf file.
|
||||
/// Uses protobuf-net-data for IDataReader deserialization.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// This source wraps the synchronous <c>DataSerializer.Deserialize</c> in <c>Task.Run</c>
|
||||
/// because protobuf-net-data does not provide a native async API. The file is opened with
|
||||
/// <c>FileOptions.Asynchronous</c> to optimize for async I/O patterns, and the synchronous
|
||||
/// deserialization is offloaded to the thread pool to prevent blocking the calling context.
|
||||
/// </remarks>
|
||||
public sealed class ProtobufZstdFileSource : IImportSource
|
||||
{
|
||||
private const int FileBufferSize = 256 * 1024; // 256 KB
|
||||
@@ -33,28 +39,32 @@ public sealed class ProtobufZstdFileSource : IImportSource
|
||||
_filePath = filePath;
|
||||
}
|
||||
|
||||
public Task<IDataReader> ReadDataAsync(CancellationToken cancellationToken = default)
|
||||
public async Task<IDataReader> ReadDataAsync(CancellationToken cancellationToken = default)
|
||||
{
|
||||
if (_fileStream != null)
|
||||
throw new InvalidOperationException("ReadDataAsync has already been called. Dispose and create a new source to read again.");
|
||||
|
||||
try
|
||||
{
|
||||
// Use FileOptions.Asynchronous for optimized async I/O patterns
|
||||
_fileStream = new FileStream(
|
||||
_filePath,
|
||||
FileMode.Open,
|
||||
FileAccess.Read,
|
||||
FileShare.Read,
|
||||
bufferSize: FileBufferSize,
|
||||
FileOptions.SequentialScan);
|
||||
FileOptions.SequentialScan | FileOptions.Asynchronous);
|
||||
|
||||
_decompressionStream = new DecompressionStream(_fileStream);
|
||||
_bufferedStream = new BufferedStream(_decompressionStream, DecompressBufferSize);
|
||||
|
||||
// protobuf-net-data returns IDataReader directly
|
||||
_reader = DataSerializer.Deserialize(_bufferedStream);
|
||||
// Offload synchronous deserialization to thread pool since protobuf-net-data
|
||||
// doesn't have a native async API. This prevents blocking the calling context.
|
||||
_reader = await Task.Run(
|
||||
() => DataSerializer.Deserialize(_bufferedStream),
|
||||
cancellationToken);
|
||||
|
||||
return Task.FromResult(_reader);
|
||||
return _reader;
|
||||
}
|
||||
catch
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user