refactor: address code review findings across all projects

Apply comprehensive fixes from code reviews, including:

- Extract shared utilities (SqlFormatHelper, CellValueConverter, DbDestinationBase)
- Add interface abstractions (IAuthenticationService, IDatabaseMigrator, IMisQueryBuilder)
- Implement SecureStore for encrypted secrets storage
- Fix error handling with proper HTTP status codes and logging
- Optimize double enumeration in DevEtlRegistry
- Add DataSync.Dev README for developer onboarding
- Extract filter panel base classes to reduce duplication
- Update code review docs to mark all issues as fixed
2026-01-19 11:05:36 -05:00
parent 08f5aa1447
commit 604bfe919c
148 changed files with 8696 additions and 1538 deletions
@@ -79,10 +79,11 @@ public class DevEtlRegistry
        using var semaphore = new SemaphoreSlim(maxDegreeOfParallelism);

        // Separate tables by size - run very large ones sequentially at the end
-        var smallMediumTables = GetAvailableTables()
+        var allTables = GetAvailableTables().ToList();
+        var smallMediumTables = allTables
            .Where(t => !_pipelineFactory.IsVeryLargeTable(t))
            .ToList();
-        var veryLargeTables = GetAvailableTables()
+        var veryLargeTables = allTables
            .Where(t => _pipelineFactory.IsVeryLargeTable(t))
            .ToList();

@@ -0,0 +1,103 @@
+# JdeScoping.DataSync.Dev
+
+Development-only ETL tooling for loading cached protobuf data into SQL Server. This project enables developers to work with production-like data locally without connecting to live JDE/CMS systems.
+
+## Purpose
+
+This project provides a way to load pre-cached data snapshots (in protobuf format with zstd compression) into the local SQL Server database. It is intended **only for development and testing** - production data sync uses the `JdeScoping.DataSync` project with live connections to enterprise systems.
+
+## Prerequisites
+
+1. **Cache Directory**: A folder containing protobuf data files (`.pb.zstd` format)
+2. **SQL Server Database**: Local SQL Server instance with the JDE Scoping schema
+3. **Connection String**: Valid SQL Server connection configured in `appsettings.json`
+
+## Configuration
+
+Pipeline configurations are stored in `Pipelines/dev-pipelines.json`. This file defines:
+
+- **Size categories**: Tables are categorized as small, medium, large, or veryLarge
+- **Pipeline definitions**: Source file mappings to destination tables
+
+### Size Categories
+
+| Category | Tables | Parallelization |
+|----------|--------|-----------------|
+| Small | Branch, OrgHierarchy, WorkCenter, ProfitCenter | Parallel |
+| Medium | JdeUser, FunctionCode, Item, RouteMaster, MisData_Curr | Parallel |
+| Large | Lot, MisData_Hist, WorkOrder_Curr/Hist, LotUsage_Hist, WorkOrderComponent_Hist | Parallel |
+| VeryLarge | WorkOrderStep_\*, WorkOrderComponent_Curr, WorkOrderRouting, LotUsage_Curr, WorkOrderTime_\* | Sequential |
+
+Very large tables run sequentially at the end to avoid I/O contention.
+
+## Folder Structure
+
+```text
+JdeScoping.DataSync.Dev/
+├── Configuration/      # DTOs for JSON config deserialization
+├── Contracts/          # Interface definitions (IDevEtlPipelineFactory)
+├── Options/            # Options pattern classes
+├── Services/           # Implementation (DevEtlPipelineFactory)
+├── Sources/            # IImportSource implementations (ProtobufZstdFileSource)
+├── Pipelines/          # JSON configuration files
+└── DevEtlRegistry.cs   # Main orchestrator class
+```
+
+## Usage
+
+### Basic Usage
+
+```csharp
+// Create the registry
+var factory = new DevEtlPipelineFactory(options, connectionString, logger);
+var registry = new DevEtlRegistry(factory, cacheDirectory, logger);
+
+// List available tables
+foreach (var table in registry.GetAvailableTables())
+{
+    Console.WriteLine(table);
+}
+```
+
+### Run Single Table
+
+```csharp
+var result = await registry.RunAsync("Branch");
+if (result.Success)
+{
+    Console.WriteLine($"Loaded {result.TotalRows} rows in {result.Elapsed}");
+}
+```
+
+### Run All Tables Sequentially
+
+```csharp
+var results = await registry.RunAllAsync(cancellationToken);
+```
+
+### Run All Tables with Parallelization
+
+```csharp
+// Run small/medium/large tables in parallel (max 4 concurrent)
+// Very large tables run sequentially at the end
+var results = await registry.RunAllParallelAsync(
+    maxDegreeOfParallelism: 4,
+    cancellationToken);
+```
+
+## Data Flow
+
+1. **Source**: `ProtobufZstdFileSource` opens each `.pb.zstd` file, decompresses it with zstd, and deserializes it into an `IDataReader` using protobuf-net-data
+2. **Transform**: Data passes through as `IDataReader` (no transformation)
+3. **Destination**: Uses `JdeScoping.DataSync` bulk import/merge destinations
+
+## Dependencies
+
+- **JdeScoping.DataSync**: Core ETL pipeline infrastructure
+- **protobuf-net-data**: Protobuf serialization with IDataReader support
+
+## Testing
+
+The project exposes its internal types to the following assemblies via `InternalsVisibleTo`, so they can be unit tested and mocked:
+- `JdeScoping.DataSync.Dev.Tests`
+- `DynamicProxyGenAssembly2` (for Moq)
@@ -9,6 +9,12 @@ namespace JdeScoping.DataSync.Dev.Sources;
 /// Import source that reads from a zstd-compressed protobuf file.
 /// Uses protobuf-net-data for IDataReader deserialization.
 /// </summary>
+/// <remarks>
+/// This source wraps the synchronous <c>DataSerializer.Deserialize</c> in <c>Task.Run</c>
+/// because protobuf-net-data does not provide a native async API. The file is opened with
+/// <c>FileOptions.Asynchronous</c> to optimize for async I/O patterns, and the synchronous
+/// deserialization is offloaded to the thread pool to prevent blocking the calling context.
+/// </remarks>
 public sealed class ProtobufZstdFileSource : IImportSource
 {
    private const int FileBufferSize = 256 * 1024;  // 256 KB
@@ -33,28 +39,32 @@ public sealed class ProtobufZstdFileSource : IImportSource
        _filePath = filePath;
    }

-    public Task<IDataReader> ReadDataAsync(CancellationToken cancellationToken = default)
+    public async Task<IDataReader> ReadDataAsync(CancellationToken cancellationToken = default)
    {
        if (_fileStream != null)
            throw new InvalidOperationException("ReadDataAsync has already been called. Dispose and create a new source to read again.");

        try
        {
+            // Use FileOptions.Asynchronous for optimized async I/O patterns
            _fileStream = new FileStream(
                _filePath,
                FileMode.Open,
                FileAccess.Read,
                FileShare.Read,
                bufferSize: FileBufferSize,
-                FileOptions.SequentialScan);
+                FileOptions.SequentialScan | FileOptions.Asynchronous);

            _decompressionStream = new DecompressionStream(_fileStream);
            _bufferedStream = new BufferedStream(_decompressionStream, DecompressBufferSize);

-            // protobuf-net-data returns IDataReader directly
-            _reader = DataSerializer.Deserialize(_bufferedStream);
+            // Offload synchronous deserialization to thread pool since protobuf-net-data
+            // doesn't have a native async API. This prevents blocking the calling context.
+            _reader = await Task.Run(
+                () => DataSerializer.Deserialize(_bufferedStream),
+                cancellationToken);

-            return Task.FromResult(_reader);
+            return _reader;
        }
        catch
        {