fix(data-access): correct self-referential SQL in WorkCenter filter
The WHERE clause was comparing Code to itself instead of the aliased table reference, which would always be true.
This commit is contained in:
@@ -0,0 +1,868 @@
|
||||
# Development ETL Pipeline Implementation Plan
|
||||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
|
||||
|
||||
**Goal:** Create development ETL pipelines that load cached `.json.zstd` files into SQL Server for local development.
|
||||
|
||||
**Architecture:** Streaming JSON reader (`JsonZstdFileSource`) feeds into existing ETL pipeline infrastructure.
|
||||
|
||||
**Tech Stack:** .NET 10, ZstdSharp, System.Text.Json, existing ETL framework
|
||||
|
||||
---
|
||||
|
||||
## Phase 1: Core Infrastructure + Branch Table
|
||||
|
||||
### Task 1: Add ZstdSharp NuGet Package
|
||||
|
||||
**Files:**
|
||||
- Modify: `NEW/src/JdeScoping.DataSync/JdeScoping.DataSync.csproj`
|
||||
|
||||
**Step 1: Add package reference**
|
||||
|
||||
```xml
|
||||
<PackageReference Include="ZstdSharp.Port" Version="0.8.1" />
|
||||
```
|
||||
|
||||
**Step 2: Verify package restores**
|
||||
|
||||
Run: `dotnet restore NEW/src/JdeScoping.DataSync/JdeScoping.DataSync.csproj`
|
||||
Expected: Restore succeeds
|
||||
|
||||
---
|
||||
|
||||
### Task 2: Create JsonColumnSchema
|
||||
|
||||
**Files:**
|
||||
- Create: `NEW/src/JdeScoping.DataSync/Etl/Models/JsonColumnSchema.cs`
|
||||
|
||||
**Step 1: Create the file**
|
||||
|
||||
```csharp
|
||||
namespace JdeScoping.DataSync.Etl.Models;
|
||||
|
||||
/// <summary>
/// Describes a single column used when mapping JSON objects onto data-reader rows.
/// </summary>
/// <param name="Name">The column name.</param>
/// <param name="ClrType">The CLR type of the column's values.</param>
/// <param name="IsNullable">Whether the column may contain nulls; defaults to <c>true</c>.</param>
public record JsonColumnSchema(string Name, Type ClrType, bool IsNullable = true)
{
    /// <summary>
    /// Gets a SQL type name corresponding to <see cref="ClrType"/> (used in error messages).
    /// Types without a known mapping yield <c>"UNKNOWN"</c>.
    /// </summary>
    public string SqlTypeName
    {
        get
        {
            var clrType = ClrType;

            if (clrType == typeof(string))
            {
                return "VARCHAR";
            }

            if (clrType == typeof(int))
            {
                return "INT";
            }

            if (clrType == typeof(long))
            {
                return "BIGINT";
            }

            if (clrType == typeof(decimal))
            {
                return "DECIMAL";
            }

            if (clrType == typeof(DateTime))
            {
                return "DATETIME2";
            }

            if (clrType == typeof(bool))
            {
                return "BIT";
            }

            if (clrType == typeof(byte[]))
            {
                return "VARBINARY";
            }

            return "UNKNOWN";
        }
    }
}
|
||||
```
|
||||
|
||||
**Step 2: Verify it compiles**
|
||||
|
||||
Run: `dotnet build NEW/src/JdeScoping.DataSync/JdeScoping.DataSync.csproj`
|
||||
Expected: Build succeeds
|
||||
|
||||
---
|
||||
|
||||
### Task 3: Create JsonStreamingDataReader
|
||||
|
||||
**Files:**
|
||||
- Create: `NEW/src/JdeScoping.DataSync/Etl/Sources/JsonStreamingDataReader.cs`
|
||||
|
||||
**Step 1: Create the file**
|
||||
|
||||
```csharp
|
||||
using System.Data;
|
||||
using System.Text.Json;
|
||||
using JdeScoping.DataSync.Etl.Models;
|
||||
|
||||
namespace JdeScoping.DataSync.Etl.Sources;
|
||||
|
||||
/// <summary>
|
||||
/// Streams a JSON array as an IDataReader, parsing one object at a time.
|
||||
/// </summary>
|
||||
internal sealed class JsonStreamingDataReader : IDataReader
|
||||
{
|
||||
private readonly Stream _stream;
|
||||
private readonly StreamReader _streamReader;
|
||||
private readonly JsonColumnSchema[] _schema;
|
||||
private readonly Dictionary<string, int> _nameToOrdinal;
|
||||
private object?[] _currentRow;
|
||||
private bool _disposed;
|
||||
private bool _started;
|
||||
private bool _finished;
|
||||
|
||||
public JsonStreamingDataReader(Stream stream, JsonColumnSchema[] schema)
|
||||
{
|
||||
_stream = stream ?? throw new ArgumentNullException(nameof(stream));
|
||||
_schema = schema ?? throw new ArgumentNullException(nameof(schema));
|
||||
_streamReader = new StreamReader(stream);
|
||||
_currentRow = new object?[schema.Length];
|
||||
|
||||
_nameToOrdinal = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);
|
||||
for (int i = 0; i < schema.Length; i++)
|
||||
{
|
||||
_nameToOrdinal[schema[i].Name] = i;
|
||||
}
|
||||
}
|
||||
|
||||
public int FieldCount => _schema.Length;
|
||||
public int Depth => 0;
|
||||
public bool IsClosed => _disposed;
|
||||
public int RecordsAffected => -1;
|
||||
|
||||
public object this[int ordinal] => GetValue(ordinal);
|
||||
public object this[string name] => GetValue(GetOrdinal(name));
|
||||
|
||||
public string GetName(int ordinal) => _schema[ordinal].Name;
|
||||
public int GetOrdinal(string name) => _nameToOrdinal.TryGetValue(name, out var ordinal)
|
||||
? ordinal
|
||||
: throw new IndexOutOfRangeException($"Column '{name}' not found.");
|
||||
|
||||
public Type GetFieldType(int ordinal) => _schema[ordinal].ClrType;
|
||||
public string GetDataTypeName(int ordinal) => _schema[ordinal].SqlTypeName;
|
||||
|
||||
public object GetValue(int ordinal) => _currentRow[ordinal] ?? DBNull.Value;
|
||||
public bool IsDBNull(int ordinal) => _currentRow[ordinal] is null;
|
||||
|
||||
/// <summary>
/// Advances to the next object in the JSON array and loads its values into the current row.
/// </summary>
/// <returns>
/// <c>true</c> when a row was loaded; <c>false</c> once the reader is disposed, the closing
/// <c>]</c> has been seen, or no further object can be read from the stream.
/// </returns>
/// <exception cref="InvalidDataException">
/// The stream does not start with <c>[</c>, or an element could not be parsed as JSON.
/// </exception>
public bool Read()
{
    if (_disposed || _finished)
    {
        return false;
    }

    try
    {
        // The opening '[' is consumed exactly once, on the first call.
        if (!_started)
        {
            SkipWhitespaceAndExpect('[');
            _started = true;
        }

        SkipWhitespace();

        // Keep the raw int so end-of-stream (-1) is never mistaken for a character.
        int next = _streamReader.Peek();

        if (next == ']')
        {
            _finished = true;
            return false;
        }

        if (next == ',')
        {
            _streamReader.Read(); // consume the element separator
            SkipWhitespace();
        }

        // JsonDocument holds pooled buffers; dispose it as soon as the row has been copied out.
        // Every value stored in _currentRow is copied out of the document by ParseValue,
        // so nothing in the row refers to the document after disposal.
        using var jsonObject = ReadJsonObject();
        if (jsonObject is null)
        {
            _finished = true;
            return false;
        }

        // Columns absent from this object stay null; properties not in the schema are ignored.
        Array.Clear(_currentRow);
        foreach (var property in jsonObject.RootElement.EnumerateObject())
        {
            if (_nameToOrdinal.TryGetValue(property.Name, out var ordinal))
            {
                _currentRow[ordinal] = ParseValue(property.Value, _schema[ordinal].ClrType);
            }
        }

        return true;
    }
    catch (JsonException ex)
    {
        throw new InvalidDataException($"Failed to parse JSON: {ex.Message}", ex);
    }
}
|
||||
|
||||
private JsonDocument? ReadJsonObject()
|
||||
{
|
||||
SkipWhitespace();
|
||||
if (_streamReader.Peek() == -1 || (char)_streamReader.Peek() == ']')
|
||||
return null;
|
||||
|
||||
// Read characters until we have a complete JSON object
|
||||
var buffer = new System.Text.StringBuilder();
|
||||
int braceCount = 0;
|
||||
bool inString = false;
|
||||
bool escaped = false;
|
||||
|
||||
while (true)
|
||||
{
|
||||
int c = _streamReader.Read();
|
||||
if (c == -1) break;
|
||||
|
||||
char ch = (char)c;
|
||||
buffer.Append(ch);
|
||||
|
||||
if (escaped)
|
||||
{
|
||||
escaped = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ch == '\\' && inString)
|
||||
{
|
||||
escaped = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ch == '"')
|
||||
{
|
||||
inString = !inString;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!inString)
|
||||
{
|
||||
if (ch == '{') braceCount++;
|
||||
else if (ch == '}')
|
||||
{
|
||||
braceCount--;
|
||||
if (braceCount == 0) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var json = buffer.ToString().Trim();
|
||||
if (string.IsNullOrEmpty(json) || json == "]")
|
||||
return null;
|
||||
|
||||
return JsonDocument.Parse(json);
|
||||
}
|
||||
|
||||
/// <summary>
/// Converts a JSON value to the CLR type declared for its column.
/// </summary>
/// <param name="element">The JSON value to convert.</param>
/// <param name="targetType">The CLR type the value should be converted to.</param>
/// <returns>The converted value, or <c>null</c> when the JSON value is <c>null</c>.</returns>
/// <exception cref="NotSupportedException"><paramref name="targetType"/> is not a handled type.</exception>
private static object? ParseValue(JsonElement element, Type targetType)
{
    if (element.ValueKind == JsonValueKind.Null)
    {
        return null;
    }

    if (targetType == typeof(string))
    {
        return element.GetString();
    }

    // For numeric targets, numbers that do not fit the exact-type reader fall back to a
    // double read followed by a cast.
    if (targetType == typeof(int))
    {
        return element.TryGetInt32(out var i) ? i : (int)element.GetDouble();
    }

    if (targetType == typeof(long))
    {
        return element.TryGetInt64(out var l) ? l : (long)element.GetDouble();
    }

    if (targetType == typeof(decimal))
    {
        return element.TryGetDecimal(out var d) ? d : (decimal)element.GetDouble();
    }

    if (targetType == typeof(DateTime))
    {
        if (element.ValueKind == JsonValueKind.String)
        {
            // Cache files are machine-written, so parse with the invariant culture; the
            // current thread culture (and its default calendar) must not affect the result.
            return DateTime.Parse(
                element.GetString()!,
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.RoundtripKind);
        }

        return element.GetDateTime();
    }

    if (targetType == typeof(bool))
    {
        return element.GetBoolean();
    }

    if (targetType == typeof(byte[]))
    {
        return element.GetBytesFromBase64();
    }

    if (targetType == typeof(double))
    {
        return element.GetDouble();
    }

    throw new NotSupportedException($"Type {targetType.Name} is not supported.");
}
|
||||
|
||||
private void SkipWhitespace()
|
||||
{
|
||||
while (_streamReader.Peek() != -1 && char.IsWhiteSpace((char)_streamReader.Peek()))
|
||||
{
|
||||
_streamReader.Read();
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Skips leading whitespace, then consumes one character and verifies it is <paramref name="expected"/>.
/// </summary>
/// <param name="expected">The character that must come next in the stream.</param>
/// <exception cref="InvalidDataException">
/// The stream ended, or the next character was not <paramref name="expected"/>.
/// </exception>
private void SkipWhitespaceAndExpect(char expected)
{
    SkipWhitespace();

    // Read() returns -1 at end of stream; check before casting so an empty or truncated
    // input is reported as such rather than as a bogus '\uffff' character.
    int actual = _streamReader.Read();
    if (actual == -1)
    {
        throw new InvalidDataException($"Expected '{expected}' but reached end of stream.");
    }

    if ((char)actual != expected)
    {
        throw new InvalidDataException($"Expected '{expected}' but found '{(char)actual}'.");
    }
}
|
||||
|
||||
// IDataReader methods - typed getters
|
||||
public bool GetBoolean(int ordinal) => (bool)GetValue(ordinal);
|
||||
public byte GetByte(int ordinal) => (byte)GetValue(ordinal);
|
||||
/// <summary>
/// Copies bytes from the binary value in the given column into <paramref name="buffer"/>.
/// </summary>
/// <param name="ordinal">Zero-based column ordinal.</param>
/// <param name="fieldOffset">Index within the column value at which to start copying.</param>
/// <param name="buffer">Destination buffer, or <c>null</c> to query the total length.</param>
/// <param name="bufferOffset">Index within <paramref name="buffer"/> at which to start writing.</param>
/// <param name="length">Maximum number of bytes to copy.</param>
/// <returns>
/// The total length of the value when <paramref name="buffer"/> is <c>null</c>; otherwise the
/// number of bytes copied, which is 0 when <paramref name="fieldOffset"/> is at or past the end.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="fieldOffset"/> is negative.</exception>
public long GetBytes(int ordinal, long fieldOffset, byte[]? buffer, int bufferOffset, int length)
{
    var data = (byte[])GetValue(ordinal);
    if (buffer is null)
    {
        return data.Length;
    }

    if (fieldOffset < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(fieldOffset));
    }

    // Work in long so a large offset cannot wrap when narrowed, and so reading at or past
    // the end yields 0 instead of a negative count.
    long remaining = data.Length - fieldOffset;
    if (remaining <= 0)
    {
        return 0;
    }

    int toCopy = (int)Math.Min(length, remaining);
    Array.Copy(data, fieldOffset, buffer, bufferOffset, toCopy);
    return toCopy;
}
|
||||
public char GetChar(int ordinal) => ((string)GetValue(ordinal))[0];
|
||||
/// <summary>
/// Copies characters from the string value in the given column into <paramref name="buffer"/>.
/// </summary>
/// <param name="ordinal">Zero-based column ordinal.</param>
/// <param name="fieldOffset">Index within the column value at which to start copying.</param>
/// <param name="buffer">Destination buffer, or <c>null</c> to query the total length.</param>
/// <param name="bufferOffset">Index within <paramref name="buffer"/> at which to start writing.</param>
/// <param name="length">Maximum number of characters to copy.</param>
/// <returns>
/// The total length of the value when <paramref name="buffer"/> is <c>null</c>; otherwise the
/// number of characters copied, which is 0 when <paramref name="fieldOffset"/> is at or past the end.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="fieldOffset"/> is negative.</exception>
public long GetChars(int ordinal, long fieldOffset, char[]? buffer, int bufferOffset, int length)
{
    var data = (string)GetValue(ordinal);
    if (buffer is null)
    {
        return data.Length;
    }

    if (fieldOffset < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(fieldOffset));
    }

    // Work in long so a large offset cannot wrap when narrowed, and so reading at or past
    // the end yields 0 instead of a negative count.
    long remaining = data.Length - fieldOffset;
    if (remaining <= 0)
    {
        return 0;
    }

    int toCopy = (int)Math.Min(length, remaining);
    data.CopyTo((int)fieldOffset, buffer, bufferOffset, toCopy);
    return toCopy;
}
|
||||
public IDataReader GetData(int ordinal) => throw new NotSupportedException();
|
||||
public DateTime GetDateTime(int ordinal) => (DateTime)GetValue(ordinal);
|
||||
public decimal GetDecimal(int ordinal) => (decimal)GetValue(ordinal);
|
||||
public double GetDouble(int ordinal) => (double)GetValue(ordinal);
|
||||
public float GetFloat(int ordinal) => (float)GetValue(ordinal);
|
||||
public Guid GetGuid(int ordinal) => (Guid)GetValue(ordinal);
|
||||
public short GetInt16(int ordinal) => (short)GetValue(ordinal);
|
||||
public int GetInt32(int ordinal) => (int)GetValue(ordinal);
|
||||
public long GetInt64(int ordinal) => (long)GetValue(ordinal);
|
||||
public string GetString(int ordinal) => (string)GetValue(ordinal);
|
||||
public int GetValues(object[] values)
|
||||
{
|
||||
var count = Math.Min(values.Length, _currentRow.Length);
|
||||
for (int i = 0; i < count; i++)
|
||||
values[i] = GetValue(i);
|
||||
return count;
|
||||
}
|
||||
|
||||
public DataTable GetSchemaTable()
|
||||
{
|
||||
var table = new DataTable("SchemaTable");
|
||||
table.Columns.Add("ColumnName", typeof(string));
|
||||
table.Columns.Add("ColumnOrdinal", typeof(int));
|
||||
table.Columns.Add("DataType", typeof(Type));
|
||||
table.Columns.Add("AllowDBNull", typeof(bool));
|
||||
|
||||
for (int i = 0; i < _schema.Length; i++)
|
||||
{
|
||||
table.Rows.Add(_schema[i].Name, i, _schema[i].ClrType, _schema[i].IsNullable);
|
||||
}
|
||||
|
||||
return table;
|
||||
}
|
||||
|
||||
public bool NextResult() => false;
|
||||
|
||||
public void Close() => Dispose();
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
if (!_disposed)
|
||||
{
|
||||
_streamReader.Dispose();
|
||||
_disposed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Step 2: Verify it compiles**
|
||||
|
||||
Run: `dotnet build NEW/src/JdeScoping.DataSync/JdeScoping.DataSync.csproj`
|
||||
Expected: Build succeeds
|
||||
|
||||
---
|
||||
|
||||
### Task 4: Create JsonZstdFileSource
|
||||
|
||||
**Files:**
|
||||
- Create: `NEW/src/JdeScoping.DataSync/Etl/Sources/JsonZstdFileSource.cs`
|
||||
|
||||
**Step 1: Create the file**
|
||||
|
||||
```csharp
|
||||
using System.Data;
|
||||
using JdeScoping.DataSync.Etl.Contracts;
|
||||
using JdeScoping.DataSync.Etl.Models;
|
||||
using ZstdSharp;
|
||||
|
||||
namespace JdeScoping.DataSync.Etl.Sources;
|
||||
|
||||
/// <summary>
|
||||
/// Import source that reads from a zstd-compressed JSON array file.
|
||||
/// </summary>
|
||||
public sealed class JsonZstdFileSource : IImportSource
|
||||
{
|
||||
private readonly string _filePath;
|
||||
private readonly JsonColumnSchema[] _schema;
|
||||
private FileStream? _fileStream;
|
||||
private DecompressionStream? _decompressionStream;
|
||||
private JsonStreamingDataReader? _reader;
|
||||
|
||||
public string SourceName => $"JsonZstd:{Path.GetFileName(_filePath)}";
|
||||
|
||||
public JsonZstdFileSource(string filePath, JsonColumnSchema[] schema)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(filePath))
|
||||
throw new ArgumentException("File path cannot be null or empty.", nameof(filePath));
|
||||
|
||||
if (!File.Exists(filePath))
|
||||
throw new FileNotFoundException($"Cache file not found: {filePath}", filePath);
|
||||
|
||||
_filePath = filePath;
|
||||
_schema = schema ?? throw new ArgumentNullException(nameof(schema));
|
||||
}
|
||||
|
||||
/// <summary>
/// Opens the cache file and returns a reader that streams its rows.
/// </summary>
/// <param name="cancellationToken">Not consulted by this method; opening the file is synchronous.</param>
/// <returns>A completed task holding the data reader over the decompressed JSON array.</returns>
/// <exception cref="InvalidOperationException">
/// The source is already open from an earlier call that has not been disposed.
/// </exception>
public Task<IDataReader> ReadDataAsync(CancellationToken cancellationToken = default)
{
    // A second call would overwrite the stream fields and leak the handles opened by the first.
    if (_fileStream is not null)
    {
        throw new InvalidOperationException(
            $"{nameof(ReadDataAsync)} has already been called on this source.");
    }

    FileStream? fileStream = null;
    DecompressionStream? decompressionStream = null;
    try
    {
        fileStream = new FileStream(_filePath, FileMode.Open, FileAccess.Read, FileShare.Read,
            bufferSize: 65536, useAsync: true);
        decompressionStream = new DecompressionStream(fileStream);
        var reader = new JsonStreamingDataReader(decompressionStream, _schema);

        // Publish to the fields only once every resource exists, so DisposeAsync never
        // sees a half-initialized chain.
        _fileStream = fileStream;
        _decompressionStream = decompressionStream;
        _reader = reader;

        return Task.FromResult<IDataReader>(reader);
    }
    catch
    {
        // Release whatever was opened before the failure, then surface the original error.
        decompressionStream?.Dispose();
        fileStream?.Dispose();
        throw;
    }
}
|
||||
|
||||
public async ValueTask DisposeAsync()
|
||||
{
|
||||
if (_reader != null)
|
||||
{
|
||||
_reader.Dispose();
|
||||
_reader = null;
|
||||
}
|
||||
|
||||
if (_decompressionStream != null)
|
||||
{
|
||||
await _decompressionStream.DisposeAsync();
|
||||
_decompressionStream = null;
|
||||
}
|
||||
|
||||
if (_fileStream != null)
|
||||
{
|
||||
await _fileStream.DisposeAsync();
|
||||
_fileStream = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Step 2: Verify it compiles**
|
||||
|
||||
Run: `dotnet build NEW/src/JdeScoping.DataSync/JdeScoping.DataSync.csproj`
|
||||
Expected: Build succeeds
|
||||
|
||||
---
|
||||
|
||||
### Task 5: Create BranchDevEtl
|
||||
|
||||
**Files:**
|
||||
- Create: `NEW/src/JdeScoping.DataSync/DevEtl/BranchDevEtl.cs`
|
||||
|
||||
**Reference - Branch table schema from `003_CreateBranchTable.sql`:**
|
||||
- `Code` VARCHAR(12) NOT NULL
|
||||
- `Description` VARCHAR(40) NULL
|
||||
- `LastUpdateDT` DATETIME2(7) NOT NULL
|
||||
|
||||
**Step 1: Create the file**
|
||||
|
||||
```csharp
|
||||
using JdeScoping.DataAccess;
|
||||
using JdeScoping.DataSync.Etl.Destinations;
|
||||
using JdeScoping.DataSync.Etl.Models;
|
||||
using JdeScoping.DataSync.Etl.Pipeline;
|
||||
using JdeScoping.DataSync.Etl.Sources;
|
||||
|
||||
namespace JdeScoping.DataSync.DevEtl;
|
||||
|
||||
/// <summary>
|
||||
/// Development ETL pipeline for the Branch table.
|
||||
/// </summary>
|
||||
public static class BranchDevEtl
|
||||
{
|
||||
public static readonly string TableName = "Branch";
|
||||
public static readonly string CacheFileName = "branch.json.zstd";
|
||||
|
||||
private static readonly JsonColumnSchema[] Schema =
|
||||
[
|
||||
new("Code", typeof(string), IsNullable: false),
|
||||
new("Description", typeof(string), IsNullable: true),
|
||||
new("LastUpdateDT", typeof(DateTime), IsNullable: false),
|
||||
];
|
||||
|
||||
public static EtlPipeline Create(IDbConnectionFactory connectionFactory, string cacheFilePath)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(connectionFactory);
|
||||
|
||||
if (string.IsNullOrWhiteSpace(cacheFilePath))
|
||||
throw new ArgumentException("Cache file path is required.", nameof(cacheFilePath));
|
||||
|
||||
return new EtlPipelineBuilder()
|
||||
.WithName($"{TableName}_Dev")
|
||||
.WithSource(new JsonZstdFileSource(cacheFilePath, Schema))
|
||||
.WithDestination(new DbBulkImportDestination(connectionFactory, TableName))
|
||||
.Build();
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Step 2: Verify it compiles**
|
||||
|
||||
Run: `dotnet build NEW/src/JdeScoping.DataSync/JdeScoping.DataSync.csproj`
|
||||
Expected: Build succeeds
|
||||
|
||||
---
|
||||
|
||||
### Task 6: Create DevEtlRegistry
|
||||
|
||||
**Files:**
|
||||
- Create: `NEW/src/JdeScoping.DataSync/DevEtl/DevEtlRegistry.cs`
|
||||
|
||||
**Step 1: Create the file**
|
||||
|
||||
```csharp
|
||||
using JdeScoping.DataAccess;
|
||||
using JdeScoping.DataSync.Etl.Pipeline;
|
||||
using JdeScoping.DataSync.Etl.Results;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace JdeScoping.DataSync.DevEtl;
|
||||
|
||||
/// <summary>
|
||||
/// Registry for development ETL pipelines that load from cached JSON files.
|
||||
/// </summary>
|
||||
public class DevEtlRegistry
|
||||
{
|
||||
private readonly IDbConnectionFactory _connectionFactory;
|
||||
private readonly string _cacheDirectory;
|
||||
private readonly ILogger<DevEtlRegistry>? _logger;
|
||||
|
||||
private readonly Dictionary<string, Func<IDbConnectionFactory, string, EtlPipeline>> _pipelineFactories = new(StringComparer.OrdinalIgnoreCase)
|
||||
{
|
||||
[BranchDevEtl.TableName] = (factory, cacheDir) =>
|
||||
BranchDevEtl.Create(factory, Path.Combine(cacheDir, BranchDevEtl.CacheFileName)),
|
||||
};
|
||||
|
||||
public DevEtlRegistry(
|
||||
IDbConnectionFactory connectionFactory,
|
||||
string cacheDirectory,
|
||||
ILogger<DevEtlRegistry>? logger = null)
|
||||
{
|
||||
_connectionFactory = connectionFactory ?? throw new ArgumentNullException(nameof(connectionFactory));
|
||||
|
||||
if (string.IsNullOrWhiteSpace(cacheDirectory))
|
||||
throw new ArgumentException("Cache directory is required.", nameof(cacheDirectory));
|
||||
|
||||
if (!Directory.Exists(cacheDirectory))
|
||||
throw new DirectoryNotFoundException($"Cache directory not found: {cacheDirectory}");
|
||||
|
||||
_cacheDirectory = cacheDirectory;
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
public IEnumerable<string> GetAvailableTables() => _pipelineFactories.Keys;
|
||||
|
||||
public EtlPipeline GetPipeline(string tableName)
|
||||
{
|
||||
if (!_pipelineFactories.TryGetValue(tableName, out var factory))
|
||||
throw new ArgumentException($"No pipeline registered for table '{tableName}'.", nameof(tableName));
|
||||
|
||||
return factory(_connectionFactory, _cacheDirectory);
|
||||
}
|
||||
|
||||
public async Task<PipelineResult> RunAsync(string tableName, CancellationToken cancellationToken = default)
|
||||
{
|
||||
_logger?.LogInformation("Running dev ETL for {TableName}", tableName);
|
||||
|
||||
var pipeline = GetPipeline(tableName);
|
||||
var result = await pipeline.ExecuteAsync(cancellationToken);
|
||||
|
||||
if (result.Success)
|
||||
_logger?.LogInformation("Completed {TableName}: {Rows} rows in {Elapsed:g}",
|
||||
tableName, result.TotalRows, result.Elapsed);
|
||||
else
|
||||
_logger?.LogError(result.Error, "Failed {TableName}: {Error}",
|
||||
tableName, result.Error?.Message);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// <summary>
/// Runs every registered pipeline in turn and collects the results.
/// </summary>
/// <param name="cancellationToken">Token checked before each pipeline and passed to each run.</param>
/// <returns>One result per registered table, in registration enumeration order.</returns>
/// <exception cref="OperationCanceledException">
/// Cancellation was requested before a pipeline started.
/// </exception>
public async Task<IReadOnlyList<PipelineResult>> RunAllAsync(CancellationToken cancellationToken = default)
{
    var results = new List<PipelineResult>();

    foreach (var tableName in GetAvailableTables())
    {
        // Throw rather than break: returning a partial list would make a cancelled run
        // look like a completed one to the caller.
        cancellationToken.ThrowIfCancellationRequested();

        var result = await RunAsync(tableName, cancellationToken);
        results.Add(result);
    }

    return results;
}
|
||||
}
|
||||
```
|
||||
|
||||
**Step 2: Verify it compiles**
|
||||
|
||||
Run: `dotnet build NEW/src/JdeScoping.DataSync/JdeScoping.DataSync.csproj`
|
||||
Expected: Build succeeds
|
||||
|
||||
---
|
||||
|
||||
### Task 7: Create Integration Test for Branch
|
||||
|
||||
**Files:**
|
||||
- Create: `NEW/tests/JdeScoping.DataSync.Tests/DevEtl/BranchDevEtlTests.cs`
|
||||
|
||||
**Step 1: Create the test file**
|
||||
|
||||
```csharp
|
||||
using FluentAssertions;
|
||||
using JdeScoping.DataAccess;
|
||||
using JdeScoping.DataSync.DevEtl;
|
||||
using Microsoft.Data.SqlClient;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Xunit;
|
||||
|
||||
namespace JdeScoping.DataSync.Tests.DevEtl;
|
||||
|
||||
/// <summary>
|
||||
/// Integration tests for Branch development ETL.
|
||||
/// Requires: Local SQL Server, CACHED_DB_FILES directory with branch.json.zstd
|
||||
/// </summary>
|
||||
public class BranchDevEtlTests : IAsyncLifetime
|
||||
{
|
||||
private readonly string _connectionString;
|
||||
private readonly string _cacheDirectory;
|
||||
private readonly IDbConnectionFactory _connectionFactory;
|
||||
|
||||
public BranchDevEtlTests()
|
||||
{
|
||||
// Load configuration
|
||||
var config = new ConfigurationBuilder()
|
||||
.AddJsonFile("appsettings.json", optional: true)
|
||||
.AddEnvironmentVariables()
|
||||
.Build();
|
||||
|
||||
_connectionString = config.GetConnectionString("LotFinder")
|
||||
?? throw new InvalidOperationException("LotFinder connection string not configured.");
|
||||
|
||||
_cacheDirectory = config["DevEtl:CacheDirectory"]
|
||||
?? Path.Combine(Directory.GetCurrentDirectory(), "..", "..", "..", "..", "..", "CACHED_DB_FILES");
|
||||
|
||||
_connectionFactory = new DbConnectionFactory(_connectionString);
|
||||
}
|
||||
|
||||
public async Task InitializeAsync()
|
||||
{
|
||||
// Ensure Branch table is empty before test
|
||||
await using var connection = new SqlConnection(_connectionString);
|
||||
await connection.OpenAsync();
|
||||
await using var command = new SqlCommand("TRUNCATE TABLE dbo.Branch", connection);
|
||||
await command.ExecuteNonQueryAsync();
|
||||
}
|
||||
|
||||
public Task DisposeAsync() => Task.CompletedTask;
|
||||
|
||||
[Fact]
|
||||
public async Task Create_ReturnsValidPipeline()
|
||||
{
|
||||
// Arrange
|
||||
var cacheFilePath = Path.Combine(_cacheDirectory, BranchDevEtl.CacheFileName);
|
||||
Skip.IfNot(File.Exists(cacheFilePath), $"Cache file not found: {cacheFilePath}");
|
||||
|
||||
// Act
|
||||
var pipeline = BranchDevEtl.Create(_connectionFactory, cacheFilePath);
|
||||
|
||||
// Assert
|
||||
pipeline.Should().NotBeNull();
|
||||
pipeline.Name.Should().Be("Branch_Dev");
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Execute_LoadsBranchData()
|
||||
{
|
||||
// Arrange
|
||||
var cacheFilePath = Path.Combine(_cacheDirectory, BranchDevEtl.CacheFileName);
|
||||
Skip.IfNot(File.Exists(cacheFilePath), $"Cache file not found: {cacheFilePath}");
|
||||
|
||||
var pipeline = BranchDevEtl.Create(_connectionFactory, cacheFilePath);
|
||||
|
||||
// Act
|
||||
var result = await pipeline.ExecuteAsync();
|
||||
|
||||
// Assert
|
||||
result.Success.Should().BeTrue(because: result.Error?.Message ?? "Pipeline should succeed");
|
||||
result.TotalRows.Should().BeGreaterThan(0, "Should load at least one row");
|
||||
|
||||
// Verify data in database
|
||||
await using var connection = new SqlConnection(_connectionString);
|
||||
await connection.OpenAsync();
|
||||
await using var command = new SqlCommand("SELECT COUNT(*) FROM dbo.Branch", connection);
|
||||
var count = (int)(await command.ExecuteScalarAsync())!;
|
||||
|
||||
count.Should().Be((int)result.TotalRows, "Database row count should match pipeline result");
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Registry_RunAsync_LoadsBranch()
|
||||
{
|
||||
// Arrange
|
||||
Skip.IfNot(Directory.Exists(_cacheDirectory), $"Cache directory not found: {_cacheDirectory}");
|
||||
|
||||
var registry = new DevEtlRegistry(_connectionFactory, _cacheDirectory);
|
||||
|
||||
// Act
|
||||
var result = await registry.RunAsync("Branch");
|
||||
|
||||
// Assert
|
||||
result.Success.Should().BeTrue(because: result.Error?.Message ?? "Pipeline should succeed");
|
||||
result.TotalRows.Should().BeGreaterThan(0);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Step 2: Add test project dependencies if needed**
|
||||
|
||||
Verify `JdeScoping.DataSync.Tests.csproj` has:
|
||||
- Reference to `JdeScoping.DataSync`
|
||||
- FluentAssertions
|
||||
- xunit
|
||||
- xunit.runner.visualstudio
|
||||
|
||||
**Step 3: Run the tests**
|
||||
|
||||
Run: `dotnet test NEW/tests/JdeScoping.DataSync.Tests --filter "FullyQualifiedName~BranchDevEtlTests"`
|
||||
Expected: Tests pass (or skip if cache file not found)
|
||||
|
||||
---
|
||||
|
||||
### Task 8: Run End-to-End Test and Debug
|
||||
|
||||
**Step 1: Ensure database is running**
|
||||
|
||||
Run: `docker ps | grep scopingtool-sqlserver`
|
||||
Expected: Container is running
|
||||
|
||||
**Step 2: Run the integration test**
|
||||
|
||||
Run: `dotnet test NEW/tests/JdeScoping.DataSync.Tests --filter "BranchDevEtlTests.Execute_LoadsBranchData" -v normal`
|
||||
|
||||
**Step 3: If test fails, debug the issue**
|
||||
|
||||
Common issues to check:
|
||||
- Connection string correct in appsettings.json
|
||||
- Cache file exists and is readable
|
||||
- Branch table exists in database
|
||||
- JSON parsing errors (check column name case sensitivity)
|
||||
|
||||
**Step 4: Verify data in database**
|
||||
|
||||
Run SQL: `SELECT TOP 5 * FROM dbo.Branch ORDER BY Code`
|
||||
Expected: See branch records from cache file
|
||||
|
||||
---
|
||||
|
||||
## Phase 2: Lessons Learned
|
||||
|
||||
### Issues Encountered and Fixes
|
||||
|
||||
1. **JsonDocument Memory Leak**
|
||||
- **Issue:** `ReadJsonObject()` returned `JsonDocument` that wasn't being disposed, causing memory accumulation
|
||||
- **Fix:** Changed to `using var jsonObject = ReadJsonObject();` in the `Read()` method
|
||||
- **Lesson:** Always dispose `JsonDocument` instances - they own native memory
|
||||
|
||||
2. **Multiple ReadDataAsync Calls**
|
||||
- **Issue:** `JsonZstdFileSource.ReadDataAsync()` could be called multiple times, causing stream leaks
|
||||
- **Fix:** Added guard: `if (_fileStream != null) throw new InvalidOperationException(...)`
|
||||
- **Lesson:** Sources should only be readable once; enforce this with guards
|
||||
|
||||
3. **Exception Safety in Stream Initialization**
|
||||
- **Issue:** If stream creation failed partway through (e.g., DecompressionStream fails), earlier streams leaked
|
||||
- **Fix:** Wrapped initialization in try-catch with cleanup in catch block:
|
||||
```csharp
|
||||
try {
|
||||
_fileStream = new FileStream(...);
|
||||
_decompressionStream = new DecompressionStream(_fileStream);
|
||||
_reader = new JsonStreamingDataReader(...);
|
||||
return Task.FromResult<IDataReader>(_reader);
|
||||
} catch {
|
||||
_reader?.Dispose();
|
||||
_decompressionStream?.Dispose();
|
||||
_fileStream?.Dispose();
|
||||
throw;
|
||||
}
|
||||
```
|
||||
- **Lesson:** Multi-resource initialization needs exception safety
|
||||
|
||||
4. **Cancellation Token Handling**
|
||||
- **Issue:** `RunAllAsync` used `IsCancellationRequested + break` which silently stops without exception
|
||||
- **Fix:** Changed to `cancellationToken.ThrowIfCancellationRequested();`
|
||||
- **Lesson:** Prefer `ThrowIfCancellationRequested()` for proper cancellation semantics
|
||||
|
||||
5. **Connection String Naming Convention**
|
||||
- **Issue:** Test used `"LotFinder"` but `DbConnectionFactory` expects `"LotFinderDB"`
|
||||
- **Fix:** Updated appsettings.json key to `"LotFinderDB"`
|
||||
- **Lesson:** Match connection string names to what `DbConnectionFactory` expects
|
||||
|
||||
6. **Hardcoded Absolute Paths**
|
||||
- **Issue:** Fallback cache directory path was user-specific `/Users/dohertj2/Desktop/...`
|
||||
- **Fix:** Changed to a relative path: `Path.Combine(Directory.GetCurrentDirectory(), "..", "..", "..", "..", "..", "CACHED_DB_FILES")`
|
||||
- **Lesson:** Use relative paths for portability; config should specify absolute paths
|
||||
|
||||
### Patterns That Worked Well
|
||||
|
||||
1. **IAsyncLifetime for Test Isolation**
|
||||
- Using `IAsyncLifetime.InitializeAsync()` to truncate tables before each test ensures clean state
|
||||
- Pattern: `TRUNCATE TABLE dbo.{Table}` in `InitializeAsync()`
|
||||
|
||||
2. **Shouldly Assertions**
|
||||
- Project uses Shouldly instead of FluentAssertions
|
||||
- Pattern: `result.Success.ShouldBeTrue(result.Error?.Message ?? "reason")`
|
||||
|
||||
3. **Nullable File Checks in Tests**
|
||||
- Early return when cache files don't exist, so the test ends gracefully instead of failing (note: xUnit reports such a test as passed, not skipped)
|
||||
- Pattern: `if (!File.Exists(cacheFilePath)) return;`
|
||||
|
||||
4. **Static Factory Pattern for DevEtl Classes**
|
||||
- Clean separation: static `Create()` method with explicit validation
|
||||
- Pattern: `ArgumentNullException.ThrowIfNull(connectionFactory);`
|
||||
|
||||
5. **Property Naming**
|
||||
- Pipeline property is `PipelineName` (not `Name`)
|
||||
- Pattern: `pipeline.PipelineName.ShouldBe("Branch_Dev")`
|
||||
|
||||
### Performance Observations
|
||||
|
||||
- Branch table (930 bytes compressed, ~10 rows) loads in ~75ms including decompression
|
||||
- Streaming approach successfully processes one JSON object at a time
|
||||
- No memory issues observed - suitable for larger files
|
||||
|
||||
### Code Corrections from Original Plan
|
||||
|
||||
| Original Plan | Actual Implementation |
|
||||
|---------------|----------------------|
|
||||
| `pipeline.Name` | `pipeline.PipelineName` |
|
||||
| FluentAssertions | Shouldly |
|
||||
| `Skip.IfNot()` | Early return with `if (!exists) return;` |
|
||||
| `DbConnectionFactory` constructor with string | Constructor takes `IConfiguration` |
|
||||
| Dapper for test queries | Direct `SqlConnection` + `ExecuteScalarAsync` |
|
||||
|
||||
---
|
||||
|
||||
## Phase 3: Remaining Tables
|
||||
|
||||
After Phase 2, add remaining tables following the established pattern. Priority order by file size:
|
||||
|
||||
1. **Small (< 1 MB):** OrgHierarchy, WorkCenter, ProfitCenter
|
||||
2. **Medium (1-20 MB):** JdeUser, FunctionCode, Item, RouteMaster
|
||||
3. **Large (20-200 MB):** Lot, MisData, WorkOrder_Curr/Hist, LotUsage_Hist
|
||||
4. **Very Large (200+ MB):** LotUsage_Curr, WorkOrderRouting, WorkOrderStep, WorkOrderTime, WorkOrderComponent
|
||||
|
||||
For each table:
|
||||
1. Read the CREATE TABLE script from Database/Scripts/
|
||||
2. Create `{Table}DevEtl.cs` with explicit schema
|
||||
3. Register in `DevEtlRegistry._pipelineFactories`
|
||||
4. Add integration test
|
||||
5. Verify with sample data
|
||||
Reference in New Issue
Block a user