docs: switch cache conversion design from MessagePack to protobuf-net-data

protobuf-net-data is purpose-built for IDataReader serialization and
returns IDataReader directly from Deserialize(), eliminating the need
for custom streaming reader implementations.
This commit is contained in:
Joseph Doherty
2026-01-06 14:15:19 -05:00
parent 4965b49c4e
commit 8ce9a7dae1
8 changed files with 169 additions and 720 deletions
-1
View File
@@ -5,7 +5,6 @@
<Project Path="src/JdeScoping.Core/JdeScoping.Core.csproj" />
<Project Path="src/JdeScoping.DataAccess/JdeScoping.DataAccess.csproj" />
<Project Path="src/JdeScoping.Database/JdeScoping.Database.csproj" />
<Project Path="src/JdeScoping.DataSync.SourceGenerators/JdeScoping.DataSync.SourceGenerators.csproj" />
<Project Path="src/JdeScoping.DataSync/JdeScoping.DataSync.csproj" />
<Project Path="src/JdeScoping.DataSync.Dev/JdeScoping.DataSync.Dev.csproj" />
<Project Path="src/JdeScoping.ExcelIO/JdeScoping.ExcelIO.csproj" />
@@ -1,10 +1,10 @@
using JdeScoping.DataAccess.Models;
using SqlKata.Compilers;
namespace JdeScoping.DataAccess.QueryBuilders;
/// <summary>
/// Builds MIS extraction queries for work order step matching.
/// Uses SQL extraction functions to retrieve criteria from Search.Criteria JSON.
/// </summary>
public sealed class MisQueryBuilder
{
@@ -21,10 +21,11 @@ public sealed class MisQueryBuilder
/// <summary>
/// Builds the complete MIS extraction SQL including temp table setup and data population.
/// Uses extraction functions to get filter criteria from the database.
/// </summary>
/// <param name="model">The search model containing filter criteria.</param>
/// <param name="searchId">The search ID to extract criteria from.</param>
/// <returns>The SQL statements for MIS extraction.</returns>
public IReadOnlyList<string> BuildMisExtractionSql(SearchModel model)
public IReadOnlyList<string> BuildMisExtractionSql(int searchId)
{
var statements = new List<string>();
@@ -32,7 +33,7 @@ public sealed class MisQueryBuilder
statements.Add(BuildTempMisDataTableSql());
// Build and execute MIS CTE query to populate temp table
statements.Add(BuildMisCteSql(model));
statements.Add(BuildMisCteSql());
return statements;
}
@@ -1,548 +0,0 @@
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;
using System.Text;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
using Microsoft.CodeAnalysis.CSharp.Syntax;
namespace JdeScoping.DataSync.SourceGenerators;
/// <summary>
/// Source generator that creates IDataReader implementations for types listed in BulkCopyTypeRegistry.
/// </summary>
[Generator]
public class DataReaderGenerator : IIncrementalGenerator
{
// Diagnostic for when no types are found (error condition)
private static readonly DiagnosticDescriptor DiagNoTypesFound = new(
"DRGEN001",
"DataReaderGenerator",
"No types found in BulkCopyTypeRegistry. Ensure the Types field contains typeof() expressions.",
"SourceGenerator",
DiagnosticSeverity.Warning,
isEnabledByDefault: true);
public void Initialize(IncrementalGeneratorInitializationContext context)
{
// Find the BulkCopyTypeRegistry class and extract type symbols directly
var registryProvider = context.SyntaxProvider
.CreateSyntaxProvider(
predicate: static (node, _) => IsBulkCopyTypeRegistry(node),
transform: static (ctx, _) => GetRegisteredTypeSymbols(ctx))
.Where(static types => types.Length > 0)
.Collect();
// Generate source directly from type symbols (no need for compilation lookup)
context.RegisterSourceOutput(registryProvider, static (spc, typeSymbolArrays) => ExecuteFromSymbols(typeSymbolArrays, spc));
}
private static bool IsBulkCopyTypeRegistry(SyntaxNode node)
{
return node is ClassDeclarationSyntax cds &&
cds.Identifier.Text == "BulkCopyTypeRegistry";
}
/// <summary>
/// Extract type symbols directly using semantic model - resolves types from referenced assemblies.
/// </summary>
private static ImmutableArray<INamedTypeSymbol> GetRegisteredTypeSymbols(GeneratorSyntaxContext context)
{
var classDecl = (ClassDeclarationSyntax)context.Node;
// Find the Types field
var typesField = classDecl.Members
.OfType<FieldDeclarationSyntax>()
.FirstOrDefault(f => f.Declaration.Variables.Any(v => v.Identifier.Text == "Types"));
if (typesField == null)
return ImmutableArray<INamedTypeSymbol>.Empty;
// Get the initializer
var variable = typesField.Declaration.Variables.First();
// Try collection expression syntax (C# 12: [ typeof(X), typeof(Y) ])
if (variable.Initializer?.Value is CollectionExpressionSyntax collection)
{
return ExtractTypeSymbolsFromCollection(collection, context.SemanticModel);
}
// Try array initializer: new Type[] { typeof(X), typeof(Y) }
if (variable.Initializer?.Value is ArrayCreationExpressionSyntax arrayCreation &&
arrayCreation.Initializer != null)
{
return ExtractTypeSymbolsFromExpressions(arrayCreation.Initializer.Expressions, context.SemanticModel);
}
// Try implicit array: new[] { typeof(X), typeof(Y) }
if (variable.Initializer?.Value is ImplicitArrayCreationExpressionSyntax implicitArray)
{
return ExtractTypeSymbolsFromExpressions(implicitArray.Initializer.Expressions, context.SemanticModel);
}
return ImmutableArray<INamedTypeSymbol>.Empty;
}
private static ImmutableArray<INamedTypeSymbol> ExtractTypeSymbolsFromCollection(
CollectionExpressionSyntax collection,
SemanticModel semanticModel)
{
var types = new List<INamedTypeSymbol>();
foreach (var element in collection.Elements)
{
if (element is ExpressionElementSyntax exprElement &&
exprElement.Expression is TypeOfExpressionSyntax typeOf)
{
var typeInfo = semanticModel.GetTypeInfo(typeOf.Type);
if (typeInfo.Type is INamedTypeSymbol namedType)
{
types.Add(namedType);
}
}
}
return types.ToImmutableArray();
}
private static ImmutableArray<INamedTypeSymbol> ExtractTypeSymbolsFromExpressions(
SeparatedSyntaxList<ExpressionSyntax> expressions,
SemanticModel semanticModel)
{
var types = new List<INamedTypeSymbol>();
foreach (var expr in expressions)
{
if (expr is TypeOfExpressionSyntax typeOf)
{
var typeInfo = semanticModel.GetTypeInfo(typeOf.Type);
if (typeInfo.Type is INamedTypeSymbol namedType)
{
types.Add(namedType);
}
}
}
return types.ToImmutableArray();
}
/// <summary>
/// Execute generation directly from resolved type symbols.
/// </summary>
private static void ExecuteFromSymbols(
ImmutableArray<ImmutableArray<INamedTypeSymbol>> typeSymbolArrays,
SourceProductionContext context)
{
if (typeSymbolArrays.IsDefaultOrEmpty)
return;
var typeSymbols = typeSymbolArrays
.SelectMany(x => x)
.Distinct(SymbolEqualityComparer.Default)
.Cast<INamedTypeSymbol>()
.ToList();
if (typeSymbols.Count == 0)
{
context.ReportDiagnostic(Diagnostic.Create(DiagNoTypesFound, Location.None));
return;
}
// Build type infos
var typeInfos = new List<TypeInfo>();
foreach (var symbol in typeSymbols)
{
var properties = GetBulkCopyProperties(symbol);
typeInfos.Add(new TypeInfo(symbol, properties));
}
if (typeInfos.Count == 0)
return;
// Generate DataReader classes
foreach (var typeInfo in typeInfos)
{
var source = GenerateDataReader(typeInfo);
context.AddSource($"{typeInfo.Symbol.Name}DataReader.g.cs", source);
}
// Generate factory
var factorySource = GenerateFactory(typeInfos);
context.AddSource("DataReaderFactory.g.cs", factorySource);
// Generate DI extension
var extensionSource = GenerateDIExtension();
context.AddSource("BulkCopyServiceCollectionExtensions.g.cs", extensionSource);
// Generate base class
var baseClassSource = GenerateBaseClass();
context.AddSource("AsyncEnumerableDataReader.g.cs", baseClassSource);
}
private static string GenerateBaseClass()
{
return """
// <auto-generated />
#nullable enable
using System;
using System.Collections.Generic;
using System.Data;
using System.Threading.Tasks;
namespace JdeScoping.DataSync.Generated;
/// <summary>
/// Base class for IDataReader implementations that wrap IAsyncEnumerable sources.
/// </summary>
public abstract class AsyncEnumerableDataReader<T> : IDataReader where T : class
{
private readonly IAsyncEnumerator<T> _enumerator;
private bool _disposed;
protected T? Current { get; private set; }
protected AsyncEnumerableDataReader(IAsyncEnumerable<T> source)
{
_enumerator = source.GetAsyncEnumerator();
}
protected abstract string[] ColumnNames { get; }
protected abstract object GetColumnValue(int ordinal);
protected abstract Type GetColumnType(int ordinal);
public int FieldCount => ColumnNames.Length;
public int Depth => 0;
public bool IsClosed => _disposed;
public int RecordsAffected => -1;
public object this[int i] => GetValue(i);
public object this[string name] => GetValue(GetOrdinal(name));
public bool Read()
{
var task = _enumerator.MoveNextAsync();
if (task.IsCompleted)
{
if (task.Result)
{
Current = _enumerator.Current;
return true;
}
return false;
}
var result = task.AsTask().GetAwaiter().GetResult();
if (result)
{
Current = _enumerator.Current;
return true;
}
return false;
}
public object GetValue(int i)
{
if (Current == null)
throw new InvalidOperationException("No current row.");
return GetColumnValue(i);
}
public string GetName(int i)
{
if (i < 0 || i >= ColumnNames.Length)
throw new IndexOutOfRangeException($"Column index {i} is out of range.");
return ColumnNames[i];
}
public int GetOrdinal(string name)
{
for (int i = 0; i < ColumnNames.Length; i++)
{
if (string.Equals(ColumnNames[i], name, StringComparison.OrdinalIgnoreCase))
return i;
}
throw new IndexOutOfRangeException($"Column '{name}' not found.");
}
public Type GetFieldType(int i) => GetColumnType(i);
public bool IsDBNull(int i) => GetValue(i) == DBNull.Value;
public string GetDataTypeName(int i) => GetFieldType(i).Name;
public int GetValues(object[] values)
{
var count = Math.Min(values.Length, FieldCount);
for (int i = 0; i < count; i++)
values[i] = GetValue(i);
return count;
}
public bool GetBoolean(int i) => (bool)GetValue(i);
public byte GetByte(int i) => (byte)GetValue(i);
public long GetBytes(int i, long fieldOffset, byte[]? buffer, int bufferoffset, int length) => throw new NotSupportedException();
public char GetChar(int i) => (char)GetValue(i);
public long GetChars(int i, long fieldoffset, char[]? buffer, int bufferoffset, int length) => throw new NotSupportedException();
public IDataReader GetData(int i) => throw new NotSupportedException();
public DateTime GetDateTime(int i) => (DateTime)GetValue(i);
public decimal GetDecimal(int i) => (decimal)GetValue(i);
public double GetDouble(int i) => (double)GetValue(i);
public float GetFloat(int i) => (float)GetValue(i);
public Guid GetGuid(int i) => (Guid)GetValue(i);
public short GetInt16(int i) => (short)GetValue(i);
public int GetInt32(int i) => (int)GetValue(i);
public long GetInt64(int i) => (long)GetValue(i);
public string GetString(int i) => (string)GetValue(i);
public DataTable? GetSchemaTable() => null;
public bool NextResult() => false;
public void Close() => Dispose();
public void Dispose()
{
if (!_disposed)
{
_disposed = true;
_enumerator.DisposeAsync().AsTask().GetAwaiter().GetResult();
}
}
}
""";
}
private static List<PropertyInfo> GetBulkCopyProperties(INamedTypeSymbol typeSymbol)
{
var properties = new List<PropertyInfo>();
foreach (var member in typeSymbol.GetMembers())
{
if (member is IPropertySymbol prop &&
prop.DeclaredAccessibility == Accessibility.Public &&
prop.SetMethod != null &&
prop.SetMethod.DeclaredAccessibility == Accessibility.Public &&
!prop.IsReadOnly &&
!prop.IsIndexer)
{
properties.Add(new PropertyInfo(
prop.Name,
prop.Type.ToDisplayString(SymbolDisplayFormat.FullyQualifiedFormat),
prop.Type.NullableAnnotation == NullableAnnotation.Annotated ||
prop.Type.OriginalDefinition.SpecialType == SpecialType.System_Nullable_T));
}
}
// Sort alphabetically for consistency
properties.Sort((a, b) => string.Compare(a.Name, b.Name, StringComparison.Ordinal));
return properties;
}
private static string GenerateDataReader(TypeInfo typeInfo)
{
var typeName = typeInfo.Symbol.Name;
var fullTypeName = typeInfo.Symbol.ToDisplayString(SymbolDisplayFormat.FullyQualifiedFormat);
var properties = typeInfo.Properties;
var sb = new StringBuilder();
sb.AppendLine("// <auto-generated />");
sb.AppendLine("#nullable enable");
sb.AppendLine();
sb.AppendLine("using System;");
sb.AppendLine("using System.Collections.Generic;");
sb.AppendLine();
sb.AppendLine("namespace JdeScoping.DataSync.Generated;");
sb.AppendLine();
sb.AppendLine($"/// <summary>");
sb.AppendLine($"/// IDataReader implementation for {typeName} for use with SqlBulkCopy.");
sb.AppendLine($"/// </summary>");
sb.AppendLine($"public sealed class {typeName}DataReader : AsyncEnumerableDataReader<{fullTypeName}>");
sb.AppendLine("{");
// Column names and types arrays
sb.AppendLine(" private static readonly string[] _columnNames =");
sb.AppendLine(" [");
for (int i = 0; i < properties.Count; i++)
{
var comma = i < properties.Count - 1 ? "," : "";
sb.AppendLine($" \"{properties[i].Name}\"{comma}");
}
sb.AppendLine(" ];");
sb.AppendLine();
sb.AppendLine(" private static readonly Type[] _columnTypes =");
sb.AppendLine(" [");
for (int i = 0; i < properties.Count; i++)
{
var prop = properties[i];
var clrType = GetClrTypeName(prop.TypeName);
var comma = i < properties.Count - 1 ? "," : "";
sb.AppendLine($" typeof({clrType}){comma}");
}
sb.AppendLine(" ];");
sb.AppendLine();
// Constructor
sb.AppendLine($" public {typeName}DataReader(IAsyncEnumerable<{fullTypeName}> source) : base(source) {{ }}");
sb.AppendLine();
// Override abstract members
sb.AppendLine(" protected override string[] ColumnNames => _columnNames;");
sb.AppendLine();
sb.AppendLine(" public static IReadOnlyList<string> GetColumnNames() => _columnNames;");
sb.AppendLine();
// GetColumnValue override
sb.AppendLine(" protected override object GetColumnValue(int ordinal)");
sb.AppendLine(" {");
sb.AppendLine(" var entity = Current!;");
sb.AppendLine(" return ordinal switch");
sb.AppendLine(" {");
for (int i = 0; i < properties.Count; i++)
{
var prop = properties[i];
if (prop.IsNullable)
{
sb.AppendLine($" {i} => entity.{prop.Name} ?? (object)DBNull.Value,");
}
else
{
sb.AppendLine($" {i} => entity.{prop.Name},");
}
}
sb.AppendLine(" _ => throw new IndexOutOfRangeException()");
sb.AppendLine(" };");
sb.AppendLine(" }");
sb.AppendLine();
// GetColumnType override
sb.AppendLine(" protected override Type GetColumnType(int ordinal) => _columnTypes[ordinal];");
sb.AppendLine("}");
return sb.ToString();
}
private static string GetClrTypeName(string fullTypeName)
{
// Handle nullable types
if (fullTypeName.EndsWith("?"))
{
var underlyingType = fullTypeName.TrimEnd('?');
return GetClrTypeName(underlyingType);
}
// Handle Nullable<T>
if (fullTypeName.StartsWith("global::System.Nullable<"))
{
var inner = fullTypeName.Substring("global::System.Nullable<".Length);
inner = inner.TrimEnd('>');
return GetClrTypeName(inner);
}
// Map common types
return fullTypeName switch
{
"global::System.String" => "string",
"global::System.Int32" => "int",
"global::System.Int64" => "long",
"global::System.Int16" => "short",
"global::System.Byte" => "byte",
"global::System.Boolean" => "bool",
"global::System.Decimal" => "decimal",
"global::System.Double" => "double",
"global::System.Single" => "float",
"global::System.DateTime" => "DateTime",
"global::System.Guid" => "Guid",
"global::System.Char" => "char",
"string" => "string",
"int" => "int",
"long" => "long",
"short" => "short",
"byte" => "byte",
"bool" => "bool",
"decimal" => "decimal",
"double" => "double",
"float" => "float",
"char" => "char",
_ => fullTypeName.Replace("global::", "")
};
}
private static string GenerateFactory(List<TypeInfo> typeInfos)
{
var sb = new StringBuilder();
sb.AppendLine("// <auto-generated />");
sb.AppendLine("#nullable enable");
sb.AppendLine();
sb.AppendLine("using System;");
sb.AppendLine("using System.Collections.Generic;");
sb.AppendLine("using System.Data;");
sb.AppendLine("using JdeScoping.DataSync.Contracts;");
sb.AppendLine();
sb.AppendLine("namespace JdeScoping.DataSync.Generated;");
sb.AppendLine();
sb.AppendLine("/// <summary>");
sb.AppendLine("/// Factory for creating IDataReader instances from IAsyncEnumerable sources.");
sb.AppendLine("/// </summary>");
sb.AppendLine("public sealed class DataReaderFactory : IDataReaderFactory");
sb.AppendLine("{");
// CreateReader method
sb.AppendLine(" public IDataReader CreateReader<T>(IAsyncEnumerable<T> source) where T : class");
sb.AppendLine(" {");
sb.AppendLine(" return source switch");
sb.AppendLine(" {");
foreach (var typeInfo in typeInfos)
{
var fullTypeName = typeInfo.Symbol.ToDisplayString(SymbolDisplayFormat.FullyQualifiedFormat);
sb.AppendLine($" IAsyncEnumerable<{fullTypeName}> typed => new {typeInfo.Symbol.Name}DataReader(typed),");
}
sb.AppendLine(" _ => throw new NotSupportedException($\"No DataReader converter exists for type {typeof(T).Name}. Add it to BulkCopyTypeRegistry.Types.\")");
sb.AppendLine(" };");
sb.AppendLine(" }");
sb.AppendLine();
// GetColumnNames method
sb.AppendLine(" public IReadOnlyList<string> GetColumnNames<T>() where T : class");
sb.AppendLine(" {");
sb.AppendLine(" var type = typeof(T);");
foreach (var typeInfo in typeInfos)
{
var fullTypeName = typeInfo.Symbol.ToDisplayString(SymbolDisplayFormat.FullyQualifiedFormat);
sb.AppendLine($" if (type == typeof({fullTypeName}))");
sb.AppendLine($" return {typeInfo.Symbol.Name}DataReader.GetColumnNames();");
}
sb.AppendLine(" throw new NotSupportedException($\"No DataReader converter exists for type {type.Name}. Add it to BulkCopyTypeRegistry.Types.\");");
sb.AppendLine(" }");
sb.AppendLine("}");
return sb.ToString();
}
private static string GenerateDIExtension()
{
var sb = new StringBuilder();
sb.AppendLine("// <auto-generated />");
sb.AppendLine("#nullable enable");
sb.AppendLine();
sb.AppendLine("using JdeScoping.DataSync.Contracts;");
sb.AppendLine("using JdeScoping.DataSync.Generated;");
sb.AppendLine("using Microsoft.Extensions.DependencyInjection;");
sb.AppendLine();
sb.AppendLine("namespace JdeScoping.DataSync;");
sb.AppendLine();
sb.AppendLine("/// <summary>");
sb.AppendLine("/// Extension methods for registering bulk copy converters.");
sb.AppendLine("/// </summary>");
sb.AppendLine("public static class BulkCopyServiceCollectionExtensions");
sb.AppendLine("{");
sb.AppendLine(" /// <summary>");
sb.AppendLine(" /// Adds the generated IDataReaderFactory to the service collection.");
sb.AppendLine(" /// </summary>");
sb.AppendLine(" public static IServiceCollection AddBulkCopyConverters(this IServiceCollection services)");
sb.AppendLine(" {");
sb.AppendLine(" services.AddSingleton<IDataReaderFactory, DataReaderFactory>();");
sb.AppendLine(" return services;");
sb.AppendLine(" }");
sb.AppendLine("}");
return sb.ToString();
}
private record TypeInfo(INamedTypeSymbol Symbol, List<PropertyInfo> Properties);
private record PropertyInfo(string Name, string TypeName, bool IsNullable);
}
@@ -1,6 +0,0 @@
// Polyfill for init-only properties in netstandard2.0
namespace System.Runtime.CompilerServices
{
internal static class IsExternalInit { }
}
@@ -1,15 +0,0 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>netstandard2.0</TargetFramework>
<LangVersion>latest</LangVersion>
<Nullable>enable</Nullable>
<EnforceExtendedAnalyzerRules>true</EnforceExtendedAnalyzerRules>
<IsRoslynComponent>true</IsRoslynComponent>
<RootNamespace>JdeScoping.DataSync.SourceGenerators</RootNamespace>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.CodeAnalysis.Analyzers" Version="3.3.4" PrivateAssets="all" />
<PackageReference Include="Microsoft.CodeAnalysis.CSharp" Version="4.8.0" PrivateAssets="all" />
</ItemGroup>
</Project>
@@ -8,16 +8,12 @@
<ItemGroup>
<InternalsVisibleTo Include="JdeScoping.DataSync.Tests" />
<InternalsVisibleTo Include="JdeScoping.DataSync.IntegrationTests" />
<InternalsVisibleTo Include="DynamicProxyGenAssembly2" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\JdeScoping.Core\JdeScoping.Core.csproj" />
<ProjectReference Include="..\JdeScoping.DataAccess\JdeScoping.DataAccess.csproj" />
<ProjectReference Include="..\JdeScoping.DataSync.SourceGenerators\JdeScoping.DataSync.SourceGenerators.csproj"
OutputItemType="Analyzer"
ReferenceOutputAssembly="false" />
</ItemGroup>
<ItemGroup>
@@ -1,142 +0,0 @@
# MessagePack Cache Conversion Design
## Purpose
Convert the development cache files in `CACHED_DB_FILES/` from zstd-compressed JSON (`.json.zstd`) to zstd-compressed MessagePack (`.msgpack.zstd`) for faster deserialization and smaller file sizes.
## Goals
1. **Faster deserialization** - MessagePack is faster to parse than JSON
2. **Smaller file sizes** - MessagePack is more compact than JSON
## Current State
- 22 cache files in `CACHED_DB_FILES/` totaling ~3.6 GB (zstd-compressed JSON)
- `JsonZstdFileSource` reads files using ZstdSharp + Utf8JsonReader
- Each `*DevEtl.cs` defines a schema and creates a pipeline from JSON files
- Tests verify ETL loads data from cache files into SQL Server
## Design Decisions
| Decision | Choice | Rationale |
|----------|--------|-----------|
| Conversion approach | One-time manual | Cache files are static snapshots, not actively regenerated |
| Data structure | Map format (field names as keys) | Self-describing, maintainable, keys compress well with zstd |
| Compression | Keep zstd | Largest file is 878 MB; raw MessagePack would be 2-4x larger |
| Converter location | Standalone console app in `Tools/CacheConverter/` | Isolated utility, not part of main solution |
## File Format
**New extension:** `.msgpack.zstd`
**File naming:**
- `branch.json.zstd``branch.msgpack.zstd`
- `workordertime_curr.json.zstd``workordertime_curr.msgpack.zstd`
**Data structure:** Array of maps (same logical structure as JSON)
```
[
{ "Code": "ABC", "Description": "Branch ABC", "LastUpdateDT": <DateTime> },
{ "Code": "DEF", "Description": "Branch DEF", "LastUpdateDT": <DateTime> },
...
]
```
**Library:** MessagePack-CSharp (`MessagePack` NuGet package)
## Components
### 1. Converter Tool
**Location:** `/JdeScopingTool/Tools/CacheConverter/`
```
Tools/
└── CacheConverter/
├── CacheConverter.csproj
└── Program.cs
```
**Dependencies:**
- `ZstdSharp.Port` - read zstd JSON, write zstd MessagePack
- `MessagePack` - MessagePack serialization
**Behavior:**
1. Read each `.json.zstd` file from `CACHED_DB_FILES/`
2. Stream JSON → deserialize to `Dictionary<string, object?>[]`
3. Serialize to MessagePack (map format) → compress with zstd
4. Write to `.msgpack.zstd` alongside originals
5. Print before/after sizes for comparison
**Usage:**
```bash
cd Tools/CacheConverter
dotnet run -- ../../CACHED_DB_FILES
```
### 2. MessagePackZstdFileSource
**New file:** `NEW/src/JdeScoping.DataSync.Dev/Sources/MessagePackZstdFileSource.cs`
- Implements `IImportSource` (same interface as `JsonZstdFileSource`)
- Reads `.msgpack.zstd` files using streaming decompression
- Uses `MessagePackStreamReader` for efficient streaming deserialization
- Returns an `IDataReader` that yields rows one at a time
- Schema still needed for `IDataReader` field metadata (column names, types, ordinals)
**Package addition:** Add `MessagePack` to `JdeScoping.DataSync.Dev.csproj`
### 3. DevEtl Class Updates
**Changes to each `*DevEtl.cs` file (22 files):**
1. Update `CacheFileName` constant:
```csharp
// Before
public static readonly string CacheFileName = "branch.json.zstd";
// After
public static readonly string CacheFileName = "branch.msgpack.zstd";
```
2. Update `Create()` method:
```csharp
// Before
.WithSource(new JsonZstdFileSource(cacheFilePath, Schema))
// After
.WithSource(new MessagePackZstdFileSource(cacheFilePath, Schema))
```
**No changes to:**
- Schema definitions (same column names and types)
- Pipeline structure
- `DevEtlRegistry.cs`
### 4. Cleanup (After Verification)
Remove obsolete JSON readers:
- `JsonZstdFileSource.cs`
- `JsonStreamingDataReader.cs`
- `Utf8JsonStreamingDataReader.cs`
Remove old cache files:
- All `*.json.zstd` files in `CACHED_DB_FILES/`
## Test Strategy
1. Run converter tool, verify all 22 files convert without errors
2. Compare file sizes (expect 10-30% reduction)
3. Run existing `JdeScoping.DataSync.Dev.Tests` - all tests should pass unchanged
4. Verify data loaded matches previous JSON-based loads
## Files Changed
| File | Change |
|------|--------|
| `Tools/CacheConverter/` (new) | Standalone converter tool |
| `Sources/MessagePackZstdFileSource.cs` (new) | New MessagePack reader |
| `JdeScoping.DataSync.Dev.csproj` | Add MessagePack package |
| `*DevEtl.cs` (22 files) | Update file extension and source class |
| `Sources/JsonZstdFileSource.cs` | Delete after migration |
| `Sources/JsonStreamingDataReader.cs` | Delete after migration |
| `Sources/Utf8JsonStreamingDataReader.cs` | Delete after migration |
| `CACHED_DB_FILES/*.json.zstd` | Delete after verification |
@@ -0,0 +1,164 @@
# Protobuf Cache Conversion Design
## Purpose
Convert the development cache files in `CACHED_DB_FILES/` from zstd-compressed JSON (`.json.zstd`) to zstd-compressed Protocol Buffers (`.pb.zstd`) using protobuf-net-data for faster deserialization and smaller file sizes.
## Goals
1. **Faster deserialization** - Protobuf is faster to parse than JSON
2. **Smaller file sizes** - Protobuf is more compact than JSON
3. **Simpler code** - protobuf-net-data returns `IDataReader` directly, no custom reader needed
## Current State
- 22 cache files in `CACHED_DB_FILES/` totaling ~3.6 GB (zstd-compressed JSON)
- `JsonZstdFileSource` reads files using ZstdSharp + Utf8JsonReader
- Custom `Utf8JsonStreamingDataReader` implements `IDataReader` for streaming
- Each `*DevEtl.cs` defines a schema and creates a pipeline from JSON files
## Design Decisions
| Decision | Choice | Rationale |
|----------|--------|-----------|
| Serialization library | protobuf-net-data | Purpose-built for IDataReader, returns IDataReader directly |
| Conversion approach | One-time manual | Cache files are static snapshots, not actively regenerated |
| Compression | zstd on whole file | Consistent with current approach, excellent compression |
| Converter location | Standalone console app in `Tools/CacheConverter/` | Isolated utility, not part of main solution |
## File Format
**New extension:** `.pb.zstd`
**File naming:**
- `branch.json.zstd``branch.pb.zstd`
- `workordertime_curr.json.zstd``workordertime_curr.pb.zstd`
**Data structure:** protobuf-net-data binary format
- Schema embedded in stream (column names, types, nullability)
- Rows serialized sequentially
- Native ADO.NET type support (DateTime, Guid, decimal, etc.)
**Libraries:**
- `protobuf-net-data` - IDataReader serialization/deserialization
- `ZstdSharp.Port` - compression
## Components
### 1. Converter Tool
**Location:** `/JdeScopingTool/Tools/CacheConverter/`
```
Tools/
└── CacheConverter/
├── CacheConverter.csproj
└── Program.cs
```
**Dependencies:**
- `ZstdSharp.Port` - read zstd JSON, write zstd protobuf
- `protobuf-net-data` - protobuf serialization
**Behavior:**
1. Read each `.json.zstd` file from `CACHED_DB_FILES/`
2. Decompress and parse JSON into an `IDataReader`
3. Use `DataSerializer.Serialize(stream, reader)` to write protobuf
4. Compress with zstd and write to `.pb.zstd`
5. Print before/after sizes for comparison
**Usage:**
```bash
cd Tools/CacheConverter
dotnet run -- ../../CACHED_DB_FILES
```
### 2. ProtobufZstdFileSource
**New file:** `NEW/src/JdeScoping.DataSync.Dev/Sources/ProtobufZstdFileSource.cs`
**Key simplification:** No custom `IDataReader` implementation needed!
```csharp
public sealed class ProtobufZstdFileSource : IImportSource
{
public async Task<IDataReader> ReadDataAsync(CancellationToken ct = default)
{
_fileStream = new FileStream(_filePath, FileMode.Open, ...);
_decompressionStream = new DecompressionStream(_fileStream);
// protobuf-net-data returns IDataReader directly!
return DataSerializer.Deserialize(_decompressionStream);
}
}
```
**Package additions to `JdeScoping.DataSync.Dev.csproj`:**
- `protobuf-net-data`
### 3. DevEtl Class Updates
**Changes to each `*DevEtl.cs` file (22 files):**
1. Update `CacheFileName` constant:
```csharp
// Before
public static readonly string CacheFileName = "branch.json.zstd";
// After
public static readonly string CacheFileName = "branch.pb.zstd";
```
2. Update `Create()` method:
```csharp
// Before
.WithSource(new JsonZstdFileSource(cacheFilePath, Schema))
// After
.WithSource(new ProtobufZstdFileSource(cacheFilePath))
```
3. **Remove schema definitions** - protobuf-net-data embeds schema in the file, so `JsonColumnSchema[]` arrays are no longer needed in DevEtl classes.
**No changes to:**
- Pipeline structure
- `DevEtlRegistry.cs`
### 4. Cleanup (After Verification)
**Remove obsolete files:**
- `Sources/JsonZstdFileSource.cs`
- `Sources/JsonStreamingDataReader.cs`
- `Sources/Utf8JsonStreamingDataReader.cs`
- `Models/JsonColumnSchema.cs`
**Remove old cache files:**
- All `*.json.zstd` files in `CACHED_DB_FILES/`
## Code Simplification Summary
| Before (JSON) | After (Protobuf) |
|---------------|------------------|
| `JsonZstdFileSource` | `ProtobufZstdFileSource` |
| `Utf8JsonStreamingDataReader` (custom) | `DataSerializer.Deserialize()` (library) |
| `JsonStreamingDataReader` (legacy) | Removed |
| `JsonColumnSchema[]` per table | Not needed (embedded in file) |
## Test Strategy
1. Run converter tool, verify all 22 files convert without errors
2. Compare file sizes (expect 10-30% reduction)
3. Run existing `JdeScoping.DataSync.Dev.Tests` - all tests should pass unchanged
4. Verify data loaded matches previous JSON-based loads
## Files Changed
| File | Change |
|------|--------|
| `Tools/CacheConverter/` (new) | Standalone converter tool |
| `Sources/ProtobufZstdFileSource.cs` (new) | New protobuf reader (much simpler) |
| `JdeScoping.DataSync.Dev.csproj` | Add protobuf-net-data package |
| `*DevEtl.cs` (22 files) | Update file extension, source class, remove schema |
| `Sources/JsonZstdFileSource.cs` | Delete after migration |
| `Sources/JsonStreamingDataReader.cs` | Delete after migration |
| `Sources/Utf8JsonStreamingDataReader.cs` | Delete after migration |
| `Models/JsonColumnSchema.cs` | Delete after migration |
| `CACHED_DB_FILES/*.json.zstd` | Delete after verification |