Files
jdescopingtool/Tools/CacheConverter/Program.cs
T
Joseph Doherty 35c1e6baf0 refactor: use SQL schema and streaming in converter
- Read schema from SQL CREATE TABLE scripts instead of inferring from JSON
- Stream JSON records using Utf8JsonReader instead of loading all into memory
- Write protobuf output in batches of 10000 rows to reduce memory usage
- Add mapping from cache file names to SQL scripts and table names
- Map SQL types (VARCHAR, BIGINT, DECIMAL, DATETIME2, BIT) to .NET types
- Update usage to require scripts directory as second argument
2026-01-06 14:39:22 -05:00

384 lines
13 KiB
C#

using System.Data;
using System.Globalization;
using System.Text;
using System.Text.Json;
using System.Text.RegularExpressions;
using ProtoBuf.Data;
using ZstdSharp;
// Entry point (top-level statements).
//
// Converts every "*.json.zstd" file in <cache-directory> into a "<name>.pb.zstd" file
// in the same directory. The column schema for each file comes from the CREATE TABLE
// script mapped to it in <scripts-directory>. Rows are serialized with protobuf-net-data
// in batches of BatchSize and zstd-compressed.
//
// Exit code: 1 when arguments are missing or a directory does not exist, 0 otherwise
// (per-file failures are reported on the console and do not change the exit code).
if (args.Length < 2)
{
    Console.WriteLine("Usage: CacheConverter <cache-directory> <scripts-directory>");
    Console.WriteLine("Example: dotnet run -- ../../CACHED_DB_FILES ../../NEW/src/JdeScoping.Database/Scripts");
    return 1;
}

var cacheDir = args[0];
var scriptsDir = args[1];

if (!Directory.Exists(cacheDir))
{
    Console.WriteLine($"Error: Cache directory not found: {cacheDir}");
    return 1;
}

if (!Directory.Exists(scriptsDir))
{
    Console.WriteLine($"Error: Scripts directory not found: {scriptsDir}");
    return 1;
}

// Map cache file base names to SQL script filenames and table names
var fileMapping = new Dictionary<string, (string ScriptFile, string TableName)>(StringComparer.OrdinalIgnoreCase)
{
    ["branch"] = ("003_CreateBranchTable.sql", "Branch"),
    ["functioncode"] = ("005_CreateFunctionCodeTable.sql", "FunctionCode"),
    ["item"] = ("008_CreateItemTable.sql", "Item"),
    ["jdeuser"] = ("009_CreateJdeUserTable.sql", "JdeUser"),
    ["lot"] = ("013_CreateLotTable.sql", "Lot"),
    ["lotusage_curr"] = ("024_CreateLotUsageCurrTable.sql", "LotUsage_Curr"),
    ["lotusage_hist"] = ("025_CreateLotUsageHistTable.sql", "LotUsage_Hist"),
    ["misdata"] = ("012_CreateMisDataTable.sql", "MisData"),
    ["orghierarchy"] = ("010_CreateOrgHierarchyTable.sql", "OrgHierarchy"),
    ["profitcenter"] = ("006_CreateProfitCenterTable.sql", "ProfitCenter"),
    ["routemaster"] = ("011_CreateRouteMasterTable.sql", "RouteMaster"),
    ["workcenter"] = ("007_CreateWorkCenterTable.sql", "WorkCenter"),
    ["workorder_curr"] = ("015_CreateWorkOrderCurrTable.sql", "WorkOrder_Curr"),
    ["workorder_hist"] = ("016_CreateWorkOrderHistTable.sql", "WorkOrder_Hist"),
    ["workordercomponent_curr"] = ("021_CreateWorkOrderComponentCurrTable.sql", "WorkOrderComponent_Curr"),
    ["workordercomponent_hist"] = ("022_CreateWorkOrderComponentHistTable.sql", "WorkOrderComponent_Hist"),
    ["workorderrouting"] = ("023_CreateWorkOrderRoutingTable.sql", "WorkOrderRouting"),
    ["workorderstep_curr"] = ("017_CreateWorkOrderStepCurrTable.sql", "WorkOrderStep_Curr"),
    ["workorderstep_hist"] = ("018_CreateWorkOrderStepHistTable.sql", "WorkOrderStep_Hist"),
    ["workordertime_curr"] = ("019_CreateWorkOrderTimeCurrTable.sql", "WorkOrderTime_Curr"),
    ["workordertime_hist"] = ("020_CreateWorkOrderTimeHistTable.sql", "WorkOrderTime_Hist"),
};

var jsonFiles = Directory.GetFiles(cacheDir, "*.json.zstd");
Console.WriteLine($"Found {jsonFiles.Length} JSON files to convert");

long totalOriginalSize = 0;
long totalNewSize = 0;
const int BatchSize = 10000;
const string InputSuffix = ".json.zstd";

foreach (var jsonFile in jsonFiles)
{
    // Strip only the trailing suffix, case-insensitively, so a file the OS matched as
    // "X.JSON.ZSTD" still resolves to the mapping key "X".
    var fileName = Path.GetFileName(jsonFile);
    var baseName = fileName.EndsWith(InputSuffix, StringComparison.OrdinalIgnoreCase)
        ? fileName[..^InputSuffix.Length]
        : fileName;
    var outputFile = Path.Combine(cacheDir, $"{baseName}.pb.zstd");

    Console.Write($"Converting {baseName}... ");

    try
    {
        // Look up the SQL script for this file
        if (!fileMapping.TryGetValue(baseName, out var mapping))
        {
            Console.WriteLine($"SKIP (no SQL mapping for '{baseName}')");
            continue;
        }

        var scriptPath = Path.Combine(scriptsDir, mapping.ScriptFile);
        if (!File.Exists(scriptPath))
        {
            Console.WriteLine($"SKIP (script not found: {mapping.ScriptFile})");
            continue;
        }

        // Parse schema from SQL script
        var schema = ParseSqlSchema(scriptPath, mapping.TableName);
        if (schema.Count == 0)
        {
            Console.WriteLine("SKIP (could not parse schema)");
            continue;
        }

        var originalSize = new FileInfo(jsonFile).Length;

        // Create DataTable with schema from SQL
        var dataTable = new DataTable(mapping.TableName);
        foreach (var (colName, colType) in schema)
        {
            dataTable.Columns.Add(colName, colType);
        }

        int rowCount = 0;
        int batchCount = 0;

        // All streams live in this inner scope so that they are disposed (and the
        // output file is completely written and closed) BEFORE its size is measured.
        {
            using var inputFs = new FileStream(jsonFile, FileMode.Open, FileAccess.Read, FileShare.Read, 256 * 1024, FileOptions.SequentialScan);
            using var decompressStream = new DecompressionStream(inputFs);
            using var outputFs = new FileStream(outputFile, FileMode.Create, FileAccess.Write, FileShare.None, 256 * 1024);
            using var compressStream = new CompressionStream(outputFs, level: 3);

            // The decompressed JSON is buffered in memory in full; only the DataTable
            // rows and the protobuf output are processed in batches.
            using var memoryStream = new MemoryStream();
            decompressStream.CopyTo(memoryStream);

            // Read straight from the MemoryStream's buffer instead of copying it again
            // with ToArray(); only the first Length bytes of the buffer are valid.
            var jsonBytes = new ReadOnlySpan<byte>(memoryStream.GetBuffer(), 0, (int)memoryStream.Length);
            var jsonReader = new Utf8JsonReader(jsonBytes, new JsonReaderOptions { AllowTrailingCommas = true });

            // Skip to start of array
            while (jsonReader.Read())
            {
                if (jsonReader.TokenType == JsonTokenType.StartArray)
                {
                    break;
                }
            }

            // Read each object in the array
            while (jsonReader.Read())
            {
                if (jsonReader.TokenType == JsonTokenType.EndArray)
                {
                    break;
                }

                if (jsonReader.TokenType == JsonTokenType.StartObject)
                {
                    var row = dataTable.NewRow();
                    ReadJsonObject(ref jsonReader, row, dataTable);
                    dataTable.Rows.Add(row);
                    rowCount++;

                    // Write batch when we hit the batch size
                    if (dataTable.Rows.Count >= BatchSize)
                    {
                        using var reader = dataTable.CreateDataReader();
                        DataSerializer.Serialize(compressStream, reader);
                        dataTable.Clear();
                        batchCount++;
                    }
                }
            }

            // Write remaining rows
            if (dataTable.Rows.Count > 0)
            {
                using var reader = dataTable.CreateDataReader();
                DataSerializer.Serialize(compressStream, reader);
                batchCount++;
            }

            compressStream.Flush();
        }

        // Streams are closed now, so the on-disk length is final.
        var newSize = new FileInfo(outputFile).Length;

        // Accumulate both totals only for successful conversions so the summary
        // compares like with like.
        totalOriginalSize += originalSize;
        totalNewSize += newSize;

        var ratio = (double)newSize / originalSize * 100;
        Console.WriteLine($"OK ({rowCount:N0} rows, {batchCount} batches, {originalSize:N0} -> {newSize:N0} bytes, {ratio:F1}%)");
    }
    catch (Exception ex)
    {
        // Best effort: report the failure and carry on with the next file.
        Console.WriteLine($"ERROR: {ex.Message}");
    }
}

Console.WriteLine();
if (totalOriginalSize > 0)
{
    Console.WriteLine($"Total: {totalOriginalSize:N0} -> {totalNewSize:N0} bytes ({(double)totalNewSize / totalOriginalSize * 100:F1}%)");
}
else
{
    Console.WriteLine("No files were converted.");
}

return 0;
/// <summary>
/// Reads a SQL script and extracts the column names and .NET types declared in the
/// <c>CREATE TABLE [dbo].[tableName] ( ... );</c> statement it contains.
/// </summary>
/// <param name="scriptPath">Path of the SQL script file to read.</param>
/// <param name="tableName">Table name as it appears between the brackets after <c>[dbo].</c>.</param>
/// <returns>
/// The recognised columns in the order they are matched; an empty list when no matching
/// CREATE TABLE statement is found. Only columns whose type is one of VARCHAR, CHAR,
/// BIGINT, DECIMAL, DATETIME2, BIT or INT are recognised.
/// </returns>
static List<(string Name, Type Type)> ParseSqlSchema(string scriptPath, string tableName)
{
    var columns = new List<(string Name, Type Type)>();
    var scriptText = File.ReadAllText(scriptPath);

    // Locate the body of the CREATE TABLE statement for the requested table.
    var createTable = Regex.Match(
        scriptText,
        $@"CREATE\s+TABLE\s+\[dbo\]\.\[{Regex.Escape(tableName)}\]\s*\((.*?)\);",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    if (createTable.Success)
    {
        var tableBody = createTable.Groups[1].Value;

        // Column definition shape: [ColumnName] TYPE [(size)]
        const string ColumnPattern = @"\[(\w+)\]\s+(VARCHAR|CHAR|BIGINT|DECIMAL|DATETIME2|BIT|INT)(?:\s*\(([^)]+)\))?";

        for (var definition = Regex.Match(tableBody, ColumnPattern, RegexOptions.IgnoreCase);
             definition.Success;
             definition = definition.NextMatch())
        {
            var name = definition.Groups[1].Value;

            // Anything named CONSTRAINT is not treated as a column.
            if (!name.Equals("CONSTRAINT", StringComparison.OrdinalIgnoreCase))
            {
                var typeKeyword = definition.Groups[2].Value.ToUpperInvariant();
                columns.Add((name, MapSqlTypeToNet(typeKeyword)));
            }
        }
    }

    return columns;
}
/// <summary>
/// Translates a SQL Server type keyword into the .NET type used for the DataTable column.
/// </summary>
/// <param name="sqlType">SQL type keyword; compared case-insensitively.</param>
/// <returns>
/// <see cref="long"/> for BIGINT, <see cref="int"/> for INT, <see cref="decimal"/> for DECIMAL,
/// <see cref="DateTime"/> for DATETIME2, <see cref="bool"/> for BIT, and <see cref="string"/>
/// for VARCHAR, CHAR and every other keyword.
/// </returns>
static Type MapSqlTypeToNet(string sqlType)
{
    switch (sqlType.ToUpperInvariant())
    {
        case "BIGINT":
            return typeof(long);
        case "INT":
            return typeof(int);
        case "DECIMAL":
            return typeof(decimal);
        case "DATETIME2":
            return typeof(DateTime);
        case "BIT":
            return typeof(bool);
        default:
            // VARCHAR, CHAR and any unrecognised keyword are carried as text.
            return typeof(string);
    }
}
/// <summary>
/// Consumes the properties of the current JSON object and stores each value whose
/// property name matches a column of <paramref name="table"/> into <paramref name="row"/>.
/// </summary>
/// <param name="reader">Reader to advance; reading stops after the first EndObject token encountered.</param>
/// <param name="row">Row that receives the converted values.</param>
/// <param name="table">Table whose columns define the accepted property names and target types.</param>
static void ReadJsonObject(ref Utf8JsonReader reader, DataRow row, DataTable table)
{
    while (reader.Read() && reader.TokenType != JsonTokenType.EndObject)
    {
        // Only property names start a name/value pair; every other token is ignored.
        if (reader.TokenType != JsonTokenType.PropertyName)
        {
            continue;
        }

        var key = reader.GetString()!;
        reader.Read(); // advance from the name to its value

        // Case-insensitive column lookup; the first match wins.
        DataColumn? target = null;
        for (var index = 0; index < table.Columns.Count && target == null; index++)
        {
            var candidate = table.Columns[index];
            if (string.Equals(candidate.ColumnName, key, StringComparison.OrdinalIgnoreCase))
            {
                target = candidate;
            }
        }

        if (target != null)
        {
            row[target] = ReadJsonValue(ref reader, target.DataType);
        }
        else
        {
            // Property has no column: discard its value.
            SkipJsonValue(ref reader);
        }
    }
}
/// <summary>
/// Converts the JSON value the reader is currently positioned on into a value of
/// <paramref name="targetType"/> suitable for storing in a DataRow.
/// </summary>
/// <param name="reader">
/// Reader positioned on a value token. Scalar values leave the reader where it is;
/// a nested object or array is skipped so the reader ends on its closing token.
/// </param>
/// <param name="targetType">One of string, DateTime, long, int, decimal or bool.</param>
/// <returns>
/// The converted value, or <see cref="DBNull.Value"/> when the token is null, is a nested
/// object/array, cannot be converted, or <paramref name="targetType"/> is not supported.
/// </returns>
/// <remarks>
/// All text/number conversions use the invariant culture so the output does not depend
/// on the locale of the machine running the converter.
/// </remarks>
static object ReadJsonValue(ref Utf8JsonReader reader, Type targetType)
{
    var invariant = CultureInfo.InvariantCulture;

    switch (reader.TokenType)
    {
        case JsonTokenType.Null:
            return DBNull.Value;

        case JsonTokenType.StartObject:
        case JsonTokenType.StartArray:
            // A nested value cannot be stored in a scalar column. It must still be
            // consumed, otherwise the caller would read the nested properties as if
            // they belonged to the enclosing object.
            reader.Skip();
            return DBNull.Value;
    }

    if (targetType == typeof(string))
    {
        return reader.TokenType switch
        {
            JsonTokenType.String => reader.GetString() ?? (object)DBNull.Value,
            JsonTokenType.Number => reader.GetDecimal().ToString(invariant),
            JsonTokenType.True => "true",
            JsonTokenType.False => "false",
            _ => DBNull.Value
        };
    }

    if (targetType == typeof(DateTime))
    {
        if (reader.TokenType == JsonTokenType.String)
        {
            var text = reader.GetString();
            // NOTE(review): DateTimeStyles.None keeps the previous default handling of
            // strings that carry a UTC/offset designator - confirm that is what is wanted.
            if (text != null && DateTime.TryParse(text, invariant, DateTimeStyles.None, out var parsed))
            {
                return parsed;
            }
        }

        return DBNull.Value;
    }

    if (targetType == typeof(long))
    {
        return reader.TokenType switch
        {
            JsonTokenType.Number => reader.GetInt64(),
            JsonTokenType.String when long.TryParse(reader.GetString(), NumberStyles.Integer, invariant, out var val) => val,
            _ => DBNull.Value
        };
    }

    if (targetType == typeof(int))
    {
        return reader.TokenType switch
        {
            JsonTokenType.Number => reader.GetInt32(),
            JsonTokenType.String when int.TryParse(reader.GetString(), NumberStyles.Integer, invariant, out var val) => val,
            _ => DBNull.Value
        };
    }

    if (targetType == typeof(decimal))
    {
        return reader.TokenType switch
        {
            JsonTokenType.Number => reader.GetDecimal(),
            JsonTokenType.String when decimal.TryParse(reader.GetString(), NumberStyles.Number, invariant, out var val) => val,
            _ => DBNull.Value
        };
    }

    if (targetType == typeof(bool))
    {
        return reader.TokenType switch
        {
            JsonTokenType.True => true,
            JsonTokenType.False => false,
            JsonTokenType.Number => reader.GetInt32() != 0,
            // Only the text "true" (any casing) counts as true; every other string is false.
            JsonTokenType.String => reader.GetString()?.Equals("true", StringComparison.OrdinalIgnoreCase) ?? false,
            _ => DBNull.Value
        };
    }

    return DBNull.Value;
}
/// <summary>
/// Discards the JSON value the reader is positioned on (used for properties with no column).
/// </summary>
/// <param name="reader">
/// Reader positioned on a value token. For an object or array the reader is advanced to the
/// matching closing token; for any other token the reader is left untouched.
/// </param>
static void SkipJsonValue(ref Utf8JsonReader reader)
{
    var opener = reader.TokenType;
    JsonTokenType closer;

    if (opener == JsonTokenType.StartObject)
    {
        closer = JsonTokenType.EndObject;
    }
    else if (opener == JsonTokenType.StartArray)
    {
        closer = JsonTokenType.EndArray;
    }
    else
    {
        // Scalar values occupy a single token, so there is nothing further to consume.
        return;
    }

    // Track nesting of the same container kind until the one we started on is closed
    // (or the reader runs out of tokens).
    for (var open = 1; open > 0 && reader.Read();)
    {
        if (reader.TokenType == opener)
        {
            open++;
        }
        else if (reader.TokenType == closer)
        {
            open--;
        }
    }
}