6d08fd4a6c
Add standalone CLI tool to convert zstd-compressed JSON cache files to zstd-compressed Protocol Buffers format for faster deserialization.
148 lines
4.6 KiB
C#
148 lines
4.6 KiB
C#
using System.Data;
|
|
using System.Text.Json;
|
|
using ProtoBuf.Data;
|
|
using ZstdSharp;
|
|
|
|
// Entry point.
// Converts every "*.json.zstd" file in the directory given as the first
// argument into a zstd-compressed protobuf file ("<name>.pb.zstd") written
// next to it, printing one status line per file and a size summary at the end.
// Exit code: 1 for a missing argument or directory, otherwise 0 (per-file
// failures are reported on the console but do not change the exit code).
if (args.Length == 0)
{
    Console.WriteLine("Usage: CacheConverter <cache-directory>");
    Console.WriteLine("Example: dotnet run -- ../../CACHED_DB_FILES");
    return 1;
}

var cacheDir = args[0];
if (!Directory.Exists(cacheDir))
{
    Console.WriteLine($"Error: Directory not found: {cacheDir}");
    return 1;
}

const string JsonSuffix = ".json.zstd";
const int StreamBufferSize = 256 * 1024;

var jsonFiles = Directory.GetFiles(cacheDir, "*" + JsonSuffix);
Console.WriteLine($"Found {jsonFiles.Length} JSON files to convert");

// Created once: the options object is identical for every file.
var jsonOptions = new JsonSerializerOptions { PropertyNameCaseInsensitive = true };

// Totals only cover files that were converted successfully, so the summary
// ratio compares like with like.
long totalOriginalSize = 0;
long totalNewSize = 0;

foreach (var jsonFile in jsonFiles)
{
    // Strip only the trailing suffix; a plain Replace would also remove the
    // same text from the middle of a file name.
    var fileName = Path.GetFileName(jsonFile);
    var baseName = fileName.EndsWith(JsonSuffix, StringComparison.OrdinalIgnoreCase)
        ? fileName[..^JsonSuffix.Length]
        : fileName;
    var outputFile = Path.Combine(cacheDir, $"{baseName}.pb.zstd");

    Console.Write($"Converting {baseName}... ");

    // True while the output file has been created/truncated but not yet
    // completely written; used to clean up after a failure.
    var outputIncomplete = false;

    try
    {
        var originalSize = new FileInfo(jsonFile).Length;

        // Read and decompress JSON. The input streams are scoped so they are
        // closed before the output is written.
        List<Dictionary<string, JsonElement>> records;
        using (var inputFs = new FileStream(jsonFile, FileMode.Open, FileAccess.Read, FileShare.Read, StreamBufferSize, FileOptions.SequentialScan))
        using (var decompressStream = new DecompressionStream(inputFs))
        using (var bufferedInput = new BufferedStream(decompressStream, StreamBufferSize))
        {
            // Parse JSON array into list of dictionaries
            records = JsonSerializer.Deserialize<List<Dictionary<string, JsonElement>>>(bufferedInput, jsonOptions)
                ?? throw new InvalidDataException("Failed to parse JSON array");
        }

        if (records.Count == 0)
        {
            Console.WriteLine("SKIP (empty)");
            continue;
        }

        // Create DataTable from records
        var dataTable = CreateDataTable(records);

        // Write protobuf with zstd compression. All three objects are disposed
        // (reader, then compressor, then file) before the size is measured, so
        // everything still buffered has reached the file by then.
        outputIncomplete = true;
        using (var outputFs = new FileStream(outputFile, FileMode.Create, FileAccess.Write, FileShare.None, StreamBufferSize))
        using (var compressStream = new CompressionStream(outputFs, level: 3))
        using (var reader = dataTable.CreateDataReader())
        {
            DataSerializer.Serialize(compressStream, reader);
        }
        outputIncomplete = false;

        var newSize = new FileInfo(outputFile).Length;
        totalOriginalSize += originalSize;
        totalNewSize += newSize;

        var ratio = originalSize > 0 ? (double)newSize / originalSize * 100 : 0;
        Console.WriteLine($"OK ({originalSize:N0} -> {newSize:N0} bytes, {ratio:F1}%)");
    }
    catch (Exception ex)
    {
        Console.WriteLine($"ERROR: {ex.Message}");

        // Do not leave a truncated cache file behind for a consumer to load.
        if (outputIncomplete)
        {
            try
            {
                File.Delete(outputFile);
            }
            catch (Exception cleanupEx)
            {
                Console.WriteLine($"Warning: could not remove partial output {outputFile}: {cleanupEx.Message}");
            }
        }
    }
}

Console.WriteLine();

// Guard the division: with nothing converted the ratio would print as NaN.
var totalRatio = totalOriginalSize > 0 ? (double)totalNewSize / totalOriginalSize * 100 : 0;
Console.WriteLine($"Total: {totalOriginalSize:N0} -> {totalNewSize:N0} bytes ({totalRatio:F1}%)");
return 0;
|
|
|
|
// Builds a DataTable from parsed JSON records.
//
// The schema is derived from ALL records rather than only the first one:
//   * columns appear in first-seen order (the first record's keys come first),
//     and keys that only show up in later records still get a column;
//   * JSON nulls do not influence a column's type;
//   * a column holding both integral and fractional numbers becomes decimal;
//   * any other mix of kinds falls back to string;
//   * a column that is null in every record is typed as string.
// Keys that differ only by case are treated as the same column.
// Returns an empty table (no columns, no rows) for an empty list.
static DataTable CreateDataTable(List<Dictionary<string, JsonElement>> records)
{
    var dt = new DataTable();

    // Pass 1: discover columns and settle each column's type.
    var columnNames = new List<string>();
    var columnTypes = new Dictionary<string, Type?>(StringComparer.OrdinalIgnoreCase);

    foreach (var record in records)
    {
        foreach (var (key, value) in record)
        {
            if (!columnTypes.TryGetValue(key, out var known))
            {
                columnNames.Add(key);
                columnTypes[key] = null; // type still undecided
            }

            if (value.ValueKind == JsonValueKind.Null)
            {
                continue;
            }

            var observed = InferType(value);
            if (known is null)
            {
                columnTypes[key] = observed;
            }
            else if (known != observed)
            {
                var bothNumeric =
                    (known == typeof(long) || known == typeof(decimal)) &&
                    (observed == typeof(long) || observed == typeof(decimal));
                columnTypes[key] = bothNumeric ? typeof(decimal) : typeof(string);
            }
        }
    }

    // Pass 2: create the columns.
    var columns = new Dictionary<string, DataColumn>(StringComparer.OrdinalIgnoreCase);
    foreach (var name in columnNames)
    {
        columns[name] = dt.Columns.Add(name, columnTypes[name] ?? typeof(string));
    }

    // Pass 3: add all rows. Cells of a new row start out as DBNull (the
    // default DataColumn.DefaultValue), so keys missing from a record need no
    // explicit handling.
    foreach (var record in records)
    {
        var row = dt.NewRow();
        foreach (var (key, value) in record)
        {
            var col = columns[key];
            row[col] = ConvertValue(value, col.DataType);
        }
        dt.Rows.Add(row);
    }

    return dt;
}
|
|
|
|
// Maps a JSON value to the CLR type used for its DataTable column:
// numbers that fit in Int64 -> long, other numbers -> decimal,
// true/false -> bool, everything else (strings, null, objects, arrays) -> string.
static Type InferType(JsonElement element)
{
    switch (element.ValueKind)
    {
        case JsonValueKind.Number:
            return element.TryGetInt64(out _) ? typeof(long) : typeof(decimal);

        case JsonValueKind.True:
        case JsonValueKind.False:
            return typeof(bool);

        default:
            // Strings, nulls and any other kind are all carried as string.
            return typeof(string);
    }
}
|
|
|
|
// Converts a JSON value into the object stored in a DataTable cell whose
// column has the given type.
//   element    - the JSON value to convert.
//   targetType - the column's data type (long, decimal, bool, or string).
// Returns DBNull.Value for JSON null; otherwise a long, decimal, bool or string.
// For string (or any other) target types, JSON strings are returned verbatim
// and every other kind is returned as its raw JSON text.
// Throws (from the JsonElement getters) when the element cannot be read as the
// requested long/decimal/bool type.
static object ConvertValue(JsonElement element, Type targetType)
{
    if (element.ValueKind == JsonValueKind.Null)
    {
        return DBNull.Value;
    }

    if (targetType == typeof(long))
    {
        return element.GetInt64();
    }

    if (targetType == typeof(decimal))
    {
        return element.GetDecimal();
    }

    if (targetType == typeof(bool))
    {
        return element.GetBoolean();
    }

    // String columns keep the original text. No date sniffing here: a DateTime
    // handed to a string-typed column would only be turned back into text in
    // the current culture's format, changing the data (and rewriting non-date
    // strings that merely happen to parse as a date).
    if (element.ValueKind == JsonValueKind.String)
    {
        return (object?)element.GetString() ?? DBNull.Value;
    }

    // Numbers, booleans, objects and arrays in a string column: GetString()
    // would throw for these kinds, so store their JSON text instead.
    return element.GetRawText();
}
|