feat: add protobuf cache converter tool
Add standalone CLI tool to convert zstd-compressed JSON cache files to zstd-compressed Protocol Buffers format for faster deserialization.
This commit is contained in:
@@ -0,0 +1,15 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="ZstdSharp.Port" Version="0.8.1" />
|
||||
<PackageReference Include="protobuf-net-data" Version="4.1.0" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -0,0 +1,147 @@
|
||||
using System.Data;
|
||||
using System.Text.Json;
|
||||
using ProtoBuf.Data;
|
||||
using ZstdSharp;
|
||||
|
||||
// Entry point.
// Converts every zstd-compressed JSON cache file ("*.json.zstd") found directly in the
// directory given as the first argument into a zstd-compressed protobuf file
// ("<name>.pb.zstd") written next to it, printing per-file and total size statistics.
// Exit code: 1 when the argument is missing or the directory does not exist, otherwise 0
// (per-file failures are reported on the console and do not change the exit code).
if (args.Length == 0)
{
    Console.WriteLine("Usage: CacheConverter <cache-directory>");
    Console.WriteLine("Example: dotnet run -- ../../CACHED_DB_FILES");
    return 1;
}

var cacheDir = args[0];
if (!Directory.Exists(cacheDir))
{
    Console.WriteLine($"Error: Directory not found: {cacheDir}");
    return 1;
}

const string jsonSuffix = ".json.zstd";
const int streamBufferSize = 256 * 1024;

// The serializer options do not depend on the file, so build them once.
var jsonOptions = new JsonSerializerOptions { PropertyNameCaseInsensitive = true };

var jsonFiles = Directory.GetFiles(cacheDir, "*" + jsonSuffix);
Console.WriteLine($"Found {jsonFiles.Length} JSON files to convert");

long totalOriginalSize = 0;
long totalNewSize = 0;

foreach (var jsonFile in jsonFiles)
{
    // Strip the marker only when it is the trailing suffix; a plain Replace would also
    // remove an occurrence in the middle of the file name.
    var fileName = Path.GetFileName(jsonFile);
    var baseName = fileName.EndsWith(jsonSuffix, StringComparison.OrdinalIgnoreCase)
        ? fileName[..^jsonSuffix.Length]
        : fileName;
    var outputFile = Path.Combine(cacheDir, $"{baseName}.pb.zstd");

    // Write to a temporary file first so that a failed conversion never leaves a
    // truncated "*.pb.zstd" behind or destroys an earlier good output.
    var tempFile = outputFile + ".tmp";

    Console.Write($"Converting {baseName}... ");

    try
    {
        var originalSize = new FileInfo(jsonFile).Length;
        totalOriginalSize += originalSize;

        // Read, decompress and parse the JSON array into a list of dictionaries.
        // The input streams are closed as soon as parsing is done.
        List<Dictionary<string, JsonElement>> records;
        using (var inputFs = new FileStream(jsonFile, FileMode.Open, FileAccess.Read, FileShare.Read, streamBufferSize, FileOptions.SequentialScan))
        using (var decompressStream = new DecompressionStream(inputFs))
        using (var bufferedInput = new BufferedStream(decompressStream, streamBufferSize))
        {
            records = JsonSerializer.Deserialize<List<Dictionary<string, JsonElement>>>(bufferedInput, jsonOptions)
                ?? throw new InvalidDataException("Failed to parse JSON array");
        }

        if (records.Count == 0)
        {
            Console.WriteLine("SKIP (empty)");
            continue;
        }

        // Create DataTable from records
        var dataTable = CreateDataTable(records);

        // Write protobuf with zstd compression. The streams are scoped explicitly:
        // the compressed output is only complete on disk once the compression stream
        // and the file stream have been disposed, so the size must be read afterwards.
        using (var outputFs = new FileStream(tempFile, FileMode.Create, FileAccess.Write, FileShare.None, streamBufferSize))
        using (var compressStream = new CompressionStream(outputFs, level: 3))
        using (var reader = dataTable.CreateDataReader())
        {
            DataSerializer.Serialize(compressStream, reader);
        }

        File.Move(tempFile, outputFile, overwrite: true);

        var newSize = new FileInfo(outputFile).Length;
        totalNewSize += newSize;

        var ratio = (double)newSize / originalSize * 100;
        Console.WriteLine($"OK ({originalSize:N0} -> {newSize:N0} bytes, {ratio:F1}%)");
    }
    catch (Exception ex)
    {
        Console.WriteLine($"ERROR: {ex.Message}");

        // Best-effort cleanup of a partially written temporary file.
        try
        {
            File.Delete(tempFile);
        }
        catch (Exception cleanupEx) when (cleanupEx is IOException or UnauthorizedAccessException)
        {
            Console.WriteLine($"Warning: could not remove {tempFile}: {cleanupEx.Message}");
        }
    }
}

Console.WriteLine();
if (totalOriginalSize > 0)
{
    Console.WriteLine($"Total: {totalOriginalSize:N0} -> {totalNewSize:N0} bytes ({(double)totalNewSize / totalOriginalSize * 100:F1}%)");
}
else
{
    // Nothing was read, so a percentage would be 0/0 (NaN).
    Console.WriteLine($"Total: {totalOriginalSize:N0} -> {totalNewSize:N0} bytes");
}
return 0;
|
||||
|
||||
// Builds a DataTable from parsed JSON records.
//
// The schema is derived from ALL records, not just the first one:
//   * every key seen in any record becomes a column (in first-seen order), so fields
//     that only appear in later records are no longer dropped;
//   * a column's type comes from its first non-null value (via InferType), so a null
//     in the first record no longer pins the column to string;
//   * a column first seen as an integer is widened to decimal when a later value is a
//     non-integer number.
// Columns that are null in every record default to string.
// Cells whose key is missing from a record, or whose value is JSON null, become DBNull.
// An empty record list yields an empty table with no columns.
static DataTable CreateDataTable(List<Dictionary<string, JsonElement>> records)
{
    var dt = new DataTable();

    // Pass 1: discover the columns and their types.
    var columnOrder = new List<string>();
    var columnTypes = new Dictionary<string, Type?>(StringComparer.Ordinal);

    foreach (var record in records)
    {
        foreach (var (key, value) in record)
        {
            if (!columnTypes.TryGetValue(key, out var knownType))
            {
                columnOrder.Add(key);
                columnTypes[key] = null; // type still unresolved
            }

            if (value.ValueKind == JsonValueKind.Null)
            {
                continue; // null carries no type information
            }

            var inferred = InferType(value);
            var widenToDecimal = knownType == typeof(long) && inferred == typeof(decimal);
            if (knownType is null || widenToDecimal)
            {
                columnTypes[key] = inferred;
            }
        }
    }

    foreach (var name in columnOrder)
    {
        // DataTable matches column names case-insensitively; skip a key it would treat
        // as a duplicate instead of failing the whole table.
        if (dt.Columns.Contains(name))
        {
            continue;
        }

        dt.Columns.Add(name, columnTypes[name] ?? typeof(string));
    }

    // Pass 2: add all rows.
    foreach (var record in records)
    {
        var row = dt.NewRow();
        foreach (DataColumn col in dt.Columns)
        {
            row[col] = record.TryGetValue(col.ColumnName, out var value)
                ? ConvertValue(value, col.DataType)
                : DBNull.Value;
        }
        dt.Rows.Add(row);
    }

    return dt;
}
|
||||
|
||||
// Maps a JSON value to the CLR type used for its DataTable column:
// booleans -> bool, numbers that fit in Int64 -> long, any other number -> decimal,
// and everything else (strings, null, objects, arrays, undefined) -> string.
static Type InferType(JsonElement element)
{
    switch (element.ValueKind)
    {
        case JsonValueKind.True:
        case JsonValueKind.False:
            return typeof(bool);

        case JsonValueKind.Number:
            return element.TryGetInt64(out _) ? typeof(long) : typeof(decimal);

        default:
            // Strings, and every kind without a dedicated column type (including null).
            return typeof(string);
    }
}
|
||||
|
||||
// Converts a JSON value into the boxed CLR value stored in a column of targetType.
//
//   * JSON null (or an undefined element) -> DBNull.Value
//   * typeof(long) / typeof(decimal) / typeof(bool) -> the matching typed getter
//     (these throw if the JSON value is of an incompatible kind or out of range)
//   * typeof(string) and any other target type -> text: the string content for JSON
//     strings, the raw JSON text for every other kind (numbers, booleans, objects,
//     arrays), so a non-string value in a text column no longer throws.
//
// Date-like strings are deliberately returned unchanged. Returning a DateTime for a
// string-typed column would make the column store the DateTime's culture-dependent
// text form instead of the original value, and lenient date parsing can also match
// strings that are not dates at all.
static object ConvertValue(JsonElement element, Type targetType)
{
    if (element.ValueKind is JsonValueKind.Null or JsonValueKind.Undefined)
    {
        return DBNull.Value;
    }

    if (targetType == typeof(long))
    {
        return element.GetInt64();
    }

    if (targetType == typeof(decimal))
    {
        return element.GetDecimal();
    }

    if (targetType == typeof(bool))
    {
        return element.GetBoolean();
    }

    // String columns and the fallback for unknown target types.
    if (element.ValueKind == JsonValueKind.String)
    {
        return (object?)element.GetString() ?? DBNull.Value;
    }

    return element.GetRawText();
}
|
||||
Reference in New Issue
Block a user