Initial commit: JDE Scoping Tool migration project
Set up repository with legacy .NET Framework 4.8 source (OLD/), new .NET 10 Blazor solution (NEW/), OpenSpec specifications, documentation, and project configuration.
This commit is contained in:
@@ -0,0 +1,581 @@
|
||||
# Data Sync Service Design
|
||||
|
||||
## Overview
|
||||
|
||||
This document describes the architecture and implementation patterns for the data synchronization background service.
|
||||
|
||||
## Component Architecture
|
||||
|
||||
```
|
||||
JdeScoping.DataSync/
|
||||
├── DataSyncService.cs # BackgroundService implementation
|
||||
├── IDataFetcher.cs # Generic fetcher interface
|
||||
├── IPostProcessor.cs # Post-processing interface
|
||||
├── DataSyncOptions.cs # Configuration options
|
||||
├── DataSourceConfig.cs # Per-table configuration
|
||||
├── ScheduleChecker.cs # Schedule evaluation logic
|
||||
├── SyncOrchestrator.cs # Coordinates parallel sync operations
|
||||
├── TableSyncOperation.cs # Single table sync execution
|
||||
├── StagingTableManager.cs # Temp table creation and MERGE
|
||||
├── DataSyncHealthCheck.cs # IHealthCheck implementation
|
||||
├── DataSyncMetrics.cs # Metrics and telemetry
|
||||
├── ServiceCollectionExtensions.cs # AddDataSync registration
|
||||
└── Fetchers/ # IDataFetcher<T> implementations
|
||||
├── JdeWorkOrderFetcher.cs
|
||||
├── JdeLotUsageFetcher.cs
|
||||
├── JdeItemFetcher.cs
|
||||
└── ...
|
||||
```
|
||||
|
||||
## BackgroundService Pattern
|
||||
|
||||
### ExecuteAsync Implementation
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Hosted background service that repeatedly runs one sync cycle and then waits
/// <c>DataSyncOptions.CheckInterval</c> before starting the next one.
/// </summary>
public class DataSyncService : BackgroundService
{
    // NOTE(review): the constructor that assigns these injected fields is not shown in this snippet.
    private readonly IServiceScopeFactory _scopeFactory;
    private readonly IOptions<DataSyncOptions> _options;
    private readonly ILogger<DataSyncService> _logger;
    private readonly DataSyncMetrics _metrics;

    /// <summary>
    /// Runs startup recovery once, then loops over sync cycles until the host requests shutdown.
    /// </summary>
    /// <param name="stoppingToken">Signalled by the host when the service should stop.</param>
    /// <returns>A task that completes normally (not cancelled/faulted) on graceful shutdown.</returns>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        try
        {
            // Startup: close any interrupted syncs from prior runs
            await CloseOpenUpdateEntriesAsync(stoppingToken);

            while (!stoppingToken.IsCancellationRequested)
            {
                try
                {
                    // Create scope for this sync cycle; disposed at the end of the try block
                    await using var scope = _scopeFactory.CreateAsyncScope();

                    var orchestrator = scope.ServiceProvider
                        .GetRequiredService<ISyncOrchestrator>();

                    // Check schedules and execute pending syncs
                    await orchestrator.ExecutePendingSyncsAsync(stoppingToken);

                    // Periodic purge of old DataUpdate records
                    await PurgeUpdateEntriesAsync(scope, stoppingToken);
                }
                catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
                {
                    // Graceful shutdown
                    break;
                }
                catch (Exception ex)
                {
                    // A failed cycle must not stop the service; the next cycle retries.
                    _logger.LogError(ex, "Error in sync cycle");
                }

                // Wait before next check. Stays outside the inner try so a failing cycle
                // still waits instead of spinning; cancellation here is handled below.
                await Task.Delay(_options.Value.CheckInterval, stoppingToken);
            }
        }
        catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
        {
            // Shutdown was requested during startup recovery or while waiting in Task.Delay.
            // Swallow it so ExecuteAsync completes normally instead of ending as cancelled.
        }
    }
}
|
||||
```
|
||||
|
||||
### Graceful Shutdown
|
||||
|
||||
- `CancellationToken` propagates to all child operations
|
||||
- `Parallel.ForEachAsync` respects cancellation token
|
||||
- In-progress operations complete current batch or cancel gracefully
|
||||
- Incomplete syncs marked as failed with `WasSuccessful = false`
|
||||
|
||||
## IDataFetcher<T> Interface
|
||||
|
||||
### Interface Definition
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Contract for reading <typeparamref name="TEntity"/> records from a source system.
/// </summary>
/// <typeparam name="TEntity">Entity type produced by the fetcher.</typeparam>
public interface IDataFetcher<TEntity>
    where TEntity : class
{
    /// <summary>
    /// Streams entities from the source system as an asynchronous sequence.
    /// </summary>
    /// <param name="minimumDT">
    /// Lower bound for incremental fetches: only records modified after this time are returned.
    /// Pass <c>null</c> to request a full fetch.
    /// </param>
    /// <param name="cancellationToken">Token used to stop the fetch during graceful shutdown.</param>
    /// <returns>An async enumerable that yields entities as they are read from the source.</returns>
    IAsyncEnumerable<TEntity> FetchAsync(DateTime? minimumDT, CancellationToken cancellationToken = default);
}
|
||||
```
|
||||
|
||||
### Fetcher Resolution
|
||||
|
||||
Fetchers are registered in DI by convention:
|
||||
|
||||
```csharp
|
||||
services.AddScoped<IDataFetcher<WorkOrder>, JdeWorkOrderFetcher>();
|
||||
services.AddScoped<IDataFetcher<LotUsage>, JdeLotUsageFetcher>();
|
||||
services.AddScoped<IDataFetcher<Item>, JdeItemFetcher>();
|
||||
// ... etc
|
||||
```
|
||||
|
||||
Configuration references fetcher type name:
|
||||
|
||||
```json
|
||||
{
|
||||
"DataSync": {
|
||||
"DataSources": [
|
||||
{
|
||||
"TableName": "WorkOrder_Curr",
|
||||
"SourceSystem": "JDE",
|
||||
"FetcherTypeName": "JdeWorkOrderFetcher",
|
||||
"IsEnabled": true,
|
||||
"MassConfig": { "Enabled": true, "IntervalMinutes": 10080, "PrepurgeData": true },
|
||||
"DailyConfig": { "Enabled": true, "IntervalMinutes": 1440 },
|
||||
"HourlyConfig": { "Enabled": true, "IntervalMinutes": 60 }
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
At startup, `FetcherTypeName` is validated and resolved to a registered `IDataFetcher<T>`.
|
||||
|
||||
## Configuration Classes
|
||||
|
||||
### DataSyncOptions
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Strongly-typed settings for the data sync service, bound from the
/// <see cref="SectionName"/> configuration section.
/// </summary>
public class DataSyncOptions
{
    /// <summary>Name of the configuration section these options bind to.</summary>
    public const string SectionName = "DataSync";

    /// <summary>
    /// Time between schedule checks (default: 1 minute).
    /// </summary>
    public TimeSpan CheckInterval { get; set; } = TimeSpan.FromMinutes(1);

    /// <summary>
    /// Maximum parallel sync operations (default: 8).
    /// </summary>
    public int MaxDegreeOfParallelism { get; set; } = 8;

    /// <summary>
    /// Records per batch for streaming (default: 1,000,000).
    /// </summary>
    public int BatchSize { get; set; } = 1_000_000;

    /// <summary>
    /// Rows per bulk copy batch (default: 10,000).
    /// </summary>
    public int BulkCopyBatchSize { get; set; } = 10_000;

    /// <summary>
    /// Multiplier for lookback window (default: 3).
    /// </summary>
    public int LookbackMultiplier { get; set; } = 3;

    /// <summary>
    /// Days to retain DataUpdate history (default: 30).
    /// </summary>
    public int PurgeRetentionDays { get; set; } = 30;

    /// <summary>
    /// Per-table data source configurations; empty unless configured.
    /// </summary>
    public List<DataSourceConfig> DataSources { get; set; } = new List<DataSourceConfig>();
}
|
||||
```
|
||||
|
||||
### DataSourceConfig
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Configuration for a single synced table: where the data comes from, which fetcher
/// reads it, and the Mass/Daily/Hourly schedules that apply to it.
/// </summary>
public class DataSourceConfig
{
    /// <summary>Target table name in SQL Server cache</summary>
    public required string TableName { get; set; }

    /// <summary>Source system: "JDE" or "CMS"</summary>
    public required string SourceSystem { get; set; }

    // The generic type is written as IDataFetcher{T}: a raw <T> inside an XML doc
    // comment is parsed as an unclosed element and yields a malformed-XML warning (CS1570).
    /// <summary>Name of the <c>IDataFetcher{T}</c> implementation type</summary>
    public required string FetcherTypeName { get; set; }

    /// <summary>Optional IPostProcessor implementation type name</summary>
    public string? PostProcessorTypeName { get; set; }

    /// <summary>Whether this data source is enabled for sync</summary>
    public bool IsEnabled { get; set; } = true;

    /// <summary>Mass sync schedule configuration</summary>
    public ScheduleConfig MassConfig { get; set; } = new();

    /// <summary>Daily incremental sync configuration</summary>
    public ScheduleConfig DailyConfig { get; set; } = new();

    /// <summary>Hourly incremental sync configuration</summary>
    public ScheduleConfig HourlyConfig { get; set; } = new();
}
|
||||
|
||||
/// <summary>
/// Settings for one schedule (Mass, Daily or Hourly) of a data source.
/// </summary>
public class ScheduleConfig
{
    /// <summary>Whether this schedule is active (default: true).</summary>
    // NOTE(review): a default-constructed instance is enabled with IntervalMinutes = 0;
    // confirm the schedule checker treats a zero interval sensibly when a section is omitted.
    public bool Enabled { get; set; } = true;

    /// <summary>Interval between runs, in minutes (default: 0).</summary>
    public int IntervalMinutes { get; set; }

    /// <summary>Whether the destination is purged before loading (default: false).</summary>
    public bool PrepurgeData { get; set; }

    /// <summary>Whether indexes are rebuilt after loading (default: false).</summary>
    public bool ReIndexData { get; set; }
}
|
||||
```
|
||||
|
||||
## Parallel Sync Execution
|
||||
|
||||
### Parallel.ForEachAsync Pattern
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Runs all currently due sync tasks in parallel, each inside its own DI scope.
/// </summary>
// NOTE(review): the _scheduleChecker, _options and _scopeFactory fields and the
// constructor that assigns them are not shown in this snippet.
public class SyncOrchestrator : ISyncOrchestrator
{
    /// <summary>
    /// Asks the schedule checker for pending tasks and executes them with at most
    /// <c>MaxDegreeOfParallelism</c> running concurrently.
    /// </summary>
    /// <param name="cancellationToken">Token that stops scheduling and running operations.</param>
    /// <returns>A task that completes when every pending task has finished or failed.</returns>
    public async Task ExecutePendingSyncsAsync(CancellationToken cancellationToken)
    {
        var pendingTasks = await _scheduleChecker.GetPendingTasksAsync(cancellationToken);

        if (pendingTasks.Count == 0)
        {
            return;
        }

        var parallelOptions = new ParallelOptions
        {
            MaxDegreeOfParallelism = _options.Value.MaxDegreeOfParallelism,
            CancellationToken = cancellationToken
        };

        await Parallel.ForEachAsync(pendingTasks, parallelOptions, async (task, ct) =>
        {
            // Each task gets its own scope
            await using var scope = _scopeFactory.CreateAsyncScope();

            try
            {
                var operation = scope.ServiceProvider
                    .GetRequiredService<ITableSyncOperation>();

                await operation.ExecuteAsync(task, ct);
            }
            catch (Exception ex) when (ex is not OperationCanceledException || !ct.IsCancellationRequested)
            {
                // An exception escaping this delegate makes Parallel.ForEachAsync cancel the
                // token handed to every other running delegate, so one failing table would
                // abort all sibling syncs. Contain the failure here instead; genuine
                // cancellation (filter above) still propagates.
                scope.ServiceProvider
                    .GetRequiredService<ILogger<SyncOrchestrator>>()
                    .LogError(ex, "Sync task for {TableName} failed; continuing with remaining tasks", task.TableName);
            }
        });
    }
}
|
||||
```
|
||||
|
||||
### Isolation Requirements
|
||||
|
||||
- Each parallel sync operation creates its own `IServiceScope`
|
||||
- Each operation uses its own SQL connection from the scoped `DbContext` or connection factory
|
||||
- Staging tables use unique suffixes: `#Staging{TableName}_{OperationId}`
|
||||
- No shared mutable state between parallel operations
|
||||
|
||||
## Staging Table Management
|
||||
|
||||
### Naming Convention
|
||||
|
||||
```
|
||||
#Staging{TableName}_{OperationId} - Bulk copy destination
|
||||
#{TableName}_{OperationId} - Deduplicated temp table for MERGE
|
||||
```
|
||||
|
||||
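Where `OperationId` is a GUID or sequential ID unique to each sync operation. When a GUID is used, format it without hyphens (e.g. `Guid.ToString("N")`), because `-` is not valid in an unquoted SQL Server identifier.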
|
||||
|
||||
### MERGE Operation Flow
|
||||
|
||||
1. **Create staging table** matching destination schema with unique suffix
|
||||
2. **Bulk copy** source data to staging table (batched at 10,000 rows)
|
||||
3. **Deduplicate** into temp table using `ROW_NUMBER() OVER (PARTITION BY PK ORDER BY LastUpdateDT DESC)`
|
||||
4. **MERGE** from temp table to destination:
|
||||
- INSERT new records (not matched by primary key)
|
||||
- UPDATE existing records WHERE `source.LastUpdateDT > target.LastUpdateDT`
|
||||
5. **Cleanup** staging and temp tables
|
||||
|
||||
### Mass Update with Truncation
|
||||
|
||||
For mass updates with `PrepurgeData = true`:
|
||||
|
||||
1. **Disable non-PK indexes** on destination table
|
||||
2. **TRUNCATE** destination table
|
||||
3. **Bulk copy** directly to destination (no staging needed)
|
||||
4. **Rebuild indexes** if `ReIndexData = true`
|
||||
5. **Update statistics**
|
||||
|
||||
### Batching Large Datasets
|
||||
|
||||
When a fetch streams more records than `BatchSize` (default: 1,000,000), the records are processed in batches:
|
||||
|
||||
```csharp
|
||||
// Buffer streamed entities and flush the buffer each time it reaches BatchSize.
int batchNumber = 0;
var batch = new List<T>(_options.BatchSize);

await foreach (var entity in fetcher.FetchAsync(minimumDT, ct))
{
    batch.Add(entity);

    // Keep accumulating until the buffer is full
    if (batch.Count < _options.BatchSize)
    {
        continue;
    }

    await ProcessBatchAsync(batch, operationId, batchNumber, ct);
    batchNumber++;
    batch.Clear();
}

// Flush the final, partially filled batch (if any)
if (batch.Count != 0)
{
    await ProcessBatchAsync(batch, operationId, batchNumber, ct);
}
|
||||
```
|
||||
|
||||
## Update Logging
|
||||
|
||||
### DataUpdate Record Lifecycle
|
||||
|
||||
```
|
||||
Start: NumberRecords = -2 (in-progress marker)
|
||||
|
|
||||
v
|
||||
Success: NumberRecords = actual count, WasSuccessful = true, EndDT = now
|
||||
OR
|
||||
Failure: NumberRecords = -1, WasSuccessful = false, EndDT = now
|
||||
```
|
||||
|
||||
### Logging with Scope
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Executes one table sync and records its outcome in the DataUpdate log.
/// </summary>
/// <param name="task">The sync task; its TableName, UpdateType and OperationId are attached to the log scope.</param>
/// <param name="ct">Token used to cancel the sync itself.</param>
/// <returns>A task that completes when the sync has finished and its outcome is recorded.</returns>
/// <remarks>Any exception from the sync is logged, recorded as a failure, and rethrown.</remarks>
public async Task ExecuteAsync(DataUpdateTask task, CancellationToken ct)
{
    using var logScope = _logger.BeginScope(new Dictionary<string, object>
    {
        ["TableName"] = task.TableName,
        ["UpdateType"] = task.UpdateType,
        ["OperationId"] = task.OperationId
    });

    var updateId = await _repository.StartUpdateAsync(task, ct);

    try
    {
        var recordCount = await ExecuteSyncAsync(task, ct);
        await _repository.CompleteUpdateAsync(updateId, recordCount, success: true, ct);
        _logger.LogInformation("Sync completed: {RecordCount} records", recordCount);
    }
    catch (Exception ex)
    {
        // Log first so the error is captured even if the bookkeeping write below fails.
        _logger.LogError(ex, "Sync failed");

        // Do not pass ct here: when the failure IS a cancellation, ct is already signalled
        // and the write would be cancelled too, leaving the record in the in-progress state.
        await _repository.CompleteUpdateAsync(updateId, -1, success: false, CancellationToken.None);
        throw;
    }
}
|
||||
```
|
||||
|
||||
### Startup Recovery
|
||||
|
||||
At startup, `CloseOpenUpdateEntriesAsync()` marks any records still flagged as in progress (`NumberRecords = -2`) as failed:
|
||||
|
||||
```sql
|
||||
UPDATE DataUpdate
|
||||
SET EndDT = GETDATE(),
|
||||
WasSuccessful = 0,
|
||||
NumberRecords = -1
|
||||
WHERE NumberRecords = -2
|
||||
```
|
||||
|
||||
## Health Checks
|
||||
|
||||
### IHealthCheck Implementation
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Health check that reports per-table sync status and an overall
/// Healthy / Degraded / Unhealthy result.
/// </summary>
public class DataSyncHealthCheck : IHealthCheck
{
    /// <summary>
    /// Builds a per-table data dictionary and derives the overall status from it.
    /// </summary>
    /// <param name="context">Health check context supplied by the framework (not used here).</param>
    /// <param name="cancellationToken">Token forwarded to the status query.</param>
    /// <returns>
    /// Unhealthy if any table has recent failures, otherwise Degraded if any table is
    /// overdue, otherwise Healthy.
    /// </returns>
    public async Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context,
        CancellationToken cancellationToken = default)
    {
        var statuses = await GetTableStatusesAsync(cancellationToken);

        // Two entries per table: last sync time (ISO 8601 round-trip format) and status label
        var data = new Dictionary<string, object>();
        foreach (var status in statuses)
        {
            var lastSyncText = status.LastSyncTime?.ToString("O") ?? "Never";
            var statusText = status.IsOverdue ? "Overdue" : "Current";

            data[$"{status.TableName}_LastSync"] = lastSyncText;
            data[$"{status.TableName}_Status"] = statusText;
        }

        var overdueCount = statuses.Count(s => s.IsOverdue);
        var failedCount = statuses.Count(s => s.RecentFailures > 0);

        // Failures take precedence over overdue tables.
        // NOTE(review): the message says "Multiple" but a single failing table triggers it — confirm intent.
        return (failedCount, overdueCount) switch
        {
            (> 0, _) => HealthCheckResult.Unhealthy("Multiple recent sync failures", data: data),
            (_, > 0) => HealthCheckResult.Degraded($"{overdueCount} tables overdue for sync", data: data),
            _ => HealthCheckResult.Healthy("All syncs current", data: data),
        };
    }
}
|
||||
```
|
||||
|
||||
## Telemetry
|
||||
|
||||
### Metrics
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Owns the "DataSync" meter and the instruments used to report sync activity.
/// </summary>
public class DataSyncMetrics
{
    private readonly Meter _meter;
    private readonly Counter<long> _operationsStarted;
    private readonly Counter<long> _operationsCompleted;
    private readonly Counter<long> _operationsFailed;
    private readonly Histogram<double> _operationDuration;
    private readonly Histogram<long> _recordsProcessed;

    /// <summary>
    /// Creates the meter and all of its counters and histograms.
    /// </summary>
    /// <param name="meterFactory">Factory used to create the "DataSync" meter.</param>
    public DataSyncMetrics(IMeterFactory meterFactory)
    {
        _meter = meterFactory.Create("DataSync");

        // Counters
        _operationsStarted = _meter.CreateCounter<long>("sync.operations.started");
        _operationsCompleted = _meter.CreateCounter<long>("sync.operations.completed");
        _operationsFailed = _meter.CreateCounter<long>("sync.operations.failed");

        // Histograms
        _operationDuration = _meter.CreateHistogram<double>("sync.duration.seconds");
        _recordsProcessed = _meter.CreateHistogram<long>("sync.records.processed");
    }

    /// <summary>
    /// Increments the started-operations counter, tagged with table and update type.
    /// </summary>
    /// <param name="tableName">Value of the "table" tag.</param>
    /// <param name="updateType">Value of the "type" tag.</param>
    public void RecordOperationStarted(string tableName, string updateType)
    {
        var tableTag = new KeyValuePair<string, object?>("table", tableName);
        var typeTag = new KeyValuePair<string, object?>("type", updateType);

        _operationsStarted.Add(1, tableTag, typeTag);
    }

    // ... similar for completed, failed, duration, records
}
|
||||
```
|
||||
|
||||
### Activity Tracing
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Holds the shared "DataSync" <see cref="ActivitySource"/> and a helper for starting sync activities.
/// </summary>
public static class DataSyncActivitySource
{
    /// <summary>Activity source named "DataSync".</summary>
    public static readonly ActivitySource Source = new("DataSync");

    /// <summary>
    /// Starts an internal "SyncTable" activity tagged with the table name and update type.
    /// </summary>
    /// <param name="tableName">Value of the "table.name" tag.</param>
    /// <param name="updateType">Value of the "update.type" tag.</param>
    /// <returns>The started activity, or <c>null</c> when <see cref="ActivitySource.StartActivity(string, ActivityKind)"/> returns null.</returns>
    public static Activity? StartSyncOperation(string tableName, string updateType)
    {
        var activity = Source.StartActivity("SyncTable", ActivityKind.Internal);

        // Nothing to tag when no activity was created
        if (activity is null)
        {
            return null;
        }

        activity.SetTag("table.name", tableName);
        activity.SetTag("update.type", updateType);
        return activity;
    }
}
|
||||
```
|
||||
|
||||
## DI Registration
|
||||
|
||||
### AddDataSync Extension
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Dependency-injection registration for the data sync feature.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Registers the data sync hosted service, its scoped collaborators, health check,
    /// metrics, fetchers, and validated <c>DataSyncOptions</c>.
    /// </summary>
    /// <param name="services">Service collection to add registrations to.</param>
    /// <param name="configuration">Configuration root containing the "DataSync" section.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    public static IServiceCollection AddDataSync(
        this IServiceCollection services,
        IConfiguration configuration)
    {
        // Bind configuration
        services.Configure<DataSyncOptions>(
            configuration.GetSection(DataSyncOptions.SectionName));

        // Register core services
        services.AddHostedService<DataSyncService>();
        services.AddScoped<ISyncOrchestrator, SyncOrchestrator>();
        services.AddScoped<IScheduleChecker, ScheduleChecker>();
        services.AddScoped<ITableSyncOperation, TableSyncOperation>();
        services.AddScoped<IStagingTableManager, StagingTableManager>();

        // Register health check
        services.AddHealthChecks()
            .AddCheck<DataSyncHealthCheck>("data-sync");

        // Register metrics
        services.AddSingleton<DataSyncMetrics>();

        // Register fetchers
        services.AddScoped<IDataFetcher<WorkOrder>, JdeWorkOrderFetcher>();
        services.AddScoped<IDataFetcher<LotUsage>, JdeLotUsageFetcher>();
        // ... etc

        // Validate configuration at startup.
        // DataSyncOptions carries no data-annotation attributes, so ValidateDataAnnotations()
        // alone checks nothing; the explicit rules below reject values that would otherwise
        // only fail (or spin) once the service is running.
        services.AddOptions<DataSyncOptions>()
            .ValidateDataAnnotations()
            .Validate(
                o => o.CheckInterval > TimeSpan.Zero,
                "DataSync:CheckInterval must be greater than zero.")
            .Validate(
                // ParallelOptions.MaxDegreeOfParallelism accepts only -1 (unlimited) or a positive value
                o => o.MaxDegreeOfParallelism is -1 or > 0,
                "DataSync:MaxDegreeOfParallelism must be -1 or a positive integer.")
            .ValidateOnStart();

        return services;
    }
}
|
||||
```
|
||||
|
||||
## Schedule Checking Logic
|
||||
|
||||
### Priority: Mass > Daily > Hourly
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Builds the list of sync tasks that are currently due, at most one per enabled data source.
/// </summary>
/// <param name="ct">Token forwarded to the repository query.</param>
/// <returns>
/// Pending tasks chosen by priority Mass &gt; Daily &gt; Hourly; empty when nothing is due.
/// </returns>
public async Task<List<DataUpdateTask>> GetPendingTasksAsync(CancellationToken ct)
{
    var lastUpdates = await _repository.GetLastDataUpdatesAsync(ct);
    var tasks = new List<DataUpdateTask>();

    // Read the clock once so every data source is evaluated against the same instant
    var now = DateTime.UtcNow;

    foreach (var config in _options.Value.DataSources.Where(c => c.IsEnabled))
    {
        var lastSync = lastUpdates.GetValueOrDefault(config.TableName);

        // Check Mass first (highest priority)
        if (config.MassConfig.Enabled && NeedsMassSync(config, lastSync, now))
        {
            tasks.Add(CreateMassTask(config));
            continue; // Skip daily/hourly checks
        }

        // Check Daily
        if (config.DailyConfig.Enabled && NeedsDailySync(config, lastSync, now))
        {
            tasks.Add(CreateDailyTask(config, lastSync));
            continue; // Skip hourly check
        }

        // Check Hourly
        if (config.HourlyConfig.Enabled && NeedsHourlySync(config, lastSync, now))
        {
            tasks.Add(CreateHourlyTask(config, lastSync));
        }
    }

    return tasks;
}
|
||||
```
|
||||
|
||||
### MinimumDT Calculation
|
||||
|
||||
For Daily updates:
|
||||
```
|
||||
MinimumDT = LastDailyUpdateDT - (LookbackMultiplier * DailyInterval)
|
||||
```
|
||||
|
||||
For Hourly updates (uses Daily timestamp, not Hourly):
|
||||
```
|
||||
MinimumDT = LastDailyUpdateDT - (LookbackMultiplier * DailyInterval)
|
||||
```
|
||||
|
||||
## File Structure
|
||||
|
||||
```
|
||||
NEW/src/
|
||||
├── JdeScoping.DataSync/
|
||||
│ ├── JdeScoping.DataSync.csproj
|
||||
│ ├── DataSyncService.cs
|
||||
│ ├── Configuration/
|
||||
│ │ ├── DataSyncOptions.cs
|
||||
│ │ └── DataSourceConfig.cs
|
||||
│ ├── Contracts/
|
||||
│ │ ├── IDataFetcher.cs
|
||||
│ │ ├── IPostProcessor.cs
|
||||
│ │ ├── ISyncOrchestrator.cs
|
||||
│ │ ├── IScheduleChecker.cs
|
||||
│ │ ├── ITableSyncOperation.cs
|
||||
│ │ └── IStagingTableManager.cs
|
||||
│ ├── Services/
|
||||
│ │ ├── SyncOrchestrator.cs
|
||||
│ │ ├── ScheduleChecker.cs
|
||||
│ │ ├── TableSyncOperation.cs
|
||||
│ │ └── StagingTableManager.cs
|
||||
│ ├── Fetchers/
|
||||
│ │ ├── Jde/
|
||||
│ │ │ ├── JdeWorkOrderFetcher.cs
|
||||
│ │ │ ├── JdeLotUsageFetcher.cs
|
||||
│ │ │ └── ...
|
||||
│ │ └── Cms/
|
||||
│ │ └── CmsMisDataFetcher.cs
|
||||
│ ├── HealthChecks/
|
||||
│ │ └── DataSyncHealthCheck.cs
|
||||
│ ├── Telemetry/
|
||||
│ │ ├── DataSyncMetrics.cs
|
||||
│ │ └── DataSyncActivitySource.cs
|
||||
│ └── DependencyInjection/
|
||||
│ └── ServiceCollectionExtensions.cs
|
||||
└── JdeScoping.Host/
|
||||
└── Program.cs (add: builder.Services.AddDataSync(configuration))
|
||||
```
|
||||
@@ -0,0 +1,68 @@
|
||||
# Implement Data Sync Service
|
||||
|
||||
## Summary
|
||||
|
||||
Implement the background data synchronization service as a .NET BackgroundService that maintains the local SQL Server cache by fetching data from JDE (Oracle) and CMS (Sybase) source systems on configurable schedules.
|
||||
|
||||
## Scope
|
||||
|
||||
### In Scope
|
||||
|
||||
- `DataSyncService` inheriting from `BackgroundService` with proper lifecycle management
|
||||
- `IDataFetcher<T>` interface and fetcher implementations for each table type
|
||||
- `DataSyncOptions` and `DataSourceConfig` strongly-typed configuration classes
|
||||
- Schedule-based triggering (Mass/Daily/Hourly) with interval checking
|
||||
- Staging table management with MERGE operations for upserts
|
||||
- `DataUpdate` logging for audit trail and recovery
|
||||
- Health checks exposing sync status via ASP.NET Core health check framework
|
||||
- Telemetry via `System.Diagnostics.Metrics` and `ActivitySource`
|
||||
- `AddDataSync` extension method for DI registration
|
||||
|
||||
### Out of Scope
|
||||
|
||||
- Admin API for manual archive sync triggering (separate change)
|
||||
- Circuit breaker implementation for CMS (can be added later)
|
||||
- Periodic index maintenance (separate change)
|
||||
- Actual JDE/CMS database connectivity (will use mock fetchers initially)
|
||||
|
||||
## Motivation
|
||||
|
||||
The legacy `UpdateProcessor` runs as a Topshelf Windows service with reflection-based data fetchers and global temp tables. The new implementation uses modern .NET patterns:
|
||||
|
||||
- `BackgroundService` for proper ASP.NET Core hosting integration
|
||||
- `IDataFetcher<T>` interfaces for type-safe, testable data retrieval
|
||||
- `Parallel.ForEachAsync` for cancellation-aware parallel execution
|
||||
- Local temp tables with unique suffixes for parallel isolation
|
||||
- `IOptions<T>` pattern for strongly-typed configuration
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
1. `DataSyncService` starts with the host and respects `CancellationToken` for graceful shutdown
|
||||
2. Service checks schedules and queues sync tasks based on `LastDataUpdates` timestamps
|
||||
3. Sync operations execute in parallel with configurable `MaxDegreeOfParallelism`
|
||||
4. Each sync creates staging tables, bulk copies data, and executes MERGE operations
|
||||
5. All sync operations are logged to `DataUpdate` table with proper start/end/success tracking
|
||||
6. Interrupted syncs are marked as failed at startup via `CloseOpenUpdateEntriesAsync()`
|
||||
7. Health check reports sync status (Healthy/Degraded/Unhealthy) based on interval compliance
|
||||
8. Metrics emitted for operations started/completed/failed and duration histograms
|
||||
9. `openspec validate implement-data-sync --strict` passes
|
||||
|
||||
## Dependencies
|
||||
|
||||
- `migrate-database-schema` - DataUpdate table and related schema must exist
|
||||
- `data-access` spec - Repository patterns for database operations
|
||||
|
||||
## Risks
|
||||
|
||||
| Risk | Mitigation |
|
||||
|------|------------|
|
||||
| Complex parallel execution | Use `Parallel.ForEachAsync` with proper scoping; local temp tables with unique suffixes |
|
||||
| Schedule calculation edge cases | Comprehensive unit tests for schedule checking logic |
|
||||
| Memory pressure from large datasets | `IAsyncEnumerable<T>` streaming with batched bulk copy |
|
||||
| Staging table conflicts | Unique `_{OperationId}` suffix on all temp tables |
|
||||
|
||||
## Related Specs
|
||||
|
||||
- `data-sync` - Core specification for sync behavior and schedules
|
||||
- `domain-models` - Entity definitions for synced data
|
||||
- `database-schema` - Table structures and DataUpdate table
|
||||
@@ -0,0 +1,156 @@
|
||||
# Data Sync Specification - Implementation Additions
|
||||
|
||||
## Purpose
|
||||
|
||||
This specification extends the base data-sync spec with additional implementation-focused requirements for the BackgroundService pattern and parallel fetch isolation.
|
||||
|
||||
## ADDED Requirements
|
||||
|
||||
### Requirement: Background service implementation pattern
|
||||
|
||||
The system SHALL implement the data synchronization service following .NET BackgroundService best practices for hosted service lifecycle management.
|
||||
|
||||
#### Inputs
|
||||
|
||||
- `IServiceScopeFactory` for creating scoped service instances
|
||||
- `IOptions<DataSyncOptions>` for configuration access
|
||||
- `ILogger<DataSyncService>` for structured logging
|
||||
- `CancellationToken` from `ExecuteAsync` stoppingToken parameter
|
||||
|
||||
#### Outputs
|
||||
|
||||
- Continuously running background task that checks schedules and executes syncs
|
||||
- Proper cleanup on shutdown with all resources disposed
|
||||
- Logging scope context for all operations
|
||||
|
||||
#### Business Rules
|
||||
|
||||
- The service MUST implement `BackgroundService.ExecuteAsync(CancellationToken)`
|
||||
- The main loop MUST use `Task.Delay(checkInterval, stoppingToken)` between cycles
|
||||
- Each sync cycle MUST create a new `IServiceScope` via `IServiceScopeFactory.CreateAsyncScope()`
|
||||
- All scoped services MUST be resolved from the current scope, not from root provider
|
||||
- The scope MUST be disposed using `await using` pattern after each cycle
|
||||
- Exception handling MUST catch and log errors without crashing the service
|
||||
- `OperationCanceledException` MUST be caught and result in graceful loop exit when `stoppingToken.IsCancellationRequested`
|
||||
- The service MUST NOT use static state or shared mutable collections
|
||||
|
||||
#### Scenario: Normal sync cycle execution
|
||||
|
||||
- **WHEN** the BackgroundService enters ExecuteAsync
|
||||
- **THEN** the service SHALL call CloseOpenUpdateEntriesAsync to recover from prior crashes
|
||||
- **THEN** the service SHALL enter a while loop checking `!stoppingToken.IsCancellationRequested`
|
||||
- **THEN** each iteration SHALL create a new IServiceScope
|
||||
- **THEN** the ISyncOrchestrator SHALL be resolved from the scope
|
||||
- **THEN** ExecutePendingSyncsAsync SHALL be called with the stoppingToken
|
||||
- **THEN** the scope SHALL be disposed after the call completes
|
||||
- **THEN** Task.Delay SHALL pause before the next iteration
|
||||
|
||||
#### Scenario: Exception during sync cycle
|
||||
|
||||
- **WHEN** an exception occurs during sync execution (not OperationCanceledException)
|
||||
- **THEN** the exception SHALL be caught and logged with LogError
|
||||
- **THEN** the service SHALL continue to the next iteration
|
||||
- **THEN** the current scope SHALL still be disposed properly
|
||||
- **THEN** the service SHALL NOT crash or stop unexpectedly
|
||||
|
||||
#### Scenario: Graceful shutdown request
|
||||
|
||||
- **WHEN** the host signals shutdown by canceling the stoppingToken
|
||||
- **THEN** any running Task.Delay SHALL throw OperationCanceledException
|
||||
- **THEN** the while loop SHALL exit, either because that OperationCanceledException is handled as a shutdown signal or on the next `IsCancellationRequested` check
|
||||
- **THEN** the ExecuteAsync method SHALL complete normally
|
||||
- **THEN** any in-progress sync operations SHALL receive the cancellation and complete or cancel
|
||||
|
||||
### Requirement: Parallel fetch isolation with scoped resources
|
||||
|
||||
The system SHALL ensure complete isolation between parallel sync operations using scoped resources and unique identifiers.
|
||||
|
||||
#### Inputs
|
||||
|
||||
- List of `DataUpdateTask` objects to execute in parallel
|
||||
- `MaxDegreeOfParallelism` configuration value
|
||||
- `CancellationToken` for coordinated cancellation
|
||||
|
||||
#### Outputs
|
||||
|
||||
- Concurrent execution of sync operations with no resource conflicts
|
||||
- Unique staging tables per operation that do not collide
|
||||
- Independent database connections per operation
|
||||
|
||||
#### Business Rules
|
||||
|
||||
- `Parallel.ForEachAsync` MUST be used with `ParallelOptions.CancellationToken` set
|
||||
- Each parallel task MUST create its own `IServiceScope` inside the parallel delegate
|
||||
- Database connections MUST NOT be shared across parallel operations
|
||||
- Staging table names MUST include a unique `OperationId` suffix (GUID or sequential ID)
|
||||
- Format: `#Staging{TableName}_{OperationId}` and `#{TableName}_{OperationId}`
|
||||
- Each parallel operation MUST resolve its own instances of all scoped services
|
||||
- No `ConcurrentDictionary`, shared counters, or other shared mutable state SHALL exist between operations
|
||||
- Total record counts SHALL be accumulated via return values, not shared state
|
||||
|
||||
#### Scenario: Parallel sync with isolated scopes
|
||||
|
||||
- **WHEN** multiple DataUpdateTasks are executed via Parallel.ForEachAsync
|
||||
- **THEN** each task SHALL execute the async delegate independently
|
||||
- **THEN** each delegate SHALL create a new IServiceScope using CreateAsyncScope
|
||||
- **THEN** ITableSyncOperation SHALL be resolved from each scope independently
|
||||
- **THEN** each operation SHALL use its own database connection from the scope
|
||||
- **THEN** staging tables SHALL use unique OperationId suffixes preventing name collisions
|
||||
- **THEN** completion of one operation SHALL NOT affect the execution of others
|
||||
|
||||
#### Scenario: Parallel cancellation propagation
|
||||
|
||||
- **WHEN** cancellation is requested during Parallel.ForEachAsync execution
|
||||
- **THEN** the CancellationToken SHALL propagate to all running parallel operations
|
||||
- **THEN** Parallel.ForEachAsync SHALL stop starting new operations
|
||||
- **THEN** running operations SHALL receive the token in their async methods
|
||||
- **THEN** each operation SHALL check the token and exit gracefully
|
||||
- **THEN** incomplete operations SHALL mark their DataUpdate records as failed
|
||||
|
||||
#### Scenario: Staging table uniqueness verification
|
||||
|
||||
- **WHEN** two sync operations for the same table run in parallel
|
||||
- **THEN** each operation SHALL generate a unique OperationId as GUID
|
||||
- **THEN** operation A SHALL create staging table with GuidA suffix
|
||||
- **THEN** operation B SHALL create staging table with GuidB suffix
|
||||
- **THEN** no SQL errors SHALL occur from table name conflicts
|
||||
- **THEN** each operation cleanup SHALL only drop its own staging tables
|
||||
|
||||
### Requirement: Structured logging context
|
||||
|
||||
The system SHALL use ILogger.BeginScope to attach contextual information to all log entries during sync operations.
|
||||
|
||||
#### Inputs
|
||||
|
||||
- `ILogger<T>` injected into sync operation classes
|
||||
- TableName, UpdateType, OperationId values from current operation
|
||||
|
||||
#### Outputs
|
||||
|
||||
- All log entries within the scope contain the contextual properties
|
||||
- Log aggregation systems can filter and group by table, type, or operation
|
||||
|
||||
#### Business Rules
|
||||
|
||||
- Each sync operation MUST call `_logger.BeginScope(...)` at the start
|
||||
- The scope MUST include at minimum: TableName, UpdateType, OperationId
|
||||
- The scope MUST be disposed using `using` statement when operation completes
|
||||
- Nested scopes for batches SHALL preserve parent scope properties
|
||||
- LogInformation, LogWarning, LogError calls within the scope SHALL include the context automatically
|
||||
|
||||
#### Scenario: Log scope creation and usage
|
||||
|
||||
- **WHEN** a TableSyncOperation begins execution
|
||||
- **THEN** the operation SHALL create a logging scope with TableName, UpdateType, OperationId
|
||||
- **THEN** all log calls within ExecuteAsync SHALL include these properties
|
||||
- **THEN** when the operation completes the scope SHALL be disposed
|
||||
- **THEN** subsequent operations SHALL have their own independent scopes
|
||||
|
||||
## Migration Notes
|
||||
|
||||
| Legacy Pattern | New Pattern | Rationale |
|
||||
|----------------|-------------|-----------|
|
||||
| Static `UpdateProcessor` methods | Scoped services resolved per operation | Proper DI lifecycle, testability |
|
||||
| Shared instance state | Return values and scoped state only | Thread safety in parallel scenarios |
|
||||
| `Console.WriteLine` logging | `ILogger<T>` with `BeginScope` | Structured logging, context propagation |
|
||||
| Global temp tables `##table` | Local temp tables `#table_{id}` | Session-scoped isolation for parallelism |
|
||||
@@ -0,0 +1,227 @@
|
||||
# Tasks: Implement Data Sync Service
|
||||
|
||||
## Phase 1: Configuration and Interfaces
|
||||
|
||||
- [x] Create JdeScoping.DataSync project
|
||||
- Create `NEW/src/JdeScoping.DataSync/JdeScoping.DataSync.csproj`
|
||||
- Add references to JdeScoping.Domain and JdeScoping.Database
|
||||
- Validation: Project compiles and is referenced by JdeScoping.Host
|
||||
|
||||
- [x] Create DataSyncOptions configuration class
|
||||
- File: `Configuration/DataSyncOptions.cs`
|
||||
- Properties: CheckInterval, MaxDegreeOfParallelism, BatchSize, BulkCopyBatchSize, LookbackMultiplier, PurgeRetentionDays, DataSources
|
||||
- Validation: Options bind from appsettings.json DataSync section
|
||||
|
||||
- [x] Create DataSourceConfig configuration class
|
||||
- File: `Configuration/DataSourceConfig.cs`
|
||||
- Properties: TableName, SourceSystem, FetcherTypeName, PostProcessorTypeName, IsEnabled, MassConfig, DailyConfig, HourlyConfig
|
||||
- Include ScheduleConfig nested class
|
||||
- Validation: Configuration parses correctly from JSON
|
||||
|
||||
- [x] Create IDataFetcher<T> interface
|
||||
- File: `Contracts/IDataFetcher.cs`
|
||||
- Method: `IAsyncEnumerable<T> FetchAsync(DateTime? minimumDT, CancellationToken cancellationToken)`
|
||||
- Validation: Interface compiles with correct signature
|
||||
|
||||
- [x] Create IPostProcessor interface
|
||||
- File: `Contracts/IPostProcessor.cs`
|
||||
- Method: `Task ProcessAsync(string tableName, CancellationToken cancellationToken)`
|
||||
- Validation: Interface compiles with correct signature
|
||||
|
||||
- [x] Create supporting interfaces
|
||||
- Files: `Contracts/ISyncOrchestrator.cs`, `Contracts/IScheduleChecker.cs`, `Contracts/ITableSyncOperation.cs`, `Contracts/IStagingTableManager.cs`
|
||||
- Validation: All interfaces compile
|
||||
|
||||
## Phase 2: Core Service Implementation
|
||||
|
||||
- [x] Create DataSyncService (BackgroundService)
|
||||
- File: `DataSyncService.cs`
|
||||
- Implement ExecuteAsync with main sync loop
|
||||
- Inject IServiceScopeFactory, IOptions<DataSyncOptions>, ILogger
|
||||
- Call CloseOpenUpdateEntriesAsync at startup
|
||||
- Call PurgeUpdateEntriesAsync periodically
|
||||
- Respect CancellationToken throughout
|
||||
- Validation: Service starts with host and stops gracefully
|
||||
|
||||
- [x] Create ScheduleChecker service
|
||||
- File: `Services/ScheduleChecker.cs`
|
||||
- Implement GetPendingTasksAsync to check Mass/Daily/Hourly schedules
|
||||
- Priority order: Mass > Daily > Hourly
|
||||
- Check both IsEnabled and specific schedule Enabled flags
|
||||
- Calculate MinimumDT with lookback multiplier (Daily timestamp for Hourly)
|
||||
- Validation: Unit tests for schedule checking logic pass
|
||||
|
||||
- [x] Create SyncOrchestrator service
|
||||
- File: `Services/SyncOrchestrator.cs`
|
||||
- Implement ExecutePendingSyncsAsync using Parallel.ForEachAsync
|
||||
- Create IServiceScope per parallel operation
|
||||
- Pass CancellationToken to all operations
|
||||
- Validation: Multiple syncs run in parallel up to MaxDegreeOfParallelism
|
||||
|
||||
- [x] Create DataUpdateTask model
|
||||
- File: `Models/DataUpdateTask.cs`
|
||||
- Properties: TableName, UpdateType, SourceSystem, MinimumDT, OperationId, Config
|
||||
- Validation: Model used by ScheduleChecker and SyncOrchestrator
|
||||
|
||||
## Phase 3: Table Sync Operations
|
||||
|
||||
- [x] Create TableSyncOperation service
|
||||
- File: `Services/TableSyncOperation.cs`
|
||||
- Implement ExecuteAsync for single table sync
|
||||
- Create DataUpdate record at start (NumberRecords = -2)
|
||||
- Resolve IDataFetcher<T> and execute FetchAsync
|
||||
- Batch records and delegate to StagingTableManager
|
||||
- Update DataUpdate record on success/failure
|
||||
- Use ILogger.BeginScope for structured logging
|
||||
- Validation: Single table sync executes end-to-end
|
||||
|
||||
- [x] Create StagingTableManager service
|
||||
- File: `Services/StagingTableManager.cs`
|
||||
- Create staging tables with unique suffix: `#Staging{Table}_{OperationId}`
|
||||
- Implement bulk copy with BulkCopyBatchSize
|
||||
- Implement deduplication to temp table with ROW_NUMBER
|
||||
- Generate and execute MERGE statement
|
||||
- Handle tables with/without LastUpdateDT column
|
||||
- Clean up staging and temp tables
|
||||
- Validation: MERGE correctly inserts new and updates existing records
|
||||
|
||||
- [x] Implement mass update with truncation
|
||||
- In StagingTableManager or separate method
|
||||
- Disable non-PK indexes before truncate
|
||||
- TRUNCATE destination table when PrepurgeData = true
|
||||
- Bulk copy directly to destination
|
||||
- Rebuild indexes if ReIndexData = true
|
||||
- Validation: Mass update truncates and reloads table
|
||||
|
||||
- [x] Implement batching for large datasets
|
||||
- In TableSyncOperation
|
||||
- Process records in batches of BatchSize (1,000,000)
|
||||
- Each batch creates fresh staging/temp tables with unique suffix
|
||||
- Accumulate total record count across batches
|
||||
- Validation: Large dataset processes in multiple batches
|
||||
|
||||
## Phase 4: Data Fetcher Implementations
|
||||
|
||||
- [x] Create mock/test fetcher base class
|
||||
- File: `Fetchers/MockDataFetcher.cs`
|
||||
- Returns sample data for testing without JDE/CMS connectivity
|
||||
- Validation: Tests can run without external databases
|
||||
|
||||
- [x] Create JDE fetcher implementations (stubs)
|
||||
- Files: `Fetchers/Jde/JdeWorkOrderFetcher.cs`, `Fetchers/Jde/JdeLotUsageFetcher.cs`, `Fetchers/Jde/JdeItemFetcher.cs`, etc.
|
||||
- Implement IDataFetcher<T> interface
|
||||
- Initially delegate to mock or throw NotImplementedException
|
||||
- Validation: All fetchers register in DI and resolve correctly
|
||||
|
||||
- [x] Create CMS fetcher implementation (stub)
|
||||
- File: `Fetchers/Cms/CmsMisDataFetcher.cs`
|
||||
- Implement IDataFetcher<MisData>
|
||||
- Initially delegate to mock or throw NotImplementedException
|
||||
- Validation: CMS fetcher registers in DI and resolves correctly
|
||||
|
||||
## Phase 5: Update Logging and Recovery
|
||||
|
||||
- [x] Implement update logging repository methods
|
||||
- In existing repository or new DataUpdateRepository
|
||||
- StartUpdateAsync: Insert DataUpdate with NumberRecords = -2
|
||||
- CompleteUpdateAsync: Update EndDT, WasSuccessful, NumberRecords
|
||||
- GetLastDataUpdatesAsync: Query LastDataUpdates view
|
||||
- Validation: DataUpdate records created and updated correctly
|
||||
|
||||
- [x] Implement CloseOpenUpdateEntries
|
||||
- Method in DataSyncService or repository
|
||||
- Mark every record still at NumberRecords = -2 (the in-progress value written by StartUpdateAsync) as failed
|
||||
- Called at service startup
|
||||
- Validation: Interrupted syncs marked as failed on restart
|
||||
|
||||
- [x] Implement PurgeUpdateEntries
|
||||
- Method in DataSyncService or repository
|
||||
- Delete DataUpdate records older than PurgeRetentionDays
|
||||
- Called periodically (e.g., daily)
|
||||
- Validation: Old records purged correctly
|
||||
|
||||
## Phase 6: Health Checks and Telemetry
|
||||
|
||||
- [x] Create DataSyncHealthCheck
|
||||
- File: `HealthChecks/DataSyncHealthCheck.cs`
|
||||
- Implement IHealthCheck interface
|
||||
- Return Healthy when all tables have synced within their configured interval
|
||||
- Return Degraded when some tables are overdue but syncs are still progressing
|
||||
- Return Unhealthy when syncs fail repeatedly
|
||||
- Include per-table status in response data
|
||||
- Validation: Health endpoint returns correct status
|
||||
|
||||
- [x] Create DataSyncMetrics
|
||||
- File: `Telemetry/DataSyncMetrics.cs`
|
||||
- Create Meter named "DataSync"
|
||||
- Counters: sync.operations.started, sync.operations.completed, sync.operations.failed
|
||||
- Histograms: sync.duration.seconds, sync.records.processed
|
||||
- Include table name and update type as tags
|
||||
- Validation: Metrics emitted during sync operations
|
||||
|
||||
- [x] Create DataSyncActivitySource
|
||||
- File: `Telemetry/DataSyncActivitySource.cs`
|
||||
- Create ActivitySource named "DataSync"
|
||||
- Start activity for each sync operation with table/type tags
|
||||
- Complete activity with record count on success
|
||||
- Set error status on failure
|
||||
- Validation: Activities visible in distributed tracing
|
||||
|
||||
## Phase 7: DI Registration
|
||||
|
||||
- [x] Create AddDataSync extension method
|
||||
- File: `DependencyInjection/ServiceCollectionExtensions.cs`
|
||||
- Configure DataSyncOptions from configuration
|
||||
- Register DataSyncService as hosted service
|
||||
- Register all scoped services (orchestrator, checker, operation, staging)
|
||||
- Register health check
|
||||
- Register metrics singleton
|
||||
- Register all fetcher implementations
|
||||
- Add options validation
|
||||
- Validation: All services resolve correctly at startup
|
||||
|
||||
- [x] Update JdeScoping.Host Program.cs
|
||||
- Add `builder.Services.AddDataSync(builder.Configuration)`
|
||||
- Validation: Host starts with data sync service running
|
||||
|
||||
- [x] Add DataSync configuration to appsettings.json
|
||||
- Add DataSync section with options and data sources
|
||||
- Include all table configurations from spec
|
||||
- Validation: Configuration loads correctly
|
||||
|
||||
## Phase 8: Testing
|
||||
|
||||
- [x] Write unit tests for ScheduleChecker
|
||||
- Test Mass/Daily/Hourly priority
|
||||
- Test MinimumDT calculation with lookback
|
||||
- Test disabled table handling
|
||||
- Test first sync (no prior updates) scenario
|
||||
- Validation: All schedule logic tests pass
|
||||
|
||||
- [x] Write unit tests for StagingTableManager
|
||||
- Test staging table creation with unique suffix
|
||||
- Test MERGE with/without LastUpdateDT column
|
||||
- Test mass update truncation path
|
||||
- Validation: All staging/merge logic tests pass
|
||||
|
||||
- [x] Write integration tests for DataSyncService
|
||||
- Test service startup and shutdown
|
||||
- Test CloseOpenUpdateEntries at startup
|
||||
- Test parallel sync execution
|
||||
- Test cancellation handling
|
||||
- Validation: Integration tests pass with test database
|
||||
|
||||
## Phase 9: Validation
|
||||
|
||||
- [x] Run openspec validate
|
||||
- Command: `openspec validate implement-data-sync --strict`
|
||||
- Fix any validation errors
|
||||
- Validation: Validation passes
|
||||
|
||||
- [x] Verify all acceptance criteria met
|
||||
- DataSyncService starts and stops gracefully
|
||||
- Schedules checked and tasks queued correctly
|
||||
- Parallel execution works with proper isolation
|
||||
- DataUpdate logging complete
|
||||
- Health check reports correct status
|
||||
- Metrics emitted correctly
|
||||
Reference in New Issue
Block a user