diff --git a/code-reviews/Driver.Historian.Wonderware/findings.md b/code-reviews/Driver.Historian.Wonderware/findings.md
index d154c7b..4052a95 100644
--- a/code-reviews/Driver.Historian.Wonderware/findings.md
+++ b/code-reviews/Driver.Historian.Wonderware/findings.md
@@ -161,7 +161,7 @@ lock), so the snapshot is internally consistent.
| Severity | Medium |
| Category | Error handling and resilience |
| Location | `Ipc/PipeServer.cs:120-128` |
-| Status | Open |
+| Status | Resolved |
**Description:** `RunAsync` re-accepts connections in a `while` loop. If
`RunOneConnectionAsync` throws synchronously and immediately on every iteration
@@ -175,7 +175,7 @@ seconds) before re-accepting after a caught exception, and consider a
consecutive-failure threshold that escalates to a fatal exit so the supervisor can
restart the sidecar cleanly.
-**Resolution:** _(open)_
+**Resolution:** Resolved 2026-05-22 — added exponential backoff (250 ms → 8 s, six steps) after each connection-loop failure and a `MaxConsecutiveFailures=20` threshold that re-throws so the SCM/NSSM supervisor can restart the sidecar cleanly.
### Driver.Historian.Wonderware-007
diff --git a/src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware/Ipc/PipeServer.cs b/src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware/Ipc/PipeServer.cs
index 8737bc9..d20fe49 100644
--- a/src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware/Ipc/PipeServer.cs
+++ b/src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware/Ipc/PipeServer.cs
@@ -113,17 +113,62 @@ public sealed class PipeServer : IDisposable
}
}
+ // Backoff sequence for consecutive RunOneConnection failures: 250 ms → 500 ms →
+ // 1 000 ms → 2 000 ms → 4 000 ms → capped at 8 000 ms thereafter.
+ private static readonly TimeSpan[] BackoffSteps =
+ {
+ TimeSpan.FromMilliseconds(250),
+ TimeSpan.FromMilliseconds(500),
+ TimeSpan.FromSeconds(1),
+ TimeSpan.FromSeconds(2),
+ TimeSpan.FromSeconds(4),
+ TimeSpan.FromSeconds(8),
+ };
+
+ ///
+ /// Maximum consecutive failures before the server gives up and lets the process exit
+ /// so the supervisor (NSSM / SCM) can restart the sidecar cleanly.
+ ///
+ private const int MaxConsecutiveFailures = 20;
+
///
/// Runs the server continuously, handling one connection at a time. When a connection
- /// ends (clean or error), accepts the next.
+ /// ends (clean or error), waits with exponential backoff before accepting the next.
+ /// If consecutive failures occur the method
+ /// throws so the supervisor can restart the sidecar.
///
public async Task RunAsync(IFrameHandler handler, CancellationToken ct)
{
+ var consecutiveFailures = 0;
+
while (!ct.IsCancellationRequested)
{
- try { await RunOneConnectionAsync(handler, ct).ConfigureAwait(false); }
+ try
+ {
+ await RunOneConnectionAsync(handler, ct).ConfigureAwait(false);
+ consecutiveFailures = 0; // a clean connection (even a short-lived one) resets the counter
+ }
catch (OperationCanceledException) { break; }
- catch (Exception ex) { _logger.Error(ex, "Sidecar IPC connection loop error — accepting next"); }
+ catch (Exception ex)
+ {
+ consecutiveFailures++;
+
+ if (consecutiveFailures >= MaxConsecutiveFailures)
+ {
+ _logger.Fatal(ex,
+ "Sidecar IPC connection loop failed {Count} consecutive times — giving up so supervisor can restart",
+ consecutiveFailures);
+ throw;
+ }
+
+ var delay = BackoffSteps[Math.Min(consecutiveFailures - 1, BackoffSteps.Length - 1)];
+ _logger.Error(ex,
+ "Sidecar IPC connection loop error (consecutive failure {Count}/{Max}) — retrying in {Delay}",
+ consecutiveFailures, MaxConsecutiveFailures, delay);
+
+ try { await Task.Delay(delay, ct).ConfigureAwait(false); }
+ catch (OperationCanceledException) { break; }
+ }
}
}