From 7ad1336598ae35408b43969c9b9e090a1b5761ac Mon Sep 17 00:00:00 2001 From: Marcos Date: Wed, 17 Jun 2026 14:06:42 +0200 Subject: [PATCH 1/9] fix: handle LND NotFound for Probing and Pending statuses without changing row state --- src/Jobs/MonitorRebalancesJob.cs | 31 +++++++++----- .../Jobs/MonitorRebalancesJobTests.cs | 42 +++++++++++++++++++ 2 files changed, 63 insertions(+), 10 deletions(-) diff --git a/src/Jobs/MonitorRebalancesJob.cs b/src/Jobs/MonitorRebalancesJob.cs index 9f49acca..250b9213 100644 --- a/src/Jobs/MonitorRebalancesJob.cs +++ b/src/Jobs/MonitorRebalancesJob.cs @@ -122,11 +122,21 @@ private async Task ReconcileAsync(Rebalance rebalance, CancellationToken ct) if (payment == null) { - // LND returned NotFound (or the call errored). Only act on the NotFound signal - // when our row is still in a non-terminal state: in that case LND never saw the - // payment, so the optimistic InFlight/Probing/Pending is wrong. For terminal rows - // we leave them alone — a transient track error shouldn't reopen a Failed row. - if (IsNonTerminal(rebalance.Status)) + // LND returned NotFound (or the call errored). The right action depends on which + // non-terminal state the row is in: + // + // - Pending/Probing: the local ExecuteAsync persisted PaymentHashHex right after + // AddInvoice, but SendPaymentV2 doesn't fire until after the probe succeeds — + // so "no record in LND" is the EXPECTED state during this window. Flipping the + // row to Failed here would kill an in-progress probe. Leave it alone; the local + // execution is the source of truth until it dispatches SendPaymentV2. + // + // - InFlight: we believe SendPaymentV2 has been dispatched, so LND should know. + // If it doesn't, the payment never reached LND (process crashed mid-dispatch, + // or LND lost it). Flip to Failed so the row doesn't stay InFlight forever. + // + // - Terminal statuses: leave alone — a transient track error must not reopen them. + if (rebalance.Status == RebalanceStatus.InFlight) { var oldStatus = rebalance.Status; rebalance.Status = RebalanceStatus.Failed; @@ -147,6 +157,12 @@ await _auditService.LogSystemAsync( NewStatus = rebalance.Status.ToString(), }); } + else if (rebalance.Status is RebalanceStatus.Pending or RebalanceStatus.Probing) + { + _logger.LogDebug( + "Rebalance {RebalanceId} not yet visible in LND (status={Status}, hash={PaymentHashHex}); local execution still owns it, leaving as-is", + rebalance.Id, rebalance.Status, rebalance.PaymentHashHex); + } else { _logger.LogInformation( @@ -206,11 +222,6 @@ await _auditService.LogSystemAsync( }); } - private static bool IsNonTerminal(RebalanceStatus status) => - status is RebalanceStatus.Pending - or RebalanceStatus.Probing - or RebalanceStatus.InFlight; - private static bool IsNonTerminalLndStatus(Payment.Types.PaymentStatus status) => status is Payment.Types.PaymentStatus.Initiated or Payment.Types.PaymentStatus.InFlight; diff --git a/test/NodeGuard.Tests/Jobs/MonitorRebalancesJobTests.cs b/test/NodeGuard.Tests/Jobs/MonitorRebalancesJobTests.cs index 102f00b5..8058700c 100644 --- a/test/NodeGuard.Tests/Jobs/MonitorRebalancesJobTests.cs +++ b/test/NodeGuard.Tests/Jobs/MonitorRebalancesJobTests.cs @@ -149,6 +149,48 @@ public async Task Execute_InFlight_LndNotFound_RowMarkedFailed() _rebalanceRepo.Verify(r => r.Update(reb), Times.Once); } + [Fact] + public async Task Execute_Probing_LndNotFound_RowLeftAlone() + { + // ExecuteAsync persists PaymentHashHex right after AddInvoice but does not call + // SendPaymentV2 until the probe succeeds. While Status=Probing, "no record in LND" is + // the expected state — flipping to Failed here would kill an in-progress probe. + var reb = MakeRebalance(RebalanceStatus.Probing); + _rebalanceRepo.Setup(r => r.GetReconcilable(It.IsAny())).ReturnsAsync(new List { reb }); + _lightning.Setup(x => x.TrackPaymentV2Async(It.IsAny(), It.IsAny(), It.IsAny())) + .ReturnsAsync((Payment?)null); + + await CreateJob().Execute(_ctx.Object); + + reb.Status.Should().Be(RebalanceStatus.Probing); + _rebalanceRepo.Verify(r => r.Update(It.IsAny()), Times.Never); + _audit.Verify(a => a.LogSystemAsync( + AuditActionType.RebalanceCompleted, AuditEventType.Failure, + AuditObjectType.Rebalance, It.IsAny(), It.IsAny()), + Times.Never); + } + + [Fact] + public async Task Execute_Pending_LndNotFound_RowLeftAlone() + { + // Same reasoning as Probing — the brief Pending+hash window inside ExecuteAsync + // between AddInvoice and the Status=Probing flip. SendPaymentV2 hasn't fired; LND + // can't possibly know about the hash yet. + var reb = MakeRebalance(RebalanceStatus.Pending); + _rebalanceRepo.Setup(r => r.GetReconcilable(It.IsAny())).ReturnsAsync(new List { reb }); + _lightning.Setup(x => x.TrackPaymentV2Async(It.IsAny(), It.IsAny(), It.IsAny())) + .ReturnsAsync((Payment?)null); + + await CreateJob().Execute(_ctx.Object); + + reb.Status.Should().Be(RebalanceStatus.Pending); + _rebalanceRepo.Verify(r => r.Update(It.IsAny()), Times.Never); + _audit.Verify(a => a.LogSystemAsync( + AuditActionType.RebalanceCompleted, AuditEventType.Failure, + AuditObjectType.Rebalance, It.IsAny(), It.IsAny()), + Times.Never); + } + [Fact] public async Task Execute_RecentlyFailed_LndNotFound_RowLeftAlone() { From 17b2ffa85cc5d1a0f26a2e2dbe614bc5f9a797e9 Mon Sep 17 00:00:00 2001 From: Marcos Date: Wed, 17 Jun 2026 20:05:47 +0200 Subject: [PATCH 2/9] fix: avoid rebalance status oscillation --- src/Jobs/MonitorRebalancesJob.cs | 33 ++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/Jobs/MonitorRebalancesJob.cs b/src/Jobs/MonitorRebalancesJob.cs index 250b9213..bc333860 100644 --- a/src/Jobs/MonitorRebalancesJob.cs +++ b/src/Jobs/MonitorRebalancesJob.cs @@ -159,9 +159,34 @@ await _auditService.LogSystemAsync( } else if (rebalance.Status is RebalanceStatus.Pending or RebalanceStatus.Probing) { + var window = TimeSpan.FromHours(Constants.REBALANCE_RECONCILE_TERMINAL_WINDOW_HOURS); + if (rebalance.UpdateDatetime < DateTimeOffset.UtcNow - window) + { + _logger.LogWarning( + "Rebalance {RebalanceId} (status={Status}, hash={PaymentHashHex}) has no record in LND and is older than {WindowHours}h; flipping to Failed", + rebalance.Id, rebalance.Status, rebalance.PaymentHashHex, window.TotalHours); + var oldStatus = rebalance.Status; + rebalance.Status = RebalanceStatus.Failed; + _rebalanceRepository.Update(rebalance); + await _auditService.LogSystemAsync( + AuditActionType.RebalanceCompleted, + AuditEventType.Failure, + AuditObjectType.Rebalance, + rebalance.Id.ToString(), + new + { + Reason = "MonitorReconciliation", + Detail = "LND has no record of this payment hash and the row is older than the reconciliation window", + OldStatus = oldStatus.ToString(), + NewStatus = rebalance.Status.ToString(), + }); + } + else + { _logger.LogDebug( "Rebalance {RebalanceId} not yet visible in LND (status={Status}, hash={PaymentHashHex}); local execution still owns it, leaving as-is", rebalance.Id, rebalance.Status, rebalance.PaymentHashHex); + } } else { @@ -173,6 +198,14 @@ await _auditService.LogSystemAsync( return; } + if (rebalance.Status is RebalanceStatus.Pending or RebalanceStatus.Probing) + { + _logger.LogInformation( + "Rebalance {RebalanceId} (status={Status}) has a payment record in LND (status={LndStatus}, hash={PaymentHashHex}); local execution still owns it, leaving as-is", + rebalance.Id, rebalance.Status, payment.Status, rebalance.PaymentHashHex); + return; + } + if (IsNonTerminalLndStatus(payment.Status)) { // LND still in flight — emitted at info so an operator can spot rebalances stuck From 51d7f837b3e0602c26a1cb6e458d9c23465ead40 Mon Sep 17 00:00:00 2001 From: Marcos Date: Wed, 17 Jun 2026 20:33:00 +0200 Subject: [PATCH 3/9] fix: avoid double sending a rebalance --- src/Services/RebalanceService.cs | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/Services/RebalanceService.cs b/src/Services/RebalanceService.cs index 0452d2fc..b6cc5f44 100644 --- a/src/Services/RebalanceService.cs +++ b/src/Services/RebalanceService.cs @@ -191,6 +191,32 @@ public async Task ExecuteAsync(int rebalanceId, CancellationToken ct try { + // Avoid double paying a rebalance that has already succeeded. + if (rebalance.PaymentHashHex != null) + { + byte[] paymentHash = Convert.FromHexString(rebalance.PaymentHashHex); + var oldPayment = await _lightningService.TrackPaymentV2Async(node, paymentHash, ct); + if (oldPayment != null && oldPayment.Status is Payment.Types.PaymentStatus.Succeeded) + { + _logger.LogInformation( + "Rebalance {RebalanceId} already succeeded in LND (hash={PaymentHashHex}, feeSats={FeeSats}, ppm={Ppm})", + rebalance.Id, rebalance.PaymentHashHex, oldPayment.FeeMsat / 1_000L, + (long?)Math.Round((decimal)oldPayment.FeeMsat / rebalance.SatsAmount * 1_000_000m, MidpointRounding.AwayFromZero)); + ApplyTerminalPayment(rebalance, oldPayment); + _rebalanceRepository.Update(rebalance); + await _auditService.LogAsync(AuditActionType.RebalanceCompleted, AuditEventType.Success, + AuditObjectType.Rebalance, rebalance.Id.ToString(), + new + { + rebalance.FeePaidSats, + rebalance.EffectivePpm, + ActualAmountSats = rebalance.SatsAmount, + rebalance.AttemptNumber, + }); + return rebalance; + } + } + var memo = $"NG rebalance #{rebalance.Id} attempt {rebalance.AttemptNumber}"; var invoiceExpiry = ComputeInvoiceExpirySeconds(rebalance); var invoice = await _lightningService.AddInvoiceAsync(node, rebalance.SatsAmount, memo, invoiceExpiry); From 6beb82a53a0d094f36545ad70cf644369e03330b Mon Sep 17 00:00:00 2001 From: Marcos Date: Wed, 17 Jun 2026 20:47:29 +0200 Subject: [PATCH 4/9] fix: enhance rebalance job tests to handle LND payment status scenarios --- .../Jobs/MonitorRebalancesJobTests.cs | 97 ++++++++++++- .../Services/RebalanceServiceTests.cs | 137 ++++++++++++++++++ 2 files changed, 233 insertions(+), 1 deletion(-) diff --git a/test/NodeGuard.Tests/Jobs/MonitorRebalancesJobTests.cs b/test/NodeGuard.Tests/Jobs/MonitorRebalancesJobTests.cs index 8058700c..6bfc5317 100644 --- a/test/NodeGuard.Tests/Jobs/MonitorRebalancesJobTests.cs +++ b/test/NodeGuard.Tests/Jobs/MonitorRebalancesJobTests.cs @@ -22,6 +22,7 @@ using Microsoft.Extensions.Logging; using NodeGuard.Data.Models; using NodeGuard.Data.Repositories.Interfaces; +using NodeGuard.Helpers; using NodeGuard.Services; using Quartz; @@ -50,7 +51,11 @@ public class MonitorRebalancesJobTests PubKey = "030000000000000000000000000000000000000000000000000000000000000001", }; - private static Rebalance MakeRebalance(RebalanceStatus status, string? hashHex = "abcdef01", Node? node = null) + private static Rebalance MakeRebalance( + RebalanceStatus status, + string? hashHex = "abcdef01", + Node? node = null, + DateTimeOffset? updateDatetime = null) => new() { Id = 42, @@ -61,6 +66,9 @@ private static Rebalance MakeRebalance(RebalanceStatus status, string? hashHex = SatsAmount = 100_000, RequestedAmountSats = 100_000, MaxFeePct = 0.05, + // Default to "fresh" so tests don't accidentally trip the age-based stuck-row + // fallback in the Pending/Probing null branch. + UpdateDatetime = updateDatetime ?? DateTimeOffset.UtcNow, }; [Fact] @@ -191,6 +199,93 @@ public async Task Execute_Pending_LndNotFound_RowLeftAlone() Times.Never); } + [Fact] + public async Task Execute_Pending_LndNotFound_OlderThanWindow_FlipsToFailed() + { + // Safety net for a Pending row stuck past the reconciliation window — typically a + // crash mid-ExecuteAsync with no Quartz retry queued. The age-based fallback flips it + // to Failed so it doesn't sit non-terminal forever. + var stale = DateTimeOffset.UtcNow.AddHours(-(Constants.REBALANCE_RECONCILE_TERMINAL_WINDOW_HOURS + 1)); + var reb = MakeRebalance(RebalanceStatus.Pending, updateDatetime: stale); + _rebalanceRepo.Setup(r => r.GetReconcilable(It.IsAny())).ReturnsAsync(new List { reb }); + _lightning.Setup(x => x.TrackPaymentV2Async(It.IsAny(), It.IsAny(), It.IsAny())) + .ReturnsAsync((Payment?)null); + + await CreateJob().Execute(_ctx.Object); + + reb.Status.Should().Be(RebalanceStatus.Failed); + _rebalanceRepo.Verify(r => r.Update(reb), Times.Once); + _audit.Verify(a => a.LogSystemAsync( + AuditActionType.RebalanceCompleted, AuditEventType.Failure, + AuditObjectType.Rebalance, reb.Id.ToString(), It.IsAny()), + Times.Once); + } + + [Fact] + public async Task Execute_Pending_LndFailedPayment_RowLeftAlone() + { + // The oscillation case: ScheduleRetryIfEligibleAsync left the row at Pending with the + // PRIOR attempt's hash. LND still has that prior payment record (Failed). The monitor + // must NOT write the prior failure onto a queued-for-retry row — local execution + // (ExecuteAsync's pre-flight) is the only thing allowed to touch it. + var reb = MakeRebalance(RebalanceStatus.Pending); + _rebalanceRepo.Setup(r => r.GetReconcilable(It.IsAny())).ReturnsAsync(new List { reb }); + _lightning.Setup(x => x.TrackPaymentV2Async(It.IsAny(), It.IsAny(), It.IsAny())) + .ReturnsAsync(new Payment + { + Status = Payment.Types.PaymentStatus.Failed, + FailureReason = PaymentFailureReason.FailureReasonNoRoute, + }); + + await CreateJob().Execute(_ctx.Object); + + reb.Status.Should().Be(RebalanceStatus.Pending); + _rebalanceRepo.Verify(r => r.Update(It.IsAny()), Times.Never); + _audit.Verify(a => a.LogSystemAsync( + It.IsAny(), It.IsAny(), + It.IsAny(), It.IsAny(), It.IsAny()), + Times.Never); + } + + [Fact] + public async Task Execute_Pending_LndSucceededPayment_RowLeftAlone() + { + // The lying-stream variant of the oscillation: prior attempt actually settled in LND + // but the local stream said Failed. The monitor still must NOT adopt that success + // here — the row is Pending (queued for retry), and ExecuteAsync's pre-flight in the + // retry path is the component responsible for adopting the prior success. + var reb = MakeRebalance(RebalanceStatus.Pending); + _rebalanceRepo.Setup(r => r.GetReconcilable(It.IsAny())).ReturnsAsync(new List { reb }); + _lightning.Setup(x => x.TrackPaymentV2Async(It.IsAny(), It.IsAny(), It.IsAny())) + .ReturnsAsync(new Payment + { + Status = Payment.Types.PaymentStatus.Succeeded, + FeeMsat = 1_000, + PaymentPreimage = "deadbeef", + }); + + await CreateJob().Execute(_ctx.Object); + + reb.Status.Should().Be(RebalanceStatus.Pending); + _rebalanceRepo.Verify(r => r.Update(It.IsAny()), Times.Never); + } + + [Fact] + public async Task Execute_Probing_LndHasRecord_RowLeftAlone() + { + // While Status=Probing, the local ExecuteAsync owns the row — the monitor stays back + // regardless of what LND has on the hash. Covers the normal in-progress probe. + var reb = MakeRebalance(RebalanceStatus.Probing); + _rebalanceRepo.Setup(r => r.GetReconcilable(It.IsAny())).ReturnsAsync(new List { reb }); + _lightning.Setup(x => x.TrackPaymentV2Async(It.IsAny(), It.IsAny(), It.IsAny())) + .ReturnsAsync(new Payment { Status = Payment.Types.PaymentStatus.InFlight }); + + await CreateJob().Execute(_ctx.Object); + + reb.Status.Should().Be(RebalanceStatus.Probing); + _rebalanceRepo.Verify(r => r.Update(It.IsAny()), Times.Never); + } + [Fact] public async Task Execute_RecentlyFailed_LndNotFound_RowLeftAlone() { diff --git a/test/NodeGuard.Tests/Services/RebalanceServiceTests.cs b/test/NodeGuard.Tests/Services/RebalanceServiceTests.cs index 1004aa69..7eaad444 100644 --- a/test/NodeGuard.Tests/Services/RebalanceServiceTests.cs +++ b/test/NodeGuard.Tests/Services/RebalanceServiceTests.cs @@ -569,4 +569,141 @@ public async Task RebalanceAsync_AuditsInitiation() It.IsAny()), Times.Once); } + + /// + /// Builds a retry row simulating the exact post-ScheduleRetryIfEligibleAsync state: + /// Status=Pending, AttemptNumber>1, and the prior attempt's PaymentHashHex still on the row. + /// + private Rebalance BuildRetryRow(Node node, string priorHashHex = "deadbeef", int attempt = 2) + => new() + { + Id = 200, + NodeId = node.Id, + Node = node, + Status = RebalanceStatus.Pending, + AttemptNumber = attempt, + RequestedAmountSats = 100_000, + SatsAmount = 100_000, + MaxFeePct = 0.05, + TimeoutSeconds = 60, + PaymentHashHex = priorHashHex, + }; + + [Fact] + public async Task ExecuteAsync_RetryPreflight_PriorSucceeded_AdoptsAndSkipsRetry() + { + // The lying-stream case: local row was marked Failed (then queued for retry), but LND + // actually settled the prior attempt. The retry's pre-flight TrackPaymentV2 must catch + // this and adopt the success WITHOUT dispatching another payment. + var node = CreateNode(); + var rebalance = BuildRetryRow(node); + _rebalanceRepo.Setup(r => r.GetById(rebalance.Id)).ReturnsAsync(rebalance); + _rebalanceRepo.Setup(r => r.Update(It.IsAny())).Returns((true, (string?)null)); + + _lightning.Setup(x => x.TrackPaymentV2Async(node, It.IsAny(), It.IsAny())) + .ReturnsAsync(new Payment + { + Status = Payment.Types.PaymentStatus.Succeeded, + FeeMsat = 4_321, + PaymentPreimage = "abc", + }); + + var service = CreateService(); + var result = await service.ExecuteAsync(rebalance.Id); + + result.Status.Should().Be(RebalanceStatus.Succeeded); + result.FeePaidMsat.Should().Be(4_321); + result.PreimageHex.Should().Be("abc"); + _lightning.Verify(x => x.AddInvoiceAsync(It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny()), + Times.Never); + _lightning.Verify(x => x.SendPaymentV2Async(It.IsAny(), It.IsAny(), It.IsAny(), + It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny()), + Times.Never); + _audit.Verify(a => a.LogAsync( + AuditActionType.RebalanceCompleted, AuditEventType.Success, + AuditObjectType.Rebalance, rebalance.Id.ToString(), It.IsAny()), + Times.Once); + } + + [Fact] + public async Task ExecuteAsync_RetryPreflight_PriorFailed_ProceedsWithNewInvoice() + { + // The common retry case: LND confirms the prior attempt failed, matching the local + // row's pre-retry state. Pre-flight must NOT bail — it must fall through to the normal + // AddInvoice → probe → pay flow so the retry actually runs. + var node = CreateNode(); + var rebalance = BuildRetryRow(node); + _rebalanceRepo.Setup(r => r.GetById(rebalance.Id)).ReturnsAsync(rebalance); + _rebalanceRepo.Setup(r => r.Update(It.IsAny())).Returns((true, (string?)null)); + + _lightning.Setup(x => x.TrackPaymentV2Async(node, It.IsAny(), It.IsAny())) + .ReturnsAsync(new Payment + { + Status = Payment.Types.PaymentStatus.Failed, + FailureReason = PaymentFailureReason.FailureReasonNoRoute, + }); + _lightning.Setup(x => x.AddInvoiceAsync(node, It.IsAny(), It.IsAny(), It.IsAny())) + .ReturnsAsync(new AddInvoiceResponse + { + PaymentRequest = "lnbc...", + RHash = Google.Protobuf.ByteString.CopyFrom(new byte[] { 0xAA, 0xBB }), + }); + _lightning.Setup(x => x.ProbeRouteAsync(node, It.IsAny(), It.IsAny(), + It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny())) + .ReturnsAsync(new ProbeResult.NoRoute("stop")); + + var service = CreateService(); + var result = await service.ExecuteAsync(rebalance.Id); + + _lightning.Verify(x => x.AddInvoiceAsync(node, It.IsAny(), It.IsAny(), It.IsAny()), + Times.Once); + // The new invoice's hash replaced the stale one. + result.PaymentHashHex.Should().Be("aabb"); + } + + [Fact] + public async Task ExecuteAsync_RetryPreflight_LndNotFound_ProceedsWithNewInvoice() + { + // LND returned NotFound for the prior hash. The pre-flight only short-circuits on + // Succeeded; for null it must fall through to the normal flow so the retry runs. + var node = CreateNode(); + var rebalance = BuildRetryRow(node); + _rebalanceRepo.Setup(r => r.GetById(rebalance.Id)).ReturnsAsync(rebalance); + _rebalanceRepo.Setup(r => r.Update(It.IsAny())).Returns((true, (string?)null)); + + _lightning.Setup(x => x.TrackPaymentV2Async(node, It.IsAny(), It.IsAny())) + .ReturnsAsync((Payment?)null); + _lightning.Setup(x => x.AddInvoiceAsync(node, It.IsAny(), It.IsAny(), It.IsAny())) + .ReturnsAsync(new AddInvoiceResponse { PaymentRequest = "lnbc..." }); + _lightning.Setup(x => x.ProbeRouteAsync(node, It.IsAny(), It.IsAny(), + It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny())) + .ReturnsAsync(new ProbeResult.NoRoute("stop")); + + var service = CreateService(); + await service.ExecuteAsync(rebalance.Id); + + _lightning.Verify(x => x.AddInvoiceAsync(node, It.IsAny(), It.IsAny(), It.IsAny()), + Times.Once); + } + + [Fact] + public async Task ExecuteAsync_FirstAttempt_NoPriorHash_SkipsPreflightTrack() + { + // First attempt: PaymentHashHex starts null, so the pre-flight TrackPaymentV2 must NOT + // fire. Otherwise every fresh rebalance pays an extra LND round trip for nothing. + var node = CreateNode(); + _nodeRepo.Setup(x => x.GetById(node.Id, It.IsAny())).ReturnsAsync(node); + StubRepoForCapture(); + _lightning.Setup(x => x.AddInvoiceAsync(node, It.IsAny(), It.IsAny(), It.IsAny())) + .ReturnsAsync(new AddInvoiceResponse { PaymentRequest = "lnbc..." }); + _lightning.Setup(x => x.ProbeRouteAsync(node, It.IsAny(), It.IsAny(), + It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny())) + .ReturnsAsync(new ProbeResult.NoRoute("test")); + + var service = CreateService(); + await service.RebalanceAsync(new RebalanceRequest(node.Id, null, null, 100_000, MaxFeePct: 0.05)); + + _lightning.Verify(x => x.TrackPaymentV2Async(It.IsAny(), It.IsAny(), It.IsAny()), + Times.Never); + } } From d90ff27cbea0a06851208eaecbc44d9c088a17d4 Mon Sep 17 00:00:00 2001 From: Marcos Date: Thu, 18 Jun 2026 09:29:40 +0200 Subject: [PATCH 5/9] fix: indentation --- src/Jobs/MonitorRebalancesJob.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Jobs/MonitorRebalancesJob.cs b/src/Jobs/MonitorRebalancesJob.cs index bc333860..5998447d 100644 --- a/src/Jobs/MonitorRebalancesJob.cs +++ b/src/Jobs/MonitorRebalancesJob.cs @@ -183,9 +183,9 @@ await _auditService.LogSystemAsync( } else { - _logger.LogDebug( - "Rebalance {RebalanceId} not yet visible in LND (status={Status}, hash={PaymentHashHex}); local execution still owns it, leaving as-is", - rebalance.Id, rebalance.Status, rebalance.PaymentHashHex); + _logger.LogDebug( + "Rebalance {RebalanceId} not yet visible in LND (status={Status}, hash={PaymentHashHex}); local execution still owns it, leaving as-is", + rebalance.Id, rebalance.Status, rebalance.PaymentHashHex); } } else From 5377d9ab5df558c77556cfb007c9c4b281f00588 Mon Sep 17 00:00:00 2001 From: Marcos Date: Thu, 18 Jun 2026 11:50:50 +0200 Subject: [PATCH 6/9] fix: improve rebalance payment tracking to handle in-flight and initiated statuses --- src/Services/RebalanceService.cs | 46 +++++++++++++++++++------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/src/Services/RebalanceService.cs b/src/Services/RebalanceService.cs index b6cc5f44..2125fb9d 100644 --- a/src/Services/RebalanceService.cs +++ b/src/Services/RebalanceService.cs @@ -192,28 +192,38 @@ public async Task ExecuteAsync(int rebalanceId, CancellationToken ct try { // Avoid double paying a rebalance that has already succeeded. - if (rebalance.PaymentHashHex != null) + if (!string.IsNullOrWhiteSpace(rebalance.PaymentHashHex)) { byte[] paymentHash = Convert.FromHexString(rebalance.PaymentHashHex); var oldPayment = await _lightningService.TrackPaymentV2Async(node, paymentHash, ct); - if (oldPayment != null && oldPayment.Status is Payment.Types.PaymentStatus.Succeeded) + if (oldPayment != null) { - _logger.LogInformation( - "Rebalance {RebalanceId} already succeeded in LND (hash={PaymentHashHex}, feeSats={FeeSats}, ppm={Ppm})", - rebalance.Id, rebalance.PaymentHashHex, oldPayment.FeeMsat / 1_000L, - (long?)Math.Round((decimal)oldPayment.FeeMsat / rebalance.SatsAmount * 1_000_000m, MidpointRounding.AwayFromZero)); - ApplyTerminalPayment(rebalance, oldPayment); - _rebalanceRepository.Update(rebalance); - await _auditService.LogAsync(AuditActionType.RebalanceCompleted, AuditEventType.Success, - AuditObjectType.Rebalance, rebalance.Id.ToString(), - new - { - rebalance.FeePaidSats, - rebalance.EffectivePpm, - ActualAmountSats = rebalance.SatsAmount, - rebalance.AttemptNumber, - }); - return rebalance; + if (oldPayment.Status is Payment.Types.PaymentStatus.Succeeded) + { + _logger.LogInformation( + "Rebalance {RebalanceId} already succeeded in LND (hash={PaymentHashHex}, feeSats={FeeSats}, ppm={Ppm})", + rebalance.Id, rebalance.PaymentHashHex, oldPayment.FeeMsat / 1_000L, + (long?)Math.Round((decimal)oldPayment.FeeMsat / rebalance.SatsAmount * 1_000_000m, MidpointRounding.AwayFromZero)); + ApplyTerminalPayment(rebalance, oldPayment); + _rebalanceRepository.Update(rebalance); + await _auditService.LogAsync(AuditActionType.RebalanceCompleted, AuditEventType.Success, + AuditObjectType.Rebalance, rebalance.Id.ToString(), + new + { + rebalance.FeePaidSats, + rebalance.EffectivePpm, + ActualAmountSats = rebalance.SatsAmount, + rebalance.AttemptNumber, + }); + return rebalance; + } + else if (oldPayment.Status is Payment.Types.PaymentStatus.InFlight or Payment.Types.PaymentStatus.Initiated) + { + _logger.LogInformation( + "Rebalance {RebalanceId} already in-flight in LND (hash={PaymentHashHex}, status={LndStatus})", + rebalance.Id, rebalance.PaymentHashHex, oldPayment.Status); + return rebalance; + } } } From fbf610eb8e714896e4b30adf46576dd26d4f3e91 Mon Sep 17 00:00:00 2001 From: Marcos Date: Thu, 18 Jun 2026 11:51:15 +0200 Subject: [PATCH 7/9] test: update audit logging in rebalance job tests to use generic parameters --- .../Jobs/MonitorRebalancesJobTests.cs | 16 ++++++-- .../Services/RebalanceServiceTests.cs | 39 +++++++++++++++++++ 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/test/NodeGuard.Tests/Jobs/MonitorRebalancesJobTests.cs b/test/NodeGuard.Tests/Jobs/MonitorRebalancesJobTests.cs index 6bfc5317..8dd8198c 100644 --- a/test/NodeGuard.Tests/Jobs/MonitorRebalancesJobTests.cs +++ b/test/NodeGuard.Tests/Jobs/MonitorRebalancesJobTests.cs @@ -173,8 +173,8 @@ public async Task Execute_Probing_LndNotFound_RowLeftAlone() reb.Status.Should().Be(RebalanceStatus.Probing); _rebalanceRepo.Verify(r => r.Update(It.IsAny()), Times.Never); _audit.Verify(a => a.LogSystemAsync( - AuditActionType.RebalanceCompleted, AuditEventType.Failure, - AuditObjectType.Rebalance, It.IsAny(), It.IsAny()), + It.IsAny(), It.IsAny(), + It.IsAny(), It.IsAny(), It.IsAny()), Times.Never); } @@ -194,8 +194,8 @@ public async Task Execute_Pending_LndNotFound_RowLeftAlone() reb.Status.Should().Be(RebalanceStatus.Pending); _rebalanceRepo.Verify(r => r.Update(It.IsAny()), Times.Never); _audit.Verify(a => a.LogSystemAsync( - AuditActionType.RebalanceCompleted, AuditEventType.Failure, - AuditObjectType.Rebalance, It.IsAny(), It.IsAny()), + It.IsAny(), It.IsAny(), + It.IsAny(), It.IsAny(), It.IsAny()), Times.Never); } @@ -268,6 +268,10 @@ public async Task Execute_Pending_LndSucceededPayment_RowLeftAlone() reb.Status.Should().Be(RebalanceStatus.Pending); _rebalanceRepo.Verify(r => r.Update(It.IsAny()), Times.Never); + _audit.Verify(a => a.LogSystemAsync( + It.IsAny(), It.IsAny(), + It.IsAny(), It.IsAny(), It.IsAny()), + Times.Never); } [Fact] @@ -284,6 +288,10 @@ public async Task Execute_Probing_LndHasRecord_RowLeftAlone() reb.Status.Should().Be(RebalanceStatus.Probing); _rebalanceRepo.Verify(r => r.Update(It.IsAny()), Times.Never); + _audit.Verify(a => a.LogSystemAsync( + It.IsAny(), It.IsAny(), + It.IsAny(), It.IsAny(), It.IsAny()), + Times.Never); } [Fact] diff --git a/test/NodeGuard.Tests/Services/RebalanceServiceTests.cs b/test/NodeGuard.Tests/Services/RebalanceServiceTests.cs index 7eaad444..e92b680b 100644 --- a/test/NodeGuard.Tests/Services/RebalanceServiceTests.cs +++ b/test/NodeGuard.Tests/Services/RebalanceServiceTests.cs @@ -686,6 +686,45 @@ public async Task ExecuteAsync_RetryPreflight_LndNotFound_ProceedsWithNewInvoice Times.Once); } + [Theory] + [InlineData(Payment.Types.PaymentStatus.InFlight)] + [InlineData(Payment.Types.PaymentStatus.Initiated)] + public async Task ExecuteAsync_RetryPreflight_PriorStillInFlight_ProceedsWithNewInvoice( + Payment.Types.PaymentStatus priorLndStatus) + { + // Pins current behavior: the pre-flight only short-circuits on Succeeded. When the + // prior attempt is still settling in LND (InFlight/Initiated), the retry falls through + // and dispatches a new payment — a narrowed but not fully closed double-pay window. + // If we ever extend the pre-flight to park the row InFlight and defer to the monitor, + // this test should flip to assert AddInvoice is NEVER called and the row is parked. + var node = CreateNode(); + var rebalance = BuildRetryRow(node); + _rebalanceRepo.Setup(r => r.GetById(rebalance.Id)).ReturnsAsync(rebalance); + _rebalanceRepo.Setup(r => r.Update(It.IsAny())).Returns((true, (string?)null)); + + _lightning.Setup(x => x.TrackPaymentV2Async(node, It.IsAny(), It.IsAny())) + .ReturnsAsync(new Payment { Status = priorLndStatus }); + _lightning.Setup(x => x.AddInvoiceAsync(node, It.IsAny(), It.IsAny(), It.IsAny())) + .ReturnsAsync(new AddInvoiceResponse + { + PaymentRequest = "lnbc...", + RHash = Google.Protobuf.ByteString.CopyFrom(new byte[] { 0xCC, 0xDD }), + }); + _lightning.Setup(x => x.ProbeRouteAsync(node, It.IsAny(), It.IsAny(), + It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny())) + .ReturnsAsync(new ProbeResult.NoRoute("stop")); + + var service = CreateService(); + var result = await service.ExecuteAsync(rebalance.Id); + + _lightning.Verify(x => x.AddInvoiceAsync(node, It.IsAny(), It.IsAny(), It.IsAny()), + Times.Once); + // The new invoice's hash replaced the stale one — proving the pre-flight fell through. + result.PaymentHashHex.Should().Be("ccdd"); + // The prior payment was NOT adopted (Status didn't get flipped to Succeeded by ApplyTerminalPayment). + result.PreimageHex.Should().BeNull(); + } + [Fact] public async Task ExecuteAsync_FirstAttempt_NoPriorHash_SkipsPreflightTrack() { From cd2a360e9e60e2dee0b2517be3775ff18d774069 Mon Sep 17 00:00:00 2001 From: Marcos Date: Fri, 19 Jun 2026 08:55:36 +0200 Subject: [PATCH 8/9] fix: schedule retry for eligible rebalance requests already in-flight --- src/Services/RebalanceService.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Services/RebalanceService.cs b/src/Services/RebalanceService.cs index 2125fb9d..2da87f1e 100644 --- a/src/Services/RebalanceService.cs +++ b/src/Services/RebalanceService.cs @@ -222,6 +222,7 @@ await _auditService.LogAsync(AuditActionType.RebalanceCompleted, AuditEventType. _logger.LogInformation( "Rebalance {RebalanceId} already in-flight in LND (hash={PaymentHashHex}, status={LndStatus})", rebalance.Id, rebalance.PaymentHashHex, oldPayment.Status); + await ScheduleRetryIfEligibleAsync(rebalance); return rebalance; } } From aad3bb2f2b4adb2920e60ac6ca2db62ec634ba4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20A=2EP?= <53834183+Jossec101@users.noreply.github.com> Date: Mon, 29 Jun 2026 13:56:18 +0200 Subject: [PATCH 9/9] More tests --- src/Jobs/MonitorRebalancesJob.cs | 4 +- .../Jobs/MonitorRebalancesJobTests.cs | 22 +++++++++++ .../Services/RebalanceServiceTests.cs | 39 +++++++++---------- 3 files changed, 43 insertions(+), 22 deletions(-) diff --git a/src/Jobs/MonitorRebalancesJob.cs b/src/Jobs/MonitorRebalancesJob.cs index 5998447d..936f6388 100644 --- a/src/Jobs/MonitorRebalancesJob.cs +++ b/src/Jobs/MonitorRebalancesJob.cs @@ -131,7 +131,7 @@ private async Task ReconcileAsync(Rebalance rebalance, CancellationToken ct) // row to Failed here would kill an in-progress probe. Leave it alone; the local // execution is the source of truth until it dispatches SendPaymentV2. // - // - InFlight: we believe SendPaymentV2 has been dispatched, so LND should know. + // - InFlight: SendpaymentV2 has been invoked, so LND should know. // If it doesn't, the payment never reached LND (process crashed mid-dispatch, // or LND lost it). Flip to Failed so the row doesn't stay InFlight forever. // @@ -200,7 +200,7 @@ await _auditService.LogSystemAsync( if (rebalance.Status is RebalanceStatus.Pending or RebalanceStatus.Probing) { - _logger.LogInformation( + _logger.LogDebug( "Rebalance {RebalanceId} (status={Status}) has a payment record in LND (status={LndStatus}, hash={PaymentHashHex}); local execution still owns it, leaving as-is", rebalance.Id, rebalance.Status, payment.Status, rebalance.PaymentHashHex); return; diff --git a/test/NodeGuard.Tests/Jobs/MonitorRebalancesJobTests.cs b/test/NodeGuard.Tests/Jobs/MonitorRebalancesJobTests.cs index 8dd8198c..4468941f 100644 --- a/test/NodeGuard.Tests/Jobs/MonitorRebalancesJobTests.cs +++ b/test/NodeGuard.Tests/Jobs/MonitorRebalancesJobTests.cs @@ -221,6 +221,28 @@ public async Task Execute_Pending_LndNotFound_OlderThanWindow_FlipsToFailed() Times.Once); } + [Fact] + public async Task Execute_Probing_LndNotFound_OlderThanWindow_FlipsToFailed() + { + // Same age-based safety net as the Pending case — the null branch handles Pending and + // Probing identically, so a Probing row stuck past the reconciliation window must also + // flip to Failed. Pinned independently so splitting the two statuses can't regress this. + var stale = DateTimeOffset.UtcNow.AddHours(-(Constants.REBALANCE_RECONCILE_TERMINAL_WINDOW_HOURS + 1)); + var reb = MakeRebalance(RebalanceStatus.Probing, updateDatetime: stale); + _rebalanceRepo.Setup(r => r.GetReconcilable(It.IsAny())).ReturnsAsync(new List { reb }); + _lightning.Setup(x => x.TrackPaymentV2Async(It.IsAny(), It.IsAny(), It.IsAny())) + .ReturnsAsync((Payment?)null); + + await CreateJob().Execute(_ctx.Object); + + reb.Status.Should().Be(RebalanceStatus.Failed); + _rebalanceRepo.Verify(r => r.Update(reb), Times.Once); + _audit.Verify(a => a.LogSystemAsync( + AuditActionType.RebalanceCompleted, AuditEventType.Failure, + AuditObjectType.Rebalance, reb.Id.ToString(), It.IsAny()), + Times.Once); + } + [Fact] public async Task Execute_Pending_LndFailedPayment_RowLeftAlone() { diff --git a/test/NodeGuard.Tests/Services/RebalanceServiceTests.cs b/test/NodeGuard.Tests/Services/RebalanceServiceTests.cs index e92b680b..142dd84f 100644 --- a/test/NodeGuard.Tests/Services/RebalanceServiceTests.cs +++ b/test/NodeGuard.Tests/Services/RebalanceServiceTests.cs @@ -689,14 +689,13 @@ public async Task ExecuteAsync_RetryPreflight_LndNotFound_ProceedsWithNewInvoice [Theory] [InlineData(Payment.Types.PaymentStatus.InFlight)] [InlineData(Payment.Types.PaymentStatus.Initiated)] - public async Task ExecuteAsync_RetryPreflight_PriorStillInFlight_ProceedsWithNewInvoice( + public async Task ExecuteAsync_RetryPreflight_PriorStillInFlight_SchedulesRetryWithoutRepaying( Payment.Types.PaymentStatus priorLndStatus) { - // Pins current behavior: the pre-flight only short-circuits on Succeeded. When the - // prior attempt is still settling in LND (InFlight/Initiated), the retry falls through - // and dispatches a new payment — a narrowed but not fully closed double-pay window. - // If we ever extend the pre-flight to park the row InFlight and defer to the monitor, - // this test should flip to assert AddInvoice is NEVER called and the row is parked. + // The prior attempt is still settling in LND (InFlight/Initiated). The pre-flight must + // NOT dispatch another payment on top of the live one — that's the double-pay window the + // payment-hash fix closes. Instead it leaves the in-flight payment alone and schedules a + // retry, whose own pre-flight re-checks the hash once LND reaches a terminal state. var node = CreateNode(); var rebalance = BuildRetryRow(node); _rebalanceRepo.Setup(r => r.GetById(rebalance.Id)).ReturnsAsync(rebalance); @@ -704,25 +703,25 @@ public async Task ExecuteAsync_RetryPreflight_PriorStillInFlight_ProceedsWithNew _lightning.Setup(x => x.TrackPaymentV2Async(node, It.IsAny(), It.IsAny())) .ReturnsAsync(new Payment { Status = priorLndStatus }); - _lightning.Setup(x => x.AddInvoiceAsync(node, It.IsAny(), It.IsAny(), It.IsAny())) - .ReturnsAsync(new AddInvoiceResponse - { - PaymentRequest = "lnbc...", - RHash = Google.Protobuf.ByteString.CopyFrom(new byte[] { 0xCC, 0xDD }), - }); - _lightning.Setup(x => x.ProbeRouteAsync(node, It.IsAny(), It.IsAny(), - It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny())) - .ReturnsAsync(new ProbeResult.NoRoute("stop")); var service = CreateService(); var result = await service.ExecuteAsync(rebalance.Id); - _lightning.Verify(x => x.AddInvoiceAsync(node, It.IsAny(), It.IsAny(), It.IsAny()), - Times.Once); - // The new invoice's hash replaced the stale one — proving the pre-flight fell through. - result.PaymentHashHex.Should().Be("ccdd"); - // The prior payment was NOT adopted (Status didn't get flipped to Succeeded by ApplyTerminalPayment). + // No new invoice or payment — the live in-flight payment is left untouched. + _lightning.Verify(x => x.AddInvoiceAsync(It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny()), + Times.Never); + _lightning.Verify(x => x.SendPaymentV2Async(It.IsAny(), It.IsAny(), It.IsAny(), + It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny()), + Times.Never); + // The prior attempt's hash is preserved, not overwritten by a fresh invoice. + result.PaymentHashHex.Should().Be("deadbeef"); + // The prior payment was NOT adopted as terminal. result.PreimageHex.Should().BeNull(); + // A retry is queued (attempt 2 -> 3) so the true outcome gets reconciled later. + result.Status.Should().Be(RebalanceStatus.Pending); + result.AttemptNumber.Should().Be(3); + _scheduler.Verify(s => s.ScheduleJob(It.IsAny(), It.IsAny(), It.IsAny()), + Times.Once); } [Fact]