From 944f16255a60f52ecaa27c40417048853e39aaa3 Mon Sep 17 00:00:00 2001 From: yyforyongyu Date: Sun, 10 Nov 2024 17:18:14 +0800 Subject: [PATCH 1/3] htlcswitch: handle nil circuit properly when settling We have two sources which can call `handlePacketSettle`, either through the link's `<-s.htlcPlex`, or the `<-s.resolutionMsgs`, which means the `closeCircuit` could be call twice. Previously we already caught this case inside `closeCircuit`, in that we would return a nil circuit upon seeing `ErrUnknownCircuit`, indicating the circuit was removed. However, we still need to account the case when the circuit is the process of being closed, which is now fixed as we will ignore when seeing `ErrCircuitClosing`. --- htlcswitch/packet.go | 11 +++++++++++ htlcswitch/switch.go | 11 ++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/htlcswitch/packet.go b/htlcswitch/packet.go index e9918585191..ed5f82588c3 100644 --- a/htlcswitch/packet.go +++ b/htlcswitch/packet.go @@ -1,6 +1,8 @@ package htlcswitch import ( + "fmt" + "github.com/lightningnetwork/lnd/channeldb" "github.com/lightningnetwork/lnd/graph/db/models" "github.com/lightningnetwork/lnd/htlcswitch/hop" @@ -136,3 +138,12 @@ func (p *htlcPacket) keystone() Keystone { OutKey: p.outKey(), } } + +// String returns a human-readable description of the packet. +func (p *htlcPacket) String() string { + return fmt.Sprintf("keystone=%v, sourceRef=%v, destRef=%v, "+ + "incomingAmount=%v, amount=%v, localFailure=%v, hasSource=%v "+ + "isResolution=%v", p.keystone(), p.sourceRef, p.destRef, + p.incomingAmount, p.amount, p.localFailure, p.hasSource, + p.isResolution) +} diff --git a/htlcswitch/switch.go b/htlcswitch/switch.go index 4c54fab0a57..704887e8e12 100644 --- a/htlcswitch/switch.go +++ b/htlcswitch/switch.go @@ -2994,6 +2994,15 @@ func (s *Switch) handlePacketSettle(packet *htlcPacket) error { // If the source of this packet has not been set, use the circuit map // to lookup the origin. circuit, err := s.closeCircuit(packet) + + // If the circuit is in the process of closing, we will return a nil as + // there's another packet handling undergoing. + if errors.Is(err, ErrCircuitClosing) { + log.Debugf("Circuit is closing for packet=%v", packet) + return nil + } + + // Exit early if there's another error. if err != nil { return err } @@ -3005,7 +3014,7 @@ func (s *Switch) handlePacketSettle(packet *htlcPacket) error { // and when `UpdateFulfillHTLC` is received. After which `RevokeAndAck` // is received, which invokes `processRemoteSettleFails` in its link. if circuit == nil { - log.Debugf("Found nil circuit: packet=%v", spew.Sdump(packet)) + log.Debugf("Circuit already closed for packet=%v", packet) return nil } From 39584be7e1181de34e5ecdb29ea320bc50acbb10 Mon Sep 17 00:00:00 2001 From: yyforyongyu Date: Fri, 8 Nov 2024 02:44:11 +0800 Subject: [PATCH 2/3] routing: fix nil pointer dereference in `exitWithErr` In a rare case when the critical log is triggered when using postgres as db backend, the `payment` could be nil cause the server is shutting down, causing the payment fetching to return an error. We now cache its state before fetching it from the db. --- routing/payment_lifecycle.go | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/routing/payment_lifecycle.go b/routing/payment_lifecycle.go index 5c4bae55854..34de74461e3 100644 --- a/routing/payment_lifecycle.go +++ b/routing/payment_lifecycle.go @@ -189,10 +189,21 @@ func (p *paymentLifecycle) resumePayment(ctx context.Context) ([32]byte, p.resultCollector(&a) } + // Get the payment status. + status := payment.GetStatus() + // exitWithErr is a helper closure that logs and returns an error. exitWithErr := func(err error) ([32]byte, *route.Route, error) { - log.Errorf("Payment %v with status=%v failed: %v", - p.identifier, payment.GetStatus(), err) + // Log an error with the latest payment status. + // + // NOTE: this `status` variable is reassigned in the loop + // below. We could also call `payment.GetStatus` here, but in a + // rare case when the critical log is triggered when using + // postgres as db backend, the `payment` could be nil, causing + // the payment fetching to return an error. + log.Errorf("Payment %v with status=%v failed: %v", p.identifier, + status, err) + return [32]byte{}, nil, err } @@ -213,10 +224,10 @@ lifecycle: ps := payment.GetState() remainingFees := p.calcFeeBudget(ps.FeesPaid) + status = payment.GetStatus() log.Debugf("Payment %v: status=%v, active_shards=%v, "+ - "rem_value=%v, fee_limit=%v", p.identifier, - payment.GetStatus(), ps.NumAttemptsInFlight, - ps.RemainingAmt, remainingFees) + "rem_value=%v, fee_limit=%v", p.identifier, status, + ps.NumAttemptsInFlight, ps.RemainingAmt, remainingFees) // We now proceed our lifecycle with the following tasks in // order, From 7882e1ccffc047ed1c1e4ad1e52966ffb1d1c3a7 Mon Sep 17 00:00:00 2001 From: yyforyongyu Date: Tue, 5 Nov 2024 08:20:32 +0800 Subject: [PATCH 3/3] lnwallet: add debug logs --- lnwallet/channel.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lnwallet/channel.go b/lnwallet/channel.go index dc45a12bed1..6486464d0fe 100644 --- a/lnwallet/channel.go +++ b/lnwallet/channel.go @@ -1710,7 +1710,7 @@ func (lc *LightningChannel) restorePendingRemoteUpdates( localCommitmentHeight uint64, pendingRemoteCommit *commitment) error { - lc.log.Debugf("Restoring %v dangling remote updates", + lc.log.Debugf("Restoring %v dangling remote updates pending our sig", len(unsignedAckedUpdates)) for _, logUpdate := range unsignedAckedUpdates { @@ -1829,6 +1829,9 @@ func (lc *LightningChannel) restorePendingLocalUpdates( pendingCommit := pendingRemoteCommitDiff.Commitment pendingHeight := pendingCommit.CommitHeight + lc.log.Debugf("Restoring pending remote commitment %v at commit "+ + "height %v", pendingCommit.CommitTx.TxHash(), pendingHeight) + auxResult, err := fn.MapOptionZ( lc.leafStore, func(s AuxLeafStore) fn.Result[CommitDiffAuxResult] {