From 2bd09e4ee6fa416bf84c30015b45ec67088cd9c3 Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Thu, 9 Apr 2026 19:03:58 -0400 Subject: [PATCH 1/2] Hold peer lock when pushing to decode_update_adds This avoids race conditions where we're unable to properly update an HTLC's state because we need to update its state in the ChannelManager, but the HTLC is stuck in transit from the Channel to ChannelManager::decode_update_add_htlcs. Now the HTLC will atomically go from the Channel to the ChannelManager decode queue under the same lock. --- lightning/src/ln/channelmanager.rs | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 2c97e4adaa1..4aad2a5381c 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -1516,7 +1516,6 @@ enum PostMonitorUpdateChanResume { unbroadcasted_batch_funding_txid: Option, update_actions: Vec, htlc_forwards: Vec, - decode_update_add_htlcs: Option<(u64, Vec)>, finalized_claimed_htlcs: Vec<(HTLCSource, Option)>, failed_htlcs: Vec<(HTLCSource, PaymentHash, HTLCFailReason)>, committed_outbound_htlc_sources: Vec<(HTLCPreviousHopData, u64)>, @@ -10116,7 +10115,6 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ &self, channel_id: ChannelId, counterparty_node_id: PublicKey, funding_txo: OutPoint, user_channel_id: u128, unbroadcasted_batch_funding_txid: Option, update_actions: Vec, htlc_forwards: Vec, - decode_update_add_htlcs: Option<(u64, Vec)>, finalized_claimed_htlcs: Vec<(HTLCSource, Option)>, failed_htlcs: Vec<(HTLCSource, PaymentHash, HTLCFailReason)>, committed_outbound_htlc_sources: Vec<(HTLCPreviousHopData, u64)>, @@ -10177,9 +10175,6 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ self.handle_monitor_update_completion_actions(update_actions); self.forward_htlcs(htlc_forwards); - if let Some(decode) = decode_update_add_htlcs { - self.push_decode_update_add_htlcs(decode); - } self.finalize_claims(finalized_claimed_htlcs); for failure in failed_htlcs { let failure_type = failure.0.failure_type(counterparty_node_id, channel_id); @@ -10667,6 +10662,10 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ pending_msg_events.push(upd); } + if let Some(update_adds) = decode_update_add_htlcs { + self.push_decode_update_add_htlcs(update_adds); + } + let unbroadcasted_batch_funding_txid = chan.context.unbroadcasted_batch_funding_txid(&chan.funding); @@ -10678,7 +10677,6 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ unbroadcasted_batch_funding_txid, update_actions, htlc_forwards, - decode_update_add_htlcs, finalized_claimed_htlcs: updates.finalized_claimed_htlcs, failed_htlcs: updates.failed_htlcs, committed_outbound_htlc_sources: updates.committed_outbound_htlc_sources, @@ -10780,7 +10778,6 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ unbroadcasted_batch_funding_txid, update_actions, htlc_forwards, - decode_update_add_htlcs, finalized_claimed_htlcs, failed_htlcs, committed_outbound_htlc_sources, @@ -10793,7 +10790,6 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ unbroadcasted_batch_funding_txid, update_actions, htlc_forwards, - decode_update_add_htlcs, finalized_claimed_htlcs, failed_htlcs, committed_outbound_htlc_sources, @@ -17920,12 +17916,6 @@ impl< } } - let mut decode_update_add_htlcs_opt = None; - let decode_update_add_htlcs = self.decode_update_add_htlcs.lock().unwrap(); - if !decode_update_add_htlcs.is_empty() { - decode_update_add_htlcs_opt = Some(decode_update_add_htlcs); - } - let claimable_payments = self.claimable_payments.lock().unwrap(); let pending_outbound_payments = self.pending_outbound_payments.pending_outbound_payments.lock().unwrap(); @@ -17951,6 +17941,14 @@ impl< peer_states.push(peer_state_mutex.unsafe_well_ordered_double_lock_self()); } + let mut decode_update_add_htlcs_opt = None; + { + let decode_update_add_htlcs = self.decode_update_add_htlcs.lock().unwrap(); + if !decode_update_add_htlcs.is_empty() { + decode_update_add_htlcs_opt = Some(decode_update_add_htlcs); + } + } + let mut peer_storage_dir: Vec<(&PublicKey, &Vec)> = Vec::new(); (serializable_peer_count).write(writer)?; From 2ebc372fbce1763201ff9f66b1490d59192de8bf Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Tue, 7 Apr 2026 13:34:53 -0400 Subject: [PATCH 2/2] Fix async release before HTLC decode Handle `ReleaseHeldHtlc` messages that arrive before the sender-side LSP has even queued the held HTLC for onion decoding. Unlike lightningdevkit#4106, which covers releases arriving after the HTLC is in `decode_update_add_htlcs` but before it reaches `pending_intercepted_htlcs`, this preserves releases that arrive one step earlier and would otherwise be dropped as HTLC not found. Co-Authored-By: HAL 9000 Co-Authored-By: Elias Rohrer --- lightning/src/ln/async_payments_tests.rs | 164 +++++++++++++++++++++++ lightning/src/ln/channel.rs | 37 +++++ lightning/src/ln/channelmanager.rs | 12 ++ 3 files changed, 213 insertions(+) diff --git a/lightning/src/ln/async_payments_tests.rs b/lightning/src/ln/async_payments_tests.rs index 341bd5d7269..60632b13f7a 100644 --- a/lightning/src/ln/async_payments_tests.rs +++ b/lightning/src/ln/async_payments_tests.rs @@ -3458,3 +3458,167 @@ fn release_htlc_races_htlc_onion_decode() { claim_payment_along_route(ClaimAlongRouteArgs::new(sender, route, keysend_preimage)); assert_eq!(res, Some(PaidBolt12Invoice::StaticInvoice(static_invoice))); } + +#[test] +fn async_payment_e2e_release_before_hold_registered() { + // Tests that an LSP will release a held htlc if the `ReleaseHeldHtlc` message was received + // before the HTLC was fully committed to the channel, which was previously broken. + let chanmon_cfgs = create_chanmon_cfgs(4); + let node_cfgs = create_node_cfgs(4, &chanmon_cfgs); + + let (sender_cfg, recipient_cfg) = (often_offline_node_cfg(), often_offline_node_cfg()); + let mut sender_lsp_cfg = test_default_channel_config(); + sender_lsp_cfg.enable_htlc_hold = true; + let mut invoice_server_cfg = test_default_channel_config(); + invoice_server_cfg.accept_forwards_to_priv_channels = true; + + let node_chanmgrs = create_node_chanmgrs( + 4, + &node_cfgs, + &[Some(sender_cfg), Some(sender_lsp_cfg), Some(invoice_server_cfg), Some(recipient_cfg)], + ); + let nodes = create_network(4, &node_cfgs, &node_chanmgrs); + create_unannounced_chan_between_nodes_with_value(&nodes, 0, 1, 1_000_000, 0); + create_announced_chan_between_nodes_with_value(&nodes, 1, 2, 1_000_000, 0); + create_unannounced_chan_between_nodes_with_value(&nodes, 2, 3, 1_000_000, 0); + unify_blockheight_across_nodes(&nodes); + let sender = &nodes[0]; + let sender_lsp = &nodes[1]; + let invoice_server = &nodes[2]; + let recipient = &nodes[3]; + + let recipient_id = vec![42; 32]; + let inv_server_paths = + invoice_server.node.blinded_paths_for_async_recipient(recipient_id.clone(), None).unwrap(); + recipient.node.set_paths_to_static_invoice_server(inv_server_paths).unwrap(); + expect_offer_paths_requests(recipient, &[invoice_server, sender_lsp]); + let invoice_flow_res = + pass_static_invoice_server_messages(invoice_server, recipient, recipient_id.clone()); + let invoice = invoice_flow_res.invoice; + let invreq_path = invoice_flow_res.invoice_request_path; + + let offer = recipient.node.get_async_receive_offer().unwrap(); + recipient.node.peer_disconnected(invoice_server.node.get_our_node_id()); + recipient.onion_messenger.peer_disconnected(invoice_server.node.get_our_node_id()); + invoice_server.node.peer_disconnected(recipient.node.get_our_node_id()); + invoice_server.onion_messenger.peer_disconnected(recipient.node.get_our_node_id()); + + let amt_msat = 5000; + let payment_id = PaymentId([1; 32]); + sender.node.pay_for_offer(&offer, Some(amt_msat), payment_id, Default::default()).unwrap(); + + let (peer_id, invreq_om) = extract_invoice_request_om(sender, &[sender_lsp, invoice_server]); + invoice_server.onion_messenger.handle_onion_message(peer_id, &invreq_om); + + let mut events = invoice_server.node.get_and_clear_pending_events(); + assert_eq!(events.len(), 1); + let (reply_path, invreq) = match events.pop().unwrap() { + Event::StaticInvoiceRequested { + recipient_id: ev_id, reply_path, invoice_request, .. + } => { + assert_eq!(recipient_id, ev_id); + (reply_path, invoice_request) + }, + _ => panic!(), + }; + + invoice_server + .node + .respond_to_static_invoice_request(invoice, reply_path, invreq, invreq_path) + .unwrap(); + let (peer_node_id, static_invoice_om, static_invoice) = + extract_static_invoice_om(invoice_server, &[sender_lsp, sender]); + + // Lock the HTLC in with the sender LSP, but stop before the sender's revoke_and_ack is handed + // back to the sender LSP. This reproduces the real LSPS2 timing where ReleaseHeldHtlc can + // arrive before the held HTLC is queued for decode on the sender LSP. + sender.onion_messenger.handle_onion_message(peer_node_id, &static_invoice_om); + check_added_monitors(sender, 1); + let commitment_update = get_htlc_update_msgs(&sender, &sender_lsp.node.get_our_node_id()); + let update_add = commitment_update.update_add_htlcs[0].clone(); + let payment_hash = update_add.payment_hash; + assert!(update_add.hold_htlc.is_some()); + sender_lsp.node.handle_update_add_htlc(sender.node.get_our_node_id(), &update_add); + sender_lsp.node.handle_commitment_signed_batch_test( + sender.node.get_our_node_id(), + &commitment_update.commitment_signed, + ); + check_added_monitors(sender_lsp, 1); + let (_extra_msg_option, sender_raa, sender_holding_cell_htlcs) = + do_main_commitment_signed_dance(sender_lsp, sender, false); + assert!(sender_holding_cell_htlcs.is_empty()); + + let held_htlc_om_to_inv_server = sender + .onion_messenger + .next_onion_message_for_peer(invoice_server.node.get_our_node_id()) + .unwrap(); + invoice_server + .onion_messenger + .handle_onion_message(sender_lsp.node.get_our_node_id(), &held_htlc_om_to_inv_server); + + let mut events_rc = core::cell::RefCell::new(Vec::new()); + invoice_server.onion_messenger.process_pending_events(&|e| Ok(events_rc.borrow_mut().push(e))); + let events = events_rc.into_inner(); + let held_htlc_om = events + .into_iter() + .find_map(|ev| { + if let Event::OnionMessageIntercepted { message, .. } = ev { + let peeled_onion = recipient.onion_messenger.peel_onion_message(&message).unwrap(); + if matches!( + peeled_onion, + PeeledOnion::Offers(OffersMessage::InvoiceRequest { .. }, _, _) + ) { + return None; + } + + assert!(matches!( + peeled_onion, + PeeledOnion::AsyncPayments(AsyncPaymentsMessage::HeldHtlcAvailable(_), _, _) + )); + Some(message) + } else { + None + } + }) + .unwrap(); + + let mut reconnect_args = ReconnectArgs::new(invoice_server, recipient); + reconnect_args.send_channel_ready = (true, true); + reconnect_nodes(reconnect_args); + + let events = core::cell::RefCell::new(Vec::new()); + invoice_server.onion_messenger.process_pending_events(&|e| Ok(events.borrow_mut().push(e))); + assert_eq!(events.borrow().len(), 1); + assert!(matches!(events.into_inner().pop().unwrap(), Event::OnionMessagePeerConnected { .. })); + expect_offer_paths_requests(recipient, &[invoice_server]); + + recipient + .onion_messenger + .handle_onion_message(invoice_server.node.get_our_node_id(), &held_htlc_om); + let (peer_id, release_htlc_om) = + extract_release_htlc_oms(recipient, &[sender, sender_lsp, invoice_server]).pop().unwrap(); + sender_lsp.onion_messenger.handle_onion_message(peer_id, &release_htlc_om); + + // Now let the sender LSP receive the sender's revoke_and_ack and continue processing the held + // HTLC, which previously would've resulted in holding the HTLC even though the release message + // was already received. + sender_lsp.node.handle_revoke_and_ack(sender.node.get_our_node_id(), &sender_raa); + check_added_monitors(sender_lsp, 1); + assert!(sender_lsp.node.get_and_clear_pending_msg_events().is_empty()); + sender_lsp.node.process_pending_htlc_forwards(); + let mut events = sender_lsp.node.get_and_clear_pending_msg_events(); + assert_eq!(events.len(), 1); + let ev = remove_first_msg_event_to_node(&invoice_server.node.get_our_node_id(), &mut events); + check_added_monitors(&sender_lsp, 1); + + let path: &[&Node] = &[invoice_server, recipient]; + let args = PassAlongPathArgs::new(sender_lsp, path, amt_msat, payment_hash, ev) + .with_dummy_tlvs(&[DummyTlvs::default(); DEFAULT_PAYMENT_DUMMY_HOPS]); + let claimable_ev = do_pass_along_path(args).unwrap(); + + let route: &[&[&Node]] = &[&[sender_lsp, invoice_server, recipient]]; + let keysend_preimage = extract_payment_preimage(&claimable_ev); + let (res, _) = + claim_payment_along_route(ClaimAlongRouteArgs::new(sender, route, keysend_preimage)); + assert_eq!(res, Some(PaidBolt12Invoice::StaticInvoice(static_invoice))); +} diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs index 32c0e94bdc8..55d4a84eb91 100644 --- a/lightning/src/ln/channel.rs +++ b/lightning/src/ln/channel.rs @@ -8108,6 +8108,43 @@ where debug_assert!(false, "If we go to prune an inbound HTLC it should be present") } + /// Clears the `hold_htlc` flag for a pending inbound HTLC, returning `true` if the HTLC was + /// successfully released. Useful when a [`ReleaseHeldHtlc`] onion message arrives before the + /// HTLC has been fully committed. + /// + /// [`ReleaseHeldHtlc`]: crate::onion_message::async_payments::ReleaseHeldHtlc + pub(super) fn release_pending_inbound_held_htlc(&mut self, htlc_id: u64) -> bool { + for update_add in self.context.monitor_pending_update_adds.iter_mut() { + if update_add.htlc_id == htlc_id { + update_add.hold_htlc.take(); + return true; + } + } + for htlc in self.context.pending_inbound_htlcs.iter_mut() { + if htlc.htlc_id != htlc_id { + continue; + } + match &mut htlc.state { + // Clearing `hold_htlc` here directly affects the copy that will be cloned into the decode + // pipeline when RAA promotes the HTLC. + InboundHTLCState::RemoteAnnounced(InboundHTLCResolution::Pending { + update_add_htlc, + }) + | InboundHTLCState::AwaitingRemoteRevokeToAnnounce( + InboundHTLCResolution::Pending { update_add_htlc }, + ) + | InboundHTLCState::AwaitingAnnouncedRemoteRevoke( + InboundHTLCResolution::Pending { update_add_htlc }, + ) => { + update_add_htlc.hold_htlc.take(); + return true; + }, + _ => return false, + } + } + false + } + /// Useful for testing crash scenarios where the holding cell is not persisted. #[cfg(test)] pub(super) fn test_clear_holding_cell(&mut self) { diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 4aad2a5381c..b08864af737 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -17189,6 +17189,18 @@ impl< htlc_id, } => { let _serialize_guard = PersistenceNotifierGuard::notify_on_drop(self); + // It's possible the release_held_htlc message raced ahead of us fully committing to the + // HTLC. If that's the case, update the pending update_add to indicate that the HTLC should + // be released immediately. + let released_pre_commitment_htlc = self + .do_funded_channel_callback(prev_outbound_scid_alias, |chan| { + chan.release_pending_inbound_held_htlc(htlc_id) + }) + .unwrap_or(false); + if released_pre_commitment_htlc { + return; + } + // It's possible the release_held_htlc message raced ahead of us transitioning the pending // update_add to `Self::pending_intercept_htlcs`. If that's the case, update the pending // update_add to indicate that the HTLC should be released immediately.