Skip to content

Commit b15785c

Browse files
committed
Defer monitor update completions after funding spend
When no_further_updates_allowed() is true and the persister returns Completed, ChainMonitor now overrides the return to InProgress and queues the update_id in deferred_monitor_update_completions. These are resolved in release_pending_monitor_events, so ChannelManager sees them complete together with the force-close MonitorEvents. This eliminates phantom InProgress entries that would never complete: previously, a rejected pre-close update (e.g. commitment_signed arriving after funding spend) returned InProgress with no completion path, blocking MonitorUpdateCompletionActions (PaymentClaimed, PaymentForwarded) indefinitely. A subsequent post-close update returning Completed would then violate the in-order completion invariant. AI tools were used in preparing this commit.
1 parent 6e2f078 commit b15785c

2 files changed

Lines changed: 164 additions & 46 deletions

File tree

lightning/src/chain/chainmonitor.rs

Lines changed: 61 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,11 @@ struct MonitorHolder<ChannelSigner: EcdsaChannelSigner> {
227227
/// [`ChannelMonitorUpdate`] which was already applied. While this isn't an issue for the
228228
/// LDK-provided update-based [`Persist`], it is somewhat surprising for users so we avoid it.
229229
pending_monitor_updates: Mutex<Vec<u64>>,
230+
/// Monitor update IDs for which the persister returned `Completed` but we overrode the return
231+
/// to `InProgress` because the channel is post-close (`no_further_updates_allowed()` is true).
232+
/// Resolved during the next monitor event release so that `ChannelManager` sees them complete
233+
/// together with the force-close `MonitorEvent`s.
234+
deferred_monitor_update_completions: Mutex<Vec<u64>>,
230235
}
231236

232237
impl<ChannelSigner: EcdsaChannelSigner> MonitorHolder<ChannelSigner> {
@@ -1054,7 +1059,11 @@ where
10541059
if let Some(ref chain_source) = self.chain_source {
10551060
monitor.load_outputs_to_watch(chain_source, &self.logger);
10561061
}
1057-
entry.insert(MonitorHolder { monitor, pending_monitor_updates: Mutex::new(Vec::new()) });
1062+
entry.insert(MonitorHolder {
1063+
monitor,
1064+
pending_monitor_updates: Mutex::new(Vec::new()),
1065+
deferred_monitor_update_completions: Mutex::new(Vec::new()),
1066+
});
10581067

10591068
Ok(ChannelMonitorUpdateStatus::Completed)
10601069
}
@@ -1305,6 +1314,7 @@ where
13051314
entry.insert(MonitorHolder {
13061315
monitor,
13071316
pending_monitor_updates: Mutex::new(pending_monitor_updates),
1317+
deferred_monitor_update_completions: Mutex::new(Vec::new()),
13081318
});
13091319
Ok(persist_res)
13101320
}
@@ -1414,7 +1424,26 @@ where
14141424
}
14151425
}
14161426

1417-
if update_res.is_err() {
1427+
if monitor.no_further_updates_allowed()
1428+
&& persist_res == ChannelMonitorUpdateStatus::Completed
1429+
{
1430+
// The channel is post-close (funding spend seen, lockdown, or holder
1431+
// tx signed). Return InProgress so ChannelManager freezes the
1432+
// channel until the force-close MonitorEvents are processed. We
1433+
// track this update_id in deferred_monitor_update_completions so it
1434+
// gets resolved during release_pending_monitor_events, together with
1435+
// those MonitorEvents.
1436+
pending_monitor_updates.push(update_id);
1437+
monitor_state
1438+
.deferred_monitor_update_completions
1439+
.lock()
1440+
.unwrap()
1441+
.push(update_id);
1442+
log_debug!(
1443+
logger,
1444+
"Deferring completion of ChannelMonitorUpdate id {:?} (channel is post-close)",
1445+
update_id,
1446+
);
14181447
ChannelMonitorUpdateStatus::InProgress
14191448
} else {
14201449
persist_res
@@ -1429,8 +1458,9 @@ where
14291458
for (channel_id, update_id) in self.persister.get_and_clear_completed_updates() {
14301459
let _ = self.channel_monitor_updated(channel_id, update_id);
14311460
}
1461+
let monitors = self.monitors.read().unwrap();
14321462
let mut pending_monitor_events = self.pending_monitor_events.lock().unwrap().split_off(0);
1433-
for monitor_state in self.monitors.read().unwrap().values() {
1463+
for monitor_state in monitors.values() {
14341464
let monitor_events = monitor_state.monitor.get_and_clear_pending_monitor_events();
14351465
if monitor_events.len() > 0 {
14361466
let monitor_funding_txo = monitor_state.monitor.get_funding_txo();
@@ -1444,6 +1474,34 @@ where
14441474
));
14451475
}
14461476
}
1477+
// Resolve any deferred monitor update completions after collecting regular monitor
1478+
// events. The regular events include the force-close (CommitmentTxConfirmed), which
1479+
// ChannelManager processes first. The deferred completions come after, so that
1480+
// completion actions resolve once the ChannelForceClosed update (generated during
1481+
// force-close processing) also gets deferred and resolved in the next event cycle.
1482+
for monitor_state in monitors.values() {
1483+
let deferred =
1484+
monitor_state.deferred_monitor_update_completions.lock().unwrap().split_off(0);
1485+
for update_id in deferred {
1486+
let mut pending = monitor_state.pending_monitor_updates.lock().unwrap();
1487+
pending.retain(|id| *id != update_id);
1488+
let monitor_is_pending_updates = monitor_state.has_pending_updates(&pending);
1489+
if !monitor_is_pending_updates {
1490+
let funding_txo = monitor_state.monitor.get_funding_txo();
1491+
let channel_id = monitor_state.monitor.channel_id();
1492+
pending_monitor_events.push((
1493+
funding_txo,
1494+
channel_id,
1495+
vec![MonitorEvent::Completed {
1496+
funding_txo,
1497+
channel_id,
1498+
monitor_update_id: monitor_state.monitor.get_latest_update_id(),
1499+
}],
1500+
monitor_state.monitor.get_counterparty_node_id(),
1501+
));
1502+
}
1503+
}
1504+
}
14471505
pending_monitor_events
14481506
}
14491507
}

lightning/src/ln/chanmon_update_fail_tests.rs

Lines changed: 103 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -3904,11 +3904,28 @@ fn do_test_durable_preimages_on_closed_channel(
39043904
}
39053905
if !close_chans_before_reload {
39063906
check_closed_broadcast(&nodes[1], 1, false);
3907-
let reason = ClosureReason::CommitmentTxConfirmed;
3908-
check_closed_event(&nodes[1], 1, reason, &[node_a_id], 100000);
3907+
// When hold=false, get_and_clear_pending_events also triggers
3908+
// process_background_events (replaying the preimage and force-close updates)
3909+
// and resolves the deferred completions, firing PaymentForwarded alongside
3910+
// ChannelClosed. When hold=true, only ChannelClosed fires.
3911+
let evs = nodes[1].node.get_and_clear_pending_events();
3912+
let expected = if hold_post_reload_mon_update { 1 } else { 2 };
3913+
assert_eq!(evs.len(), expected, "{:?}", evs);
3914+
assert!(evs.iter().any(|e| matches!(
3915+
e,
3916+
Event::ChannelClosed { reason: ClosureReason::CommitmentTxConfirmed, .. }
3917+
)));
3918+
if !hold_post_reload_mon_update {
3919+
assert!(evs.iter().any(|e| matches!(e, Event::PaymentForwarded { .. })));
3920+
check_added_monitors(&nodes[1], mons_added);
3921+
}
39093922
}
39103923
nodes[1].node.timer_tick_occurred();
3911-
check_added_monitors(&nodes[1], mons_added);
3924+
// For !close_chans_before_reload && !hold, background events were already replayed
3925+
// during get_and_clear_pending_events above, so timer_tick adds no monitors.
3926+
let expected_mons =
3927+
if !close_chans_before_reload && !hold_post_reload_mon_update { 0 } else { mons_added };
3928+
check_added_monitors(&nodes[1], expected_mons);
39123929

39133930
// Finally, check that B created a payment preimage transaction and close out the payment.
39143931
let bs_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
@@ -3923,44 +3940,61 @@ fn do_test_durable_preimages_on_closed_channel(
39233940
check_closed_broadcast(&nodes[0], 1, false);
39243941
expect_payment_sent(&nodes[0], payment_preimage, None, true, true);
39253942

3943+
if close_chans_before_reload && !hold_post_reload_mon_update {
3944+
// For close_chans_before_reload with hold=false, the deferred completions
3945+
// haven't been processed yet. Trigger process_pending_monitor_events now.
3946+
let _ = nodes[1].node.get_and_clear_pending_msg_events();
3947+
check_added_monitors(&nodes[1], 0);
3948+
}
3949+
39263950
if !close_chans_before_reload || close_only_a {
39273951
// Make sure the B<->C channel is still alive and well by sending a payment over it.
39283952
let mut reconnect_args = ReconnectArgs::new(&nodes[1], &nodes[2]);
39293953
reconnect_args.pending_responding_commitment_signed.1 = true;
3930-
// The B<->C `ChannelMonitorUpdate` shouldn't be allowed to complete, which is the
3931-
// equivalent to the responding `commitment_signed` being a duplicate for node B, thus we
3932-
// need to set the `pending_responding_commitment_signed_dup` flag.
3933-
reconnect_args.pending_responding_commitment_signed_dup_monitor.1 = true;
3954+
if hold_post_reload_mon_update {
3955+
// When the A-B update is still InProgress, B-C monitor updates are blocked,
3956+
// so the responding commitment_signed is a duplicate that generates no update.
3957+
reconnect_args.pending_responding_commitment_signed_dup_monitor.1 = true;
3958+
}
39343959
reconnect_args.pending_raa.1 = true;
39353960

39363961
reconnect_nodes(reconnect_args);
39373962
}
39383963

3939-
// Once the blocked `ChannelMonitorUpdate` *finally* completes, the pending
3940-
// `PaymentForwarded` event will finally be released.
3941-
let (_, ab_update_id) = nodes[1].chain_monitor.get_latest_mon_update_id(chan_id_ab);
3942-
nodes[1].chain_monitor.chain_monitor.force_channel_monitor_updated(chan_id_ab, ab_update_id);
3964+
if hold_post_reload_mon_update {
3965+
// When the persister returned InProgress, we need to manually complete the
3966+
// A-B monitor update to unblock the PaymentForwarded completion action.
3967+
let (_, ab_update_id) = nodes[1].chain_monitor.get_latest_mon_update_id(chan_id_ab);
3968+
nodes[1]
3969+
.chain_monitor
3970+
.chain_monitor
3971+
.force_channel_monitor_updated(chan_id_ab, ab_update_id);
3972+
}
39433973

39443974
// If the A<->B channel was closed before we reload, we'll replay the claim against it on
39453975
// reload, causing the `PaymentForwarded` event to get replayed.
39463976
let evs = nodes[1].node.get_and_clear_pending_events();
3947-
assert_eq!(evs.len(), if close_chans_before_reload { 2 } else { 1 });
3948-
for ev in evs {
3949-
if let Event::PaymentForwarded { claim_from_onchain_tx, next_htlcs, .. } = ev {
3950-
if !claim_from_onchain_tx {
3951-
// If the outbound channel is still open, the `next_user_channel_id` should be available.
3952-
// This was previously broken.
3953-
assert!(next_htlcs[0].user_channel_id.is_some())
3977+
if !close_chans_before_reload && !hold_post_reload_mon_update {
3978+
// PaymentForwarded already fired during get_and_clear_pending_events above.
3979+
assert!(evs.is_empty(), "{:?}", evs);
3980+
} else {
3981+
assert_eq!(evs.len(), if close_chans_before_reload { 2 } else { 1 }, "{:?}", evs);
3982+
for ev in evs {
3983+
if let Event::PaymentForwarded { claim_from_onchain_tx, next_htlcs, .. } = ev {
3984+
if !claim_from_onchain_tx {
3985+
assert!(next_htlcs[0].user_channel_id.is_some())
3986+
}
3987+
} else {
3988+
panic!("Unexpected event: {:?}", ev);
39543989
}
3955-
} else {
3956-
panic!();
39573990
}
39583991
}
39593992

39603993
if !close_chans_before_reload || close_only_a {
3961-
// Once we call `process_pending_events` the final `ChannelMonitor` for the B<->C channel
3962-
// will fly, removing the payment preimage from it.
3963-
check_added_monitors(&nodes[1], 1);
3994+
if hold_post_reload_mon_update {
3995+
// The B-C monitor update from the completion action fires now.
3996+
check_added_monitors(&nodes[1], 1);
3997+
}
39643998
assert!(nodes[1].node.get_and_clear_pending_events().is_empty());
39653999
send_payment(&nodes[1], &[&nodes[2]], 100_000);
39664000
}
@@ -5178,15 +5212,16 @@ fn test_mpp_claim_to_holding_cell() {
51785212
}
51795213

51805214
#[test]
5181-
#[should_panic(expected = "Watch::update_channel returned Completed while prior updates are still InProgress")]
5182-
fn test_monitor_update_fail_after_funding_spend() {
5183-
// When a counterparty commitment transaction confirms (funding spend), the
5184-
// ChannelMonitor sets funding_spend_seen. If a commitment_signed from the
5185-
// counterparty is then processed (a race between chain events and message
5186-
// processing), update_monitor returns Err because no_further_updates_allowed()
5187-
// is true. ChainMonitor overrides the result to InProgress, permanently
5188-
// freezing the channel. A subsequent preimage claim returning Completed then
5189-
// triggers the per-channel assertion.
5215+
fn test_monitor_update_after_funding_spend() {
5216+
// Test that monitor updates still work after a funding spend is detected by the
5217+
// ChainMonitor but before ChannelManager has processed the corresponding block.
5218+
//
5219+
// When the counterparty commitment transaction confirms (funding spend), the
5220+
// ChannelMonitor sets funding_spend_seen and no_further_updates_allowed() returns
5221+
// true. ChainMonitor overrides all subsequent update_channel results to InProgress
5222+
// to freeze the channel. These overridden updates complete via deferred completions
5223+
// in release_pending_monitor_events, so that MonitorUpdateCompletionActions (like
5224+
// PaymentClaimed) can still fire.
51905225
let chanmon_cfgs = create_chanmon_cfgs(2);
51915226
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
51925227
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
@@ -5197,7 +5232,7 @@ fn test_monitor_update_fail_after_funding_spend() {
51975232
let (_, _, chan_id, _) = create_announced_chan_between_nodes(&nodes, 0, 1);
51985233

51995234
// Route payment 1 fully so B can claim it later.
5200-
let (payment_preimage_1, _payment_hash_1, ..) =
5235+
let (payment_preimage_1, payment_hash_1, ..) =
52015236
route_payment(&nodes[0], &[&nodes[1]], 1_000_000);
52025237

52035238
// Get A's commitment tx (this is the "counterparty" commitment from B's perspective).
@@ -5206,12 +5241,14 @@ fn test_monitor_update_fail_after_funding_spend() {
52065241

52075242
// Confirm A's commitment tx on B's chain_monitor ONLY (not on B's ChannelManager).
52085243
// This sets funding_spend_seen in the monitor, making no_further_updates_allowed() true.
5244+
// We also update the best block on the chain_monitor so the broadcaster height is
5245+
// consistent when claiming HTLCs.
52095246
let (block_hash, height) = nodes[1].best_block_info();
52105247
let block = create_dummy_block(block_hash, height + 1, vec![as_commitment_tx[0].clone()]);
52115248
let txdata: Vec<_> = block.txdata.iter().enumerate().collect();
5212-
nodes[1].chain_monitor.chain_monitor.transactions_confirmed(
5213-
&block.header, &txdata, height + 1,
5214-
);
5249+
nodes[1].chain_monitor.chain_monitor.transactions_confirmed(&block.header, &txdata, height + 1);
5250+
nodes[1].chain_monitor.chain_monitor.best_block_updated(&block.header, height + 1);
5251+
nodes[1].blocks.lock().unwrap().push((block, height + 1));
52155252

52165253
// Send payment 2 from A to B.
52175254
let (route, payment_hash_2, _, payment_secret_2) =
@@ -5233,15 +5270,38 @@ fn test_monitor_update_fail_after_funding_spend() {
52335270

52345271
nodes[1].node.handle_update_add_htlc(node_a_id, &payment_event.msgs[0]);
52355272

5236-
// B processes commitment_signed. The monitor's update_monitor succeeds on the
5237-
// update steps, but returns Err at the end because no_further_updates_allowed()
5238-
// is true (funding_spend_seen). ChainMonitor overrides the result to InProgress.
5273+
// B processes commitment_signed. The monitor applies the update but returns Err
5274+
// because no_further_updates_allowed() is true. ChainMonitor overrides to InProgress,
5275+
// freezing the channel.
52395276
nodes[1].node.handle_commitment_signed(node_a_id, &payment_event.commitment_msg[0]);
52405277
check_added_monitors(&nodes[1], 1);
5241-
assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
52425278

5243-
// B claims payment 1. The PaymentPreimage monitor update returns Completed
5244-
// (update_monitor succeeds for preimage, and persister returns Completed),
5245-
// but the prior InProgress from the commitment_signed is still pending.
5279+
// B claims payment 1. The preimage monitor update also returns InProgress (deferred),
5280+
// so no Completed-while-InProgress assertion fires.
52465281
nodes[1].node.claim_funds(payment_preimage_1);
5282+
check_added_monitors(&nodes[1], 1);
5283+
5284+
// First event cycle: the force-close MonitorEvent (CommitmentTxConfirmed) fires first,
5285+
// then the deferred completions resolve. The force-close generates a ChannelForceClosed
5286+
// update (also deferred), which blocks completion actions. So we only get ChannelClosed.
5287+
let events = nodes[1].node.get_and_clear_pending_events();
5288+
assert_eq!(events.len(), 1);
5289+
match &events[0] {
5290+
Event::ChannelClosed { reason: ClosureReason::CommitmentTxConfirmed, .. } => {},
5291+
_ => panic!("Unexpected event: {:?}", events[0]),
5292+
}
5293+
check_added_monitors(&nodes[1], 1);
5294+
nodes[1].node.get_and_clear_pending_msg_events();
5295+
5296+
// Second event cycle: the ChannelForceClosed deferred completion resolves, unblocking
5297+
// the PaymentClaimed completion action.
5298+
let events = nodes[1].node.get_and_clear_pending_events();
5299+
assert_eq!(events.len(), 1);
5300+
match &events[0] {
5301+
Event::PaymentClaimed { payment_hash, amount_msat, .. } => {
5302+
assert_eq!(payment_hash_1, *payment_hash);
5303+
assert_eq!(1_000_000, *amount_msat);
5304+
},
5305+
_ => panic!("Unexpected event: {:?}", events[0]),
5306+
}
52475307
}

0 commit comments

Comments
 (0)