@@ -3337,6 +3337,93 @@ fn test_durable_preimages_on_closed_channel() {
33373337 do_test_durable_preimages_on_closed_channel ( false , false , false ) ;
33383338}
33393339
3340+ #[ test]
3341+ fn test_sync_async_persist_doesnt_hang ( ) {
3342+ // Previously, we checked if a channel was a candidate for making forward progress based on if
3343+ // the `MonitorEvent::Completed` matched the channel's latest monitor update id. However, this
3344+ // could lead to a rare race when `ChannelMonitor`s were being persisted both synchronously and
3345+ // asynchronously leading to channel hangs.
3346+ //
3347+ // To hit this case, we need to generate a `MonitorEvent::Completed` prior to a new channel
3348+ // update, but which is only processed after the channel update.
3349+ let chanmon_cfgs = create_chanmon_cfgs ( 2 ) ;
3350+ let node_cfgs = create_node_cfgs ( 2 , & chanmon_cfgs) ;
3351+ let node_chanmgrs = create_node_chanmgrs ( 2 , & node_cfgs, & [ None , None ] ) ;
3352+ let nodes = create_network ( 2 , & node_cfgs, & node_chanmgrs) ;
3353+
3354+ let chan_id_ab = create_announced_chan_between_nodes ( & nodes, 0 , 1 ) . 2 ;
3355+
3356+ // Send two payments from A to B, then claim the first, marking the very last
3357+ // ChannelMonitorUpdate as InProgress...
3358+ let ( payment_preimage_1, payment_hash_1, ..) = route_payment ( & nodes[ 0 ] , & [ & nodes[ 1 ] ] , 1_000_000 ) ;
3359+ let ( payment_preimage_2, payment_hash_2, ..) = route_payment ( & nodes[ 0 ] , & [ & nodes[ 1 ] ] , 1_000_000 ) ;
3360+
3361+ nodes[ 1 ] . node . claim_funds ( payment_preimage_1) ;
3362+ check_added_monitors ( & nodes[ 1 ] , 1 ) ;
3363+ expect_payment_claimed ! ( nodes[ 1 ] , payment_hash_1, 1_000_000 ) ;
3364+
3365+ let bs_updates = get_htlc_update_msgs ( & nodes[ 1 ] , & nodes[ 0 ] . node . get_our_node_id ( ) ) ;
3366+ nodes[ 0 ] . node . handle_update_fulfill_htlc ( & nodes[ 1 ] . node . get_our_node_id ( ) , & bs_updates. update_fulfill_htlcs [ 0 ] ) ;
3367+ expect_payment_sent ( & nodes[ 0 ] , payment_preimage_1, None , false , false ) ;
3368+ nodes[ 0 ] . node . handle_commitment_signed ( & nodes[ 1 ] . node . get_our_node_id ( ) , & bs_updates. commitment_signed ) ;
3369+ check_added_monitors ( & nodes[ 0 ] , 1 ) ;
3370+ let ( as_raa, as_cs) = get_revoke_commit_msgs ! ( nodes[ 0 ] , nodes[ 1 ] . node. get_our_node_id( ) ) ;
3371+
3372+ nodes[ 1 ] . node . handle_revoke_and_ack ( & nodes[ 0 ] . node . get_our_node_id ( ) , & as_raa) ;
3373+ check_added_monitors ( & nodes[ 1 ] , 1 ) ;
3374+ nodes[ 1 ] . node . handle_commitment_signed ( & nodes[ 0 ] . node . get_our_node_id ( ) , & as_cs) ;
3375+ check_added_monitors ( & nodes[ 1 ] , 1 ) ;
3376+
3377+ let bs_final_raa = get_event_msg ! ( nodes[ 1 ] , MessageSendEvent :: SendRevokeAndACK , nodes[ 0 ] . node. get_our_node_id( ) ) ;
3378+ chanmon_cfgs[ 0 ] . persister . set_update_ret ( ChannelMonitorUpdateStatus :: InProgress ) ;
3379+ nodes[ 0 ] . node . handle_revoke_and_ack ( & nodes[ 1 ] . node . get_our_node_id ( ) , & bs_final_raa) ;
3380+ check_added_monitors ( & nodes[ 0 ] , 1 ) ;
3381+
3382+ // Immediately complete the monitor update, but before the ChannelManager has a chance to see
3383+ // the MonitorEvent::Completed, create a channel update by receiving a claim on the second
3384+ // payment.
3385+ let ( outpoint, _, ab_update_id) = nodes[ 0 ] . chain_monitor . latest_monitor_update_id . lock ( ) . unwrap ( ) . get ( & chan_id_ab) . unwrap ( ) . clone ( ) ;
3386+ nodes[ 0 ] . chain_monitor . chain_monitor . channel_monitor_updated ( outpoint, ab_update_id) . unwrap ( ) ;
3387+
3388+ nodes[ 1 ] . node . claim_funds ( payment_preimage_2) ;
3389+ check_added_monitors ( & nodes[ 1 ] , 1 ) ;
3390+ expect_payment_claimed ! ( nodes[ 1 ] , payment_hash_2, 1_000_000 ) ;
3391+
3392+ let bs_updates = get_htlc_update_msgs ( & nodes[ 1 ] , & nodes[ 0 ] . node . get_our_node_id ( ) ) ;
3393+ nodes[ 0 ] . node . handle_update_fulfill_htlc ( & nodes[ 1 ] . node . get_our_node_id ( ) , & bs_updates. update_fulfill_htlcs [ 0 ] ) ;
3394+ nodes[ 0 ] . node . handle_commitment_signed ( & nodes[ 1 ] . node . get_our_node_id ( ) , & bs_updates. commitment_signed ) ;
3395+ check_added_monitors ( & nodes[ 0 ] , 1 ) ;
3396+
3397+ // At this point, we have completed an extra `ChannelMonitorUpdate` but the `ChannelManager`
3398+ // hasn't yet seen our `MonitorEvent::Completed`. When we call
3399+ // `get_and_clear_pending_msg_events` here, the `ChannelManager` finally sees that event and
3400+ // should return the channel to normal operation.
3401+ let ( as_raa, as_cs) = get_revoke_commit_msgs ! ( nodes[ 0 ] , nodes[ 1 ] . node. get_our_node_id( ) ) ;
3402+
3403+ // Now that we've completed our test, process the events we have queued up (which we were not
3404+ // able to check until now as they would have caused the `ChannelManager` to look at the
3405+ // pending `MonitorEvent`s).
3406+ let pending_events = nodes[ 0 ] . node . get_and_clear_pending_events ( ) ;
3407+ assert_eq ! ( pending_events. len( ) , 2 ) ;
3408+ if let Event :: PaymentPathSuccessful { ref payment_hash, ..} = pending_events[ 1 ] {
3409+ assert_eq ! ( payment_hash. unwrap( ) , payment_hash_1) ;
3410+ } else { panic ! ( ) ; }
3411+ if let Event :: PaymentSent { ref payment_hash, ..} = pending_events[ 0 ] {
3412+ assert_eq ! ( * payment_hash, payment_hash_2) ;
3413+ } else { panic ! ( ) ; }
3414+
3415+ // Finally, complete the claiming of the second payment
3416+ nodes[ 1 ] . node . handle_revoke_and_ack ( & nodes[ 0 ] . node . get_our_node_id ( ) , & as_raa) ;
3417+ check_added_monitors ( & nodes[ 1 ] , 1 ) ;
3418+ nodes[ 1 ] . node . handle_commitment_signed ( & nodes[ 0 ] . node . get_our_node_id ( ) , & as_cs) ;
3419+ check_added_monitors ( & nodes[ 1 ] , 1 ) ;
3420+
3421+ let bs_final_raa = get_event_msg ! ( nodes[ 1 ] , MessageSendEvent :: SendRevokeAndACK , nodes[ 0 ] . node. get_our_node_id( ) ) ;
3422+ nodes[ 0 ] . node . handle_revoke_and_ack ( & nodes[ 1 ] . node . get_our_node_id ( ) , & bs_final_raa) ;
3423+ check_added_monitors ( & nodes[ 0 ] , 1 ) ;
3424+ expect_payment_path_successful ! ( nodes[ 0 ] ) ;
3425+ }
3426+
33403427fn do_test_reload_mon_update_completion_actions ( close_during_reload : bool ) {
33413428 // Test that if a `ChannelMonitorUpdate` completes but a `ChannelManager` isn't serialized
33423429 // before restart we run the monitor update completion action on startup.
0 commit comments