Skip to content

Commit 799aa7f

Browse files
Cong Wang, Alexei Starovoitov
Cong Wang
authored and
Alexei Starovoitov
committed
skmsg: Avoid lock_sock() in sk_psock_backlog()
We do not have to lock the sock to avoid losing sk_socket, instead we can purge all the ingress queues when we close the socket. Sending or receiving packets after orphaning socket makes no sense. We do purge these queues when psock refcnt reaches zero but here we want to purge them explicitly in sock_map_close(). There are also some nasty race conditions on testing bit SK_PSOCK_TX_ENABLED and queuing/canceling the psock work, we can expand psock->ingress_lock a bit to protect them too. As noticed by John, we still have to lock the psock->work, because the same work item could be running concurrently on different CPU's. Signed-off-by: Cong Wang <[email protected]> Signed-off-by: Alexei Starovoitov <[email protected]> Acked-by: John Fastabend <[email protected]> Link: https://lore.kernel.org/bpf/[email protected]
1 parent 0739cd2 commit 799aa7f

File tree

3 files changed

+37
-16
lines changed

3 files changed

+37
-16
lines changed

include/linux/skmsg.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ struct sk_psock {
9999
void (*saved_write_space)(struct sock *sk);
100100
void (*saved_data_ready)(struct sock *sk);
101101
struct proto *sk_proto;
102+
struct mutex work_mutex;
102103
struct sk_psock_work_state work_state;
103104
struct work_struct work;
104105
union {
@@ -347,6 +348,7 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err)
347348
}
348349

349350
struct sk_psock *sk_psock_init(struct sock *sk, int node);
351+
void sk_psock_stop(struct sk_psock *psock, bool wait);
350352

351353
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
352354
int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);

net/core/skmsg.c

Lines changed: 34 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -497,7 +497,7 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
497497
if (!ingress) {
498498
if (!sock_writeable(psock->sk))
499499
return -EAGAIN;
500-
return skb_send_sock_locked(psock->sk, skb, off, len);
500+
return skb_send_sock(psock->sk, skb, off, len);
501501
}
502502
return sk_psock_skb_ingress(psock, skb);
503503
}
@@ -511,8 +511,7 @@ static void sk_psock_backlog(struct work_struct *work)
511511
u32 len, off;
512512
int ret;
513513

514-
/* Lock sock to avoid losing sk_socket during loop. */
515-
lock_sock(psock->sk);
514+
mutex_lock(&psock->work_mutex);
516515
if (state->skb) {
517516
skb = state->skb;
518517
len = state->len;
@@ -529,7 +528,7 @@ static void sk_psock_backlog(struct work_struct *work)
529528
skb_bpf_redirect_clear(skb);
530529
do {
531530
ret = -EIO;
532-
if (likely(psock->sk->sk_socket))
531+
if (!sock_flag(psock->sk, SOCK_DEAD))
533532
ret = sk_psock_handle_skb(psock, skb, off,
534533
len, ingress);
535534
if (ret <= 0) {
@@ -553,7 +552,7 @@ static void sk_psock_backlog(struct work_struct *work)
553552
kfree_skb(skb);
554553
}
555554
end:
556-
release_sock(psock->sk);
555+
mutex_unlock(&psock->work_mutex);
557556
}
558557

559558
struct sk_psock *sk_psock_init(struct sock *sk, int node)
@@ -591,6 +590,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
591590
spin_lock_init(&psock->link_lock);
592591

593592
INIT_WORK(&psock->work, sk_psock_backlog);
593+
mutex_init(&psock->work_mutex);
594594
INIT_LIST_HEAD(&psock->ingress_msg);
595595
spin_lock_init(&psock->ingress_lock);
596596
skb_queue_head_init(&psock->ingress_skb);
@@ -631,17 +631,15 @@ static void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
631631
}
632632
}
633633

634-
static void sk_psock_zap_ingress(struct sk_psock *psock)
634+
static void __sk_psock_zap_ingress(struct sk_psock *psock)
635635
{
636636
struct sk_buff *skb;
637637

638638
while ((skb = skb_dequeue(&psock->ingress_skb)) != NULL) {
639639
skb_bpf_redirect_clear(skb);
640640
kfree_skb(skb);
641641
}
642-
spin_lock_bh(&psock->ingress_lock);
643642
__sk_psock_purge_ingress_msg(psock);
644-
spin_unlock_bh(&psock->ingress_lock);
645643
}
646644

647645
static void sk_psock_link_destroy(struct sk_psock *psock)
@@ -654,6 +652,18 @@ static void sk_psock_link_destroy(struct sk_psock *psock)
654652
}
655653
}
656654

655+
void sk_psock_stop(struct sk_psock *psock, bool wait)
656+
{
657+
spin_lock_bh(&psock->ingress_lock);
658+
sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
659+
sk_psock_cork_free(psock);
660+
__sk_psock_zap_ingress(psock);
661+
spin_unlock_bh(&psock->ingress_lock);
662+
663+
if (wait)
664+
cancel_work_sync(&psock->work);
665+
}
666+
657667
static void sk_psock_done_strp(struct sk_psock *psock);
658668

659669
static void sk_psock_destroy_deferred(struct work_struct *gc)
@@ -665,12 +675,12 @@ static void sk_psock_destroy_deferred(struct work_struct *gc)
665675
sk_psock_done_strp(psock);
666676

667677
cancel_work_sync(&psock->work);
678+
mutex_destroy(&psock->work_mutex);
668679

669680
psock_progs_drop(&psock->progs);
670681

671682
sk_psock_link_destroy(psock);
672683
sk_psock_cork_free(psock);
673-
sk_psock_zap_ingress(psock);
674684

675685
if (psock->sk_redir)
676686
sock_put(psock->sk_redir);
@@ -688,8 +698,7 @@ static void sk_psock_destroy(struct rcu_head *rcu)
688698

689699
void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
690700
{
691-
sk_psock_cork_free(psock);
692-
sk_psock_zap_ingress(psock);
701+
sk_psock_stop(psock, false);
693702

694703
write_lock_bh(&sk->sk_callback_lock);
695704
sk_psock_restore_proto(sk, psock);
@@ -699,7 +708,6 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
699708
else if (psock->progs.stream_verdict)
700709
sk_psock_stop_verdict(sk, psock);
701710
write_unlock_bh(&sk->sk_callback_lock);
702-
sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
703711

704712
call_rcu(&psock->rcu, sk_psock_destroy);
705713
}
@@ -770,14 +778,20 @@ static void sk_psock_skb_redirect(struct sk_buff *skb)
770778
* error that caused the pipe to break. We can't send a packet on
771779
* a socket that is in this state so we drop the skb.
772780
*/
773-
if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
774-
!sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
781+
if (!psock_other || sock_flag(sk_other, SOCK_DEAD)) {
782+
kfree_skb(skb);
783+
return;
784+
}
785+
spin_lock_bh(&psock_other->ingress_lock);
786+
if (!sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
787+
spin_unlock_bh(&psock_other->ingress_lock);
775788
kfree_skb(skb);
776789
return;
777790
}
778791

779792
skb_queue_tail(&psock_other->ingress_skb, skb);
780793
schedule_work(&psock_other->work);
794+
spin_unlock_bh(&psock_other->ingress_lock);
781795
}
782796

783797
static void sk_psock_tls_verdict_apply(struct sk_buff *skb, struct sock *sk, int verdict)
@@ -845,8 +859,12 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
845859
err = sk_psock_skb_ingress_self(psock, skb);
846860
}
847861
if (err < 0) {
848-
skb_queue_tail(&psock->ingress_skb, skb);
849-
schedule_work(&psock->work);
862+
spin_lock_bh(&psock->ingress_lock);
863+
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
864+
skb_queue_tail(&psock->ingress_skb, skb);
865+
schedule_work(&psock->work);
866+
}
867+
spin_unlock_bh(&psock->ingress_lock);
850868
}
851869
break;
852870
case __SK_REDIRECT:

net/core/sock_map.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1540,6 +1540,7 @@ void sock_map_close(struct sock *sk, long timeout)
15401540
saved_close = psock->saved_close;
15411541
sock_map_remove_links(sk, psock);
15421542
rcu_read_unlock();
1543+
sk_psock_stop(psock, true);
15431544
release_sock(sk);
15441545
saved_close(sk, timeout);
15451546
}

0 commit comments

Comments
 (0)