tcp_bpf: add sk_rmem_alloc related logic for tcp_bpf ingress redirection
When we do sk_psock_verdict_apply->sk_psock_skb_ingress, an sk_msg is
created out of the skb, and the rmem accounting of that sk_msg is
handled by the skb itself.

For sk_msgs in the __SK_REDIRECT case of tcp_bpf_send_verdict, when
redirecting to the ingress of a socket, we call sk_rmem_schedule and add
the sk_msg to the ingress_msg list of sk_redir, but we never update
sk_rmem_alloc. As a result, apart from the global memory limit, the
receive memory of sk_redir is effectively unbounded. Thus, add
sk_rmem_alloc accounting to limit the receive buffer.
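
To make the gap concrete: a per-socket receive-buffer bound only works if
the bytes queued on ingress_msg are visible in sk_rmem_alloc. The helper
below is purely illustrative and is not part of this patch; it shows the
kind of check that can never trigger while sk_rmem_alloc stays untouched
on this path:

/* Illustrative only: a receive-buffer bound of this shape depends on
 * sk_rmem_alloc being charged for the bytes queued to ingress_msg.
 */
static bool sk_ingress_over_rcvbuf(const struct sock *sk, u32 size)
{
        return atomic_read(&sk->sk_rmem_alloc) + size > READ_ONCE(sk->sk_rcvbuf);
}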

Since sk_msg_recvmsg and __sk_psock_purge_ingress_msg are shared by both
paths, we test "msg->skb" to tell whether an sk_msg is skb-backed; if it
is not, we do the memory accounting explicitly.
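
A condensed sketch of that rule (the helper name is hypothetical; the
patch open-codes this in sk_msg_recvmsg and __sk_psock_purge_ingress_msg):

/* Hypothetical helper restating the rule: skb-backed sk_msgs are charged
 * to sk_rmem_alloc through their skb and uncharged by the skb destructor,
 * while sk_msgs built by bpf_tcp_ingress have no skb and therefore need
 * an explicit uncharge when their bytes are consumed or purged.
 */
static void sk_msg_rmem_uncharge(struct sock *sk, struct sk_msg *msg, u32 bytes)
{
        if (!msg->skb) {
                sk_mem_uncharge(sk, bytes);
                atomic_sub(bytes, &sk->sk_rmem_alloc);
        }
}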

Fixes: 604326b ("bpf, sockmap: convert to generic sk_msg interface")
Signed-off-by: Zijian Zhang <[email protected]>
Zijian Zhang authored and Kernel Patches Daemon committed Dec 10, 2024
1 parent 7d2c134 commit d001ec9
Showing 3 changed files with 16 additions and 5 deletions.
include/linux/skmsg.h: 8 additions & 3 deletions
@@ -317,17 +317,22 @@ static inline void sock_drop(struct sock *sk, struct sk_buff *skb)
         kfree_skb(skb);
 }
 
-static inline void sk_psock_queue_msg(struct sk_psock *psock,
+static inline bool sk_psock_queue_msg(struct sk_psock *psock,
                                       struct sk_msg *msg)
 {
+        bool ret;
+
         spin_lock_bh(&psock->ingress_lock);
-        if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+        if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
                 list_add_tail(&msg->list, &psock->ingress_msg);
-        else {
+                ret = true;
+        } else {
                 sk_msg_free(psock->sk, msg);
                 kfree(msg);
+                ret = false;
         }
         spin_unlock_bh(&psock->ingress_lock);
+        return ret;
 }
 
 static inline struct sk_msg *sk_psock_dequeue_msg(struct sk_psock *psock)
net/core/skmsg.c: 5 additions & 1 deletion
@@ -445,8 +445,10 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
                         if (likely(!peek)) {
                                 sge->offset += copy;
                                 sge->length -= copy;
-                                if (!msg_rx->skb)
+                                if (!msg_rx->skb) {
                                         sk_mem_uncharge(sk, copy);
+                                        atomic_sub(copy, &sk->sk_rmem_alloc);
+                                }
                                 msg_rx->sg.size -= copy;
 
                                 if (!sge->length) {
@@ -772,6 +774,8 @@ static void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
 
         list_for_each_entry_safe(msg, tmp, &psock->ingress_msg, list) {
                 list_del(&msg->list);
+                if (!msg->skb)
+                        atomic_sub(msg->sg.size, &psock->sk->sk_rmem_alloc);
                 sk_msg_free(psock->sk, msg);
                 kfree(msg);
         }
net/ipv4/tcp_bpf.c: 3 additions & 1 deletion
@@ -56,6 +56,7 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
                 }
 
                 sk_mem_charge(sk, size);
+                atomic_add(size, &sk->sk_rmem_alloc);
                 sk_msg_xfer(tmp, msg, i, size);
                 copied += size;
                 if (sge->length)
@@ -74,7 +75,8 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
 
         if (!ret) {
                 msg->sg.start = i;
-                sk_psock_queue_msg(psock, tmp);
+                if (!sk_psock_queue_msg(psock, tmp))
+                        atomic_sub(copied, &sk->sk_rmem_alloc);
                 sk_psock_data_ready(sk, psock);
         } else {
                 sk_msg_free(sk, tmp);
