[lvc-project] [PATCH net v2] net: sockmap: avoid race between sock_map_destroy() and sk_psock_put()
Michal Luczaj
mhal at rbox.co
Wed Sep 25 03:22:39 MSK 2024
On 9/24/24 10:23, Paolo Abeni wrote:
> ...
> I guess that the main point in Cong's feedback is that a sockmap update
> is not supposed to race with sock_map_destroy() (???) @Cong, @John,
> @JakubS: any comments on that?
In somewhat related news: sock_map_unhash() races with the update, hitting
WARN_ON_ONCE(saved_unhash == sock_map_unhash).
CPU0 CPU1
==== ====
BPF_MAP_DELETE_ELEM
sk_psock_drop()
sk_psock_restore_proto
rcu_assign_sk_user_data(NULL)
shutdown()
sock_map_unhash()
psock = sk_psock(sk)
if (unlikely(!psock)) {
BPF_MAP_UPDATE_ELEM
sock_map_init_proto()
sock_replace_proto
saved_unhash = READ_ONCE(sk->sk_prot)->unhash;
}
if (WARN_ON_ONCE(saved_unhash == sock_map_unhash))
return;
[ 20.860668] WARNING: CPU: 1 PID: 1238 at net/core/sock_map.c:1641 sock_map_unhash+0xa1/0x220
[ 20.860686] CPU: 1 UID: 0 PID: 1238 Comm: a.out Not tainted 6.11.0+ #59
[ 20.860688] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014
[ 20.860705] Call Trace:
[ 20.860706] <TASK>
[ 20.860725] unix_shutdown+0xb0/0x470
[ 20.860728] __sys_shutdown+0x7a/0xa0
[ 20.860731] __x64_sys_shutdown+0x10/0x20
[ 20.860733] do_syscall_64+0x93/0x180
[ 20.860788] entry_SYSCALL_64_after_hwframe+0x76/0x7e
Under VM it takes about 10 minutes to trigger the splat:
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/un.h>
#include <sys/syscall.h>
#include <sys/socket.h>
#include <linux/bpf.h>
int s[2], sockmap;
static void die(char *msg)
{
perror(msg);
exit(-1);
}
static int create_sockmap(int key_size, int value_size, int max_entries)
{
union bpf_attr attr = {
.map_type = BPF_MAP_TYPE_SOCKMAP,
.key_size = key_size,
.value_size = value_size,
.max_entries = max_entries
};
int map = syscall(SYS_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
if (map < 0)
die("bpf_create_map");
return map;
}
static void map_update_elem(int map_fd, int key, void *value, uint64_t flags)
{
union bpf_attr attr = {
.map_fd = map_fd,
.key = (uint64_t)&key,
.value = (uint64_t)value,
.flags = flags
};
syscall(SYS_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
}
static void map_del_elem(int map_fd, int key)
{
union bpf_attr attr = {
.map_fd = map_fd,
.key = (uint64_t)&key
};
syscall(SYS_bpf, BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
}
static void *racer_del(void *unused)
{
for (;;)
map_del_elem(sockmap, 0);
return NULL;
}
static void *racer_update(void *unused)
{
for (;;)
map_update_elem(sockmap, 0, &s[0], BPF_ANY);
return NULL;
}
int main(void)
{
pthread_t t0, t1;
if (pthread_create(&t0, NULL, racer_del, NULL))
die("pthread_create");
if (pthread_create(&t1, NULL, racer_update, NULL))
die("pthread_create");
sockmap = create_sockmap(sizeof(int), sizeof(int), 1);
for (;;) {
if (socketpair(AF_UNIX, SOCK_STREAM, 0, s) < 0)
die("socketpair");
map_update_elem(sockmap, 0, &s[0], BPF_ANY);
shutdown(s[1], 0);
close(s[0]);
close(s[1]);
}
}
With mdelay(1) it's a matter of seconds:
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 724b6856fcc3..98a964399813 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1631,6 +1631,7 @@ void sock_map_unhash(struct sock *sk)
psock = sk_psock(sk);
if (unlikely(!psock)) {
rcu_read_unlock();
+ mdelay(1);
saved_unhash = READ_ONCE(sk->sk_prot)->unhash;
} else {
saved_unhash = psock->saved_unhash;
I've tried the patch below and it seems to do the trick
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 724b6856fcc3..a384771a66e8 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1627,6 +1627,7 @@ void sock_map_unhash(struct sock *sk)
void (*saved_unhash)(struct sock *sk);
struct sk_psock *psock;
+ lock_sock(sk);
rcu_read_lock();
psock = sk_psock(sk);
if (unlikely(!psock)) {
@@ -1637,6 +1638,7 @@ void sock_map_unhash(struct sock *sk)
sock_map_remove_links(sk, psock);
rcu_read_unlock();
}
+ release_sock(sk);
if (WARN_ON_ONCE(saved_unhash == sock_map_unhash))
return;
if (saved_unhash)
but perhaps what needs to be fixed instead is af_unix shutdown()?
CCing Kuniyuki.
thanks,
Michal
More information about the lvc-project
mailing list