[lvc-project] [PATCH net v2] net: sockmap: avoid race between sock_map_destroy() and sk_psock_put()

Michal Luczaj mhal at rbox.co
Wed Sep 25 03:22:39 MSK 2024


On 9/24/24 10:23, Paolo Abeni wrote:
> ...
> I guess that the main point in Cong's feedback is that a sockmap update 
> is not supposed to race with sock_map_destroy() (???) @Cong, @John, 
> @JakubS: any comments on that?

In somewhat related news: sock_map_unhash() races with the update, hitting
WARN_ON_ONCE(saved_unhash == sock_map_unhash).

CPU0					CPU1
====					====

BPF_MAP_DELETE_ELEM
  sk_psock_drop()
    sk_psock_restore_proto
    rcu_assign_sk_user_data(NULL)
    					shutdown()
					  sock_map_unhash()
					    psock = sk_psock(sk)
					    if (unlikely(!psock)) {
BPF_MAP_UPDATE_ELEM
  sock_map_init_proto()
    sock_replace_proto
					      saved_unhash = READ_ONCE(sk->sk_prot)->unhash;
					    }
					    if (WARN_ON_ONCE(saved_unhash == sock_map_unhash))
					      return;

[   20.860668] WARNING: CPU: 1 PID: 1238 at net/core/sock_map.c:1641 sock_map_unhash+0xa1/0x220
[   20.860686] CPU: 1 UID: 0 PID: 1238 Comm: a.out Not tainted 6.11.0+ #59
[   20.860688] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014
[   20.860705] Call Trace:
[   20.860706]  <TASK>
[   20.860725]  unix_shutdown+0xb0/0x470
[   20.860728]  __sys_shutdown+0x7a/0xa0
[   20.860731]  __x64_sys_shutdown+0x10/0x20
[   20.860733]  do_syscall_64+0x93/0x180
[   20.860788]  entry_SYSCALL_64_after_hwframe+0x76/0x7e

Under a VM it takes about 10 minutes to trigger the splat:

#define _GNU_SOURCE
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <pthread.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <sys/un.h>

#include <linux/bpf.h>

/*
 * State shared, without any locking, between main() and the racer threads:
 *   s       - the current AF_UNIX socket pair; refilled by socketpair() on
 *             every iteration of main()'s loop, s[0] is read by racer_update()
 *   sockmap - file descriptor of the sockmap created in main()
 */
int s[2], sockmap;

/*
 * Report a fatal error and terminate the program.
 *
 * msg: prefix handed to perror(), which appends the description of the
 *      current errno. Call this while errno still reflects the failure.
 *
 * Does not return; the process exits with EXIT_FAILURE.
 */
static void die(const char *msg)
{
	perror(msg);
	exit(EXIT_FAILURE);
}

/*
 * Create a BPF_MAP_TYPE_SOCKMAP map through the raw bpf(2) syscall.
 *
 * key_size, value_size: size in bytes of the map's keys and values.
 * max_entries: number of slots in the map.
 *
 * Returns the file descriptor of the new map. On failure the program is
 * terminated via die(), so no error value is ever returned.
 */
static int create_sockmap(int key_size, int value_size, int max_entries)
{
	union bpf_attr attr;
	int map;

	/*
	 * Zero the entire union before filling it in. A designated
	 * initializer only guarantees the named member is set, while bpf(2)
	 * requires every unused byte of the attribute to be zero.
	 */
	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_SOCKMAP;
	attr.key_size = key_size;
	attr.value_size = value_size;
	attr.max_entries = max_entries;

	map = syscall(SYS_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
	if (map < 0)
		die("bpf_create_map");

	return map;
}

/*
 * Issue BPF_MAP_UPDATE_ELEM on a map through the raw bpf(2) syscall.
 *
 * map_fd: file descriptor of the target map.
 * key:    integer key; its address is passed to the kernel.
 * value:  pointer to the value to store (the callers in this file pass a
 *         pointer to a socket file descriptor).
 * flags:  update flags such as BPF_ANY.
 *
 * The syscall result is discarded on purpose: this is used as a best-effort
 * operation in racing loops where individual failures are expected.
 */
static void map_update_elem(int map_fd, int key, void *value, uint64_t flags)
{
	union bpf_attr attr;

	/*
	 * Zero the entire union before filling it in. A designated
	 * initializer only guarantees the named member is set, while bpf(2)
	 * requires every unused byte of the attribute to be zero.
	 */
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	/* Go through uintptr_t so the casts are also well-formed on 32-bit. */
	attr.key = (uint64_t)(uintptr_t)&key;
	attr.value = (uint64_t)(uintptr_t)value;
	attr.flags = flags;

	(void)syscall(SYS_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
}

/*
 * Issue BPF_MAP_DELETE_ELEM on a map through the raw bpf(2) syscall.
 *
 * map_fd: file descriptor of the target map.
 * key:    integer key to remove; its address is passed to the kernel.
 *
 * The syscall result is discarded on purpose: this is used as a best-effort
 * operation in a racing loop where individual failures are expected.
 */
static void map_del_elem(int map_fd, int key)
{
	union bpf_attr attr;

	/*
	 * Zero the entire union before filling it in. A designated
	 * initializer only guarantees the named member is set, while bpf(2)
	 * requires every unused byte of the attribute to be zero.
	 */
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	/* Go through uintptr_t so the cast is also well-formed on 32-bit. */
	attr.key = (uint64_t)(uintptr_t)&key;

	(void)syscall(SYS_bpf, BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
}

/*
 * Thread body: keep deleting key 0 from the global sockmap, forever.
 * The thread argument is ignored and the loop never terminates.
 */
static void *racer_del(void *unused)
{
	(void)unused;

	while (1) {
		map_del_elem(sockmap, 0);
	}

	return NULL;
}
/*
 * Thread body: keep storing the socket s[0] under key 0 of the global
 * sockmap, forever. The thread argument is ignored and the loop never
 * terminates.
 */
static void *racer_update(void *unused)
{
	(void)unused;

	while (1) {
		map_update_elem(sockmap, 0, &s[0], BPF_ANY);
	}

	return NULL;
}

/*
 * Reproducer entry point.
 *
 * Creates a one-slot sockmap, starts one thread that deletes key 0 and one
 * that updates key 0 in endless loops, then loops forever itself: create an
 * AF_UNIX stream socket pair, insert s[0] into the map, shut down the read
 * side of s[1], and close both sockets.
 *
 * Never returns normally; exits through die() if setup or socketpair() fails.
 */
int main(void)
{
	pthread_t t0, t1;
	int err;

	/*
	 * Create the map before starting the racers, so they never read an
	 * unset `sockmap` and issue bpf() calls against file descriptor 0.
	 */
	sockmap = create_sockmap(sizeof(int), sizeof(int), 1);

	/*
	 * pthread_create() returns the error number instead of setting errno;
	 * store it in errno so that die()'s perror() reports the real cause.
	 */
	err = pthread_create(&t0, NULL, racer_del, NULL);
	if (err) {
		errno = err;
		die("pthread_create");
	}

	err = pthread_create(&t1, NULL, racer_update, NULL);
	if (err) {
		errno = err;
		die("pthread_create");
	}

	for (;;) {
		if (socketpair(AF_UNIX, SOCK_STREAM, 0, s) < 0)
			die("socketpair");

		map_update_elem(sockmap, 0, &s[0], BPF_ANY);
		shutdown(s[1], SHUT_RD);

		close(s[0]);
		close(s[1]);
	}
}

With mdelay(1) it's a matter of seconds:

diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 724b6856fcc3..98a964399813 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1631,6 +1631,7 @@ void sock_map_unhash(struct sock *sk)
 	psock = sk_psock(sk);
 	if (unlikely(!psock)) {
 		rcu_read_unlock();
+		mdelay(1);
 		saved_unhash = READ_ONCE(sk->sk_prot)->unhash;
 	} else {
 		saved_unhash = psock->saved_unhash;

I've tried the patch below and it seems to do the trick:

diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 724b6856fcc3..a384771a66e8 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1627,6 +1627,7 @@ void sock_map_unhash(struct sock *sk)
 	void (*saved_unhash)(struct sock *sk);
 	struct sk_psock *psock;
 
+	lock_sock(sk);
 	rcu_read_lock();
 	psock = sk_psock(sk);
 	if (unlikely(!psock)) {
@@ -1637,6 +1638,7 @@ void sock_map_unhash(struct sock *sk)
 		sock_map_remove_links(sk, psock);
 		rcu_read_unlock();
 	}
+	release_sock(sk);
 	if (WARN_ON_ONCE(saved_unhash == sock_map_unhash))
 		return;
 	if (saved_unhash)

but perhaps what needs to be fixed instead is af_unix shutdown()?
CCing Kuniyuki.

thanks,
Michal




More information about the lvc-project mailing list