[rulkc] [PATCH v1] vsock/virtio: rework MSG_ZEROCOPY flag handling

Arseniy Krasnov avkrasnov at rulkc.org
Mon Jun 8 11:10:24 MSK 2026


On 05/06/2026 18:08, David Laight wrote:
> On Fri,  5 Jun 2026 14:53:14 +0300
> Arseniy Krasnov <avkrasnov at rulkc.org> wrote:
>
>> Logically it was based on TCP implementation, so to make further
>> support easier, rewrite it in the TCP way.
>>
>> Signed-off-by: Arseniy Krasnov <avkrasnov at rulkc.org>
>> ---
>>  net/vmw_vsock/virtio_transport_common.c | 64 ++++++++++++-------------
>>  1 file changed, 32 insertions(+), 32 deletions(-)
>>
>> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
>> index 2fd9eaaf5ca6..00caeeaa5590 100644
>> --- a/net/vmw_vsock/virtio_transport_common.c
>> +++ b/net/vmw_vsock/virtio_transport_common.c
>> @@ -73,10 +73,13 @@ static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops,
>>  static int virtio_transport_fill_skb(struct sk_buff *skb,
>>  				     struct virtio_vsock_pkt_info *info,
>>  				     size_t len,
>> -				     bool zcopy)
>> +				     bool zcopy, struct ubuf_info *uarg)
>>  {
>>  	struct msghdr *msg = info->msg;
>>  
>> +	/* We have completion - attach it to 'skb'. */
>> +	skb_zcopy_set(skb, uarg, NULL);
>> +
>>  	if (zcopy)
>>  		return __zerocopy_sg_from_iter(msg, NULL, skb,
>>  					       &msg->msg_iter, len, NULL);
>> @@ -208,7 +211,8 @@ static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *
>>  						  u32 src_cid,
>>  						  u32 src_port,
>>  						  u32 dst_cid,
>> -						  u32 dst_port)
>> +						  u32 dst_port,
>> +						  struct ubuf_info *uarg)
>>  {
>>  	struct vsock_sock *vsk;
>>  	struct sk_buff *skb;
>> @@ -245,7 +249,7 @@ static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *
>>  	if (info->msg && payload_len > 0) {
>>  		int err;
>>  
>> -		err = virtio_transport_fill_skb(skb, info, payload_len, zcopy);
>> +		err = virtio_transport_fill_skb(skb, info, payload_len, zcopy, uarg);
>>  		if (err)
>>  			goto out;
>>  
>> @@ -321,38 +325,36 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
>>  	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
>>  		return pkt_len;
>>  
>> -	if (info->msg) {
>> -		/* If zerocopy is not enabled by 'setsockopt()', we behave as
>> -		 * there is no MSG_ZEROCOPY flag set.
>> +	if (info->msg && (info->msg->msg_flags & MSG_ZEROCOPY)) {
>> +		/* If 'info->msg' is not NULL, this is only VIRTIO_VSOCK_OP_RW.
>> +		 * 'MSG_ZEROCOPY' flag handling here is based on the same flag
>> +		 * handling from 'tcp_sendmsg_locked()'.
>>  		 */
>> -		if (!sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY))
>> -			info->msg->msg_flags &= ~MSG_ZEROCOPY;
>> +		if (info->msg->msg_ubuf) {
>> +			uarg = info->msg->msg_ubuf;
>> +			can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len);
>> +		} else if (sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY)) {
>> +			uarg = msg_zerocopy_realloc(sk_vsock(vsk), pkt_len,
>> +						    NULL, false);
>> +			if (!uarg) {
>> +				virtio_transport_put_credit(vvs, pkt_len);
>> +				return -ENOMEM;
>> +			}
>>  
>> -		if (info->msg->msg_flags & MSG_ZEROCOPY)
>>  			can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len);
>>  
>> +			if (!can_zcopy)
>> +				uarg_to_msgzc(uarg)->zerocopy = 0;
>> +
>> +			have_uref = true;
>> +		}
>> +
>> +		/* 'can_zcopy' means that this transmission will be
>> +		 * in zerocopy way (e.g. using 'frags' array).
>> +		 */
> I've not looked at the tcp code, but the above doesn't look right.
> I don't see why msg->msg_ubuf might be non-NULL without SOCK_ZEROCOPY set.
> That would give the outer code a callback when the last skb is freed but
> still copy the data.

Hi, 

I guess case when 'msg->msg_ubuf' is non-NULL is special case today for io_uring MSG_ZEROCOPY implementation.
It was added here https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=eb315a7d1396b1139fc7daea55f2d3191e8e7092
As I see implementation of its tests in tools/testing/selftests/net/io_uring_zerocopy_tx.c , it doesn't require setting SOCK_ZEROCOPY option for
socket, so for virtio vsock case I just copied same logic to maintain compatibility, because there is MSG_ZEROCOPY io_uring test for virtio/vsock.


>
> I also don't see the point of calling msg_zerocopy_realloc() to get a
> callback when the last skb is freed and then setting
> 	uarg_to_msgzc(uarg)->zerocopy = 0;
> so that the callback doesn't actually do anything.
> It isn't as though you 'find out' later on that you can't actually do
> zerocopy.


Sorry, what do You mean "last skb" ? In this code we first allocate uarg (allocate, because third arg is always NULL). Then in
loop we allocate sk_buffs, fill it with data and send. I mean first/last skb will be freed after uarg is already allocated and we
don't touch it. I think i didn't understand Your question here.


>
>>  		if (can_zcopy)
>>  			max_skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE,
>>  					    (MAX_SKB_FRAGS * PAGE_SIZE));
>> -
>> -		if (info->msg->msg_flags & MSG_ZEROCOPY &&
>> -		    info->op == VIRTIO_VSOCK_OP_RW) {
>> -			uarg = info->msg->msg_ubuf;
>> -
>> -			if (!uarg) {
>> -				uarg = msg_zerocopy_realloc(sk_vsock(vsk),
>> -							    pkt_len, NULL, false);
>> -				if (!uarg) {
>> -					virtio_transport_put_credit(vvs, pkt_len);
>> -					return -ENOMEM;
>> -				}
>> -
>> -				if (!can_zcopy)
>> -					uarg_to_msgzc(uarg)->zerocopy = 0;
>> -
>> -				have_uref = true;
>> -			}
>> -		}
>>  	}
>>  
>>  	rest_len = pkt_len;
>> @@ -365,14 +367,12 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
>>  
>>  		skb = virtio_transport_alloc_skb(info, skb_len, can_zcopy,
>>  						 src_cid, src_port,
>> -						 dst_cid, dst_port);
>> +						 dst_cid, dst_port, uarg);
>>  		if (!skb) {
>>  			ret = -ENOMEM;
>>  			break;
>>  		}
>>  
>> -		skb_zcopy_set(skb, uarg, NULL);
> Aren't you passing uarg through two function calls instead of doing it here.
> Doesn't even make it clearer what is going on.


Agree, to simplify patch, uarg could be set earlier (without passing it to functions) I guess.

Thanks


>
> -- David
>
>> -
>>  		virtio_transport_inc_tx_pkt(vvs, skb);
>>  
>>  		ret = t_ops->send_pkt(skb, info->net);
>> @@ -1178,7 +1178,7 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
>>  					   le64_to_cpu(hdr->dst_cid),
>>  					   le32_to_cpu(hdr->dst_port),
>>  					   le64_to_cpu(hdr->src_cid),
>> -					   le32_to_cpu(hdr->src_port));
>> +					   le32_to_cpu(hdr->src_port), NULL);
>>  	if (!reply)
>>  		return -ENOMEM;
>>  



More information about the rulkc mailing list