From e38f22c860edb7804b4722ac2332f7c51b9c6b72 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Fri, 19 Aug 2022 05:25:19 +0000 Subject: [PATCH 1/9] vsock: SO_RCVLOWAT transport set callback This adds transport specific callback for SO_RCVLOWAT, because in some transports it may be difficult to know current available number of bytes ready to read. Thus, when SO_RCVLOWAT is set, transport may reject it. Signed-off-by: Arseniy Krasnov Signed-off-by: Paolo Abeni --- include/net/af_vsock.h | 1 + net/vmw_vsock/af_vsock.c | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 1c53c4c4d88fe..d609a088cb27e 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -135,6 +135,7 @@ struct vsock_transport { u64 (*stream_rcvhiwat)(struct vsock_sock *); bool (*stream_is_active)(struct vsock_sock *); bool (*stream_allow)(u32 cid, u32 port); + int (*set_rcvlowat)(struct vsock_sock *vsk, int val); /* SEQ_PACKET. */ ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg, diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index b4ee163154a68..07c8f74a821f9 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -2137,6 +2137,25 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, return err; } +static int vsock_set_rcvlowat(struct sock *sk, int val) +{ + const struct vsock_transport *transport; + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + + if (val > vsk->buffer_size) + return -EINVAL; + + transport = vsk->transport; + + if (transport && transport->set_rcvlowat) + return transport->set_rcvlowat(vsk, val); + + WRITE_ONCE(sk->sk_rcvlowat, val ? 
: 1); + return 0; +} + static const struct proto_ops vsock_stream_ops = { .family = PF_VSOCK, .owner = THIS_MODULE, @@ -2156,6 +2175,7 @@ static const struct proto_ops vsock_stream_ops = { .recvmsg = vsock_connectible_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, + .set_rcvlowat = vsock_set_rcvlowat, }; static const struct proto_ops vsock_seqpacket_ops = { From 24764f8d3c316a3c58b51140d8e489e98e7ffdcc Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Fri, 19 Aug 2022 05:27:34 +0000 Subject: [PATCH 2/9] hv_sock: disable SO_RCVLOWAT support For Hyper-V it is quite difficult to support this socket option, due to transport internals, so disable it. Signed-off-by: Arseniy Krasnov Reviewed-by: Dexuan Cui Reviewed-by: Stefano Garzarella Signed-off-by: Paolo Abeni --- net/vmw_vsock/hyperv_transport.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index fd98229e3db30..59c3e26970690 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -815,6 +815,12 @@ int hvs_notify_send_post_enqueue(struct vsock_sock *vsk, ssize_t written, return 0; } +static +int hvs_set_rcvlowat(struct vsock_sock *vsk, int val) +{ + return -EOPNOTSUPP; +} + static struct vsock_transport hvs_transport = { .module = THIS_MODULE, @@ -850,6 +856,7 @@ static struct vsock_transport hvs_transport = { .notify_send_pre_enqueue = hvs_notify_send_pre_enqueue, .notify_send_post_enqueue = hvs_notify_send_post_enqueue, + .set_rcvlowat = hvs_set_rcvlowat }; static bool hvs_check_transport(struct vsock_sock *vsk) From e7a3266c9167fe8878c303959a8cc4527f83888b Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Fri, 19 Aug 2022 05:29:34 +0000 Subject: [PATCH 3/9] virtio/vsock: use 'target' in notify_poll_in callback This callback controls setting of the POLLIN and POLLRDNORM output bits of the poll() syscall, but in some cases it is incorrect to set them when the socket has at least 1 byte of available 
data. Use 'target', which already exists. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Signed-off-by: Paolo Abeni --- net/vmw_vsock/virtio_transport_common.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index ec2c2afbf0d06..8f6356ebcdd17 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -634,10 +634,7 @@ virtio_transport_notify_poll_in(struct vsock_sock *vsk, size_t target, bool *data_ready_now) { - if (vsock_stream_has_data(vsk)) - *data_ready_now = true; - else - *data_ready_now = false; + *data_ready_now = vsock_stream_has_data(vsk) >= target; return 0; } From a274f6ff3c5c79c27d254b48cad3b4814c950908 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Fri, 19 Aug 2022 05:31:43 +0000 Subject: [PATCH 4/9] vmci/vsock: use 'target' in notify_poll_in callback This callback controls setting of the POLLIN and POLLRDNORM output bits of the poll() syscall, but in some cases it is incorrect to set them when the socket has at least 1 byte of available data. Use 'target', which already exists. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Reviewed-by: Vishnu Dasa Signed-off-by: Paolo Abeni --- net/vmw_vsock/vmci_transport_notify.c | 8 ++++---- net/vmw_vsock/vmci_transport_notify_qstate.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c index d69fc4b595ad4..852097e2b9e6d 100644 --- a/net/vmw_vsock/vmci_transport_notify.c +++ b/net/vmw_vsock/vmci_transport_notify.c @@ -340,12 +340,12 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk, { struct vsock_sock *vsk = vsock_sk(sk); - if (vsock_stream_has_data(vsk)) { + if (vsock_stream_has_data(vsk) >= target) { *data_ready_now = true; } else { - /* We can't read right now because there is nothing in the - * queue. 
Ask for notifications when there is something to - * read. + /* We can't read right now because there is not enough data + * in the queue. Ask for notifications when there is something + * to read. */ if (sk->sk_state == TCP_ESTABLISHED) { if (!send_waiting_read(sk, 1)) diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c index 0f36d7c45db39..12f0cb8fe9988 100644 --- a/net/vmw_vsock/vmci_transport_notify_qstate.c +++ b/net/vmw_vsock/vmci_transport_notify_qstate.c @@ -161,12 +161,12 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk, { struct vsock_sock *vsk = vsock_sk(sk); - if (vsock_stream_has_data(vsk)) { + if (vsock_stream_has_data(vsk) >= target) { *data_ready_now = true; } else { - /* We can't read right now because there is nothing in the - * queue. Ask for notifications when there is something to - * read. + /* We can't read right now because there is not enough data + * in the queue. Ask for notifications when there is something + * to read. */ if (sk->sk_state == TCP_ESTABLISHED) vsock_block_update_write_window(sk); From ee0b3843a26920dad713c27cd8f3a3cfc5ae9c37 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Fri, 19 Aug 2022 05:33:47 +0000 Subject: [PATCH 5/9] vsock: pass sock_rcvlowat to notify_poll_in as target Passing 1 as the target to notify_poll_in(), we don't honor what the user has set via SO_RCVLOWAT, going to set POLLIN and POLLRDNORM, even if we don't have the amount of bytes expected by the user. 
Let's use sock_rcvlowat() to get the right target to pass to notify_poll_in(); Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Signed-off-by: Paolo Abeni --- net/vmw_vsock/af_vsock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 07c8f74a821f9..15171ba76cc3d 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1066,8 +1066,9 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, if (transport && transport->stream_is_active(vsk) && !(sk->sk_shutdown & RCV_SHUTDOWN)) { bool data_ready_now = false; + int target = sock_rcvlowat(sk, 0, INT_MAX); int ret = transport->notify_poll_in( - vsk, 1, &data_ready_now); + vsk, target, &data_ready_now); if (ret < 0) { mask |= EPOLLERR; } else { From f2fdcf67aceb1a7d5e0661cb7ca95cda68d3014a Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Fri, 19 Aug 2022 05:36:52 +0000 Subject: [PATCH 6/9] vsock: add API call for data ready This adds 'vsock_data_ready()' which must be called by transport to kick sleeping data readers. It checks for SO_RCVLOWAT value before waking user, thus preventing spurious wake ups. Based on 'tcp_data_ready()' logic. 
Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Signed-off-by: Paolo Abeni --- include/net/af_vsock.h | 1 + net/vmw_vsock/af_vsock.c | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index d609a088cb27e..568a87c5e0d0f 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -78,6 +78,7 @@ struct vsock_sock { s64 vsock_stream_has_data(struct vsock_sock *vsk); s64 vsock_stream_has_space(struct vsock_sock *vsk); struct sock *vsock_create_connected(struct sock *parent); +void vsock_data_ready(struct sock *sk); /**** TRANSPORT ****/ diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 15171ba76cc3d..ee418701cdee9 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -882,6 +882,16 @@ s64 vsock_stream_has_space(struct vsock_sock *vsk) } EXPORT_SYMBOL_GPL(vsock_stream_has_space); +void vsock_data_ready(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (vsock_stream_has_data(vsk) >= sk->sk_rcvlowat || + sock_flag(sk, SOCK_DONE)) + sk->sk_data_ready(sk); +} +EXPORT_SYMBOL_GPL(vsock_data_ready); + static int vsock_release(struct socket *sock) { __vsock_release(sock->sk, 0); From 39f1ed33a4489e2f7a55d5a96576c73af3529461 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Fri, 19 Aug 2022 05:39:24 +0000 Subject: [PATCH 7/9] virtio/vsock: check SO_RCVLOWAT before wake up reader This adds an extra condition for waking up the data reader: do it only when the number of readable bytes >= SO_RCVLOWAT. Otherwise, there is no point in kicking the user, because it will keep waiting until SO_RCVLOWAT bytes are available to dequeue. This check is performed in vsock_data_ready(). 
Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Signed-off-by: Paolo Abeni --- net/vmw_vsock/virtio_transport_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 8f6356ebcdd17..35863132f4f11 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1081,7 +1081,7 @@ virtio_transport_recv_connected(struct sock *sk, switch (le16_to_cpu(pkt->hdr.op)) { case VIRTIO_VSOCK_OP_RW: virtio_transport_recv_enqueue(vsk, pkt); - sk->sk_data_ready(sk); + vsock_data_ready(sk); return err; case VIRTIO_VSOCK_OP_CREDIT_REQUEST: virtio_transport_send_credit_update(vsk); From e061aed99855ccef2d64f5bdd66996e19d6cf60b Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Fri, 19 Aug 2022 05:41:35 +0000 Subject: [PATCH 8/9] vmci/vsock: check SO_RCVLOWAT before wake up reader This adds an extra condition for waking up the data reader: do it only when the number of readable bytes >= SO_RCVLOWAT. Otherwise, there is no point in kicking the user, because it will keep waiting until SO_RCVLOWAT bytes are available to dequeue. This check is performed in vsock_data_ready(). 
Signed-off-by: Arseniy Krasnov Reviewed-by: Vishnu Dasa Signed-off-by: Paolo Abeni --- net/vmw_vsock/vmci_transport_notify.c | 2 +- net/vmw_vsock/vmci_transport_notify_qstate.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c index 852097e2b9e6d..7c3a7db134b28 100644 --- a/net/vmw_vsock/vmci_transport_notify.c +++ b/net/vmw_vsock/vmci_transport_notify.c @@ -307,7 +307,7 @@ vmci_transport_handle_wrote(struct sock *sk, struct vsock_sock *vsk = vsock_sk(sk); PKT_FIELD(vsk, sent_waiting_read) = false; #endif - sk->sk_data_ready(sk); + vsock_data_ready(sk); } static void vmci_transport_notify_pkt_socket_init(struct sock *sk) diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c index 12f0cb8fe9988..e96a88d850a86 100644 --- a/net/vmw_vsock/vmci_transport_notify_qstate.c +++ b/net/vmw_vsock/vmci_transport_notify_qstate.c @@ -84,7 +84,7 @@ vmci_transport_handle_wrote(struct sock *sk, bool bottom_half, struct sockaddr_vm *dst, struct sockaddr_vm *src) { - sk->sk_data_ready(sk); + vsock_data_ready(sk); } static void vsock_block_update_write_window(struct sock *sk) @@ -282,7 +282,7 @@ vmci_transport_notify_pkt_recv_post_dequeue( /* See the comment in * vmci_transport_notify_pkt_send_post_enqueue(). */ - sk->sk_data_ready(sk); + vsock_data_ready(sk); } return err; From b1346338fbaefac1b796a50478f8e8070b54e9e4 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Fri, 19 Aug 2022 05:43:50 +0000 Subject: [PATCH 9/9] vsock_test: POLLIN + SO_RCVLOWAT test This adds a test to check that when poll() returns the POLLIN and POLLRDNORM bits, the next read call won't block. 
Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Signed-off-by: Paolo Abeni --- tools/testing/vsock/vsock_test.c | 108 +++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index dc577461afc20..bb6d691cb30d0 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -18,6 +18,7 @@ #include #include #include +#include <poll.h> #include "timeout.h" #include "control.h" @@ -596,6 +597,108 @@ static void test_seqpacket_invalid_rec_buffer_server(const struct test_opts *opt close(fd); } +#define RCVLOWAT_BUF_SIZE 128 + +static void test_stream_poll_rcvlowat_server(const struct test_opts *opts) +{ + int fd; + int i; + + fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + /* Send 1 byte. */ + send_byte(fd, 1, 0); + + control_writeln("SRVSENT"); + + /* Wait until client is ready to receive rest of data. */ + control_expectln("CLNSENT"); + + for (i = 0; i < RCVLOWAT_BUF_SIZE - 1; i++) + send_byte(fd, 1, 0); + + /* Keep socket in active state. */ + control_expectln("POLLDONE"); + + close(fd); +} + +static void test_stream_poll_rcvlowat_client(const struct test_opts *opts) +{ + int lowat_val = RCVLOWAT_BUF_SIZE; /* SO_RCVLOWAT takes an int */ + char buf[RCVLOWAT_BUF_SIZE]; + struct pollfd fds; + ssize_t read_res; + short poll_flags; + int fd; + + fd = vsock_stream_connect(opts->peer_cid, 1234); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, + &lowat_val, sizeof(lowat_val))) { + perror("setsockopt"); + exit(EXIT_FAILURE); + } + + control_expectln("SRVSENT"); + + /* At this point, server sent 1 byte. */ + fds.fd = fd; + poll_flags = POLLIN | POLLRDNORM; + fds.events = poll_flags; + + /* Try to wait for 1 sec. */ + if (poll(&fds, 1, 1000) < 0) { + perror("poll"); + exit(EXIT_FAILURE); + } + + /* poll() must return nothing. 
*/ + if (fds.revents) { + fprintf(stderr, "Unexpected poll result %hx\n", + fds.revents); + exit(EXIT_FAILURE); + } + + /* Tell server to send rest of data. */ + control_writeln("CLNSENT"); + + /* Poll for data. */ + if (poll(&fds, 1, 10000) < 0) { + perror("poll"); + exit(EXIT_FAILURE); + } + + /* Only these two bits are expected. */ + if (fds.revents != poll_flags) { + fprintf(stderr, "Unexpected poll result %hx\n", + fds.revents); + exit(EXIT_FAILURE); + } + + /* Use MSG_DONTWAIT, if call is going to wait, EAGAIN + * will be returned. + */ + read_res = recv(fd, buf, sizeof(buf), MSG_DONTWAIT); + if (read_res != RCVLOWAT_BUF_SIZE) { + fprintf(stderr, "Unexpected recv result %zi\n", + read_res); + exit(EXIT_FAILURE); + } + + control_writeln("POLLDONE"); + + close(fd); +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -646,6 +749,11 @@ static struct test_case test_cases[] = { .run_client = test_seqpacket_invalid_rec_buffer_client, .run_server = test_seqpacket_invalid_rec_buffer_server, }, + { + .name = "SOCK_STREAM poll() + SO_RCVLOWAT", + .run_client = test_stream_poll_rcvlowat_client, + .run_server = test_stream_poll_rcvlowat_server, + }, {}, };