From abf359acd208624b7155804d15947dba6461b446 Mon Sep 17 00:00:00 2001 From: David Bar-On Date: Fri, 14 Jun 2024 16:29:01 +0300 Subject: [PATCH] Changes per reviewer comments receive UDP prefix --- configure.ac | 28 ++++++++++++++++++++ src/iperf.h | 7 ++++- src/iperf_api.c | 58 ++++++++++++++++++++++++++++++++++++++++-- src/iperf_api.h | 2 ++ src/iperf_client_api.c | 2 +- src/iperf_error.c | 7 +++++ src/iperf_locale.c | 10 +++++++- src/iperf_tcp.c | 16 ++++++++++-- src/iperf_udp.c | 22 +++++++++++++--- src/net.c | 57 +++++++++++++++++++++++++++++++++++------ src/net.h | 6 +++-- 11 files changed, 195 insertions(+), 20 deletions(-) diff --git a/configure.ac b/configure.ac index a23c3bcf4..b71559db5 100644 --- a/configure.ac +++ b/configure.ac @@ -337,6 +337,34 @@ if test "x$iperf3_cv_header_tcp_info_snd_wnd" = "xyes"; then AC_DEFINE([HAVE_TCP_INFO_SND_WND], [1], [Have tcpi_snd_wnd field in tcp_info.]) fi +# Check for MSG_ZEROCOPY (mostly on Linux) +AC_CACHE_CHECK([MSG_ZEROCOPY send option], +[iperf3_cv_header_msg_zerocopy], +AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([[#include + #include + #include ]], + [[int foo = MSG_ZEROCOPY;]])], + iperf3_cv_header_msg_zerocopy=yes, + iperf3_cv_header_msg_zerocopy=no)) +if test "x$iperf3_cv_header_msg_zerocopy" = "xyes"; then + AC_DEFINE([HAVE_MSG_ZEROCOPY], [1], [Have MSG_ZEROCOPY send option.]) +fi + +# Check for MSG_TRUNC (mostly on Linux) +AC_CACHE_CHECK([MSG_TRUNC recv option], +[iperf3_cv_header_msg_trunc], +AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([[#include + #include + #include ]], + [[int foo = MSG_TRUNC;]])], + iperf3_cv_header_msg_trunc=yes, + iperf3_cv_header_msg_trunc=no)) +if test "x$iperf3_cv_header_msg_trunc" = "xyes"; then + AC_DEFINE([HAVE_MSG_TRUNC], [1], [Have MSG_TRUNC recv option.]) +fi + # Check if we need -lrt for clock_gettime AC_SEARCH_LIBS(clock_gettime, [rt posix4]) # Check for clock_gettime support diff --git a/src/iperf.h b/src/iperf.h index 527e549ed..e3fc9e294 100644 --- a/src/iperf.h +++ b/src/iperf.h @@ -169,6 +169,7 @@ struct iperf_settings char *client_password; EVP_PKEY *client_rsa_pubkey; #endif // HAVE_SSL + int skip_rx_copy; /* Whether to ignore received messages data, using MSG_TRUNC option */ int connect_timeout; /* socket connection timeout, in ms */ int idle_timeout; /* server idle time timeout */ unsigned int snd_timeout; /* Timeout for sending tcp messages in active mode, in us */ @@ -331,7 +332,7 @@ struct iperf_test int verbose; /* -V option - verbose mode */ int json_output; /* -J option - JSON output */ int json_stream; /* --json-stream */ - int zerocopy; /* -Z option - use sendfile */ + int zerocopy; /* -Z option - use sendfile for TCP */ int debug; /* -d option - enable debug */ enum debug_level debug_level; /* -d option option - level of debug messages to show */ int get_server_output; /* --get-server-output */ @@ -459,4 +460,8 @@ extern int gerror; /* error value from getaddrinfo(3), for use in internal error /* In Reverse mode, maximum number of packets to wait for "accept" response - to handle out of order packets */ #define MAX_REVERSE_OUT_OF_ORDER_PACKETS 2 +/* Zerocopy - when using sendfile() of MSG_ZEROCOPY for TCP (for UDP any not 0 value is using MSG_ZEROCOPY) */ +#define ZEROCOPY_TCP_SENDFILE 1 +#define ZEROCOPY_TCP_MSG_ZEROCOPY 2 + #endif /* !__IPERF_H */ diff --git a/src/iperf_api.c b/src/iperf_api.c index 4c73e8328..bf691f66f 100644 --- a/src/iperf_api.c +++ b/src/iperf_api.c @@ -702,7 +702,7 @@ iperf_has_zerocopy( void ) void iperf_set_test_zerocopy(struct iperf_test *ipt, int zerocopy) { - ipt->zerocopy = (zerocopy && has_sendfile()); + ipt->zerocopy = (zerocopy && (ipt->protocol->id == Pudp ? 1 : has_sendfile())); } void @@ -1104,7 +1104,11 @@ iperf_parse_arguments(struct iperf_test *test, int argc, char **argv) #if defined(HAVE_FLOWLABEL) {"flowlabel", required_argument, NULL, 'L'}, #endif /* HAVE_FLOWLABEL */ +#if defined(HAVE_MSG_ZEROCOPY) + {"zerocopy", optional_argument, NULL, 'Z'}, +#else {"zerocopy", no_argument, NULL, 'Z'}, +#endif /* HAVE_MSG_ZEROCOPY */ {"omit", required_argument, NULL, 'O'}, {"file", required_argument, NULL, 'F'}, {"repeating-payload", no_argument, NULL, OPT_REPEATING_PAYLOAD}, @@ -1131,6 +1135,9 @@ iperf_parse_arguments(struct iperf_test *test, int argc, char **argv) #if defined(HAVE_DONT_FRAGMENT) {"dont-fragment", no_argument, NULL, OPT_DONT_FRAGMENT}, #endif /* HAVE_DONT_FRAGMENT */ +#if defined(HAVE_MSG_TRUNC) + {"skip-rx-copy", no_argument, NULL, OPT_SKIP_RX_COPY}, +#endif /* HAVE_MSG_TRUNC */ #if defined(HAVE_SSL) {"username", required_argument, NULL, OPT_CLIENT_USERNAME}, {"rsa-public-key-path", required_argument, NULL, OPT_CLIENT_RSA_PUBLIC_KEY}, @@ -1467,11 +1474,24 @@ iperf_parse_arguments(struct iperf_test *test, int argc, char **argv) TAILQ_INSERT_TAIL(&test->xbind_addrs, xbe, link); break; case 'Z': +#if defined(HAVE_MSG_ZEROCOPY) + if (optarg && strcmp(optarg, "")) { + if (!strcmp(optarg, "z")) + test->zerocopy = ZEROCOPY_TCP_MSG_ZEROCOPY; + else { + i_errno = IENOSENDFILE; + return -1; + } + } else { + test->zerocopy = ZEROCOPY_TCP_SENDFILE; + } +#else if (!has_sendfile()) { i_errno = IENOSENDFILE; return -1; } - test->zerocopy = 1; + test->zerocopy = ZEROCOPY_TCP_SENDFILE; +#endif /* HAVE_MSG_ZEROCOPY */ client_flag = 1; break; case OPT_REPEATING_PAYLOAD: @@ -1635,6 +1655,12 @@ iperf_parse_arguments(struct iperf_test *test, int argc, char **argv) test->use_pkcs1_padding = 1; break; #endif /* HAVE_SSL */ +#if defined(HAVE_MSG_TRUNC) + case OPT_SKIP_RX_COPY: + test->settings->skip_rx_copy = 1; + client_flag = 1; + break; +#endif /* HAVE_MSG_TRUNC */ case OPT_PACING_TIMER: test->settings->pacing_timer = unit_atoi(optarg); client_flag = 1; @@ -1744,6 +1770,28 @@ iperf_parse_arguments(struct iperf_test *test, int argc, char **argv) return -1; } +#if defined(HAVE_MSG_ZEROCOPY) + // UDP supports "zero copy" only using MSG_ZEROCOPY + if (test->protocol->id == Pudp && test->zerocopy) + test->zerocopy = ZEROCOPY_TCP_MSG_ZEROCOPY; + // Zero copy for TCP use sendfile() + if (test->zerocopy && test->protocol->id != Pudp && !has_sendfile()) { + i_errno = IENOSENDFILE; + return -1; + } + // Using MSG_ZEROCOPY is not supported when disk file is used + if (test->diskfile_name != (char*) 0 && test->zerocopy == ZEROCOPY_TCP_MSG_ZEROCOPY) { + i_errno = IEDISKFILEZEROCOPY; + return -1; + } +#else + // Zero copy is supported only by TCP + if (test->zerocopy && test->protocol->id != Ptcp) { + i_errno = IENOSENDFILE; + return -1; + } +#endif /* HAVE_MSG_ZEROCOPY */ + if (blksize == 0) { if (test->protocol->id == Pudp) blksize = 0; /* try to dynamically determine from MSS */ @@ -2270,6 +2318,8 @@ send_parameters(struct iperf_test *test) cJSON_AddStringToObject(j, "authtoken", test->settings->authtoken); } #endif // HAVE_SSL + if (test->settings->skip_rx_copy) + cJSON_AddNumberToObject(j, "skip_rx_copy", test->settings->skip_rx_copy); cJSON_AddStringToObject(j, "client_version", IPERF_VERSION); if (test->debug) { @@ -2376,6 +2426,8 @@ get_parameters(struct iperf_test *test) if ((j_p = cJSON_GetObjectItem(j, "authtoken")) != NULL) test->settings->authtoken = strdup(j_p->valuestring); #endif //HAVE_SSL + if ((j_p = cJSON_GetObjectItem(j, "skip_rx_copy")) != NULL) + test->settings->skip_rx_copy = j_p->valueint; if (test->mode && test->protocol->id == Ptcp && has_tcpinfo_retransmits()) test->sender_has_retransmits = 1; if (test->settings->rate) @@ -2971,6 +3023,7 @@ iperf_defaults(struct iperf_test *testp) testp->settings->rcv_timeout.secs = DEFAULT_NO_MSG_RCVD_TIMEOUT / SEC_TO_mS; testp->settings->rcv_timeout.usecs = (DEFAULT_NO_MSG_RCVD_TIMEOUT % SEC_TO_mS) * mS_TO_US; testp->zerocopy = 0; + testp->settings->skip_rx_copy = 0; memset(testp->cookie, 0, COOKIE_SIZE); @@ -3268,6 +3321,7 @@ iperf_reset_test(struct iperf_test *test) test->settings->tos = 0; test->settings->dont_fragment = 0; test->zerocopy = 0; + test->settings->skip_rx_copy = 0; #if defined(HAVE_SSL) if (test->settings->authtoken) { diff --git a/src/iperf_api.h b/src/iperf_api.h index 131314243..fe067caca 100644 --- a/src/iperf_api.h +++ b/src/iperf_api.h @@ -101,6 +101,7 @@ typedef atomic_uint_fast64_t atomic_iperf_size_t; #define OPT_JSON_STREAM 28 #define OPT_SND_TIMEOUT 29 #define OPT_USE_PKCS1_PADDING 30 +#define OPT_SKIP_RX_COPY 31 /* states */ #define TEST_START 1 @@ -420,6 +421,7 @@ enum { IESNDTIMEOUT = 33, // Illegal message send timeout IEUDPFILETRANSFER = 34, // Cannot transfer file using UDP IESERVERAUTHUSERS = 35, // Cannot access authorized users file + IEDISKFILEZEROCOPY = 36, // Sending disk file using MSG_ZEROCOPY is not supported /* Test errors */ IENEWTEST = 100, // Unable to create a new test (check perror) IEINITTEST = 101, // Test initialization failed (check perror) diff --git a/src/iperf_client_api.c b/src/iperf_client_api.c index 7ad4c939b..f51465ac6 100644 --- a/src/iperf_client_api.c +++ b/src/iperf_client_api.c @@ -399,7 +399,7 @@ iperf_connect(struct iperf_test *test) /* Create and connect the control channel */ if (test->ctrl_sck < 0) // Create the control channel using an ephemeral port - test->ctrl_sck = netdial(test->settings->domain, Ptcp, test->bind_address, test->bind_dev, 0, test->server_hostname, test->server_port, test->settings->connect_timeout); + test->ctrl_sck = netdial(test->settings->domain, Ptcp, test->bind_address, test->bind_dev, 0, test->server_hostname, test->server_port, test->settings->connect_timeout, 0); if (test->ctrl_sck < 0) { i_errno = IECONNECT; return -1; diff --git a/src/iperf_error.c b/src/iperf_error.c index 0fedf3110..24c653f60 100644 --- a/src/iperf_error.c +++ b/src/iperf_error.c @@ -210,7 +210,14 @@ iperf_strerror(int int_errno) snprintf(errstr, len, "TCP MSS too large (maximum = %d bytes)", MAX_MSS); break; case IENOSENDFILE: +#if defined(HAVE_MSG_ZEROCOPY) + snprintf(errstr, len, "invalid zerocopy option value or this OS does not support sendfile"); +#else snprintf(errstr, len, "this OS does not support sendfile"); +#endif /* HAVE_MSG_ZEROCOPY */ + break; + case IEDISKFILEZEROCOPY: + snprintf(errstr, len, "Sending disk file using MSG_ZEROCOPY is not supported"); break; case IEOMIT: snprintf(errstr, len, "bogus value for --omit"); diff --git a/src/iperf_locale.c b/src/iperf_locale.c index 9d94e0234..3cf2747fc 100644 --- a/src/iperf_locale.c +++ b/src/iperf_locale.c @@ -198,7 +198,15 @@ const char usage_longstr[] = "Usage: iperf3 [-s|-c host] [options]\n" #if defined(HAVE_FLOWLABEL) " -L, --flowlabel N set the IPv6 flow label (only supported on Linux)\n" #endif /* HAVE_FLOWLABEL */ - " -Z, --zerocopy use a 'zero copy' method of sending data\n" +#if defined(HAVE_MSG_ZEROCOPY) + " -Z, --zerocopy[=z] use a 'zero copy' method of sending data;\n" + " for TCP use MSG_ZEROCOPY with '=z', default is using sendfile()\n" +#else + " -Z, --zerocopy use a 'zero copy' method of sending TCP data\n" +#endif /* HAVE_MSG_ZEROCOPY */ +#if defined(HAVE_MSG_TRUNC) + " --skip-rx-copy ignore received messages using MSG_TRUNC option\n" +#endif /* HAVE_MSG_TRUNC */ " -O, --omit N perform pre-test for N seconds and omit the pre-test statistics\n" " -T, --title str prefix every output line with this string\n" " --extra-data str data string to include in client and server JSON\n" diff --git a/src/iperf_tcp.c b/src/iperf_tcp.c index e025515ab..5ea6a0d77 100644 --- a/src/iperf_tcp.c +++ b/src/iperf_tcp.c @@ -56,8 +56,15 @@ int iperf_tcp_recv(struct iperf_stream *sp) { int r; + int sock_opt; - r = Nread(sp->socket, sp->buffer, sp->settings->blksize, Ptcp); +#if defined(HAVE_MSG_TRUNC) + sock_opt = sp->test->settings->skip_rx_copy ? MSG_TRUNC : 0; +#else + sock_opt = 0; +#endif /* HAVE_MSG_TRUNC */ + + r = Nrecv(sp->socket, sp->buffer, sp->settings->blksize, Ptcp, sock_opt); if (r < 0) return r; @@ -88,6 +95,11 @@ iperf_tcp_send(struct iperf_stream *sp) if (!sp->pending_size) sp->pending_size = sp->settings->blksize; +#if defined(HAVE_MSG_ZEROCOPY) + if (sp->test->zerocopy == ZEROCOPY_TCP_MSG_ZEROCOPY) + r = Nsend(sp->socket, sp->buffer, sp->pending_size, Ptcp, MSG_ZEROCOPY); + else +#endif /* HAVE_MSG_ZEROCOPY */ if (sp->test->zerocopy) r = Nsendfile(sp->buffer_fd, sp->socket, sp->buffer, sp->pending_size); else @@ -380,7 +392,7 @@ iperf_tcp_connect(struct iperf_test *test) int saved_errno; int rcvbuf_actual, sndbuf_actual; - s = create_socket(test->settings->domain, SOCK_STREAM, test->bind_address, test->bind_dev, test->bind_port, test->server_hostname, test->server_port, &server_res); + s = create_socket(test->settings->domain, SOCK_STREAM, test->bind_address, test->bind_dev, test->bind_port, test->server_hostname, test->server_port, &server_res, test->zerocopy); if (s < 0) { i_errno = IESTREAMCONNECT; return -1; diff --git a/src/iperf_udp.c b/src/iperf_udp.c index a603236df..02d5dab4f 100644 --- a/src/iperf_udp.c +++ b/src/iperf_udp.c @@ -61,8 +61,17 @@ iperf_udp_recv(struct iperf_stream *sp) int first_packet = 0; double transit = 0, d = 0; struct iperf_time sent_time, arrival_time, temp_time; + int sock_opt = 0; - r = Nread(sp->socket, sp->buffer, size, Pudp); +#if defined(HAVE_MSG_TRUNC) + // UDP recv() with MSG_TRUNC reads only the size bytes, but return the length of the full packet + if (sp->test->settings->skip_rx_copy) { + sock_opt = MSG_TRUNC; + size = sizeof(sec) + sizeof(usec) + sizeof(pcount); + } +#endif /* HAVE_MSG_TRUNC */ + + r = Nrecv(sp->socket, sp->buffer, size, Pudp, sock_opt); /* * If we got an error in the read, or if we didn't read anything @@ -201,6 +210,12 @@ iperf_udp_send(struct iperf_stream *sp) int r; int size = sp->settings->blksize; struct iperf_time before; + int sock_opt = 0; + +#if defined(HAVE_MSG_ZEROCOPY) + if (sp->test->zerocopy) + sock_opt = MSG_ZEROCOPY; +#endif /* HAVE_MSG_ZEROCOPY */ iperf_time_now(&before); @@ -234,7 +249,7 @@ iperf_udp_send(struct iperf_stream *sp) } - r = Nwrite(sp->socket, sp->buffer, size, Pudp); + r = Nsend(sp->socket, sp->buffer, size, Pudp, sock_opt); if (r <= 0) { --sp->packet_count; /* Don't count messages that no data was sent from them. @@ -446,6 +461,7 @@ iperf_udp_accept(struct iperf_test *test) /* * Create a new "listening" socket to replace the one we were using before. */ + FD_CLR(test->prot_listener, &test->read_set); // No control messages from old listener test->prot_listener = netannounce(test->settings->domain, Pudp, test->bind_address, test->bind_dev, test->server_port); if (test->prot_listener < 0) { i_errno = IESTREAMLISTEN; @@ -507,7 +523,7 @@ iperf_udp_connect(struct iperf_test *test) int i, max_len_wait_for_reply; /* Create and bind our local socket. */ - if ((s = netdial(test->settings->domain, Pudp, test->bind_address, test->bind_dev, test->bind_port, test->server_hostname, test->server_port, -1)) < 0) { + if ((s = netdial(test->settings->domain, Pudp, test->bind_address, test->bind_dev, test->bind_port, test->server_hostname, test->server_port, -1, test->zerocopy)) < 0) { i_errno = IESTREAMCONNECT; return -1; } diff --git a/src/net.c b/src/net.c index 632ae0319..967c094e2 100644 --- a/src/net.c +++ b/src/net.c @@ -124,11 +124,12 @@ timeout_connect(int s, const struct sockaddr *name, socklen_t namelen, /* create a socket */ int -create_socket(int domain, int proto, const char *local, const char *bind_dev, int local_port, const char *server, int port, struct addrinfo **server_res_out) +create_socket(int domain, int proto, const char *local, const char *bind_dev, int local_port, const char *server, int port, struct addrinfo **server_res_out, int zerocopy) { struct addrinfo hints, *local_res = NULL, *server_res = NULL; int s, saved_errno; char portstr[6]; + int opt; if (local) { memset(&hints, 0, sizeof(hints)); @@ -157,6 +158,19 @@ create_socket(int domain, int proto, const char *local, const char *bind_dev, in return -1; } + /* Setting should be done before the socket is conected */ + if (zerocopy) { + opt = 1; + if (setsockopt(s, SOL_SOCKET, SO_ZEROCOPY, &opt, sizeof(opt)) < 0) { + saved_errno = errno; + close(s); + freeaddrinfo(local_res); + freeaddrinfo(server_res); + errno = saved_errno; + return -1; + } + } + if (bind_dev) { #if defined(HAVE_SO_BINDTODEVICE) if (setsockopt(s, SOL_SOCKET, SO_BINDTODEVICE, @@ -234,12 +248,12 @@ create_socket(int domain, int proto, const char *local, const char *bind_dev, in /* make connection to server */ int -netdial(int domain, int proto, const char *local, const char *bind_dev, int local_port, const char *server, int port, int timeout) +netdial(int domain, int proto, const char *local, const char *bind_dev, int local_port, const char *server, int port, int timeout, int zerocopy) { struct addrinfo *server_res = NULL; int s, saved_errno; - s = create_socket(domain, proto, local, bind_dev, local_port, server, port, &server_res); + s = create_socket(domain, proto, local, bind_dev, local_port, server, port, &server_res, zerocopy); if (s < 0) { return -1; } @@ -366,16 +380,25 @@ netannounce(int domain, int proto, const char *local, const char *bind_dev, int return s; } - /*******************************************************************/ -/* reads 'count' bytes from a socket */ +/* Nread - reads 'count' bytes from a socket */ /********************************************************************/ int Nread(int fd, char *buf, size_t count, int prot) +{ + return Nrecv(fd, buf, count, prot, 0); +} + +/*******************************************************************/ +/* Nrecv - reads 'count' bytes from a socket */ +/********************************************************************/ + +int +Nrecv(int fd, char *buf, size_t count, int prot, int sock_opt) { register ssize_t r; - register size_t nleft = count; + register ssize_t nleft = count; struct iperf_time ftimeout = { 0, 0 }; fd_set rfdset; @@ -404,7 +427,11 @@ Nread(int fd, char *buf, size_t count, int prot) } while (nleft > 0) { - r = read(fd, buf, nleft); + if (sock_opt) + r = recv(fd, buf, nleft, sock_opt); + else + r = read(fd, buf, nleft); + if (r < 0) { /* XXX EWOULDBLOCK can't happen without non-blocking sockets */ if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK) @@ -460,12 +487,26 @@ Nread(int fd, char *buf, size_t count, int prot) int Nwrite(int fd, const char *buf, size_t count, int prot) +{ + return Nsend(fd, buf, count, prot, 0); +} + + +/* + * N S E N D + */ + +int +Nsend(int fd, const char *buf, size_t count, int prot, int sock_opt) { register ssize_t r; register size_t nleft = count; while (nleft > 0) { - r = write(fd, buf, nleft); + if (sock_opt) + r = send(fd, buf, nleft, sock_opt); + else + r = write(fd, buf, nleft); if (r < 0) { switch (errno) { case EINTR: diff --git a/src/net.h b/src/net.h index f0e1b4f98..71a06716d 100644 --- a/src/net.h +++ b/src/net.h @@ -28,11 +28,13 @@ #define __NET_H int timeout_connect(int s, const struct sockaddr *name, socklen_t namelen, int timeout); -int create_socket(int domain, int proto, const char *local, const char *bind_dev, int local_port, const char *server, int port, struct addrinfo **server_res_out); -int netdial(int domain, int proto, const char *local, const char *bind_dev, int local_port, const char *server, int port, int timeout); +int create_socket(int domain, int proto, const char *local, const char *bind_dev, int local_port, const char *server, int port, struct addrinfo **server_res_out, int zerocopy); +int netdial(int domain, int proto, const char *local, const char *bind_dev, int local_port, const char *server, int port, int timeout, int zerocopy); int netannounce(int domain, int proto, const char *local, const char *bind_dev, int port); int Nread(int fd, char *buf, size_t count, int prot); +int Nrecv(int fd, char *buf, size_t count, int prot, int sock_opt); int Nwrite(int fd, const char *buf, size_t count, int prot) /* __attribute__((hot)) */; +int Nsend(int fd, const char *buf, size_t count, int prot, int sock_opt) /* __attribute__((hot)) */; int has_sendfile(void); int Nsendfile(int fromfd, int tofd, const char *buf, size_t count) /* __attribute__((hot)) */; int setnonblocking(int fd, int nonblocking);