Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for SKIP-RX-COPY using MSG_TRUNC and Zero-copy using SO_ZEROCOPY/MSG_ZEROCOPY #1690

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,34 @@ if test "x$iperf3_cv_header_tcp_info_snd_wnd" = "xyes"; then
AC_DEFINE([HAVE_TCP_INFO_SND_WND], [1], [Have tcpi_snd_wnd field in tcp_info.])
fi

# Check for MSG_ZEROCOPY (mostly on Linux).
# This is a compile-only probe: it builds a tiny program that references
# MSG_ZEROCOPY after including the socket headers, and caches the yes/no
# answer in iperf3_cv_header_msg_zerocopy. It does not test whether the
# running kernel accepts the flag.
AC_CACHE_CHECK([MSG_ZEROCOPY send option],
[iperf3_cv_header_msg_zerocopy],
AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM([[#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>]],
[[int foo = MSG_ZEROCOPY;]])],
iperf3_cv_header_msg_zerocopy=yes,
iperf3_cv_header_msg_zerocopy=no))
# Expose a positive result to the C sources as HAVE_MSG_ZEROCOPY.
if test "x$iperf3_cv_header_msg_zerocopy" = "xyes"; then
AC_DEFINE([HAVE_MSG_ZEROCOPY], [1], [Have MSG_ZEROCOPY send option.])
fi

# Check for MSG_TRUNC (mostly on Linux).
# This is a compile-only probe: it builds a tiny program that references
# MSG_TRUNC after including the socket headers, and caches the yes/no
# answer in iperf3_cv_header_msg_trunc.
# NOTE(review): the probe only shows that the macro is declared. Presumably
# some non-Linux systems declare MSG_TRUNC without giving it the same
# meaning as a recv flag - confirm before relying on HAVE_MSG_TRUNC there.
AC_CACHE_CHECK([MSG_TRUNC recv option],
[iperf3_cv_header_msg_trunc],
AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM([[#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>]],
[[int foo = MSG_TRUNC;]])],
iperf3_cv_header_msg_trunc=yes,
iperf3_cv_header_msg_trunc=no))
# Expose a positive result to the C sources as HAVE_MSG_TRUNC.
if test "x$iperf3_cv_header_msg_trunc" = "xyes"; then
AC_DEFINE([HAVE_MSG_TRUNC], [1], [Have MSG_TRUNC recv option.])
fi

# Check if we need -lrt for clock_gettime
AC_SEARCH_LIBS(clock_gettime, [rt posix4])
# Check for clock_gettime support
Expand Down
7 changes: 6 additions & 1 deletion src/iperf.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ struct iperf_settings
char *client_password;
EVP_PKEY *client_rsa_pubkey;
#endif // HAVE_SSL
int skip_rx_copy; /* Whether to ignore received messages data, using MSG_TRUNC option */
int connect_timeout; /* socket connection timeout, in ms */
int idle_timeout; /* server idle time timeout */
unsigned int snd_timeout; /* Timeout for sending tcp messages in active mode, in us */
Expand Down Expand Up @@ -331,7 +332,7 @@ struct iperf_test
int verbose; /* -V option - verbose mode */
int json_output; /* -J option - JSON output */
int json_stream; /* --json-stream */
int zerocopy; /* -Z option - use sendfile */
int zerocopy; /* -Z option - use sendfile for TCP */
int debug; /* -d option - enable debug */
enum debug_level debug_level; /* -d option - level of debug messages to show */
int get_server_output; /* --get-server-output */
Expand Down Expand Up @@ -459,4 +460,8 @@ extern int gerror; /* error value from getaddrinfo(3), for use in internal error
/* In Reverse mode, maximum number of packets to wait for "accept" response - to handle out of order packets */
#define MAX_REVERSE_OUT_OF_ORDER_PACKETS 2

/* Zerocopy - whether TCP uses sendfile() or MSG_ZEROCOPY (for UDP, any non-zero value means MSG_ZEROCOPY) */
#define ZEROCOPY_TCP_SENDFILE 1
#define ZEROCOPY_TCP_MSG_ZEROCOPY 2

#endif /* !__IPERF_H */
58 changes: 56 additions & 2 deletions src/iperf_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -702,7 +702,7 @@ iperf_has_zerocopy( void )
/*
 * Set the zerocopy flag on a test.
 *
 * The stored value is always 0 or 1: it is 1 only when the zerocopy argument
 * is non-zero and either the protocol is UDP or has_sendfile() reports true.
 *
 * NOTE(review): the two assignments below look like the old and new sides of
 * the diff hunk; the second one overwrites the first, so only it matters.
 * NOTE(review): because the result is collapsed to 0/1, this setter cannot
 * store ZEROCOPY_TCP_MSG_ZEROCOPY (2) - confirm whether that is intended.
 */
void
iperf_set_test_zerocopy(struct iperf_test *ipt, int zerocopy)
{
ipt->zerocopy = (zerocopy && has_sendfile());
ipt->zerocopy = (zerocopy && (ipt->protocol->id == Pudp ? 1 : has_sendfile()));
}

void
Expand Down Expand Up @@ -1104,7 +1104,11 @@ iperf_parse_arguments(struct iperf_test *test, int argc, char **argv)
#if defined(HAVE_FLOWLABEL)
{"flowlabel", required_argument, NULL, 'L'},
#endif /* HAVE_FLOWLABEL */
#if defined(HAVE_MSG_ZEROCOPY)
{"zerocopy", optional_argument, NULL, 'Z'},
#else
{"zerocopy", no_argument, NULL, 'Z'},
#endif /* HAVE_MSG_ZEROCOPY */
{"omit", required_argument, NULL, 'O'},
{"file", required_argument, NULL, 'F'},
{"repeating-payload", no_argument, NULL, OPT_REPEATING_PAYLOAD},
Expand All @@ -1131,6 +1135,9 @@ iperf_parse_arguments(struct iperf_test *test, int argc, char **argv)
#if defined(HAVE_DONT_FRAGMENT)
{"dont-fragment", no_argument, NULL, OPT_DONT_FRAGMENT},
#endif /* HAVE_DONT_FRAGMENT */
#if defined(HAVE_MSG_TRUNC)
{"skip-rx-copy", no_argument, NULL, OPT_SKIP_RX_COPY},
#endif /* HAVE_MSG_TRUNC */
#if defined(HAVE_SSL)
{"username", required_argument, NULL, OPT_CLIENT_USERNAME},
{"rsa-public-key-path", required_argument, NULL, OPT_CLIENT_RSA_PUBLIC_KEY},
Expand Down Expand Up @@ -1467,11 +1474,24 @@ iperf_parse_arguments(struct iperf_test *test, int argc, char **argv)
TAILQ_INSERT_TAIL(&test->xbind_addrs, xbe, link);
break;
case 'Z':
#if defined(HAVE_MSG_ZEROCOPY)
if (optarg && strcmp(optarg, "")) {
if (!strcmp(optarg, "z"))
test->zerocopy = ZEROCOPY_TCP_MSG_ZEROCOPY;
else {
i_errno = IENOSENDFILE;
return -1;
}
} else {
test->zerocopy = ZEROCOPY_TCP_SENDFILE;
}
#else
if (!has_sendfile()) {
i_errno = IENOSENDFILE;
return -1;
}
test->zerocopy = 1;
test->zerocopy = ZEROCOPY_TCP_SENDFILE;
#endif /* HAVE_MSG_ZEROCOPY */
client_flag = 1;
break;
case OPT_REPEATING_PAYLOAD:
Expand Down Expand Up @@ -1635,6 +1655,12 @@ iperf_parse_arguments(struct iperf_test *test, int argc, char **argv)
test->use_pkcs1_padding = 1;
break;
#endif /* HAVE_SSL */
#if defined(HAVE_MSG_TRUNC)
case OPT_SKIP_RX_COPY:
test->settings->skip_rx_copy = 1;
client_flag = 1;
break;
#endif /* HAVE_MSG_TRUNC */
case OPT_PACING_TIMER:
test->settings->pacing_timer = unit_atoi(optarg);
client_flag = 1;
Expand Down Expand Up @@ -1744,6 +1770,28 @@ iperf_parse_arguments(struct iperf_test *test, int argc, char **argv)
return -1;
}

#if defined(HAVE_MSG_ZEROCOPY)
// UDP supports "zero copy" only using MSG_ZEROCOPY
if (test->protocol->id == Pudp && test->zerocopy)
test->zerocopy = ZEROCOPY_TCP_MSG_ZEROCOPY;
// Zero copy for TCP requires sendfile() support
if (test->zerocopy && test->protocol->id != Pudp && !has_sendfile()) {
i_errno = IENOSENDFILE;
return -1;
}
// Using MSG_ZEROCOPY is not supported when a disk file is used
if (test->diskfile_name != (char*) 0 && test->zerocopy == ZEROCOPY_TCP_MSG_ZEROCOPY) {
i_errno = IEDISKFILEZEROCOPY;
return -1;
}
#else
// Zero copy is supported only by TCP
if (test->zerocopy && test->protocol->id != Ptcp) {
i_errno = IENOSENDFILE;
return -1;
}
#endif /* HAVE_MSG_ZEROCOPY */

if (blksize == 0) {
if (test->protocol->id == Pudp)
blksize = 0; /* try to dynamically determine from MSS */
Expand Down Expand Up @@ -2270,6 +2318,8 @@ send_parameters(struct iperf_test *test)
cJSON_AddStringToObject(j, "authtoken", test->settings->authtoken);
}
#endif // HAVE_SSL
if (test->settings->skip_rx_copy)
cJSON_AddNumberToObject(j, "skip_rx_copy", test->settings->skip_rx_copy);
cJSON_AddStringToObject(j, "client_version", IPERF_VERSION);

if (test->debug) {
Expand Down Expand Up @@ -2376,6 +2426,8 @@ get_parameters(struct iperf_test *test)
if ((j_p = cJSON_GetObjectItem(j, "authtoken")) != NULL)
test->settings->authtoken = strdup(j_p->valuestring);
#endif //HAVE_SSL
if ((j_p = cJSON_GetObjectItem(j, "skip_rx_copy")) != NULL)
test->settings->skip_rx_copy = j_p->valueint;
if (test->mode && test->protocol->id == Ptcp && has_tcpinfo_retransmits())
test->sender_has_retransmits = 1;
if (test->settings->rate)
Expand Down Expand Up @@ -2971,6 +3023,7 @@ iperf_defaults(struct iperf_test *testp)
testp->settings->rcv_timeout.secs = DEFAULT_NO_MSG_RCVD_TIMEOUT / SEC_TO_mS;
testp->settings->rcv_timeout.usecs = (DEFAULT_NO_MSG_RCVD_TIMEOUT % SEC_TO_mS) * mS_TO_US;
testp->zerocopy = 0;
testp->settings->skip_rx_copy = 0;

memset(testp->cookie, 0, COOKIE_SIZE);

Expand Down Expand Up @@ -3268,6 +3321,7 @@ iperf_reset_test(struct iperf_test *test)
test->settings->tos = 0;
test->settings->dont_fragment = 0;
test->zerocopy = 0;
test->settings->skip_rx_copy = 0;

#if defined(HAVE_SSL)
if (test->settings->authtoken) {
Expand Down
2 changes: 2 additions & 0 deletions src/iperf_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ typedef atomic_uint_fast64_t atomic_iperf_size_t;
#define OPT_JSON_STREAM 28
#define OPT_SND_TIMEOUT 29
#define OPT_USE_PKCS1_PADDING 30
#define OPT_SKIP_RX_COPY 31

/* states */
#define TEST_START 1
Expand Down Expand Up @@ -420,6 +421,7 @@ enum {
IESNDTIMEOUT = 33, // Illegal message send timeout
IEUDPFILETRANSFER = 34, // Cannot transfer file using UDP
IESERVERAUTHUSERS = 35, // Cannot access authorized users file
IEDISKFILEZEROCOPY = 36, // Sending disk file using MSG_ZEROCOPY is not supported
/* Test errors */
IENEWTEST = 100, // Unable to create a new test (check perror)
IEINITTEST = 101, // Test initialization failed (check perror)
Expand Down
2 changes: 1 addition & 1 deletion src/iperf_client_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ iperf_connect(struct iperf_test *test)
/* Create and connect the control channel */
if (test->ctrl_sck < 0)
// Create the control channel using an ephemeral port
test->ctrl_sck = netdial(test->settings->domain, Ptcp, test->bind_address, test->bind_dev, 0, test->server_hostname, test->server_port, test->settings->connect_timeout);
test->ctrl_sck = netdial(test->settings->domain, Ptcp, test->bind_address, test->bind_dev, 0, test->server_hostname, test->server_port, test->settings->connect_timeout, 0);
if (test->ctrl_sck < 0) {
i_errno = IECONNECT;
return -1;
Expand Down
7 changes: 7 additions & 0 deletions src/iperf_error.c
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,14 @@ iperf_strerror(int int_errno)
snprintf(errstr, len, "TCP MSS too large (maximum = %d bytes)", MAX_MSS);
break;
case IENOSENDFILE:
#if defined(HAVE_MSG_ZEROCOPY)
snprintf(errstr, len, "invalid zerocopy option value or this OS does not support sendfile");
#else
snprintf(errstr, len, "this OS does not support sendfile");
#endif /* HAVE_MSG_ZEROCOPY */
break;
case IEDISKFILEZEROCOPY:
snprintf(errstr, len, "Sending disk file using MSG_ZEROCOPY is not supported");
break;
case IEOMIT:
snprintf(errstr, len, "bogus value for --omit");
Expand Down
10 changes: 9 additions & 1 deletion src/iperf_locale.c
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,15 @@ const char usage_longstr[] = "Usage: iperf3 [-s|-c host] [options]\n"
#if defined(HAVE_FLOWLABEL)
" -L, --flowlabel N set the IPv6 flow label (only supported on Linux)\n"
#endif /* HAVE_FLOWLABEL */
" -Z, --zerocopy use a 'zero copy' method of sending data\n"
#if defined(HAVE_MSG_ZEROCOPY)
" -Z, --zerocopy[=z] use a 'zero copy' method of sending data;\n"
" for TCP use MSG_ZEROCOPY with '=z', default is using sendfile()\n"
#else
" -Z, --zerocopy use a 'zero copy' method of sending TCP data\n"
#endif /* HAVE_MSG_ZEROCOPY */
#if defined(HAVE_MSG_TRUNC)
" --skip-rx-copy ignore received messages using MSG_TRUNC option\n"
#endif /* HAVE_MSG_TRUNC */
" -O, --omit N perform pre-test for N seconds and omit the pre-test statistics\n"
" -T, --title str prefix every output line with this string\n"
" --extra-data str data string to include in client and server JSON\n"
Expand Down
16 changes: 14 additions & 2 deletions src/iperf_tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,15 @@ int
iperf_tcp_recv(struct iperf_stream *sp)
{
int r;
int sock_opt;

r = Nread(sp->socket, sp->buffer, sp->settings->blksize, Ptcp);
#if defined(HAVE_MSG_TRUNC)
sock_opt = sp->test->settings->skip_rx_copy ? MSG_TRUNC : 0;
#else
sock_opt = 0;
#endif /* HAVE_MSG_TRUNC */

r = Nrecv(sp->socket, sp->buffer, sp->settings->blksize, Ptcp, sock_opt);

if (r < 0)
return r;
Expand Down Expand Up @@ -88,6 +95,11 @@ iperf_tcp_send(struct iperf_stream *sp)
if (!sp->pending_size)
sp->pending_size = sp->settings->blksize;

#if defined(HAVE_MSG_ZEROCOPY)
if (sp->test->zerocopy == ZEROCOPY_TCP_MSG_ZEROCOPY)
r = Nsend(sp->socket, sp->buffer, sp->pending_size, Ptcp, MSG_ZEROCOPY);
else
#endif /* HAVE_MSG_ZEROCOPY */
if (sp->test->zerocopy)
r = Nsendfile(sp->buffer_fd, sp->socket, sp->buffer, sp->pending_size);
else
Expand Down Expand Up @@ -380,7 +392,7 @@ iperf_tcp_connect(struct iperf_test *test)
int saved_errno;
int rcvbuf_actual, sndbuf_actual;

s = create_socket(test->settings->domain, SOCK_STREAM, test->bind_address, test->bind_dev, test->bind_port, test->server_hostname, test->server_port, &server_res);
s = create_socket(test->settings->domain, SOCK_STREAM, test->bind_address, test->bind_dev, test->bind_port, test->server_hostname, test->server_port, &server_res, test->zerocopy);
if (s < 0) {
i_errno = IESTREAMCONNECT;
return -1;
Expand Down
22 changes: 19 additions & 3 deletions src/iperf_udp.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,17 @@ iperf_udp_recv(struct iperf_stream *sp)
int first_packet = 0;
double transit = 0, d = 0;
struct iperf_time sent_time, arrival_time, temp_time;
int sock_opt = 0;

r = Nread(sp->socket, sp->buffer, size, Pudp);
#if defined(HAVE_MSG_TRUNC)
// UDP recv() with MSG_TRUNC copies only `size` bytes into the buffer, but returns the length of the full packet
if (sp->test->settings->skip_rx_copy) {
sock_opt = MSG_TRUNC;
size = sizeof(sec) + sizeof(usec) + sizeof(pcount);
}
#endif /* HAVE_MSG_TRUNC */

r = Nrecv(sp->socket, sp->buffer, size, Pudp, sock_opt);

/*
* If we got an error in the read, or if we didn't read anything
Expand Down Expand Up @@ -201,6 +210,12 @@ iperf_udp_send(struct iperf_stream *sp)
int r;
int size = sp->settings->blksize;
struct iperf_time before;
int sock_opt = 0;

#if defined(HAVE_MSG_ZEROCOPY)
if (sp->test->zerocopy)
sock_opt = MSG_ZEROCOPY;
#endif /* HAVE_MSG_ZEROCOPY */

iperf_time_now(&before);

Expand Down Expand Up @@ -234,7 +249,7 @@ iperf_udp_send(struct iperf_stream *sp)

}

r = Nwrite(sp->socket, sp->buffer, size, Pudp);
r = Nsend(sp->socket, sp->buffer, size, Pudp, sock_opt);

if (r <= 0) {
--sp->packet_count; /* Don't count messages that no data was sent from them.
Expand Down Expand Up @@ -446,6 +461,7 @@ iperf_udp_accept(struct iperf_test *test)
/*
* Create a new "listening" socket to replace the one we were using before.
*/
FD_CLR(test->prot_listener, &test->read_set); // No control messages from old listener
test->prot_listener = netannounce(test->settings->domain, Pudp, test->bind_address, test->bind_dev, test->server_port);
if (test->prot_listener < 0) {
i_errno = IESTREAMLISTEN;
Expand Down Expand Up @@ -507,7 +523,7 @@ iperf_udp_connect(struct iperf_test *test)
int i, max_len_wait_for_reply;

/* Create and bind our local socket. */
if ((s = netdial(test->settings->domain, Pudp, test->bind_address, test->bind_dev, test->bind_port, test->server_hostname, test->server_port, -1)) < 0) {
if ((s = netdial(test->settings->domain, Pudp, test->bind_address, test->bind_dev, test->bind_port, test->server_hostname, test->server_port, -1, test->zerocopy)) < 0) {
i_errno = IESTREAMCONNECT;
return -1;
}
Expand Down
Loading
Loading