From 45270ad8a86a80cca4c59dfa73d9a9ee0688d781 Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Sun, 11 Mar 2012 17:52:59 +0400 Subject: [PATCH 01/11] virtio-serial-bus: use correct lengths in control_out() message Original code has one thing to process (cur_len), requests to convert from iovec to buf another thing (len which is actually max_len), and processes something else (copied). Whole thing is very difficult to understand, even if it does a right thing. The iov_to_buf() conversion in this case will always return cur_len, because it is the length of the iovec it was asked to process, and the size we asked to convert is the same or larger, and iov_to_buf() will stop at reaching either iov or buf. Make the code saner by doing the only sane thing: dropping `copied' which is always the same as `cur_len' but just introduces questions. Signed-off-by: Michael Tokarev --- hw/virtio-serial-bus.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/virtio-serial-bus.c b/hw/virtio-serial-bus.c index 72287d10c..1e477e5a3 100644 --- a/hw/virtio-serial-bus.c +++ b/hw/virtio-serial-bus.c @@ -454,7 +454,7 @@ static void control_out(VirtIODevice *vdev, VirtQueue *vq) len = 0; buf = NULL; while (virtqueue_pop(vq, &elem)) { - size_t cur_len, copied; + size_t cur_len; cur_len = iov_size(elem.out_sg, elem.out_num); /* @@ -467,9 +467,9 @@ static void control_out(VirtIODevice *vdev, VirtQueue *vq) buf = g_malloc(cur_len); len = cur_len; } - copied = iov_to_buf(elem.out_sg, elem.out_num, buf, 0, len); + iov_to_buf(elem.out_sg, elem.out_num, buf, 0, cur_len); - handle_control_message(vser, buf, copied); + handle_control_message(vser, buf, cur_len); virtqueue_push(vq, &elem, 0); } g_free(buf); From dcf6f5e15ecee4f593eeacbe0591c1addc004d92 Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Sun, 11 Mar 2012 18:05:12 +0400 Subject: [PATCH 02/11] change iov_* function prototypes to be more appropriate Reorder arguments to be more natural, readable and consistent with other iov_* functions, and change argument names, from: iov_from_buf(iov, iov_cnt, buf, iov_off, size) to iov_from_buf(iov, iov_cnt, offset, buf, bytes) The result becomes natural English: copy data to this `iov' vector with `iov_cnt' elements starting at byte offset `offset' from memory buffer `buf', processing `bytes' bytes max. (Try to read the original prototype this way). Also change iov_clear() to more general iov_memset() (it uses memset() internally anyway). While at it, add comments to the header file describing what the routines actually does. The patch only renames argumens in the header, but keeps old names in the implementation. The next patch will touch actual code to match. Now, it might look wrong to pay so much attention to so small things. But we've so many badly designed interfaces already so the whole thing becomes rather confusing or error prone. One example of this is previous commit and small discussion which emerged from it, with an outcome that the utility functions like these aren't well-understdandable, leading to strange usage cases. That's why I paid quite some attention to this set of functions and a few others in subsequent patches. Signed-off-by: Michael Tokarev --- hw/rtl8139.c | 2 +- hw/usb/core.c | 6 +++--- hw/virtio-balloon.c | 4 ++-- hw/virtio-net.c | 4 ++-- hw/virtio-serial-bus.c | 6 +++--- iov.c | 14 +++++++------- iov.h | 42 +++++++++++++++++++++++++++++++++++++----- net.c | 2 +- 8 files changed, 56 insertions(+), 24 deletions(-) diff --git a/hw/rtl8139.c b/hw/rtl8139.c index eb22d04fa..8128b64a0 100644 --- a/hw/rtl8139.c +++ b/hw/rtl8139.c @@ -1783,7 +1783,7 @@ static void rtl8139_transfer_frame(RTL8139State *s, uint8_t *buf, int size, if (iov) { buf2_size = iov_size(iov, 3); buf2 = g_malloc(buf2_size); - iov_to_buf(iov, 3, buf2, 0, buf2_size); + iov_to_buf(iov, 3, 0, buf2, buf2_size); buf = buf2; } diff --git a/hw/usb/core.c b/hw/usb/core.c index 0e02da760..2641685a3 100644 --- a/hw/usb/core.c +++ b/hw/usb/core.c @@ -522,10 +522,10 @@ void usb_packet_copy(USBPacket *p, void *ptr, size_t bytes) switch (p->pid) { case USB_TOKEN_SETUP: case USB_TOKEN_OUT: - iov_to_buf(p->iov.iov, p->iov.niov, ptr, p->result, bytes); + iov_to_buf(p->iov.iov, p->iov.niov, p->result, ptr, bytes); break; case USB_TOKEN_IN: - iov_from_buf(p->iov.iov, p->iov.niov, ptr, p->result, bytes); + iov_from_buf(p->iov.iov, p->iov.niov, p->result, ptr, bytes); break; default: fprintf(stderr, "%s: invalid pid: %x\n", __func__, p->pid); @@ -539,7 +539,7 @@ void usb_packet_skip(USBPacket *p, size_t bytes) assert(p->result >= 0); assert(p->result + bytes <= p->iov.size); if (p->pid == USB_TOKEN_IN) { - iov_clear(p->iov.iov, p->iov.niov, p->result, bytes); + iov_memset(p->iov.iov, p->iov.niov, p->result, 0, bytes); } p->result += bytes; } diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c index 075ed87e3..23effa40a 100644 --- a/hw/virtio-balloon.c +++ b/hw/virtio-balloon.c @@ -77,7 +77,7 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) size_t offset = 0; uint32_t pfn; - while (iov_to_buf(elem.out_sg, elem.out_num, &pfn, offset, 4) == 4) { + while (iov_to_buf(elem.out_sg, elem.out_num, offset, &pfn, 4) == 4) { ram_addr_t pa; ram_addr_t addr; @@ -118,7 +118,7 @@ static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq) */ reset_stats(s); - while (iov_to_buf(elem->out_sg, elem->out_num, &stat, offset, sizeof(stat)) + while (iov_to_buf(elem->out_sg, elem->out_num, offset, &stat, sizeof(stat)) == sizeof(stat)) { uint16_t tag = tswap16(stat.tag); uint64_t val = tswap64(stat.val); diff --git a/hw/virtio-net.c b/hw/virtio-net.c index 3f190d417..533aa3d0f 100644 --- a/hw/virtio-net.c +++ b/hw/virtio-net.c @@ -656,8 +656,8 @@ static ssize_t virtio_net_receive(VLANClientState *nc, const uint8_t *buf, size_ } /* copy in packet. ugh */ - len = iov_from_buf(sg, elem.in_num, - buf + offset, 0, size - offset); + len = iov_from_buf(sg, elem.in_num, 0, + buf + offset, size - offset); total += len; offset += len; /* If buffers can't be merged, at this point we diff --git a/hw/virtio-serial-bus.c b/hw/virtio-serial-bus.c index 1e477e5a3..728f218dc 100644 --- a/hw/virtio-serial-bus.c +++ b/hw/virtio-serial-bus.c @@ -106,8 +106,8 @@ static size_t write_to_port(VirtIOSerialPort *port, break; } - len = iov_from_buf(elem.in_sg, elem.in_num, - buf + offset, 0, size - offset); + len = iov_from_buf(elem.in_sg, elem.in_num, 0, + buf + offset, size - offset); offset += len; virtqueue_push(vq, &elem, len); @@ -467,7 +467,7 @@ static void control_out(VirtIODevice *vdev, VirtQueue *vq) buf = g_malloc(cur_len); len = cur_len; } - iov_to_buf(elem.out_sg, elem.out_num, buf, 0, cur_len); + iov_to_buf(elem.out_sg, elem.out_num, 0, buf, cur_len); handle_control_message(vser, buf, cur_len); virtqueue_push(vq, &elem, 0); diff --git a/iov.c b/iov.c index 0f964939d..bc58cab05 100644 --- a/iov.c +++ b/iov.c @@ -17,8 +17,8 @@ #include "iov.h" -size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt, - const void *buf, size_t iov_off, size_t size) +size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt, size_t iov_off, + const void *buf, size_t size) { size_t iovec_off, buf_off; unsigned int i; @@ -40,8 +40,8 @@ size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt, return buf_off; } -size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt, - void *buf, size_t iov_off, size_t size) +size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt, size_t iov_off, + void *buf, size_t size) { uint8_t *ptr; size_t iovec_off, buf_off; @@ -65,8 +65,8 @@ size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt, return buf_off; } -size_t iov_clear(const struct iovec *iov, const unsigned int iov_cnt, - size_t iov_off, size_t size) +size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, + size_t iov_off, int fillc, size_t size) { size_t iovec_off, buf_off; unsigned int i; @@ -77,7 +77,7 @@ size_t iov_clear(const struct iovec *iov, const unsigned int iov_cnt, if (iov_off < (iovec_off + iov[i].iov_len)) { size_t len = MIN((iovec_off + iov[i].iov_len) - iov_off , size); - memset(iov[i].iov_base + (iov_off - iovec_off), 0, len); + memset(iov[i].iov_base + (iov_off - iovec_off), fillc, len); buf_off += len; iov_off += len; diff --git a/iov.h b/iov.h index 94d2f7828..0b4acf4a5 100644 --- a/iov.h +++ b/iov.h @@ -12,12 +12,44 @@ #include "qemu-common.h" +/** + * count and return data size, in bytes, of an iovec + * starting at `iov' of `iov_cnt' number of elements. + */ +size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt); + +/** + * Copy from single continuous buffer to scatter-gather vector of buffers + * (iovec) and back like memcpy() between two continuous memory regions. + * Data in single continuous buffer starting at address `buf' and + * `bytes' bytes long will be copied to/from an iovec `iov' with + * `iov_cnt' number of elements, starting at byte position `offset' + * within the iovec. If the iovec does not contain enough space, + * only part of data will be copied, up to the end of the iovec. + * Number of bytes actually copied will be returned, which is + * min(bytes, iov_size(iov)-offset) + */ size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt, - const void *buf, size_t iov_off, size_t size); + size_t offset, const void *buf, size_t bytes); size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt, - void *buf, size_t iov_off, size_t size); -size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt); -size_t iov_clear(const struct iovec *iov, const unsigned int iov_cnt, - size_t iov_off, size_t size); + size_t offset, void *buf, size_t bytes); + +/** + * Set data bytes pointed out by iovec `iov' of size `iov_cnt' elements, + * starting at byte offset `start', to value `fillc', repeating it + * `bytes' number of times. + * If `bytes' is large enough, only last bytes portion of iovec, + * up to the end of it, will be filled with the specified value. + * Function return actual number of bytes processed, which is + * min(size, iov_size(iov) - offset). + */ +size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, + size_t offset, int fillc, size_t bytes); + +/** + * Produce a text hexdump of iovec `iov' with `iov_cnt' number of elements + * in file `fp', prefixing each line with `prefix' and processing not more + * than `limit' data bytes. + */ void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt, FILE *fp, const char *prefix, size_t limit); diff --git a/net.c b/net.c index 4aa416cff..abf0fd0a0 100644 --- a/net.c +++ b/net.c @@ -544,7 +544,7 @@ static ssize_t vc_sendv_compat(VLANClientState *vc, const struct iovec *iov, uint8_t buffer[4096]; size_t offset; - offset = iov_to_buf(iov, iovcnt, buffer, 0, sizeof(buffer)); + offset = iov_to_buf(iov, iovcnt, 0, buffer, sizeof(buffer)); return vc->info->receive(vc, buffer, offset); } From 2278a69e7020d86a8c73a28474e7709d3e7d5081 Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Thu, 7 Jun 2012 20:08:19 +0400 Subject: [PATCH 03/11] rewrite iov_* functions This changes implementations of all iov_* functions, completing the previous step. All iov_* functions now ensure that this offset argument is within the iovec (using assertion), but lets to specify `bytes' value larger than actual length of the iovec - in this case they stops at the actual end of iovec. It is also suggested to use convinient `-1' value as `bytes' to mean just this -- "up to the end". There's one very minor semantic change here: new requiriment is that `offset' points to inside of iovec. This is checked just at the end of functions (assert()), it does not actually need to be enforced, but using any of these functions with offset pointing past the end of iovec is wrong anyway. Note: the new code in iov.c uses arithmetic with void pointers. I thought this is not supported everywhere and is a GCC extension (indeed, the C standard does not define void arithmetic). However, the original code already use void arith in iov_from_buf() function: (memcpy(..., buf + buf_off,...) which apparently works well so far (it is this way in qemu 1.0). So I left it this way and used it in other places. While at it, add a unit-test file test-iov.c, to check various corner cases with iov_from_buf(), iov_to_buf() and iov_memset(). Signed-off-by: Michael Tokarev --- iov.c | 91 ++++++++++++---------------- iov.h | 12 +++- tests/Makefile | 2 + tests/test-iov.c | 153 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 204 insertions(+), 54 deletions(-) create mode 100644 tests/test-iov.c diff --git a/iov.c b/iov.c index bc58cab05..9657d286b 100644 --- a/iov.c +++ b/iov.c @@ -7,6 +7,7 @@ * Author(s): * Anthony Liguori * Amit Shah + * Michael Tokarev * * This work is licensed under the terms of the GNU GPL, version 2. See * the COPYING file in the top-level directory. @@ -17,75 +18,61 @@ #include "iov.h" -size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt, size_t iov_off, - const void *buf, size_t size) +size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt, + size_t offset, const void *buf, size_t bytes) { - size_t iovec_off, buf_off; + size_t done; unsigned int i; - - iovec_off = 0; - buf_off = 0; - for (i = 0; i < iov_cnt && size; i++) { - if (iov_off < (iovec_off + iov[i].iov_len)) { - size_t len = MIN((iovec_off + iov[i].iov_len) - iov_off, size); - - memcpy(iov[i].iov_base + (iov_off - iovec_off), buf + buf_off, len); - - buf_off += len; - iov_off += len; - size -= len; + for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) { + if (offset < iov[i].iov_len) { + size_t len = MIN(iov[i].iov_len - offset, bytes - done); + memcpy(iov[i].iov_base + offset, buf + done, len); + done += len; + offset = 0; + } else { + offset -= iov[i].iov_len; } - iovec_off += iov[i].iov_len; } - return buf_off; + assert(offset == 0); + return done; } -size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt, size_t iov_off, - void *buf, size_t size) +size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt, + size_t offset, void *buf, size_t bytes) { - uint8_t *ptr; - size_t iovec_off, buf_off; + size_t done; unsigned int i; - - ptr = buf; - iovec_off = 0; - buf_off = 0; - for (i = 0; i < iov_cnt && size; i++) { - if (iov_off < (iovec_off + iov[i].iov_len)) { - size_t len = MIN((iovec_off + iov[i].iov_len) - iov_off , size); - - memcpy(ptr + buf_off, iov[i].iov_base + (iov_off - iovec_off), len); - - buf_off += len; - iov_off += len; - size -= len; + for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) { + if (offset < iov[i].iov_len) { + size_t len = MIN(iov[i].iov_len - offset, bytes - done); + memcpy(buf + done, iov[i].iov_base + offset, len); + done += len; + offset = 0; + } else { + offset -= iov[i].iov_len; } - iovec_off += iov[i].iov_len; } - return buf_off; + assert(offset == 0); + return done; } size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, - size_t iov_off, int fillc, size_t size) + size_t offset, int fillc, size_t bytes) { - size_t iovec_off, buf_off; + size_t done; unsigned int i; - - iovec_off = 0; - buf_off = 0; - for (i = 0; i < iov_cnt && size; i++) { - if (iov_off < (iovec_off + iov[i].iov_len)) { - size_t len = MIN((iovec_off + iov[i].iov_len) - iov_off , size); - - memset(iov[i].iov_base + (iov_off - iovec_off), fillc, len); - - buf_off += len; - iov_off += len; - size -= len; + for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) { + if (offset < iov[i].iov_len) { + size_t len = MIN(iov[i].iov_len - offset, bytes - done); + memset(iov[i].iov_base + offset, fillc, len); + done += len; + offset = 0; + } else { + offset -= iov[i].iov_len; } - iovec_off += iov[i].iov_len; } - return buf_off; + assert(offset == 0); + return done; } size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt) diff --git a/iov.h b/iov.h index 0b4acf4a5..19ee3b3ac 100644 --- a/iov.h +++ b/iov.h @@ -1,10 +1,11 @@ /* - * Helpers for getting linearized buffers from iov / filling buffers into iovs + * Helpers for using (partial) iovecs. * * Copyright (C) 2010 Red Hat, Inc. * * Author(s): * Amit Shah + * Michael Tokarev * * This work is licensed under the terms of the GNU GPL, version 2. See * the COPYING file in the top-level directory. @@ -28,6 +29,12 @@ size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt); * only part of data will be copied, up to the end of the iovec. * Number of bytes actually copied will be returned, which is * min(bytes, iov_size(iov)-offset) + * `Offset' must point to the inside of iovec. + * It is okay to use very large value for `bytes' since we're + * limited by the size of the iovec anyway, provided that the + * buffer pointed to by buf has enough space. One possible + * such "large" value is -1 (sinice size_t is unsigned), + * so specifying `-1' as `bytes' means 'up to the end of iovec'. */ size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt, size_t offset, const void *buf, size_t bytes); @@ -37,11 +44,12 @@ size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt, /** * Set data bytes pointed out by iovec `iov' of size `iov_cnt' elements, * starting at byte offset `start', to value `fillc', repeating it - * `bytes' number of times. + * `bytes' number of times. `Offset' must point to the inside of iovec. * If `bytes' is large enough, only last bytes portion of iovec, * up to the end of it, will be filled with the specified value. * Function return actual number of bytes processed, which is * min(size, iov_size(iov) - offset). + * Again, it is okay to use large value for `bytes' to mean "up to the end". */ size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, size_t offset, int fillc, size_t bytes); diff --git a/tests/Makefile b/tests/Makefile index ab7f66700..7340bc504 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -13,6 +13,7 @@ check-unit-y += tests/test-qmp-commands$(EXESUF) check-unit-y += tests/test-string-input-visitor$(EXESUF) check-unit-y += tests/test-string-output-visitor$(EXESUF) check-unit-y += tests/test-coroutine$(EXESUF) +check-unit-y += tests/test-iov$(EXESUF) check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh @@ -47,6 +48,7 @@ tests/check-qlist$(EXESUF): tests/check-qlist.o qlist.o qint.o $(tools-obj-y) tests/check-qfloat$(EXESUF): tests/check-qfloat.o qfloat.o $(tools-obj-y) tests/check-qjson$(EXESUF): tests/check-qjson.o $(qobject-obj-y) $(tools-obj-y) tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(coroutine-obj-y) $(tools-obj-y) +tests/test-iov$(EXESUF): tests/test-iov.o iov.o tests/test-qapi-types.c tests/test-qapi-types.h :\ $(SRC_PATH)/qapi-schema-test.json $(SRC_PATH)/scripts/qapi-types.py diff --git a/tests/test-iov.c b/tests/test-iov.c new file mode 100644 index 000000000..5f82296a8 --- /dev/null +++ b/tests/test-iov.c @@ -0,0 +1,153 @@ +#include +#include "qemu-common.h" +#include "iov.h" + +/* create a randomly-sized iovec with random vectors */ +static void iov_random(struct iovec **iovp, unsigned *iov_cntp) +{ + unsigned niov = g_test_rand_int_range(3,8); + struct iovec *iov = g_malloc(niov * sizeof(*iov)); + unsigned i; + for (i = 0; i < niov; ++i) { + iov[i].iov_len = g_test_rand_int_range(5,20); + iov[i].iov_base = g_malloc(iov[i].iov_len); + } + *iovp = iov; + *iov_cntp = niov; +} + +static void iov_free(struct iovec *iov, unsigned niov) +{ + unsigned i; + for (i = 0; i < niov; ++i) { + g_free(iov[i].iov_base); + } + g_free(iov); +} + +static void test_iov_bytes(struct iovec *iov, unsigned niov, + size_t offset, size_t bytes) +{ + unsigned i; + size_t j, o; + unsigned char *b; + o = 0; + + /* we walk over all elements, */ + for (i = 0; i < niov; ++i) { + b = iov[i].iov_base; + /* over each char of each element, */ + for (j = 0; j < iov[i].iov_len; ++j) { + /* counting each of them and + * verifying that the ones within [offset,offset+bytes) + * range are equal to the position number (o) */ + if (o >= offset && o < offset + bytes) { + g_assert(b[j] == (o & 255)); + } else { + g_assert(b[j] == 0xff); + } + ++o; + } + } +} + +static void test_to_from_buf_1(void) +{ + unsigned niov; + struct iovec *iov; + size_t sz; + unsigned char *ibuf, *obuf; + unsigned i, j, n; + + iov_random(&iov, &niov); + + sz = iov_size(iov, niov); + + ibuf = g_malloc(sz + 8) + 4; + memcpy(ibuf-4, "aaaa", 4); memcpy(ibuf + sz, "bbbb", 4); + obuf = g_malloc(sz + 8) + 4; + memcpy(obuf-4, "xxxx", 4); memcpy(obuf + sz, "yyyy", 4); + + /* fill in ibuf with 0123456... */ + for (i = 0; i < sz; ++i) { + ibuf[i] = i & 255; + } + + for (i = 0; i <= sz; ++i) { + + /* Test from/to buf for offset(i) in [0..sz] up to the end of buffer. + * For last iteration with offset == sz, the procedure should + * skip whole vector and process exactly 0 bytes */ + + /* first set bytes [i..sz) to some "random" value */ + n = iov_memset(iov, niov, 0, 0xff, -1); + g_assert(n == sz); + + /* next copy bytes [i..sz) from ibuf to iovec */ + n = iov_from_buf(iov, niov, i, ibuf + i, -1); + g_assert(n == sz - i); + + /* clear part of obuf */ + memset(obuf + i, 0, sz - i); + /* and set this part of obuf to values from iovec */ + n = iov_to_buf(iov, niov, i, obuf + i, -1); + g_assert(n == sz - i); + + /* now compare resulting buffers */ + g_assert(memcmp(ibuf, obuf, sz) == 0); + + /* test just one char */ + n = iov_to_buf(iov, niov, i, obuf + i, 1); + g_assert(n == (i < sz)); + if (n) { + g_assert(obuf[i] == (i & 255)); + } + + for (j = i; j <= sz; ++j) { + /* now test num of bytes cap up to byte no. j, + * with j in [i..sz]. */ + + /* clear iovec */ + n = iov_memset(iov, niov, 0, 0xff, -1); + g_assert(n == sz); + + /* copy bytes [i..j) from ibuf to iovec */ + n = iov_from_buf(iov, niov, i, ibuf + i, j - i); + g_assert(n == j - i); + + /* clear part of obuf */ + memset(obuf + i, 0, j - i); + + /* copy bytes [i..j) from iovec to obuf */ + n = iov_to_buf(iov, niov, i, obuf + i, j - i); + g_assert(n == j - i); + + /* verify result */ + g_assert(memcmp(ibuf, obuf, sz) == 0); + + /* now actually check if the iovec contains the right data */ + test_iov_bytes(iov, niov, i, j - i); + } + } + g_assert(!memcmp(ibuf-4, "aaaa", 4) && !memcmp(ibuf+sz, "bbbb", 4)); + g_free(ibuf-4); + g_assert(!memcmp(obuf-4, "xxxx", 4) && !memcmp(obuf+sz, "yyyy", 4)); + g_free(obuf-4); + iov_free(iov, niov); +} + +static void test_to_from_buf(void) +{ + int x; + for (x = 0; x < 4; ++x) { + test_to_from_buf_1(); + } +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + g_test_rand_int(); + g_test_add_func("/basic/iov/from-to-buf", test_to_from_buf); + return g_test_run(); +} From 3d9b49254f893f2a3739400e536de25db1cdc5f9 Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Sat, 10 Mar 2012 16:54:23 +0400 Subject: [PATCH 04/11] consolidate qemu_iovec_memset{,_skip}() into single function and use existing iov_memset() This patch combines two functions into one, and replaces the implementation with already existing iov_memset() from iov.c. The new prototype of qemu_iovec_memset(): size_t qemu_iovec_memset(qiov, size_t offset, int fillc, size_t bytes) It is different from former qemu_iovec_memset_skip(), and I want to make other functions to be consistent with it too: first how much to skip, second what, and 3rd how many of it. It also returns actual number of bytes filled in, which may be less than the requested `bytes' if qiov is smaller than offset+bytes, in the same way iov_memset() does. While at it, use utility function iov_memset() from iov.h in posix-aio-compat.c, where qiov was used. Signed-off-by: Michael Tokarev --- Makefile | 3 ++- Makefile.objs | 4 ++-- block/qcow2.c | 6 +++--- block/qed.c | 4 ++-- cutils.c | 44 ++++---------------------------------------- linux-aio.c | 4 ++-- posix-aio-compat.c | 8 +++----- qemu-common.h | 5 ++--- 8 files changed, 20 insertions(+), 58 deletions(-) diff --git a/Makefile b/Makefile index 9b7a85e4d..017836bc0 100644 --- a/Makefile +++ b/Makefile @@ -154,7 +154,8 @@ qemu-img.o: qemu-img-cmds.h qemu-img.o qemu-tool.o qemu-nbd.o qemu-io.o cmd.o qemu-ga.o: $(GENERATED_HEADERS) tools-obj-y = $(oslib-obj-y) $(trace-obj-y) qemu-tool.o qemu-timer.o \ - qemu-timer-common.o main-loop.o notify.o iohandler.o cutils.o async.o + qemu-timer-common.o main-loop.o notify.o \ + iohandler.o cutils.o iov.o async.o tools-obj-$(CONFIG_POSIX) += compatfd.o qemu-img$(EXESUF): qemu-img.o $(tools-obj-y) $(block-obj-y) diff --git a/Makefile.objs b/Makefile.objs index 70c5c79a6..f173946fd 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -42,7 +42,7 @@ coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o ####################################################################### # block-obj-y is code used by both qemu system emulation and qemu-img -block-obj-y = cutils.o cache-utils.o qemu-option.o module.o async.o +block-obj-y = cutils.o iov.o cache-utils.o qemu-option.o module.o async.o block-obj-y += nbd.o block.o aio.o aes.o qemu-config.o qemu-progress.o qemu-sockets.o block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y) block-obj-$(CONFIG_POSIX) += posix-aio-compat.o @@ -198,7 +198,7 @@ common-obj-$(CONFIG_XEN_BACKEND) += xen_console.o xenfb.o xen_disk.o xen_nic.o user-obj-y = user-obj-y += envlist.o path.o user-obj-y += tcg-runtime.o host-utils.o -user-obj-y += cutils.o cache-utils.o +user-obj-y += cutils.o iov.o cache-utils.o user-obj-y += module.o user-obj-y += qemu-user.o user-obj-y += $(trace-obj-y) diff --git a/block/qcow2.c b/block/qcow2.c index c2e49cded..fcbf95273 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -510,7 +510,7 @@ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, else n1 = bs->total_sectors - sector_num; - qemu_iovec_memset_skip(qiov, 0, 512 * (nb_sectors - n1), 512 * n1); + qemu_iovec_memset(qiov, 512 * n1, 0, 512 * (nb_sectors - n1)); return n1; } @@ -571,7 +571,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, } } else { /* Note: in this case, no need to wait */ - qemu_iovec_memset(&hd_qiov, 0, 512 * cur_nr_sectors); + qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors); } break; @@ -580,7 +580,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, ret = -EIO; goto fail; } - qemu_iovec_memset(&hd_qiov, 0, 512 * cur_nr_sectors); + qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors); break; case QCOW2_CLUSTER_COMPRESSED: diff --git a/block/qed.c b/block/qed.c index 30a31f907..40bdb5364 100644 --- a/block/qed.c +++ b/block/qed.c @@ -736,7 +736,7 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos, /* Zero all sectors if reading beyond the end of the backing file */ if (pos >= backing_length || pos + qiov->size > backing_length) { - qemu_iovec_memset(qiov, 0, qiov->size); + qemu_iovec_memset(qiov, 0, 0, qiov->size); } /* Complete now if there are no backing file sectors to read */ @@ -1251,7 +1251,7 @@ static void qed_aio_read_data(void *opaque, int ret, /* Handle zero cluster and backing file reads */ if (ret == QED_CLUSTER_ZERO) { - qemu_iovec_memset(&acb->cur_qiov, 0, acb->cur_qiov.size); + qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size); qed_aio_next_io(acb, 0); return; } else if (ret != QED_CLUSTER_FOUND) { diff --git a/cutils.c b/cutils.c index af308cd7b..0ddf4c7d7 100644 --- a/cutils.c +++ b/cutils.c @@ -26,6 +26,7 @@ #include #include "qemu_socket.h" +#include "iov.h" void pstrcpy(char *buf, int buf_size, const char *str) { @@ -260,47 +261,10 @@ void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count) } } -void qemu_iovec_memset(QEMUIOVector *qiov, int c, size_t count) +size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset, + int fillc, size_t bytes) { - size_t n; - int i; - - for (i = 0; i < qiov->niov && count; ++i) { - n = MIN(count, qiov->iov[i].iov_len); - memset(qiov->iov[i].iov_base, c, n); - count -= n; - } -} - -void qemu_iovec_memset_skip(QEMUIOVector *qiov, int c, size_t count, - size_t skip) -{ - int i; - size_t done; - void *iov_base; - uint64_t iov_len; - - done = 0; - for (i = 0; (i < qiov->niov) && (done != count); i++) { - if (skip >= qiov->iov[i].iov_len) { - /* Skip the whole iov */ - skip -= qiov->iov[i].iov_len; - continue; - } else { - /* Skip only part (or nothing) of the iov */ - iov_base = (uint8_t*) qiov->iov[i].iov_base + skip; - iov_len = qiov->iov[i].iov_len - skip; - skip = 0; - } - - if (done + iov_len > count) { - memset(iov_base, c, count - done); - break; - } else { - memset(iov_base, c, iov_len); - } - done += iov_len; - } + return iov_memset(qiov->iov, qiov->niov, offset, fillc, bytes); } /* diff --git a/linux-aio.c b/linux-aio.c index fa0fbf34a..ce9b5d4be 100644 --- a/linux-aio.c +++ b/linux-aio.c @@ -63,8 +63,8 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s, } else if (ret >= 0) { /* Short reads mean EOF, pad with zeros. */ if (laiocb->is_read) { - qemu_iovec_memset_skip(laiocb->qiov, 0, - laiocb->qiov->size - ret, ret); + qemu_iovec_memset(laiocb->qiov, ret, 0, + laiocb->qiov->size - ret); } else { ret = -EINVAL; } diff --git a/posix-aio-compat.c b/posix-aio-compat.c index 68361f555..96e4daf50 100644 --- a/posix-aio-compat.c +++ b/posix-aio-compat.c @@ -29,6 +29,7 @@ #include "qemu-common.h" #include "trace.h" #include "block_int.h" +#include "iov.h" #include "block/raw-posix-aio.h" @@ -351,11 +352,8 @@ static void *aio_thread(void *unused) if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->common.bs->growable) { /* A short read means that we have reached EOF. Pad the buffer * with zeros for bytes after EOF. */ - QEMUIOVector qiov; - - qemu_iovec_init_external(&qiov, aiocb->aio_iov, - aiocb->aio_niov); - qemu_iovec_memset_skip(&qiov, 0, aiocb->aio_nbytes - ret, ret); + iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret, + 0, aiocb->aio_nbytes - ret); ret = aiocb->aio_nbytes; } diff --git a/qemu-common.h b/qemu-common.h index 91e056296..e752d2b6c 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -347,9 +347,8 @@ void qemu_iovec_destroy(QEMUIOVector *qiov); void qemu_iovec_reset(QEMUIOVector *qiov); void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf); void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count); -void qemu_iovec_memset(QEMUIOVector *qiov, int c, size_t count); -void qemu_iovec_memset_skip(QEMUIOVector *qiov, int c, size_t count, - size_t skip); +size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset, + int fillc, size_t bytes); bool buffer_is_zero(const void *buf, size_t len); From 03396148bca54c0e81ad8eecb12a136456d14c16 Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Thu, 7 Jun 2012 20:17:55 +0400 Subject: [PATCH 05/11] allow qemu_iovec_from_buffer() to specify offset from which to start copying Similar to qemu_iovec_memset(QEMUIOVector *qiov, size_t offset, int c, size_t bytes); the new prototype is: qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset, const void *buf, size_t bytes); The processing starts at offset bytes within qiov. This way, we may copy a bounce buffer directly to a middle of qiov. This is exactly the same function as iov_from_buf() from iov.c, so use the existing implementation and rename it to qemu_iovec_from_buf() to be shorter and to match the utility function. As with utility implementation, we now assert that the offset is inside actual iovec. Nothing changed for current callers, because `offset' parameter is new. While at it, stop using "bounce-qiov" in block/qcow2.c and copy decrypted data directly from cluster_data instead of recreating a temp qiov for doing that. Signed-off-by: Michael Tokarev --- block.c | 6 +++--- block/curl.c | 6 +++--- block/qcow.c | 2 +- block/qcow2.c | 9 +++------ block/rbd.c | 2 +- cutils.c | 16 +++------------- qemu-common.h | 3 ++- 7 files changed, 16 insertions(+), 28 deletions(-) diff --git a/block.c b/block.c index 7547051ec..e0ef95e09 100644 --- a/block.c +++ b/block.c @@ -1821,8 +1821,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, } skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE; - qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes, - nb_sectors * BDRV_SECTOR_SIZE); + qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes, + nb_sectors * BDRV_SECTOR_SIZE); err: qemu_vfree(bounce_buffer); @@ -3382,7 +3382,7 @@ static void bdrv_aio_bh_cb(void *opaque) BlockDriverAIOCBSync *acb = opaque; if (!acb->is_write) - qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size); + qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); qemu_vfree(acb->bounce); acb->common.cb(acb->common.opaque, acb->ret); qemu_bh_delete(acb->bh); diff --git a/block/curl.c b/block/curl.c index bf3680ba5..e7c3634d3 100644 --- a/block/curl.c +++ b/block/curl.c @@ -140,8 +140,8 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque) continue; if ((s->buf_off >= acb->end)) { - qemu_iovec_from_buffer(acb->qiov, s->orig_buf + acb->start, - acb->end - acb->start); + qemu_iovec_from_buf(acb->qiov, 0, s->orig_buf + acb->start, + acb->end - acb->start); acb->common.cb(acb->common.opaque, 0); qemu_aio_release(acb); s->acb[i] = NULL; @@ -176,7 +176,7 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len, { char *buf = state->orig_buf + (start - state->buf_start); - qemu_iovec_from_buffer(acb->qiov, buf, len); + qemu_iovec_from_buf(acb->qiov, 0, buf, len); acb->common.cb(acb->common.opaque, 0); return FIND_RET_OK; diff --git a/block/qcow.c b/block/qcow.c index 35dff497a..728010319 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -540,7 +540,7 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, qemu_co_mutex_unlock(&s->lock); if (qiov->niov > 1) { - qemu_iovec_from_buffer(qiov, orig_buf, qiov->size); + qemu_iovec_from_buf(qiov, 0, orig_buf, qiov->size); qemu_vfree(orig_buf); } diff --git a/block/qcow2.c b/block/qcow2.c index fcbf95273..ccc599b51 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -590,7 +590,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, goto fail; } - qemu_iovec_from_buffer(&hd_qiov, + qemu_iovec_from_buf(&hd_qiov, 0, s->cluster_cache + index_in_cluster * 512, 512 * cur_nr_sectors); break; @@ -630,11 +630,8 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, if (s->crypt_method) { qcow2_encrypt_sectors(s, sector_num, cluster_data, cluster_data, cur_nr_sectors, 0, &s->aes_decrypt_key); - qemu_iovec_reset(&hd_qiov); - qemu_iovec_copy(&hd_qiov, qiov, bytes_done, - cur_nr_sectors * 512); - qemu_iovec_from_buffer(&hd_qiov, cluster_data, - 512 * cur_nr_sectors); + qemu_iovec_from_buf(qiov, bytes_done, + cluster_data, 512 * cur_nr_sectors); } break; diff --git a/block/rbd.c b/block/rbd.c index 1280d66d3..8bb3252bc 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -620,7 +620,7 @@ static void rbd_aio_bh_cb(void *opaque) RBDAIOCB *acb = opaque; if (acb->cmd == RBD_AIO_READ) { - qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size); + qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); } qemu_vfree(acb->bounce); acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret)); diff --git a/cutils.c b/cutils.c index 0ddf4c7d7..b4dd84464 100644 --- a/cutils.c +++ b/cutils.c @@ -245,20 +245,10 @@ void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf) } } -void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count) +size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset, + const void *buf, size_t bytes) { - const uint8_t *p = (const uint8_t *)buf; - size_t copy; - int i; - - for (i = 0; i < qiov->niov && count; ++i) { - copy = count; - if (copy > qiov->iov[i].iov_len) - copy = qiov->iov[i].iov_len; - memcpy(qiov->iov[i].iov_base, p, copy); - p += copy; - count -= copy; - } + return iov_from_buf(qiov->iov, qiov->niov, offset, buf, bytes); } size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset, diff --git a/qemu-common.h b/qemu-common.h index e752d2b6c..430ec15a4 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -346,7 +346,8 @@ void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size); void qemu_iovec_destroy(QEMUIOVector *qiov); void qemu_iovec_reset(QEMUIOVector *qiov); void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf); -void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count); +size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset, + const void *buf, size_t bytes); size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset, int fillc, size_t bytes); From 1b093c480a32051cc856b6ab2395d8cbc3ae99da Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Mon, 12 Mar 2012 21:28:06 +0400 Subject: [PATCH 06/11] consolidate qemu_iovec_copy() and qemu_iovec_concat() and make them consistent qemu_iovec_concat() is currently a wrapper for qemu_iovec_copy(), use the former (with extra "0" arg) in a few places where it is used. Change skip argument of qemu_iovec_copy() from uint64_t to size_t, since size of qiov itself is size_t, so there's no way to skip larger sizes. Rename it to soffset, to make it clear that the offset is applied to src. Also change the only usage of uint64_t in hw/9pfs/virtio-9p.c, in v9fs_init_qiov_from_pdu() - all callers of it actually uses size_t too, not uint64_t. One added restriction: as for all other iovec-related functions, soffset must point inside src. Order of argumens is already good: qemu_iovec_memset(QEMUIOVector *qiov, size_t offset, int c, size_t bytes) vs: qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t soffset, size_t sbytes) (note soffset is after _src_ not dst, since it applies to src; for memset it applies to qiov). Note that in many places where this function is used, the previous call is qemu_iovec_reset(), which means many callers actually want copy (replacing dst content), not concat. So we may want to add a wrapper like qemu_iovec_copy() with the same arguments but which calls qemu_iovec_reset() before _concat(). Signed-off-by: Michael Tokarev --- block.c | 4 ++-- block/qcow2.c | 4 ++-- block/qed.c | 6 ++--- cutils.c | 54 +++++++++++++++++---------------------------- hw/9pfs/virtio-9p.c | 8 +++---- qemu-common.h | 5 ++--- 6 files changed, 33 insertions(+), 48 deletions(-) diff --git a/block.c b/block.c index e0ef95e09..5b1a86249 100644 --- a/block.c +++ b/block.c @@ -3101,13 +3101,13 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs, // Add the first request to the merged one. If the requests are // overlapping, drop the last sectors of the first request. size = (reqs[i].sector - reqs[outidx].sector) << 9; - qemu_iovec_concat(qiov, reqs[outidx].qiov, size); + qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size); // We should need to add any zeros between the two requests assert (reqs[i].sector <= oldreq_last); // Add the second request - qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size); + qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size); reqs[outidx].nb_sectors = qiov->size >> 9; reqs[outidx].qiov = qiov; diff --git a/block/qcow2.c b/block/qcow2.c index ccc599b51..8458d10c1 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -549,7 +549,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, index_in_cluster = sector_num & (s->cluster_sectors - 1); qemu_iovec_reset(&hd_qiov); - qemu_iovec_copy(&hd_qiov, qiov, bytes_done, + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_nr_sectors * 512); switch (ret) { @@ -720,7 +720,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, assert((cluster_offset & 511) == 0); qemu_iovec_reset(&hd_qiov); - qemu_iovec_copy(&hd_qiov, qiov, bytes_done, + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_nr_sectors * 512); if (s->crypt_method) { diff --git a/block/qed.c b/block/qed.c index 40bdb5364..847417a3d 100644 --- a/block/qed.c +++ b/block/qed.c @@ -1131,7 +1131,7 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) acb->cur_nclusters = qed_bytes_to_clusters(s, qed_offset_into_cluster(s, acb->cur_pos) + len); - qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); + qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); if (acb->flags & QED_AIOCB_ZERO) { /* Skip ahead if the clusters are already zero */ @@ -1177,7 +1177,7 @@ static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len) /* Calculate the I/O vector */ acb->cur_cluster = offset; - qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); + qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); /* Do the actual write */ qed_aio_write_main(acb, 0); @@ -1247,7 +1247,7 @@ static void qed_aio_read_data(void *opaque, int ret, goto err; } - qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); + qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); /* Handle zero cluster and backing file reads */ if (ret == QED_CLUSTER_ZERO) { diff --git a/cutils.c b/cutils.c index b4dd84464..1aeac153a 100644 --- a/cutils.c +++ b/cutils.c @@ -172,48 +172,34 @@ void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len) } /* - * Copies iovecs from src to the end of dst. It starts copying after skipping - * the given number of bytes in src and copies until src is completely copied - * or the total size of the copied iovec reaches size.The size of the last - * copied iovec is changed in order to fit the specified total size if it isn't - * a perfect fit already. + * Concatenates (partial) iovecs from src to the end of dst. + * It starts copying after skipping `soffset' bytes at the + * beginning of src and adds individual vectors from src to + * dst copies up to `sbytes' bytes total, or up to the end + * of src if it comes first. This way, it is okay to specify + * very large value for `sbytes' to indicate "up to the end + * of src". + * Only vector pointers are processed, not the actual data buffers. */ -void qemu_iovec_copy(QEMUIOVector *dst, QEMUIOVector *src, uint64_t skip, - size_t size) +void qemu_iovec_concat(QEMUIOVector *dst, + QEMUIOVector *src, size_t soffset, size_t sbytes) { int i; size_t done; - void *iov_base; - uint64_t iov_len; - + struct iovec *siov = src->iov; assert(dst->nalloc != -1); - - done = 0; - for (i = 0; (i < src->niov) && (done != size); i++) { - if (skip >= src->iov[i].iov_len) { - /* Skip the whole iov */ - skip -= src->iov[i].iov_len; - continue; + assert(src->size >= soffset); + for (i = 0, done = 0; done < sbytes && i < src->niov; i++) { + if (soffset < siov[i].iov_len) { + size_t len = MIN(siov[i].iov_len - soffset, sbytes - done); + qemu_iovec_add(dst, siov[i].iov_base + soffset, len); + done += len; + soffset = 0; } else { - /* Skip only part (or nothing) of the iov */ - iov_base = (uint8_t*) src->iov[i].iov_base + skip; - iov_len = src->iov[i].iov_len - skip; - skip = 0; + soffset -= siov[i].iov_len; } - - if (done + iov_len > size) { - qemu_iovec_add(dst, iov_base, size - done); - break; - } else { - qemu_iovec_add(dst, iov_base, iov_len); - } - done += iov_len; } -} - -void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size) -{ - qemu_iovec_copy(dst, src, 0, size); + /* return done; */ } void qemu_iovec_destroy(QEMUIOVector *qiov) diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c index c633fb9b7..f4a702638 100644 --- a/hw/9pfs/virtio-9p.c +++ b/hw/9pfs/virtio-9p.c @@ -1648,7 +1648,7 @@ static int v9fs_do_readdir_with_stat(V9fsPDU *pdu, * with qemu_iovec_destroy(). */ static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu, - uint64_t skip, size_t size, + size_t skip, size_t size, bool is_write) { QEMUIOVector elem; @@ -1665,7 +1665,7 @@ static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu, qemu_iovec_init_external(&elem, iov, niov); qemu_iovec_init(qiov, niov); - qemu_iovec_copy(qiov, &elem, skip, size); + qemu_iovec_concat(qiov, &elem, skip, size); } static void v9fs_read(void *opaque) @@ -1715,7 +1715,7 @@ static void v9fs_read(void *opaque) qemu_iovec_init(&qiov, qiov_full.niov); do { qemu_iovec_reset(&qiov); - qemu_iovec_copy(&qiov, &qiov_full, count, qiov_full.size - count); + qemu_iovec_concat(&qiov, &qiov_full, count, qiov_full.size - count); if (0) { print_sg(qiov.iov, qiov.niov); } @@ -1970,7 +1970,7 @@ static void v9fs_write(void *opaque) qemu_iovec_init(&qiov, qiov_full.niov); do { qemu_iovec_reset(&qiov); - qemu_iovec_copy(&qiov, &qiov_full, total, qiov_full.size - total); + qemu_iovec_concat(&qiov, &qiov_full, total, qiov_full.size - total); if (0) { print_sg(qiov.iov, qiov.niov); } diff --git a/qemu-common.h b/qemu-common.h index 430ec15a4..cae7bb660 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -340,9 +340,8 @@ typedef struct QEMUIOVector { void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint); void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov); void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len); -void qemu_iovec_copy(QEMUIOVector *dst, QEMUIOVector *src, uint64_t skip, - size_t size); -void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size); +void qemu_iovec_concat(QEMUIOVector *dst, + QEMUIOVector *src, size_t soffset, size_t sbytes); void qemu_iovec_destroy(QEMUIOVector *qiov); void qemu_iovec_reset(QEMUIOVector *qiov); void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf); From d5e6b1619c516fa1e2ee4d8d20f08fcda4fb67a0 Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Thu, 7 Jun 2012 20:21:06 +0400 Subject: [PATCH 07/11] change qemu_iovec_to_buf() to match other to,from_buf functions It now allows specifying offset within qiov to start from and amount of bytes to copy. Actual implementation is just a call to iov_to_buf(). Signed-off-by: Michael Tokarev --- block.c | 2 +- block/iscsi.c | 3 +-- block/qcow.c | 2 +- block/qcow2.c | 2 +- block/rbd.c | 2 +- cutils.c | 11 +++-------- qemu-common.h | 3 ++- 7 files changed, 10 insertions(+), 15 deletions(-) diff --git a/block.c b/block.c index 5b1a86249..63b7ae083 100644 --- a/block.c +++ b/block.c @@ -3408,7 +3408,7 @@ static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs, acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb); if (is_write) { - qemu_iovec_to_buffer(acb->qiov, acb->bounce); + qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors); } else { acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors); diff --git a/block/iscsi.c b/block/iscsi.c index 22888a084..ecb7a2211 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -240,8 +240,7 @@ iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num, /* this will allow us to get rid of 'buf' completely */ size = nb_sectors * BDRV_SECTOR_SIZE; acb->buf = g_malloc(size); - qemu_iovec_to_buffer(acb->qiov, acb->buf); - + qemu_iovec_to_buf(acb->qiov, 0, acb->buf, size); acb->task = malloc(sizeof(struct scsi_task)); if (acb->task == NULL) { diff --git a/block/qcow.c b/block/qcow.c index 728010319..7b5ab87d2 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -569,7 +569,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, if (qiov->niov > 1) { buf = orig_buf = qemu_blockalign(bs, qiov->size); - qemu_iovec_to_buffer(qiov, buf); + qemu_iovec_to_buf(qiov, 0, buf, qiov->size); } else { orig_buf = NULL; buf = (uint8_t *)qiov->iov->iov_base; diff --git a/block/qcow2.c b/block/qcow2.c index 8458d10c1..1b5b36cfc 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -731,7 +731,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, assert(hd_qiov.size <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); - qemu_iovec_to_buffer(&hd_qiov, cluster_data); + qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size); qcow2_encrypt_sectors(s, sector_num, cluster_data, cluster_data, cur_nr_sectors, 1, &s->aes_encrypt_key); diff --git a/block/rbd.c b/block/rbd.c index 8bb3252bc..49a4787b5 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -674,7 +674,7 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs, acb->bh = NULL; if (cmd == RBD_AIO_WRITE) { - qemu_iovec_to_buffer(acb->qiov, acb->bounce); + qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); } buf = acb->bounce; diff --git a/cutils.c b/cutils.c index 1aeac153a..352bc521b 100644 --- a/cutils.c +++ b/cutils.c @@ -220,15 +220,10 @@ void qemu_iovec_reset(QEMUIOVector *qiov) qiov->size = 0; } -void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf) +size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset, + void *buf, size_t bytes) { - uint8_t *p = (uint8_t *)buf; - int i; - - for (i = 0; i < qiov->niov; ++i) { - memcpy(p, qiov->iov[i].iov_base, qiov->iov[i].iov_len); - p += qiov->iov[i].iov_len; - } + return iov_to_buf(qiov->iov, qiov->niov, offset, buf, bytes); } size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset, diff --git a/qemu-common.h b/qemu-common.h index cae7bb660..056e495f3 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -344,7 +344,8 @@ void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t soffset, size_t sbytes); void qemu_iovec_destroy(QEMUIOVector *qiov); void qemu_iovec_reset(QEMUIOVector *qiov); -void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf); +size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset, + void *buf, size_t bytes); size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset, const void *buf, size_t bytes); size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset, From 3e80bf9351f8fec9085c46df6da075efd5e71003 Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Sat, 10 Mar 2012 17:00:41 +0400 Subject: [PATCH 08/11] rename qemu_sendv to iov_send, change proto and move declarations to iov.h Rename arguments and use size_t for sizes instead of int, from int qemu_sendv(int sockfd, struct iovec *iov, int len, int iov_offset) to ssize_t iov_send(int sockfd, struct iovec *iov, size_t offset, size_t bytes) The main motivation was to make it clear that length and offset are in _bytes_, not in iov elements: it was very confusing before, because all standard functions which deals with iovecs expects number of iovs, not bytes, even the fact that struct iovec has iov_len and iov_ prefix does not help. With "bytes" and "offset", especially since they're now size_t, it is much more explicit. Also change the return type to be ssize_t instead of int. This also changes it to match other iov-related functons, but not _quite_: there's still no argument indicating where iovec ends, ie, no iov_cnt parameter as used in iov_size() and friends. If will be added in subsequent patch/rewrite. All callers of qemu_sendv() and qemu_recvv() and related, like qemu_co_sendv() and qemu_co_recvv(), were checked to verify that it is safe to use unsigned datatype instead of int. Note that the order of arguments is changed to: offset and bytes (len and iov_offset) are swapped with each other. This is to make them consistent with very similar functions from qemu_iovec family, where offset always follows qiov, to mean the place in it to start from. Signed-off-by: Michael Tokarev --- cutils.c | 41 +++++++++++++---------------------------- iov.h | 18 ++++++++++++++++++ qemu-common.h | 3 --- qemu-coroutine-io.c | 5 +++-- 4 files changed, 34 insertions(+), 33 deletions(-) diff --git a/cutils.c b/cutils.c index 352bc521b..2ad5fa3a9 100644 --- a/cutils.c +++ b/cutils.c @@ -376,43 +376,28 @@ int qemu_parse_fd(const char *param) return fd; } -/* - * Send/recv data with iovec buffers - * - * This function send/recv data from/to the iovec buffer directly. - * The first `offset' bytes in the iovec buffer are skipped and next - * `len' bytes are used. - * - * For example, - * - * do_sendv_recvv(sockfd, iov, len, offset, 1); - * - * is equal to - * - * char *buf = malloc(size); - * iov_to_buf(iov, iovcnt, buf, offset, size); - * send(sockfd, buf, size, 0); - * free(buf); - */ -static int do_sendv_recvv(int sockfd, struct iovec *iov, int len, int offset, +static ssize_t do_sendv_recvv(int sockfd, struct iovec *iov, + size_t offset, size_t bytes, int do_sendv) { - int ret, diff, iovlen; + int iovlen; + ssize_t ret; + size_t diff; struct iovec *last_iov; /* last_iov is inclusive, so count from one. */ iovlen = 1; last_iov = iov; - len += offset; + bytes += offset; - while (last_iov->iov_len < len) { - len -= last_iov->iov_len; + while (last_iov->iov_len < bytes) { + bytes -= last_iov->iov_len; last_iov++; iovlen++; } - diff = last_iov->iov_len - len; + diff = last_iov->iov_len - bytes; last_iov->iov_len -= diff; while (iov->iov_len <= offset) { @@ -474,13 +459,13 @@ static int do_sendv_recvv(int sockfd, struct iovec *iov, int len, int offset, return ret; } -int qemu_recvv(int sockfd, struct iovec *iov, int len, int iov_offset) +ssize_t iov_recv(int sockfd, struct iovec *iov, size_t offset, size_t bytes) { - return do_sendv_recvv(sockfd, iov, len, iov_offset, 0); + return do_sendv_recvv(sockfd, iov, offset, bytes, 0); } -int qemu_sendv(int sockfd, struct iovec *iov, int len, int iov_offset) +ssize_t iov_send(int sockfd, struct iovec *iov, size_t offset, size_t bytes) { - return do_sendv_recvv(sockfd, iov, len, iov_offset, 1); + return do_sendv_recvv(sockfd, iov, offset, bytes, 1); } diff --git a/iov.h b/iov.h index 19ee3b3ac..5aa2f455d 100644 --- a/iov.h +++ b/iov.h @@ -54,6 +54,24 @@ size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt, size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, size_t offset, int fillc, size_t bytes); +/* + * Send/recv data from/to iovec buffers directly + * + * `offset' bytes in the beginning of iovec buffer are skipped and + * next `bytes' bytes are used, which must be within data of iovec. + * + * r = iov_send(sockfd, iov, offset, bytes); + * + * is logically equivalent to + * + * char *buf = malloc(bytes); + * iov_to_buf(iov, iovcnt, offset, buf, bytes); + * r = send(sockfd, buf, bytes, 0); + * free(buf); + */ +ssize_t iov_recv(int sockfd, struct iovec *iov, size_t offset, size_t bytes); +ssize_t iov_send(int sockfd, struct iovec *iov, size_t offset, size_t bytes); + /** * Produce a text hexdump of iovec `iov' with `iov_cnt' number of elements * in file `fp', prefixing each line with `prefix' and processing not more diff --git a/qemu-common.h b/qemu-common.h index 056e495f3..41b8ae77c 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -205,9 +205,6 @@ int qemu_pipe(int pipefd[2]); #define qemu_recv(sockfd, buf, len, flags) recv(sockfd, buf, len, flags) #endif -int qemu_recvv(int sockfd, struct iovec *iov, int len, int iov_offset); -int qemu_sendv(int sockfd, struct iovec *iov, int len, int iov_offset); - /* Error handling. */ void QEMU_NORETURN hw_error(const char *fmt, ...) GCC_FMT_ATTR(1, 2); diff --git a/qemu-coroutine-io.c b/qemu-coroutine-io.c index 40fd51439..0461a9aae 100644 --- a/qemu-coroutine-io.c +++ b/qemu-coroutine-io.c @@ -25,6 +25,7 @@ #include "qemu-common.h" #include "qemu_socket.h" #include "qemu-coroutine.h" +#include "iov.h" int coroutine_fn qemu_co_recvv(int sockfd, struct iovec *iov, int len, int iov_offset) @@ -32,7 +33,7 @@ int coroutine_fn qemu_co_recvv(int sockfd, struct iovec *iov, int total = 0; int ret; while (len) { - ret = qemu_recvv(sockfd, iov, len, iov_offset + total); + ret = iov_recv(sockfd, iov, iov_offset + total, len); if (ret < 0) { if (errno == EAGAIN) { qemu_coroutine_yield(); @@ -58,7 +59,7 @@ int coroutine_fn qemu_co_sendv(int sockfd, struct iovec *iov, int total = 0; int ret; while (len) { - ret = qemu_sendv(sockfd, iov, len, iov_offset + total); + ret = iov_send(sockfd, iov, iov_offset + total, len); if (ret < 0) { if (errno == EAGAIN) { qemu_coroutine_yield(); From e3e87df4c94319b15017f958e22761aba03c452a Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Wed, 14 Mar 2012 10:56:04 +0400 Subject: [PATCH 09/11] export iov_send_recv() and use it in iov_send() and iov_recv() Rename do_sendv_recvv() to iov_send_recv(), change its last arg (do_send) from int to bool, export it in iov.h, and made the two callers of it (iov_send() and iov_recv()) to be trivial #defines just adding 5th arg. iov_send_recv() will be used later. Signed-off-by: Michael Tokarev --- cutils.c | 17 +++-------------- iov.h | 10 +++++++--- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/cutils.c b/cutils.c index 2ad5fa3a9..cb6f63848 100644 --- a/cutils.c +++ b/cutils.c @@ -376,9 +376,9 @@ int qemu_parse_fd(const char *param) return fd; } -static ssize_t do_sendv_recvv(int sockfd, struct iovec *iov, - size_t offset, size_t bytes, - int do_sendv) +ssize_t iov_send_recv(int sockfd, struct iovec *iov, + size_t offset, size_t bytes, + bool do_sendv) { int iovlen; ssize_t ret; @@ -458,14 +458,3 @@ static ssize_t do_sendv_recvv(int sockfd, struct iovec *iov, last_iov->iov_len += diff; return ret; } - -ssize_t iov_recv(int sockfd, struct iovec *iov, size_t offset, size_t bytes) -{ - return do_sendv_recvv(sockfd, iov, offset, bytes, 0); -} - -ssize_t iov_send(int sockfd, struct iovec *iov, size_t offset, size_t bytes) -{ - return do_sendv_recvv(sockfd, iov, offset, bytes, 1); -} - diff --git a/iov.h b/iov.h index 5aa2f455d..9b6a88392 100644 --- a/iov.h +++ b/iov.h @@ -60,7 +60,7 @@ size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, * `offset' bytes in the beginning of iovec buffer are skipped and * next `bytes' bytes are used, which must be within data of iovec. * - * r = iov_send(sockfd, iov, offset, bytes); + * r = iov_send_recv(sockfd, iov, offset, bytes, true); * * is logically equivalent to * @@ -69,8 +69,12 @@ size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, * r = send(sockfd, buf, bytes, 0); * free(buf); */ -ssize_t iov_recv(int sockfd, struct iovec *iov, size_t offset, size_t bytes); -ssize_t iov_send(int sockfd, struct iovec *iov, size_t offset, size_t bytes); +ssize_t iov_send_recv(int sockfd, struct iovec *iov, + size_t offset, size_t bytes, bool do_send); +#define iov_recv(sockfd, iov, offset, bytes) \ + iov_send_recv(sockfd, iov, offset, bytes, false) +#define iov_send(sockfd, iov, offset, bytes) \ + iov_send_recv(sockfd, iov, offset, bytes, true) /** * Produce a text hexdump of iovec `iov' with `iov_cnt' number of elements From 2fc8ae1dd77fbc55146b602f703add6dc314dea4 Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Thu, 7 Jun 2012 20:22:46 +0400 Subject: [PATCH 10/11] cleanup qemu_co_sendv(), qemu_co_recvv() and friends The same as for non-coroutine versions in previous patches: rename arguments to be more obvious, change type of arguments from int to size_t where appropriate, and use common code for send and receive paths (with one extra argument) since these are exactly the same. Use common iov_send_recv() directly. qemu_co_sendv(), qemu_co_recvv(), and qemu_co_recv() are now trivial #define's merely adding one extra arg. qemu_co_sendv() and qemu_co_recvv() callers are converted to different argument order and extra `iov_cnt' argument. Signed-off-by: Michael Tokarev --- block/nbd.c | 18 +++++----- block/sheepdog.c | 6 ++-- qemu-common.h | 37 ++++++++++---------- qemu-coroutine-io.c | 82 ++++++++++++++------------------------------- 4 files changed, 55 insertions(+), 88 deletions(-) diff --git a/block/nbd.c b/block/nbd.c index 121261422..2bce47bf7 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -196,7 +196,7 @@ static void nbd_restart_write(void *opaque) } static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request, - struct iovec *iov, int offset) + QEMUIOVector *qiov, int offset) { int rc, ret; @@ -205,8 +205,9 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request, qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write, nbd_have_request, s); rc = nbd_send_request(s->sock, request); - if (rc >= 0 && iov) { - ret = qemu_co_sendv(s->sock, iov, request->len, offset); + if (rc >= 0 && qiov) { + ret = qemu_co_sendv(s->sock, qiov->iov, qiov->niov, + offset, request->len); if (ret != request->len) { return -EIO; } @@ -220,7 +221,7 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request, static void nbd_co_receive_reply(BDRVNBDState *s, struct nbd_request *request, struct nbd_reply *reply, - struct iovec *iov, int offset) + QEMUIOVector *qiov, int offset) { int ret; @@ -231,8 +232,9 @@ static void nbd_co_receive_reply(BDRVNBDState *s, struct nbd_request *request, if (reply->handle != request->handle) { reply->error = EIO; } else { - if (iov && reply->error == 0) { - ret = qemu_co_recvv(s->sock, iov, request->len, offset); + if (qiov && reply->error == 0) { + ret = qemu_co_recvv(s->sock, qiov->iov, qiov->niov, + offset, request->len); if (ret != request->len) { reply->error = EIO; } @@ -349,7 +351,7 @@ static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num, if (ret < 0) { reply.error = -ret; } else { - nbd_co_receive_reply(s, &request, &reply, qiov->iov, offset); + nbd_co_receive_reply(s, &request, &reply, qiov, offset); } nbd_coroutine_end(s, &request); return -reply.error; @@ -374,7 +376,7 @@ static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num, request.len = nb_sectors * 512; nbd_coroutine_start(s, &request); - ret = nbd_co_send_request(s, &request, qiov->iov, offset); + ret = nbd_co_send_request(s, &request, qiov, offset); if (ret < 0) { reply.error = -ret; } else { diff --git a/block/sheepdog.c b/block/sheepdog.c index f46ca8fb6..2c7aecec4 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -719,8 +719,8 @@ static void coroutine_fn aio_read_response(void *opaque) } break; case AIOCB_READ_UDATA: - ret = qemu_co_recvv(fd, acb->qiov->iov, rsp.data_length, - aio_req->iov_offset); + ret = qemu_co_recvv(fd, acb->qiov->iov, acb->qiov->niov, + aio_req->iov_offset, rsp.data_length); if (ret < 0) { error_report("failed to get the data, %s", strerror(errno)); goto out; @@ -992,7 +992,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, } if (wlen) { - ret = qemu_co_sendv(s->fd, iov, wlen, aio_req->iov_offset); + ret = qemu_co_sendv(s->fd, iov, niov, aio_req->iov_offset, wlen); if (ret < 0) { qemu_co_mutex_unlock(&s->lock); error_report("failed to send a data, %s", strerror(errno)); diff --git a/qemu-common.h b/qemu-common.h index 41b8ae77c..71395771d 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -300,32 +300,29 @@ struct qemu_work_item { void qemu_init_vcpu(void *env); #endif -/** - * Sends an iovec (or optionally a part of it) down a socket, yielding - * when the socket is full. - */ -int qemu_co_sendv(int sockfd, struct iovec *iov, - int len, int iov_offset); /** - * Receives data into an iovec (or optionally into a part of it) from - * a socket, yielding when there is no data in the socket. + * Sends a (part of) iovec down a socket, yielding when the socket is full, or + * Receives data into a (part of) iovec from a socket, + * yielding when there is no data in the socket. + * The same interface as qemu_sendv_recvv(), with added yielding. + * XXX should mark these as coroutine_fn */ -int qemu_co_recvv(int sockfd, struct iovec *iov, - int len, int iov_offset); - +ssize_t qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt, + size_t offset, size_t bytes, bool do_send); +#define qemu_co_recvv(sockfd, iov, iov_cnt, offset, bytes) \ + qemu_co_sendv_recvv(sockfd, iov, iov_cnt, offset, bytes, false) +#define qemu_co_sendv(sockfd, iov, iov_cnt, offset, bytes) \ + qemu_co_sendv_recvv(sockfd, iov, iov_cnt, offset, bytes, true) /** - * Sends a buffer down a socket, yielding when the socket is full. + * The same as above, but with just a single buffer */ -int qemu_co_send(int sockfd, void *buf, int len); - -/** - * Receives data into a buffer from a socket, yielding when there - * is no data in the socket. - */ -int qemu_co_recv(int sockfd, void *buf, int len); - +ssize_t qemu_co_send_recv(int sockfd, void *buf, size_t bytes, bool do_send); +#define qemu_co_recv(sockfd, buf, bytes) \ + qemu_co_send_recv(sockfd, buf, bytes, false) +#define qemu_co_send(sockfd, buf, bytes) \ + qemu_co_send_recv(sockfd, buf, bytes, true) typedef struct QEMUIOVector { struct iovec *iov; diff --git a/qemu-coroutine-io.c b/qemu-coroutine-io.c index 0461a9aae..6693c7824 100644 --- a/qemu-coroutine-io.c +++ b/qemu-coroutine-io.c @@ -27,71 +27,39 @@ #include "qemu-coroutine.h" #include "iov.h" -int coroutine_fn qemu_co_recvv(int sockfd, struct iovec *iov, - int len, int iov_offset) +ssize_t coroutine_fn +qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt, + size_t offset, size_t bytes, bool do_send) { - int total = 0; - int ret; - while (len) { - ret = iov_recv(sockfd, iov, iov_offset + total, len); - if (ret < 0) { + size_t done = 0; + ssize_t ret; + while (done < bytes) { + ret = iov_send_recv(sockfd, iov, + offset + done, bytes - done, do_send); + if (ret > 0) { + done += ret; + } else if (ret < 0) { if (errno == EAGAIN) { qemu_coroutine_yield(); - continue; - } - if (total == 0) { - total = -1; - } - break; - } - if (ret == 0) { - break; - } - total += ret, len -= ret; - } - - return total; -} - -int coroutine_fn qemu_co_sendv(int sockfd, struct iovec *iov, - int len, int iov_offset) -{ - int total = 0; - int ret; - while (len) { - ret = iov_send(sockfd, iov, iov_offset + total, len); - if (ret < 0) { - if (errno == EAGAIN) { - qemu_coroutine_yield(); - continue; - } - if (total == 0) { - total = -1; + } else if (done == 0) { + return -1; + } else { + break; } + } else if (ret == 0 && !do_send) { + /* write (send) should never return 0. + * read (recv) returns 0 for end-of-file (-data). + * In both cases there's little point retrying, + * but we do for write anyway, just in case */ break; } - total += ret, len -= ret; } - - return total; + return done; } -int coroutine_fn qemu_co_recv(int sockfd, void *buf, int len) +ssize_t coroutine_fn +qemu_co_send_recv(int sockfd, void *buf, size_t bytes, bool do_send) { - struct iovec iov; - - iov.iov_base = buf; - iov.iov_len = len; - - return qemu_co_recvv(sockfd, &iov, len, 0); -} - -int coroutine_fn qemu_co_send(int sockfd, void *buf, int len) -{ - struct iovec iov; - - iov.iov_base = buf; - iov.iov_len = len; - - return qemu_co_sendv(sockfd, &iov, len, 0); + struct iovec iov = { .iov_base = buf, .iov_len = bytes }; + return qemu_co_sendv_recvv(sockfd, &iov, 1, 0, bytes, do_send); } From 25e5e4c7e9d5ec3e95c9526d1abaca40ada50ab0 Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Wed, 14 Mar 2012 11:18:54 +0400 Subject: [PATCH 11/11] rewrite iov_send_recv() and move it to iov.c Make it much more understandable, add a missing iov_cnt argument (number of iovs in the iov), and add comments to it. The new implementation has been extensively tested by splitting a large buffer into many small randomly-sized chunks, sending it over socket to another, slow process and verifying the receiving data is the same. Also add a unit test for iov_send_recv(), sending/ receiving data between two processes over a socketpair using random vectors and random sizes. Signed-off-by: Michael Tokarev --- cutils.c | 83 ---------------------------------- iov.c | 103 ++++++++++++++++++++++++++++++++++++++++++ iov.h | 15 ++++--- qemu-coroutine-io.c | 2 +- tests/test-iov.c | 107 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 220 insertions(+), 90 deletions(-) diff --git a/cutils.c b/cutils.c index cb6f63848..e2bc1b89d 100644 --- a/cutils.c +++ b/cutils.c @@ -375,86 +375,3 @@ int qemu_parse_fd(const char *param) } return fd; } - -ssize_t iov_send_recv(int sockfd, struct iovec *iov, - size_t offset, size_t bytes, - bool do_sendv) -{ - int iovlen; - ssize_t ret; - size_t diff; - struct iovec *last_iov; - - /* last_iov is inclusive, so count from one. */ - iovlen = 1; - last_iov = iov; - bytes += offset; - - while (last_iov->iov_len < bytes) { - bytes -= last_iov->iov_len; - - last_iov++; - iovlen++; - } - - diff = last_iov->iov_len - bytes; - last_iov->iov_len -= diff; - - while (iov->iov_len <= offset) { - offset -= iov->iov_len; - - iov++; - iovlen--; - } - - iov->iov_base = (char *) iov->iov_base + offset; - iov->iov_len -= offset; - - { -#if defined CONFIG_IOVEC && defined CONFIG_POSIX - struct msghdr msg; - memset(&msg, 0, sizeof(msg)); - msg.msg_iov = iov; - msg.msg_iovlen = iovlen; - - do { - if (do_sendv) { - ret = sendmsg(sockfd, &msg, 0); - } else { - ret = recvmsg(sockfd, &msg, 0); - } - } while (ret == -1 && errno == EINTR); -#else - struct iovec *p = iov; - ret = 0; - while (iovlen > 0) { - int rc; - if (do_sendv) { - rc = send(sockfd, p->iov_base, p->iov_len, 0); - } else { - rc = qemu_recv(sockfd, p->iov_base, p->iov_len, 0); - } - if (rc == -1) { - if (errno == EINTR) { - continue; - } - if (ret == 0) { - ret = -1; - } - break; - } - if (rc == 0) { - break; - } - ret += rc; - iovlen--, p++; - } -#endif - } - - /* Undo the changes above */ - iov->iov_base = (char *) iov->iov_base - offset; - iov->iov_len += offset; - last_iov->iov_len += diff; - return ret; -} diff --git a/iov.c b/iov.c index 9657d286b..7cc08f0fe 100644 --- a/iov.c +++ b/iov.c @@ -18,6 +18,14 @@ #include "iov.h" +#ifdef _WIN32 +# include +# include +#else +# include +# include +#endif + size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt, size_t offset, const void *buf, size_t bytes) { @@ -87,6 +95,101 @@ size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt) return len; } +/* helper function for iov_send_recv() */ +static ssize_t +do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send) +{ +#if defined CONFIG_IOVEC && defined CONFIG_POSIX + ssize_t ret; + struct msghdr msg; + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = iov; + msg.msg_iovlen = iov_cnt; + do { + ret = do_send + ? sendmsg(sockfd, &msg, 0) + : recvmsg(sockfd, &msg, 0); + } while (ret < 0 && errno == EINTR); + return ret; +#else + /* else send piece-by-piece */ + /*XXX Note: windows has WSASend() and WSARecv() */ + unsigned i; + size_t count = 0; + for (i = 0; i < iov_cnt; ++i) { + ssize_t r = do_send + ? send(sockfd, iov[i].iov_base, iov[i].iov_len, 0) + : recv(sockfd, iov[i].iov_base, iov[i].iov_len, 0); + if (r > 0) { + ret += r; + } else if (!r) { + break; + } else if (errno == EINTR) { + continue; + } else { + /* else it is some "other" error, + * only return if there was no data processed. */ + if (ret == 0) { + return -1; + } + break; + } + } + return count; +#endif +} + +ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, + size_t offset, size_t bytes, + bool do_send) +{ + ssize_t ret; + unsigned si, ei; /* start and end indexes */ + + /* Find the start position, skipping `offset' bytes: + * first, skip all full-sized vector elements, */ + for (si = 0; si < iov_cnt && offset >= iov[si].iov_len; ++si) { + offset -= iov[si].iov_len; + } + if (offset) { + assert(si < iov_cnt); + /* second, skip `offset' bytes from the (now) first element, + * undo it on exit */ + iov[si].iov_base += offset; + iov[si].iov_len -= offset; + } + /* Find the end position skipping `bytes' bytes: */ + /* first, skip all full-sized elements */ + for (ei = si; ei < iov_cnt && iov[ei].iov_len <= bytes; ++ei) { + bytes -= iov[ei].iov_len; + } + if (bytes) { + /* second, fixup the last element, and remember + * the length we've cut from the end of it in `bytes' */ + size_t tail; + assert(ei < iov_cnt); + assert(iov[ei].iov_len > bytes); + tail = iov[ei].iov_len - bytes; + iov[ei].iov_len = bytes; + bytes = tail; /* bytes is now equal to the tail size */ + ++ei; + } + + ret = do_send_recv(sockfd, iov + si, ei - si, do_send); + + /* Undo the changes above */ + if (offset) { + iov[si].iov_base -= offset; + iov[si].iov_len += offset; + } + if (bytes) { + iov[ei-1].iov_len += bytes; + } + + return ret; +} + + void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt, FILE *fp, const char *prefix, size_t limit) { diff --git a/iov.h b/iov.h index 9b6a88392..381f37a54 100644 --- a/iov.h +++ b/iov.h @@ -60,7 +60,7 @@ size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, * `offset' bytes in the beginning of iovec buffer are skipped and * next `bytes' bytes are used, which must be within data of iovec. * - * r = iov_send_recv(sockfd, iov, offset, bytes, true); + * r = iov_send_recv(sockfd, iov, iovcnt, offset, bytes, true); * * is logically equivalent to * @@ -68,13 +68,16 @@ size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, * iov_to_buf(iov, iovcnt, offset, buf, bytes); * r = send(sockfd, buf, bytes, 0); * free(buf); + * + * For iov_send_recv() _whole_ area being sent or received + * should be within the iovec, not only beginning of it. */ -ssize_t iov_send_recv(int sockfd, struct iovec *iov, +ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, size_t offset, size_t bytes, bool do_send); -#define iov_recv(sockfd, iov, offset, bytes) \ - iov_send_recv(sockfd, iov, offset, bytes, false) -#define iov_send(sockfd, iov, offset, bytes) \ - iov_send_recv(sockfd, iov, offset, bytes, true) +#define iov_recv(sockfd, iov, iov_cnt, offset, bytes) \ + iov_send_recv(sockfd, iov, iov_cnt, offset, bytes, false) +#define iov_send(sockfd, iov, iov_cnt, offset, bytes) \ + iov_send_recv(sockfd, iov, iov_cnt, offset, bytes, true) /** * Produce a text hexdump of iovec `iov' with `iov_cnt' number of elements diff --git a/qemu-coroutine-io.c b/qemu-coroutine-io.c index 6693c7824..573496500 100644 --- a/qemu-coroutine-io.c +++ b/qemu-coroutine-io.c @@ -34,7 +34,7 @@ qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt, size_t done = 0; ssize_t ret; while (done < bytes) { - ret = iov_send_recv(sockfd, iov, + ret = iov_send_recv(sockfd, iov, iov_cnt, offset + done, bytes - done, do_send); if (ret > 0) { done += ret; diff --git a/tests/test-iov.c b/tests/test-iov.c index 5f82296a8..cbe7a8955 100644 --- a/tests/test-iov.c +++ b/tests/test-iov.c @@ -1,6 +1,7 @@ #include #include "qemu-common.h" #include "iov.h" +#include "qemu_socket.h" /* create a randomly-sized iovec with random vectors */ static void iov_random(struct iovec **iovp, unsigned *iov_cntp) @@ -144,10 +145,116 @@ static void test_to_from_buf(void) } } +static void test_io(void) +{ +#ifndef _WIN32 +/* socketpair(PF_UNIX) which does not exist on windows */ + + int sv[2]; + int r; + unsigned i, j, k, s, t; + fd_set fds; + unsigned niov; + struct iovec *iov, *siov; + unsigned char *buf; + size_t sz; + + iov_random(&iov, &niov); + sz = iov_size(iov, niov); + buf = g_malloc(sz); + for (i = 0; i < sz; ++i) { + buf[i] = i & 255; + } + iov_from_buf(iov, niov, 0, buf, sz); + + siov = g_malloc(sizeof(*iov) * niov); + memcpy(siov, iov, sizeof(*iov) * niov); + + if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) < 0) { + perror("socketpair"); + exit(1); + } + + FD_ZERO(&fds); + + t = 0; + if (fork() == 0) { + /* writer */ + + close(sv[0]); + FD_SET(sv[1], &fds); + fcntl(sv[1], F_SETFL, O_RDWR|O_NONBLOCK); + r = g_test_rand_int_range(sz / 2, sz); + setsockopt(sv[1], SOL_SOCKET, SO_SNDBUF, &r, sizeof(r)); + + for (i = 0; i <= sz; ++i) { + for (j = i; j <= sz; ++j) { + k = i; + do { + s = g_test_rand_int_range(0, j - k + 1); + r = iov_send(sv[1], iov, niov, k, s); + g_assert(memcmp(iov, siov, sizeof(*iov)*niov) == 0); + if (r >= 0) { + k += r; + t += r; + usleep(g_test_rand_int_range(0, 30)); + } else if (errno == EAGAIN) { + select(sv[1]+1, NULL, &fds, NULL, NULL); + continue; + } else { + perror("send"); + exit(1); + } + } while(k < j); + } + } + exit(0); + + } else { + /* reader & verifier */ + + close(sv[1]); + FD_SET(sv[0], &fds); + fcntl(sv[0], F_SETFL, O_RDWR|O_NONBLOCK); + r = g_test_rand_int_range(sz / 2, sz); + setsockopt(sv[0], SOL_SOCKET, SO_RCVBUF, &r, sizeof(r)); + usleep(500000); + + for (i = 0; i <= sz; ++i) { + for (j = i; j <= sz; ++j) { + k = i; + iov_memset(iov, niov, 0, 0xff, -1); + do { + s = g_test_rand_int_range(0, j - k + 1); + r = iov_recv(sv[0], iov, niov, k, s); + g_assert(memcmp(iov, siov, sizeof(*iov)*niov) == 0); + if (r > 0) { + k += r; + t += r; + } else if (!r) { + if (s) { + break; + } + } else if (errno == EAGAIN) { + select(sv[0]+1, &fds, NULL, NULL, NULL); + continue; + } else { + perror("recv"); + exit(1); + } + } while(k < j); + test_iov_bytes(iov, niov, i, j - i); + } + } + } +#endif +} + int main(int argc, char **argv) { g_test_init(&argc, &argv, NULL); g_test_rand_int(); g_test_add_func("/basic/iov/from-to-buf", test_to_from_buf); + g_test_add_func("/basic/iov/io", test_io); return g_test_run(); }