From d57c66fa9c5fdbc4b6b60307378e63278d801f20 Mon Sep 17 00:00:00 2001 From: Cameron Gutman Date: Sun, 14 Jul 2024 12:23:19 -0500 Subject: [PATCH] Eliminate a redundant alloc+copy of each frame (#2852) * Eliminate a redundant alloc+copy for each frame * Replace std::copy_n() with std::memcpy() for better code generation --- src/stream.cpp | 80 +++++++++++++++++++++----------------- tests/unit/test_stream.cpp | 40 +++++++++++++++++++ 2 files changed, 85 insertions(+), 35 deletions(-) create mode 100644 tests/unit/test_stream.cpp diff --git a/src/stream.cpp b/src/stream.cpp index 82bb0d915af..93da58e7eb4 100644 --- a/src/stream.cpp +++ b/src/stream.cpp @@ -674,11 +674,14 @@ namespace stream { for (auto x = 0; x < nr_shards; ++x) { shards_p[x] = (uint8_t *) &shards[(x + 1) * prefixsize + x * blocksize]; + // GCC doesn't figure out that std::copy_n() can be replaced with memcpy() here + // and ends up compiling a horribly slow element-by-element copy loop, so we + // help it by using memcpy()/memset() directly. auto copy_len = std::min(blocksize, std::end(payload) - next); - std::copy_n(next, copy_len, shards_p[x]); + std::memcpy(shards_p[x], next, copy_len); if (copy_len < blocksize) { // Zero any additional space after the end of the payload - std::fill_n(shards_p[x] + copy_len, blocksize - copy_len, 0); + std::memset(shards_p[x] + copy_len, 0, blocksize - copy_len); } next += copy_len; @@ -702,32 +705,50 @@ namespace stream { } } // namespace fec - template + /** + * @brief Combines two buffers and inserts new buffers at each slice boundary of the result. + * @param insert_size The number of bytes to insert. + * @param slice_size The number of bytes between insertions. + * @param data1 The first data buffer. + * @param data2 The second data buffer. + */ std::vector - insert(uint64_t insert_size, uint64_t slice_size, const std::string_view &data, F &&f) { - auto pad = data.size() % slice_size != 0; - auto elements = data.size() / slice_size + (pad ? 1 : 0); + concat_and_insert(uint64_t insert_size, uint64_t slice_size, const std::string_view &data1, const std::string_view &data2) { + auto data_size = data1.size() + data2.size(); + auto pad = data_size % slice_size != 0; + auto elements = data_size / slice_size + (pad ? 1 : 0); std::vector result; - result.resize(elements * insert_size + data.size()); + result.resize(elements * insert_size + data_size); - auto next = std::begin(data); - for (auto x = 0; x < elements - 1; ++x) { + auto next = std::begin(data1); + auto end = std::end(data1); + for (auto x = 0; x < elements; ++x) { void *p = &result[x * (insert_size + slice_size)]; - f(p, x, elements); + // For the last iteration, only copy to the end of the data + if (x == elements - 1) { + slice_size = data_size - (x * slice_size); + } - std::copy(next, next + slice_size, (char *) p + insert_size); - next += slice_size; + // Test if this slice will extend into the next buffer + if (next + slice_size > end) { + // Copy the first portion from the first buffer + auto copy_len = end - next; + std::copy(next, end, (char *) p + insert_size); + + // Copy the remaining portion from the second buffer + next = std::begin(data2); + end = std::end(data2); + std::copy(next, next + (slice_size - copy_len), (char *) p + copy_len + insert_size); + next += slice_size - copy_len; + } + else { + std::copy(next, next + slice_size, (char *) p + insert_size); + next += slice_size; + } } - auto x = elements - 1; - void *p = &result[x * (insert_size + slice_size)]; - - f(p, x, elements); - - std::copy(next, std::end(data), (char *) p + insert_size); - return result; } @@ -1314,24 +1335,13 @@ namespace stream { frame_header.frame_processing_latency = 0; } - std::vector payload_new; - std::copy_n((uint8_t *) &frame_header, sizeof(frame_header), std::back_inserter(payload_new)); - std::copy(std::begin(payload), std::end(payload), std::back_inserter(payload_new)); - - payload = { (char *) payload_new.data(), payload_new.size() }; + auto fecPercentage = config::stream.fec_percentage; - // insert packet headers + // Insert space for packet headers auto blocksize = session->config.packetsize + MAX_RTP_HEADER_SIZE; auto payload_blocksize = blocksize - sizeof(video_packet_raw_t); - - auto fecPercentage = config::stream.fec_percentage; - - payload_new = insert(sizeof(video_packet_raw_t), payload_blocksize, - payload, [&](void *p, int fecIndex, int end) { - video_packet_raw_t *video_packet = (video_packet_raw_t *) p; - - video_packet->packet.flags = FLAG_CONTAINS_PIC_DATA; - }); + auto payload_new = concat_and_insert(sizeof(video_packet_raw_t), payload_blocksize, + std::string_view { (char *) &frame_header, sizeof(frame_header) }, payload); payload = std::string_view { (char *) payload_new.data(), payload_new.size() }; @@ -1422,10 +1432,10 @@ namespace stream { inspect->packet.multiFecFlags = 0x10; inspect->packet.multiFecBlocks = (blockIndex << 4) | ((fec_blocks_needed - 1) << 6); + inspect->packet.flags = FLAG_CONTAINS_PIC_DATA; if (x == 0) { inspect->packet.flags |= FLAG_SOF; } - if (x == packets - 1) { inspect->packet.flags |= FLAG_EOF; } diff --git a/tests/unit/test_stream.cpp b/tests/unit/test_stream.cpp new file mode 100644 index 00000000000..8973af324df --- /dev/null +++ b/tests/unit/test_stream.cpp @@ -0,0 +1,40 @@ +/** + * @file tests/unit/test_stream.cpp + * @brief Test src/stream.* + */ + +#include +#include +#include +#include + +namespace stream { + std::vector + concat_and_insert(uint64_t insert_size, uint64_t slice_size, const std::string_view &data1, const std::string_view &data2); +} + +#include + +TEST(ConcatAndInsertTests, ConcatNoInsertionTest) { + char b1[] = { 'a', 'b' }; + char b2[] = { 'c', 'd', 'e' }; + auto res = stream::concat_and_insert(0, 2, std::string_view { b1, sizeof(b1) }, std::string_view { b2, sizeof(b2) }); + auto expected = std::vector { 'a', 'b', 'c', 'd', 'e' }; + ASSERT_EQ(res, expected); +} + +TEST(ConcatAndInsertTests, ConcatLargeStrideTest) { + char b1[] = { 'a', 'b' }; + char b2[] = { 'c', 'd', 'e' }; + auto res = stream::concat_and_insert(1, sizeof(b1) + sizeof(b2) + 1, std::string_view { b1, sizeof(b1) }, std::string_view { b2, sizeof(b2) }); + auto expected = std::vector { 0, 'a', 'b', 'c', 'd', 'e' }; + ASSERT_EQ(res, expected); +} + +TEST(ConcatAndInsertTests, ConcatSmallStrideTest) { + char b1[] = { 'a', 'b' }; + char b2[] = { 'c', 'd', 'e' }; + auto res = stream::concat_and_insert(1, 1, std::string_view { b1, sizeof(b1) }, std::string_view { b2, sizeof(b2) }); + auto expected = std::vector { 0, 'a', 0, 'b', 0, 'c', 0, 'd', 0, 'e' }; + ASSERT_EQ(res, expected); +}