Skip to content

Commit

Permalink
Eliminate a redundant alloc+copy of each frame (#2852)
Browse files Browse the repository at this point in the history
* Eliminate a redundant alloc+copy for each frame

* Replace std::copy_n() with std::memcpy() for better code generation
  • Loading branch information
cgutman authored Jul 14, 2024
1 parent 18e7dfb commit d57c66f
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 35 deletions.
80 changes: 45 additions & 35 deletions src/stream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -674,11 +674,14 @@ namespace stream {
for (auto x = 0; x < nr_shards; ++x) {
shards_p[x] = (uint8_t *) &shards[(x + 1) * prefixsize + x * blocksize];

// GCC doesn't figure out that std::copy_n() can be replaced with memcpy() here
// and ends up compiling a horribly slow element-by-element copy loop, so we
// help it by using memcpy()/memset() directly.
auto copy_len = std::min<size_t>(blocksize, std::end(payload) - next);
std::copy_n(next, copy_len, shards_p[x]);
std::memcpy(shards_p[x], next, copy_len);
if (copy_len < blocksize) {
// Zero any additional space after the end of the payload
std::fill_n(shards_p[x] + copy_len, blocksize - copy_len, 0);
std::memset(shards_p[x] + copy_len, 0, blocksize - copy_len);
}

next += copy_len;
Expand All @@ -702,32 +705,50 @@ namespace stream {
}
} // namespace fec

template <class F>
/**
* @brief Combines two buffers and inserts new buffers at each slice boundary of the result.
* @param insert_size The number of bytes to insert.
* @param slice_size The number of bytes between insertions.
* @param data1 The first data buffer.
* @param data2 The second data buffer.
*/
std::vector<uint8_t>
insert(uint64_t insert_size, uint64_t slice_size, const std::string_view &data, F &&f) {
auto pad = data.size() % slice_size != 0;
auto elements = data.size() / slice_size + (pad ? 1 : 0);
concat_and_insert(uint64_t insert_size, uint64_t slice_size, const std::string_view &data1, const std::string_view &data2) {
auto data_size = data1.size() + data2.size();
auto pad = data_size % slice_size != 0;
auto elements = data_size / slice_size + (pad ? 1 : 0);

std::vector<uint8_t> result;
result.resize(elements * insert_size + data.size());
result.resize(elements * insert_size + data_size);

auto next = std::begin(data);
for (auto x = 0; x < elements - 1; ++x) {
auto next = std::begin(data1);
auto end = std::end(data1);
for (auto x = 0; x < elements; ++x) {
void *p = &result[x * (insert_size + slice_size)];

f(p, x, elements);
// For the last iteration, only copy to the end of the data
if (x == elements - 1) {
slice_size = data_size - (x * slice_size);
}

std::copy(next, next + slice_size, (char *) p + insert_size);
next += slice_size;
// Test if this slice will extend into the next buffer
if (next + slice_size > end) {
// Copy the first portion from the first buffer
auto copy_len = end - next;
std::copy(next, end, (char *) p + insert_size);

// Copy the remaining portion from the second buffer
next = std::begin(data2);
end = std::end(data2);
std::copy(next, next + (slice_size - copy_len), (char *) p + copy_len + insert_size);
next += slice_size - copy_len;
}
else {
std::copy(next, next + slice_size, (char *) p + insert_size);
next += slice_size;
}
}

auto x = elements - 1;
void *p = &result[x * (insert_size + slice_size)];

f(p, x, elements);

std::copy(next, std::end(data), (char *) p + insert_size);

return result;
}

Expand Down Expand Up @@ -1314,24 +1335,13 @@ namespace stream {
frame_header.frame_processing_latency = 0;
}

std::vector<uint8_t> payload_new;
std::copy_n((uint8_t *) &frame_header, sizeof(frame_header), std::back_inserter(payload_new));
std::copy(std::begin(payload), std::end(payload), std::back_inserter(payload_new));

payload = { (char *) payload_new.data(), payload_new.size() };
auto fecPercentage = config::stream.fec_percentage;

// insert packet headers
// Insert space for packet headers
auto blocksize = session->config.packetsize + MAX_RTP_HEADER_SIZE;
auto payload_blocksize = blocksize - sizeof(video_packet_raw_t);

auto fecPercentage = config::stream.fec_percentage;

payload_new = insert(sizeof(video_packet_raw_t), payload_blocksize,
payload, [&](void *p, int fecIndex, int end) {
video_packet_raw_t *video_packet = (video_packet_raw_t *) p;

video_packet->packet.flags = FLAG_CONTAINS_PIC_DATA;
});
auto payload_new = concat_and_insert(sizeof(video_packet_raw_t), payload_blocksize,
std::string_view { (char *) &frame_header, sizeof(frame_header) }, payload);

payload = std::string_view { (char *) payload_new.data(), payload_new.size() };

Expand Down Expand Up @@ -1422,10 +1432,10 @@ namespace stream {
inspect->packet.multiFecFlags = 0x10;
inspect->packet.multiFecBlocks = (blockIndex << 4) | ((fec_blocks_needed - 1) << 6);

inspect->packet.flags = FLAG_CONTAINS_PIC_DATA;
if (x == 0) {
inspect->packet.flags |= FLAG_SOF;
}

if (x == packets - 1) {
inspect->packet.flags |= FLAG_EOF;
}
Expand Down
40 changes: 40 additions & 0 deletions tests/unit/test_stream.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/**
* @file tests/unit/test_stream.cpp
* @brief Test src/stream.*
*/

#include <cstdint>
#include <functional>
#include <string>
#include <vector>

namespace stream {
std::vector<uint8_t>
concat_and_insert(uint64_t insert_size, uint64_t slice_size, const std::string_view &data1, const std::string_view &data2);
}

#include <tests/conftest.cpp>

TEST(ConcatAndInsertTests, ConcatNoInsertionTest) {
char b1[] = { 'a', 'b' };
char b2[] = { 'c', 'd', 'e' };
auto res = stream::concat_and_insert(0, 2, std::string_view { b1, sizeof(b1) }, std::string_view { b2, sizeof(b2) });
auto expected = std::vector<uint8_t> { 'a', 'b', 'c', 'd', 'e' };
ASSERT_EQ(res, expected);
}

TEST(ConcatAndInsertTests, ConcatLargeStrideTest) {
char b1[] = { 'a', 'b' };
char b2[] = { 'c', 'd', 'e' };
auto res = stream::concat_and_insert(1, sizeof(b1) + sizeof(b2) + 1, std::string_view { b1, sizeof(b1) }, std::string_view { b2, sizeof(b2) });
auto expected = std::vector<uint8_t> { 0, 'a', 'b', 'c', 'd', 'e' };
ASSERT_EQ(res, expected);
}

TEST(ConcatAndInsertTests, ConcatSmallStrideTest) {
char b1[] = { 'a', 'b' };
char b2[] = { 'c', 'd', 'e' };
auto res = stream::concat_and_insert(1, 1, std::string_view { b1, sizeof(b1) }, std::string_view { b2, sizeof(b2) });
auto expected = std::vector<uint8_t> { 0, 'a', 0, 'b', 0, 'c', 0, 'd', 0, 'e' };
ASSERT_EQ(res, expected);
}

0 comments on commit d57c66f

Please sign in to comment.