This repository has been archived by the owner on Jun 14, 2024. It is now read-only.

Improving Videoencoder #395

Open · hermann-noll wants to merge 10 commits into master
@@ -386,10 +386,20 @@ void CompositorInterface::StopRecording()
activeVideoEncoder = nullptr;
}

void CompositorInterface::RecordFrameAsync(BYTE* videoFrame, LONGLONG frameTime, int numFrames)
std::unique_ptr<VideoEncoder::VideoInput> CompositorInterface::GetAvailableRecordFrame()
{
if (activeVideoEncoder == nullptr)
{
OutputDebugString(L"GetAvailableRecordFrame dropped, no active encoder\n");
return nullptr;
}
return activeVideoEncoder->GetAvailableVideoFrame();
}

void CompositorInterface::RecordFrameAsync(std::unique_ptr<VideoEncoder::VideoInput> frame, int numFrames)
{
#if _DEBUG
std::wstring debugString = L"RecordFrameAsync called, frameTime:" + std::to_wstring(frameTime) + L", numFrames:" + std::to_wstring(numFrames) + L"\n";
std::wstring debugString = L"RecordFrameAsync called, frameTime:" + std::to_wstring(frame->timestamp) + L", numFrames:" + std::to_wstring(numFrames) + L"\n";
OutputDebugString(debugString.data());
#endif

@@ -407,8 +417,8 @@ void CompositorInterface::RecordFrameAsync(BYTE* videoFrame, LONGLONG frameTime, int numFrames)
// The encoder will update sample times internally based on the first seen sample time when recording.
// The encoder, however, does assume that audio and video samples will be based on the same source time.
// Providing audio and video samples with different starting times will cause issues in the generated video file.
LONGLONG sampleTime = frameTime;
activeVideoEncoder->QueueVideoFrame(videoFrame, sampleTime, numFrames * frameProvider->GetDurationHNS());
frame->duration = numFrames * frameProvider->GetDurationHNS();
activeVideoEncoder->QueueVideoFrame(std::move(frame));
}

void CompositorInterface::RecordAudioFrameAsync(BYTE* audioFrame, LONGLONG audioTime, int audioSize)
@@ -80,7 +80,8 @@ class CompositorInterface
DLLEXPORT void StopRecording();

// frameTime is in hundred nanoseconds
DLLEXPORT void RecordFrameAsync(BYTE* videoFrame, LONGLONG frameTime, int numFrames);
DLLEXPORT std::unique_ptr<VideoEncoder::VideoInput> GetAvailableRecordFrame();
DLLEXPORT void RecordFrameAsync(std::unique_ptr<VideoEncoder::VideoInput>, int numFrames);

// audioTime is in hundred nanoseconds
DLLEXPORT void RecordAudioFrameAsync(BYTE* audioFrame, LONGLONG audioTime, int audioSize);
@@ -6,6 +6,8 @@

#include "codecapi.h"

#define NUM_VIDEO_BUFFERS 10

VideoEncoder::VideoEncoder(UINT frameWidth, UINT frameHeight, UINT frameStride, UINT fps,
UINT32 audioSampleRate, UINT32 audioChannels, UINT32 audioBPS, UINT32 videoBitrate, UINT32 videoMpegLevel) :
frameWidth(frameWidth),
@@ -21,10 +23,11 @@ VideoEncoder::VideoEncoder(UINT frameWidth, UINT frameHeight, UINT frameStride,
isRecording(false)
{
#if HARDWARE_ENCODE_VIDEO
	inputFormat = MFVideoFormat_NV12;
#else
	inputFormat = MFVideoFormat_RGB32;
#endif
inputFormat = MFVideoFormat_RGB32;
}
Contributor:
Does this change mean that hardware encoding succeeds using RGB32?

Contributor Author:
For me it only worked with RGB32. I guess this is because the converted texture is still reported to D3D as RGB, while MF is told the input is supposed to be NV12, which implies a smaller buffer.
MF seems to get confused, and writing samples failed with MF_E_BUFFERTOOSMALL.
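For illustration, a minimal sketch of the size mismatch behind that error; the resolution is hypothetical, and FRAME_BPP_NV12 merely mirrors the constant used elsewhere in this diff (NV12 stores 1.5 bytes per pixel, RGB32 stores 4):

```cpp
#include <cstdio>

constexpr double FRAME_BPP_NV12 = 1.5;
constexpr unsigned int width = 1920;
constexpr unsigned int height = 1080;

int main()
{
    // NV12: full-resolution Y plane plus half-resolution interleaved UV plane.
    const auto nv12Bytes = static_cast<unsigned int>(FRAME_BPP_NV12 * width * height);
    // RGB32: four bytes (B, G, R, X) per pixel.
    const unsigned int rgb32Bytes = 4 * width * height;

    std::printf("NV12 sample buffer: %u bytes\n", nv12Bytes);  // 3110400
    std::printf("RGB32 frame:        %u bytes\n", rgb32Bytes); // 8294400
    // An RGB32-sized write cannot fit into an NV12-sized sample, which
    // Media Foundation reports as MF_E_BUFFERTOOSMALL.
    return 0;
}
```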


VideoEncoder::~VideoEncoder()
@@ -41,11 +44,26 @@ bool VideoEncoder::Initialize(ID3D11Device* device)

#if HARDWARE_ENCODE_VIDEO
MFCreateDXGIDeviceManager(&resetToken, &deviceManager);
this->device = device;

if (deviceManager != nullptr)
{
OutputDebugString(L"Resetting device manager with graphics device.\n");
deviceManager->ResetDevice(device, resetToken);
hr = deviceManager->ResetDevice(device, resetToken);
}
for (int i = 0; i < NUM_VIDEO_BUFFERS; i++)
{
videoInputPool.push(std::make_unique<VideoInput>(device));
}

ID3D10Multithread* multithread;
device->QueryInterface(&multithread);
multithread->SetMultithreadProtected(TRUE);

#else
for (int i = 0; i < NUM_VIDEO_BUFFERS; i++)
{
videoInputPool.push(std::make_unique<VideoInput>(frameHeight * frameStride));
}
#endif

@@ -72,7 +90,7 @@ void VideoEncoder::StartRecording(LPCWSTR videoPath, bool encodeAudio)
prevVideoTime = INVALID_TIMESTAMP;
prevAudioTime = INVALID_TIMESTAMP;

HRESULT hr = E_PENDING;
HRESULT hr = S_OK;

sinkWriter = NULL;
videoStreamIndex = MAXDWORD;
@@ -87,13 +105,14 @@ void VideoEncoder::StartRecording(LPCWSTR videoPath, bool encodeAudio)
#endif

IMFAttributes *attr = nullptr;
MFCreateAttributes(&attr, 3);
MFCreateAttributes(&attr, 4);

if (SUCCEEDED(hr)) { hr = attr->SetUINT32(MF_SINK_WRITER_DISABLE_THROTTLING, TRUE); }

#if HARDWARE_ENCODE_VIDEO
if (SUCCEEDED(hr)) { hr = attr->SetUINT32(MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS, true); }
if (SUCCEEDED(hr)) { hr = attr->SetUINT32(MF_READWRITE_DISABLE_CONVERTERS, false); }
if (SUCCEEDED(hr)) { hr = attr->SetUnknown(MF_SINK_WRITER_D3D_MANAGER, deviceManager); }
#endif

hr = MFCreateSinkWriterFromURL(videoPath, NULL, attr, &sinkWriter);
@@ -138,6 +157,10 @@ void VideoEncoder::StartRecording(LPCWSTR videoPath, bool encodeAudio)
if (SUCCEEDED(hr)) { hr = MFSetAttributeSize(pVideoTypeIn, MF_MT_FRAME_SIZE, frameWidth, frameHeight); }
if (SUCCEEDED(hr)) { hr = MFSetAttributeRatio(pVideoTypeIn, MF_MT_FRAME_RATE, fps, 1); }
if (SUCCEEDED(hr)) { hr = MFSetAttributeRatio(pVideoTypeIn, MF_MT_PIXEL_ASPECT_RATIO, 1, 1); }
if (SUCCEEDED(hr)) { hr = pVideoTypeIn->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, TRUE); }
if (SUCCEEDED(hr)) { hr = pVideoTypeIn->SetUINT32(MF_MT_DEFAULT_STRIDE, frameStride); }
if (SUCCEEDED(hr)) { hr = pVideoTypeIn->SetUINT32(MF_MT_FIXED_SIZE_SAMPLES, TRUE); }
if (SUCCEEDED(hr)) { hr = pVideoTypeIn->SetUINT32(MF_MT_SAMPLE_SIZE, frameStride * frameHeight); }
if (SUCCEEDED(hr)) { hr = sinkWriter->SetInputMediaType(videoStreamIndex, pVideoTypeIn, NULL); }

if (encodeAudio)
@@ -284,12 +307,12 @@ void VideoEncoder::WriteAudio(byte* buffer, int bufferSize, LONGLONG timestamp)
#endif
}

void VideoEncoder::WriteVideo(byte* buffer, LONGLONG timestamp, LONGLONG duration)
void VideoEncoder::WriteVideo(std::unique_ptr<VideoEncoder::VideoInput> frame)
{
std::shared_lock<std::shared_mutex> lock(videoStateLock);
#if _DEBUG
{
std::wstring debugString = L"Writing Video, Timestamp:" + std::to_wstring(timestamp) + L"\n";
std::wstring debugString = L"Writing Video, Timestamp:" + std::to_wstring(frame->timestamp) + L"\n";
OutputDebugString(debugString.data());
}
#endif
@@ -302,126 +325,91 @@ void VideoEncoder::WriteVideo(byte* buffer, LONGLONG timestamp, LONGLONG duration)

if (startTime == INVALID_TIMESTAMP)
{
startTime = timestamp;
startTime = frame->timestamp;
#if _DEBUG
std::wstring debugString = L"Start time set from video, Timestamp:" + std::to_wstring(timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n";
std::wstring debugString = L"Start time set from video, Timestamp:" + std::to_wstring(frame->timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n";
OutputDebugString(debugString.data());
#endif
}
else if (timestamp < startTime)
else if (frame->timestamp < startTime)
{
#if _DEBUG
std::wstring debugString = L"Video not recorded, Timestamp less than start time. Timestamp:" + std::to_wstring(timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n";
std::wstring debugString = L"Video not recorded, Timestamp less than start time. Timestamp:" + std::to_wstring(frame->timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n";
OutputDebugString(debugString.data());
#endif
return;
}

if (timestamp == prevVideoTime)
if (frame->timestamp == prevVideoTime)
{
#if _DEBUG
std::wstring debugString = L"Video not recorded, Timestamp equals prevVideoTime. Timestamp:" + std::to_wstring(timestamp) + L", StartTime:" + std::to_wstring(prevVideoTime) + L"\n";
std::wstring debugString = L"Video not recorded, Timestamp equals prevVideoTime. Timestamp:" + std::to_wstring(frame->timestamp) + L", StartTime:" + std::to_wstring(prevVideoTime) + L"\n";
OutputDebugString(debugString.data());
#endif
return;
}

LONGLONG sampleTimeNow = timestamp;
LONGLONG sampleTimeNow = frame->timestamp;
LONGLONG sampleTimeStart = startTime;

LONGLONG sampleTime = sampleTimeNow - sampleTimeStart;

if (prevVideoTime != INVALID_TIMESTAMP)
{
duration = sampleTime - prevVideoTime;
frame->duration = sampleTime - prevVideoTime;
#if _DEBUG
std::wstring debugString = L"Updated write video duration:" + std::to_wstring(duration) + L", SampleTime:" + std::to_wstring(sampleTime) + L", PrevVideoTime:" + std::to_wstring(prevVideoTime) + L"\n";
std::wstring debugString = L"Updated write video duration:" + std::to_wstring(frame->duration) + L", SampleTime:" + std::to_wstring(sampleTime) + L", PrevVideoTime:" + std::to_wstring(prevVideoTime) + L"\n";
OutputDebugString(debugString.data());
#endif
}

// Copy frame to a temporary buffer and process on a background thread.
#if HARDWARE_ENCODE_VIDEO
BYTE* tmpVideoBuffer = new BYTE[(int)(FRAME_BPP_NV12 * frameHeight * frameWidth)];
memcpy(tmpVideoBuffer, buffer, (int)(FRAME_BPP_NV12 * frameHeight * frameWidth));
#else
BYTE* tmpVideoBuffer = new BYTE[frameHeight * frameStride];
memcpy(tmpVideoBuffer, buffer, frameHeight * frameStride);
#endif

concurrency::create_task([=]()
videoWriteFuture = std::async(std::launch::async, [=, frame{ std::move(frame) }, previousWriteFuture{ std::move(videoWriteFuture) }]() mutable
{
if (previousWriteFuture.valid())
{
previousWriteFuture.wait();
}
std::shared_lock<std::shared_mutex> lock(videoStateLock);

HRESULT hr = E_PENDING;
HRESULT hr = S_OK;
if (sinkWriter == NULL || !isRecording)
{
OutputDebugString(L"Must start recording before writing video frames.\n");
delete[] tmpVideoBuffer;
return;
}

LONG cbWidth = frameStride;
DWORD cbBuffer = cbWidth * frameHeight;
DWORD imageHeight = frameHeight;

#if HARDWARE_ENCODE_VIDEO
cbWidth = frameWidth;
cbBuffer = (int)(FRAME_BPP_NV12 * frameWidth * frameHeight);
imageHeight = (int)(FRAME_BPP_NV12 * frameHeight);
#endif

DWORD cbBuffer = frameStride * frameHeight;
IMFSample* pVideoSample = NULL;
IMFMediaBuffer* pVideoBuffer = NULL;
BYTE* pData = NULL;

// Create a new memory buffer.
hr = MFCreateMemoryBuffer(cbBuffer, &pVideoBuffer);

// Lock the buffer and copy the video frame to the buffer.
if (SUCCEEDED(hr)) { hr = pVideoBuffer->Lock(&pData, NULL, NULL); }

if (SUCCEEDED(hr))
{
//TODO: Can pVideoBuffer be created from an ID3D11Texture2D*?
hr = MFCopyImage(
pData, // Destination buffer.
cbWidth, // Destination stride.
tmpVideoBuffer,
cbWidth, // Source stride.
cbWidth, // Image width in bytes.
imageHeight // Image height in pixels.
);
}

if (pVideoBuffer)
#if _DEBUG
{
pVideoBuffer->Unlock();
std::wstring debugString = L"Writing Video Sample, SampleTime:" + std::to_wstring(sampleTime) + L", SampleDuration:" + std::to_wstring(frame->duration) + L", BufferLength:" + std::to_wstring(cbBuffer) + L"\n";
OutputDebugString(debugString.data());
}
#endif

#if _DEBUG
{
std::wstring debugString = L"Writing Video Sample, SampleTime:" + std::to_wstring(sampleTime) + L", SampleDuration:" + std::to_wstring(duration) + L", BufferLength:" + std::to_wstring(cbBuffer) + L"\n";
OutputDebugString(debugString.data());
}
#if !HARDWARE_ENCODE_VIDEO
frame->Unlock();
#endif

// Set the data length of the buffer.
if (SUCCEEDED(hr)) { hr = pVideoBuffer->SetCurrentLength(cbBuffer); }
if (SUCCEEDED(hr)) { hr = frame->mediaBuffer->SetCurrentLength(frameHeight * frameStride); }

// Create a media sample and add the buffer to the sample.
if (SUCCEEDED(hr)) { hr = MFCreateSample(&pVideoSample); }
if (SUCCEEDED(hr)) { hr = pVideoSample->AddBuffer(pVideoBuffer); }
if (SUCCEEDED(hr)) { hr = pVideoSample->AddBuffer(frame->mediaBuffer); }

if (SUCCEEDED(hr)) { hr = pVideoSample->SetSampleTime(sampleTime); } //100-nanosecond units
if (SUCCEEDED(hr)) { hr = pVideoSample->SetSampleDuration(duration); } //100-nanosecond units
if (SUCCEEDED(hr)) { hr = pVideoSample->SetSampleDuration(frame->duration); } //100-nanosecond units

// Send the sample to the Sink Writer.
if (SUCCEEDED(hr)) { hr = sinkWriter->WriteSample(videoStreamIndex, pVideoSample); }

SafeRelease(pVideoSample);
SafeRelease(pVideoBuffer);
delete[] tmpVideoBuffer;

{
std::shared_lock<std::shared_mutex> lock(videoInputPoolLock);
videoInputPool.push(std::move(frame));
}

if (FAILED(hr))
{
@@ -456,6 +444,11 @@ void VideoEncoder::StopRecording()

concurrency::create_task([&]
{
if (videoWriteFuture.valid())
{
videoWriteFuture.wait();
videoWriteFuture = {};
Contributor:

It's inconsistent, but I have seen an exception thrown during this cleanup process for debug flavors of the compositor DLL when running locally. I have been using the split-channels recording option.

Contributor:
Specifically, it looks like a stack overflow when setting this videoWriteFuture to an empty struct.



Contributor Author:

I'll look into reproducing that exception; I haven't experienced any stack overflows with the current changes.

Contributor Author:

A very nice little bug, caused by future objects keeping the async lambda alive. By waiting on the previous future we create a chain of future objects in which every element keeps the next one alive, growing rapidly over a long recording. When the head is manually destroyed in line 450, this chain is recursively destructed, causing the stack overflow.
The really proper solution would be the more classic approach of a single task/thread consuming queued video frames; the solution chosen in cae0f9f just frees the previous future early, so that only the unprocessed frames remain in the chain.
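For illustration, a minimal sketch of the pattern described above, reusing the names from this diff; the early reset reflects the fix described for cae0f9f rather than the code shown in this revision:

```cpp
#include <future>
#include <utility>

std::future<void> videoWriteFuture;  // head of the chain

void QueueAsyncWrite()
{
    videoWriteFuture = std::async(std::launch::async,
        [previousWriteFuture = std::move(videoWriteFuture)]() mutable
    {
        if (previousWriteFuture.valid())
        {
            previousWriteFuture.wait();  // keep writes ordered
        }
        // Without this reset, every lambda keeps the previous future's shared
        // state alive, and that state owns the previous lambda in turn. The
        // whole chain is then torn down recursively when the head future is
        // destroyed, overflowing the stack after a long recording. Freeing
        // the link here keeps the chain as short as the unprocessed backlog.
        previousWriteFuture = {};

        // ... write the video sample here ...
    });
}
```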

Contributor:

that does sound quite nice. :)

In general, would you like me to test this out again locally so we can look into getting these changes in? Or would you prefer to continue looking into the TODO items?

Contributor Author (@hermann-noll, Jul 17, 2020):

I would prefer a bit more work done to it; there are still some unclean parts. Nevertheless, it would be great to know whether you have a dependency on the RGB->NV12 color conversion (e.g. I don't know whether it works with the Intel/AMD hardware encoders).

EDIT: Testing-wise, with the current state I could no longer reproduce any stuttering caused by recording. Old-frame insertions can definitely still be observed under very high load (I used artificial CPU/GPU load for the repro), but this is caused by the unguarded ring buffers in both the k4a and Decklink frame providers and should be dealt with in separate issues (the workaround is setting the ring size to 100; see the sketch below).
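For context, a "guarded" ring buffer in this sense might look like the following minimal sketch (hypothetical types and names, not the actual k4a or Decklink provider code): the mutex plus the full/empty checks stop the producer from silently overwriting frames the consumer has not read yet, which is what produces the old-frame insertions mentioned above.

```cpp
#include <array>
#include <cstddef>
#include <mutex>
#include <optional>

template <typename T, std::size_t N>
class GuardedRingBuffer
{
public:
    bool Push(T frame)
    {
        std::lock_guard<std::mutex> lock(mutex);
        if (count == N)
        {
            return false;  // full: drop the new frame rather than overwrite
        }
        slots[(head + count) % N] = std::move(frame);
        ++count;
        return true;
    }

    std::optional<T> Pop()
    {
        std::lock_guard<std::mutex> lock(mutex);
        if (count == 0)
        {
            return std::nullopt;  // empty: never hand back a stale frame
        }
        T frame = std::move(slots[head]);
        head = (head + 1) % N;
        --count;
        return frame;
    }

private:
    std::mutex mutex;
    std::array<T, N> slots;
    std::size_t head = 0;
    std::size_t count = 0;
};
```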

}
while (!videoQueue.empty())
{
videoQueue.pop();
Expand Down Expand Up @@ -508,17 +501,36 @@ void VideoEncoder::StopRecording()
SafeRelease(sinkWriter);
}

void VideoEncoder::QueueVideoFrame(byte* buffer, LONGLONG timestamp, LONGLONG duration)
std::unique_ptr<VideoEncoder::VideoInput> VideoEncoder::GetAvailableVideoFrame()
{
std::shared_lock<std::shared_mutex> lock(videoInputPoolLock);
if (videoInputPool.empty())
{
#if HARDWARE_ENCODE_VIDEO
return std::make_unique<VideoInput>(device);
#else
return std::make_unique<VideoInput>(frameStride * frameHeight);
#endif
}
else
{
auto result = std::move(videoInputPool.front());
videoInputPool.pop();
return result;
}
}

void VideoEncoder::QueueVideoFrame(std::unique_ptr<VideoEncoder::VideoInput> frame)
{
std::shared_lock<std::shared_mutex> lock(videoStateLock);

if (acceptQueuedFrames)
{
videoQueue.push(VideoInput(buffer, timestamp, duration));
#if _DEBUG
std::wstring debugString = L"Pushed Video Input, Timestamp:" + std::to_wstring(timestamp) + L"\n";
OutputDebugString(debugString.data());
std::wstring debugString = L"Pushed Video Input, Timestamp:" + std::to_wstring(frame->timestamp) + L"\n";
OutputDebugString(debugString.data());
#endif
videoQueue.push(std::move(frame));
}
}

@@ -548,8 +560,7 @@ void VideoEncoder::Update()
{
if (isRecording)
{
VideoInput input = videoQueue.front();
WriteVideo(input.sharedBuffer, input.timestamp, input.duration);
WriteVideo(std::move(videoQueue.front()));
videoQueue.pop();
}
}