[Vk] Fix memory leak when using Low Level Materials with uniform params
The param buffer was never flushed, never unmapped, and never destroyed; it
kept growing, slowly leaking RAM over time.

Additionally, fix vkInvalidateMappedMemoryRanges validation errors caused by
incorrect handling of nonCoherentAtomSize.
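
For reference, the Vulkan spec only permits flushing/invalidating non-coherent
memory in ranges whose offset is a multiple of nonCoherentAtomSize and whose
size is a multiple of it (or reaches the end of the allocation). A minimal
sketch of the rule with assumed names (alignUp, deviceProps, deviceMemory,
start and count are illustrative, not from this commit):

const VkDeviceSize atom = deviceProps.limits.nonCoherentAtomSize;  // e.g. 64
VkMappedMemoryRange range;
range.sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
range.pNext  = 0;
range.memory = deviceMemory;
range.offset = ( start / atom ) * atom;  // round the start DOWN to an atom
range.size   = alignUp( start + count, atom ) - range.offset;  // end rounded UP
vkFlushMappedMemoryRanges( device, 1u, &range );  // range is now spec-legal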
darksylinc committed Dec 5, 2023
1 parent 1f52a03 commit 696ec72
Showing 5 changed files with 137 additions and 9 deletions.
36 changes: 35 additions & 1 deletion RenderSystems/Vulkan/include/OgreVulkanRenderSystem.h
@@ -78,10 +78,10 @@ namespace Ogre
// TODO: AutoParamsBuffer probably belongs to MetalDevice (because it's per device?)
typedef vector<ConstBufferPacked *>::type ConstBufferPackedVec;
ConstBufferPackedVec mAutoParamsBuffer;
size_t mFirstUnflushedAutoParamsBuffer;
size_t mAutoParamsBufferIdx;
uint8 *mCurrentAutoParamsBufferPtr;
size_t mCurrentAutoParamsBufferSpaceLeft;
size_t mHistoricalAutoParamsSize[60];

// For v1 rendering.
v1::IndexData *mCurrentIndexBuffer;
@@ -272,6 +272,40 @@ namespace Ogre
uint16 variabilityMask ) override;
void bindGpuProgramPassIterationParameters( GpuProgramType gptype ) override;

protected:
/** Low Level Materials use a params buffer to pass all uniforms. We emulate this using a large
const buffer into which we write, binding the regions we need.
This is done in bindGpuProgramParameters().
When it runs out of space, we create another one (see mAutoParamsBuffer).
However:
- In all cases we must flush buffers before command submission or else the cmds we're
about to execute may not see up-to-date const buffer data. We don't flush in
bindGpuProgramParameters() because we could end up with lots of 4-byte flushes,
which is seriously inefficient. Flushing the whole thing once at the end is better.
- We mustn't grow indefinitely. On submissionType >= NewFrameIdx, we
are certain we can set mAutoParamsBufferIdx = 0 and start over.
@remarks
bindGpuProgramParameters() tries to use BT_DYNAMIC_PERSISTENT_COHERENT, which doesn't need
flushing (thus we'd only care about submissionType >= NewFrameIdx to reuse memory).
However VaoManager cannot guarantee BT_DYNAMIC_PERSISTENT_COHERENT will actually be
coherent, thus we must call unmap( UO_KEEP_PERSISTENT ) anyway.
@param submissionType
See SubmissionType::SubmissionType.
*/
void flushBoundGpuProgramParameters( const SubmissionType::SubmissionType submissionType );

public:
/** All pending or queued buffer flushes (i.e. calls to vkFlushMappedMemoryRanges) must be done
now because we're about to submit commands for execution, and those commands need to see
the flushed regions.
@param submissionType
See SubmissionType::SubmissionType.
*/
void flushPendingNonCoherentFlushes( const SubmissionType::SubmissionType submissionType );

void clearFrameBuffer( RenderPassDescriptor *renderPassDesc, TextureGpu *anyTarget,
uint8 mipLevel ) override;

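A minimal sketch, with simplified names, of the flow the comment above
describes (illustrative only, not code from this commit):

// Per draw, bindGpuProgramParameters() bump-allocates from a large mapped
// const buffer instead of issuing one tiny flush per uniform write:
memcpy( mCurrentAutoParamsBufferPtr, params, bytesToWrite );
mCurrentAutoParamsBufferPtr += bytesToWrite;
mCurrentAutoParamsBufferSpaceLeft -= bytesToWrite;

// On command submission, everything written since the last submission gets
// flushed in one pass:
flushPendingNonCoherentFlushes( submissionType );

// And once submissionType >= NewFrameIdx, the pool is reused from index 0
// instead of growing forever (which was the leak):
mAutoParamsBufferIdx = 0u;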
10 changes: 9 additions & 1 deletion RenderSystems/Vulkan/include/OgreVulkanUtils.h
@@ -61,11 +61,19 @@ namespace Ogre
uint32_t findMemoryType( VkPhysicalDeviceMemoryProperties &memProperties, uint32_t typeFilter,
VkMemoryPropertyFlags properties );

inline VkDeviceSize alignMemory( size_t offset, const VkDeviceSize &alignment )
inline VkDeviceSize alignMemory( size_t offset, const VkDeviceSize alignment )
{
return ( ( offset + alignment - 1 ) / alignment ) * alignment;
}

inline void setAlignMemoryCoherentAtom( VkMappedMemoryRange &outMemRange, const size_t offset,
const size_t sizeBytes, const VkDeviceSize alignment )
{
const VkDeviceSize endOffset = alignMemory( offset + sizeBytes, alignment );
outMemRange.offset = ( offset / alignment ) * alignment;
outMemRange.size = endOffset - outMemRange.offset;
}

String getSpirvReflectError( SpvReflectResult spirvReflectResult );

VkSampleCountFlagBits getMaxUsableSampleCount( VkPhysicalDeviceProperties &physicalDeviceProperties,
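A worked example of setAlignMemoryCoherentAtom() with assumed values
offset = 100, sizeBytes = 8 and nonCoherentAtomSize = 64:

VkMappedMemoryRange memRange;
setAlignMemoryCoherentAtom( memRange, 100u, 8u, 64u );
// endOffset       = alignMemory( 100 + 8, 64 ) = 128  (end rounded up)
// memRange.offset = ( 100 / 64 ) * 64          = 64   (start rounded down)
// memRange.size   = 128 - 64                   = 64
// Both fields are multiples of the atom size, so the written bytes [100, 108)
// are covered by the spec-legal range [64, 128).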
2 changes: 2 additions & 0 deletions RenderSystems/Vulkan/src/OgreVulkanQueue.cpp
@@ -1187,6 +1187,8 @@ namespace Ogre
{
endCommandBuffer();

mRenderSystem->flushPendingNonCoherentFlushes( submissionType );

// We must reset all bindings or else after 3 (mDynamicBufferCurrentFrame) frames
// there could be dangling API handles left hanging around indefinitely that
// may be collected by RootLayouts that use more slots than they need
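The placement of that call matters: the flush must land before the queue
submission it guards. A condensed view of the assumed surrounding code:

endCommandBuffer();                              // stop recording
mRenderSystem->flushPendingNonCoherentFlushes( submissionType );
// ...bindings are reset, then the queue is submitted; by that point every
// const-buffer region written on the CPU this round is visible to the GPU.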
89 changes: 87 additions & 2 deletions RenderSystems/Vulkan/src/OgreVulkanRenderSystem.cpp
@@ -161,6 +161,7 @@ namespace Ogre
mVulkanProgramFactory2( 0 ),
mVulkanProgramFactory3( 0 ),
mVkInstance( 0 ),
mFirstUnflushedAutoParamsBuffer( 0 ),
mAutoParamsBufferIdx( 0 ),
mCurrentAutoParamsBufferPtr( 0 ),
mCurrentAutoParamsBufferSpaceLeft( 0 ),
@@ -288,6 +289,18 @@ namespace Ogre
if( !mDevice )
return;

for( ConstBufferPacked *constBuffer : mAutoParamsBuffer )
{
if( constBuffer->getMappingState() != MS_UNMAPPED )
constBuffer->unmap( UO_UNMAP_ALL );
mVaoManager->destroyConstBuffer( constBuffer );
}
mAutoParamsBuffer.clear();
mFirstUnflushedAutoParamsBuffer = 0u;
mAutoParamsBufferIdx = 0u;
mCurrentAutoParamsBufferPtr = 0;
mCurrentAutoParamsBufferSpaceLeft = 0;

mDevice->stall();

{
@@ -2316,13 +2329,21 @@
{
if( mAutoParamsBufferIdx >= mAutoParamsBuffer.size() )
{
// Ask for a coherent buffer to avoid excessive flushing. Note: VaoManager may ignore
// this request if the GPU can't provide coherent memory and we must flush anyway.
ConstBufferPacked *constBuffer =
mVaoManager->createConstBuffer( std::max<size_t>( 512u * 1024u, bytesToWrite ),
BT_DYNAMIC_PERSISTENT, 0, false );
BT_DYNAMIC_PERSISTENT_COHERENT, 0, false );
mAutoParamsBuffer.push_back( constBuffer );
}

ConstBufferPacked *constBuffer = mAutoParamsBuffer[mAutoParamsBufferIdx];

// This should be near-impossible to trigger because most Const Buffers are <= 64kb
// and we reserve 512kb per const buffer. A Low Level Material using a Params buffer
// with > 64kb is an edge case we don't care about handling.
OGRE_ASSERT_LOW( bytesToWrite <= constBuffer->getTotalSizeBytes() );

mCurrentAutoParamsBufferPtr =
reinterpret_cast<uint8 *>( constBuffer->map( 0, constBuffer->getNumElements() ) );
mCurrentAutoParamsBufferSpaceLeft = constBuffer->getTotalSizeBytes();
@@ -2332,7 +2353,7 @@

shader->updateBuffers( params, mCurrentAutoParamsBufferPtr );

assert( dynamic_cast<VulkanConstBufferPacked *>(
OGRE_ASSERT_HIGH( dynamic_cast<VulkanConstBufferPacked *>(
mAutoParamsBuffer[mAutoParamsBufferIdx - 1u] ) );

VulkanConstBufferPacked *constBuffer =
@@ -2357,6 +2378,70 @@
}
}
//-------------------------------------------------------------------------
void VulkanRenderSystem::flushBoundGpuProgramParameters(
const SubmissionType::SubmissionType submissionType )
{
bool bWillReuseLastBuffer = false;

const size_t maxBufferToFlush = mAutoParamsBufferIdx;
for( size_t i = mFirstUnflushedAutoParamsBuffer; i < maxBufferToFlush; ++i )
{
ConstBufferPacked *constBuffer = mAutoParamsBuffer[i];
if( i + 1u != maxBufferToFlush )
{
// Flush whole buffer.
constBuffer->unmap( UO_KEEP_PERSISTENT );
}
else
{
// Last buffer. Partial flush.
const size_t bytesToFlush =
constBuffer->getTotalSizeBytes() - mCurrentAutoParamsBufferSpaceLeft;

constBuffer->unmap( UO_KEEP_PERSISTENT, 0u, bytesToFlush );
if( submissionType <= SubmissionType::FlushOnly &&
mCurrentAutoParamsBufferSpaceLeft >= 4u )
{
// Map again so we can continue from where we left off.

// If the assert triggers then getNumElements is not in bytes and our math is wrong.
OGRE_ASSERT_LOW( constBuffer->getBytesPerElement() == 1u );
constBuffer->regressFrame();
mCurrentAutoParamsBufferPtr = reinterpret_cast<uint8 *>(
constBuffer->map( bytesToFlush, constBuffer->getNumElements() - bytesToFlush ) );
mCurrentAutoParamsBufferSpaceLeft = constBuffer->getNumElements() - bytesToFlush;
bWillReuseLastBuffer = true;
}
else
{
mCurrentAutoParamsBufferSpaceLeft = 0u;
mCurrentAutoParamsBufferPtr = 0;
}
}
}

if( submissionType >= SubmissionType::NewFrameIdx )
{
mAutoParamsBufferIdx = 0u;
mFirstUnflushedAutoParamsBuffer = 0u;
}
else
{
// If maxBufferToFlush == 0 then bindGpuProgramParameters() was never called this round
// and bWillReuseLastBuffer can't be true.
if( bWillReuseLastBuffer )
mFirstUnflushedAutoParamsBuffer = maxBufferToFlush - 1u;
else
mFirstUnflushedAutoParamsBuffer = maxBufferToFlush;
}
}
//-------------------------------------------------------------------------
void VulkanRenderSystem::flushPendingNonCoherentFlushes(
const SubmissionType::SubmissionType submissionType )
{
flushBoundGpuProgramParameters( submissionType );
}
//-------------------------------------------------------------------------
void VulkanRenderSystem::bindGpuProgramPassIterationParameters( GpuProgramType gptype ) {}
//-------------------------------------------------------------------------
void VulkanRenderSystem::clearFrameBuffer( RenderPassDescriptor *renderPassDesc,
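To make the partial-flush bookkeeping in flushBoundGpuProgramParameters()
concrete, a worked example with assumed numbers: one 512 KiB buffer, 100 KiB
written so far, and submissionType == FlushOnly:

// getTotalSizeBytes()               = 524288 (512 KiB)
// mCurrentAutoParamsBufferSpaceLeft = 421888 (512 KiB - 100 KiB)
const size_t bytesToFlush = 524288u - 421888u;  // = 102400, only what we wrote
constBuffer->unmap( UO_KEEP_PERSISTENT, 0u, bytesToFlush );

// FlushOnly path: remap the tail [102400, 524288) and keep appending there.
// mFirstUnflushedAutoParamsBuffer keeps pointing at this buffer, so the next
// flush round resumes at byte 102400 instead of re-flushing everything.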
9 changes: 4 additions & 5 deletions RenderSystems/Vulkan/src/Vao/OgreVulkanDynamicBuffer.cpp
@@ -88,8 +88,8 @@ namespace Ogre
VkMappedMemoryRange memRange;
makeVkStruct( memRange, VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE );
memRange.memory = mDeviceMemory;
memRange.offset = start;
memRange.size = alignMemory( count, mDevice->mDeviceProperties.limits.nonCoherentAtomSize );
setAlignMemoryCoherentAtom( memRange, start, count,
mDevice->mDeviceProperties.limits.nonCoherentAtomSize );
VkResult result = vkInvalidateMappedMemoryRanges( mDevice->mDevice, 1u, &memRange );
checkVkResult( result, "vkInvalidateMappedMemoryRanges" );
}
@@ -107,9 +107,8 @@ namespace Ogre
mappedRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
mappedRange.pNext = 0;
mappedRange.memory = mDeviceMemory;
mappedRange.offset = mMappedRanges[ticket].start + start;
mappedRange.size =
alignMemory( count, mDevice->mDeviceProperties.limits.nonCoherentAtomSize );
setAlignMemoryCoherentAtom( mappedRange, mMappedRanges[ticket].start + start, count,
mDevice->mDeviceProperties.limits.nonCoherentAtomSize );
VkResult result = vkFlushMappedMemoryRanges( mDevice->mDevice, 1u, &mappedRange );
checkVkResult( result, "vkFlushMappedMemoryRanges" );
}
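Before/after behaviour of the flush path above, with assumed values
start = 60, count = 8 and nonCoherentAtomSize = 64 (a write straddling an
atom boundary):

// old: mappedRange.offset = 60;                   // not atom-aligned
//      mappedRange.size   = alignMemory( 8, 64 ); // = 64
//      -> validation error: offset must be a multiple of nonCoherentAtomSize
// new: setAlignMemoryCoherentAtom( mappedRange, 60, 8, 64 );
//      mappedRange.offset = 0;                    // ( 60 / 64 ) * 64
//      mappedRange.size   = 128;                  // alignMemory( 68, 64 ) - 0
//      -> both atoms touched by [60, 68) are covered, and the range is legal.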
