From 696ec720fd5a0a3fccee41d7efed4cc65292d911 Mon Sep 17 00:00:00 2001 From: "Matias N. Goldberg" Date: Tue, 5 Dec 2023 20:42:18 -0300 Subject: [PATCH] [Vk] Fix memory leak when using Low Level Materials with uniform params The param buffer was never flushed, never unmapped, never destroyed, and it kept growing slowly leaking RAM over time. Additionally fix validation errors of vkInvalidateMappedMemoryRanges due to incorrect handling of nonCoherentAtomSize. --- .../Vulkan/include/OgreVulkanRenderSystem.h | 36 +++++++- .../Vulkan/include/OgreVulkanUtils.h | 10 ++- RenderSystems/Vulkan/src/OgreVulkanQueue.cpp | 2 + .../Vulkan/src/OgreVulkanRenderSystem.cpp | 89 ++++++++++++++++++- .../src/Vao/OgreVulkanDynamicBuffer.cpp | 9 +- 5 files changed, 137 insertions(+), 9 deletions(-) diff --git a/RenderSystems/Vulkan/include/OgreVulkanRenderSystem.h b/RenderSystems/Vulkan/include/OgreVulkanRenderSystem.h index 6a3fbdffe0b..d0f0a460309 100644 --- a/RenderSystems/Vulkan/include/OgreVulkanRenderSystem.h +++ b/RenderSystems/Vulkan/include/OgreVulkanRenderSystem.h @@ -78,10 +78,10 @@ namespace Ogre // TODO: AutoParamsBuffer probably belongs to MetalDevice (because it's per device?) typedef vector::type ConstBufferPackedVec; ConstBufferPackedVec mAutoParamsBuffer; + size_t mFirstUnflushedAutoParamsBuffer; size_t mAutoParamsBufferIdx; uint8 *mCurrentAutoParamsBufferPtr; size_t mCurrentAutoParamsBufferSpaceLeft; - size_t mHistoricalAutoParamsSize[60]; // For v1 rendering. v1::IndexData *mCurrentIndexBuffer; @@ -272,6 +272,40 @@ namespace Ogre uint16 variabilityMask ) override; void bindGpuProgramPassIterationParameters( GpuProgramType gptype ) override; + protected: + /** Low Level Materials use a params buffer to pass all uniforms. We emulate this using a large + const buffer to which we write to and bind the regions we need. + This is done in bindGpuProgramParameters(). + + When it runs out of space, we create another one (see mAutoParamsBuffer). + + However: + - In all cases we must flush buffers before command submission or else the cmds we're + about to execute may not see the const buffer data up to date. We don't flush in + bindGpuProgramParameters() because we could end up with lots of 4-byte flushes + which is seriously inefficient. Flushing the whole thing once at the end is better. + - We musn't grow indefinitely. On submissionType >= NewFrameIdx, we + are certain we can set mAutoParamsBufferIdx = 0 and start over. + @remarks + bindGpuProgramParameters() tries to use BT_DYNAMIC_PERSISTENT_COHERENT which doesn't need + flushing (thus we'd only care about submissionType >= NewFrameIdx to reuse memory). + + However VaoManager cannot guarantee BT_DYNAMIC_PERSISTENT_COHERENT will actually be + coherent thus we must call unmap( UO_KEEP_PERSISTENT ) anyway. + @param submissionType + See SubmissionType::SubmissionType. + */ + void flushBoundGpuProgramParameters( const SubmissionType::SubmissionType submissionType ); + + public: + /** All pending or queued buffer flushes (i.e. calls to vkFlushMappedMemoryRanges) must be done + now because we're about to submit commands for execution; and they need to see those + regions flushed. + @param submissionType + See SubmissionType::SubmissionType. + */ + void flushPendingNonCoherentFlushes( const SubmissionType::SubmissionType submissionType ); + void clearFrameBuffer( RenderPassDescriptor *renderPassDesc, TextureGpu *anyTarget, uint8 mipLevel ) override; diff --git a/RenderSystems/Vulkan/include/OgreVulkanUtils.h b/RenderSystems/Vulkan/include/OgreVulkanUtils.h index dcf285391d4..bbf3d1ad95d 100644 --- a/RenderSystems/Vulkan/include/OgreVulkanUtils.h +++ b/RenderSystems/Vulkan/include/OgreVulkanUtils.h @@ -61,11 +61,19 @@ namespace Ogre uint32_t findMemoryType( VkPhysicalDeviceMemoryProperties &memProperties, uint32_t typeFilter, VkMemoryPropertyFlags properties ); - inline VkDeviceSize alignMemory( size_t offset, const VkDeviceSize &alignment ) + inline VkDeviceSize alignMemory( size_t offset, const VkDeviceSize alignment ) { return ( ( offset + alignment - 1 ) / alignment ) * alignment; } + inline void setAlignMemoryCoherentAtom( VkMappedMemoryRange &outMemRange, const size_t offset, + const size_t sizeBytes, const VkDeviceSize alignment ) + { + const VkDeviceSize endOffset = alignMemory( offset + sizeBytes, alignment ); + outMemRange.offset = ( offset / alignment ) * alignment; + outMemRange.size = endOffset - outMemRange.offset; + } + String getSpirvReflectError( SpvReflectResult spirvReflectResult ); VkSampleCountFlagBits getMaxUsableSampleCount( VkPhysicalDeviceProperties &physicalDeviceProperties, diff --git a/RenderSystems/Vulkan/src/OgreVulkanQueue.cpp b/RenderSystems/Vulkan/src/OgreVulkanQueue.cpp index 782bcf7bded..327bc13047c 100644 --- a/RenderSystems/Vulkan/src/OgreVulkanQueue.cpp +++ b/RenderSystems/Vulkan/src/OgreVulkanQueue.cpp @@ -1187,6 +1187,8 @@ namespace Ogre { endCommandBuffer(); + mRenderSystem->flushPendingNonCoherentFlushes( submissionType ); + // We must reset all bindings or else after 3 (mDynamicBufferCurrentFrame) frames // there could be dangling API handles left hanging around indefinitely that // may be collected by RootLayouts that use more slots than they need diff --git a/RenderSystems/Vulkan/src/OgreVulkanRenderSystem.cpp b/RenderSystems/Vulkan/src/OgreVulkanRenderSystem.cpp index e4f274483ed..e82c2d5000c 100644 --- a/RenderSystems/Vulkan/src/OgreVulkanRenderSystem.cpp +++ b/RenderSystems/Vulkan/src/OgreVulkanRenderSystem.cpp @@ -161,6 +161,7 @@ namespace Ogre mVulkanProgramFactory2( 0 ), mVulkanProgramFactory3( 0 ), mVkInstance( 0 ), + mFirstUnflushedAutoParamsBuffer( 0 ), mAutoParamsBufferIdx( 0 ), mCurrentAutoParamsBufferPtr( 0 ), mCurrentAutoParamsBufferSpaceLeft( 0 ), @@ -288,6 +289,18 @@ namespace Ogre if( !mDevice ) return; + for( ConstBufferPacked *constBuffer : mAutoParamsBuffer ) + { + if( constBuffer->getMappingState() != MS_UNMAPPED ) + constBuffer->unmap( UO_UNMAP_ALL ); + mVaoManager->destroyConstBuffer( constBuffer ); + } + mAutoParamsBuffer.clear(); + mFirstUnflushedAutoParamsBuffer = 0u; + mAutoParamsBufferIdx = 0u; + mCurrentAutoParamsBufferPtr = 0; + mCurrentAutoParamsBufferSpaceLeft = 0; + mDevice->stall(); { @@ -2316,13 +2329,21 @@ namespace Ogre { if( mAutoParamsBufferIdx >= mAutoParamsBuffer.size() ) { + // Ask for a coherent buffer to avoid excessive flushing. Note: VaoManager may ignore + // this request if the GPU can't provide coherent memory and we must flush anyway. ConstBufferPacked *constBuffer = mVaoManager->createConstBuffer( std::max( 512u * 1024u, bytesToWrite ), - BT_DYNAMIC_PERSISTENT, 0, false ); + BT_DYNAMIC_PERSISTENT_COHERENT, 0, false ); mAutoParamsBuffer.push_back( constBuffer ); } ConstBufferPacked *constBuffer = mAutoParamsBuffer[mAutoParamsBufferIdx]; + + // This should be near-impossible to trigger because most Const Buffers are <= 64kb + // and we reserver 512kb per const buffer. A Low Level Material using a Params buffer + // with > 64kb is an edge case we don't care handling. + OGRE_ASSERT_LOW( bytesToWrite <= constBuffer->getTotalSizeBytes() ); + mCurrentAutoParamsBufferPtr = reinterpret_cast( constBuffer->map( 0, constBuffer->getNumElements() ) ); mCurrentAutoParamsBufferSpaceLeft = constBuffer->getTotalSizeBytes(); @@ -2332,7 +2353,7 @@ namespace Ogre shader->updateBuffers( params, mCurrentAutoParamsBufferPtr ); - assert( dynamic_cast( + OGRE_ASSERT_HIGH( dynamic_cast( mAutoParamsBuffer[mAutoParamsBufferIdx - 1u] ) ); VulkanConstBufferPacked *constBuffer = @@ -2357,6 +2378,70 @@ namespace Ogre } } //------------------------------------------------------------------------- + void VulkanRenderSystem::flushBoundGpuProgramParameters( + const SubmissionType::SubmissionType submissionType ) + { + bool bWillReuseLastBuffer = false; + + const size_t maxBufferToFlush = mAutoParamsBufferIdx; + for( size_t i = mFirstUnflushedAutoParamsBuffer; i < maxBufferToFlush; ++i ) + { + ConstBufferPacked *constBuffer = mAutoParamsBuffer[i]; + if( i + 1u != maxBufferToFlush ) + { + // Flush whole buffer. + constBuffer->unmap( UO_KEEP_PERSISTENT ); + } + else + { + // Last buffer. Partial flush. + const size_t bytesToFlush = + constBuffer->getTotalSizeBytes() - mCurrentAutoParamsBufferSpaceLeft; + + constBuffer->unmap( UO_KEEP_PERSISTENT, 0u, bytesToFlush ); + if( submissionType <= SubmissionType::FlushOnly && + mCurrentAutoParamsBufferSpaceLeft >= 4u ) + { + // Map again so we can continue from where we left off. + + // If the assert triggers then getNumElements is not in bytes and our math is wrong. + OGRE_ASSERT_LOW( constBuffer->getBytesPerElement() == 1u ); + constBuffer->regressFrame(); + mCurrentAutoParamsBufferPtr = reinterpret_cast( + constBuffer->map( bytesToFlush, constBuffer->getNumElements() - bytesToFlush ) ); + mCurrentAutoParamsBufferSpaceLeft = constBuffer->getNumElements() - bytesToFlush; + bWillReuseLastBuffer = true; + } + else + { + mCurrentAutoParamsBufferSpaceLeft = 0u; + mCurrentAutoParamsBufferPtr = 0; + } + } + } + + if( submissionType >= SubmissionType::NewFrameIdx ) + { + mAutoParamsBufferIdx = 0u; + mFirstUnflushedAutoParamsBuffer = 0u; + } + else + { + // If maxBufferToFlush == 0 then bindGpuProgramParameters() was never called this round + // and bWillReuseLastBuffer can't be true. + if( bWillReuseLastBuffer ) + mFirstUnflushedAutoParamsBuffer = maxBufferToFlush - 1u; + else + mFirstUnflushedAutoParamsBuffer = maxBufferToFlush; + } + } + //------------------------------------------------------------------------- + void VulkanRenderSystem::flushPendingNonCoherentFlushes( + const SubmissionType::SubmissionType submissionType ) + { + flushBoundGpuProgramParameters( submissionType ); + } + //------------------------------------------------------------------------- void VulkanRenderSystem::bindGpuProgramPassIterationParameters( GpuProgramType gptype ) {} //------------------------------------------------------------------------- void VulkanRenderSystem::clearFrameBuffer( RenderPassDescriptor *renderPassDesc, diff --git a/RenderSystems/Vulkan/src/Vao/OgreVulkanDynamicBuffer.cpp b/RenderSystems/Vulkan/src/Vao/OgreVulkanDynamicBuffer.cpp index 9003aacf2e7..e3b04d59f0d 100644 --- a/RenderSystems/Vulkan/src/Vao/OgreVulkanDynamicBuffer.cpp +++ b/RenderSystems/Vulkan/src/Vao/OgreVulkanDynamicBuffer.cpp @@ -88,8 +88,8 @@ namespace Ogre VkMappedMemoryRange memRange; makeVkStruct( memRange, VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE ); memRange.memory = mDeviceMemory; - memRange.offset = start; - memRange.size = alignMemory( count, mDevice->mDeviceProperties.limits.nonCoherentAtomSize ); + setAlignMemoryCoherentAtom( memRange, start, count, + mDevice->mDeviceProperties.limits.nonCoherentAtomSize ); VkResult result = vkInvalidateMappedMemoryRanges( mDevice->mDevice, 1u, &memRange ); checkVkResult( result, "vkInvalidateMappedMemoryRanges" ); } @@ -107,9 +107,8 @@ namespace Ogre mappedRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; mappedRange.pNext = 0; mappedRange.memory = mDeviceMemory; - mappedRange.offset = mMappedRanges[ticket].start + start; - mappedRange.size = - alignMemory( count, mDevice->mDeviceProperties.limits.nonCoherentAtomSize ); + setAlignMemoryCoherentAtom( mappedRange, mMappedRanges[ticket].start + start, count, + mDevice->mDeviceProperties.limits.nonCoherentAtomSize ); VkResult result = vkFlushMappedMemoryRanges( mDevice->mDevice, 1u, &mappedRange ); checkVkResult( result, "vkFlushMappedMemoryRanges" ); }