diff --git a/RenderSystems/Vulkan/include/OgreVulkanRenderSystem.h b/RenderSystems/Vulkan/include/OgreVulkanRenderSystem.h
index 6a3fbdffe0b..d0f0a460309 100644
--- a/RenderSystems/Vulkan/include/OgreVulkanRenderSystem.h
+++ b/RenderSystems/Vulkan/include/OgreVulkanRenderSystem.h
@@ -78,10 +78,10 @@ namespace Ogre
         // TODO: AutoParamsBuffer probably belongs to MetalDevice (because it's per device?)
         typedef vector<ConstBufferPacked *>::type ConstBufferPackedVec;
         ConstBufferPackedVec mAutoParamsBuffer;
+        size_t mFirstUnflushedAutoParamsBuffer;
         size_t mAutoParamsBufferIdx;
         uint8 *mCurrentAutoParamsBufferPtr;
         size_t mCurrentAutoParamsBufferSpaceLeft;
-        size_t mHistoricalAutoParamsSize[60];

         // For v1 rendering.
         v1::IndexData *mCurrentIndexBuffer;
@@ -272,6 +272,40 @@ namespace Ogre
                                        uint16 variabilityMask ) override;
         void bindGpuProgramPassIterationParameters( GpuProgramType gptype ) override;

+    protected:
+        /** Low Level Materials use a params buffer to pass all uniforms. We emulate this using a large
+            const buffer to which we write, binding only the regions we need.
+            This is done in bindGpuProgramParameters().
+
+            When it runs out of space, we create another one (see mAutoParamsBuffer).
+
+            However:
+            - In all cases we must flush buffers before command submission or else the cmds we're
+              about to execute may not see the const buffer data up to date. We don't flush in
+              bindGpuProgramParameters() because we could end up with lots of 4-byte flushes,
+              which is seriously inefficient. Flushing the whole thing once at the end is better.
+            - We mustn't grow indefinitely. On submissionType >= NewFrameIdx, we
+              are certain we can set mAutoParamsBufferIdx = 0 and start over.
+        @remarks
+            bindGpuProgramParameters() tries to use BT_DYNAMIC_PERSISTENT_COHERENT, which doesn't need
+            flushing (thus we'd only care about submissionType >= NewFrameIdx to reuse memory).
+
+            However VaoManager cannot guarantee BT_DYNAMIC_PERSISTENT_COHERENT will actually be
+            coherent, thus we must call unmap( UO_KEEP_PERSISTENT ) anyway.
+        @param submissionType
+            See SubmissionType::SubmissionType.
+        */
+        void flushBoundGpuProgramParameters( const SubmissionType::SubmissionType submissionType );
+
+    public:
+        /** All pending or queued buffer flushes (i.e. calls to vkFlushMappedMemoryRanges) must be done
+            now because we're about to submit commands for execution, and they need to see those
+            regions flushed.
+        @param submissionType
+            See SubmissionType::SubmissionType.
+        */
+        void flushPendingNonCoherentFlushes( const SubmissionType::SubmissionType submissionType );
+
         void clearFrameBuffer( RenderPassDescriptor *renderPassDesc, TextureGpu *anyTarget,
                                uint8 mipLevel ) override;
diff --git a/RenderSystems/Vulkan/include/OgreVulkanUtils.h b/RenderSystems/Vulkan/include/OgreVulkanUtils.h
index dcf285391d4..bbf3d1ad95d 100644
--- a/RenderSystems/Vulkan/include/OgreVulkanUtils.h
+++ b/RenderSystems/Vulkan/include/OgreVulkanUtils.h
@@ -61,11 +61,19 @@ namespace Ogre
     uint32_t findMemoryType( VkPhysicalDeviceMemoryProperties &memProperties, uint32_t typeFilter,
                              VkMemoryPropertyFlags properties );

-    inline VkDeviceSize alignMemory( size_t offset, const VkDeviceSize &alignment )
+    inline VkDeviceSize alignMemory( size_t offset, const VkDeviceSize alignment )
     {
         return ( ( offset + alignment - 1 ) / alignment ) * alignment;
     }

+    inline void setAlignMemoryCoherentAtom( VkMappedMemoryRange &outMemRange, const size_t offset,
+                                            const size_t sizeBytes, const VkDeviceSize alignment )
+    {
+        const VkDeviceSize endOffset = alignMemory( offset + sizeBytes, alignment );
+        outMemRange.offset = ( offset / alignment ) * alignment;
+        outMemRange.size = endOffset - outMemRange.offset;
+    }
+
     String getSpirvReflectError( SpvReflectResult spirvReflectResult );

     VkSampleCountFlagBits getMaxUsableSampleCount( VkPhysicalDeviceProperties &physicalDeviceProperties,
diff --git a/RenderSystems/Vulkan/src/OgreVulkanQueue.cpp b/RenderSystems/Vulkan/src/OgreVulkanQueue.cpp
index 782bcf7bded..327bc13047c 100644
--- a/RenderSystems/Vulkan/src/OgreVulkanQueue.cpp
+++ b/RenderSystems/Vulkan/src/OgreVulkanQueue.cpp
@@ -1187,6 +1187,8 @@ namespace Ogre
     {
         endCommandBuffer();

+        mRenderSystem->flushPendingNonCoherentFlushes( submissionType );
+
         // We must reset all bindings or else after 3 (mDynamicBufferCurrentFrame) frames
         // there could be dangling API handles left hanging around indefinitely that
         // may be collected by RootLayouts that use more slots than they need
diff --git a/RenderSystems/Vulkan/src/OgreVulkanRenderSystem.cpp b/RenderSystems/Vulkan/src/OgreVulkanRenderSystem.cpp
index e4f274483ed..e82c2d5000c 100644
--- a/RenderSystems/Vulkan/src/OgreVulkanRenderSystem.cpp
+++ b/RenderSystems/Vulkan/src/OgreVulkanRenderSystem.cpp
@@ -161,6 +161,7 @@ namespace Ogre
         mVulkanProgramFactory2( 0 ),
         mVulkanProgramFactory3( 0 ),
         mVkInstance( 0 ),
+        mFirstUnflushedAutoParamsBuffer( 0 ),
         mAutoParamsBufferIdx( 0 ),
         mCurrentAutoParamsBufferPtr( 0 ),
         mCurrentAutoParamsBufferSpaceLeft( 0 ),
@@ -288,6 +289,18 @@ namespace Ogre
         if( !mDevice )
             return;

+        for( ConstBufferPacked *constBuffer : mAutoParamsBuffer )
+        {
+            if( constBuffer->getMappingState() != MS_UNMAPPED )
+                constBuffer->unmap( UO_UNMAP_ALL );
+            mVaoManager->destroyConstBuffer( constBuffer );
+        }
+        mAutoParamsBuffer.clear();
+        mFirstUnflushedAutoParamsBuffer = 0u;
+        mAutoParamsBufferIdx = 0u;
+        mCurrentAutoParamsBufferPtr = 0;
+        mCurrentAutoParamsBufferSpaceLeft = 0;
+
         mDevice->stall();

         {
@@ -2316,13 +2329,21 @@
     {
         if( mAutoParamsBufferIdx >= mAutoParamsBuffer.size() )
         {
+            // Ask for a coherent buffer to avoid excessive flushing. Note: VaoManager may ignore
+            // this request if the GPU can't provide coherent memory and we must flush anyway.
             ConstBufferPacked *constBuffer = mVaoManager->createConstBuffer(
                 std::max<size_t>( 512u * 1024u, bytesToWrite ),
-                BT_DYNAMIC_PERSISTENT, 0, false );
+                BT_DYNAMIC_PERSISTENT_COHERENT, 0, false );
             mAutoParamsBuffer.push_back( constBuffer );
         }

         ConstBufferPacked *constBuffer = mAutoParamsBuffer[mAutoParamsBufferIdx];
+
+        // This should be near-impossible to trigger because most Const Buffers are <= 64kb
+        // and we reserve 512kb per const buffer. A Low Level Material using a Params buffer
+        // larger than 64kb is an edge case we don't bother handling.
+        OGRE_ASSERT_LOW( bytesToWrite <= constBuffer->getTotalSizeBytes() );
+
         mCurrentAutoParamsBufferPtr =
             reinterpret_cast<uint8 *>( constBuffer->map( 0, constBuffer->getNumElements() ) );
         mCurrentAutoParamsBufferSpaceLeft = constBuffer->getTotalSizeBytes();
@@ -2332,7 +2353,7 @@

         shader->updateBuffers( params, mCurrentAutoParamsBufferPtr );

-        assert( dynamic_cast<VulkanConstBufferPacked *>(
+        OGRE_ASSERT_HIGH( dynamic_cast<VulkanConstBufferPacked *>(
             mAutoParamsBuffer[mAutoParamsBufferIdx - 1u] ) );

         VulkanConstBufferPacked *constBuffer =
@@ -2357,6 +2378,70 @@
         }
     }
     //-------------------------------------------------------------------------
+    void VulkanRenderSystem::flushBoundGpuProgramParameters(
+        const SubmissionType::SubmissionType submissionType )
+    {
+        bool bWillReuseLastBuffer = false;
+
+        const size_t maxBufferToFlush = mAutoParamsBufferIdx;
+        for( size_t i = mFirstUnflushedAutoParamsBuffer; i < maxBufferToFlush; ++i )
+        {
+            ConstBufferPacked *constBuffer = mAutoParamsBuffer[i];
+            if( i + 1u != maxBufferToFlush )
+            {
+                // Flush the whole buffer.
+                constBuffer->unmap( UO_KEEP_PERSISTENT );
+            }
+            else
+            {
+                // Last buffer. Partial flush.
+                const size_t bytesToFlush =
+                    constBuffer->getTotalSizeBytes() - mCurrentAutoParamsBufferSpaceLeft;
+
+                constBuffer->unmap( UO_KEEP_PERSISTENT, 0u, bytesToFlush );
+                if( submissionType <= SubmissionType::FlushOnly &&
+                    mCurrentAutoParamsBufferSpaceLeft >= 4u )
+                {
+                    // Map again so we can continue from where we left off.
+
+                    // If this assert triggers, then getNumElements() is not in bytes and our math is wrong.
+                    OGRE_ASSERT_LOW( constBuffer->getBytesPerElement() == 1u );
+                    constBuffer->regressFrame();
+                    mCurrentAutoParamsBufferPtr = reinterpret_cast<uint8 *>(
+                        constBuffer->map( bytesToFlush, constBuffer->getNumElements() - bytesToFlush ) );
+                    mCurrentAutoParamsBufferSpaceLeft = constBuffer->getNumElements() - bytesToFlush;
+                    bWillReuseLastBuffer = true;
+                }
+                else
+                {
+                    mCurrentAutoParamsBufferSpaceLeft = 0u;
+                    mCurrentAutoParamsBufferPtr = 0;
+                }
+            }
+        }
+
+        if( submissionType >= SubmissionType::NewFrameIdx )
+        {
+            mAutoParamsBufferIdx = 0u;
+            mFirstUnflushedAutoParamsBuffer = 0u;
+        }
+        else
+        {
+            // If maxBufferToFlush == 0 then bindGpuProgramParameters() was never called this round
+            // and bWillReuseLastBuffer can't be true.
+            if( bWillReuseLastBuffer )
+                mFirstUnflushedAutoParamsBuffer = maxBufferToFlush - 1u;
+            else
+                mFirstUnflushedAutoParamsBuffer = maxBufferToFlush;
+        }
+    }
+    //-------------------------------------------------------------------------
+    void VulkanRenderSystem::flushPendingNonCoherentFlushes(
+        const SubmissionType::SubmissionType submissionType )
+    {
+        flushBoundGpuProgramParameters( submissionType );
+    }
+    //-------------------------------------------------------------------------
     void VulkanRenderSystem::bindGpuProgramPassIterationParameters( GpuProgramType gptype ) {}
     //-------------------------------------------------------------------------
     void VulkanRenderSystem::clearFrameBuffer( RenderPassDescriptor *renderPassDesc,
diff --git a/RenderSystems/Vulkan/src/Vao/OgreVulkanDynamicBuffer.cpp b/RenderSystems/Vulkan/src/Vao/OgreVulkanDynamicBuffer.cpp
index 9003aacf2e7..e3b04d59f0d 100644
--- a/RenderSystems/Vulkan/src/Vao/OgreVulkanDynamicBuffer.cpp
+++ b/RenderSystems/Vulkan/src/Vao/OgreVulkanDynamicBuffer.cpp
@@ -88,8 +88,8 @@ namespace Ogre
         VkMappedMemoryRange memRange;
         makeVkStruct( memRange, VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE );
         memRange.memory = mDeviceMemory;
-        memRange.offset = start;
-        memRange.size = alignMemory( count, mDevice->mDeviceProperties.limits.nonCoherentAtomSize );
+        setAlignMemoryCoherentAtom( memRange, start, count,
+                                    mDevice->mDeviceProperties.limits.nonCoherentAtomSize );
         VkResult result = vkInvalidateMappedMemoryRanges( mDevice->mDevice, 1u, &memRange );
         checkVkResult( result, "vkInvalidateMappedMemoryRanges" );
     }
@@ -107,9 +107,8 @@
             mappedRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
             mappedRange.pNext = 0;
             mappedRange.memory = mDeviceMemory;
-            mappedRange.offset = mMappedRanges[ticket].start + start;
-            mappedRange.size =
-                alignMemory( count, mDevice->mDeviceProperties.limits.nonCoherentAtomSize );
+            setAlignMemoryCoherentAtom( mappedRange, mMappedRanges[ticket].start + start, count,
+                                        mDevice->mDeviceProperties.limits.nonCoherentAtomSize );
             VkResult result = vkFlushMappedMemoryRanges( mDevice->mDevice, 1u, &mappedRange );
             checkVkResult( result, "vkFlushMappedMemoryRanges" );
         }
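
Reviewer note, not part of the patch: the new setAlignMemoryCoherentAtom() helper rounds a mapped range outward so that both offset and size land on nonCoherentAtomSize boundaries, as vkFlushMappedMemoryRanges / vkInvalidateMappedMemoryRanges expect; the old code only aligned the size and left the offset as-is. A minimal sketch of how the helper behaves, assuming a hypothetical atom size of 64 bytes and a made-up wrapper named flushRangeExample:

    #include <vulkan/vulkan.h>
    #include "OgreVulkanUtils.h"

    // Illustration only: flush bytes [100, 150) of a persistently mapped,
    // non-coherent allocation using the helper added by this patch.
    static void flushRangeExample( VkDevice device, VkDeviceMemory memory )
    {
        VkMappedMemoryRange memRange = {};
        memRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
        memRange.memory = memory;

        const VkDeviceSize atomSize = 64u;  // would come from limits.nonCoherentAtomSize
        Ogre::setAlignMemoryCoherentAtom( memRange, 100u, 50u, atomSize );
        // -> memRange.offset == 64  (100 rounded down to the atom)
        // -> memRange.size   == 128 (end 150 rounded up to 192, minus the 64 offset)

        vkFlushMappedMemoryRanges( device, 1u, &memRange );
    }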