From 3acd0c4f2ca328abe80f2d35fb20136e738c84ae Mon Sep 17 00:00:00 2001 From: goeiecool9999 <7033575+goeiecool9999@users.noreply.github.com> Date: Mon, 14 Oct 2024 14:03:36 +0200 Subject: [PATCH] Vulkan: Protect against uniform var ringbuffer overflow (#1378) --- .../Latte/Renderer/Vulkan/VulkanRenderer.cpp | 8 +- .../HW/Latte/Renderer/Vulkan/VulkanRenderer.h | 4 +- .../Renderer/Vulkan/VulkanRendererCore.cpp | 99 +++++++++++++------ 3 files changed, 75 insertions(+), 36 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp index 12d1d9755..9ad2c5ca6 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp @@ -1892,6 +1892,7 @@ void VulkanRenderer::ProcessFinishedCommandBuffers() if (fenceStatus == VK_SUCCESS) { ProcessDestructionQueue(); + m_uniformVarBufferReadIndex = m_cmdBufferUniformRingbufIndices[m_commandBufferSyncIndex]; m_commandBufferSyncIndex = (m_commandBufferSyncIndex + 1) % m_commandBuffers.size(); memoryManager->cleanupBuffers(m_countCommandBufferFinished); m_countCommandBufferFinished++; @@ -1985,6 +1986,7 @@ void VulkanRenderer::SubmitCommandBuffer(VkSemaphore signalSemaphore, VkSemaphor cemuLog_logDebug(LogType::Force, "Vulkan: Waiting for available command buffer..."); WaitForNextFinishedCommandBuffer(); } + m_cmdBufferUniformRingbufIndices[nextCmdBufferIndex] = m_cmdBufferUniformRingbufIndices[m_commandBufferIndex]; m_commandBufferIndex = nextCmdBufferIndex; @@ -3562,13 +3564,13 @@ void VulkanRenderer::buffer_bindUniformBuffer(LatteConst::ShaderType shaderType, switch (shaderType) { case LatteConst::ShaderType::Vertex: - dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_VERTEX].unformBufferOffset[bufferIndex] = offset; + dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_VERTEX].uniformBufferOffset[bufferIndex] = offset; break; case LatteConst::ShaderType::Geometry: - dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_GEOMETRY].unformBufferOffset[bufferIndex] = offset; + dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_GEOMETRY].uniformBufferOffset[bufferIndex] = offset; break; case LatteConst::ShaderType::Pixel: - dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_FRAGMENT].unformBufferOffset[bufferIndex] = offset; + dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_FRAGMENT].uniformBufferOffset[bufferIndex] = offset; break; default: cemu_assert_debug(false); diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h index ce97b5e9b..52c1c6ed2 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h @@ -591,6 +591,7 @@ class VulkanRenderer : public Renderer bool m_uniformVarBufferMemoryIsCoherent{false}; uint8* m_uniformVarBufferPtr = nullptr; uint32 m_uniformVarBufferWriteIndex = 0; + uint32 m_uniformVarBufferReadIndex = 0; // transform feedback ringbuffer VkBuffer m_xfbRingBuffer = VK_NULL_HANDLE; @@ -637,6 +638,7 @@ class VulkanRenderer : public Renderer size_t m_commandBufferIndex = 0; // current buffer being filled size_t m_commandBufferSyncIndex = 0; // latest buffer that finished execution (updated on submit) size_t m_commandBufferIDOfPrevFrame = 0; + std::array m_cmdBufferUniformRingbufIndices {}; // index in the uniform ringbuffer std::array m_cmd_buffer_fences; std::array m_commandBuffers; std::array m_commandBufferSemaphores; @@ -659,7 +661,7 @@ class VulkanRenderer : public Renderer uint32 uniformVarBufferOffset[VulkanRendererConst::SHADER_STAGE_INDEX_COUNT]; struct { - uint32 unformBufferOffset[LATTE_NUM_MAX_UNIFORM_BUFFERS]; + uint32 uniformBufferOffset[LATTE_NUM_MAX_UNIFORM_BUFFERS]; }shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_COUNT]; }dynamicOffsetInfo{}; diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp index 6500f7d37..3a6840728 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp @@ -375,24 +375,20 @@ float s_vkUniformData[512 * 4]; void VulkanRenderer::uniformData_updateUniformVars(uint32 shaderStageIndex, LatteDecompilerShader* shader) { - auto GET_UNIFORM_DATA_PTR = [&](size_t index) { return s_vkUniformData + (index / 4); }; + auto GET_UNIFORM_DATA_PTR = [](size_t index) { return s_vkUniformData + (index / 4); }; sint32 shaderAluConst; - sint32 shaderUniformRegisterOffset; switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: shaderAluConst = 0x400; - shaderUniformRegisterOffset = mmSQ_VTX_UNIFORM_BLOCK_START; break; case LatteConst::ShaderType::Pixel: shaderAluConst = 0; - shaderUniformRegisterOffset = mmSQ_PS_UNIFORM_BLOCK_START; break; case LatteConst::ShaderType::Geometry: shaderAluConst = 0; // geometry shader has no ALU const - shaderUniformRegisterOffset = mmSQ_GS_UNIFORM_BLOCK_START; break; default: UNREACHABLE; @@ -445,7 +441,7 @@ void VulkanRenderer::uniformData_updateUniformVars(uint32 shaderStageIndex, Latt } if (shader->uniform.loc_verticesPerInstance >= 0) { - *(int*)(s_vkUniformData + ((size_t)shader->uniform.loc_verticesPerInstance / 4)) = m_streamoutState.verticesPerInstance; + *(int*)GET_UNIFORM_DATA_PTR(shader->uniform.loc_verticesPerInstance) = m_streamoutState.verticesPerInstance; for (sint32 b = 0; b < LATTE_NUM_STREAMOUT_BUFFER; b++) { if (shader->uniform.loc_streamoutBufferBase[b] >= 0) @@ -455,26 +451,63 @@ void VulkanRenderer::uniformData_updateUniformVars(uint32 shaderStageIndex, Latt } } // upload - if ((m_uniformVarBufferWriteIndex + shader->uniform.uniformRangeSize + 1024) > UNIFORMVAR_RINGBUFFER_SIZE) + const uint32 bufferAlignmentM1 = std::max(m_featureControl.limits.minUniformBufferOffsetAlignment, m_featureControl.limits.nonCoherentAtomSize) - 1; + const uint32 uniformSize = (shader->uniform.uniformRangeSize + bufferAlignmentM1) & ~bufferAlignmentM1; + + auto waitWhileCondition = [&](std::function condition) { + while (condition()) + { + if (m_commandBufferSyncIndex == m_commandBufferIndex) + { + if (m_cmdBufferUniformRingbufIndices[m_commandBufferIndex] != m_uniformVarBufferReadIndex) + { + draw_endRenderPass(); + SubmitCommandBuffer(); + } + else + { + // submitting work would not change readIndex, so there's no way for conditions based on it to change + cemuLog_log(LogType::Force, "draw call overflowed and corrupted uniform ringbuffer. expect visual corruption"); + cemu_assert_suspicious(); + break; + } + } + WaitForNextFinishedCommandBuffer(); + } + }; + + // wrap around if it doesnt fit consecutively + if (m_uniformVarBufferWriteIndex + uniformSize > UNIFORMVAR_RINGBUFFER_SIZE) { + waitWhileCondition([&]() { + return m_uniformVarBufferReadIndex > m_uniformVarBufferWriteIndex || m_uniformVarBufferReadIndex == 0; + }); m_uniformVarBufferWriteIndex = 0; } - uint32 bufferAlignmentM1 = std::max(m_featureControl.limits.minUniformBufferOffsetAlignment, m_featureControl.limits.nonCoherentAtomSize) - 1; + + auto ringBufRemaining = [&]() { + ssize_t ringBufferUsedBytes = (ssize_t)m_uniformVarBufferWriteIndex - m_uniformVarBufferReadIndex; + if (ringBufferUsedBytes < 0) + ringBufferUsedBytes += UNIFORMVAR_RINGBUFFER_SIZE; + return UNIFORMVAR_RINGBUFFER_SIZE - 1 - ringBufferUsedBytes; + }; + waitWhileCondition([&]() { + return ringBufRemaining() < uniformSize; + }); + const uint32 uniformOffset = m_uniformVarBufferWriteIndex; memcpy(m_uniformVarBufferPtr + uniformOffset, s_vkUniformData, shader->uniform.uniformRangeSize); - m_uniformVarBufferWriteIndex += shader->uniform.uniformRangeSize; - m_uniformVarBufferWriteIndex = (m_uniformVarBufferWriteIndex + bufferAlignmentM1) & ~bufferAlignmentM1; + m_uniformVarBufferWriteIndex += uniformSize; // update dynamic offset dynamicOffsetInfo.uniformVarBufferOffset[shaderStageIndex] = uniformOffset; // flush if not coherent if (!m_uniformVarBufferMemoryIsCoherent) { - uint32 nonCoherentAtomSizeM1 = m_featureControl.limits.nonCoherentAtomSize - 1; VkMappedMemoryRange flushedRange{}; flushedRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; flushedRange.memory = m_uniformVarBufferMemory; flushedRange.offset = uniformOffset; - flushedRange.size = (shader->uniform.uniformRangeSize + nonCoherentAtomSizeM1) & ~nonCoherentAtomSizeM1; + flushedRange.size = uniformSize; vkFlushMappedMemoryRanges(m_logicalDevice, 1, &flushedRange); } } @@ -494,7 +527,7 @@ void VulkanRenderer::draw_prepareDynamicOffsetsForDescriptorSet(uint32 shaderSta { for (auto& itr : pipeline_info->dynamicOffsetInfo.list_uniformBuffers[shaderStageIndex]) { - dynamicOffsets[numDynOffsets] = dynamicOffsetInfo.shaderUB[shaderStageIndex].unformBufferOffset[itr]; + dynamicOffsets[numDynOffsets] = dynamicOffsetInfo.shaderUB[shaderStageIndex].uniformBufferOffset[itr]; numDynOffsets++; } } @@ -1357,6 +1390,24 @@ void VulkanRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 return; } + // prepare streamout + m_streamoutState.verticesPerInstance = count; + LatteStreamout_PrepareDrawcall(count, instanceCount); + + // update uniform vars + LatteDecompilerShader* vertexShader = LatteSHRC_GetActiveVertexShader(); + LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader(); + LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader(); + + if (vertexShader) + uniformData_updateUniformVars(VulkanRendererConst::SHADER_STAGE_INDEX_VERTEX, vertexShader); + if (pixelShader) + uniformData_updateUniformVars(VulkanRendererConst::SHADER_STAGE_INDEX_FRAGMENT, pixelShader); + if (geometryShader) + uniformData_updateUniformVars(VulkanRendererConst::SHADER_STAGE_INDEX_GEOMETRY, geometryShader); + // store where the read pointer should go after command buffer execution + m_cmdBufferUniformRingbufIndices[m_commandBufferIndex] = m_uniformVarBufferWriteIndex; + // process index data const LattePrimitiveMode primitiveMode = static_cast(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]); @@ -1410,22 +1461,6 @@ void VulkanRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 LatteBufferCache_Sync(indexMin + baseVertex, indexMax + baseVertex, baseInstance, instanceCount); } - // prepare streamout - m_streamoutState.verticesPerInstance = count; - LatteStreamout_PrepareDrawcall(count, instanceCount); - - // update uniform vars - LatteDecompilerShader* vertexShader = LatteSHRC_GetActiveVertexShader(); - LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader(); - LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader(); - - if (vertexShader) - uniformData_updateUniformVars(VulkanRendererConst::SHADER_STAGE_INDEX_VERTEX, vertexShader); - if (pixelShader) - uniformData_updateUniformVars(VulkanRendererConst::SHADER_STAGE_INDEX_FRAGMENT, pixelShader); - if (geometryShader) - uniformData_updateUniformVars(VulkanRendererConst::SHADER_STAGE_INDEX_GEOMETRY, geometryShader); - PipelineInfo* pipeline_info; if (!isFirst) @@ -1613,13 +1648,13 @@ void VulkanRenderer::draw_updateUniformBuffersDirectAccess(LatteDecompilerShader switch (shaderType) { case LatteConst::ShaderType::Vertex: - dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_VERTEX].unformBufferOffset[bufferIndex] = physicalAddr - m_importedMemBaseAddress; + dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_VERTEX].uniformBufferOffset[bufferIndex] = physicalAddr - m_importedMemBaseAddress; break; case LatteConst::ShaderType::Geometry: - dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_GEOMETRY].unformBufferOffset[bufferIndex] = physicalAddr - m_importedMemBaseAddress; + dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_GEOMETRY].uniformBufferOffset[bufferIndex] = physicalAddr - m_importedMemBaseAddress; break; case LatteConst::ShaderType::Pixel: - dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_FRAGMENT].unformBufferOffset[bufferIndex] = physicalAddr - m_importedMemBaseAddress; + dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_FRAGMENT].uniformBufferOffset[bufferIndex] = physicalAddr - m_importedMemBaseAddress; break; default: UNREACHABLE;