From 936be43daf99253f06f5405e8766f95029f46918 Mon Sep 17 00:00:00 2001 From: VReaperV Date: Thu, 16 May 2024 09:52:01 +0300 Subject: [PATCH] Implement GPU frustum culling --- src.cmake | 3 + src/engine/renderer/Material.cpp | 301 ++++++++++++-- src/engine/renderer/Material.h | 87 +++- src/engine/renderer/gl_shader.cpp | 24 ++ src/engine/renderer/gl_shader.h | 377 +++++++++++++++++- .../glsl_source/clearSurfaces_cp.glsl | 57 +++ src/engine/renderer/glsl_source/cull_cp.glsl | 112 ++++++ .../glsl_source/processSurfaces_cp.glsl | 100 +++++ src/engine/renderer/shaders.cpp | 6 + src/engine/renderer/tr_backend.cpp | 12 +- src/engine/renderer/tr_init.cpp | 2 + src/engine/renderer/tr_local.h | 2 + src/engine/renderer/tr_main.cpp | 8 +- src/engine/renderer/tr_public.h | 1 + src/engine/renderer/tr_scene.cpp | 10 +- src/engine/renderer/tr_shade.cpp | 3 + src/engine/renderer/tr_surface.cpp | 1 - src/engine/renderer/tr_vbo.cpp | 12 +- src/engine/sys/sdl_glimp.cpp | 4 + 19 files changed, 1068 insertions(+), 54 deletions(-) create mode 100644 src/engine/renderer/glsl_source/clearSurfaces_cp.glsl create mode 100644 src/engine/renderer/glsl_source/cull_cp.glsl create mode 100644 src/engine/renderer/glsl_source/processSurfaces_cp.glsl diff --git a/src.cmake b/src.cmake index 9bac6a5b15..0f1f993ff0 100644 --- a/src.cmake +++ b/src.cmake @@ -147,6 +147,9 @@ set(RENDERERLIST set(GLSLSOURCELIST ${ENGINE_DIR}/renderer/glsl_source/material_vp.glsl ${ENGINE_DIR}/renderer/glsl_source/material_fp.glsl + ${ENGINE_DIR}/renderer/glsl_source/cull_cp.glsl + ${ENGINE_DIR}/renderer/glsl_source/clearSurfaces_cp.glsl + ${ENGINE_DIR}/renderer/glsl_source/processSurfaces_cp.glsl ${ENGINE_DIR}/renderer/glsl_source/skybox_vp.glsl ${ENGINE_DIR}/renderer/glsl_source/ssao_fp.glsl ${ENGINE_DIR}/renderer/glsl_source/ssao_vp.glsl diff --git a/src/engine/renderer/Material.cpp b/src/engine/renderer/Material.cpp index 39efcbe8c8..8a8023a343 100644 --- a/src/engine/renderer/Material.cpp +++ 
b/src/engine/renderer/Material.cpp @@ -37,8 +37,12 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "tr_local.h" -GLSSBO materialsSSBO( "materials", 0 ); -GLIndirectBuffer commandBuffer( "drawCommands" ); +GLSSBO materialsSSBO( "materials", 0, GL_MAP_WRITE_BIT, GL_MAP_INVALIDATE_RANGE_BIT ); +GLSSBO surfaceDescriptorsSSBO( "surfaceDescriptors", 1, GL_MAP_WRITE_BIT, GL_MAP_INVALIDATE_RANGE_BIT ); +GLSSBO surfaceCommandsSSBO( "surfaceCommands", 2, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT, GL_MAP_FLUSH_EXPLICIT_BIT ); +GLBuffer culledCommandsBuffer( "culledCommands", 3, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT, GL_MAP_FLUSH_EXPLICIT_BIT ); +GLUBO surfaceBatchesUBO( "surfaceBatches", 0, GL_MAP_WRITE_BIT, GL_MAP_INVALIDATE_RANGE_BIT ); +GLBuffer atomicCommandCountersBuffer( "atomicCommandCounters", 4, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT, GL_MAP_FLUSH_EXPLICIT_BIT ); MaterialSystem materialSystem; static void ComputeDynamics( shaderStage_t* pStage ) { @@ -956,6 +960,8 @@ void MaterialSystem::GenerateWorldMaterialsBuffer() { pStage->colorRenderer( pStage ); + drawSurf->drawCommandIDs[stage] = lastCommandID; + if ( pStage->dynamic ) { drawSurf->materialsSSBOOffset[stage] = ( SSBOOffset - dynamicDrawSurfsOffset + drawSurfCount * material.shader->GetPaddedSize() ) / material.shader->GetPaddedSize(); @@ -978,37 +984,154 @@ void MaterialSystem::GenerateWorldMaterialsBuffer() { void MaterialSystem::GenerateWorldCommandBuffer() { Log::Debug( "Generating world command buffer" ); - uint count = 0; - for ( const MaterialPack& pack : materialPacks ) { - for ( const Material& material : pack.materials ) { - count += material.drawCommands.size(); + totalBatchCount = 0; + + uint batchOffset = 0; + uint globalID = 0; + for ( MaterialPack& pack : materialPacks ) { + for ( Material& material : pack.materials ) { + material.surfaceCommandBatchOffset = batchOffset; + + const uint cmdCount = material.drawCommands.size(); + const uint batchCount = cmdCount % 
SURFACE_COMMANDS_PER_BATCH == 0 ? cmdCount / SURFACE_COMMANDS_PER_BATCH + : cmdCount / SURFACE_COMMANDS_PER_BATCH + 1; + + material.surfaceCommandBatchOffset = batchOffset; + material.surfaceCommandBatchCount = batchCount; + + batchOffset += batchCount; + material.globalID = globalID; + + totalBatchCount += batchCount; + globalID++; } } - if ( count == 0 ) { - return; - } + Log::Debug( "Total batch count: %u", totalBatchCount ); - Log::Debug( "CmdBuffer size: %u", count ); + skipDrawCommands = true; + drawSurf_t* drawSurf; - commandBuffer.BindBuffer(); - glBufferData( GL_DRAW_INDIRECT_BUFFER, count * sizeof( GLIndirectBuffer::GLIndirectCommand ), nullptr, GL_STATIC_DRAW ); + surfaceDescriptorsSSBO.BindBuffer(); + surfaceDescriptorsCount = totalDrawSurfs; + glBufferData( GL_SHADER_STORAGE_BUFFER, surfaceDescriptorsCount * SURFACE_DESCRIPTOR_SIZE * sizeof(uint32_t), nullptr, GL_STATIC_DRAW ); + SurfaceDescriptor* surfaceDescriptors = + ( SurfaceDescriptor* ) surfaceDescriptorsSSBO.MapBufferRange( surfaceDescriptorsCount * SURFACE_DESCRIPTOR_SIZE ); - GLIndirectBuffer::GLIndirectCommand* commands = commandBuffer.MapBufferRange( count ); - uint offset = 0; + culledCommandsCount = totalBatchCount * SURFACE_COMMANDS_PER_BATCH; + surfaceCommandsCount = totalBatchCount * SURFACE_COMMANDS_PER_BATCH + 1; + + surfaceCommandsSSBO.BindBuffer(); + surfaceCommandsSSBO.BufferStorage( surfaceCommandsCount * SURFACE_COMMAND_SIZE * MAX_VIEWFRAMES, 1, nullptr ); + surfaceCommandsSSBO.MapAll(); + SurfaceCommand* surfaceCommands = ( SurfaceCommand* ) surfaceCommandsSSBO.GetData(); + memset( surfaceCommands, 0, surfaceCommandsCount * sizeof( SurfaceCommand ) * MAX_VIEWFRAMES ); + + culledCommandsBuffer.BindBuffer( GL_SHADER_STORAGE_BUFFER ); + culledCommandsBuffer.BufferStorage( GL_SHADER_STORAGE_BUFFER, + culledCommandsCount * INDIRECT_COMMAND_SIZE * MAX_VIEWFRAMES, 1, nullptr ); + culledCommandsBuffer.MapAll( GL_SHADER_STORAGE_BUFFER ); + GLIndirectBuffer::GLIndirectCommand* 
culledCommands = ( GLIndirectBuffer::GLIndirectCommand* ) culledCommandsBuffer.GetData(); + memset( culledCommands, 0, culledCommandsCount * sizeof( GLIndirectBuffer::GLIndirectCommand ) * MAX_VIEWFRAMES ); + culledCommandsBuffer.FlushAll( GL_SHADER_STORAGE_BUFFER ); + + surfaceBatchesUBO.BindBuffer(); + glBufferData( GL_UNIFORM_BUFFER, MAX_SURFACE_COMMAND_BATCHES * sizeof( SurfaceCommandBatch ), nullptr, GL_STATIC_DRAW ); + SurfaceCommandBatch* surfaceCommandBatches = + ( SurfaceCommandBatch* ) surfaceBatchesUBO.MapBufferRange( MAX_SURFACE_COMMAND_BATCHES * SURFACE_COMMAND_BATCH_SIZE ); + + memset( surfaceCommandBatches, 0, MAX_SURFACE_COMMAND_BATCHES * sizeof( SurfaceCommandBatch ) ); + + uint id = 0; + uint matID = 0; + uint subID = 0; for ( MaterialPack& pack : materialPacks ) { - for ( Material& material : pack.materials ) { - material.staticCommandOffset = offset; + for ( Material& mat : pack.materials ) { + for ( uint i = 0; i < mat.surfaceCommandBatchCount; i++ ) { + surfaceCommandBatches[id * 4 + subID].materialIDs[0] = matID; + surfaceCommandBatches[id * 4 + subID].materialIDs[1] = mat.surfaceCommandBatchOffset; + subID++; + if ( subID == 4 ) { + id++; + subID = 0; + } + } + matID++; + } + } + + atomicCommandCountersBuffer.BindBuffer( GL_ATOMIC_COUNTER_BUFFER ); + atomicCommandCountersBuffer.BufferStorage( GL_ATOMIC_COUNTER_BUFFER, + MAX_COMMAND_COUNTERS * MAX_VIEWS, MAX_FRAMES, nullptr ); + atomicCommandCountersBuffer.MapAll( GL_ATOMIC_COUNTER_BUFFER ); + uint32_t* atomicCommandCounters = (uint32_t*) atomicCommandCountersBuffer.GetData(); + memset( atomicCommandCounters, 0, MAX_COMMAND_COUNTERS * sizeof(uint32_t) * MAX_VIEWFRAMES ); + + for ( int i = 0; i < tr.refdef.numDrawSurfs; i++ ) { + drawSurf = &tr.refdef.drawSurfs[i]; + if ( drawSurf->entity != &tr.worldEntity ) { + continue; + } + + shader_t* shader = drawSurf->shader; + if ( !shader ) { + continue; + } + + shader = shader->remappedShader ? 
shader->remappedShader : shader; + if ( shader->isSky || shader->isPortal ) { + continue; + } + + tess.multiDrawPrimitives = 0; + tess.numIndexes = 0; + tess.numVertexes = 0; + tess.attribsSet = 0; + + skipSurface = false; + rb_surfaceTable[Util::ordinal( *( drawSurf->surface ) )]( drawSurf->surface ); + + // Don't add SF_SKIP surfaces + if ( skipSurface ) { + continue; + } + + SurfaceDescriptor surface; + VectorCopy( ( ( srfGeneric_t* ) drawSurf->surface )->origin, surface.boundingSphere.origin ); + surface.boundingSphere.radius = ( ( srfGeneric_t* ) drawSurf->surface )->radius; - for ( const DrawCommand& drawCmd : material.drawCommands ) { - memcpy( commands, &drawCmd.cmd, sizeof( GLIndirectBuffer::GLIndirectCommand ) ); - commands++; - offset++; + for ( int stage = 0; stage < drawSurf->shader->numStages; stage++ ) { + if ( stage > 3 ) { + Log::Warn( "skipping stage" ); + continue; } + + const Material* material = &materialPacks[drawSurf->materialPackIDs[stage]].materials[drawSurf->materialIDs[stage]]; + uint cmdID = material->surfaceCommandBatchOffset * SURFACE_COMMANDS_PER_BATCH + drawSurf->drawCommandIDs[stage]; + cmdID++; // Add 1 because the first surface command is always reserved as a fake command + surface.surfaceCommandIDs[stage] = cmdID; + + SurfaceCommand surfaceCommand; + surfaceCommand.enabled = 0; + surfaceCommand.drawCommand = material->drawCommands[drawSurf->drawCommandIDs[stage]].cmd; + surfaceCommands[cmdID] = surfaceCommand; } + memcpy( surfaceDescriptors, &surface, sizeof( SurfaceDescriptor ) ); + surfaceDescriptors++; } - commandBuffer.UnmapBuffer(); + for ( int i = 0; i < MAX_VIEWFRAMES; i++ ) { + memcpy( surfaceCommands + surfaceCommandsCount * i, surfaceCommands, surfaceCommandsCount * sizeof( SurfaceCommand ) ); + } + + surfaceDescriptorsSSBO.BindBuffer(); + surfaceDescriptorsSSBO.UnmapBuffer(); + surfaceDescriptorsSSBO.BindBufferBase(); + + surfaceBatchesUBO.BindBuffer(); + surfaceBatchesUBO.UnmapBuffer(); + 
surfaceBatchesUBO.BindBufferBase(); + GL_CheckErrors(); } @@ -1268,6 +1391,7 @@ void MaterialSystem::GenerateWorldMaterials() { backEnd.currentEntity = &tr.worldEntity; drawSurf_t* drawSurf; + totalDrawSurfs = 0; uint id = 0; uint previousMaterialID = 0; @@ -1298,6 +1422,8 @@ void MaterialSystem::GenerateWorldMaterials() { continue; } + totalDrawSurfs++; + for ( int stage = 0; stage < shader->numStages; stage++ ) { shaderStage_t* pStage = shader->stages[stage]; @@ -1662,6 +1788,111 @@ void MaterialSystem::UpdateDynamicSurfaces() { materialsSSBO.UnmapBuffer(); } +void MaterialSystem::UpdateFrameData() { + /* atomicCommandCountersBuffer.AreaIncr(); + + atomicCommandCountersBuffer.BindBuffer( GL_ATOMIC_COUNTER_BUFFER ); + uint32_t* atomicCommandCounters = atomicCommandCountersBuffer.GetCurrentAreaData(); + memset( atomicCommandCounters, 0, MAX_COMMAND_COUNTERS * sizeof(uint32_t)); + atomicCommandCountersBuffer.FlushCurrentArea( GL_ATOMIC_COUNTER_BUFFER ); + atomicCommandCountersBuffer.UnBindBuffer( GL_ATOMIC_COUNTER_BUFFER ); */ + + atomicCommandCountersBuffer.BindBufferBase( GL_SHADER_STORAGE_BUFFER ); + gl_clearSurfacesShader->BindProgram( 0 ); + gl_clearSurfacesShader->SetUniform_Frame( nextFrame ); + gl_clearSurfacesShader->DispatchCompute( MAX_VIEWS, 1, 1 ); + atomicCommandCountersBuffer.UnBindBufferBase( GL_SHADER_STORAGE_BUFFER ); + + GL_CheckErrors(); +} + +void MaterialSystem::QueueSurfaceCull( const uint viewID, const frustum_t* frustum ) { + memcpy( frames[nextFrame].viewFrames[viewID].frustum, frustum, sizeof( frustum_t ) ); + frames[nextFrame].viewCount++; +} + +void MaterialSystem::CullSurfaces() { + surfaceDescriptorsSSBO.BindBufferBase(); + surfaceCommandsSSBO.BindBufferBase(); + culledCommandsBuffer.BindBufferBase( GL_SHADER_STORAGE_BUFFER ); + surfaceBatchesUBO.BindBufferBase(); + atomicCommandCountersBuffer.BindBufferBase( GL_ATOMIC_COUNTER_BUFFER ); + + for ( uint i = 0; i < frames[nextFrame].viewCount; i++ ) { + frustum_t* frustum = 
&frames[nextFrame].viewFrames[i].frustum; + + vec4_t frustumPlanes[6]; + for ( int j = 0; j < 6; j++ ) { + VectorCopy( PVSLocked ? lockedFrustum[j].normal : frustum[0][j].normal, frustumPlanes[j] ); + frustumPlanes[j][3] = PVSLocked ? lockedFrustum[j].dist : frustum[0][j].dist; + } + + gl_cullShader->BindProgram( 0 ); + uint globalWorkGroupX = totalDrawSurfs % MAX_COMMAND_COUNTERS == 0 ? + totalDrawSurfs / MAX_COMMAND_COUNTERS : totalDrawSurfs / MAX_COMMAND_COUNTERS + 1; + gl_cullShader->SetUniform_TotalDrawSurfs( totalDrawSurfs ); + gl_cullShader->SetUniform_SurfaceCommandsOffset( surfaceCommandsCount * MAX_VIEWS * nextFrame ); + + if ( PVSLocked ) { + if ( r_lockpvs->integer == 0 ) { + PVSLocked = false; + } + } + if ( r_lockpvs->integer == 1 && !PVSLocked ) { + PVSLocked = true; + for ( int j = 0; j < 6; j++ ) { + VectorCopy( frustum[0][j].normal, lockedFrustum[j].normal ); + lockedFrustum[j].dist = frustum[0][j].dist; + } + } + + // FIXME: Make far plane work properly + gl_cullShader->SetUniform_Frustum( frustumPlanes ); + + gl_cullShader->DispatchCompute( globalWorkGroupX, 1, 1 ); + + gl_processSurfacesShader->BindProgram( 0 ); + gl_processSurfacesShader->SetUniform_Frame( nextFrame ); + gl_processSurfacesShader->SetUniform_SurfaceCommandsOffset( surfaceCommandsCount * MAX_VIEWS * nextFrame ); + gl_processSurfacesShader->SetUniform_CulledCommandsOffset( culledCommandsCount * MAX_VIEWS * nextFrame ); + + glMemoryBarrier( GL_SHADER_STORAGE_BARRIER_BIT | GL_ATOMIC_COUNTER_BARRIER_BIT ); + gl_processSurfacesShader->DispatchCompute( totalBatchCount, 1, 1 ); + } + + surfaceDescriptorsSSBO.UnBindBufferBase(); + surfaceCommandsSSBO.UnBindBufferBase(); + culledCommandsBuffer.UnBindBufferBase( GL_SHADER_STORAGE_BUFFER ); + surfaceBatchesUBO.UnBindBufferBase(); + atomicCommandCountersBuffer.UnBindBufferBase( GL_ATOMIC_COUNTER_BUFFER ); + + GL_CheckErrors(); +} + +void MaterialSystem::StartFrame() { + if ( !generatedWorldCommandBuffer ) { + return; + } + 
frames[nextFrame].viewCount = 0; + + // renderedMaterials.clear(); + // UpdateDynamicSurfaces(); + // UpdateFrameData(); +} + +void MaterialSystem::EndFrame() { + if ( !generatedWorldCommandBuffer ) { + return; + } + + currentFrame = nextFrame; + nextFrame++; + if ( nextFrame >= MAX_FRAMES ) { + nextFrame = 0; + } + return; +} + void MaterialSystem::GeneratePortalBoundingSpheres() { Log::Debug( "Generating portal bounding spheres" ); @@ -1703,6 +1934,13 @@ void MaterialSystem::Free() { skyShaders.clear(); renderedMaterials.clear(); + surfaceCommandsSSBO.UnmapBuffer(); + culledCommandsBuffer.UnmapBuffer(); + atomicCommandCountersBuffer.UnmapBuffer(); + + currentFrame = 0; + nextFrame = 0; + for ( MaterialPack& pack : materialPacks ) { for ( Material& material : pack.materials ) { material.drawCommands.clear(); @@ -1727,6 +1965,7 @@ void MaterialSystem::AddDrawCommand( const uint materialID, const uint materialP cmd.materialsSSBOOffset = materialsSSBOOffset; materialPacks[materialPackID].materials[materialID].drawCommands.emplace_back(cmd); + lastCommandID = materialPacks[materialPackID].materials[materialID].drawCommands.size() - 1; cmd.textureCount = 0; } @@ -1759,6 +1998,11 @@ void MaterialSystem::RenderMaterials( const shaderSort_t fromSort, const shaderS if ( frameStart ) { renderedMaterials.clear(); UpdateDynamicSurfaces(); + UpdateFrameData(); + // StartFrame(); + + // Make sure compute dispatches from the last frame finished writing to memory + glMemoryBarrier( GL_COMMAND_BARRIER_BIT ); frameStart = false; } @@ -1920,10 +2164,21 @@ void MaterialSystem::RenderMaterial( Material& material ) { } material.texturesResident = true; - glMultiDrawElementsIndirect( GL_TRIANGLES, GL_UNSIGNED_INT, - BUFFER_OFFSET( material.staticCommandOffset * sizeof( GLIndirectBuffer::GLIndirectCommand ) ), + culledCommandsBuffer.BindBuffer( GL_DRAW_INDIRECT_BUFFER ); + + atomicCommandCountersBuffer.BindBuffer( GL_PARAMETER_BUFFER_ARB ); + + glMultiDrawElementsIndirectCountARB( 
GL_TRIANGLES, GL_UNSIGNED_INT, + BUFFER_OFFSET( material.surfaceCommandBatchOffset * SURFACE_COMMANDS_PER_BATCH * sizeof( GLIndirectBuffer::GLIndirectCommand ) + + ( culledCommandsCount * sizeof( GLIndirectBuffer::GLIndirectCommand ) * MAX_VIEWS * currentFrame ) ), + material.globalID * sizeof( uint32_t ) + + ( MAX_COMMAND_COUNTERS * MAX_VIEWS * sizeof( uint32_t ) * currentFrame ), material.drawCommands.size(), 0 ); + culledCommandsBuffer.UnBindBuffer( GL_DRAW_INDIRECT_BUFFER ); + + atomicCommandCountersBuffer.UnBindBuffer( GL_PARAMETER_BUFFER_ARB ); + if ( material.usePolygonOffset ) { glDisable( GL_POLYGON_OFFSET_FILL ); } diff --git a/src/engine/renderer/Material.h b/src/engine/renderer/Material.h index cdade452ee..0cff2f258c 100644 --- a/src/engine/renderer/Material.h +++ b/src/engine/renderer/Material.h @@ -71,7 +71,10 @@ struct Material { uint currentStaticDrawSurfCount = 0; uint currentDynamicDrawSurfCount = 0; - uint staticCommandOffset = 0; + uint globalID = 0; + uint surfaceCommandBatchOffset = 0; + uint surfaceCommandBatchCount = 0; + uint surfaceCommandBatchPadding = 0; uint id = 0; bool useSync = false; @@ -134,6 +137,53 @@ struct drawSurfBoundingSphere { uint drawSurfID; }; +#define MAX_SURFACE_COMMANDS 4 +#define MAX_COMMAND_COUNTERS 64 +#define SURFACE_COMMANDS_PER_BATCH 64 + +#define MAX_SURFACE_COMMAND_BATCHES 2048 * 2 + +#define SURFACE_DESCRIPTOR_SIZE 8 +#define INDIRECT_COMMAND_SIZE 5 +#define SURFACE_COMMAND_SIZE 6 +#define SURFACE_COMMAND_BATCH_SIZE 4 + +#define MAX_FRAMES 2 +#define MAX_VIEWFRAMES MAX_VIEWS * MAX_FRAMES // Buffer 2 frames for each view + +struct ViewFrame { + // view id, compute fence + uint viewID = 0; + GLsync cullSync = nullptr; + // GLsync cullSync; // TODO: Occlusion culling + uint portalViews[MAX_VIEWS]; + frustum_t frustum; +}; + +struct Frame { + uint viewCount = 0; + ViewFrame viewFrames[MAX_VIEWS]; +}; + +struct BoundingSphere { + vec3_t origin; + float radius; +}; + +struct SurfaceDescriptor { + BoundingSphere 
boundingSphere; + uint surfaceCommandIDs[MAX_SURFACE_COMMANDS] { 0, 0, 0, 0 }; +}; + +struct SurfaceCommand { + uint enabled; // uint because bool in GLSL is always 4 bytes + GLIndirectBuffer::GLIndirectCommand drawCommand; +}; + +struct SurfaceCommandBatch { + uint materialIDs[4] { 0, 0, 0, 0 }; +}; + class MaterialSystem { public: bool generatedWorldCommandBuffer = false; @@ -142,6 +192,8 @@ class MaterialSystem { bool generatingWorldCommandBuffer = false; vec3_t worldViewBounds[2] = {}; + std::vector drawCommands; + std::vector portalSurfacesTmp; std::vector portalSurfaces; std::vector portalBounds; @@ -166,7 +218,7 @@ class MaterialSystem { { shaderSort_t::SS_ENVIRONMENT_NOFOG, shaderSort_t::SS_POST_PROCESS } }; - bool frameStart = true; + bool frameStart = false; void AddTexture( Texture* texture ); void AddDrawCommand( const uint materialID, const uint materialPackID, const uint materialsSSBOOffset, @@ -176,6 +228,12 @@ class MaterialSystem { void RenderMaterials( const shaderSort_t fromSort, const shaderSort_t toSort ); void UpdateDynamicSurfaces(); + void QueueSurfaceCull( const uint viewID, const frustum_t* frustum ); + void CullSurfaces(); + + void StartFrame(); + void EndFrame(); + void AddStageTextures( drawSurf_t* drawSurf, shaderStage_t* pStage, Material* material ); void GenerateWorldMaterials(); void GenerateWorldMaterialsBuffer(); @@ -187,16 +245,39 @@ class MaterialSystem { void Free(); private: + bool PVSLocked = false; + frustum_t lockedFrustum; + + bool firstFrame = true; + DrawCommand cmd; + uint lastCommandID; + uint totalDrawSurfs; + uint totalBatchCount = 0; + + uint surfaceCommandsCount = 0; + uint culledCommandsCount = 0; + uint surfaceDescriptorsCount = 0; + uint batchCommandsCount = 0; + std::vector dynamicDrawSurfs; uint dynamicDrawSurfsOffset = 0; uint dynamicDrawSurfsSize = 0; + Frame frames[MAX_FRAMES]; + uint currentFrame = 0; + uint nextFrame = 1; + void RenderMaterial( Material& material ); + void UpdateFrameData(); }; extern 
GLSSBO materialsSSBO; -extern GLIndirectBuffer commandBuffer; +extern GLSSBO surfaceDescriptorsSSBO; // Global +extern GLSSBO surfaceCommandsSSBO; // Per viewframe, GPU updated +extern GLBuffer culledCommandsBuffer; // Per viewframe +extern GLUBO surfaceBatchesUBO; // Global +extern GLBuffer atomicCommandCountersBuffer; // Per viewframe extern MaterialSystem materialSystem; #endif // MATERIAL_H diff --git a/src/engine/renderer/gl_shader.cpp b/src/engine/renderer/gl_shader.cpp index a3fedc0cb4..66d4a7afbc 100644 --- a/src/engine/renderer/gl_shader.cpp +++ b/src/engine/renderer/gl_shader.cpp @@ -42,6 +42,9 @@ ShaderKind shaderKind = ShaderKind::Unknown; GLShader_generic2D *gl_generic2DShader = nullptr; GLShader_generic *gl_genericShader = nullptr; GLShader_genericMaterial *gl_genericShaderMaterial = nullptr; +GLShader_cull *gl_cullShader = nullptr; +GLShader_clearSurfaces *gl_clearSurfacesShader = nullptr; +GLShader_processSurfaces *gl_processSurfacesShader = nullptr; GLShader_lightMapping *gl_lightMappingShader = nullptr; GLShader_lightMappingMaterial *gl_lightMappingShaderMaterial = nullptr; GLShader_forwardLighting_omniXYZ *gl_forwardLightingShader_omniXYZ = nullptr; @@ -464,6 +467,8 @@ static std::string GenComputeVersionDeclaration() { GLEW_ARB_explicit_uniform_location, "ARB_explicit_uniform_location" ); addExtension( str, glConfig2.shaderImageLoadStoreAvailable, 420, GLEW_ARB_shader_image_load_store, "ARB_shader_image_load_store" ); + addExtension( str, glConfig2.shaderAtomicCountersAvailable, 420, + GLEW_ARB_shader_atomic_counters, "ARB_shader_atomic_counters" ); return str; } @@ -3055,3 +3060,22 @@ void GLShader_fxaa::BuildShaderFragmentLibNames( std::string& fragmentInlines ) { fragmentInlines += "fxaa3_11"; } + +GLShader_cull::GLShader_cull( GLShaderManager* manager ) : + GLShader( "cull", ATTR_POSITION, manager, false, false, true ), + u_TotalDrawSurfs( this ), + u_SurfaceCommandsOffset( this ), + u_Frustum( this ) { +} + 
+GLShader_clearSurfaces::GLShader_clearSurfaces( GLShaderManager* manager ) : + GLShader( "clearSurfaces", ATTR_POSITION, manager, false, false, true ), + u_Frame( this ) { +} + +GLShader_processSurfaces::GLShader_processSurfaces( GLShaderManager* manager ) : + GLShader( "processSurfaces", ATTR_POSITION, manager, false, false, true ), + u_Frame( this ), + u_SurfaceCommandsOffset( this ), + u_CulledCommandsOffset( this ) { +} diff --git a/src/engine/renderer/gl_shader.h b/src/engine/renderer/gl_shader.h index 093c7dc7cf..1def090e10 100644 --- a/src/engine/renderer/gl_shader.h +++ b/src/engine/renderer/gl_shader.h @@ -708,6 +708,56 @@ class GLUniform1i : protected GLUniform int currentValue = 0; }; +class GLUniform1ui : protected GLUniform { + protected: + GLUniform1ui( GLShader* shader, const char* name, const bool global = false ) : + GLUniform( shader, name, "uint", 1, 1, global ) { + } + + inline void SetValue( uint value ) { + shaderProgram_t* p = _shader->GetProgram(); + + if ( _global || !_shader->UseMaterialSystem() ) { + ASSERT_EQ( p, glState.currentProgram ); + } + +#if defined( LOG_GLSL_UNIFORMS ) + if ( r_logFile->integer ) { + GLimp_LogComment( va( "GLSL_SetUniform1i( %s, shader: %s, value: %d ) ---\n", + this->GetName(), _shader->GetName().c_str(), value ) ); + } +#endif + + if ( _shader->UseMaterialSystem() && !_global ) { + currentValue = value; + return; + } + +#if defined( USE_UNIFORM_FIREWALL ) + uint* firewall = ( uint* ) &p->uniformFirewall[_firewallIndex]; + + if ( *firewall == value ) { + return; + } + + *firewall = value; +#endif + glUniform1ui( p->uniformLocations[_locationIndex], value ); + } + public: + size_t GetSize() override { + return sizeof( uint ); + } + + uint32_t* WriteToBuffer( uint32_t* buffer ) override { + memcpy( buffer, &currentValue, sizeof( uint ) ); + return buffer + 1; + } + + private: + uint currentValue = 0; +}; + class GLUniform1Bool : protected GLUniform { protected: // GLSL std430 bool is always 4 bytes, which might not 
correspond to C++ bool @@ -1266,54 +1316,121 @@ class GLUniformBlock } }; -class GLSSBO { +class GLBuffer { public: std::string _name; const GLuint _bindingPoint; + const GLbitfield _flags; + const GLbitfield _mapFlags; + const GLuint64 SYNC_TIMEOUT = 10000000000; // 10 seconds - GLSSBO( const char* name, const GLuint bindingPoint ) : + GLBuffer( const char* name, const GLuint bindingPoint, const GLbitfield flags, const GLbitfield mapFlags ) : _name( name ), - _bindingPoint( bindingPoint ) { + _bindingPoint( bindingPoint ), + _flags( flags ), + _mapFlags( mapFlags ) { } - public: const char* GetName() { return _name.c_str(); } - void BindBufferBase() { - glBindBufferBase( GL_SHADER_STORAGE_BUFFER, _bindingPoint, handle ); + void BindBufferBase( const GLenum target ) { + glBindBufferBase( target, _bindingPoint, handle ); } - void BindBuffer() { - glBindBuffer( GL_SHADER_STORAGE_BUFFER, handle ); + void UnBindBufferBase( const GLenum target ) { + glBindBufferBase( target, _bindingPoint, 0 ); } - uint32_t* MapBufferRange( const GLuint count ) { + void BindBuffer( const GLenum target ) { + glBindBuffer( target, handle ); + } + + void UnBindBuffer( const GLenum target ) { + glBindBuffer( target, 0 ); + } + + void BufferStorage( const GLenum target, const GLsizeiptr newAreaSize, const uint areaCount, const void* data ) { + areaSize = newAreaSize; + maxAreas = areaCount; + glBufferStorage( target, areaSize * areaCount * sizeof(uint32_t), data, _flags); + syncs.resize( areaCount ); + } + + void AreaIncr() { + syncs[area] = glFenceSync( GL_SYNC_GPU_COMMANDS_COMPLETE, 0 ); + area++; + if ( area >= maxAreas ) { + area = 0; + } + } + + void MapAll( const GLenum target ) { + if ( !mapped ) { + mapped = true; + mappedTarget = target; + data = ( uint32_t* ) glMapBufferRange( target, 0, areaSize * maxAreas * sizeof( uint32_t ), _flags | _mapFlags ); + } + } + + uint32_t* GetAreaData( const uint areaID ) { + ASSERT_LT( areaID, maxAreas ); + return data + areaID * areaSize; + } + + 
uint32_t* GetCurrentAreaData() { + if ( syncs[area] != nullptr ) { + if ( glClientWaitSync( syncs[area], GL_SYNC_FLUSH_COMMANDS_BIT, SYNC_TIMEOUT ) == GL_TIMEOUT_EXPIRED ) { + Sys::Drop( "Failed buffer %s area %u sync", _name, area ); + } + glDeleteSync( syncs[area] ); + } + + return data + area * areaSize; + } + + uint32_t* GetData() { + return data; + } + + void FlushCurrentArea( GLenum target ) { + glFlushMappedBufferRange( target, area * areaSize * sizeof( uint32_t ), areaSize * sizeof( uint32_t ) ); + } + + void FlushAll( GLenum target ) { + glFlushMappedBufferRange( target, 0, maxAreas * areaSize * sizeof( uint32_t ) ); + } + + uint32_t* MapBufferRange( const GLenum target, const GLuint count ) { if ( !mapped ) { mapped = true; - data = ( uint32_t* ) glMapBufferRange( GL_SHADER_STORAGE_BUFFER, + mappedTarget = target; + data = ( uint32_t* ) glMapBufferRange( target, 0, count * sizeof( uint32_t ), - GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT ); + _flags | _mapFlags ); } return data; } - uint32_t* MapBufferRange( const GLuint offset, const GLuint count ) { + uint32_t* MapBufferRange( const GLenum target, const GLuint offset, const GLuint count ) { if ( !mapped ) { mapped = true; - data = ( uint32_t* ) glMapBufferRange( GL_SHADER_STORAGE_BUFFER, + mappedTarget = target; + data = ( uint32_t* ) glMapBufferRange( target, offset * sizeof( uint32_t ), count * sizeof( uint32_t ), - GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT ); + _flags | _mapFlags ); } return data; } void UnmapBuffer() { - mapped = false; - glUnmapBuffer( GL_SHADER_STORAGE_BUFFER ); + if ( mapped ) { + mapped = false; + glUnmapBuffer( mappedTarget ); + } } void GenBuffer() { @@ -1325,11 +1442,148 @@ class GLSSBO { } private: + GLenum mappedTarget; GLuint handle; bool mapped = false; + std::vector<GLsync> syncs; + uint area = 0; + uint areaSize = 0; + uint maxAreas = 0; uint32_t* data; }; +class GLSSBO : public GLBuffer { + public: + GLSSBO( const char* name, const GLuint bindingPoint, const 
GLbitfield flags, const GLbitfield mapFlags ) : + GLBuffer( name, bindingPoint, flags, mapFlags ) { + } + + public: + const char* GetName() { + return _name.c_str(); + } + + void BindBufferBase() { + GLBuffer::BindBufferBase( GL_SHADER_STORAGE_BUFFER ); + } + + void UnBindBufferBase() { + GLBuffer::UnBindBufferBase( GL_SHADER_STORAGE_BUFFER ); + } + + void BindBuffer() { + GLBuffer::BindBuffer( GL_SHADER_STORAGE_BUFFER ); + } + + void BufferStorage( const GLsizeiptr areaSize, const uint areaCount, const void* data ) { + GLBuffer::BufferStorage( GL_SHADER_STORAGE_BUFFER, areaSize, areaCount, data ); + } + + void MapAll() { + GLBuffer::MapAll( GL_SHADER_STORAGE_BUFFER ); + } + + void FlushCurrentArea() { + GLBuffer::FlushCurrentArea( GL_SHADER_STORAGE_BUFFER ); + } + + uint32_t* MapBufferRange( const GLuint count ) { + return GLBuffer::MapBufferRange( GL_SHADER_STORAGE_BUFFER, count ); + } + + uint32_t* MapBufferRange( const GLuint offset, const GLuint count ) { + return GLBuffer::MapBufferRange( GL_SHADER_STORAGE_BUFFER, offset, count ); + } +}; + +class GLUBO : public GLBuffer { + public: + GLUBO( const char* name, const GLuint bindingPoint, const GLbitfield flags, const GLbitfield mapFlags ) : + GLBuffer( name, bindingPoint, flags, mapFlags ) { + } + + public: + const char* GetName() { + return _name.c_str(); + } + + void BindBufferBase() { + GLBuffer::BindBufferBase( GL_UNIFORM_BUFFER ); + } + + void UnBindBufferBase() { + GLBuffer::UnBindBufferBase( GL_UNIFORM_BUFFER ); + } + + void BindBuffer() { + GLBuffer::BindBuffer( GL_UNIFORM_BUFFER ); + } + + void BufferStorage( const GLsizeiptr areaSize, const uint areaCount, const void* data ) { + GLBuffer::BufferStorage( GL_UNIFORM_BUFFER, areaSize, areaCount, data ); + } + + void MapAll() { + GLBuffer::MapAll( GL_UNIFORM_BUFFER ); + } + + void FlushCurrentArea() { + GLBuffer::FlushCurrentArea( GL_UNIFORM_BUFFER ); + } + + uint32_t* MapBufferRange( const GLuint count ) { + return GLBuffer::MapBufferRange( 
GL_UNIFORM_BUFFER, count ); + } + + uint32_t* MapBufferRange( const GLuint offset, const GLuint count ) { + return GLBuffer::MapBufferRange( GL_UNIFORM_BUFFER, offset, count ); + } +}; + +class GLAtomicCounterBuffer : public GLBuffer { + public: + GLAtomicCounterBuffer( const char* name, const GLuint bindingPoint, const GLbitfield flags, const GLbitfield mapFlags ) : + GLBuffer( name, bindingPoint, flags, mapFlags ) { + } + + public: + const char* GetName() { + return _name.c_str(); + } + + void BindBufferBase() { + GLBuffer::BindBufferBase( GL_ATOMIC_COUNTER_BUFFER ); + } + + void UnBindBufferBase() { + GLBuffer::UnBindBufferBase( GL_ATOMIC_COUNTER_BUFFER ); + } + + void BindBuffer() { + GLBuffer::BindBuffer( GL_ATOMIC_COUNTER_BUFFER ); + } + + void BufferStorage( const GLsizeiptr areaSize, const uint areaCount, const void* data ) { + GLBuffer::BufferStorage( GL_ATOMIC_COUNTER_BUFFER, areaSize, areaCount, data ); + } + + void MapAll() { + GLBuffer::MapAll( GL_ATOMIC_COUNTER_BUFFER ); + } + + void FlushCurrentArea() { + GLBuffer::FlushCurrentArea( GL_ATOMIC_COUNTER_BUFFER ); + } + + uint32_t* MapBufferRange( const GLuint count ) { + return GLBuffer::MapBufferRange( GL_ATOMIC_COUNTER_BUFFER, count ); + } + + uint32_t* MapBufferRange( const GLuint offset, const GLuint count ) { + return GLBuffer::MapBufferRange( GL_ATOMIC_COUNTER_BUFFER, offset, count ); + } +}; + class GLIndirectBuffer { public: @@ -2928,6 +3182,66 @@ class u_Color : } }; +class u_Frame : + GLUniform1ui { + public: + u_Frame( GLShader* shader ) : + GLUniform1ui( shader, "u_Frame" ) { + } + + void SetUniform_Frame( const uint frame ) { + this->SetValue( frame ); + } +}; + +class u_TotalDrawSurfs : + GLUniform1ui { + public: + u_TotalDrawSurfs( GLShader* shader ) : + GLUniform1ui( shader, "u_TotalDrawSurfs" ) { + } + + void SetUniform_TotalDrawSurfs( const uint totalDrawSurfs ) { + this->SetValue( totalDrawSurfs ); + } +}; + +class u_Frustum : + GLUniform4fv { + public: + u_Frustum( GLShader* shader ) 
: + GLUniform4fv( shader, "u_Frustum", 6 ) { + } + + void SetUniform_Frustum( vec4_t frustum[6] ) { + this->SetValue( 6, &frustum[0] ); + } +}; + +class u_SurfaceCommandsOffset : + GLUniform1ui { + public: + u_SurfaceCommandsOffset( GLShader* shader ) : + GLUniform1ui( shader, "u_SurfaceCommandsOffset" ) { + } + + void SetUniform_SurfaceCommandsOffset( const uint surfaceCommandsOffset ) { + this->SetValue( surfaceCommandsOffset ); + } +}; + +class u_CulledCommandsOffset : + GLUniform1ui { + public: + u_CulledCommandsOffset( GLShader* shader ) : + GLUniform1ui( shader, "u_CulledCommandsOffset" ) { + } + + void SetUniform_CulledCommandsOffset( const uint culledCommandsOffset ) { + this->SetValue( culledCommandsOffset ); + } +}; + class u_ModelMatrix : GLUniformMatrix4f { @@ -3050,7 +3364,7 @@ class u_Bones : { public: u_Bones( GLShader *shader ) : - GLUniform4fv( shader, "u_Bones", MAX_BONES * 0 + 1 ) + GLUniform4fv( shader, "u_Bones", MAX_BONES ) { } @@ -4270,6 +4584,32 @@ class GLShader_fxaa : void BuildShaderFragmentLibNames( std::string& fragmentInlines ) override; }; +class GLShader_cull : + public GLShader, + public u_TotalDrawSurfs, + public u_SurfaceCommandsOffset, + public u_Frustum { + public: + GLShader_cull( GLShaderManager* manager ); +}; + +class GLShader_clearSurfaces : + public GLShader, + public u_Frame { + public: + GLShader_clearSurfaces( GLShaderManager* manager ); +}; + +class GLShader_processSurfaces : + public GLShader, + public u_Frame, + public u_SurfaceCommandsOffset, + public u_CulledCommandsOffset { + public: + GLShader_processSurfaces( GLShaderManager* manager ); +}; + + std::string GetShaderPath(); extern ShaderKind shaderKind; @@ -4277,6 +4617,9 @@ extern ShaderKind shaderKind; extern GLShader_generic2D *gl_generic2DShader; extern GLShader_generic *gl_genericShader; extern GLShader_genericMaterial *gl_genericShaderMaterial; +extern GLShader_cull *gl_cullShader; +extern GLShader_clearSurfaces *gl_clearSurfacesShader; +extern 
GLShader_processSurfaces *gl_processSurfacesShader; extern GLShader_lightMapping *gl_lightMappingShader; extern GLShader_lightMappingMaterial *gl_lightMappingShaderMaterial; extern GLShader_forwardLighting_omniXYZ *gl_forwardLightingShader_omniXYZ; diff --git a/src/engine/renderer/glsl_source/clearSurfaces_cp.glsl b/src/engine/renderer/glsl_source/clearSurfaces_cp.glsl new file mode 100644 index 0000000000..794a5e5cb2 --- /dev/null +++ b/src/engine/renderer/glsl_source/clearSurfaces_cp.glsl @@ -0,0 +1,57 @@ +/* +=========================================================================== + +Daemon BSD Source Code +Copyright (c) 2024 Daemon Developers +All rights reserved. + +This file is part of the Daemon BSD Source Code (Daemon Source Code). + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Daemon developers nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL DAEMON DEVELOPERS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=========================================================================== +*/ + +/* clearSurfaces_cp.glsl */ + +layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; /* 64 invocations per work group, all along X */ + +#define MAX_COMMAND_COUNTERS 64 +#define MAX_VIEWS 10 +#define MAX_FRAMES 2 + +layout(std430, binding = 4) writeonly buffer atomicCommandCountersBuffer { /* write-only storage block at binding 4 holding 64 * 10 * 2 = 1280 uints */ + uint atomicCommandCounters[MAX_COMMAND_COUNTERS * MAX_VIEWS * MAX_FRAMES]; +}; + +uniform uint u_Frame; /* selects which slice of MAX_COMMAND_COUNTERS * MAX_VIEWS counters gets cleared */ + +void main() { /* zeroes one counter per invocation inside the slice chosen by u_Frame */ + const uint globalGroupID = gl_GlobalInvocationID.z * gl_NumWorkGroups.x * gl_WorkGroupSize.x * gl_NumWorkGroups.y * gl_WorkGroupSize.y + + gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + + gl_GlobalInvocationID.x; /* flattened global invocation index (named globalGroupID but built from gl_GlobalInvocationID) */ + if( globalGroupID >= MAX_COMMAND_COUNTERS * MAX_VIEWS ) { /* invocations beyond the 640 counters of one slice do nothing */ + return; + } + atomicCommandCounters[globalGroupID + MAX_COMMAND_COUNTERS * MAX_VIEWS * u_Frame] = 0; +} diff --git a/src/engine/renderer/glsl_source/cull_cp.glsl b/src/engine/renderer/glsl_source/cull_cp.glsl new file mode 100644 index 0000000000..bf901e50a1 --- /dev/null +++ b/src/engine/renderer/glsl_source/cull_cp.glsl @@ -0,0 +1,112 @@ +/* +=========================================================================== + +Daemon BSD Source Code +Copyright (c) 2024 Daemon Developers +All rights reserved. + +This file is part of the Daemon BSD Source Code (Daemon Source Code).
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Daemon developers nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL DAEMON DEVELOPERS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=========================================================================== +*/ + +/* cull_cp.glsl */ + +layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +// layout(rg16f, binding = 0) uniform image2D depthImage; + +struct BoundingSphere { /* sphere tested against the frustum planes in CullSurface() */ + vec3 center; + float radius; +}; + +#define MAX_SURFACE_COMMANDS 4 /* number of command ID slots stored in each SurfaceDescriptor */ + +struct SurfaceDescriptor { + BoundingSphere boundingSphere; + uint surfaceCommandIDs[MAX_SURFACE_COMMANDS]; +}; + +struct GLIndirectCommand { /* same five fields, in the same order, as the DrawElementsIndirectCommand structure of the GL indirect draw API */ + uint count; + uint instanceCount; + uint firstIndex; + int baseVertex; + uint baseInstance; +}; + +struct SurfaceCommand { /* a draw command plus the flag that ProcessSurfaceCommands() writes */ + bool enabled; + GLIndirectCommand drawCommand; +}; + +layout(std430, binding = 1) readonly restrict buffer surfaceDescriptorsSSBO { + SurfaceDescriptor surfaces[]; +}; + +layout(std430, binding = 2) writeonly restrict buffer surfaceCommandsSSBO { + SurfaceCommand surfaceCommands[]; +}; + +struct Plane { /* NOTE(review): not referenced by any function in this shader */ + vec3 normal; + float distance; +}; + +uniform uint u_TotalDrawSurfs; +uniform uint u_SurfaceCommandsOffset; +uniform vec4 u_Frustum[6]; // xyz - normal, w - distance + +bool CullSurface( in BoundingSphere boundingSphere ) { /* returns true when the sphere centre is more than one radius behind any tested plane */ + for( int i = 0; i < 5; i++ ) { /* NOTE(review): only planes 0..4 are tested although u_Frustum holds 6 entries - confirm the sixth plane is skipped on purpose */ + const float distance = dot( u_Frustum[i].xyz, boundingSphere.center ) - u_Frustum[i].w; + + if( distance < -boundingSphere.radius ) { + return true; + } + } + return false; +} + +void ProcessSurfaceCommands( const in SurfaceDescriptor surface, const in bool enabled ) { /* writes the enabled flag for every one of the MAX_SURFACE_COMMANDS command IDs of the surface, unconditionally */ + for( uint i = 0; i < MAX_SURFACE_COMMANDS; i++ ) { + const uint commandID = surface.surfaceCommandIDs[i]; + surfaceCommands[commandID + u_SurfaceCommandsOffset].enabled = enabled; + } +} + +void main() { /* one invocation per surface descriptor: test its sphere, then flag its commands */ + const uint globalGroupID = gl_GlobalInvocationID.z * gl_NumWorkGroups.x * gl_WorkGroupSize.x * gl_NumWorkGroups.y * gl_WorkGroupSize.y + + gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + + gl_GlobalInvocationID.x; /* flattened global invocation index (named globalGroupID but built from gl_GlobalInvocationID) */ + if( globalGroupID >= u_TotalDrawSurfs ) { /* surplus invocations exit without touching any buffer */ + return; + } + SurfaceDescriptor surface = surfaces[globalGroupID]; + 
bool culled = CullSurface( surface.boundingSphere ); + + ProcessSurfaceCommands( surface, !culled ); +} diff --git a/src/engine/renderer/glsl_source/processSurfaces_cp.glsl b/src/engine/renderer/glsl_source/processSurfaces_cp.glsl new file mode 100644 index 0000000000..401081d976 --- /dev/null +++ b/src/engine/renderer/glsl_source/processSurfaces_cp.glsl @@ -0,0 +1,100 @@ +/* +=========================================================================== + +Daemon BSD Source Code +Copyright (c) 2024 Daemon Developers +All rights reserved. + +This file is part of the Daemon BSD Source Code (Daemon Source Code). + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Daemon developers nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL DAEMON DEVELOPERS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=========================================================================== +*/ + +/* processSurfaces_cp.glsl */ + +layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; /* 64 invocations per work group, all along X */ + +struct GLIndirectCommand { /* same five fields, in the same order, as the DrawElementsIndirectCommand structure of the GL indirect draw API */ + uint count; + uint instanceCount; + uint firstIndex; + int baseVertex; + uint baseInstance; +}; + +struct SurfaceCommand { /* same declaration as in cull_cp.glsl, which writes the enabled flag read here */ + bool enabled; + GLIndirectCommand drawCommand; +}; + +layout(std430, binding = 2) readonly buffer surfaceCommandsSSBO { + SurfaceCommand surfaceCommands[]; +}; + +layout(std430, binding = 3) writeonly buffer culledCommandsSSBO { + GLIndirectCommand culledCommands[]; +}; + +#define MAX_SURFACE_COMMAND_BATCHES 2048 + +struct SurfaceCommandBatch { /* NOTE(review): std140 rounds the stride of array elements up to 16 bytes, so uvec2 materialIDs[2] occupies 32 bytes in the block below - confirm the CPU-side struct uses the same layout */ + uvec2 materialIDs[2]; +}; + +layout(std140, binding = 0) uniform ub_SurfaceBatches { /* NOTE(review): 2048 entries of 32 bytes is 64 KiB while GL only guarantees 16 KiB for GL_MAX_UNIFORM_BLOCK_SIZE - confirm on the target drivers */ + SurfaceCommandBatch surfaceBatches[MAX_SURFACE_COMMAND_BATCHES]; +}; + +#define MAX_COMMAND_COUNTERS 64 +#define MAX_VIEWS 10 +#define MAX_FRAMES 2 + +layout (binding = 4) uniform atomic_uint atomicCommandCounters[MAX_COMMAND_COUNTERS * MAX_VIEWS * MAX_FRAMES]; /* same binding number and element count as the storage block declared in clearSurfaces_cp.glsl */ +uniform uint u_Frame; +uniform uint u_SurfaceCommandsOffset; +uniform uint u_CulledCommandsOffset; + +void AddDrawCommand( in uint commandID, in uvec2 materialID ) { /* when the command is enabled, takes the next slot from the counter selected by materialID.x and copies the draw command into culledCommands */ + SurfaceCommand command = surfaceCommands[commandID + u_SurfaceCommandsOffset]; + if( command.enabled ) { + const uint atomicCmdID = atomicCounterIncrement( atomicCommandCounters[materialID.x + + MAX_COMMAND_COUNTERS * MAX_VIEWS * u_Frame] ); /* atomicCounterIncrement returns the counter value prior to the increment */ + culledCommands[atomicCmdID + materialID.y * 
MAX_COMMAND_COUNTERS + u_CulledCommandsOffset] = command.drawCommand; + } +} + +void main() { /* NOTE(review): no range guard here, unlike cull_cp.glsl and clearSurfaces_cp.glsl - presumably the dispatch size and the buffers are padded to whole work groups; confirm against the dispatch code */ + const uint globalGroupID = gl_WorkGroupID.z * gl_NumWorkGroups.x * gl_NumWorkGroups.y + + gl_WorkGroupID.y * gl_NumWorkGroups.x + + gl_WorkGroupID.x; /* flattened work-group index */ + const uint globalInvocationID = gl_GlobalInvocationID.z * gl_NumWorkGroups.x * gl_WorkGroupSize.x + * gl_NumWorkGroups.y * gl_WorkGroupSize.y + + gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + + gl_GlobalInvocationID.x + + 1; // Add 1 because the first surface command is always reserved as a fake command + const uvec2 materialID = surfaceBatches[globalGroupID / 2].materialIDs[globalGroupID % 2]; /* two consecutive work groups share one SurfaceCommandBatch; the parity picks the materialIDs entry */ + + AddDrawCommand( globalInvocationID, materialID ); +} diff --git a/src/engine/renderer/shaders.cpp b/src/engine/renderer/shaders.cpp index b9aee09c9e..1a7dc740bb 100644 --- a/src/engine/renderer/shaders.cpp +++ b/src/engine/renderer/shaders.cpp @@ -60,6 +60,9 @@ #include "skybox_fp.glsl.h" #include "material_vp.glsl.h" #include "material_fp.glsl.h" +#include "cull_cp.glsl.h" +#include "processSurfaces_cp.glsl.h" +#include "clearSurfaces_cp.glsl.h" std::unordered_map shadermap({ { "glsl/blurX_fp.glsl", std::string(reinterpret_cast(blurX_fp_glsl), sizeof(blurX_fp_glsl)) }, @@ -71,6 +74,8 @@ std::unordered_map shadermap({ { "glsl/computeLight_fp.glsl", std::string(reinterpret_cast(computeLight_fp_glsl), sizeof(computeLight_fp_glsl)) }, { "glsl/contrast_fp.glsl", std::string(reinterpret_cast(contrast_fp_glsl), sizeof(contrast_fp_glsl)) }, { "glsl/contrast_vp.glsl", std::string(reinterpret_cast(contrast_vp_glsl), sizeof(contrast_vp_glsl)) }, + { "glsl/clearSurfaces_cp.glsl", std::string( reinterpret_cast< const char* >( clearSurfaces_cp_glsl ), sizeof( clearSurfaces_cp_glsl ) ) }, + { "glsl/cull_cp.glsl", std::string( reinterpret_cast< const char* >( cull_cp_glsl ), sizeof( cull_cp_glsl ) ) }, { "glsl/debugShadowMap_fp.glsl", std::string(reinterpret_cast(debugShadowMap_fp_glsl), sizeof(debugShadowMap_fp_glsl)) 
}, { "glsl/debugShadowMap_vp.glsl", std::string(reinterpret_cast(debugShadowMap_vp_glsl), sizeof(debugShadowMap_vp_glsl)) }, { "glsl/deformVertexes_vp.glsl", std::string(reinterpret_cast(deformVertexes_vp_glsl), sizeof(deformVertexes_vp_glsl)) }, @@ -105,6 +110,7 @@ std::unordered_map shadermap({ { "glsl/motionblur_vp.glsl", std::string(reinterpret_cast(motionblur_vp_glsl), sizeof(motionblur_vp_glsl)) }, { "glsl/portal_fp.glsl", std::string(reinterpret_cast(portal_fp_glsl), sizeof(portal_fp_glsl)) }, { "glsl/portal_vp.glsl", std::string(reinterpret_cast(portal_vp_glsl), sizeof(portal_vp_glsl)) }, + { "glsl/processSurfaces_cp.glsl", std::string( reinterpret_cast< const char* >( processSurfaces_cp_glsl ), sizeof( processSurfaces_cp_glsl ) ) }, { "glsl/reflection_CB_fp.glsl", std::string(reinterpret_cast(reflection_CB_fp_glsl), sizeof(reflection_CB_fp_glsl)) }, { "glsl/reflection_CB_vp.glsl", std::string(reinterpret_cast(reflection_CB_vp_glsl), sizeof(reflection_CB_vp_glsl)) }, { "glsl/refraction_C_fp.glsl", std::string(reinterpret_cast(refraction_C_fp_glsl), sizeof(refraction_C_fp_glsl)) }, diff --git a/src/engine/renderer/tr_backend.cpp b/src/engine/renderer/tr_backend.cpp index 93b22067a7..da6fc36b47 100644 --- a/src/engine/renderer/tr_backend.cpp +++ b/src/engine/renderer/tr_backend.cpp @@ -4889,8 +4889,6 @@ static void RB_RenderView( bool depthPass ) startTime = ri.Milliseconds(); } - materialSystem.frameStart = true; - if( depthPass ) { if ( glConfig2.materialSystemAvailable ) { materialSystem.RenderMaterials( shaderSort_t::SS_DEPTH, shaderSort_t::SS_DEPTH ); @@ -5014,6 +5012,13 @@ static void RB_RenderPostProcess() tr.refdef.pixelTarget[(i * 4) + 3] = 255; //set the alpha pure white } } + + if( glConfig2.materialSystemAvailable ) { + // Dispatch the cull compute shaders for the queued surface culls once we're done with post-processing + // We'll only use the results from those shaders in the next frame so we don't block the pipeline + materialSystem.CullSurfaces(); + 
materialSystem.EndFrame(); + } GL_CheckErrors(); } @@ -6070,6 +6075,9 @@ void RB_ExecuteRenderCommands( const void *data ) backEnd.smpFrame = 1; } + + materialSystem.frameStart = true; + // Log::Warn( "======" ); while ( cmd != nullptr ) { cmd = cmd->ExecuteSelf(); diff --git a/src/engine/renderer/tr_init.cpp b/src/engine/renderer/tr_init.cpp index 66cfb87cec..35ab9a96ec 100644 --- a/src/engine/renderer/tr_init.cpp +++ b/src/engine/renderer/tr_init.cpp @@ -112,6 +112,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA cvar_t *r_arb_shading_language_420pack; cvar_t *r_arb_explicit_uniform_location; cvar_t *r_arb_shader_image_load_store; + cvar_t *r_arb_shader_atomic_counters; cvar_t *r_checkGLErrors; cvar_t *r_logFile; @@ -1107,6 +1108,7 @@ ScreenshotCmd screenshotPNGRegistration("screenshotPNG", ssFormat_t::SSF_PNG, "p r_arb_shading_language_420pack = Cvar_Get( "r_arb_shading_language_420pack", "1", CVAR_CHEAT | CVAR_LATCH ); r_arb_explicit_uniform_location = Cvar_Get( "r_arb_explicit_uniform_location", "1", CVAR_CHEAT | CVAR_LATCH ); r_arb_shader_image_load_store = Cvar_Get( "r_arb_shader_image_load_store", "1", CVAR_CHEAT | CVAR_LATCH ); + r_arb_shader_atomic_counters = Cvar_Get( "r_arb_shader_atomic_counters", "1", CVAR_CHEAT | CVAR_LATCH ); r_picMip = Cvar_Get( "r_picMip", "0", CVAR_LATCH | CVAR_ARCHIVE ); r_imageMaxDimension = Cvar_Get( "r_imageMaxDimension", "0", CVAR_LATCH | CVAR_ARCHIVE ); diff --git a/src/engine/renderer/tr_local.h b/src/engine/renderer/tr_local.h index 8ab5b4b503..2f402688ba 100644 --- a/src/engine/renderer/tr_local.h +++ b/src/engine/renderer/tr_local.h @@ -1668,6 +1668,7 @@ enum class dynamicLightRenderer_t { LEGACY, TILED }; uint materialIDs[ MAX_SHADER_STAGES ]; uint materialPackIDs[ MAX_SHADER_STAGES ]; bool texturesDynamic[ MAX_SHADER_STAGES ]; + uint drawCommandIDs[ MAX_SHADER_STAGES ]; inline int index() const { return int( ( sort & SORT_INDEX_MASK ) ); @@ -2927,6 +2928,7 @@ enum class 
dynamicLightRenderer_t { LEGACY, TILED }; extern cvar_t *r_arb_shading_language_420pack; extern cvar_t *r_arb_explicit_uniform_location; extern cvar_t *r_arb_shader_image_load_store; + extern cvar_t *r_arb_shader_atomic_counters; extern cvar_t *r_nobind; // turns off binding to appropriate textures extern cvar_t *r_singleShader; // make most world faces use default shader diff --git a/src/engine/renderer/tr_main.cpp b/src/engine/renderer/tr_main.cpp index bdd920fae2..f6d63bf0c2 100644 --- a/src/engine/renderer/tr_main.cpp +++ b/src/engine/renderer/tr_main.cpp @@ -2882,11 +2882,9 @@ void R_RenderView( viewParms_t *parms ) // because it requires the decalBits R_CullDecalProjectors(); - if ( glConfig2.materialSystemAvailable && !materialSystem.generatedWorldCommandBuffer ) { - materialSystem.GenerateWorldMaterials(); - } - - if ( !glConfig2.materialSystemAvailable ) { + if ( glConfig2.materialSystemAvailable ) { + materialSystem.QueueSurfaceCull( tr.viewCount, (frustum_t*) tr.viewParms.frustums[0] ); + } else { R_AddWorldSurfaces(); } diff --git a/src/engine/renderer/tr_public.h b/src/engine/renderer/tr_public.h index 160566ff87..70fac421ee 100644 --- a/src/engine/renderer/tr_public.h +++ b/src/engine/renderer/tr_public.h @@ -92,6 +92,7 @@ struct glconfig2_t bool shadingLanguage420PackAvailable; bool explicitUniformLocationAvailable; bool shaderImageLoadStoreAvailable; + bool shaderAtomicCountersAvailable; bool materialSystemAvailable; bool gpuShader4Available; bool textureGatherAvailable; diff --git a/src/engine/renderer/tr_scene.cpp b/src/engine/renderer/tr_scene.cpp index 11c8a9fbb2..dba6230408 100644 --- a/src/engine/renderer/tr_scene.cpp +++ b/src/engine/renderer/tr_scene.cpp @@ -625,11 +625,19 @@ void RE_RenderScene( const refdef_t *fd ) Vector4Copy( fd->gradingWeights, parms.gradingWeights ); // TODO: Add cmds for updating dynamic surfaces and for culling here - materialSystem.frameStart = true; + // materialSystem.frameStart = true; R_AddClearBufferCmd(); 
R_AddSetupLightsCmd(); + if ( glConfig2.materialSystemAvailable && !materialSystem.generatedWorldCommandBuffer ) { + materialSystem.GenerateWorldMaterials(); + } + + if ( glConfig2.materialSystemAvailable ) { + materialSystem.StartFrame(); + } + R_RenderView( &parms ); R_RenderPostProcess(); diff --git a/src/engine/renderer/tr_shade.cpp b/src/engine/renderer/tr_shade.cpp index 08da17a914..df7c2824e0 100644 --- a/src/engine/renderer/tr_shade.cpp +++ b/src/engine/renderer/tr_shade.cpp @@ -86,6 +86,9 @@ static void GLSL_InitGPUShadersOrError() gl_shaderManager.load( gl_skyboxShaderMaterial ); gl_shaderManager.load( gl_fogQuake3ShaderMaterial ); gl_shaderManager.load( gl_heatHazeShaderMaterial ); + gl_shaderManager.load( gl_cullShader ); + gl_shaderManager.load( gl_clearSurfacesShader ); + gl_shaderManager.load( gl_processSurfacesShader ); } // standard light mapping diff --git a/src/engine/renderer/tr_surface.cpp b/src/engine/renderer/tr_surface.cpp index b9c40029f5..1292368881 100644 --- a/src/engine/renderer/tr_surface.cpp +++ b/src/engine/renderer/tr_surface.cpp @@ -1458,7 +1458,6 @@ static void Tess_SurfaceVBOMesh( srfVBOMesh_t *srf ) { GLimp_LogComment( "--- Tess_SurfaceVBOMesh ---\n" ); - Tess_SurfaceVBO( srf->vbo, srf->ibo, srf->numIndexes, srf->firstIndex ); } diff --git a/src/engine/renderer/tr_vbo.cpp b/src/engine/renderer/tr_vbo.cpp index 7005f786ca..822a663e71 100644 --- a/src/engine/renderer/tr_vbo.cpp +++ b/src/engine/renderer/tr_vbo.cpp @@ -1076,7 +1076,11 @@ static void R_InitLightUBO() static void R_InitMaterialBuffers() { if( glConfig2.materialSystemAvailable ) { materialsSSBO.GenBuffer(); - commandBuffer.GenBuffer(); + surfaceDescriptorsSSBO.GenBuffer(); + surfaceCommandsSSBO.GenBuffer(); + culledCommandsBuffer.GenBuffer(); + surfaceBatchesUBO.GenBuffer(); + atomicCommandCountersBuffer.GenBuffer(); } } @@ -1197,7 +1201,11 @@ void R_ShutdownVBOs() if ( glConfig2.materialSystemAvailable ) { materialsSSBO.DelBuffer(); - commandBuffer.DelBuffer(); + 
surfaceDescriptorsSSBO.DelBuffer(); + surfaceCommandsSSBO.DelBuffer(); + culledCommandsBuffer.DelBuffer(); + surfaceBatchesUBO.DelBuffer(); + atomicCommandCountersBuffer.DelBuffer(); } tess.verts = tess.vertsBuffer = nullptr; diff --git a/src/engine/sys/sdl_glimp.cpp b/src/engine/sys/sdl_glimp.cpp index 9090feabe7..2ada43f502 100644 --- a/src/engine/sys/sdl_glimp.cpp +++ b/src/engine/sys/sdl_glimp.cpp @@ -1963,10 +1963,14 @@ static void GLimp_InitExtensions() // made required in OpenGL 4.2 glConfig2.shaderImageLoadStoreAvailable = LOAD_EXTENSION_WITH_TEST( ExtFlag_NONE, ARB_shader_image_load_store, r_arb_shader_image_load_store->value ); + // made required in OpenGL 4.2 + glConfig2.shaderAtomicCountersAvailable = LOAD_EXTENSION_WITH_TEST( ExtFlag_NONE, ARB_shader_atomic_counters, r_arb_shader_atomic_counters->value ); + glConfig2.materialSystemAvailable = glConfig2.shaderDrawParametersAvailable && glConfig2.SSBOAvailable && glConfig2.multiDrawIndirectAvailable && glConfig2.bindlessTexturesAvailable && glConfig2.computeShaderAvailable && glConfig2.shadingLanguage420PackAvailable && glConfig2.explicitUniformLocationAvailable && glConfig2.shaderImageLoadStoreAvailable + && glConfig2.shaderAtomicCountersAvailable && r_smp->integer == 0 // Currently doesn't work with r_smp 1 && r_useMaterialSystem->integer == 1; // Allow disabling it without disabling any extensions