From 37b611896f36fbac859153211163d943eb59774e Mon Sep 17 00:00:00 2001 From: VReaperV Date: Thu, 16 May 2024 09:52:01 +0300 Subject: [PATCH] Implement GPU frustum culling Add frustum culling in compute shaders to the material system. This will use sphere<>frustum culling and output the correct draw commands into the buffer for each viewframe (one view in any given frame buffered by the material system). Id 0 is reserved for no-command and will result in early return in the shader. --- src.cmake | 3 + src/engine/renderer/Material.cpp | 326 +++++++++++++-- src/engine/renderer/Material.h | 91 +++- src/engine/renderer/gl_shader.cpp | 62 ++- src/engine/renderer/gl_shader.h | 390 +++++++++++++++++- .../glsl_source/clearSurfaces_cp.glsl | 53 +++ src/engine/renderer/glsl_source/cull_cp.glsl | 113 +++++ .../glsl_source/processSurfaces_cp.glsl | 96 +++++ src/engine/renderer/shaders.cpp | 6 + src/engine/renderer/tr_backend.cpp | 23 +- src/engine/renderer/tr_bsp.cpp | 3 + src/engine/renderer/tr_local.h | 5 + src/engine/renderer/tr_main.cpp | 9 +- src/engine/renderer/tr_public.h | 4 + src/engine/renderer/tr_scene.cpp | 11 +- src/engine/renderer/tr_shade.cpp | 23 ++ src/engine/renderer/tr_shader.cpp | 13 + src/engine/renderer/tr_surface.cpp | 1 - src/engine/renderer/tr_vbo.cpp | 12 +- src/engine/sys/sdl_glimp.cpp | 31 +- 20 files changed, 1204 insertions(+), 71 deletions(-) create mode 100644 src/engine/renderer/glsl_source/clearSurfaces_cp.glsl create mode 100644 src/engine/renderer/glsl_source/cull_cp.glsl create mode 100644 src/engine/renderer/glsl_source/processSurfaces_cp.glsl diff --git a/src.cmake b/src.cmake index 7fc6d7c758..0d872fa538 100644 --- a/src.cmake +++ b/src.cmake @@ -147,6 +147,9 @@ set(RENDERERLIST set(GLSLSOURCELIST ${ENGINE_DIR}/renderer/glsl_source/material_vp.glsl ${ENGINE_DIR}/renderer/glsl_source/material_fp.glsl + ${ENGINE_DIR}/renderer/glsl_source/cull_cp.glsl + ${ENGINE_DIR}/renderer/glsl_source/clearSurfaces_cp.glsl + 
${ENGINE_DIR}/renderer/glsl_source/processSurfaces_cp.glsl ${ENGINE_DIR}/renderer/glsl_source/skybox_vp.glsl ${ENGINE_DIR}/renderer/glsl_source/ssao_fp.glsl ${ENGINE_DIR}/renderer/glsl_source/ssao_vp.glsl diff --git a/src/engine/renderer/Material.cpp b/src/engine/renderer/Material.cpp index 2c199f6a89..6e8fd2bd7a 100644 --- a/src/engine/renderer/Material.cpp +++ b/src/engine/renderer/Material.cpp @@ -34,11 +34,14 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Material.cpp #include "Material.h" - #include "tr_local.h" -GLSSBO materialsSSBO( "materials", 0 ); -GLIndirectBuffer commandBuffer( "drawCommands" ); +GLSSBO materialsSSBO( "materials", 0, GL_MAP_WRITE_BIT, GL_MAP_INVALIDATE_RANGE_BIT ); +GLSSBO surfaceDescriptorsSSBO( "surfaceDescriptors", 1, GL_MAP_WRITE_BIT, GL_MAP_INVALIDATE_RANGE_BIT ); +GLSSBO surfaceCommandsSSBO( "surfaceCommands", 2, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT, GL_MAP_FLUSH_EXPLICIT_BIT ); +GLBuffer culledCommandsBuffer( "culledCommands", 3, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT, GL_MAP_FLUSH_EXPLICIT_BIT ); +GLUBO surfaceBatchesUBO( "surfaceBatches", 0, GL_MAP_WRITE_BIT, GL_MAP_INVALIDATE_RANGE_BIT ); +GLBuffer atomicCommandCountersBuffer( "atomicCommandCounters", 4, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT, GL_MAP_FLUSH_EXPLICIT_BIT ); MaterialSystem materialSystem; static void ComputeDynamics( shaderStage_t* pStage ) { @@ -881,6 +884,7 @@ void MaterialSystem::GenerateWorldMaterialsBuffer() { uint32_t stage = 0; for ( shaderStage_t* pStage = drawSurf->shader->stages; pStage < drawSurf->shader->lastStage; pStage++ ) { if ( drawSurf->materialIDs[stage] != material.id || drawSurf->materialPackIDs[stage] != materialPackID ) { + stage++; continue; } @@ -964,6 +968,8 @@ void MaterialSystem::GenerateWorldMaterialsBuffer() { pStage->colorRenderer( pStage ); + drawSurf->drawCommandIDs[stage] = lastCommandID; + if ( pStage->dynamic ) { drawSurf->materialsSSBOOffset[stage] = ( SSBOOffset - dynamicDrawSurfsOffset + 
drawSurfCount * material.shader->GetPaddedSize() ) / material.shader->GetPaddedSize(); @@ -989,37 +995,155 @@ void MaterialSystem::GenerateWorldMaterialsBuffer() { void MaterialSystem::GenerateWorldCommandBuffer() { Log::Debug( "Generating world command buffer" ); - uint32_t count = 0; - for ( const MaterialPack& pack : materialPacks ) { - for ( const Material& material : pack.materials ) { - count += material.drawCommands.size(); + totalBatchCount = 0; + + uint32_t batchOffset = 0; + uint32_t globalID = 0; + for ( MaterialPack& pack : materialPacks ) { + for ( Material& material : pack.materials ) { + material.surfaceCommandBatchOffset = batchOffset; + + const uint32_t cmdCount = material.drawCommands.size(); + const uint32_t batchCount = cmdCount % SURFACE_COMMANDS_PER_BATCH == 0 ? cmdCount / SURFACE_COMMANDS_PER_BATCH + : cmdCount / SURFACE_COMMANDS_PER_BATCH + 1; + + material.surfaceCommandBatchOffset = batchOffset; + material.surfaceCommandBatchCount = batchCount; + + batchOffset += batchCount; + material.globalID = globalID; + + totalBatchCount += batchCount; + globalID++; } } - if ( count == 0 ) { - return; - } + Log::Debug( "Total batch count: %u", totalBatchCount ); - Log::Debug( "CmdBuffer size: %u", count ); + skipDrawCommands = true; + drawSurf_t* drawSurf; - commandBuffer.BindBuffer(); - glBufferData( GL_DRAW_INDIRECT_BUFFER, count * sizeof( GLIndirectBuffer::GLIndirectCommand ), nullptr, GL_STATIC_DRAW ); + surfaceDescriptorsSSBO.BindBuffer(); + surfaceDescriptorsCount = totalDrawSurfs; + descriptorSize = BOUNDING_SPHERE_SIZE + maxStages; + glBufferData( GL_SHADER_STORAGE_BUFFER, surfaceDescriptorsCount * descriptorSize * sizeof( uint32_t ), + nullptr, GL_STATIC_DRAW ); + uint32_t* surfaceDescriptors = surfaceDescriptorsSSBO.MapBufferRange( surfaceDescriptorsCount * descriptorSize ); + + culledCommandsCount = totalBatchCount * SURFACE_COMMANDS_PER_BATCH; + surfaceCommandsCount = totalBatchCount * SURFACE_COMMANDS_PER_BATCH + 1; + + 
surfaceCommandsSSBO.BindBuffer(); + surfaceCommandsSSBO.BufferStorage( surfaceCommandsCount * SURFACE_COMMAND_SIZE * MAX_VIEWFRAMES, 1, nullptr ); + surfaceCommandsSSBO.MapAll(); + SurfaceCommand* surfaceCommands = ( SurfaceCommand* ) surfaceCommandsSSBO.GetData(); + memset( surfaceCommands, 0, surfaceCommandsCount * sizeof( SurfaceCommand ) * MAX_VIEWFRAMES ); + + culledCommandsBuffer.BindBuffer( GL_SHADER_STORAGE_BUFFER ); + culledCommandsBuffer.BufferStorage( GL_SHADER_STORAGE_BUFFER, + culledCommandsCount * INDIRECT_COMMAND_SIZE * MAX_VIEWFRAMES, 1, nullptr ); + culledCommandsBuffer.MapAll( GL_SHADER_STORAGE_BUFFER ); + GLIndirectBuffer::GLIndirectCommand* culledCommands = ( GLIndirectBuffer::GLIndirectCommand* ) culledCommandsBuffer.GetData(); + memset( culledCommands, 0, culledCommandsCount * sizeof( GLIndirectBuffer::GLIndirectCommand ) * MAX_VIEWFRAMES ); + culledCommandsBuffer.FlushAll( GL_SHADER_STORAGE_BUFFER ); + + surfaceBatchesUBO.BindBuffer(); + // Multiply by 2 because we write a uvec2, which is aligned as vec4 + glBufferData( GL_UNIFORM_BUFFER, MAX_SURFACE_COMMAND_BATCHES * 2 * sizeof( SurfaceCommandBatch ), nullptr, GL_STATIC_DRAW ); + SurfaceCommandBatch* surfaceCommandBatches = + ( SurfaceCommandBatch* ) surfaceBatchesUBO.MapBufferRange( MAX_SURFACE_COMMAND_BATCHES * 2 * SURFACE_COMMAND_BATCH_SIZE ); + + // memset( (void*) surfaceCommandBatches, 0, MAX_SURFACE_COMMAND_BATCHES * 2 * sizeof( SurfaceCommandBatch ) ); + // Fuck off gcc + for ( int i = 0; i < MAX_SURFACE_COMMAND_BATCHES * 2; i++ ) { + surfaceCommandBatches[i] = {}; + } - GLIndirectBuffer::GLIndirectCommand* commands = commandBuffer.MapBufferRange( count ); - uint32_t offset = 0; + uint32_t id = 0; + uint32_t matID = 0; + uint32_t subID = 0; for ( MaterialPack& pack : materialPacks ) { - for ( Material& material : pack.materials ) { - material.staticCommandOffset = offset; - - for ( const DrawCommand& drawCmd : material.drawCommands ) { - memcpy( commands, &drawCmd.cmd, sizeof( 
GLIndirectBuffer::GLIndirectCommand ) ); - commands++; - offset++; + for ( Material& mat : pack.materials ) { + for ( uint32_t i = 0; i < mat.surfaceCommandBatchCount; i++ ) { + surfaceCommandBatches[id * 4 + subID].materialIDs[0] = matID; + surfaceCommandBatches[id * 4 + subID].materialIDs[1] = mat.surfaceCommandBatchOffset; + subID++; + if ( subID == 4 ) { + id++; + subID = 0; + } } + matID++; } } - commandBuffer.UnmapBuffer(); + atomicCommandCountersBuffer.BindBuffer( GL_ATOMIC_COUNTER_BUFFER ); + atomicCommandCountersBuffer.BufferStorage( GL_ATOMIC_COUNTER_BUFFER, + MAX_COMMAND_COUNTERS * MAX_VIEWS, MAX_FRAMES, nullptr ); + atomicCommandCountersBuffer.MapAll( GL_ATOMIC_COUNTER_BUFFER ); + uint32_t* atomicCommandCounters = (uint32_t*) atomicCommandCountersBuffer.GetData(); + memset( atomicCommandCounters, 0, MAX_COMMAND_COUNTERS * MAX_VIEWFRAMES * sizeof(uint32_t) ); + + for ( int i = 0; i < tr.refdef.numDrawSurfs; i++ ) { + drawSurf = &tr.refdef.drawSurfs[i]; + if ( drawSurf->entity != &tr.worldEntity ) { + continue; + } + + shader_t* shader = drawSurf->shader; + if ( !shader ) { + continue; + } + + shader = shader->remappedShader ? 
shader->remappedShader : shader; + if ( shader->isSky || shader->isPortal ) { + continue; + } + + tess.multiDrawPrimitives = 0; + tess.numIndexes = 0; + tess.numVertexes = 0; + tess.attribsSet = 0; + + // Don't add SF_SKIP surfaces + if ( *drawSurf->surface == surfaceType_t::SF_SKIP ) { + continue; + } + + rb_surfaceTable[Util::ordinal( *( drawSurf->surface ) )]( drawSurf->surface ); + + SurfaceDescriptor surface; + VectorCopy( ( ( srfGeneric_t* ) drawSurf->surface )->origin, surface.boundingSphere.origin ); + surface.boundingSphere.radius = ( ( srfGeneric_t* ) drawSurf->surface )->radius; + + uint32_t stage = 0; + for ( shaderStage_t* pStage = drawSurf->shader->stages; pStage < drawSurf->shader->lastStage; pStage++ ) { + const Material* material = &materialPacks[drawSurf->materialPackIDs[stage]].materials[drawSurf->materialIDs[stage]]; + uint32_t cmdID = material->surfaceCommandBatchOffset * SURFACE_COMMANDS_PER_BATCH + drawSurf->drawCommandIDs[stage]; + cmdID++; // Add 1 because the first surface command is always reserved as a fake command + surface.surfaceCommandIDs[stage] = cmdID; + + SurfaceCommand surfaceCommand; + surfaceCommand.enabled = 0; + surfaceCommand.drawCommand = material->drawCommands[drawSurf->drawCommandIDs[stage]].cmd; + surfaceCommands[cmdID] = surfaceCommand; + + stage++; + } + memcpy( surfaceDescriptors, &surface, descriptorSize * sizeof( uint32_t ) ); + surfaceDescriptors += descriptorSize; + } + + for ( int i = 0; i < MAX_VIEWFRAMES; i++ ) { + memcpy( surfaceCommands + surfaceCommandsCount * i, surfaceCommands, surfaceCommandsCount * sizeof( SurfaceCommand ) ); + } + + surfaceDescriptorsSSBO.BindBuffer(); + surfaceDescriptorsSSBO.UnmapBuffer(); + + surfaceBatchesUBO.BindBuffer(); + surfaceBatchesUBO.UnmapBuffer(); + GL_CheckErrors(); } @@ -1236,6 +1360,7 @@ static void ProcessMaterialHeatHaze( Material* material, shaderStage_t* pStage, material->program = gl_heatHazeShaderMaterial->GetProgram( pStage->deformIndex ); } + static void 
ProcessMaterialLiquid( Material* material, shaderStage_t* pStage ) { material->shader = gl_liquidShaderMaterial; @@ -1272,6 +1397,7 @@ void MaterialSystem::GenerateWorldMaterials() { backEnd.currentEntity = &tr.worldEntity; drawSurf_t* drawSurf; + totalDrawSurfs = 0; uint32_t id = 0; uint32_t previousMaterialID = 0; @@ -1302,6 +1428,7 @@ void MaterialSystem::GenerateWorldMaterials() { rb_surfaceTable[Util::ordinal( *( drawSurf->surface ) )]( drawSurf->surface ); uint32_t stage = 0; + totalDrawSurfs++; for ( shaderStage_t* pStage = drawSurf->shader->stages; pStage < drawSurf->shader->lastStage; pStage++ ) { Material material; @@ -1628,6 +1755,114 @@ void MaterialSystem::UpdateDynamicSurfaces() { materialsSSBO.UnmapBuffer(); } +void MaterialSystem::UpdateFrameData() { + /* atomicCommandCountersBuffer.AreaIncr(); + + atomicCommandCountersBuffer.BindBuffer( GL_ATOMIC_COUNTER_BUFFER ); + uint32_t* atomicCommandCounters = atomicCommandCountersBuffer.GetCurrentAreaData(); + memset( atomicCommandCounters, 0, MAX_COMMAND_COUNTERS * sizeof(uint32_t)); + atomicCommandCountersBuffer.FlushCurrentArea( GL_ATOMIC_COUNTER_BUFFER ); + atomicCommandCountersBuffer.UnBindBuffer( GL_ATOMIC_COUNTER_BUFFER ); */ + + atomicCommandCountersBuffer.BindBufferBase( GL_SHADER_STORAGE_BUFFER ); + gl_clearSurfacesShader->BindProgram( 0 ); + gl_clearSurfacesShader->SetUniform_Frame( nextFrame ); + gl_clearSurfacesShader->DispatchCompute( MAX_VIEWS, 1, 1 ); + atomicCommandCountersBuffer.UnBindBufferBase( GL_SHADER_STORAGE_BUFFER ); + + GL_CheckErrors(); +} + +void MaterialSystem::QueueSurfaceCull( const uint32_t viewID, const frustum_t* frustum ) { + memcpy( frames[nextFrame].viewFrames[viewID].frustum, frustum, sizeof( frustum_t ) ); + frames[nextFrame].viewCount++; +} + +void MaterialSystem::CullSurfaces() { + surfaceDescriptorsSSBO.BindBufferBase(); + surfaceCommandsSSBO.BindBufferBase(); + culledCommandsBuffer.BindBufferBase( GL_SHADER_STORAGE_BUFFER ); + surfaceBatchesUBO.BindBufferBase(); + 
atomicCommandCountersBuffer.BindBufferBase( GL_ATOMIC_COUNTER_BUFFER ); + + for ( uint32_t view = 0; view < frames[nextFrame].viewCount; view++ ) { + frustum_t* frustum = &frames[nextFrame].viewFrames[view].frustum; + + vec4_t frustumPlanes[6]; + for ( int j = 0; j < 6; j++ ) { + VectorCopy( PVSLocked ? lockedFrustum[j].normal : frustum[0][j].normal, frustumPlanes[j] ); + frustumPlanes[j][3] = PVSLocked ? lockedFrustum[j].dist : frustum[0][j].dist; + } + + gl_cullShader->BindProgram( 0 ); + uint32_t globalWorkGroupX = totalDrawSurfs % MAX_COMMAND_COUNTERS == 0 ? + totalDrawSurfs / MAX_COMMAND_COUNTERS : totalDrawSurfs / MAX_COMMAND_COUNTERS + 1; + gl_cullShader->SetUniform_TotalDrawSurfs( totalDrawSurfs ); + gl_cullShader->SetUniform_SurfaceCommandsOffset( surfaceCommandsCount * ( MAX_VIEWS * nextFrame + view ) ); + + if ( PVSLocked ) { + if ( r_lockpvs->integer == 0 ) { + PVSLocked = false; + } + } + if ( r_lockpvs->integer == 1 && !PVSLocked ) { + PVSLocked = true; + for ( int j = 0; j < 6; j++ ) { + VectorCopy( frustum[0][j].normal, lockedFrustum[j].normal ); + lockedFrustum[j].dist = frustum[0][j].dist; + } + } + + // FIXME: Make far plane work properly + gl_cullShader->SetUniform_Frustum( frustumPlanes ); + + gl_cullShader->DispatchCompute( globalWorkGroupX, 1, 1 ); + + gl_processSurfacesShader->BindProgram( 0 ); + gl_processSurfacesShader->SetUniform_Frame( nextFrame ); + gl_processSurfacesShader->SetUniform_ViewID( view ); + gl_processSurfacesShader->SetUniform_SurfaceCommandsOffset( surfaceCommandsCount * ( MAX_VIEWS * nextFrame + view ) ); + gl_processSurfacesShader->SetUniform_CulledCommandsOffset( culledCommandsCount * ( MAX_VIEWS * nextFrame + view ) ); + + glMemoryBarrier( GL_SHADER_STORAGE_BARRIER_BIT | GL_ATOMIC_COUNTER_BARRIER_BIT ); + gl_processSurfacesShader->DispatchCompute( totalBatchCount, 1, 1 ); + } + + surfaceDescriptorsSSBO.UnBindBufferBase(); + surfaceCommandsSSBO.UnBindBufferBase(); + culledCommandsBuffer.UnBindBufferBase( 
GL_SHADER_STORAGE_BUFFER ); + surfaceBatchesUBO.UnBindBufferBase(); + atomicCommandCountersBuffer.UnBindBufferBase( GL_ATOMIC_COUNTER_BUFFER ); + + GL_CheckErrors(); +} + +void MaterialSystem::StartFrame() { + if ( !generatedWorldCommandBuffer ) { + return; + } + frames[nextFrame].viewCount = 0; + + // renderedMaterials.clear(); + // UpdateDynamicSurfaces(); + // UpdateFrameData(); +} + +void MaterialSystem::EndFrame() { + if ( !generatedWorldCommandBuffer ) { + return; + } + + currentFrame = nextFrame; + nextFrame++; + if ( nextFrame >= MAX_FRAMES ) { + nextFrame = 0; + } + + currentView = 0; + return; +} + void MaterialSystem::GeneratePortalBoundingSpheres() { Log::Debug( "Generating portal bounding spheres" ); @@ -1669,6 +1904,14 @@ void MaterialSystem::Free() { skyShaders.clear(); renderedMaterials.clear(); + surfaceCommandsSSBO.UnmapBuffer(); + culledCommandsBuffer.UnmapBuffer(); + atomicCommandCountersBuffer.UnmapBuffer(); + + currentFrame = 0; + nextFrame = 1; + maxStages = 0; + for ( MaterialPack& pack : materialPacks ) { for ( Material& material : pack.materials ) { material.drawCommands.clear(); @@ -1694,6 +1937,7 @@ void MaterialSystem::AddDrawCommand( const uint32_t materialID, const uint32_t m cmd.materialsSSBOOffset = materialsSSBOOffset; materialPacks[materialPackID].materials[materialID].drawCommands.emplace_back(cmd); + lastCommandID = materialPacks[materialPackID].materials[materialID].drawCommands.size() - 1; cmd.textureCount = 0; } @@ -1713,12 +1957,19 @@ void MaterialSystem::AddPortalSurfaces() { return Distance( backEnd.viewParms.orientation.origin, lhs.origin ) - lhs.radius < Distance( backEnd.viewParms.orientation.origin, rhs.origin ) - rhs.radius; } ); + + uint32_t count = 0; for ( const drawSurfBoundingSphere& sphere : portalBounds ) { R_MirrorViewBySurface( &portalSurfaces[sphere.drawSurfID] ); + count++; + // Limit this a bit until portal visibility readback is done + if ( count > 2 ) { + return; + } } } -void 
MaterialSystem::RenderMaterials( const shaderSort_t fromSort, const shaderSort_t toSort ) { +void MaterialSystem::RenderMaterials( const shaderSort_t fromSort, const shaderSort_t toSort, const uint32_t viewID ) { if ( !r_drawworld->integer ) { return; } @@ -1726,6 +1977,11 @@ void MaterialSystem::RenderMaterials( const shaderSort_t fromSort, const shaderS if ( frameStart ) { renderedMaterials.clear(); UpdateDynamicSurfaces(); + UpdateFrameData(); + // StartFrame(); + + // Make sure compute dispatches from the last frame finished writing to memory + glMemoryBarrier( GL_COMMAND_BARRIER_BIT ); frameStart = false; } @@ -1734,7 +1990,7 @@ void MaterialSystem::RenderMaterials( const shaderSort_t fromSort, const shaderS for ( MaterialPack& materialPack : materialPacks ) { if ( materialPack.fromSort >= fromSort && materialPack.toSort <= toSort ) { for ( Material& material : materialPack.materials ) { - RenderMaterial( material ); + RenderMaterial( material, viewID ); renderedMaterials.emplace_back( &material ); } } @@ -1757,7 +2013,7 @@ void MaterialSystem::RenderMaterials( const shaderSort_t fromSort, const shaderS } } -void MaterialSystem::RenderMaterial( Material& material ) { +void MaterialSystem::RenderMaterial( Material& material, const uint32_t viewID ) { backEnd.currentEntity = &tr.worldEntity; GL_State( material.stateBits ); @@ -1887,10 +2143,24 @@ void MaterialSystem::RenderMaterial( Material& material ) { } material.texturesResident = true; - glMultiDrawElementsIndirect( GL_TRIANGLES, GL_UNSIGNED_INT, - BUFFER_OFFSET( material.staticCommandOffset * sizeof( GLIndirectBuffer::GLIndirectCommand ) ), + culledCommandsBuffer.BindBuffer( GL_DRAW_INDIRECT_BUFFER ); + + atomicCommandCountersBuffer.BindBuffer( GL_PARAMETER_BUFFER_ARB ); + + glMultiDrawElementsIndirectCountARB( GL_TRIANGLES, GL_UNSIGNED_INT, + BUFFER_OFFSET( material.surfaceCommandBatchOffset * SURFACE_COMMANDS_PER_BATCH * sizeof( GLIndirectBuffer::GLIndirectCommand ) + + ( culledCommandsCount * ( 
MAX_VIEWS * currentFrame + viewID ) + * sizeof( GLIndirectBuffer::GLIndirectCommand ) ) ), + //+ ( culledCommandsCount * ( MAX_VIEWS * currentFrame + currentView ) + //* sizeof( GLIndirectBuffer::GLIndirectCommand ) ), + material.globalID * sizeof( uint32_t ) + + ( MAX_COMMAND_COUNTERS * ( MAX_VIEWS * currentFrame + viewID ) ) * sizeof( uint32_t ), material.drawCommands.size(), 0 ); + culledCommandsBuffer.UnBindBuffer( GL_DRAW_INDIRECT_BUFFER ); + + atomicCommandCountersBuffer.UnBindBuffer( GL_PARAMETER_BUFFER_ARB ); + if ( material.usePolygonOffset ) { glDisable( GL_POLYGON_OFFSET_FILL ); } diff --git a/src/engine/renderer/Material.h b/src/engine/renderer/Material.h index 664f8913ca..928e1be081 100644 --- a/src/engine/renderer/Material.h +++ b/src/engine/renderer/Material.h @@ -71,7 +71,10 @@ struct Material { uint32_t currentStaticDrawSurfCount = 0; uint32_t currentDynamicDrawSurfCount = 0; - uint32_t staticCommandOffset = 0; + uint32_t globalID = 0; + uint32_t surfaceCommandBatchOffset = 0; + uint32_t surfaceCommandBatchCount = 0; + uint32_t surfaceCommandBatchPadding = 0; uint32_t id = 0; bool useSync = false; @@ -134,6 +137,51 @@ struct drawSurfBoundingSphere { uint32_t drawSurfID; }; +#define MAX_SURFACE_COMMANDS 16 +#define MAX_COMMAND_COUNTERS 64 +#define SURFACE_COMMANDS_PER_BATCH 64 + +#define MAX_SURFACE_COMMAND_BATCHES 2048 + +#define BOUNDING_SPHERE_SIZE 4 + +#define INDIRECT_COMMAND_SIZE 5 +#define SURFACE_COMMAND_SIZE 6 +#define SURFACE_COMMAND_BATCH_SIZE 4 // Aligned to 4 components + +#define MAX_FRAMES 2 +#define MAX_VIEWFRAMES MAX_VIEWS * MAX_FRAMES // Buffer 2 frames for each view + +struct ViewFrame { + uint32_t viewID = 0; + uint32_t portalViews[MAX_VIEWS]; + frustum_t frustum; +}; + +struct Frame { + uint32_t viewCount = 0; + ViewFrame viewFrames[MAX_VIEWS]; +}; + +struct BoundingSphere { + vec3_t origin; + float radius; +}; + +struct SurfaceDescriptor { + BoundingSphere boundingSphere; + uint32_t surfaceCommandIDs[MAX_SURFACE_COMMANDS] { 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; +}; + +struct SurfaceCommand { + uint32_t enabled; // uint because bool in GLSL is always 4 bytes + GLIndirectBuffer::GLIndirectCommand drawCommand; +}; + +struct SurfaceCommandBatch { + uint32_t materialIDs[4] { 0, 0, 0, 0 }; +}; + class MaterialSystem { public: bool generatedWorldCommandBuffer = false; @@ -141,6 +189,13 @@ class MaterialSystem { bool generatingWorldCommandBuffer = false; vec3_t worldViewBounds[2] = {}; + uint32_t currentView = 0; + + uint8_t maxStages = 0; + uint32_t descriptorSize; + + std::vector drawCommands; + std::vector portalSurfacesTmp; std::vector portalSurfaces; std::vector portalBounds; @@ -165,16 +220,22 @@ class MaterialSystem { { shaderSort_t::SS_ENVIRONMENT_NOFOG, shaderSort_t::SS_POST_PROCESS } }; - bool frameStart = true; + bool frameStart = false; void AddTexture( Texture* texture ); void AddDrawCommand( const uint32_t materialID, const uint32_t materialPackID, const uint32_t materialsSSBOOffset, const GLuint count, const GLuint firstIndex ); void AddPortalSurfaces(); - void RenderMaterials( const shaderSort_t fromSort, const shaderSort_t toSort ); + void RenderMaterials( const shaderSort_t fromSort, const shaderSort_t toSort, const uint32_t viewID ); void UpdateDynamicSurfaces(); + void QueueSurfaceCull( const uint32_t viewID, const frustum_t* frustum ); + void CullSurfaces(); + + void StartFrame(); + void EndFrame(); + void AddStageTextures( drawSurf_t* drawSurf, shaderStage_t* pStage, Material* material ); void GenerateWorldMaterials(); void GenerateWorldMaterialsBuffer(); @@ -186,16 +247,36 @@ class MaterialSystem { void Free(); private: + bool PVSLocked = false; + frustum_t lockedFrustum; + DrawCommand cmd; + uint32_t lastCommandID; + uint32_t totalDrawSurfs; + uint32_t totalBatchCount = 0; + + uint32_t surfaceCommandsCount = 0; + uint32_t culledCommandsCount = 0; + uint32_t surfaceDescriptorsCount = 0; + std::vector dynamicDrawSurfs; uint32_t dynamicDrawSurfsOffset = 0; 
uint32_t dynamicDrawSurfsSize = 0; - void RenderMaterial( Material& material ); + Frame frames[MAX_FRAMES]; + uint32_t currentFrame = 0; + uint32_t nextFrame = 1; + + void RenderMaterial( Material& material, const uint32_t viewID ); + void UpdateFrameData(); }; extern GLSSBO materialsSSBO; -extern GLIndirectBuffer commandBuffer; +extern GLSSBO surfaceDescriptorsSSBO; // Global +extern GLSSBO surfaceCommandsSSBO; // Per viewframe, GPU updated +extern GLBuffer culledCommandsBuffer; // Per viewframe +extern GLUBO surfaceBatchesUBO; // Global +extern GLBuffer atomicCommandCountersBuffer; // Per viewframe extern MaterialSystem materialSystem; #endif // MATERIAL_H diff --git a/src/engine/renderer/gl_shader.cpp b/src/engine/renderer/gl_shader.cpp index f782d015d0..44d5dc69e1 100644 --- a/src/engine/renderer/gl_shader.cpp +++ b/src/engine/renderer/gl_shader.cpp @@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include "gl_shader.h" +#include "Material.h" // We currently write GLBinaryHeader to a file and memcpy all over it. 
// Make sure it's a pod, so we don't put a std::string in it or something @@ -42,6 +43,9 @@ ShaderKind shaderKind = ShaderKind::Unknown; GLShader_generic2D *gl_generic2DShader = nullptr; GLShader_generic *gl_genericShader = nullptr; GLShader_genericMaterial *gl_genericShaderMaterial = nullptr; +GLShader_cull *gl_cullShader = nullptr; +GLShader_clearSurfaces *gl_clearSurfacesShader = nullptr; +GLShader_processSurfaces *gl_processSurfacesShader = nullptr; GLShader_lightMapping *gl_lightMappingShader = nullptr; GLShader_lightMappingMaterial *gl_lightMappingShaderMaterial = nullptr; GLShader_forwardLighting_omniXYZ *gl_forwardLightingShader_omniXYZ = nullptr; @@ -461,6 +465,11 @@ static std::string GenComputeVersionDeclaration() { { glConfig2.gpuShader4Available, 130, "EXT_gpu_shader4" }, { glConfig2.gpuShader5Available, 400, "ARB_gpu_shader5" }, { glConfig2.uniformBufferObjectAvailable, 140, "ARB_uniform_buffer_object" }, + { glConfig2.SSBOAvailable, 430, "ARB_shader_storage_buffer_object" }, + { glConfig2.shadingLanguage420PackAvailable, 420, "ARB_shading_language_420pack" }, + { glConfig2.explicitUniformLocationAvailable, 430, "ARB_explicit_uniform_location" }, + { glConfig2.shaderImageLoadStoreAvailable, 420, "ARB_shader_image_load_store" }, + { glConfig2.shaderAtomicCountersAvailable, 420, "ARB_shader_atomic_counters" }, }; for ( const auto& extension : extensions ) { @@ -541,6 +550,28 @@ static std::string GenFragmentHeader() { return str; } +static std::string GenComputeHeader() { + std::string str; + + // Compute shader compatibility defines + AddDefine( str, "MAX_VIEWS", MAX_VIEWS ); + AddDefine( str, "MAX_FRAMES", MAX_FRAMES ); + AddDefine( str, "MAX_VIEWFRAMES", MAX_VIEWFRAMES ); + AddDefine( str, "MAX_SURFACE_COMMAND_BATCHES", MAX_SURFACE_COMMAND_BATCHES ); + AddDefine( str, "MAX_COMMAND_COUNTERS", MAX_COMMAND_COUNTERS ); + + return str; +} + +static std::string GenWorldHeader() { + std::string str; + + // Shader compatibility defines that use map data for 
compile-time values + AddDefine( str, "MAX_SURFACE_COMMANDS", materialSystem.maxStages ); + + return str; +} + static std::string GenEngineConstants() { // Engine constants std::string str; @@ -733,9 +764,15 @@ void GLShaderManager::GenerateBuiltinHeaders() { GLCompatHeader = GLHeader("GLCompatHeader", GenCompatHeader(), this); GLVertexHeader = GLHeader("GLVertexHeader", GenVertexHeader(), this); GLFragmentHeader = GLHeader("GLFragmentHeader", GenFragmentHeader(), this); + GLComputeHeader = GLHeader( "GLComputeHeader", GenComputeHeader(), this ); + GLWorldHeader = GLHeader( "GLWorldHeader", GenWorldHeader(), this ); GLEngineConstants = GLHeader("GLEngineConstants", GenEngineConstants(), this); } +void GLShaderManager::GenerateWorldHeaders() { + GLWorldHeader = GLHeader( "GLWorldHeader", GenWorldHeader(), this ); +} + std::string GLShaderManager::BuildDeformShaderText( const std::string& steps ) { std::string shaderText; @@ -1056,6 +1093,8 @@ void GLShaderManager::InitShader( GLShader* shader ) { combinedShaderText = GLComputeVersionDeclaration.getText() + GLCompatHeader.getText() + + GLComputeHeader.getText() + + GLWorldHeader.getText() + GLEngineConstants.getText(); } @@ -1268,7 +1307,8 @@ void GLShaderManager::CompileGPUShaders( GLShader *shader, shaderProgram_t *prog program->CS = CompileShader( shader->GetName(), computeShaderTextWithMacros, { &GLComputeVersionDeclaration, - // &GLComputeHeader, + &GLComputeHeader, + &GLWorldHeader, &GLCompatHeader, &GLEngineConstants }, GL_COMPUTE_SHADER ); @@ -3057,3 +3097,23 @@ void GLShader_fxaa::BuildShaderFragmentLibNames( std::string& fragmentInlines ) { fragmentInlines += "fxaa3_11"; } + +GLShader_cull::GLShader_cull( GLShaderManager* manager ) : + GLShader( "cull", ATTR_POSITION, manager, false, false, true ), + u_TotalDrawSurfs( this ), + u_SurfaceCommandsOffset( this ), + u_Frustum( this ) { +} + +GLShader_clearSurfaces::GLShader_clearSurfaces( GLShaderManager* manager ) : + GLShader( "clearSurfaces", ATTR_POSITION, 
manager, false, false, true ), + u_Frame( this ) { +} + +GLShader_processSurfaces::GLShader_processSurfaces( GLShaderManager* manager ) : + GLShader( "processSurfaces", ATTR_POSITION, manager, false, false, true ), + u_Frame( this ), + u_ViewID( this ), + u_SurfaceCommandsOffset( this ), + u_CulledCommandsOffset( this ) { +} diff --git a/src/engine/renderer/gl_shader.h b/src/engine/renderer/gl_shader.h index bc6898bb47..2a1570dfc2 100644 --- a/src/engine/renderer/gl_shader.h +++ b/src/engine/renderer/gl_shader.h @@ -351,6 +351,8 @@ class GLShaderManager GLHeader GLCompatHeader; GLHeader GLVertexHeader; GLHeader GLFragmentHeader; + GLHeader GLComputeHeader; + GLHeader GLWorldHeader; GLHeader GLEngineConstants; GLShaderManager() : _totalBuildTime( 0 ) @@ -361,6 +363,7 @@ class GLShaderManager void InitDriverInfo(); void GenerateBuiltinHeaders(); + void GenerateWorldHeaders(); template< class T > void load( T *& shader ) @@ -708,6 +711,56 @@ class GLUniform1i : protected GLUniform int currentValue = 0; }; +class GLUniform1ui : protected GLUniform { + protected: + GLUniform1ui( GLShader* shader, const char* name, const bool global = false ) : + GLUniform( shader, name, "uint", 1, 1, global ) { + } + + inline void SetValue( uint value ) { + shaderProgram_t* p = _shader->GetProgram(); + + if ( _global || !_shader->UseMaterialSystem() ) { + ASSERT_EQ( p, glState.currentProgram ); + } + +#if defined( LOG_GLSL_UNIFORMS ) + if ( r_logFile->integer ) { + GLimp_LogComment( va( "GLSL_SetUniform1i( %s, shader: %s, value: %d ) ---\n", + this->GetName(), _shader->GetName().c_str(), value ) ); + } +#endif + + if ( _shader->UseMaterialSystem() && !_global ) { + currentValue = value; + return; + } + +#if defined( USE_UNIFORM_FIREWALL ) + uint* firewall = ( uint* ) &p->uniformFirewall[_firewallIndex]; + + if ( *firewall == value ) { + return; + } + + *firewall = value; +#endif + glUniform1ui( p->uniformLocations[_locationIndex], value ); + } + public: + size_t GetSize() override { + 
return sizeof( uint ); + } + + uint32_t* WriteToBuffer( uint32_t* buffer ) override { + memcpy( buffer, &currentValue, sizeof( uint ) ); + return buffer + 1; + } + + private: + uint currentValue = 0; +}; + class GLUniform1Bool : protected GLUniform { protected: // GLSL std430 bool is always 4 bytes, which might not correspond to C++ bool @@ -1266,54 +1319,116 @@ class GLUniformBlock } }; -class GLSSBO { +class GLBuffer { public: std::string _name; const GLuint _bindingPoint; + const GLbitfield _flags; + const GLbitfield _mapFlags; + const GLuint64 SYNC_TIMEOUT = 10000000000; // 10 seconds - GLSSBO( const char* name, const GLuint bindingPoint ) : + GLBuffer( const char* name, const GLuint bindingPoint, const GLbitfield flags, const GLbitfield mapFlags ) : _name( name ), - _bindingPoint( bindingPoint ) { + _bindingPoint( bindingPoint ), + _flags( flags ), + _mapFlags( mapFlags ) { } - public: const char* GetName() { return _name.c_str(); } - void BindBufferBase() { - glBindBufferBase( GL_SHADER_STORAGE_BUFFER, _bindingPoint, handle ); + void BindBufferBase( const GLenum target ) { + glBindBufferBase( target, _bindingPoint, handle ); } - void BindBuffer() { - glBindBuffer( GL_SHADER_STORAGE_BUFFER, handle ); + void UnBindBufferBase( const GLenum target ) { + glBindBufferBase( target, _bindingPoint, 0 ); + } + + void BindBuffer( const GLenum target ) { + glBindBuffer( target, handle ); + } + + void UnBindBuffer( const GLenum target ) { + glBindBuffer( target, 0 ); } - uint32_t* MapBufferRange( const GLuint count ) { + void BufferStorage( const GLenum target, const GLsizeiptr newAreaSize, const GLsizeiptr areaCount, const void* data ) { + areaSize = newAreaSize; + maxAreas = areaCount; + glBufferStorage( target, areaSize * areaCount * sizeof(uint32_t), data, _flags ); + syncs.resize( areaCount ); + } + + void AreaIncr() { + syncs[area] = glFenceSync( GL_SYNC_GPU_COMMANDS_COMPLETE, 0 ); + area++; + if ( area >= maxAreas ) { + area = 0; + } + } + + void MapAll( const GLenum 
target ) { if ( !mapped ) { mapped = true; - data = ( uint32_t* ) glMapBufferRange( GL_SHADER_STORAGE_BUFFER, + mappedTarget = target; + data = ( uint32_t* ) glMapBufferRange( target, 0, areaSize * maxAreas * sizeof( uint32_t ), _flags | _mapFlags ); + } + } + + uint32_t* GetCurrentAreaData() { + if ( syncs[area] != nullptr ) { + if ( glClientWaitSync( syncs[area], GL_SYNC_FLUSH_COMMANDS_BIT, SYNC_TIMEOUT ) == GL_TIMEOUT_EXPIRED ) { + Sys::Drop( "Failed buffer %s area %u sync", _name, area ); + } + glDeleteSync( syncs[area] ); + } + + return data + area * areaSize; + } + + uint32_t* GetData() { + return data; + } + + void FlushCurrentArea( GLenum target ) { + glFlushMappedBufferRange( target, area * areaSize * sizeof( uint32_t ), areaSize * sizeof( uint32_t ) ); + } + + void FlushAll( GLenum target ) { + glFlushMappedBufferRange( target, 0, maxAreas * areaSize * sizeof( uint32_t ) ); + } + + uint32_t* MapBufferRange( const GLenum target, const GLuint count ) { + if ( !mapped ) { + mapped = true; + mappedTarget = target; + data = ( uint32_t* ) glMapBufferRange( target, 0, count * sizeof( uint32_t ), - GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT ); + _flags | _mapFlags ); } return data; } - uint32_t* MapBufferRange( const GLuint offset, const GLuint count ) { + uint32_t* MapBufferRange( const GLenum target, const GLuint offset, const GLuint count ) { if ( !mapped ) { mapped = true; - data = ( uint32_t* ) glMapBufferRange( GL_SHADER_STORAGE_BUFFER, + mappedTarget = target; + data = ( uint32_t* ) glMapBufferRange( target, offset * sizeof( uint32_t ), count * sizeof( uint32_t ), - GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT ); + _flags | _mapFlags ); } return data; } void UnmapBuffer() { - mapped = false; - glUnmapBuffer( GL_SHADER_STORAGE_BUFFER ); + if ( mapped ) { + mapped = false; + glUnmapBuffer( mappedTarget ); + } } void GenBuffer() { @@ -1325,11 +1440,148 @@ class GLSSBO { } private: + GLenum mappedTarget; GLuint handle; bool mapped = false; + std::vector<GLsync>
syncs; + GLsizeiptr area = 0; + GLsizeiptr areaSize = 0; + GLsizeiptr maxAreas = 0; uint32_t* data; }; +class GLSSBO : public GLBuffer { + public: + GLSSBO( const char* name, const GLuint bindingPoint, const GLbitfield flags, const GLbitfield mapFlags ) : + GLBuffer( name, bindingPoint, flags, mapFlags ) { + } + + public: + const char* GetName() { + return _name.c_str(); + } + + void BindBufferBase() { + GLBuffer::BindBufferBase( GL_SHADER_STORAGE_BUFFER ); + } + + void UnBindBufferBase() { + GLBuffer::UnBindBufferBase( GL_SHADER_STORAGE_BUFFER ); + } + + void BindBuffer() { + GLBuffer::BindBuffer( GL_SHADER_STORAGE_BUFFER ); + } + + void BufferStorage( const GLsizeiptr areaSize, const GLsizeiptr areaCount, const void* data ) { + GLBuffer::BufferStorage( GL_SHADER_STORAGE_BUFFER, areaSize, areaCount, data ); + } + + void MapAll() { + GLBuffer::MapAll( GL_SHADER_STORAGE_BUFFER ); + } + + void FlushCurrentArea() { + GLBuffer::FlushCurrentArea( GL_SHADER_STORAGE_BUFFER ); + } + + uint32_t* MapBufferRange( const GLsizeiptr count ) { + return GLBuffer::MapBufferRange( GL_SHADER_STORAGE_BUFFER, count ); + } + + uint32_t* MapBufferRange( const GLsizeiptr offset, const GLsizeiptr count ) { + return GLBuffer::MapBufferRange( GL_SHADER_STORAGE_BUFFER, offset, count ); + } +}; + +class GLUBO : public GLBuffer { + public: + GLUBO( const char* name, const GLsizeiptr bindingPoint, const GLbitfield flags, const GLbitfield mapFlags ) : + GLBuffer( name, bindingPoint, flags, mapFlags ) { + } + + public: + const char* GetName() { + return _name.c_str(); + } + + void BindBufferBase() { + GLBuffer::BindBufferBase( GL_UNIFORM_BUFFER ); + } + + void UnBindBufferBase() { + GLBuffer::UnBindBufferBase( GL_UNIFORM_BUFFER ); + } + + void BindBuffer() { + GLBuffer::BindBuffer( GL_UNIFORM_BUFFER ); + } + + void BufferStorage( const GLsizeiptr areaSize, const GLsizeiptr areaCount, const void* data ) { + GLBuffer::BufferStorage( GL_UNIFORM_BUFFER, areaSize, areaCount, data ); + } + + void 
MapAll() { + GLBuffer::MapAll( GL_UNIFORM_BUFFER ); + } + + void FlushCurrentArea() { + GLBuffer::FlushCurrentArea( GL_UNIFORM_BUFFER ); + } + + uint32_t* MapBufferRange( const GLsizeiptr count ) { + return GLBuffer::MapBufferRange( GL_UNIFORM_BUFFER, count ); + } + + uint32_t* MapBufferRange( const GLsizeiptr offset, const GLsizeiptr count ) { + return GLBuffer::MapBufferRange( GL_UNIFORM_BUFFER, offset, count ); + } +}; + +class GLAtomicCounterBuffer : public GLBuffer { + public: + GLAtomicCounterBuffer( const char* name, const GLsizeiptr bindingPoint, const GLbitfield flags, const GLbitfield mapFlags ) : + GLBuffer( name, bindingPoint, flags, mapFlags ) { + } + + public: + const char* GetName() { + return _name.c_str(); + } + + void BindBufferBase() { + GLBuffer::BindBufferBase( GL_ATOMIC_COUNTER_BUFFER ); + } + + void UnBindBufferBase() { + GLBuffer::UnBindBufferBase( GL_ATOMIC_COUNTER_BUFFER ); + } + + void BindBuffer() { + GLBuffer::BindBuffer( GL_ATOMIC_COUNTER_BUFFER ); + } + + void BufferStorage( const GLsizeiptr areaSize, const GLsizeiptr areaCount, const void* data ) { + GLBuffer::BufferStorage( GL_ATOMIC_COUNTER_BUFFER, areaSize, areaCount, data ); + } + + void MapAll() { + GLBuffer::MapAll( GL_ATOMIC_COUNTER_BUFFER ); + } + + void FlushCurrentArea() { + GLBuffer::FlushCurrentArea( GL_ATOMIC_COUNTER_BUFFER ); + } + + uint32_t* MapBufferRange( const GLsizeiptr count ) { + return GLBuffer::MapBufferRange( GL_ATOMIC_COUNTER_BUFFER, count ); + } + + uint32_t* MapBufferRange( const GLsizeiptr offset, const GLsizeiptr count ) { + return GLBuffer::MapBufferRange( GL_ATOMIC_COUNTER_BUFFER, offset, count ); + } +}; + class GLIndirectBuffer { public: @@ -1357,7 +1609,7 @@ class GLIndirectBuffer { glBindBuffer( GL_DRAW_INDIRECT_BUFFER, handle ); } - GLIndirectCommand* MapBufferRange( const GLuint count ) { + GLIndirectCommand* MapBufferRange( const GLsizeiptr count ) { return (GLIndirectCommand*) glMapBufferRange( GL_DRAW_INDIRECT_BUFFER, 0, count * sizeof( 
GLIndirectCommand ), GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT ); @@ -2902,6 +3154,78 @@ class u_Color : } }; +class u_Frame : + GLUniform1ui { + public: + u_Frame( GLShader* shader ) : + GLUniform1ui( shader, "u_Frame" ) { + } + + void SetUniform_Frame( const uint frame ) { + this->SetValue( frame ); + } +}; + +class u_ViewID : + GLUniform1ui { + public: + u_ViewID( GLShader* shader ) : + GLUniform1ui( shader, "u_ViewID" ) { + } + + void SetUniform_ViewID( const uint viewID ) { + this->SetValue( viewID ); + } +}; + +class u_TotalDrawSurfs : + GLUniform1ui { + public: + u_TotalDrawSurfs( GLShader* shader ) : + GLUniform1ui( shader, "u_TotalDrawSurfs" ) { + } + + void SetUniform_TotalDrawSurfs( const uint totalDrawSurfs ) { + this->SetValue( totalDrawSurfs ); + } +}; + +class u_Frustum : + GLUniform4fv { + public: + u_Frustum( GLShader* shader ) : + GLUniform4fv( shader, "u_Frustum", 6 ) { + } + + void SetUniform_Frustum( vec4_t frustum[6] ) { + this->SetValue( 6, &frustum[0] ); + } +}; + +class u_SurfaceCommandsOffset : + GLUniform1ui { + public: + u_SurfaceCommandsOffset( GLShader* shader ) : + GLUniform1ui( shader, "u_SurfaceCommandsOffset" ) { + } + + void SetUniform_SurfaceCommandsOffset( const uint surfaceCommandsOffset ) { + this->SetValue( surfaceCommandsOffset ); + } +}; + +class u_CulledCommandsOffset : + GLUniform1ui { + public: + u_CulledCommandsOffset( GLShader* shader ) : + GLUniform1ui( shader, "u_CulledCommandsOffset" ) { + } + + void SetUniform_CulledCommandsOffset( const uint culledCommandsOffset ) { + this->SetValue( culledCommandsOffset ); + } +}; + class u_ModelMatrix : GLUniformMatrix4f { @@ -3024,7 +3348,7 @@ class u_Bones : { public: u_Bones( GLShader *shader ) : - GLUniform4fv( shader, "u_Bones", MAX_BONES * 0 + 1 ) + GLUniform4fv( shader, "u_Bones", MAX_BONES ) { } @@ -4255,6 +4579,33 @@ class GLShader_fxaa : void BuildShaderFragmentLibNames( std::string& fragmentInlines ) override; }; +class GLShader_cull : + public GLShader, + 
public u_TotalDrawSurfs, + public u_SurfaceCommandsOffset, + public u_Frustum { + public: + GLShader_cull( GLShaderManager* manager ); +}; + +class GLShader_clearSurfaces : + public GLShader, + public u_Frame { + public: + GLShader_clearSurfaces( GLShaderManager* manager ); +}; + +class GLShader_processSurfaces : + public GLShader, + public u_Frame, + public u_ViewID, + public u_SurfaceCommandsOffset, + public u_CulledCommandsOffset { + public: + GLShader_processSurfaces( GLShaderManager* manager ); +}; + + std::string GetShaderPath(); extern ShaderKind shaderKind; @@ -4262,6 +4613,9 @@ extern ShaderKind shaderKind; extern GLShader_generic2D *gl_generic2DShader; extern GLShader_generic *gl_genericShader; extern GLShader_genericMaterial *gl_genericShaderMaterial; +extern GLShader_cull *gl_cullShader; +extern GLShader_clearSurfaces *gl_clearSurfacesShader; +extern GLShader_processSurfaces *gl_processSurfacesShader; extern GLShader_lightMapping *gl_lightMappingShader; extern GLShader_lightMappingMaterial *gl_lightMappingShaderMaterial; extern GLShader_forwardLighting_omniXYZ *gl_forwardLightingShader_omniXYZ; diff --git a/src/engine/renderer/glsl_source/clearSurfaces_cp.glsl b/src/engine/renderer/glsl_source/clearSurfaces_cp.glsl new file mode 100644 index 0000000000..7c42dc1881 --- /dev/null +++ b/src/engine/renderer/glsl_source/clearSurfaces_cp.glsl @@ -0,0 +1,53 @@ +/* +=========================================================================== + +Daemon BSD Source Code +Copyright (c) 2024 Daemon Developers +All rights reserved. + +This file is part of the Daemon BSD Source Code (Daemon Source Code). + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Daemon developers nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL DAEMON DEVELOPERS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=========================================================================== +*/ + +/* clearSurfaces_cp.glsl */ + +layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +layout(std430, binding = 4) writeonly buffer atomicCommandCountersBuffer { + uint atomicCommandCounters[MAX_COMMAND_COUNTERS * MAX_VIEWS * MAX_FRAMES]; +}; + +uniform uint u_Frame; + +void main() { + const uint globalInvocationID = gl_GlobalInvocationID.z * gl_NumWorkGroups.x * gl_WorkGroupSize.x * gl_NumWorkGroups.y * gl_WorkGroupSize.y + + gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + + gl_GlobalInvocationID.x; + if( globalInvocationID >= MAX_COMMAND_COUNTERS * MAX_VIEWS ) { + return; + } + atomicCommandCounters[globalInvocationID + MAX_COMMAND_COUNTERS * MAX_VIEWS * u_Frame] = 0; +} diff --git a/src/engine/renderer/glsl_source/cull_cp.glsl b/src/engine/renderer/glsl_source/cull_cp.glsl new file mode 100644 index 0000000000..71e29d54ed --- /dev/null +++ b/src/engine/renderer/glsl_source/cull_cp.glsl @@ -0,0 +1,113 @@ +/* +=========================================================================== + +Daemon BSD Source Code +Copyright (c) 2024 Daemon Developers +All rights reserved. + +This file is part of the Daemon BSD Source Code (Daemon Source Code). + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Daemon developers nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL DAEMON DEVELOPERS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=========================================================================== +*/ + +/* cull_cp.glsl */ + +layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +// layout(rg16f, binding = 0) uniform image2D depthImage; + +struct BoundingSphere { + vec3 center; + float radius; +}; + +struct SurfaceDescriptor { + BoundingSphere boundingSphere; + uint surfaceCommandIDs[MAX_SURFACE_COMMANDS]; +}; + +struct GLIndirectCommand { + uint count; + uint instanceCount; + uint firstIndex; + int baseVertex; + uint baseInstance; +}; + +struct SurfaceCommand { + bool enabled; + GLIndirectCommand drawCommand; +}; + +layout(std430, binding = 1) readonly restrict buffer surfaceDescriptorsSSBO { + SurfaceDescriptor surfaces[]; +}; + +layout(std430, binding = 2) writeonly restrict buffer surfaceCommandsSSBO { + SurfaceCommand surfaceCommands[]; +}; + +struct Plane { + vec3 normal; + float distance; +}; + +uniform uint u_TotalDrawSurfs; +uniform uint u_SurfaceCommandsOffset; +uniform vec4 u_Frustum[6]; // xyz - normal, w - distance + +bool CullSurface( in BoundingSphere boundingSphere ) { + for( int i = 0; i < 5; i++ ) { // Skip far plane for now because we always have it set to { 0, 0, 0, 0 } for some reason + const float distance = 
dot( u_Frustum[i].xyz, boundingSphere.center ) - u_Frustum[i].w; + + if( distance < -boundingSphere.radius ) { + return true; + } + } + return false; +} + +void ProcessSurfaceCommands( const in SurfaceDescriptor surface, const in bool enabled ) { + for( uint i = 0; i < MAX_SURFACE_COMMANDS; i++ ) { + const uint commandID = surface.surfaceCommandIDs[i]; + if( commandID == 0 ) { + return; + } + surfaceCommands[commandID + u_SurfaceCommandsOffset].enabled = enabled; + } +} + +void main() { + const uint globalInvocationID = gl_GlobalInvocationID.z * gl_NumWorkGroups.x * gl_WorkGroupSize.x * gl_NumWorkGroups.y * gl_WorkGroupSize.y + + gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + + gl_GlobalInvocationID.x; + if( globalInvocationID >= u_TotalDrawSurfs ) { + return; + } + SurfaceDescriptor surface = surfaces[globalInvocationID]; + bool culled = CullSurface( surface.boundingSphere ); + + ProcessSurfaceCommands( surface, !culled ); +} diff --git a/src/engine/renderer/glsl_source/processSurfaces_cp.glsl b/src/engine/renderer/glsl_source/processSurfaces_cp.glsl new file mode 100644 index 0000000000..5a3a5676ac --- /dev/null +++ b/src/engine/renderer/glsl_source/processSurfaces_cp.glsl @@ -0,0 +1,96 @@ +/* +=========================================================================== + +Daemon BSD Source Code +Copyright (c) 2024 Daemon Developers +All rights reserved. + +This file is part of the Daemon BSD Source Code (Daemon Source Code). + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the Daemon developers nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL DAEMON DEVELOPERS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=========================================================================== +*/ + +/* processSurfaces_cp.glsl */ + +layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +struct GLIndirectCommand { + uint count; + uint instanceCount; + uint firstIndex; + int baseVertex; + uint baseInstance; +}; + +struct SurfaceCommand { + bool enabled; + GLIndirectCommand drawCommand; +}; + +struct SurfaceCommandBatch { + uvec2 materialIDs[2]; +}; + +layout(std430, binding = 2) readonly buffer surfaceCommandsSSBO { + SurfaceCommand surfaceCommands[]; +}; + +layout(std430, binding = 3) writeonly buffer culledCommandsSSBO { + GLIndirectCommand culledCommands[]; +}; + +layout(std140, binding = 0) uniform ub_SurfaceBatches { + SurfaceCommandBatch surfaceBatches[MAX_SURFACE_COMMAND_BATCHES]; +}; + +layout (binding = 4) uniform atomic_uint atomicCommandCounters[MAX_COMMAND_COUNTERS * MAX_VIEWS * MAX_FRAMES]; + +uniform uint u_Frame; +uniform uint u_ViewID; +uniform uint u_SurfaceCommandsOffset; +uniform uint u_CulledCommandsOffset; + +void 
AddDrawCommand( in uint commandID, in uvec2 materialID ) { + SurfaceCommand command = surfaceCommands[commandID + u_SurfaceCommandsOffset]; + if( command.enabled ) { + const uint atomicCmdID = atomicCounterIncrement( atomicCommandCounters[materialID.x + + MAX_COMMAND_COUNTERS * ( MAX_VIEWS * u_Frame + u_ViewID )] ); + culledCommands[atomicCmdID + materialID.y * MAX_COMMAND_COUNTERS + u_CulledCommandsOffset] = command.drawCommand; + } +} + +void main() { + const uint globalGroupID = gl_WorkGroupID.z * gl_NumWorkGroups.x * gl_NumWorkGroups.y + + gl_WorkGroupID.y * gl_NumWorkGroups.x + + gl_WorkGroupID.x; + const uint globalInvocationID = gl_GlobalInvocationID.z * gl_NumWorkGroups.x * gl_WorkGroupSize.x + * gl_NumWorkGroups.y * gl_WorkGroupSize.y + + gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + + gl_GlobalInvocationID.x + + 1; // Add 1 because the first surface command is always reserved as a fake command + const uvec2 materialID = surfaceBatches[globalGroupID / 2].materialIDs[globalGroupID % 2]; + + AddDrawCommand( globalInvocationID, materialID ); +} diff --git a/src/engine/renderer/shaders.cpp b/src/engine/renderer/shaders.cpp index b9aee09c9e..1a7dc740bb 100644 --- a/src/engine/renderer/shaders.cpp +++ b/src/engine/renderer/shaders.cpp @@ -60,6 +60,9 @@ #include "skybox_fp.glsl.h" #include "material_vp.glsl.h" #include "material_fp.glsl.h" +#include "cull_cp.glsl.h" +#include "processSurfaces_cp.glsl.h" +#include "clearSurfaces_cp.glsl.h" std::unordered_map<std::string, std::string> shadermap({ { "glsl/blurX_fp.glsl", std::string(reinterpret_cast<const char*>(blurX_fp_glsl), sizeof(blurX_fp_glsl)) }, @@ -71,6 +74,8 @@ std::unordered_map<std::string, std::string> shadermap({ { "glsl/computeLight_fp.glsl", std::string(reinterpret_cast<const char*>(computeLight_fp_glsl), sizeof(computeLight_fp_glsl)) }, { "glsl/contrast_fp.glsl", std::string(reinterpret_cast<const char*>(contrast_fp_glsl), sizeof(contrast_fp_glsl)) }, { "glsl/contrast_vp.glsl", std::string(reinterpret_cast<const char*>(contrast_vp_glsl), sizeof(contrast_vp_glsl)) }, + {
"glsl/clearSurfaces_cp.glsl", std::string( reinterpret_cast< const char* >( clearSurfaces_cp_glsl ), sizeof( clearSurfaces_cp_glsl ) ) }, + { "glsl/cull_cp.glsl", std::string( reinterpret_cast< const char* >( cull_cp_glsl ), sizeof( cull_cp_glsl ) ) }, { "glsl/debugShadowMap_fp.glsl", std::string(reinterpret_cast<const char*>(debugShadowMap_fp_glsl), sizeof(debugShadowMap_fp_glsl)) }, { "glsl/debugShadowMap_vp.glsl", std::string(reinterpret_cast<const char*>(debugShadowMap_vp_glsl), sizeof(debugShadowMap_vp_glsl)) }, { "glsl/deformVertexes_vp.glsl", std::string(reinterpret_cast<const char*>(deformVertexes_vp_glsl), sizeof(deformVertexes_vp_glsl)) }, @@ -105,6 +110,7 @@ std::unordered_map<std::string, std::string> shadermap({ { "glsl/motionblur_vp.glsl", std::string(reinterpret_cast<const char*>(motionblur_vp_glsl), sizeof(motionblur_vp_glsl)) }, { "glsl/portal_fp.glsl", std::string(reinterpret_cast<const char*>(portal_fp_glsl), sizeof(portal_fp_glsl)) }, { "glsl/portal_vp.glsl", std::string(reinterpret_cast<const char*>(portal_vp_glsl), sizeof(portal_vp_glsl)) }, + { "glsl/processSurfaces_cp.glsl", std::string( reinterpret_cast< const char* >( processSurfaces_cp_glsl ), sizeof( processSurfaces_cp_glsl ) ) }, { "glsl/reflection_CB_fp.glsl", std::string(reinterpret_cast<const char*>(reflection_CB_fp_glsl), sizeof(reflection_CB_fp_glsl)) }, { "glsl/reflection_CB_vp.glsl", std::string(reinterpret_cast<const char*>(reflection_CB_vp_glsl), sizeof(reflection_CB_vp_glsl)) }, { "glsl/refraction_C_fp.glsl", std::string(reinterpret_cast<const char*>(refraction_C_fp_glsl), sizeof(refraction_C_fp_glsl)) }, diff --git a/src/engine/renderer/tr_backend.cpp b/src/engine/renderer/tr_backend.cpp index 39665b4ef4..d3933e4e8f 100644 --- a/src/engine/renderer/tr_backend.cpp +++ b/src/engine/renderer/tr_backend.cpp @@ -4834,6 +4834,8 @@ static void RB_RenderView( bool depthPass ) if ( ( backEnd.refdef.rdflags & RDF_HYPERSPACE ) ) { RB_Hyperspace(); + + materialSystem.currentView++; return; } else @@ -4852,11 +4854,9 @@ static void RB_RenderView( bool depthPass ) startTime = ri.Milliseconds(); } - materialSystem.frameStart =
true; - if( depthPass ) { if ( glConfig2.materialSystemAvailable ) { - materialSystem.RenderMaterials( shaderSort_t::SS_DEPTH, shaderSort_t::SS_DEPTH ); + materialSystem.RenderMaterials( shaderSort_t::SS_DEPTH, shaderSort_t::SS_DEPTH, backEnd.viewParms.viewID ); } RB_RenderDrawSurfaces( shaderSort_t::SS_DEPTH, shaderSort_t::SS_DEPTH, DRAWSURFACES_ALL ); RB_RunVisTests(); @@ -4873,7 +4873,7 @@ static void RB_RenderView( bool depthPass ) { // draw everything that is not the gun if ( glConfig2.materialSystemAvailable ) { - materialSystem.RenderMaterials( shaderSort_t::SS_ENVIRONMENT_FOG, shaderSort_t::SS_OPAQUE ); + materialSystem.RenderMaterials( shaderSort_t::SS_ENVIRONMENT_FOG, shaderSort_t::SS_OPAQUE, backEnd.viewParms.viewID ); } RB_RenderDrawSurfaces( shaderSort_t::SS_ENVIRONMENT_FOG, shaderSort_t::SS_OPAQUE, DRAWSURFACES_ALL_FAR ); @@ -4886,7 +4886,7 @@ static void RB_RenderView( bool depthPass ) { // draw everything that is opaque if ( glConfig2.materialSystemAvailable ) { - materialSystem.RenderMaterials( shaderSort_t::SS_ENVIRONMENT_FOG, shaderSort_t::SS_OPAQUE ); + materialSystem.RenderMaterials( shaderSort_t::SS_ENVIRONMENT_FOG, shaderSort_t::SS_OPAQUE, backEnd.viewParms.viewID ); } RB_RenderDrawSurfaces( shaderSort_t::SS_ENVIRONMENT_FOG, shaderSort_t::SS_OPAQUE, DRAWSURFACES_ALL ); } @@ -4918,7 +4918,7 @@ static void RB_RenderView( bool depthPass ) // draw everything that is translucent if ( glConfig2.materialSystemAvailable ) { - materialSystem.RenderMaterials( shaderSort_t::SS_ENVIRONMENT_NOFOG, shaderSort_t::SS_POST_PROCESS ); + materialSystem.RenderMaterials( shaderSort_t::SS_ENVIRONMENT_NOFOG, shaderSort_t::SS_POST_PROCESS, backEnd.viewParms.viewID ); } RB_RenderDrawSurfaces( shaderSort_t::SS_ENVIRONMENT_NOFOG, shaderSort_t::SS_POST_PROCESS, DRAWSURFACES_ALL ); @@ -4946,6 +4946,8 @@ static void RB_RenderView( bool depthPass ) backEnd.pc.c_portals++; } + materialSystem.currentView++; + backEnd.pc.c_views++; } @@ -4980,6 +4982,13 @@ static void 
RB_RenderPostProcess() tr.refdef.pixelTarget[(i * 4) + 3] = 255; //set the alpha pure white } } + + if( glConfig2.materialSystemAvailable ) { + // Dispatch the cull compute shaders for the queued views once we're done with post-processing + // We'll only use the results from those shaders in the next frame so we don't block the pipeline + materialSystem.CullSurfaces(); + materialSystem.EndFrame(); + } GL_CheckErrors(); } @@ -6029,6 +6038,8 @@ void RB_ExecuteRenderCommands( const void *data ) backEnd.smpFrame = 1; } + + materialSystem.frameStart = true; while ( cmd != nullptr ) { cmd = cmd->ExecuteSelf(); diff --git a/src/engine/renderer/tr_bsp.cpp b/src/engine/renderer/tr_bsp.cpp index 50f841e1ab..31350add7a 100644 --- a/src/engine/renderer/tr_bsp.cpp +++ b/src/engine/renderer/tr_bsp.cpp @@ -7073,4 +7073,7 @@ void RE_LoadWorldMap( const char *name ) tr.mapLightFactor = pow( 2, tr.mapOverBrightBits ); tr.mapInverseLightFactor = 1.0f / tr.mapLightFactor; } + + tr.worldLoaded = true; + GLSL_InitWorldShadersOrError(); } diff --git a/src/engine/renderer/tr_local.h b/src/engine/renderer/tr_local.h index 388f51f3d6..c8c8ae089a 100644 --- a/src/engine/renderer/tr_local.h +++ b/src/engine/renderer/tr_local.h @@ -1519,6 +1519,8 @@ enum class dynamicLightRenderer_t { LEGACY, TILED }; orientationr_t orientation; orientationr_t world; + uint viewID = 0; + vec3_t pvsOrigin; // may be different than or.origin for portals int portalLevel; // number of portals this view is through @@ -1657,6 +1659,7 @@ enum class dynamicLightRenderer_t { LEGACY, TILED }; uint materialIDs[ MAX_SHADER_STAGES ]; uint materialPackIDs[ MAX_SHADER_STAGES ]; bool texturesDynamic[ MAX_SHADER_STAGES ]; + uint drawCommandIDs[ MAX_SHADER_STAGES ]; inline int index() const { return int( ( sort & SORT_INDEX_MASK ) ); @@ -2644,6 +2647,7 @@ enum class dynamicLightRenderer_t { LEGACY, TILED }; deluxeMode_t worldDeluxe; deluxeMode_t modelDeluxe; + bool worldLoaded; world_t *world; TextureManager textureManager; @@ -3420,6
+3424,7 @@ inline bool checkGLErrors() alignas(16) extern shaderCommands_t tess; void GLSL_InitGPUShaders(); + void GLSL_InitWorldShadersOrError(); void GLSL_ShutdownGPUShaders(); void GLSL_FinishGPUShaders(); diff --git a/src/engine/renderer/tr_main.cpp b/src/engine/renderer/tr_main.cpp index 9494c4035a..27d84296a6 100644 --- a/src/engine/renderer/tr_main.cpp +++ b/src/engine/renderer/tr_main.cpp @@ -2883,11 +2883,10 @@ void R_RenderView( viewParms_t *parms ) // because it requires the decalBits R_CullDecalProjectors(); - if ( glConfig2.materialSystemAvailable && !materialSystem.generatedWorldCommandBuffer ) { - materialSystem.GenerateWorldMaterials(); - } - - if ( !glConfig2.materialSystemAvailable ) { + if ( glConfig2.materialSystemAvailable ) { + tr.viewParms.viewID = tr.viewCount; + materialSystem.QueueSurfaceCull( tr.viewCount, (frustum_t*) tr.viewParms.frustums[0] ); + } else { R_AddWorldSurfaces(); } diff --git a/src/engine/renderer/tr_public.h b/src/engine/renderer/tr_public.h index 770658f217..47e3ae55ab 100644 --- a/src/engine/renderer/tr_public.h +++ b/src/engine/renderer/tr_public.h @@ -89,6 +89,10 @@ struct glconfig2_t bool shaderDrawParametersAvailable; bool SSBOAvailable; bool multiDrawIndirectAvailable; + bool shadingLanguage420PackAvailable; + bool explicitUniformLocationAvailable; + bool shaderImageLoadStoreAvailable; + bool shaderAtomicCountersAvailable; bool materialSystemAvailable; bool gpuShader4Available; bool gpuShader5Available; diff --git a/src/engine/renderer/tr_scene.cpp b/src/engine/renderer/tr_scene.cpp index f793aee0ac..16bff9626a 100644 --- a/src/engine/renderer/tr_scene.cpp +++ b/src/engine/renderer/tr_scene.cpp @@ -626,12 +626,17 @@ void RE_RenderScene( const refdef_t *fd ) VectorCopy( fd->vieworg, parms.pvsOrigin ); Vector4Copy( fd->gradingWeights, parms.gradingWeights ); - // TODO: Add cmds for updating dynamic surfaces and for culling here - materialSystem.frameStart = true; - R_AddClearBufferCmd(); R_AddSetupLightsCmd(); + if 
( glConfig2.materialSystemAvailable && !materialSystem.generatedWorldCommandBuffer ) { + materialSystem.GenerateWorldMaterials(); + } + + if ( glConfig2.materialSystemAvailable ) { + materialSystem.StartFrame(); + } + R_RenderView( &parms ); R_RenderPostProcess(); diff --git a/src/engine/renderer/tr_shade.cpp b/src/engine/renderer/tr_shade.cpp index 11b1fecfe0..3259008a42 100644 --- a/src/engine/renderer/tr_shade.cpp +++ b/src/engine/renderer/tr_shade.cpp @@ -60,6 +60,23 @@ static void EnableAvailableFeatures() } } +// For shaders that require map data for compile-time values +void GLSL_InitWorldShadersOrError() { + // make sure the render thread is stopped + R_SyncRenderThread(); + + GL_CheckErrors(); + + gl_shaderManager.GenerateWorldHeaders(); + + // Material system shaders that are always loaded if material system is available + if ( glConfig2.materialSystemAvailable ) { + gl_shaderManager.load( gl_cullShader ); + } + + gl_shaderManager.buildAll(); +} + static void GLSL_InitGPUShadersOrError() { // make sure the render thread is stopped @@ -86,6 +103,9 @@ static void GLSL_InitGPUShadersOrError() gl_shaderManager.load( gl_skyboxShaderMaterial ); gl_shaderManager.load( gl_fogQuake3ShaderMaterial ); gl_shaderManager.load( gl_heatHazeShaderMaterial ); + gl_shaderManager.load( gl_cullShader ); + gl_shaderManager.load( gl_clearSurfacesShader ); + gl_shaderManager.load( gl_processSurfacesShader ); } // standard light mapping @@ -291,6 +311,9 @@ void GLSL_ShutdownGPUShaders() gl_genericShader = nullptr; gl_genericShaderMaterial = nullptr; + gl_cullShader = nullptr; + gl_clearSurfacesShader = nullptr; + gl_processSurfacesShader = nullptr; gl_lightMappingShader = nullptr; gl_lightMappingShaderMaterial = nullptr; gl_forwardLightingShader_omniXYZ = nullptr; diff --git a/src/engine/renderer/tr_shader.cpp b/src/engine/renderer/tr_shader.cpp index 54977a6182..ef235f4d07 100644 --- a/src/engine/renderer/tr_shader.cpp +++ b/src/engine/renderer/tr_shader.cpp @@ -24,6 +24,7 @@ 
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include "tr_local.h" #include "gl_shader.h" #include "framework/CvarSystem.h" +#include "Material.h" #include static const int MAX_SHADERTABLE_HASH = 1024; @@ -5902,6 +5903,18 @@ static shader_t *FinishShader() // Copy the current global shader to a newly allocated shader. shader_t *ret = MakeShaderPermanent(); + if ( glConfig2.materialSystemAvailable && !tr.worldLoaded ) { + uint8_t maxStages = 0; + for ( shaderStage_t* pStage = ret->stages; pStage < ret->lastStage; pStage++ ) { + maxStages++; + } + + if ( maxStages % 4 != 0 ) { // Aligned to 4 components + maxStages = ( maxStages / 4 + 1 ) * 4; + } + materialSystem.maxStages = maxStages > materialSystem.maxStages ? maxStages : materialSystem.maxStages; + } + // generate depth-only shader if necessary if( !shader.isSky && numStages > 0 && diff --git a/src/engine/renderer/tr_surface.cpp b/src/engine/renderer/tr_surface.cpp index c2af5df58a..dad3aee161 100644 --- a/src/engine/renderer/tr_surface.cpp +++ b/src/engine/renderer/tr_surface.cpp @@ -1482,7 +1482,6 @@ static void Tess_SurfaceVBOMesh( srfVBOMesh_t *srf ) { GLimp_LogComment( "--- Tess_SurfaceVBOMesh ---\n" ); - Tess_SurfaceVBO( srf->vbo, srf->ibo, srf->numIndexes, srf->firstIndex ); } diff --git a/src/engine/renderer/tr_vbo.cpp b/src/engine/renderer/tr_vbo.cpp index f085f8dc8c..7f1739c621 100644 --- a/src/engine/renderer/tr_vbo.cpp +++ b/src/engine/renderer/tr_vbo.cpp @@ -1033,7 +1033,11 @@ static void R_InitLightUBO() static void R_InitMaterialBuffers() { if( glConfig2.materialSystemAvailable ) { materialsSSBO.GenBuffer(); - commandBuffer.GenBuffer(); + surfaceDescriptorsSSBO.GenBuffer(); + surfaceCommandsSSBO.GenBuffer(); + culledCommandsBuffer.GenBuffer(); + surfaceBatchesUBO.GenBuffer(); + atomicCommandCountersBuffer.GenBuffer(); } } @@ -1156,7 +1160,11 @@ void R_ShutdownVBOs() if ( glConfig2.materialSystemAvailable ) { materialsSSBO.DelBuffer(); - commandBuffer.DelBuffer(); + 
surfaceDescriptorsSSBO.DelBuffer(); + surfaceCommandsSSBO.DelBuffer(); + culledCommandsBuffer.DelBuffer(); + surfaceBatchesUBO.DelBuffer(); + atomicCommandCountersBuffer.DelBuffer(); } tess.verts = tess.vertsBuffer = nullptr; diff --git a/src/engine/sys/sdl_glimp.cpp b/src/engine/sys/sdl_glimp.cpp index 7203c6b9bb..a704b42f6e 100644 --- a/src/engine/sys/sdl_glimp.cpp +++ b/src/engine/sys/sdl_glimp.cpp @@ -68,6 +68,8 @@ static Cvar::Cvar r_arb_debug_output( "r_arb_debug_output", "Use GL_ARB_debug_output if available", Cvar::NONE, true ); static Cvar::Cvar r_arb_depth_clamp( "r_arb_depth_clamp", "Use GL_ARB_depth_clamp if available", Cvar::NONE, true ); +static Cvar::Cvar r_arb_explicit_uniform_location( "r_arb_explicit_uniform_location", + "Use GL_ARB_explicit_uniform_location if available", Cvar::NONE, true ); static Cvar::Cvar r_arb_gpu_shader5( "r_arb_gpu_shader5", "Use GL_ARB_gpu_shader5 if available", Cvar::NONE, true ); static Cvar::Cvar r_arb_map_buffer_range( "r_arb_map_buffer_range", @@ -76,8 +78,14 @@ static Cvar::Cvar r_arb_multi_draw_indirect( "r_arb_multi_draw_indirect", "Use GL_ARB_multi_draw_indirect if available", Cvar::NONE, true ); static Cvar::Cvar r_arb_shader_draw_parameters( "r_arb_shader_draw_parameters", "Use GL_ARB_shader_draw_parameters if available", Cvar::NONE, true ); +static Cvar::Cvar r_arb_shader_atomic_counters( "r_arb_shader_atomic_counters", + "Use GL_ARB_shader_atomic_counters if available", Cvar::NONE, true ); +static Cvar::Cvar r_arb_shader_image_load_store( "r_arb_shader_image_load_store", + "Use GL_ARB_shader_image_load_store if available", Cvar::NONE, true ); static Cvar::Cvar r_arb_shader_storage_buffer_object( "r_arb_shader_storage_buffer_object", "Use GL_ARB_shader_storage_buffer_object if available", Cvar::NONE, true ); +static Cvar::Cvar r_arb_shading_language_420pack( "r_arb_shading_language_420pack", + "Use GL_ARB_shading_language_420pack if available", Cvar::NONE, true ); static Cvar::Cvar r_arb_sync( "r_arb_sync", 
"Use GL_ARB_sync if available", Cvar::NONE, true ); static Cvar::Cvar r_arb_texture_gather( "r_arb_texture_gather", @@ -1799,10 +1807,14 @@ static void GLimp_InitExtensions() Cvar::Latch( r_arb_compute_shader ); Cvar::Latch( r_arb_debug_output ); Cvar::Latch( r_arb_depth_clamp ); + Cvar::Latch( r_arb_explicit_uniform_location ); Cvar::Latch( r_arb_gpu_shader5 ); Cvar::Latch( r_arb_map_buffer_range ); Cvar::Latch( r_arb_multi_draw_indirect ); + Cvar::Latch( r_arb_shader_atomic_counters ); Cvar::Latch( r_arb_shader_draw_parameters ); + Cvar::Latch( r_arb_shader_image_load_store ); + Cvar::Latch( r_arb_shading_language_420pack ); Cvar::Latch( r_arb_shader_storage_buffer_object ); Cvar::Latch( r_arb_sync ); Cvar::Latch( r_arb_texture_gather ); @@ -2073,8 +2085,23 @@ static void GLimp_InitExtensions() // made required in OpenGL 4.0 glConfig2.multiDrawIndirectAvailable = LOAD_EXTENSION_WITH_TEST( ExtFlag_NONE, ARB_multi_draw_indirect, r_arb_multi_draw_indirect.Get() ); - glConfig2.materialSystemAvailable = glConfig2.shaderDrawParametersAvailable && glConfig2.SSBOAvailable && - glConfig2.multiDrawIndirectAvailable && glConfig2.bindlessTexturesAvailable + // made required in OpenGL 4.2 + glConfig2.shadingLanguage420PackAvailable = LOAD_EXTENSION_WITH_TEST( ExtFlag_NONE, ARB_shading_language_420pack, r_arb_shading_language_420pack.Get() ); + + // made required in OpenGL 4.3 + glConfig2.explicitUniformLocationAvailable = LOAD_EXTENSION_WITH_TEST( ExtFlag_NONE, ARB_explicit_uniform_location, r_arb_explicit_uniform_location.Get() ); + + // made required in OpenGL 4.2 + glConfig2.shaderImageLoadStoreAvailable = LOAD_EXTENSION_WITH_TEST( ExtFlag_NONE, ARB_shader_image_load_store, r_arb_shader_image_load_store.Get() ); + + // made required in OpenGL 4.2 + glConfig2.shaderAtomicCountersAvailable = LOAD_EXTENSION_WITH_TEST( ExtFlag_NONE, ARB_shader_atomic_counters, r_arb_shader_atomic_counters.Get() ); + + glConfig2.materialSystemAvailable = glConfig2.shaderDrawParametersAvailable 
&& glConfig2.SSBOAvailable + && glConfig2.multiDrawIndirectAvailable && glConfig2.bindlessTexturesAvailable + && glConfig2.computeShaderAvailable && glConfig2.shadingLanguage420PackAvailable + && glConfig2.explicitUniformLocationAvailable && glConfig2.shaderImageLoadStoreAvailable + && glConfig2.shaderAtomicCountersAvailable && r_smp->integer == 0 // Currently doesn't work with r_smp 1 && r_materialSystem.Get(); // Allow disabling it without disabling any extensions if ( r_materialSystem.Get() && r_smp->integer ) {