From 472dc72ee151ef145766aa5822a6704ee875556f Mon Sep 17 00:00:00 2001 From: Thomas Debesse Date: Sat, 5 Nov 2022 20:51:07 +0100 Subject: [PATCH 1/6] renderer: add alternate code path without ARB_half_float_vertex --- src/engine/renderer/tr_backend.cpp | 128 ++++++++++++------ src/engine/renderer/tr_local.h | 53 +++++++- src/engine/renderer/tr_model_iqm.cpp | 10 +- src/engine/renderer/tr_model_md3.cpp | 7 +- src/engine/renderer/tr_model_md5.cpp | 2 +- src/engine/renderer/tr_model_skel.cpp | 7 +- src/engine/renderer/tr_public.h | 1 + src/engine/renderer/tr_shade_calc.cpp | 119 ++++++++++++++--- src/engine/renderer/tr_surface.cpp | 185 ++++++++++++++++++++------ src/engine/renderer/tr_vbo.cpp | 99 +++++++++++--- src/engine/sys/sdl_glimp.cpp | 15 ++- 11 files changed, 484 insertions(+), 142 deletions(-) diff --git a/src/engine/renderer/tr_backend.cpp b/src/engine/renderer/tr_backend.cpp index a5de777d17..1db28f2b4a 100644 --- a/src/engine/renderer/tr_backend.cpp +++ b/src/engine/renderer/tr_backend.cpp @@ -4615,10 +4615,18 @@ void DebugDrawVertex(const vec3_t pos, unsigned int color, const vec2_t uv) { tess.verts[ tess.numVertexes ].xyz[ 1 ] = pos[ 1 ]; tess.verts[ tess.numVertexes ].xyz[ 2 ] = pos[ 2 ]; tess.verts[ tess.numVertexes ].color = colors; + if( uv ) { - tess.verts[ tess.numVertexes ].texCoords[ 0 ] = floatToHalf( uv[ 0 ] ); - tess.verts[ tess.numVertexes ].texCoords[ 1 ] = floatToHalf( uv[ 1 ] ); + if ( glConfig2.halfFloatVertexAvailable ) + { + floatToHalf2( uv, tess.verts[ tess.numVertexes ].f16TexCoords ); + } + else + { + Vector2Copy( uv, tess.verts[ tess.numVertexes ].texCoords ); + } } + tess.indexes[ tess.numIndexes ] = tess.numVertexes; tess.numVertexes++; tess.numIndexes++; @@ -5006,32 +5014,42 @@ const RenderCommand *StretchPicCommand::ExecuteSelf( ) const tess.verts[ numVerts ].xyz[ 2 ] = 0.0f; tess.verts[ numVerts + 0 ].color = backEnd.color2D; - tess.verts[ numVerts ].texCoords[ 0 ] = floatToHalf( s1 ); - tess.verts[ numVerts ].texCoords[ 1 ] = floatToHalf( t1 ); - tess.verts[ numVerts + 1 ].xyz[ 0 ] = x + w; tess.verts[ numVerts + 1 ].xyz[ 1 ] = y; tess.verts[ numVerts + 1 ].xyz[ 2 ] = 0.0f; tess.verts[ numVerts + 1 ].color = backEnd.color2D; - tess.verts[ numVerts + 1 ].texCoords[ 0 ] = floatToHalf( s2 ); - tess.verts[ numVerts + 1 ].texCoords[ 1 ] = floatToHalf( t1 ); - tess.verts[ numVerts + 2 ].xyz[ 0 ] = x + w; tess.verts[ numVerts + 2 ].xyz[ 1 ] = y + h; tess.verts[ numVerts + 2 ].xyz[ 2 ] = 0.0f; tess.verts[ numVerts + 2 ].color = backEnd.color2D; - tess.verts[ numVerts + 2 ].texCoords[ 0 ] = floatToHalf( s2 ); - tess.verts[ numVerts + 2 ].texCoords[ 1 ] = floatToHalf( t2 ); - tess.verts[ numVerts + 3 ].xyz[ 0 ] = x; tess.verts[ numVerts + 3 ].xyz[ 1 ] = y + h; tess.verts[ numVerts + 3 ].xyz[ 2 ] = 0.0f; tess.verts[ numVerts + 3 ].color = backEnd.color2D; - tess.verts[ numVerts + 3 ].texCoords[ 0 ] = floatToHalf( s1 ); - tess.verts[ numVerts + 3 ].texCoords[ 1 ] = floatToHalf( t2 ); + if ( glConfig2.halfFloatVertexAvailable ) + { + tess.verts[ numVerts ].f16TexCoords[ 0 ] = floatToHalf( s1 ); + tess.verts[ numVerts ].f16TexCoords[ 1 ] = floatToHalf( t1 ); + + tess.verts[ numVerts + 1 ].f16TexCoords[ 0 ] = floatToHalf( s2 ); + tess.verts[ numVerts + 1 ].f16TexCoords[ 1 ] = floatToHalf( t1 ); + + tess.verts[ numVerts + 2 ].f16TexCoords[ 0 ] = floatToHalf( s2 ); + tess.verts[ numVerts + 2 ].f16TexCoords[ 1 ] = floatToHalf( t2 ); + + tess.verts[ numVerts + 3 ].f16TexCoords[ 0 ] = floatToHalf( s1 ); + tess.verts[ numVerts + 3 ].f16TexCoords[ 1 ] = floatToHalf( t2 ); + } + else + { + Vector2Set( tess.verts[ numVerts ].texCoords, s1, t1 ); + Vector2Set( tess.verts[ numVerts + 1 ].texCoords, s2, t1 ); + Vector2Set( tess.verts[ numVerts + 2 ].texCoords, s2, t2 ); + Vector2Set( tess.verts[ numVerts + 3 ].texCoords, s1, t2 ); + } return this + 1; } @@ -5085,8 +5103,14 @@ const RenderCommand *Poly2dCommand::ExecuteSelf( ) const tess.verts[ tess.numVertexes ].xyz[ 1 ] = verts[ i ].xyz[ 1 ]; tess.verts[ tess.numVertexes ].xyz[ 2 ] = 0.0f; - tess.verts[ tess.numVertexes ].texCoords[ 0 ] = floatToHalf( verts[ i ].st[ 0 ] ); - tess.verts[ tess.numVertexes ].texCoords[ 1 ] = floatToHalf( verts[ i ].st[ 1 ] ); + if ( glConfig2.halfFloatVertexAvailable ) + { + floatToHalf2( verts[ i ].st, tess.verts[ tess.numVertexes ].f16TexCoords ); + } + else + { + Vector2Copy( verts[ i ].st, tess.verts[ tess.numVertexes ].texCoords ); + } tess.verts[ tess.numVertexes ].color = Color::Adapt( verts[ i ].modulate ); tess.numVertexes++; @@ -5142,8 +5166,14 @@ const RenderCommand *Poly2dIndexedCommand::ExecuteSelf( ) const tess.verts[ tess.numVertexes ].xyz[ 1 ] = verts[ i ].xyz[ 1 ] + translation[ 1 ]; tess.verts[ tess.numVertexes ].xyz[ 2 ] = 0.0f; - tess.verts[ tess.numVertexes ].texCoords[ 0 ] = floatToHalf( verts[ i ].st[ 0 ] ); - tess.verts[ tess.numVertexes ].texCoords[ 1 ] = floatToHalf( verts[ i ].st[ 1 ] ); + if ( glConfig2.halfFloatVertexAvailable ) + { + floatToHalf2( verts[ i ].st, tess.verts[ tess.numVertexes ].f16TexCoords ); + } + else + { + Vector2Copy( verts[ i ].st, tess.verts[ tess.numVertexes ].texCoords ); + } tess.verts[ tess.numVertexes ].color = Color::Adapt( verts[ i ].modulate ); tess.numVertexes++; @@ -5245,32 +5275,42 @@ const RenderCommand *RotatedPicCommand::ExecuteSelf( ) const tess.verts[ numVerts ].xyz[ 2 ] = 0.0f; tess.verts[ numVerts + 0 ].color = backEnd.color2D; - tess.verts[ numVerts ].texCoords[ 0 ] = floatToHalf( s1 ); - tess.verts[ numVerts ].texCoords[ 1 ] = floatToHalf( t1 ); - tess.verts[ numVerts + 1 ].xyz[ 0 ] = mx + cw - sh; tess.verts[ numVerts + 1 ].xyz[ 1 ] = my - sw - ch; tess.verts[ numVerts + 1 ].xyz[ 2 ] = 0.0f; tess.verts[ numVerts + 1 ].color = backEnd.color2D; - tess.verts[ numVerts + 1 ].texCoords[ 0 ] = floatToHalf( s2 ); - tess.verts[ numVerts + 1 ].texCoords[ 1 ] = floatToHalf( t1 ); - tess.verts[ numVerts + 2 ].xyz[ 0 ] = mx + cw + sh; tess.verts[ numVerts + 2 ].xyz[ 1 ] = my - sw + ch; tess.verts[ numVerts + 2 ].xyz[ 2 ] = 0.0f; tess.verts[ numVerts + 2 ].color = backEnd.color2D; - tess.verts[ numVerts + 2 ].texCoords[ 0 ] = floatToHalf( s2 ); - tess.verts[ numVerts + 2 ].texCoords[ 1 ] = floatToHalf( t2 ); - tess.verts[ numVerts + 3 ].xyz[ 0 ] = mx - cw + sh; tess.verts[ numVerts + 3 ].xyz[ 1 ] = my + sw + ch; tess.verts[ numVerts + 3 ].xyz[ 2 ] = 0.0f; tess.verts[ numVerts + 3 ].color = backEnd.color2D; - tess.verts[ numVerts + 3 ].texCoords[ 0 ] = floatToHalf( s1 ); - tess.verts[ numVerts + 3 ].texCoords[ 1 ] = floatToHalf( t2 ); + if ( glConfig2.halfFloatVertexAvailable ) + { + tess.verts[ numVerts ].f16TexCoords[ 0 ] = floatToHalf( s1 ); + tess.verts[ numVerts ].f16TexCoords[ 1 ] = floatToHalf( t1 ); + + tess.verts[ numVerts + 1 ].f16TexCoords[ 0 ] = floatToHalf( s2 ); + tess.verts[ numVerts + 1 ].f16TexCoords[ 1 ] = floatToHalf( t1 ); + + tess.verts[ numVerts + 2 ].f16TexCoords[ 0 ] = floatToHalf( s2 ); + tess.verts[ numVerts + 2 ].f16TexCoords[ 1 ] = floatToHalf( t2 ); + + tess.verts[ numVerts + 3 ].f16TexCoords[ 0 ] = floatToHalf( s1 ); + tess.verts[ numVerts + 3 ].f16TexCoords[ 1 ] = floatToHalf( t2 ); + } + else + { + Vector2Set( tess.verts[ numVerts ].texCoords, s1, t1 ); + Vector2Set( tess.verts[ numVerts + 1 ].texCoords, s2, t1 ); + Vector2Set( tess.verts[ numVerts + 2 ].texCoords, s2, t2 ); + Vector2Set( tess.verts[ numVerts + 3 ].texCoords, s1, t2 ); + } return this + 1; } @@ -5329,29 +5369,39 @@ const RenderCommand *GradientPicCommand::ExecuteSelf( ) const tess.verts[ numVerts ].xyz[ 1 ] = y; tess.verts[ numVerts ].xyz[ 2 ] = 0.0f; - tess.verts[ numVerts ].texCoords[ 0 ] = floatToHalf( s1 ); - tess.verts[ numVerts ].texCoords[ 1 ] = floatToHalf( t1 ); - tess.verts[ numVerts + 1 ].xyz[ 0 ] = x + w; tess.verts[ numVerts + 1 ].xyz[ 1 ] = y; tess.verts[ numVerts + 1 ].xyz[ 2 ] = 0.0f; - tess.verts[ numVerts + 1 ].texCoords[ 0 ] = floatToHalf( s2 ); - tess.verts[ numVerts + 1 ].texCoords[ 1 ] = floatToHalf( t1 ); - tess.verts[ numVerts + 2 ].xyz[ 0 ] = x + w; tess.verts[ numVerts + 2 ].xyz[ 1 ] = y + h; tess.verts[ numVerts + 2 ].xyz[ 2 ] = 0.0f; - tess.verts[ numVerts + 2 ].texCoords[ 0 ] = floatToHalf( s2 ); - tess.verts[ numVerts + 2 ].texCoords[ 1 ] = floatToHalf( t2 ); - tess.verts[ numVerts + 3 ].xyz[ 0 ] = x; tess.verts[ numVerts + 3 ].xyz[ 1 ] = y + h; tess.verts[ numVerts + 3 ].xyz[ 2 ] = 0.0f; - tess.verts[ numVerts + 3 ].texCoords[ 0 ] = floatToHalf( s1 ); - tess.verts[ numVerts + 3 ].texCoords[ 1 ] = floatToHalf( t2 ); + if ( glConfig2.halfFloatVertexAvailable ) + { + tess.verts[ numVerts ].f16TexCoords[ 0 ] = floatToHalf( s1 ); + tess.verts[ numVerts ].f16TexCoords[ 1 ] = floatToHalf( t1 ); + + tess.verts[ numVerts + 1 ].f16TexCoords[ 0 ] = floatToHalf( s2 ); + tess.verts[ numVerts + 1 ].f16TexCoords[ 1 ] = floatToHalf( t1 ); + + tess.verts[ numVerts + 2 ].f16TexCoords[ 0 ] = floatToHalf( s2 ); + tess.verts[ numVerts + 2 ].f16TexCoords[ 1 ] = floatToHalf( t2 ); + + tess.verts[ numVerts + 3 ].f16TexCoords[ 0 ] = floatToHalf( s1 ); + tess.verts[ numVerts + 3 ].f16TexCoords[ 1 ] = floatToHalf( t2 ); + } + else + { + Vector2Set( tess.verts[ numVerts ].texCoords, s1, t1 ); + Vector2Set( tess.verts[ numVerts + 1 ].texCoords, s2, t1 ); + Vector2Set( tess.verts[ numVerts + 2 ].texCoords, s2, t2 ); + Vector2Set( tess.verts[ numVerts + 3 ].texCoords, s1, t2 ); + } return this + 1; } diff --git a/src/engine/renderer/tr_local.h b/src/engine/renderer/tr_local.h index 39fd5b81cb..5e66a475a5 100644 --- a/src/engine/renderer/tr_local.h +++ b/src/engine/renderer/tr_local.h @@ -131,6 +131,13 @@ static inline f16_t floatToHalf( float in ) { return { uint16_t(((ui & 0x80000000) >> 16) | ((ui & 0x0fffe000) >> 13)) }; } + +static inline void floatToHalf2( const vec2_t in, f16vec2_t out ) +{ + out[ 0 ] = floatToHalf( in[ 0 ] ); + out[ 1 ] = floatToHalf( in[ 1 ] ); +} + static inline void floatToHalf( const vec4_t in, f16vec4_t out ) { out[ 0 ] = floatToHalf( in[ 0 ] ); @@ -138,12 +145,20 @@ static inline void floatToHalf( const vec4_t in, f16vec4_t out ) out[ 2 ] = floatToHalf( in[ 2 ] ); out[ 3 ] = floatToHalf( in[ 3 ] ); } + static inline float halfToFloat( f16_t in ) { static float scale = powf(2.0f, 127 - 15); uint32_t ui = (((unsigned int)in.bits & 0x8000) << 16) | (((unsigned int)in.bits & 0x7fff) << 13); return Util::bit_cast(ui) * scale; } + +static inline void halfToFloat2( const f16vec2_t in, vec2_t out ) +{ + out[ 0 ] = halfToFloat( in[ 0 ] ); + out[ 1 ] = halfToFloat( in[ 1 ] ); +} + static inline void halfToFloat( const f16vec4_t in, vec4_t out ) { out[ 0 ] = halfToFloat( in[ 0 ] ); @@ -201,6 +216,23 @@ static inline void halfToFloat( const f16vec4_t in, vec4_t out ) // max. 16 dynamic lights per plane #define LIGHT_PLANES ( MAX_REF_LIGHTS / 16 ) +struct glVertexShim_t +{ + GLenum floatFormat; +}; + +extern glVertexShim_t GL_vertexShim; + +static inline void glVertexSetHalfFloat() +{ + GL_vertexShim.floatFormat = GL_HALF_FLOAT; +} + +static inline void glVertexSetFloat() +{ + GL_vertexShim.floatFormat = GL_FLOAT; +} + struct glFboShim_t { /* Functions with same signature and similar purpose can be provided by: @@ -723,7 +755,12 @@ enum class realtimeLightingRenderer_t { LEGACY, TILED }; vec3_t *xyz; i16vec4_t *qtangent; u8vec4_t *color; - union { f16vec2_t *st; vec2_t *stf; }; + + union { + f16vec2_t *f16st; + vec2_t *st; + }; + int (*boneIndexes)[ 4 ]; vec4_t *boneWeights; @@ -2158,7 +2195,7 @@ enum class realtimeLightingRenderer_t { LEGACY, TILED }; vec4_t binormal; vec4_t normal; - f16vec2_t texCoords; + f16vec2_t f16TexCoords; char _pad[ 4 ]; uint32_t firstWeight; uint32_t numWeights; @@ -2292,7 +2329,7 @@ enum class realtimeLightingRenderer_t { LEGACY, TILED }; float *normals; float *tangents; float *bitangents; - f16_t *texcoords; + f16_t *f16TexCoords; byte *blendIndexes; byte *blendWeights; byte *colors; @@ -3275,11 +3312,17 @@ inline bool checkGLErrors() struct shaderVertex_t { vec3_t xyz; Color::Color32Bit color; + union { i16vec4_t qtangents; - f16vec4_t spriteOrientation; + f16vec4_t f16SpriteOrientation; + vec4_t spriteOrientation; + }; + + union { + f16vec4_t f16TexCoords; + vec4_t texCoords; }; - f16vec4_t texCoords; }; #ifdef GL_ARB_sync diff --git a/src/engine/renderer/tr_model_iqm.cpp b/src/engine/renderer/tr_model_iqm.cpp index c2a6faba48..5f0ccf406b 100644 --- a/src/engine/renderer/tr_model_iqm.cpp +++ b/src/engine/renderer/tr_model_iqm.cpp @@ -481,7 +481,7 @@ bool R_LoadIQModel( model_t *mod, const void *buffer, int filesize, size += header->num_vertexes * 3 * sizeof(float); // normals size += header->num_vertexes * 3 * sizeof(float); // tangents size += header->num_vertexes * 3 * sizeof(float); // bitangents - size += header->num_vertexes * 2 * sizeof(f16_t); // texcoords + size += header->num_vertexes * 2 * sizeof(f16_t); // f16TexCoords size += header->num_vertexes * 4 * sizeof(byte); // blendIndexes size += header->num_vertexes * 4 * sizeof(byte); // blendWeights size += header->num_vertexes * 4 * sizeof(byte); // colors @@ -541,8 +541,8 @@ bool R_LoadIQModel( model_t *mod, const void *buffer, int filesize, IQModel->bitangents = (float *)ptr; ptr = IQModel->bitangents + 3 * header->num_vertexes; - IQModel->texcoords = (f16_t *)ptr; - ptr = IQModel->texcoords + 2 * header->num_vertexes; + IQModel->f16TexCoords = (f16_t *)ptr; + ptr = IQModel->f16TexCoords + 2 * header->num_vertexes; IQModel->blendIndexes = (byte *)ptr; ptr = IQModel->blendIndexes + 4 * header->num_vertexes; @@ -725,7 +725,7 @@ bool R_LoadIQModel( model_t *mod, const void *buffer, int filesize, break; case IQM_TEXCOORD: for( int j = 0; j < n; j++ ) { - IQModel->texcoords[ j ] = floatToHalf( ((float *)IQMPtr( header, vertexarray->offset ))[ j ] ); + IQModel->f16TexCoords[ j ] = floatToHalf( ((float *)IQMPtr( header, vertexarray->offset ))[ j ] ); } break; case IQM_BLENDINDEXES: @@ -803,7 +803,7 @@ bool R_LoadIQModel( model_t *mod, const void *buffer, int filesize, vboData.qtangent = qtangentbuf; vboData.numFrames = 0; vboData.color = (u8vec4_t *)IQModel->colors; - vboData.st = (f16vec2_t *)IQModel->texcoords; + vboData.f16st = (f16vec2_t *)IQModel->f16TexCoords; vboData.boneIndexes = (int (*)[4])indexbuf; vboData.boneWeights = (vec4_t *)weightbuf; vboData.numVerts = IQModel->num_vertexes; diff --git a/src/engine/renderer/tr_model_md3.cpp b/src/engine/renderer/tr_model_md3.cpp index 57bace0fe0..67e93798f1 100644 --- a/src/engine/renderer/tr_model_md3.cpp +++ b/src/engine/renderer/tr_model_md3.cpp @@ -258,7 +258,7 @@ bool R_LoadMD3( model_t *mod, int lod, const void *buffer, const char *modName ) data.xyz = ( vec3_t * ) ri.Hunk_AllocateTempMemory( sizeof( *data.xyz ) * mdvModel->numFrames * surf->numVerts ); data.qtangent = ( i16vec4_t * ) ri.Hunk_AllocateTempMemory( sizeof( i16vec4_t ) * mdvModel->numFrames * surf->numVerts ); data.numFrames = mdvModel->numFrames; - data.st = ( f16vec2_t * ) ri.Hunk_AllocateTempMemory( sizeof( f16vec2_t ) * surf->numVerts ); + data.f16st = ( f16vec2_t * ) ri.Hunk_AllocateTempMemory( sizeof( f16vec2_t ) * surf->numVerts ); data.numVerts = surf->numVerts; // feed vertex XYZ @@ -273,8 +273,7 @@ bool R_LoadMD3( model_t *mod, int lod, const void *buffer, const char *modName ) // feed vertex texcoords for ( j = 0; j < surf->numVerts; j++ ) { - data.st[ j ][ 0 ] = floatToHalf( surf->st[ j ].st[ 0 ] ); - data.st[ j ][ 1 ] = floatToHalf( surf->st[ j ].st[ 1 ] ); + floatToHalf2( surf->st[ j ].st, data.f16st[ j ] ); } // calc and feed tangent spaces @@ -351,7 +350,7 @@ bool R_LoadMD3( model_t *mod, int lod, const void *buffer, const char *modName ) // vertex layout includes a color field, which is zeroed by default. vboSurf->vbo->attribBits |= ATTR_COLOR; - ri.Hunk_FreeTempMemory( data.st ); + ri.Hunk_FreeTempMemory( data.f16st ); ri.Hunk_FreeTempMemory( data.qtangent ); ri.Hunk_FreeTempMemory( data.xyz ); diff --git a/src/engine/renderer/tr_model_md5.cpp b/src/engine/renderer/tr_model_md5.cpp index f218c4b555..ed023b928d 100644 --- a/src/engine/renderer/tr_model_md5.cpp +++ b/src/engine/renderer/tr_model_md5.cpp @@ -380,7 +380,7 @@ bool R_LoadMD5( model_t *mod, const char *buffer, const char *modName ) { token = COM_ParseExt2( &buf_p, false ); texCoords[ j ][ k ] = atof( token ); - v->texCoords[ k ] = floatToHalf( texCoords[ j ][ k ] ); + v->f16TexCoords[ k ] = floatToHalf( texCoords[ j ][ k ] ); } // skip ) diff --git a/src/engine/renderer/tr_model_skel.cpp b/src/engine/renderer/tr_model_skel.cpp index 662e70dbd2..2a1fd567dc 100644 --- a/src/engine/renderer/tr_model_skel.cpp +++ b/src/engine/renderer/tr_model_skel.cpp @@ -124,7 +124,7 @@ srfVBOMD5Mesh_t *R_GenerateMD5VBOSurface( data.qtangent = ( i16vec4_t * ) ri.Hunk_AllocateTempMemory( sizeof( i16vec4_t ) * vertexesNum ); data.boneIndexes = ( int (*)[ 4 ] ) ri.Hunk_AllocateTempMemory( sizeof( *data.boneIndexes ) * vertexesNum ); data.boneWeights = ( vec4_t * ) ri.Hunk_AllocateTempMemory( sizeof( *data.boneWeights ) * vertexesNum ); - data.st = ( f16vec2_t * ) ri.Hunk_AllocateTempMemory( sizeof( f16vec2_t ) * vertexesNum ); + data.f16st = ( f16vec2_t * ) ri.Hunk_AllocateTempMemory( sizeof( f16vec2_t ) * vertexesNum ); data.numVerts = vertexesNum; indexes = ( glIndex_t * ) ri.Hunk_AllocateTempMemory( indexesNum * sizeof( glIndex_t ) ); @@ -160,7 +160,8 @@ srfVBOMD5Mesh_t *R_GenerateMD5VBOSurface( R_TBNtoQtangents( surf->verts[ j ].tangent, surf->verts[ j ].binormal, surf->verts[ j ].normal, data.qtangent[ j ] ); - Vector2Copy( surf->verts[ j ].texCoords, data.st[ j ] ); + // Model only supports half float for now. + Vector2Copy( surf->verts[ j ].f16TexCoords, data.f16st[ j ] ); for (unsigned k = 0; k < MAX_WEIGHTS; k++ ) { @@ -186,7 +187,7 @@ srfVBOMD5Mesh_t *R_GenerateMD5VBOSurface( vboSurf->vbo->attribBits |= ATTR_COLOR; ri.Hunk_FreeTempMemory( indexes ); - ri.Hunk_FreeTempMemory( data.st ); + ri.Hunk_FreeTempMemory( data.f16st ); ri.Hunk_FreeTempMemory( data.boneWeights ); ri.Hunk_FreeTempMemory( data.boneIndexes ); ri.Hunk_FreeTempMemory( data.qtangent ); diff --git a/src/engine/renderer/tr_public.h b/src/engine/renderer/tr_public.h index 62ac333898..0907983924 100644 --- a/src/engine/renderer/tr_public.h +++ b/src/engine/renderer/tr_public.h @@ -47,6 +47,7 @@ extern Cvar::Modified> r_fullscreen; struct glconfig2_t { bool textureCompressionRGTCAvailable; + bool halfFloatVertexAvailable; int glHighestMajor; int glHighestMinor; diff --git a/src/engine/renderer/tr_shade_calc.cpp b/src/engine/renderer/tr_shade_calc.cpp index adf3531f8e..5530582eb0 100644 --- a/src/engine/renderer/tr_shade_calc.cpp +++ b/src/engine/renderer/tr_shade_calc.cpp @@ -482,31 +482,67 @@ static void ComputeCorner( int firstVertex, int numVertexes ) v = &tess.verts[ firstVertex + i ]; Vector4Set( midtc, 0.0f, 0.0f, 0.0f, 0.0f ); - for( j = 0; j < 4; j++ ) { - halfToFloat( v[ j ].texCoords, tc ); - VectorAdd( tc, midtc, midtc ); - midtc[ 3 ] += tc[ 3 ]; + + if ( glConfig2.halfFloatVertexAvailable ) + { + for( j = 0; j < 4; j++ ) + { + halfToFloat( v[ j ].f16TexCoords, tc ); + VectorAdd( tc, midtc, midtc ); + midtc[ 3 ] += tc[ 3 ]; + } + } + else + { + for( j = 0; j < 4; j++ ) + { + Vector4Copy( v[ j ].texCoords, tc ); + VectorAdd( tc, midtc, midtc ); + midtc[ 3 ] += tc[ 3 ]; + } } midtc[ 0 ] = 0.25f * midtc[ 0 ]; midtc[ 1 ] = 0.25f * midtc[ 1 ]; - for ( j = 0; j < 4; j++ ) { - halfToFloat( v[ j ].texCoords, tc ); - if( tc[ 0 ] < midtc[ 0 ] ) { - tc[ 2 ] = -tc[ 2 ]; + if ( glConfig2.halfFloatVertexAvailable ) + { + for ( j = 0; j < 4; j++ ) + { + halfToFloat( v[ j ].f16TexCoords, tc ); + if( tc[ 0 ] < midtc[ 0 ] ) + { + tc[ 2 ] = -tc[ 2 ]; + } + if( tc[ 1 ] < midtc[ 1 ] ) + { + tc[ 3 ] = -tc[ 3 ]; + } + floatToHalf( tc, v[ j ].f16TexCoords ); } - if( tc[ 1 ] < midtc[ 1 ] ) { - tc[ 3 ] = -tc[ 3 ]; + } + else + { + for ( j = 0; j < 4; j++ ) + { + Vector4Copy( v[ j ].texCoords, tc ); + if( tc[ 0 ] < midtc[ 0 ] ) + { + tc[ 2 ] = -tc[ 2 ]; + } + if( tc[ 1 ] < midtc[ 1 ] ) + { + tc[ 3 ] = -tc[ 3 ]; + } + Vector4Copy( tc, v[ j ].texCoords ); } - floatToHalf( tc, v[ j ].texCoords ); } } } static void AutospriteDeform( int firstVertex, int numVertexes, int numIndexes ) { - int i, j; + int i; shaderVertex_t *v; vec3_t mid, delta; float radius; @@ -536,13 +572,30 @@ static void AutospriteDeform( int firstVertex, int numVertexes, int numIndexes ) radius = VectorLength( delta ) * 0.5f * M_SQRT2; // add 4 identical vertices - for ( j = 0; j < 4; j++ ) { - VectorCopy( mid, v[ j ].xyz ); - Vector4Set( v[ j ].spriteOrientation, - floatToHalf( 0 ), - floatToHalf( 0 ), - floatToHalf( 0 ), - floatToHalf( radius ) ); + VectorCopy( mid, v[ 0 ].xyz ); + VectorCopy( mid, v[ 1 ].xyz ); + VectorCopy( mid, v[ 2 ].xyz ); + VectorCopy( mid, v[ 3 ].xyz ); + + vec4_t orientation; + Vector4Set( orientation, 0.0f, 0.0f, 0.0f, radius ); + + if ( glConfig2.halfFloatVertexAvailable ) + { + f16vec4_t f16Orientation; + floatToHalf( orientation, f16Orientation ); + + Vector4Copy( f16Orientation, v[ 0 ].f16SpriteOrientation ); + Vector4Copy( f16Orientation, v[ 1 ].f16SpriteOrientation ); + Vector4Copy( f16Orientation, v[ 2 ].f16SpriteOrientation ); + Vector4Copy( f16Orientation, v[ 3 ].f16SpriteOrientation ); + } + else + { + Vector4Copy( orientation, v[ 0 ].spriteOrientation ); + Vector4Copy( orientation, v[ 1 ].spriteOrientation ); + Vector4Copy( orientation, v[ 2 ].spriteOrientation ); + Vector4Copy( orientation, v[ 3 ].spriteOrientation ); } } } @@ -663,15 +716,37 @@ static void Autosprite2Deform( int firstVertex, int numVertexes, int numIndexes k = 1; VectorSubtract( v1->xyz, mid[ k ], minor ); - // I guess this works, since the sign bit is the MSB for both floating point and integers - if ( ( DotProduct( cross, minor ) * static_cast(v1->texCoords[ 3 ].bits) ) < 0 ) { + + float dotProduct = DotProduct( cross, minor ); + int16_t factor; + + if ( glConfig2.halfFloatVertexAvailable ) + { + // I guess this works, since the sign bit is the MSB for both floating point and integers + factor = static_cast(v1->f16TexCoords[ 3 ].bits); + } + else + { + factor = (int16_t) v1->texCoords[ 3 ]; + } + + if ( ( dotProduct * factor ) < 0 ) { VectorNegate( major, orientation ); } else { VectorCopy( major, orientation ); } + orientation[ 3 ] = -lengths[ k ]; - floatToHalf( orientation, v1->spriteOrientation ); + if ( glConfig2.halfFloatVertexAvailable ) + { + floatToHalf( orientation, v1->f16SpriteOrientation ); + } + else + { + Vector4Copy( orientation, v1->spriteOrientation ); + } + VectorCopy( mid[ k ], v1->xyz ); } } diff --git a/src/engine/renderer/tr_surface.cpp b/src/engine/renderer/tr_surface.cpp index f433aaf861..b6038360ad 100644 --- a/src/engine/renderer/tr_surface.cpp +++ b/src/engine/renderer/tr_surface.cpp @@ -159,11 +159,22 @@ static void Tess_SurfaceVertsAndTris( const srfVert_t *verts, const srfTriangle_ VectorCopy( vert->xyz, tess.verts[ tess.numVertexes + i ].xyz ); Vector4Copy( vert->qtangent, tess.verts[ tess.numVertexes + i ].qtangents ); - tess.verts[ tess.numVertexes + i ].texCoords[ 0 ] = floatToHalf( vert->st[ 0 ] ); - tess.verts[ tess.numVertexes + i ].texCoords[ 1 ] = floatToHalf( vert->st[ 1 ] ); + if ( glConfig2.halfFloatVertexAvailable ) + { + tess.verts[ tess.numVertexes + i ].f16TexCoords[ 0 ] = floatToHalf( vert->st[ 0 ] ); + tess.verts[ tess.numVertexes + i ].f16TexCoords[ 1 ] = floatToHalf( vert->st[ 1 ] ); - tess.verts[ tess.numVertexes + i ].texCoords[ 2 ] = floatToHalf( vert->lightmap[ 0 ] ); - tess.verts[ tess.numVertexes + i ].texCoords[ 3 ] = floatToHalf( vert->lightmap[ 1 ] ); + tess.verts[ tess.numVertexes + i ].f16TexCoords[ 2 ] = floatToHalf( vert->lightmap[ 0 ] ); + tess.verts[ tess.numVertexes + i ].f16TexCoords[ 3 ] = floatToHalf( vert->lightmap[ 1 ] ); + } + else + { + tess.verts[ tess.numVertexes + i ].texCoords[ 0 ] = vert->st[ 0 ]; + tess.verts[ tess.numVertexes + i ].texCoords[ 1 ] = vert->st[ 1 ]; + + tess.verts[ tess.numVertexes + i ].texCoords[ 2 ] = vert->lightmap[ 0 ]; + tess.verts[ tess.numVertexes + i ].texCoords[ 3 ] = vert->lightmap[ 1 ]; + } tess.verts[ tess.numVertexes + i ].color = vert->lightColor; } @@ -280,17 +291,27 @@ void Tess_AddQuadStampExt( vec3_t origin, vec3_t left, vec3_t up, const Color::C Vector4Copy( qtangents, tess.verts[ ndx + 3 ].qtangents ); // standard square texture coordinates - tess.verts[ ndx ].texCoords[ 0 ] = floatToHalf( s1 ); - tess.verts[ ndx ].texCoords[ 1 ] = floatToHalf( t1 ); + if ( glConfig2.halfFloatVertexAvailable ) + { + tess.verts[ ndx ].f16TexCoords[ 0 ] = floatToHalf( s1 ); + tess.verts[ ndx ].f16TexCoords[ 1 ] = floatToHalf( t1 ); - tess.verts[ ndx + 1 ].texCoords[ 0 ] = floatToHalf( s2 ); - tess.verts[ ndx + 1 ].texCoords[ 1 ] = floatToHalf( t1 ); + tess.verts[ ndx + 1 ].f16TexCoords[ 0 ] = floatToHalf( s2 ); + tess.verts[ ndx + 1 ].f16TexCoords[ 1 ] = floatToHalf( t1 ); - tess.verts[ ndx + 2 ].texCoords[ 0 ] = floatToHalf( s2 ); - tess.verts[ ndx + 2 ].texCoords[ 1 ] = floatToHalf( t2 ); + tess.verts[ ndx + 2 ].f16TexCoords[ 0 ] = floatToHalf( s2 ); + tess.verts[ ndx + 2 ].f16TexCoords[ 1 ] = floatToHalf( t2 ); - tess.verts[ ndx + 3 ].texCoords[ 0 ] = floatToHalf( s1 ); - tess.verts[ ndx + 3 ].texCoords[ 1 ] = floatToHalf( t2 ); + tess.verts[ ndx + 3 ].f16TexCoords[ 0 ] = floatToHalf( s1 ); + tess.verts[ ndx + 3 ].f16TexCoords[ 1 ] = floatToHalf( t2 ); + } + else + { + Vector2Set( tess.verts[ ndx ].texCoords, s1, t1 ); + Vector2Set( tess.verts[ ndx + 1 ].texCoords, s2, t1 ); + Vector2Set( tess.verts[ ndx + 2 ].texCoords, s2, t2 ); + Vector2Set( tess.verts[ ndx + 3 ].texCoords, s1, t2 ); + } // constant color all the way around // should this be identity and let the shader specify from entity? @@ -370,17 +391,27 @@ void Tess_AddQuadStampExt2( vec4_t quadVerts[ 4 ], const Color::Color& color, fl Vector4Copy( qtangents, tess.verts[ ndx + 3 ].qtangents ); // standard square texture coordinates - tess.verts[ ndx ].texCoords[ 0 ] = floatToHalf( s1 ); - tess.verts[ ndx ].texCoords[ 1 ] = floatToHalf( t1 ); + if ( glConfig2.halfFloatVertexAvailable ) + { + tess.verts[ ndx ].f16TexCoords[ 0 ] = floatToHalf( s1 ); + tess.verts[ ndx ].f16TexCoords[ 1 ] = floatToHalf( t1 ); - tess.verts[ ndx + 1 ].texCoords[ 0 ] = floatToHalf( s2 ); - tess.verts[ ndx + 1 ].texCoords[ 1 ] = floatToHalf( t1 ); + tess.verts[ ndx + 1 ].f16TexCoords[ 0 ] = floatToHalf( s2 ); + tess.verts[ ndx + 1 ].f16TexCoords[ 1 ] = floatToHalf( t1 ); - tess.verts[ ndx + 2 ].texCoords[ 0 ] = floatToHalf( s2 ); - tess.verts[ ndx + 2 ].texCoords[ 1 ] = floatToHalf( t2 ); + tess.verts[ ndx + 2 ].f16TexCoords[ 0 ] = floatToHalf( s2 ); + tess.verts[ ndx + 2 ].f16TexCoords[ 1 ] = floatToHalf( t2 ); - tess.verts[ ndx + 3 ].texCoords[ 0 ] = floatToHalf( s1 ); - tess.verts[ ndx + 3 ].texCoords[ 1 ] = floatToHalf( t2 ); + tess.verts[ ndx + 3 ].f16TexCoords[ 0 ] = floatToHalf( s1 ); + tess.verts[ ndx + 3 ].f16TexCoords[ 1 ] = floatToHalf( t2 ); + } + else + { + Vector2Set( tess.verts[ ndx ].texCoords, s1, t1 ); + Vector2Set( tess.verts[ ndx + 1 ].texCoords, s2, t1 ); + Vector2Set( tess.verts[ ndx + 2 ].texCoords, s2, t2 ); + Vector2Set( tess.verts[ ndx + 3 ].texCoords, s1, t2 ); + } // constant color all the way around // should this be identity and let the shader specify from entity? @@ -434,16 +465,25 @@ void Tess_AddSprite( const vec3_t center, const Color::Color32Bit color, float r for ( i = 0; i < 4; i++ ) { vec4_t texCoord; - vec4_t orientation; - Vector4Set( texCoord, 0.5f * (i & 2), 0.5f * ( (i + 1) & 2 ), (i & 2) - 1.0f, ( (i + 1) & 2 ) - 1.0f ); VectorCopy( center, tess.verts[ ndx + i ].xyz ); tess.verts[ ndx + i ].color = color; - floatToHalf( texCoord, tess.verts[ ndx + i ].texCoords ); + + vec4_t orientation; Vector4Set( orientation, rotation, 0.0f, 0.0f, radius ); - floatToHalf( orientation, tess.verts[ ndx + i ].spriteOrientation ); + + if ( glConfig2.halfFloatVertexAvailable ) + { + floatToHalf( texCoord, tess.verts[ ndx + i ].f16TexCoords ); + floatToHalf( orientation, tess.verts[ ndx + i ].f16SpriteOrientation ); + } + else + { + Vector4Copy( texCoord, tess.verts[ ndx + i ].texCoords ); + Vector4Copy( orientation, tess.verts[ ndx + i ].spriteOrientation ); + } } tess.numVertexes += 4; @@ -692,8 +732,15 @@ static void Tess_SurfacePolychain( srfPoly_t *p ) VectorCopy(p->verts[i].xyz, tess.verts[tess.numVertexes + i].xyz); tess.verts[tess.numVertexes + i].color = Color::Adapt(p->verts[i].modulate); - tess.verts[tess.numVertexes + i].texCoords[0] = floatToHalf(p->verts[i].st[0]); - tess.verts[tess.numVertexes + i].texCoords[1] = floatToHalf(p->verts[i].st[1]); + + if ( glConfig2.halfFloatVertexAvailable ) + { + floatToHalf2( p->verts[i].st, tess.verts[tess.numVertexes + i].f16TexCoords ); + } + else + { + Vector2Copy( p->verts[i].st, tess.verts[tess.numVertexes + i].texCoords ); + } } // generate fan indexes into the tess array @@ -761,8 +808,15 @@ static void Tess_SurfacePolychain( srfPoly_t *p ) VectorCopy(p->verts[i].xyz, tess.verts[tess.numVertexes + i].xyz); tess.verts[tess.numVertexes + i].color = Color::Adapt(p->verts[i].modulate); Vector4Copy(qtangents, tess.verts[tess.numVertexes + i].qtangents); - tess.verts[tess.numVertexes + i].texCoords[0] = floatToHalf(p->verts[i].st[0]); - tess.verts[tess.numVertexes + i].texCoords[1] = floatToHalf(p->verts[i].st[1]); + + if ( glConfig2.halfFloatVertexAvailable ) + { + floatToHalf2( p->verts[i].st, tess.verts[tess.numVertexes + i].f16TexCoords ); + } + else + { + Vector2Copy( p->verts[i].st, tess.verts[tess.numVertexes + i].texCoords ); + } } ri.Hunk_FreeTempMemory( normals ); @@ -895,8 +949,14 @@ static void Tess_SurfaceMDV( mdvSurface_t *srf ) tess.verts[tess.numVertexes + j].xyz[1] = tmpVert[1]; tess.verts[tess.numVertexes + j].xyz[2] = tmpVert[2]; - tess.verts[tess.numVertexes + j].texCoords[0] = floatToHalf(st->st[0]); - tess.verts[tess.numVertexes + j].texCoords[1] = floatToHalf(st->st[1]); + if ( glConfig2.halfFloatVertexAvailable ) + { + floatToHalf2( st->st, tess.verts[tess.numVertexes + j].f16TexCoords ); + } + else + { + Vector2Copy( st->st, tess.verts[tess.numVertexes + j].texCoords ); + } } } else @@ -979,8 +1039,15 @@ static void Tess_SurfaceMDV( mdvSurface_t *srf ) VectorCopy(xyz[i], tess.verts[tess.numVertexes + i].xyz); Vector4Copy(qtangents, tess.verts[tess.numVertexes + i].qtangents); - tess.verts[tess.numVertexes + i].texCoords[0] = floatToHalf(st[i].st[0]); - tess.verts[tess.numVertexes + i].texCoords[1] = floatToHalf(st[i].st[1]); + + if ( glConfig2.halfFloatVertexAvailable ) + { + floatToHalf2( st[i].st, tess.verts[tess.numVertexes + i].f16TexCoords ); + } + else + { + Vector2Copy( st[i].st, tess.verts[tess.numVertexes + i].texCoords ); + } } ri.Hunk_FreeTempMemory( normals ); @@ -1090,7 +1157,14 @@ static void Tess_SurfaceMD5( md5Surface_t *srf ) VectorCopy( position, tessVertex->xyz ); - Vector2Copy( surfaceVertex->texCoords, tessVertex->texCoords ); + if ( glConfig2.halfFloatVertexAvailable ) + { + Vector2Copy( surfaceVertex->f16TexCoords, tessVertex->f16TexCoords ); + } + else + { + halfToFloat2( surfaceVertex->f16TexCoords, tessVertex->texCoords ); + } } } else @@ -1133,7 +1207,14 @@ static void Tess_SurfaceMD5( md5Surface_t *srf ) R_TBNtoQtangents( tangent, binormal, normal, tessVertex->qtangents ); - Vector2Copy( surfaceVertex->texCoords, tessVertex->texCoords ); + if ( glConfig2.halfFloatVertexAvailable ) + { + Vector2Copy( surfaceVertex->f16TexCoords, tessVertex->f16TexCoords ); + } + else + { + halfToFloat2( surfaceVertex->f16TexCoords, tessVertex->texCoords ); + } } } @@ -1250,7 +1331,8 @@ void Tess_SurfaceIQM( srfIQModel_t *surf ) { float *modelNormal = model->normals + 3 * firstVertex; float *modelTangent = model->tangents + 3 * firstVertex; float *modelBitangent = model->bitangents + 3 * firstVertex; - f16_t *modelTexcoord = model->texcoords + 2 * firstVertex; + // Model only supports half float for now. + f16_t *f16ModelTexCoords = model->f16TexCoords + 2 * firstVertex; shaderVertex_t *tessVertex = tess.verts + tess.numVertexes; shaderVertex_t *lastVertex = tessVertex + surf->num_vertexes; @@ -1267,7 +1349,7 @@ void Tess_SurfaceIQM( srfIQModel_t *surf ) { for ( ; tessVertex < lastVertex; tessVertex++, modelPosition += 3, modelNormal += 3, modelTangent += 3, modelBitangent += 3, - modelTexcoord += 2 ) + f16ModelTexCoords += 2 ) { vec3_t position = {}; @@ -1290,7 +1372,14 @@ void Tess_SurfaceIQM( srfIQModel_t *surf ) { VectorCopy( position, tessVertex->xyz ); - Vector2Copy( modelTexcoord, tessVertex->texCoords ); + if ( glConfig2.halfFloatVertexAvailable ) + { + Vector2Copy( f16ModelTexCoords, tessVertex->f16TexCoords ); + } + else + { + halfToFloat2( f16ModelTexCoords, tessVertex->texCoords ); + } } } else @@ -1301,7 +1390,7 @@ void Tess_SurfaceIQM( srfIQModel_t *surf ) { for ( ; tessVertex < lastVertex; tessVertex++, modelPosition += 3, modelNormal += 3, modelTangent += 3, modelBitangent += 3, - modelTexcoord += 2 ) + f16ModelTexCoords += 2 ) { vec3_t position = {}, tangent = {}, binormal = {}, normal = {}; @@ -1338,7 +1427,14 @@ void Tess_SurfaceIQM( srfIQModel_t *surf ) { R_TBNtoQtangents( tangent, binormal, normal, tessVertex->qtangents ); - Vector2Copy( modelTexcoord, tessVertex->texCoords ); + if ( glConfig2.halfFloatVertexAvailable ) + { + Vector2Copy( f16ModelTexCoords, tessVertex->f16TexCoords ); + } + else + { + halfToFloat2( f16ModelTexCoords, tessVertex->texCoords ); + } } } } @@ -1349,13 +1445,20 @@ void Tess_SurfaceIQM( srfIQModel_t *surf ) { for ( ; tessVertex < lastVertex; tessVertex++, modelPosition += 3, modelNormal += 3, modelTangent += 3, modelBitangent += 3, - modelTexcoord += 2 ) + f16ModelTexCoords += 2 ) { VectorScale( modelPosition, scale, tessVertex->xyz ); R_TBNtoQtangents( modelTangent, modelBitangent, modelNormal, tessVertex->qtangents ); - Vector2Copy( modelTexcoord, tessVertex->texCoords ); + if ( glConfig2.halfFloatVertexAvailable ) + { + Vector2Copy( f16ModelTexCoords, tessVertex->f16TexCoords ); + } + else + { + halfToFloat2( f16ModelTexCoords, tessVertex->texCoords ); + } } } diff --git a/src/engine/renderer/tr_vbo.cpp b/src/engine/renderer/tr_vbo.cpp index 74940b7550..edb2803d6d 100644 --- a/src/engine/renderer/tr_vbo.cpp +++ b/src/engine/renderer/tr_vbo.cpp @@ -33,7 +33,10 @@ struct fmtVertexAnim1 { const GLsizei sizeVertexAnim1 = sizeof( struct fmtVertexAnim1 ); // interleaved texcoords and colour in part 2 struct fmtVertexAnim2 { - f16vec2_t texcoord; + union { + f16vec2_t f16TexCoords; + vec2_t texCoords; + }; Color::Color32Bit colour; }; const GLsizei sizeVertexAnim2 = sizeof( struct fmtVertexAnim2 ); @@ -41,7 +44,10 @@ const GLsizei sizeVertexAnim2 = sizeof( struct fmtVertexAnim2 ); // interleaved data: position, texcoord, colour, qtangent, bonefactors struct fmtSkeletal { i16vec4_t position; - f16vec2_t texcoord; + union { + f16vec2_t f16TexCoords; + vec2_t texCoords; + }; Color::Color32Bit colour; i16vec4_t qtangents; u16vec4_t boneFactors; @@ -71,7 +77,7 @@ static uint32_t R_DeriveAttrBits( const vboData_t &data ) stateBits |= ATTR_COLOR; } - if ( data.st ) + if ( data.f16st || data.st ) { stateBits |= ATTR_TEXCOORD; } @@ -121,10 +127,16 @@ static void R_SetAttributeLayoutsVertexAnimation( VBO_t *vbo ) vbo->attribs[ ATTR_INDEX_POSITION2 ] = vbo->attribs[ ATTR_INDEX_POSITION ]; vbo->attribs[ ATTR_INDEX_QTANGENT2 ] = vbo->attribs[ ATTR_INDEX_QTANGENT ]; + size_t texCoordsOffset = glConfig2.halfFloatVertexAvailable + ? offsetof( struct fmtVertexAnim2, f16TexCoords ) + : offsetof( struct fmtVertexAnim2, texCoords ); + + texCoordsOffset += sizePart1; + vbo->attribs[ ATTR_INDEX_TEXCOORD ].numComponents = 2; - vbo->attribs[ ATTR_INDEX_TEXCOORD ].componentType = GL_HALF_FLOAT; + vbo->attribs[ ATTR_INDEX_TEXCOORD ].componentType = GL_vertexShim.floatFormat; vbo->attribs[ ATTR_INDEX_TEXCOORD ].normalize = GL_FALSE; - vbo->attribs[ ATTR_INDEX_TEXCOORD ].ofs = sizePart1 + offsetof( struct fmtVertexAnim2, texcoord ); + vbo->attribs[ ATTR_INDEX_TEXCOORD ].ofs = texCoordsOffset; vbo->attribs[ ATTR_INDEX_TEXCOORD ].stride = sizeVertexAnim2; vbo->attribs[ ATTR_INDEX_TEXCOORD ].frameOffset = 0; @@ -148,10 +160,14 @@ static void R_SetAttributeLayoutsSkeletal( VBO_t *vbo ) vbo->attribs[ ATTR_INDEX_POSITION ].stride = sizeSkeletal; vbo->attribs[ ATTR_INDEX_POSITION ].frameOffset = 0; + size_t texCoordsOffset = glConfig2.halfFloatVertexAvailable + ? offsetof( struct fmtSkeletal, f16TexCoords ) + : offsetof( struct fmtSkeletal, texCoords ); + vbo->attribs[ ATTR_INDEX_TEXCOORD ].numComponents = 2; - vbo->attribs[ ATTR_INDEX_TEXCOORD ].componentType = GL_HALF_FLOAT; + vbo->attribs[ ATTR_INDEX_TEXCOORD ].componentType = GL_vertexShim.floatFormat; vbo->attribs[ ATTR_INDEX_TEXCOORD ].normalize = GL_FALSE; - vbo->attribs[ ATTR_INDEX_TEXCOORD ].ofs = offsetof( struct fmtSkeletal, texcoord ); + vbo->attribs[ ATTR_INDEX_TEXCOORD ].ofs = texCoordsOffset; vbo->attribs[ ATTR_INDEX_TEXCOORD ].stride = sizeSkeletal; vbo->attribs[ ATTR_INDEX_TEXCOORD ].frameOffset = 0; @@ -203,17 +219,25 @@ static void R_SetAttributeLayoutsStatic( VBO_t *vbo ) vbo->attribs[ ATTR_INDEX_QTANGENT ].stride = sizeShaderVertex; vbo->attribs[ ATTR_INDEX_QTANGENT ].frameOffset = 0; + size_t texCoordsOffset = glConfig2.halfFloatVertexAvailable + ? offsetof( struct shaderVertex_t, f16TexCoords ) + : offsetof( struct shaderVertex_t, texCoords ); + vbo->attribs[ ATTR_INDEX_TEXCOORD ].numComponents = 4; - vbo->attribs[ ATTR_INDEX_TEXCOORD ].componentType = GL_HALF_FLOAT; + vbo->attribs[ ATTR_INDEX_TEXCOORD ].componentType = GL_vertexShim.floatFormat; vbo->attribs[ ATTR_INDEX_TEXCOORD ].normalize = GL_FALSE; - vbo->attribs[ ATTR_INDEX_TEXCOORD ].ofs = offsetof( shaderVertex_t, texCoords ); + vbo->attribs[ ATTR_INDEX_TEXCOORD ].ofs = texCoordsOffset; vbo->attribs[ ATTR_INDEX_TEXCOORD ].stride = sizeShaderVertex; vbo->attribs[ ATTR_INDEX_TEXCOORD ].frameOffset = 0; + size_t spriteOrientationOffset = glConfig2.halfFloatVertexAvailable + ? offsetof( struct shaderVertex_t, f16SpriteOrientation ) + : offsetof( struct shaderVertex_t, spriteOrientation ); + vbo->attribs[ ATTR_INDEX_ORIENTATION ].numComponents = 4; - vbo->attribs[ ATTR_INDEX_ORIENTATION ].componentType = GL_HALF_FLOAT; + vbo->attribs[ ATTR_INDEX_ORIENTATION ].componentType = GL_vertexShim.floatFormat; vbo->attribs[ ATTR_INDEX_ORIENTATION ].normalize = GL_FALSE; - vbo->attribs[ ATTR_INDEX_ORIENTATION ].ofs = offsetof( shaderVertex_t, spriteOrientation ); + vbo->attribs[ ATTR_INDEX_ORIENTATION ].ofs = spriteOrientationOffset; vbo->attribs[ ATTR_INDEX_ORIENTATION ].stride = sizeShaderVertex; vbo->attribs[ ATTR_INDEX_ORIENTATION ].frameOffset = 0; @@ -332,7 +356,15 @@ static void R_CopyVertexData( VBO_t *vbo, byte *outData, vboData_t inData ) if ( ( vbo->attribBits & ATTR_TEXCOORD ) ) { - Vector2Copy( inData.st[ v ], ptr[ v ].texcoord ); + if ( glConfig2.halfFloatVertexAvailable ) + { + Vector2Copy( inData.f16st[ v ], ptr[ v ].f16TexCoords ); + } + else + { + // Model only supports half float for now. + halfToFloat2( inData.f16st[ v ], ptr[ v ].texCoords ); + } } if ( ( vbo->attribBits & ATTR_COLOR ) ) @@ -369,14 +401,23 @@ static void R_CopyVertexData( VBO_t *vbo, byte *outData, vboData_t inData ) if ( ( vbo->attribBits & ATTR_TEXCOORD ) ) { - Vector2Copy( inData.stf[ v ], ptr[ 2 * v + 1 ] ); + // This is always float, only usage of st. + Vector2Copy( inData.st[ v ], ptr[ 2 * v + 1 ] ); } } else if ( vbo->layout == vboLayout_t::VBO_LAYOUT_VERTEX_ANIMATION ) { struct fmtVertexAnim2 *ptr = ( struct fmtVertexAnim2 * )( outData + ( vbo->framesNum * vbo->vertexesNum ) * sizeVertexAnim1 ); if ( ( vbo->attribBits & ATTR_TEXCOORD ) ) { - Vector2Copy( inData.st[ v ], ptr[ v ].texcoord ); + if ( glConfig2.halfFloatVertexAvailable ) + { + Vector2Copy( inData.f16st[ v ], ptr[ v ].f16TexCoords ); + } + else + { + // Model only supports half float for now. + halfToFloat2( inData.f16st[ v ], ptr[ v ].texCoords ); + } } if ( ( vbo->attribBits & ATTR_COLOR ) ) @@ -871,11 +912,25 @@ static void R_InitGenericVBOs() { VectorCopy( v2, verts[2].xyz ); VectorCopy( v3, verts[3].xyz ); - for ( int i = 0; i < 4; i++ ) { - verts[i].color = Color::White; - verts[i].texCoords[0] = floatToHalf( i < 2 ? 0.0f : 1.0f ); - verts[i].texCoords[1] = floatToHalf( i > 0 && i < 3 ? 1.0f : 0.0f ); + if ( glConfig2.halfFloatVertexAvailable ) + { + for ( int i = 0; i < 4; i++ ) + { + verts[i].color = Color::White; + verts[i].f16TexCoords[0] = floatToHalf( i < 2 ? 0.0f : 1.0f ); + verts[i].f16TexCoords[1] = floatToHalf( i > 0 && i < 3 ? 1.0f : 0.0f ); + } + } + else + { + for ( int i = 0; i < 4; i++ ) + { + verts[i].color = Color::White; + verts[i].texCoords[0] = i < 2 ? 0.0f : 1.0f; + verts[i].texCoords[1] = i > 0 && i < 3 ? 1.0f : 0.0f; + } } + surface->vbo = R_CreateStaticVBO2( "generic_VBO", surface->numVerts, verts, ATTR_POSITION | ATTR_TEXCOORD | ATTR_COLOR ); glIndex_t indexes[6] = { 0, 2, 1, 0, 3, 2 }; // Front @@ -945,7 +1000,7 @@ static void R_InitTileVBO() data.numVerts = w * h; data.xyz = ( vec3_t * ) ri.Hunk_AllocateTempMemory( data.numVerts * sizeof( *data.xyz ) ); - data.stf = ( vec2_t * ) ri.Hunk_AllocateTempMemory( data.numVerts * sizeof( *data.stf ) ); + data.st = ( vec2_t * ) ri.Hunk_AllocateTempMemory( data.numVerts * sizeof( *data.st ) ); for (y = 0; y < h; y++ ) { for (x = 0; x < w; x++ ) { @@ -953,7 +1008,9 @@ static void R_InitTileVBO() (2 * x - w + 1) * (1.0f / w), (2 * y - h + 1) * (1.0f / h), 0.0f ); - Vector2Set( data.stf[ y * w + x ], + + // This is always float, only usage of st. + Vector2Set( data.st[ y * w + x ], 2 * x * glState.tileStep[ 0 ] + glState.tileStep[ 0 ] - 1.0f, 2 * y * glState.tileStep[ 1 ] + glState.tileStep[ 1 ] - 1.0f ); } @@ -961,7 +1018,7 @@ static void R_InitTileVBO() tr.lighttileVBO = R_CreateStaticVBO( "lighttile_VBO", data, vboLayout_t::VBO_LAYOUT_XYST ); - ri.Hunk_FreeTempMemory( data.stf ); + ri.Hunk_FreeTempMemory( data.st ); ri.Hunk_FreeTempMemory( data.xyz ); } diff --git a/src/engine/sys/sdl_glimp.cpp b/src/engine/sys/sdl_glimp.cpp index 0d05b1bd84..4e6dc50a09 100644 --- a/src/engine/sys/sdl_glimp.cpp +++ b/src/engine/sys/sdl_glimp.cpp @@ -78,6 +78,8 @@ static Cvar::Cvar r_arb_gpu_shader5( "r_arb_gpu_shader5", "Use GL_ARB_gpu_shader5 if available", Cvar::NONE, true ); static Cvar::Cvar r_arb_half_float_pixel( "r_arb_half_float_pixel", "Use GL_ARB_half_float_pixel if available", Cvar::NONE, true ); +static Cvar::Cvar r_arb_half_float_vertex( "r_arb_half_float_vertex", + "Use GL_ARB_half_float_vertex if available", Cvar::NONE, true ); static Cvar::Cvar r_arb_indirect_parameters( "r_arb_indirect_parameters", "Use GL_ARB_indirect_parameters if available", Cvar::NONE, true ); static Cvar::Cvar r_arb_internalformat_query2( "r_arb_internalformat_query2", @@ -1943,6 +1945,7 @@ static bool LoadExt( int flags, bool hasExt, const char* name, bool test = true #define LOAD_EXTENSION_WITH_TEST(flags, ext, test) LoadExt(flags, GLEW_##ext, #ext, test) +glVertexShim_t GL_vertexShim; glFboShim_t GL_fboShim; static void GLimp_InitExtensions() @@ -1956,6 +1959,7 @@ static void GLimp_InitExtensions() Cvar::Latch( r_arb_explicit_uniform_location ); Cvar::Latch( r_arb_gpu_shader5 ); Cvar::Latch( r_arb_half_float_pixel ); + Cvar::Latch( r_arb_half_float_vertex ); Cvar::Latch( r_arb_indirect_parameters ); Cvar::Latch( r_arb_internalformat_query2 ); Cvar::Latch( r_arb_map_buffer_range ); @@ -2194,7 +2198,16 @@ static void GLimp_InitExtensions() // VAO and VBO // made required in OpenGL 3.0 - LOAD_EXTENSION( ExtFlag_REQUIRED | ExtFlag_CORE, ARB_half_float_vertex ); + glConfig2.halfFloatVertexAvailable = LOAD_EXTENSION_WITH_TEST( ExtFlag_CORE, ARB_half_float_vertex, r_arb_half_float_vertex.Get() ); + + if ( glConfig2.halfFloatVertexAvailable ) + { + glVertexSetHalfFloat(); + } + else + { + glVertexSetFloat(); + } { int flag = ExtFlag_CORE | ( workaround_glExtension_missingArbFbo_useExtFbo.Get() ? 0 : ExtFlag_REQUIRED ); From f2c3caa5d4cb0bc4f08861b5c37f89cb3c04434f Mon Sep 17 00:00:00 2001 From: Thomas Debesse Date: Sun, 9 Jun 2024 05:43:15 +0200 Subject: [PATCH 2/6] tr_shade_calc: optimize half-float round trip --- src/engine/renderer/tr_shade_calc.cpp | 48 +++++++++++++-------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/src/engine/renderer/tr_shade_calc.cpp b/src/engine/renderer/tr_shade_calc.cpp index 5530582eb0..03a17022ae 100644 --- a/src/engine/renderer/tr_shade_calc.cpp +++ b/src/engine/renderer/tr_shade_calc.cpp @@ -473,43 +473,31 @@ quads, rebuild them as forward facing sprites */ static void ComputeCorner( int firstVertex, int numVertexes ) { - int i, j; - shaderVertex_t *v; - vec4_t tc, midtc; - - for ( i = 0; i < numVertexes; i += 4 ) { + for ( int i = 0; i < numVertexes; i += 4 ) { // find the midpoint - v = &tess.verts[ firstVertex + i ]; + shaderVertex_t *v = &tess.verts[ firstVertex + i ]; + vec4_t midtc; Vector4Set( midtc, 0.0f, 0.0f, 0.0f, 0.0f ); if ( glConfig2.halfFloatVertexAvailable ) { - for( j = 0; j < 4; j++ ) + vec4_t tcs[ 4 ]; + + for( int j = 0; j < 4; j++ ) { + vec4_t &tc = tcs[ j ]; halfToFloat( v[ j ].f16TexCoords, tc ); VectorAdd( tc, midtc, midtc ); midtc[ 3 ] += tc[ 3 ]; } - } - else - { - for( j = 0; j < 4; j++ ) - { - Vector4Copy( v[ j ].texCoords, tc ); - VectorAdd( tc, midtc, midtc ); - midtc[ 3 ] += tc[ 3 ]; - } - } - midtc[ 0 ] = 0.25f * midtc[ 0 ]; - midtc[ 1 ] = 0.25f * midtc[ 1 ]; + midtc[ 0 ] = 0.25f * midtc[ 0 ]; + midtc[ 1 ] = 0.25f * midtc[ 1 ]; - if ( glConfig2.halfFloatVertexAvailable ) - { - for ( j = 0; j < 4; j++ ) + for ( int j = 0; j < 4; j++ ) { - halfToFloat( v[ j ].f16TexCoords, tc ); + vec4_t &tc = tcs[ j ]; if( tc[ 0 ] < midtc[ 0 ] ) { tc[ 2 ] = -tc[ 2 ]; @@ -523,7 +511,19 @@ static void ComputeCorner( int firstVertex, int numVertexes ) } else { - for ( j = 0; j < 4; j++ ) + vec4_t tc; + + for( int j = 0; j < 4; j++ ) + { + Vector4Copy( v[ j ].texCoords, tc ); + VectorAdd( tc, midtc, midtc ); + midtc[ 3 ] += tc[ 3 ]; + } + + midtc[ 0 ] = 0.25f * midtc[ 0 ]; + midtc[ 1 ] = 0.25f * midtc[ 1 ]; + + for ( int j = 0; j < 4; j++ ) { Vector4Copy( v[ j ].texCoords, tc ); if( tc[ 0 ] < midtc[ 0 ] ) From 4da795374f665fd33f2e7f6fa102715e44286f8c Mon Sep 17 00:00:00 2001 From: Thomas Debesse Date: Tue, 11 Jun 2024 08:41:30 +0200 Subject: [PATCH 3/6] sdl_glimp: update the error message when OpenGL is too old Also fixes the version printing. --- src/engine/sys/sdl_glimp.cpp | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/engine/sys/sdl_glimp.cpp b/src/engine/sys/sdl_glimp.cpp index 4e6dc50a09..6d4907b389 100644 --- a/src/engine/sys/sdl_glimp.cpp +++ b/src/engine/sys/sdl_glimp.cpp @@ -1737,6 +1737,10 @@ static rserr_t GLimp_StartDriverAndSetMode( int mode, bool fullscreen, bool bord rserr_t err = GLimp_SetMode(mode, fullscreen, bordered); + const char* glRequirements = + "You need a graphics card with drivers supporting at least\n" + "OpenGL 3.2 or OpenGL 2.1 with EXT_framebuffer_object."; + switch ( err ) { case rserr_t::RSERR_OK: @@ -1752,21 +1756,13 @@ static rserr_t GLimp_StartDriverAndSetMode( int mode, bool fullscreen, bool bord break; case rserr_t::RSERR_MISSING_GL: - Sys::Error( - "OpenGL is not available.\n\n" - "You need a graphic card with drivers supporting\n" - "at least OpenGL 3.2 or OpenGL 2.1 with\n" - "ARB_half_float_vertex and EXT_framebuffer_object." ); + Sys::Error( "OpenGL is not available.\n\n%s", glRequirements ); // Sys:Error calls OSExit() so the break and the return is unreachable. break; case rserr_t::RSERR_OLD_GL: - Sys::Error( - "OpenGL %d.%d is too old.\n\n" - "You need a graphic card with drivers supporting\n" - "at least OpenGL 3.2 or OpenGL 2.1 with\n" - "ARB_half_float_vertex and EXT_framebuffer_object." ); + Sys::Error( "OpenGL %d.%d is too old.\n\n%s", glConfig2.glMajor, glConfig2.glMinor, glRequirements ); // Sys:Error calls OSExit() so the break and the return is unreachable. break; From 8c53a2fca485568c4f9a98636c7459a00dc3733c Mon Sep 17 00:00:00 2001 From: Thomas Debesse Date: Tue, 11 Jun 2024 09:35:03 +0200 Subject: [PATCH 4/6] renderer: rename halfToFloat and floatToHalf as halfToFloat4 and floatToHalf4 to avoid confusion --- src/engine/renderer/tr_local.h | 4 ++-- src/engine/renderer/tr_shade_calc.cpp | 8 ++++---- src/engine/renderer/tr_surface.cpp | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/engine/renderer/tr_local.h b/src/engine/renderer/tr_local.h index 5e66a475a5..47b6f74f2b 100644 --- a/src/engine/renderer/tr_local.h +++ b/src/engine/renderer/tr_local.h @@ -138,7 +138,7 @@ static inline void floatToHalf2( const vec2_t in, f16vec2_t out ) out[ 1 ] = floatToHalf( in[ 1 ] ); } -static inline void floatToHalf( const vec4_t in, f16vec4_t out ) +static inline void floatToHalf4( const vec4_t in, f16vec4_t out ) { out[ 0 ] = floatToHalf( in[ 0 ] ); out[ 1 ] = floatToHalf( in[ 1 ] ); @@ -159,7 +159,7 @@ static inline void halfToFloat2( const f16vec2_t in, vec2_t out ) out[ 1 ] = halfToFloat( in[ 1 ] ); } -static inline void halfToFloat( const f16vec4_t in, vec4_t out ) +static inline void halfToFloat4( const f16vec4_t in, vec4_t out ) { out[ 0 ] = halfToFloat( in[ 0 ] ); out[ 1 ] = halfToFloat( in[ 1 ] ); diff --git a/src/engine/renderer/tr_shade_calc.cpp b/src/engine/renderer/tr_shade_calc.cpp index 03a17022ae..7a95da212f 100644 --- a/src/engine/renderer/tr_shade_calc.cpp +++ b/src/engine/renderer/tr_shade_calc.cpp @@ -487,7 +487,7 @@ static void ComputeCorner( int firstVertex, int numVertexes ) for( int j = 0; j < 4; j++ ) { vec4_t &tc = tcs[ j ]; - halfToFloat( v[ j ].f16TexCoords, tc ); + halfToFloat4( v[ j ].f16TexCoords, tc ); VectorAdd( tc, midtc, midtc ); midtc[ 3 ] += tc[ 3 ]; } @@ -506,7 +506,7 @@ static void ComputeCorner( int firstVertex, int numVertexes ) { tc[ 3 ] = -tc[ 3 ]; } - floatToHalf( tc, v[ j ].f16TexCoords ); + floatToHalf4( tc, v[ j ].f16TexCoords ); } } else @@ -583,7 +583,7 @@ static void AutospriteDeform( int firstVertex, int numVertexes, int numIndexes ) if ( glConfig2.halfFloatVertexAvailable ) { f16vec4_t f16Orientation; - floatToHalf( orientation, f16Orientation ); + floatToHalf4( orientation, f16Orientation ); Vector4Copy( f16Orientation, v[ 0 ].f16SpriteOrientation ); Vector4Copy( f16Orientation, v[ 1 ].f16SpriteOrientation ); @@ -740,7 +740,7 @@ static void Autosprite2Deform( int firstVertex, int numVertexes, int numIndexes if ( glConfig2.halfFloatVertexAvailable ) { - floatToHalf( orientation, v1->f16SpriteOrientation ); + floatToHalf4( orientation, v1->f16SpriteOrientation ); } else { diff --git a/src/engine/renderer/tr_surface.cpp b/src/engine/renderer/tr_surface.cpp index b6038360ad..5d3450cb3b 100644 --- a/src/engine/renderer/tr_surface.cpp +++ b/src/engine/renderer/tr_surface.cpp @@ -476,8 +476,8 @@ void Tess_AddSprite( const vec3_t center, const Color::Color32Bit color, float r if ( glConfig2.halfFloatVertexAvailable ) { - floatToHalf( texCoord, tess.verts[ ndx + i ].f16TexCoords ); - floatToHalf( orientation, tess.verts[ ndx + i ].f16SpriteOrientation ); + floatToHalf4( texCoord, tess.verts[ ndx + i ].f16TexCoords ); + floatToHalf4( orientation, tess.verts[ ndx + i ].f16SpriteOrientation ); } else { From 409101129d043b5b6de77105af3fae952b50a95d Mon Sep 17 00:00:00 2001 From: Thomas Debesse Date: Tue, 11 Jun 2024 21:21:29 +0200 Subject: [PATCH 5/6] tr_init: let gfxinfo tell about half-float usage --- src/engine/renderer/tr_init.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/engine/renderer/tr_init.cpp b/src/engine/renderer/tr_init.cpp index 8db3ab864c..84ca3514f2 100644 --- a/src/engine/renderer/tr_init.cpp +++ b/src/engine/renderer/tr_init.cpp @@ -1016,6 +1016,15 @@ ScreenshotCmd screenshotPNGRegistration("screenshotPNG", ssFormat_t::SSF_PNG, "p Log::Notice("%sMissing OpenGL extensions: %s", Color::ToString( Color::Red ), glConfig2.glMissingExtensionsString ); } + if ( glConfig2.halfFloatVertexAvailable ) + { + Log::Notice("%sUsing half-float vertex format.", Color::ToString( Color::Green )); + } + else + { + Log::Notice("%sMissing half-float vertex format.", Color::ToString( Color::Red )); + } + if ( glConfig.hardwareType == glHardwareType_t::GLHW_R300 ) { Log::Notice("%sUsing ATI R300 approximations.", Color::ToString( Color::Red )); From 3697d5315a16772e6cdad32e2de1b223cc0da1c0 Mon Sep 17 00:00:00 2001 From: Thomas Debesse Date: Mon, 30 Sep 2024 21:42:53 +0200 Subject: [PATCH 6/6] sdl_glimp: detect ATI RV300 and use float vertex instead of half-float vertex --- src/engine/sys/sdl_glimp.cpp | 68 +++++++++++++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 4 deletions(-) diff --git a/src/engine/sys/sdl_glimp.cpp b/src/engine/sys/sdl_glimp.cpp index 6d4907b389..97335bab12 100644 --- a/src/engine/sys/sdl_glimp.cpp +++ b/src/engine/sys/sdl_glimp.cpp @@ -133,6 +133,10 @@ static Cvar::Cvar workaround_glDriver_mesa_ati_rv300_disableRgba16Blend( "workaround.glDriver.mesa.ati.rv300.disableRgba16Blend", "Disable misdetected RGBA16 on Mesa driver on RV300 hardware", Cvar::NONE, true ); +static Cvar::Cvar workaround_glDriver_mesa_ati_rv300_useFloatVertex( + "workaround.glDriver.mesa.ati.rv300.useFloatVertex", + "Use float vertex instead of supported-but-slower half-float vertex on Mesa driver on RV300 hardware", + Cvar::NONE, true ); static Cvar::Cvar workaround_glDriver_mesa_ati_rv600_disableHyperZ( "workaround.glDriver.mesa.ati.rv600.disableHyperZ", "Disable Hyper-Z on Mesa driver on RV600 hardware", @@ -1507,7 +1511,7 @@ static bool IsSdlVideoRestartNeeded() { cardName = Str::Format( "AMD %s", codename ); - if ( Q_stristr( glConfig.renderer_string, cardName.c_str() ) ) + if ( Str::IsPrefix( cardName, glConfig.renderer_string ) ) { foundRv600 = true; break; @@ -2192,9 +2196,65 @@ static void GLimp_InitExtensions() glConfig2.textureAnisotropy = std::max( std::min( r_ext_texture_filter_anisotropic.Get(), glConfig2.maxTextureAnisotropy ), 1.0f ); } - // VAO and VBO - // made required in OpenGL 3.0 - glConfig2.halfFloatVertexAvailable = LOAD_EXTENSION_WITH_TEST( ExtFlag_CORE, ARB_half_float_vertex, r_arb_half_float_vertex.Get() ); + /* We call RV300 the first generation of R300 cards, to make a difference + with RV400 and RV500 cards that are also supported by the Mesa r300 driver. + + Mesa r300 implements half-float vertex for the RV300 hardware generation, + but it is likely emulated and it is very slow. We better use float vertex + instead. */ + { + bool halfFloatVertexEnabled = r_arb_half_float_vertex.Get(); + + if ( glConfig2.driverVendor == glDriverVendor_t::MESA + && glConfig2.hardwareVendor == glHardwareVendor_t::ATI ) + { + bool foundRv300 = false; + + std::string cardName = "unknown ATI RV3xx"; + + static const std::string codenames[] = { + "R300", "R350", "R360", + "RV350", "RV360", "RV370", "RV380", + }; + + for ( auto& codename : codenames ) + { + cardName = Str::Format( "ATI %s", codename ); + + if ( Str::IsPrefix( cardName, glConfig.renderer_string ) ) + { + foundRv300 = true; + break; + } + } + + /* The RV300 generation only has 64 ALU instructions while RV400 and RV500 + have 512 of them, so we can also use that value to detect RV300. */ + if ( !foundRv300 ) + { + if ( glConfig.hardwareType == glHardwareType_t::GLHW_R300 + && glConfig2.maxAluInstructions == 64 ) + { + foundRv300 = true; + } + } + + if ( foundRv300 && workaround_glDriver_mesa_ati_rv300_useFloatVertex.Get() ) + { + logger.Warn( "Found slow Mesa half-float vertex implementation with %s card, disabling ARB_half_float_vertex.", cardName ); + halfFloatVertexEnabled = false; + } + } + + // VAO and VBO + // made required in OpenGL 3.0 + glConfig2.halfFloatVertexAvailable = LOAD_EXTENSION_WITH_TEST( ExtFlag_CORE, ARB_half_float_vertex, halfFloatVertexEnabled ); + + if ( !halfFloatVertexEnabled ) + { + logger.Warn( "Missing half-float vertex, using float vertex instead." ); + } + } if ( glConfig2.halfFloatVertexAvailable ) {