From eafdacf548695168f0019d20b578c8be37674f9c Mon Sep 17 00:00:00 2001 From: toxieainc Date: Sun, 13 Oct 2024 22:53:29 +0200 Subject: [PATCH 1/3] improve edge test precision of quad test, and add missing '----'-area-case support for twisted/bow-tie quads ..and align triangle code more to the quad code for easier comparisons also generalize mipmap0-only optimization to all non-blended cases related to #201 --- Src/Graphics/New3D/R3DShaderCommon.h | 59 +++++++------ Src/Graphics/New3D/R3DShaderQuads.h | 113 +++++++++--------------- Src/Graphics/New3D/R3DShaderTriangles.h | 70 +++++++-------- 3 files changed, 105 insertions(+), 137 deletions(-) diff --git a/Src/Graphics/New3D/R3DShaderCommon.h b/Src/Graphics/New3D/R3DShaderCommon.h index 310d7749..ce13e8cc 100644 --- a/Src/Graphics/New3D/R3DShaderCommon.h +++ b/Src/Graphics/New3D/R3DShaderCommon.h @@ -14,32 +14,32 @@ vec4 ExtractColour(int type, uint value) { vec4 c = vec4(0.0); - if(type==0) { // T1RGB5 - c.r = float((value >> 10) & 0x1Fu); - c.g = float((value >> 5 ) & 0x1Fu); - c.b = float((value ) & 0x1Fu); + if(type==0) { // T1RGB5 + c.r = float((value >> 10) & 0x1Fu); + c.g = float((value >> 5 ) & 0x1Fu); + c.b = float((value ) & 0x1Fu); c.rgb *= (1.0/31.0); - c.a = 1.0 - float((value >> 15) & 0x1u); + c.a = 1.0 - float((value >> 15) & 0x1u); } else if(type==1) { // Interleaved A4L4 (low byte) - c.rgb = vec3(float(value&0xFu)); - c.a = float((value >> 4) & 0xFu); + c.rgb = vec3(float(value&0xFu)); + c.a = float((value >> 4) & 0xFu); c *= (1.0/15.0); } else if(type==2) { - c.a = float(value&0xFu); + c.a = float(value&0xFu); c.rgb = vec3(float((value >> 4) & 0xFu)); - c *= (1.0/15.0); + c *= (1.0/15.0); } else if(type==3) { - c.rgb = vec3(float((value>>8)&0xFu)); - c.a = float((value >> 12) & 0xFu); - c *= (1.0/15.0); + c.rgb = vec3(float((value>>8)&0xFu)); + c.a = float((value >> 12) & 0xFu); + c *= (1.0/15.0); } else if(type==4) { - c.a = float((value>>8)&0xFu); + c.a = float((value>>8)&0xFu); c.rgb = vec3(float((value >> 12) & 0xFu)); - c *= (1.0/15.0); + c *= (1.0/15.0); } else if(type==5) { c = vec4(float(value&0xFFu) / 255.0); @@ -145,7 +145,6 @@ float LinearTexLocations(int wrapMode, float size, float u, out float u0, out fl return fract(u); // return weight } else { // mirror + mirror clamp - both are the same since the edge pixels are repeated anyway - float odd = floor(mod(u, 2.0)); // odd values are mirrored if(odd > 0.0) { @@ -161,7 +160,7 @@ float LinearTexLocations(int wrapMode, float size, float u, out float u0, out fl if(u0 < 0.0) u0 = 0.0; if(u1 >= 1.0) u1 = 1.0 - halfTexelSize; - + return fract(u); // return weight } } @@ -173,9 +172,9 @@ vec4 texBiLinear(usampler2D texSampler, ivec2 wrapMode, vec2 texSize, ivec2 texP float b = LinearTexLocations(wrapMode.t, texSize.y, texCoord.y, ty[0], ty[1]); vec4 p0q0 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[0]) * texSize + texPos),level), level).r); - vec4 p1q0 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[0]) * texSize + texPos),level), level).r); - vec4 p0q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[1]) * texSize + texPos),level), level).r); - vec4 p1q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[1]) * texSize + texPos),level), level).r); + vec4 p1q0 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[0]) * texSize + texPos),level), level).r); + vec4 p0q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[1]) * texSize + texPos),level), level).r); + vec4 p1q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[1]) * texSize + texPos),level), level).r); if(alphaTest) { if(p0q0.a > p1q0.a) { p1q0.rgb = p0q0.rgb; } @@ -192,10 +191,10 @@ vec4 texBiLinear(usampler2D texSampler, ivec2 wrapMode, vec2 texSize, ivec2 texP } // Interpolation in X direction. - vec4 pInterp_q0 = mix( p0q0, p1q0, a ); // Interpolates top row in X direction. - vec4 pInterp_q1 = mix( p0q1, p1q1, a ); // Interpolates bottom row in X direction. + vec4 pInterp_q0 = mix( p0q0, p1q0, a ); // Interpolates top row in X direction. + vec4 pInterp_q1 = mix( p0q1, p1q1, a ); // Interpolates bottom row in X direction. - return mix( pInterp_q0, pInterp_q1, b ); // Interpolate in Y direction. + return mix( pInterp_q0, pInterp_q1, b ); // Interpolate in Y direction. } vec4 GetTextureValue() @@ -206,8 +205,8 @@ vec4 GetTextureValue() int iLevel = int(fLevel); - ivec2 tex1Pos = GetTexturePosition(iLevel, ivec2(baseTexInfo.xy)); - ivec2 tex1Size = GetTextureSize(iLevel, ivec2(baseTexInfo.zw)); + ivec2 tex1Pos = GetTexturePosition(iLevel, baseTexInfo.xy); + ivec2 tex1Size = GetTextureSize(iLevel, baseTexInfo.zw); vec4 tex1Data = texBiLinear(textureBank[texturePage], textureWrapMode, vec2(tex1Size), tex1Pos, fsTexCoord, iLevel); // init second texel with blank data to avoid any potentially undefined behavior @@ -216,13 +215,15 @@ vec4 GetTextureValue() float blendFactor = 0.0; // if LOD < 0, no need to blend with next mipmap level; slight performance boost - if (lod > 0.0) + // while at it, just generalize to all cases where only one mip level needs to be touched + float ffL = fract(fLevel); + if (ffL > 0.0) { - ivec2 tex2Pos = GetTexturePosition(iLevel+1, ivec2(baseTexInfo.xy)); - ivec2 tex2Size = GetTextureSize(iLevel+1, ivec2(baseTexInfo.zw)); + ivec2 tex2Pos = GetTexturePosition(iLevel+1, baseTexInfo.xy); + ivec2 tex2Size = GetTextureSize(iLevel+1, baseTexInfo.zw); tex2Data = texBiLinear(textureBank[texturePage], textureWrapMode, vec2(tex2Size), tex2Pos, fsTexCoord, iLevel+1); - blendFactor = fract(fLevel); + blendFactor = ffL; } else if (microTexture && lod < -microTextureMinLOD) { @@ -240,7 +241,7 @@ vec4 GetTextureValue() tex1Data = mix(tex1Data, tex2Data, blendFactor); if(textureInverted) { - tex1Data.rgb = vec3(1.0) - vec3(tex1Data.rgb); + tex1Data.rgb = vec3(1.0) - tex1Data.rgb; } if (alphaTest) { diff --git a/Src/Graphics/New3D/R3DShaderQuads.h b/Src/Graphics/New3D/R3DShaderQuads.h index 097c8bd2..da6d12e3 100644 --- a/Src/Graphics/New3D/R3DShaderQuads.h +++ b/Src/Graphics/New3D/R3DShaderQuads.h @@ -59,14 +59,14 @@ float CalcBackFace(in vec3 viewVertex) void main(void) { - vs_out.viewVertex = vec3(modelMat * inVertex); - vs_out.viewNormal = (mat3(modelMat) * inNormal) / modelScale; + vs_out.viewVertex = (modelMat * inVertex).xyz; + vs_out.viewNormal = (mat3(modelMat) / modelScale) * inNormal; vs_out.discardPoly = CalcBackFace(vs_out.viewVertex); - vs_out.color = GetColour(inColour); + vs_out.color = GetColour(inColour); vs_out.texCoord = inTexCoord; vs_out.fixedShade = inFixedShade; - vs_out.LODBase = -vs_out.discardPoly * cota * inTextureNP; - gl_Position = projMat * modelMat * inVertex; + vs_out.LODBase = vs_out.discardPoly * -cota * inTextureNP; + gl_Position = (projMat * modelMat) * inVertex; } )glsl"; @@ -114,7 +114,7 @@ float DifferenceOfProducts(float a, float b, float c, float d) void main(void) { - if(gs_in[0].discardPoly > 0) { + if(gs_in[0].discardPoly > 0.0) { return; //emulate back face culling here (all vertices in poly have same value) } @@ -159,7 +159,7 @@ void main(void) // | | | \ | // 0----3 0----2 // - int reorder[4] = int[]( 1, 0, 2, 3 ); + const int reorder[4] = int[4]( 1, 0, 2, 3 ); int ii = reorder[i]; for (int j=0; j<4; j++) { @@ -246,8 +246,8 @@ in GS_OUT vec3 fsViewVertex; vec3 fsViewNormal; vec2 fsTexCoord; -float fsFixedShade; vec4 fsColor; +float fsFixedShade; float fsLODBase; //outputs @@ -266,87 +266,54 @@ float SqrLength(vec2 a); void QuadraticInterpolation() { - vec2 s[4]; - float A[4]; - - for (int i=0; i<4; i++) { - s[i] = fs_in.v[i]; - A[i] = fs_in.area[i]; - } - - float D[4]; - float r[4]; - - for (int i=0; i<4; i++) { - int i_next = (i+1)%4; - D[i] = dot(s[i], s[i_next]); - r[i] = length(s[i]); - if (fs_in.oneOverW[i] < 0.0) { // is w[i] negative? - r[i] = -r[i]; - } - } - - float t[4]; + float u[4]; + for (int i=0; i<4; i++) + u[i] = length(fs_in.v[i]) * sign(fs_in.oneOverW[i]); // is w[i] negative? + precise float t[4]; for (int i=0; i<4; i++) { int i_next = (i+1)%4; - if(A[i]==0.0) t[i] = 0.0; // check for zero area + div by zero - else t[i] = (r[i]*r[i_next] - D[i]) / A[i]; + if(fs_in.area[i]==0.0) t[i] = 0.0; // check for zero area to avoid div by zero + else t[i] = fma(u[i],u[i_next], -dot(fs_in.v[i],fs_in.v[i_next])) / fs_in.area[i]; } - float uSum = 0.0; - float u[4]; + int lambdaSignCount = 0; // to discard fragments if all the weights are neither all negative nor all positive (=outside the convex/concave/crossed quad). for (uint i=0; i<4; i++) { uint i_prev = (i-1)%4; - u[i] = (t[i_prev] + t[i]) / r[i]; - uSum += u[i]; + u[i] = (t[i_prev] + t[i]) / u[i]; + lambdaSignCount += (t[i_prev] < -t[i]) ? -1 : 1; } - float lambda[4]; - - for (int i=0; i<4; i++) { - lambda[i] = u[i] / uSum; - } - - /* Discard fragments when all the weights are neither all negative nor all positive. */ - - int lambdaSignCount = 0; - - for (int i=0; i<4; i++) { - if (fs_in.oneOverW[i] * lambda[i] < 0.0) { - lambdaSignCount--; - } else { - lambdaSignCount++; - } - } - if (lambdaSignCount != 4) { + if (lambdaSignCount == 0) { // one can either check for == 0 or abs(...) != 4, both should(!) be equivalent (but in practice its not due to precision issues, but these cases are extremely rare) if(!gl_HelperInvocation) { discard; } } - float interp_oneOverW = 0.0; - fsViewVertex = vec3(0.0); fsViewNormal = vec3(0.0); fsTexCoord = vec2(0.0); fsFixedShade = 0.0; + float interp_oneOverW = 0.0; + float uSum = 0.0; fsColor = fs_in.color; fsLODBase = fs_in.LODBase; for (int i=0; i<4; i++) { - fsViewVertex += lambda[i] * fs_in.viewVertex[i]; - fsViewNormal += lambda[i] * fs_in.viewNormal[i]; - fsTexCoord += lambda[i] * fs_in.texCoord[i]; - fsFixedShade += lambda[i] * fs_in.fixedShade[i]; - interp_oneOverW += lambda[i] * fs_in.oneOverW[i]; + fsViewVertex += u[i] * fs_in.viewVertex[i]; + fsViewNormal += u[i] * fs_in.viewNormal[i]; + fsTexCoord += u[i] * fs_in.texCoord[i]; + fsFixedShade += u[i] * fs_in.fixedShade[i]; + interp_oneOverW += u[i] * fs_in.oneOverW[i]; + uSum += u[i]; } - fsViewVertex /= interp_oneOverW; - fsViewNormal /= interp_oneOverW; - fsTexCoord /= interp_oneOverW; - fsFixedShade /= interp_oneOverW; + float inv = 1.0/interp_oneOverW; + fsViewVertex *= inv; + fsViewNormal *= inv; + fsTexCoord *= inv; + fsFixedShade *= inv; vec4 vertex; float depth; @@ -363,7 +330,7 @@ void QuadraticInterpolation() } else { vertex.z = projMat[2][2] * fsViewVertex.z + projMat[3][2]; // standard projMat * vertex - but just using Z components - depth = vertex.z * interp_oneOverW; + depth = vertex.z * (interp_oneOverW/uSum); } gl_FragDepth = depth; @@ -386,7 +353,7 @@ void main() } colData = fsColor; - Step15Luminous(colData); // no-op for step 2.0+ + Step15Luminous(colData); // no-op for step 2.0+ finalData = tex1Data * colData; if (finalData.a < (1.0/32.0)) { // basically chuck out any totally transparent pixels value = 1/16 the smallest transparency level h/w supports @@ -454,7 +421,7 @@ void main() // Total light intensity: sum of all components lightIntensity = vec3(sunFactor*lighting[1].x + lighting[1].y); // diffuse + ambient - lightIntensity.rgb += spotColor*lobeEffect; + lightIntensity += spotColor*lobeEffect; // Upper clamp is optional, step 1.5+ games will drive brightness beyond 100% if(intensityClamp) { @@ -473,10 +440,10 @@ void main() // Always clamp floor to zero float NdotL = max(0.0, sunFactor); - vec4 expIndex = vec4(8.0, 16.0, 32.0, 64.0); - vec4 multIndex = vec4(1.6, 1.6, 2.4, 3.2); + const float expIndex[4] = float[4](8.0, 16.0, 32.0, 64.0); + const float multIndex[4] = float[4](1.6, 1.6, 2.4, 3.2); float exponent = expIndex[int(shininess)]; - + specularFactor = pow(NdotL, exponent); specularFactor *= multIndex[int(shininess)]; } @@ -487,7 +454,7 @@ void main() vec3 R = reflect(-sunVector, fsViewNormal); specularFactor = max(0.0, R.z); } - + specularFactor *= specularValue; specularFactor *= lighting[1].x; @@ -496,7 +463,7 @@ void main() finalData.a = max(finalData.a, specularFactor); } - finalData.rgb += vec3(specularFactor); + finalData.rgb += specularFactor; } } @@ -506,7 +473,7 @@ void main() // Spotlight on fog vec3 lSpotFogColor = spotFogColor * fogAttenuation * fogColour.rgb * lobeFogEffect; - // Fog & spotlight applied + // Fog & spotlight applied finalData.rgb = mix(finalData.rgb, fogData.rgb + lSpotFogColor, fogData.a); // Write outputs to colour buffers diff --git a/Src/Graphics/New3D/R3DShaderTriangles.h b/Src/Graphics/New3D/R3DShaderTriangles.h index 910ae782..4fef5746 100644 --- a/Src/Graphics/New3D/R3DShaderTriangles.h +++ b/Src/Graphics/New3D/R3DShaderTriangles.h @@ -14,21 +14,21 @@ uniform mat4 projMat; uniform bool translatorMap; // attributes -in vec4 inVertex; -in vec3 inNormal; -in vec2 inTexCoord; -in vec4 inColour; -in vec3 inFaceNormal; // used to emulate r3d culling -in float inFixedShade; -in float inTextureNP; +in vec4 inVertex; +in vec3 inNormal; +in vec2 inTexCoord; +in vec3 inFaceNormal; // used to emulate r3d culling +in float inFixedShade; +in vec4 inColour; +in float inTextureNP; // outputs to fragment shader out vec3 fsViewVertex; out vec3 fsViewNormal; // per vertex normal vector out vec2 fsTexCoord; out vec4 fsColor; -out float fsDiscard; // can't have varying bool (glsl spec) out float fsFixedShade; +out float fsDiscardPoly; // can't have varying bool (glsl spec) out float fsLODBase; vec4 GetColour(vec4 colour) @@ -46,8 +46,8 @@ vec4 GetColour(vec4 colour) float CalcBackFace(in vec3 viewVertex) { - vec3 vt = viewVertex - vec3(0.0); - vec3 vn = (mat3(modelMat) * inFaceNormal); + vec3 vt = viewVertex; // - vec3(0.0); + vec3 vn = mat3(modelMat) * inFaceNormal; // dot product of face normal with view direction return dot(vt, vn); @@ -55,14 +55,14 @@ float CalcBackFace(in vec3 viewVertex) void main(void) { - fsViewVertex = vec3(modelMat * inVertex); - fsViewNormal = (mat3(modelMat) * inNormal) / modelScale; - fsDiscard = CalcBackFace(fsViewVertex); - fsColor = GetColour(inColour); + fsViewVertex = (modelMat * inVertex).xyz; + fsViewNormal = (mat3(modelMat) / modelScale) * inNormal; + fsDiscardPoly = CalcBackFace(fsViewVertex); + fsColor = GetColour(inColour); fsTexCoord = inTexCoord; fsFixedShade = inFixedShade; - fsLODBase = -fsDiscard * cota * inTextureNP; - gl_Position = projMat * modelMat * inVertex; + fsLODBase = fsDiscardPoly * -cota * inTextureNP; + gl_Position = (projMat * modelMat) * inVertex; } )glsl"; @@ -114,13 +114,13 @@ uniform bool polyAlpha; uniform mat4 projMat; //interpolated inputs from vertex shader -in vec3 fsViewVertex; -in vec3 fsViewNormal; // per vertex normal vector -in vec4 fsColor; -in vec2 fsTexCoord; -in float fsDiscard; -in float fsFixedShade; -in float fsLODBase; +in vec3 fsViewVertex; +in vec3 fsViewNormal; // per vertex normal vector +in vec2 fsTexCoord; +in vec4 fsColor; +in float fsFixedShade; +in float fsDiscardPoly; +in float fsLODBase; //outputs layout(location = 0) out vec4 out0; // opaque @@ -143,12 +143,12 @@ void main() vec4 finalData; vec4 fogData; - if(fsDiscard > 0) { + if(fsDiscardPoly > 0.0) { discard; //emulate back face culling here } - + gl_FragDepth = projMat[3][2] * gl_FragCoord.w; - + fogData = vec4(fogColour.rgb * fogAmbient, CalcFog()); tex1Data = vec4(1.0, 1.0, 1.0, 1.0); @@ -157,7 +157,7 @@ void main() } colData = fsColor; - Step15Luminous(colData); // no-op for step 2.0+ + Step15Luminous(colData); // no-op for step 2.0+ finalData = tex1Data * colData; if (finalData.a < (1.0/32.0)) { // basically chuck out any totally transparent pixels value = 1/16 the smallest transparency level h/w supports @@ -225,7 +225,7 @@ void main() // Total light intensity: sum of all components lightIntensity = vec3(sunFactor*lighting[1].x + lighting[1].y); // diffuse + ambient - lightIntensity.rgb += spotColor*lobeEffect; + lightIntensity += spotColor*lobeEffect; // Upper clamp is optional, step 1.5+ games will drive brightness beyond 100% if(intensityClamp) { @@ -244,10 +244,10 @@ void main() // Always clamp floor to zero float NdotL = max(0.0, sunFactor); - vec4 expIndex = vec4(8.0, 16.0, 32.0, 64.0); - vec4 multIndex = vec4(1.6, 1.6, 2.4, 3.2); + const float expIndex[4] = float[4](8.0, 16.0, 32.0, 64.0); + const float multIndex[4] = float[4](1.6, 1.6, 2.4, 3.2); float exponent = expIndex[int(shininess)]; - + specularFactor = pow(NdotL, exponent); specularFactor *= multIndex[int(shininess)]; } @@ -258,7 +258,7 @@ void main() vec3 R = reflect(-sunVector, fsViewNormal); specularFactor = max(0.0, R.z); } - + specularFactor *= specularValue; specularFactor *= lighting[1].x; @@ -267,7 +267,7 @@ void main() finalData.a = max(finalData.a, specularFactor); } - finalData.rgb += vec3(specularFactor); + finalData.rgb += specularFactor; } } @@ -277,7 +277,7 @@ void main() // Spotlight on fog vec3 lSpotFogColor = spotFogColor * fogAttenuation * fogColour.rgb * lobeFogEffect; - // Fog & spotlight applied + // Fog & spotlight applied finalData.rgb = mix(finalData.rgb, fogData.rgb + lSpotFogColor, fogData.a); // Write outputs to colour buffers @@ -285,4 +285,4 @@ void main() } )glsl"; -#endif \ No newline at end of file +#endif From a922488ddd70b09018b29ade41144e0953150b90 Mon Sep 17 00:00:00 2001 From: toxieainc Date: Mon, 14 Oct 2024 22:53:42 +0200 Subject: [PATCH 2/3] remove unnecessary sign --- Src/Graphics/New3D/R3DShaderQuads.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/Graphics/New3D/R3DShaderQuads.h b/Src/Graphics/New3D/R3DShaderQuads.h index da6d12e3..678b58f9 100644 --- a/Src/Graphics/New3D/R3DShaderQuads.h +++ b/Src/Graphics/New3D/R3DShaderQuads.h @@ -268,7 +268,7 @@ void QuadraticInterpolation() { float u[4]; for (int i=0; i<4; i++) - u[i] = length(fs_in.v[i]) * sign(fs_in.oneOverW[i]); // is w[i] negative? + u[i] = length(fs_in.v[i]); precise float t[4]; for (int i=0; i<4; i++) { From 75d2e79033e1a77939ccb1219bf81f6019a92b5c Mon Sep 17 00:00:00 2001 From: toxieainc Date: Mon, 14 Oct 2024 23:11:27 +0200 Subject: [PATCH 3/3] sign IS needed for interpolation correctness :/ --- Src/Graphics/New3D/R3DShaderQuads.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/Graphics/New3D/R3DShaderQuads.h b/Src/Graphics/New3D/R3DShaderQuads.h index 678b58f9..da6d12e3 100644 --- a/Src/Graphics/New3D/R3DShaderQuads.h +++ b/Src/Graphics/New3D/R3DShaderQuads.h @@ -268,7 +268,7 @@ void QuadraticInterpolation() { float u[4]; for (int i=0; i<4; i++) - u[i] = length(fs_in.v[i]); + u[i] = length(fs_in.v[i]) * sign(fs_in.oneOverW[i]); // is w[i] negative? precise float t[4]; for (int i=0; i<4; i++) {