Skip to content

Commit

Permalink
improve edge test precision of quad test, and add missing '----'-area…
Browse files Browse the repository at this point in the history
…-case support for twisted/bow-tie quads

...and align the triangle code more closely with the quad code for easier comparison

also generalize mipmap0-only optimization to all non-blended cases

related to trzy#201
  • Loading branch information
toxieainc committed Oct 13, 2024
1 parent 916d912 commit eafdacf
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 137 deletions.
59 changes: 30 additions & 29 deletions Src/Graphics/New3D/R3DShaderCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,32 +14,32 @@ vec4 ExtractColour(int type, uint value)
{
vec4 c = vec4(0.0);

if(type==0) { // T1RGB5
c.r = float((value >> 10) & 0x1Fu);
c.g = float((value >> 5 ) & 0x1Fu);
c.b = float((value ) & 0x1Fu);
if(type==0) { // T1RGB5
c.r = float((value >> 10) & 0x1Fu);
c.g = float((value >> 5 ) & 0x1Fu);
c.b = float((value ) & 0x1Fu);
c.rgb *= (1.0/31.0);
c.a = 1.0 - float((value >> 15) & 0x1u);
c.a = 1.0 - float((value >> 15) & 0x1u);
}
else if(type==1) { // Interleaved A4L4 (low byte)
c.rgb = vec3(float(value&0xFu));
c.a = float((value >> 4) & 0xFu);
c.rgb = vec3(float(value&0xFu));
c.a = float((value >> 4) & 0xFu);
c *= (1.0/15.0);
}
else if(type==2) {
c.a = float(value&0xFu);
c.a = float(value&0xFu);
c.rgb = vec3(float((value >> 4) & 0xFu));
c *= (1.0/15.0);
c *= (1.0/15.0);
}
else if(type==3) {
c.rgb = vec3(float((value>>8)&0xFu));
c.a = float((value >> 12) & 0xFu);
c *= (1.0/15.0);
c.rgb = vec3(float((value>>8)&0xFu));
c.a = float((value >> 12) & 0xFu);
c *= (1.0/15.0);
}
else if(type==4) {
c.a = float((value>>8)&0xFu);
c.a = float((value>>8)&0xFu);
c.rgb = vec3(float((value >> 12) & 0xFu));
c *= (1.0/15.0);
c *= (1.0/15.0);
}
else if(type==5) {
c = vec4(float(value&0xFFu) / 255.0);
Expand Down Expand Up @@ -145,7 +145,6 @@ float LinearTexLocations(int wrapMode, float size, float u, out float u0, out fl
return fract(u); // return weight
}
else { // mirror + mirror clamp - both are the same since the edge pixels are repeated anyway

float odd = floor(mod(u, 2.0)); // odd values are mirrored

if(odd > 0.0) {
Expand All @@ -161,7 +160,7 @@ float LinearTexLocations(int wrapMode, float size, float u, out float u0, out fl

if(u0 < 0.0) u0 = 0.0;
if(u1 >= 1.0) u1 = 1.0 - halfTexelSize;

return fract(u); // return weight
}
}
Expand All @@ -173,9 +172,9 @@ vec4 texBiLinear(usampler2D texSampler, ivec2 wrapMode, vec2 texSize, ivec2 texP
float b = LinearTexLocations(wrapMode.t, texSize.y, texCoord.y, ty[0], ty[1]);

vec4 p0q0 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[0]) * texSize + texPos),level), level).r);
vec4 p1q0 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[0]) * texSize + texPos),level), level).r);
vec4 p0q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[1]) * texSize + texPos),level), level).r);
vec4 p1q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[1]) * texSize + texPos),level), level).r);
vec4 p1q0 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[0]) * texSize + texPos),level), level).r);
vec4 p0q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[1]) * texSize + texPos),level), level).r);
vec4 p1q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[1]) * texSize + texPos),level), level).r);

if(alphaTest) {
if(p0q0.a > p1q0.a) { p1q0.rgb = p0q0.rgb; }
Expand All @@ -192,10 +191,10 @@ vec4 texBiLinear(usampler2D texSampler, ivec2 wrapMode, vec2 texSize, ivec2 texP
}

// Interpolation in X direction.
vec4 pInterp_q0 = mix( p0q0, p1q0, a ); // Interpolates top row in X direction.
vec4 pInterp_q1 = mix( p0q1, p1q1, a ); // Interpolates bottom row in X direction.
vec4 pInterp_q0 = mix( p0q0, p1q0, a ); // Interpolates top row in X direction.
vec4 pInterp_q1 = mix( p0q1, p1q1, a ); // Interpolates bottom row in X direction.

return mix( pInterp_q0, pInterp_q1, b ); // Interpolate in Y direction.
return mix( pInterp_q0, pInterp_q1, b ); // Interpolate in Y direction.
}

vec4 GetTextureValue()
Expand All @@ -206,8 +205,8 @@ vec4 GetTextureValue()

int iLevel = int(fLevel);

ivec2 tex1Pos = GetTexturePosition(iLevel, ivec2(baseTexInfo.xy));
ivec2 tex1Size = GetTextureSize(iLevel, ivec2(baseTexInfo.zw));
ivec2 tex1Pos = GetTexturePosition(iLevel, baseTexInfo.xy);
ivec2 tex1Size = GetTextureSize(iLevel, baseTexInfo.zw);
vec4 tex1Data = texBiLinear(textureBank[texturePage], textureWrapMode, vec2(tex1Size), tex1Pos, fsTexCoord, iLevel);

// init second texel with blank data to avoid any potentially undefined behavior
Expand All @@ -216,13 +215,15 @@ vec4 GetTextureValue()
float blendFactor = 0.0;

// if LOD < 0, no need to blend with next mipmap level; slight performance boost
if (lod > 0.0)
// while at it, just generalize to all cases where only one mip level needs to be touched
float ffL = fract(fLevel);
if (ffL > 0.0)
{
ivec2 tex2Pos = GetTexturePosition(iLevel+1, ivec2(baseTexInfo.xy));
ivec2 tex2Size = GetTextureSize(iLevel+1, ivec2(baseTexInfo.zw));
ivec2 tex2Pos = GetTexturePosition(iLevel+1, baseTexInfo.xy);
ivec2 tex2Size = GetTextureSize(iLevel+1, baseTexInfo.zw);
tex2Data = texBiLinear(textureBank[texturePage], textureWrapMode, vec2(tex2Size), tex2Pos, fsTexCoord, iLevel+1);

blendFactor = fract(fLevel);
blendFactor = ffL;
}
else if (microTexture && lod < -microTextureMinLOD)
{
Expand All @@ -240,7 +241,7 @@ vec4 GetTextureValue()
tex1Data = mix(tex1Data, tex2Data, blendFactor);

if(textureInverted) {
tex1Data.rgb = vec3(1.0) - vec3(tex1Data.rgb);
tex1Data.rgb = vec3(1.0) - tex1Data.rgb;
}

if (alphaTest) {
Expand Down
113 changes: 40 additions & 73 deletions Src/Graphics/New3D/R3DShaderQuads.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,14 @@ float CalcBackFace(in vec3 viewVertex)

void main(void)
{
vs_out.viewVertex = vec3(modelMat * inVertex);
vs_out.viewNormal = (mat3(modelMat) * inNormal) / modelScale;
vs_out.viewVertex = (modelMat * inVertex).xyz;
vs_out.viewNormal = (mat3(modelMat) / modelScale) * inNormal;
vs_out.discardPoly = CalcBackFace(vs_out.viewVertex);
vs_out.color = GetColour(inColour);
vs_out.color = GetColour(inColour);
vs_out.texCoord = inTexCoord;
vs_out.fixedShade = inFixedShade;
vs_out.LODBase = -vs_out.discardPoly * cota * inTextureNP;
gl_Position = projMat * modelMat * inVertex;
vs_out.LODBase = vs_out.discardPoly * -cota * inTextureNP;
gl_Position = (projMat * modelMat) * inVertex;
}
)glsl";

Expand Down Expand Up @@ -114,7 +114,7 @@ float DifferenceOfProducts(float a, float b, float c, float d)

void main(void)
{
if(gs_in[0].discardPoly > 0) {
if(gs_in[0].discardPoly > 0.0) {
return; //emulate back face culling here (all vertices in poly have same value)
}

Expand Down Expand Up @@ -159,7 +159,7 @@ void main(void)
// | | | \ |
// 0----3 0----2
//
int reorder[4] = int[]( 1, 0, 2, 3 );
const int reorder[4] = int[4]( 1, 0, 2, 3 );
int ii = reorder[i];

for (int j=0; j<4; j++) {
Expand Down Expand Up @@ -246,8 +246,8 @@ in GS_OUT
vec3 fsViewVertex;
vec3 fsViewNormal;
vec2 fsTexCoord;
float fsFixedShade;
vec4 fsColor;
float fsFixedShade;
float fsLODBase;

//outputs
Expand All @@ -266,87 +266,54 @@ float SqrLength(vec2 a);

void QuadraticInterpolation()
{
vec2 s[4];
float A[4];

for (int i=0; i<4; i++) {
s[i] = fs_in.v[i];
A[i] = fs_in.area[i];
}

float D[4];
float r[4];

for (int i=0; i<4; i++) {
int i_next = (i+1)%4;
D[i] = dot(s[i], s[i_next]);
r[i] = length(s[i]);
if (fs_in.oneOverW[i] < 0.0) { // is w[i] negative?
r[i] = -r[i];
}
}

float t[4];
float u[4];
for (int i=0; i<4; i++)
u[i] = length(fs_in.v[i]) * sign(fs_in.oneOverW[i]); // is w[i] negative?

precise float t[4];
for (int i=0; i<4; i++) {
int i_next = (i+1)%4;
if(A[i]==0.0) t[i] = 0.0; // check for zero area + div by zero
else t[i] = (r[i]*r[i_next] - D[i]) / A[i];
if(fs_in.area[i]==0.0) t[i] = 0.0; // check for zero area to avoid div by zero
else t[i] = fma(u[i],u[i_next], -dot(fs_in.v[i],fs_in.v[i_next])) / fs_in.area[i];
}

float uSum = 0.0;
float u[4];
int lambdaSignCount = 0; // to discard fragments if all the weights are neither all negative nor all positive (=outside the convex/concave/crossed quad).

for (uint i=0; i<4; i++) {
uint i_prev = (i-1)%4;
u[i] = (t[i_prev] + t[i]) / r[i];
uSum += u[i];
u[i] = (t[i_prev] + t[i]) / u[i];
lambdaSignCount += (t[i_prev] < -t[i]) ? -1 : 1;
}

float lambda[4];

for (int i=0; i<4; i++) {
lambda[i] = u[i] / uSum;
}

/* Discard fragments when all the weights are neither all negative nor all positive. */

int lambdaSignCount = 0;

for (int i=0; i<4; i++) {
if (fs_in.oneOverW[i] * lambda[i] < 0.0) {
lambdaSignCount--;
} else {
lambdaSignCount++;
}
}
if (lambdaSignCount != 4) {
if (lambdaSignCount == 0) { // one can either check for == 0 or abs(...) != 4, both should(!) be equivalent (but in practice its not due to precision issues, but these cases are extremely rare)
if(!gl_HelperInvocation) {
discard;
}
}

float interp_oneOverW = 0.0;

fsViewVertex = vec3(0.0);
fsViewNormal = vec3(0.0);
fsTexCoord = vec2(0.0);
fsFixedShade = 0.0;
float interp_oneOverW = 0.0;
float uSum = 0.0;
fsColor = fs_in.color;
fsLODBase = fs_in.LODBase;

for (int i=0; i<4; i++) {
fsViewVertex += lambda[i] * fs_in.viewVertex[i];
fsViewNormal += lambda[i] * fs_in.viewNormal[i];
fsTexCoord += lambda[i] * fs_in.texCoord[i];
fsFixedShade += lambda[i] * fs_in.fixedShade[i];
interp_oneOverW += lambda[i] * fs_in.oneOverW[i];
fsViewVertex += u[i] * fs_in.viewVertex[i];
fsViewNormal += u[i] * fs_in.viewNormal[i];
fsTexCoord += u[i] * fs_in.texCoord[i];
fsFixedShade += u[i] * fs_in.fixedShade[i];
interp_oneOverW += u[i] * fs_in.oneOverW[i];
uSum += u[i];
}

fsViewVertex /= interp_oneOverW;
fsViewNormal /= interp_oneOverW;
fsTexCoord /= interp_oneOverW;
fsFixedShade /= interp_oneOverW;
float inv = 1.0/interp_oneOverW;
fsViewVertex *= inv;
fsViewNormal *= inv;
fsTexCoord *= inv;
fsFixedShade *= inv;

vec4 vertex;
float depth;
Expand All @@ -363,7 +330,7 @@ void QuadraticInterpolation()
}
else {
vertex.z = projMat[2][2] * fsViewVertex.z + projMat[3][2]; // standard projMat * vertex - but just using Z components
depth = vertex.z * interp_oneOverW;
depth = vertex.z * (interp_oneOverW/uSum);
}

gl_FragDepth = depth;
Expand All @@ -386,7 +353,7 @@ void main()
}

colData = fsColor;
Step15Luminous(colData); // no-op for step 2.0+
Step15Luminous(colData); // no-op for step 2.0+
finalData = tex1Data * colData;

if (finalData.a < (1.0/32.0)) { // basically chuck out any totally transparent pixels value = 1/16 the smallest transparency level h/w supports
Expand Down Expand Up @@ -454,7 +421,7 @@ void main()
// Total light intensity: sum of all components
lightIntensity = vec3(sunFactor*lighting[1].x + lighting[1].y); // diffuse + ambient

lightIntensity.rgb += spotColor*lobeEffect;
lightIntensity += spotColor*lobeEffect;

// Upper clamp is optional, step 1.5+ games will drive brightness beyond 100%
if(intensityClamp) {
Expand All @@ -473,10 +440,10 @@ void main()
// Always clamp floor to zero
float NdotL = max(0.0, sunFactor);

vec4 expIndex = vec4(8.0, 16.0, 32.0, 64.0);
vec4 multIndex = vec4(1.6, 1.6, 2.4, 3.2);
const float expIndex[4] = float[4](8.0, 16.0, 32.0, 64.0);
const float multIndex[4] = float[4](1.6, 1.6, 2.4, 3.2);
float exponent = expIndex[int(shininess)];

specularFactor = pow(NdotL, exponent);
specularFactor *= multIndex[int(shininess)];
}
Expand All @@ -487,7 +454,7 @@ void main()
vec3 R = reflect(-sunVector, fsViewNormal);
specularFactor = max(0.0, R.z);
}

specularFactor *= specularValue;
specularFactor *= lighting[1].x;

Expand All @@ -496,7 +463,7 @@ void main()
finalData.a = max(finalData.a, specularFactor);
}

finalData.rgb += vec3(specularFactor);
finalData.rgb += specularFactor;
}
}

Expand All @@ -506,7 +473,7 @@ void main()
// Spotlight on fog
vec3 lSpotFogColor = spotFogColor * fogAttenuation * fogColour.rgb * lobeFogEffect;

// Fog & spotlight applied
// Fog & spotlight applied
finalData.rgb = mix(finalData.rgb, fogData.rgb + lSpotFogColor, fogData.a);

// Write outputs to colour buffers
Expand Down
Loading

0 comments on commit eafdacf

Please sign in to comment.