Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve edge test precision of quad test, and add missing '----'-area #213

Merged
merged 3 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 30 additions & 29 deletions Src/Graphics/New3D/R3DShaderCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,32 +14,32 @@ vec4 ExtractColour(int type, uint value)
{
vec4 c = vec4(0.0);

if(type==0) { // T1RGB5
c.r = float((value >> 10) & 0x1Fu);
c.g = float((value >> 5 ) & 0x1Fu);
c.b = float((value ) & 0x1Fu);
if(type==0) { // T1RGB5
c.r = float((value >> 10) & 0x1Fu);
c.g = float((value >> 5 ) & 0x1Fu);
c.b = float((value ) & 0x1Fu);
c.rgb *= (1.0/31.0);
c.a = 1.0 - float((value >> 15) & 0x1u);
c.a = 1.0 - float((value >> 15) & 0x1u);
}
else if(type==1) { // Interleaved A4L4 (low byte)
c.rgb = vec3(float(value&0xFu));
c.a = float((value >> 4) & 0xFu);
c.rgb = vec3(float(value&0xFu));
c.a = float((value >> 4) & 0xFu);
c *= (1.0/15.0);
}
else if(type==2) {
c.a = float(value&0xFu);
c.a = float(value&0xFu);
c.rgb = vec3(float((value >> 4) & 0xFu));
c *= (1.0/15.0);
c *= (1.0/15.0);
}
else if(type==3) {
c.rgb = vec3(float((value>>8)&0xFu));
c.a = float((value >> 12) & 0xFu);
c *= (1.0/15.0);
c.rgb = vec3(float((value>>8)&0xFu));
c.a = float((value >> 12) & 0xFu);
c *= (1.0/15.0);
}
else if(type==4) {
c.a = float((value>>8)&0xFu);
c.a = float((value>>8)&0xFu);
c.rgb = vec3(float((value >> 12) & 0xFu));
c *= (1.0/15.0);
c *= (1.0/15.0);
}
else if(type==5) {
c = vec4(float(value&0xFFu) / 255.0);
Expand Down Expand Up @@ -145,7 +145,6 @@ float LinearTexLocations(int wrapMode, float size, float u, out float u0, out fl
return fract(u); // return weight
}
else { // mirror + mirror clamp - both are the same since the edge pixels are repeated anyway

float odd = floor(mod(u, 2.0)); // odd values are mirrored

if(odd > 0.0) {
Expand All @@ -161,7 +160,7 @@ float LinearTexLocations(int wrapMode, float size, float u, out float u0, out fl

if(u0 < 0.0) u0 = 0.0;
if(u1 >= 1.0) u1 = 1.0 - halfTexelSize;

return fract(u); // return weight
}
}
Expand All @@ -173,9 +172,9 @@ vec4 texBiLinear(usampler2D texSampler, ivec2 wrapMode, vec2 texSize, ivec2 texP
float b = LinearTexLocations(wrapMode.t, texSize.y, texCoord.y, ty[0], ty[1]);

vec4 p0q0 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[0]) * texSize + texPos),level), level).r);
vec4 p1q0 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[0]) * texSize + texPos),level), level).r);
vec4 p0q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[1]) * texSize + texPos),level), level).r);
vec4 p1q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[1]) * texSize + texPos),level), level).r);
vec4 p1q0 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[0]) * texSize + texPos),level), level).r);
vec4 p0q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[1]) * texSize + texPos),level), level).r);
vec4 p1q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[1]) * texSize + texPos),level), level).r);

if(alphaTest) {
if(p0q0.a > p1q0.a) { p1q0.rgb = p0q0.rgb; }
Expand All @@ -192,10 +191,10 @@ vec4 texBiLinear(usampler2D texSampler, ivec2 wrapMode, vec2 texSize, ivec2 texP
}

// Interpolation in X direction.
vec4 pInterp_q0 = mix( p0q0, p1q0, a ); // Interpolates top row in X direction.
vec4 pInterp_q1 = mix( p0q1, p1q1, a ); // Interpolates bottom row in X direction.
vec4 pInterp_q0 = mix( p0q0, p1q0, a ); // Interpolates top row in X direction.
vec4 pInterp_q1 = mix( p0q1, p1q1, a ); // Interpolates bottom row in X direction.

return mix( pInterp_q0, pInterp_q1, b ); // Interpolate in Y direction.
return mix( pInterp_q0, pInterp_q1, b ); // Interpolate in Y direction.
}

vec4 GetTextureValue()
Expand All @@ -206,8 +205,8 @@ vec4 GetTextureValue()

int iLevel = int(fLevel);

ivec2 tex1Pos = GetTexturePosition(iLevel, ivec2(baseTexInfo.xy));
ivec2 tex1Size = GetTextureSize(iLevel, ivec2(baseTexInfo.zw));
ivec2 tex1Pos = GetTexturePosition(iLevel, baseTexInfo.xy);
ivec2 tex1Size = GetTextureSize(iLevel, baseTexInfo.zw);
vec4 tex1Data = texBiLinear(textureBank[texturePage], textureWrapMode, vec2(tex1Size), tex1Pos, fsTexCoord, iLevel);

// init second texel with blank data to avoid any potentially undefined behavior
Expand All @@ -216,13 +215,15 @@ vec4 GetTextureValue()
float blendFactor = 0.0;

// if LOD < 0, no need to blend with next mipmap level; slight performance boost
if (lod > 0.0)
// while at it, just generalize to all cases where only one mip level needs to be touched
float ffL = fract(fLevel);
if (ffL > 0.0)
{
ivec2 tex2Pos = GetTexturePosition(iLevel+1, ivec2(baseTexInfo.xy));
ivec2 tex2Size = GetTextureSize(iLevel+1, ivec2(baseTexInfo.zw));
ivec2 tex2Pos = GetTexturePosition(iLevel+1, baseTexInfo.xy);
ivec2 tex2Size = GetTextureSize(iLevel+1, baseTexInfo.zw);
tex2Data = texBiLinear(textureBank[texturePage], textureWrapMode, vec2(tex2Size), tex2Pos, fsTexCoord, iLevel+1);

blendFactor = fract(fLevel);
blendFactor = ffL;
}
else if (microTexture && lod < -microTextureMinLOD)
{
Expand All @@ -240,7 +241,7 @@ vec4 GetTextureValue()
tex1Data = mix(tex1Data, tex2Data, blendFactor);

if(textureInverted) {
tex1Data.rgb = vec3(1.0) - vec3(tex1Data.rgb);
tex1Data.rgb = vec3(1.0) - tex1Data.rgb;
}

if (alphaTest) {
Expand Down
113 changes: 40 additions & 73 deletions Src/Graphics/New3D/R3DShaderQuads.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,14 @@ float CalcBackFace(in vec3 viewVertex)

void main(void)
{
vs_out.viewVertex = vec3(modelMat * inVertex);
vs_out.viewNormal = (mat3(modelMat) * inNormal) / modelScale;
vs_out.viewVertex = (modelMat * inVertex).xyz;
vs_out.viewNormal = (mat3(modelMat) / modelScale) * inNormal;
vs_out.discardPoly = CalcBackFace(vs_out.viewVertex);
vs_out.color = GetColour(inColour);
vs_out.color = GetColour(inColour);
vs_out.texCoord = inTexCoord;
vs_out.fixedShade = inFixedShade;
vs_out.LODBase = -vs_out.discardPoly * cota * inTextureNP;
gl_Position = projMat * modelMat * inVertex;
vs_out.LODBase = vs_out.discardPoly * -cota * inTextureNP;
gl_Position = (projMat * modelMat) * inVertex;
}
)glsl";

Expand Down Expand Up @@ -114,7 +114,7 @@ float DifferenceOfProducts(float a, float b, float c, float d)

void main(void)
{
if(gs_in[0].discardPoly > 0) {
if(gs_in[0].discardPoly > 0.0) {
return; //emulate back face culling here (all vertices in poly have same value)
}

Expand Down Expand Up @@ -159,7 +159,7 @@ void main(void)
// | | | \ |
// 0----3 0----2
//
int reorder[4] = int[]( 1, 0, 2, 3 );
const int reorder[4] = int[4]( 1, 0, 2, 3 );
int ii = reorder[i];

for (int j=0; j<4; j++) {
Expand Down Expand Up @@ -246,8 +246,8 @@ in GS_OUT
vec3 fsViewVertex;
vec3 fsViewNormal;
vec2 fsTexCoord;
float fsFixedShade;
vec4 fsColor;
float fsFixedShade;
float fsLODBase;

//outputs
Expand All @@ -266,87 +266,54 @@ float SqrLength(vec2 a);

void QuadraticInterpolation()
{
vec2 s[4];
float A[4];

for (int i=0; i<4; i++) {
s[i] = fs_in.v[i];
A[i] = fs_in.area[i];
}

float D[4];
float r[4];

for (int i=0; i<4; i++) {
int i_next = (i+1)%4;
D[i] = dot(s[i], s[i_next]);
r[i] = length(s[i]);
if (fs_in.oneOverW[i] < 0.0) { // is w[i] negative?
r[i] = -r[i];
}
}

float t[4];
float u[4];
for (int i=0; i<4; i++)
u[i] = length(fs_in.v[i]) * sign(fs_in.oneOverW[i]); // is w[i] negative?

precise float t[4];
for (int i=0; i<4; i++) {
int i_next = (i+1)%4;
if(A[i]==0.0) t[i] = 0.0; // check for zero area + div by zero
else t[i] = (r[i]*r[i_next] - D[i]) / A[i];
if(fs_in.area[i]==0.0) t[i] = 0.0; // check for zero area to avoid div by zero
else t[i] = fma(u[i],u[i_next], -dot(fs_in.v[i],fs_in.v[i_next])) / fs_in.area[i];
}

float uSum = 0.0;
float u[4];
int lambdaSignCount = 0; // used to discard fragments whose weights are neither all negative nor all positive (i.e. outside the convex/concave/crossed quad).

for (uint i=0; i<4; i++) {
uint i_prev = (i-1)%4;
u[i] = (t[i_prev] + t[i]) / r[i];
uSum += u[i];
u[i] = (t[i_prev] + t[i]) / u[i];
lambdaSignCount += (t[i_prev] < -t[i]) ? -1 : 1;
}

float lambda[4];

for (int i=0; i<4; i++) {
lambda[i] = u[i] / uSum;
}

/* Discard fragments when all the weights are neither all negative nor all positive. */

int lambdaSignCount = 0;

for (int i=0; i<4; i++) {
if (fs_in.oneOverW[i] * lambda[i] < 0.0) {
lambdaSignCount--;
} else {
lambdaSignCount++;
}
}
if (lambdaSignCount != 4) {
if (lambdaSignCount == 0) { // one can check either == 0 or abs(...) != 4; both should(!) be equivalent (in practice they are not, due to precision issues, but such cases are extremely rare)
if(!gl_HelperInvocation) {
discard;
}
}

float interp_oneOverW = 0.0;

fsViewVertex = vec3(0.0);
fsViewNormal = vec3(0.0);
fsTexCoord = vec2(0.0);
fsFixedShade = 0.0;
float interp_oneOverW = 0.0;
float uSum = 0.0;
fsColor = fs_in.color;
fsLODBase = fs_in.LODBase;

for (int i=0; i<4; i++) {
fsViewVertex += lambda[i] * fs_in.viewVertex[i];
fsViewNormal += lambda[i] * fs_in.viewNormal[i];
fsTexCoord += lambda[i] * fs_in.texCoord[i];
fsFixedShade += lambda[i] * fs_in.fixedShade[i];
interp_oneOverW += lambda[i] * fs_in.oneOverW[i];
fsViewVertex += u[i] * fs_in.viewVertex[i];
fsViewNormal += u[i] * fs_in.viewNormal[i];
fsTexCoord += u[i] * fs_in.texCoord[i];
fsFixedShade += u[i] * fs_in.fixedShade[i];
interp_oneOverW += u[i] * fs_in.oneOverW[i];
uSum += u[i];
}

fsViewVertex /= interp_oneOverW;
fsViewNormal /= interp_oneOverW;
fsTexCoord /= interp_oneOverW;
fsFixedShade /= interp_oneOverW;
float inv = 1.0/interp_oneOverW;
fsViewVertex *= inv;
fsViewNormal *= inv;
fsTexCoord *= inv;
fsFixedShade *= inv;

vec4 vertex;
float depth;
Expand All @@ -363,7 +330,7 @@ void QuadraticInterpolation()
}
else {
vertex.z = projMat[2][2] * fsViewVertex.z + projMat[3][2]; // standard projMat * vertex - but just using Z components
depth = vertex.z * interp_oneOverW;
depth = vertex.z * (interp_oneOverW/uSum);
}

gl_FragDepth = depth;
Expand All @@ -386,7 +353,7 @@ void main()
}

colData = fsColor;
Step15Luminous(colData); // no-op for step 2.0+
Step15Luminous(colData); // no-op for step 2.0+
finalData = tex1Data * colData;

if (finalData.a < (1.0/32.0)) { // basically chuck out any totally transparent pixels; value = 1/16 is the smallest transparency level the h/w supports
Expand Down Expand Up @@ -454,7 +421,7 @@ void main()
// Total light intensity: sum of all components
lightIntensity = vec3(sunFactor*lighting[1].x + lighting[1].y); // diffuse + ambient

lightIntensity.rgb += spotColor*lobeEffect;
lightIntensity += spotColor*lobeEffect;

// Upper clamp is optional, step 1.5+ games will drive brightness beyond 100%
if(intensityClamp) {
Expand All @@ -473,10 +440,10 @@ void main()
// Always clamp floor to zero
float NdotL = max(0.0, sunFactor);

vec4 expIndex = vec4(8.0, 16.0, 32.0, 64.0);
vec4 multIndex = vec4(1.6, 1.6, 2.4, 3.2);
const float expIndex[4] = float[4](8.0, 16.0, 32.0, 64.0);
const float multIndex[4] = float[4](1.6, 1.6, 2.4, 3.2);
float exponent = expIndex[int(shininess)];

specularFactor = pow(NdotL, exponent);
specularFactor *= multIndex[int(shininess)];
}
Expand All @@ -487,7 +454,7 @@ void main()
vec3 R = reflect(-sunVector, fsViewNormal);
specularFactor = max(0.0, R.z);
}

specularFactor *= specularValue;
specularFactor *= lighting[1].x;

Expand All @@ -496,7 +463,7 @@ void main()
finalData.a = max(finalData.a, specularFactor);
}

finalData.rgb += vec3(specularFactor);
finalData.rgb += specularFactor;
}
}

Expand All @@ -506,7 +473,7 @@ void main()
// Spotlight on fog
vec3 lSpotFogColor = spotFogColor * fogAttenuation * fogColour.rgb * lobeFogEffect;

// Fog & spotlight applied
// Fog & spotlight applied
finalData.rgb = mix(finalData.rgb, fogData.rgb + lSpotFogColor, fogData.a);

// Write outputs to colour buffers
Expand Down
Loading