Skip to content

Commit

Permalink
Merge branch 'specialized-shaders-2' of https://github.com/wheremyfoo…
Browse files Browse the repository at this point in the history
…dat/Panda3DS into specialized-shaders-2
  • Loading branch information
wheremyfoodat committed Jul 18, 2024
2 parents 00037d8 + 6279ce3 commit c9a4e4e
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 101 deletions.
66 changes: 31 additions & 35 deletions .github/gles.patch
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ index 990e2f80..2e7842ac 100644

void main() {
diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag
index 23c5c4cb..a9851a8b 100644
index b4ad7ecc..98b1bd80 100644
--- a/src/host_shaders/opengl_fragment_shader.frag
+++ b/src/host_shaders/opengl_fragment_shader.frag
@@ -1,4 +1,5 @@
Expand All @@ -31,7 +31,7 @@ index 23c5c4cb..a9851a8b 100644

in vec4 v_quaternion;
in vec4 v_colour;
@@ -189,11 +190,17 @@ float lutLookup(uint lut, int index) {
@@ -164,11 +165,17 @@ float lutLookup(uint lut, int index) {
return texelFetch(u_tex_lighting_lut, ivec2(index, lut), 0).r;
}

Expand All @@ -50,8 +50,8 @@ index 23c5c4cb..a9851a8b 100644
}

// Convert an arbitrary-width floating point literal to an f32
@@ -257,16 +264,16 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light
// If RR is enabled but not RG or RB, the output of RR is used for the three components; Red, Green and Blue.
@@ -208,16 +215,16 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light

bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment

- if (!current_sampler_enabled || (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) {
Expand All @@ -70,26 +70,23 @@ index 23c5c4cb..a9851a8b 100644
switch (input_id) {
case 0u: {
delta = dot(normal, normalize(half_vector));
@@ -285,14 +292,14 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light
break;
}
case 4u: {
- // These are ints so that bitfieldExtract sign extends for us
+ // These are ints so that bitfieldExtractCompat sign extends for us
@@ -239,11 +246,11 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light
int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + (light_id << 4u)));
int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + (light_id << 4u)));

// These are fixed point 1.1.11 values, so we need to convert them to float
- float x = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13)) / 2047.0;
- float y = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13)) / 2047.0;
- float z = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13)) / 2047.0;
+ float x = float(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13)) / 2047.0;
+ float y = float(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13)) / 2047.0;
+ float z = float(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13)) / 2047.0;
vec3 spotlight_vector = vec3(x, y, z);
delta = dot(light_vector, spotlight_vector); // spotlight direction is negated so we don't negate light_vector
break;
@@ -310,9 +317,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light
- // Sign extend them. Normally bitfieldExtract would do that but it's missing on some versions
+ // Sign extend them. Normally bitfieldExtractCompat would do that but it's missing on some versions
// of GLSL so we do it manually
- int se_x = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13);
- int se_y = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13);
- int se_z = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13);
+ int se_x = bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13);
+ int se_y = bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13);
+ int se_z = bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13);

if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000;
if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000;
@@ -270,9 +277,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light
}

// 0 = enabled
Expand All @@ -101,25 +98,25 @@ index 23c5c4cb..a9851a8b 100644
delta = max(delta, 0.0);
} else {
delta = abs(delta);
@@ -339,7 +346,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
unimpl_color = vec4(1.0, 0.0, 1.0, 1.0);

@@ -296,7 +303,7 @@ vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) {
// Implements the following algorthm: https://mathb.in/26766
void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu);
- if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) {
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) {
primary_color = secondary_color = vec4(0.0);
return;
}
@@ -356,7 +363,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
@@ -313,7 +320,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u);
GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u);

- uint bump_mode = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 28, 2);
+ uint bump_mode = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 28, 2);

// Bump mode is ignored for now because it breaks some games ie. Toad Treasure Tracker
// Could be because the texture is not sampled correctly, may need the clamp/border color configurations
@@ -370,15 +377,15 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
switch (bump_mode) {
@@ -326,15 +333,15 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);
vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);

Expand All @@ -138,7 +135,7 @@ index 23c5c4cb..a9851a8b 100644

uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + (light_id << 4u));
uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + (light_id << 4u));
@@ -390,12 +397,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
@@ -346,12 +353,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {

float light_distance;
vec3 light_position = vec3(
Expand All @@ -154,7 +151,7 @@ index 23c5c4cb..a9851a8b 100644
light_vector = light_position + v_view;
}

@@ -411,14 +418,14 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
@@ -367,23 +374,23 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
float NdotL = dot(normal, light_vector); // N dot Li

// Two sided diffuse
Expand All @@ -172,9 +169,8 @@ index 23c5c4cb..a9851a8b 100644
if (use_geo_0 || use_geo_1) {
geometric_factor = dot(half_vector, half_vector);
geometric_factor = geometric_factor == 0.0 ? 0.0 : min(NdotL / geometric_factor, 1.0);
@@ -430,9 +437,9 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
// fragment and the distance attenuation scale and bias to calculate where in the LUT to look up.
// See: https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTi_ATTENUATION_SCALE
}

float distance_attenuation = 1.0;
- if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) {
- uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au + (light_id << 4u)), 0, 20);
Expand All @@ -185,7 +181,7 @@ index 23c5c4cb..a9851a8b 100644

float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u);
float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u);
@@ -477,8 +484,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
@@ -428,8 +435,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
specular_sum.rgb += light_factor * clamp_factor * (specular0 + specular1);
}

Expand Down Expand Up @@ -229,10 +225,10 @@ index 057f9a88..dc735ced 100644
v_quaternion = a_quaternion;
}
diff --git a/third_party/opengl/opengl.hpp b/third_party/opengl/opengl.hpp
index 9997e63b..5d9d7804 100644
index 828fb784..a1861b77 100644
--- a/third_party/opengl/opengl.hpp
+++ b/third_party/opengl/opengl.hpp
@@ -561,22 +561,22 @@ namespace OpenGL {
@@ -568,22 +568,22 @@ namespace OpenGL {
static void disableScissor() { glDisable(GL_SCISSOR_TEST); }
static void enableBlend() { glEnable(GL_BLEND); }
static void disableBlend() { glDisable(GL_BLEND); }
Expand Down
79 changes: 79 additions & 0 deletions docs/3ds/lighting.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
## Info on the lighting implementation

### Missing shadow attenuation
Shadow attenuation samples a texture unit, and that likely needs render-to-texture support for most games so that they can construct
their shadow map. As such, the colors are not multiplied by the shadow attenuation value, so there are no shadows.

### Missing bump mapping
Bump mapping also samples a texture unit. It most likely doesn't need render-to-texture; however, it may need a better texture sampling
implementation (such as GPUREG_TEXUNITi_BORDER_COLOR, GPUREG_TEXUNITi_BORDER_PARAM). Bump mapping would work for some things,
namely the 3ds-examples bump mapping demo, but would break others such as Toad Treasure Tracker with a naive `texture` implementation.

Also the CP configuration is missing, because it needs a tangent map implementation. It is currently marked with error_unimpl.

### samplerEnabledBitfields
Holds the enabled state of the lighting samplers for various PICA configurations,
as explained in https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTING_CONFIG0

```c
const bool samplerEnabled[9 * 7] = bool[9 * 7](
// D0 D1 SP FR RB RG RR
true, false, true, false, false, false, true, // Configuration 0: D0, SP, RR
false, false, true, true, false, false, true, // Configuration 1: FR, SP, RR
true, true, false, false, false, false, true, // Configuration 2: D0, D1, RR
true, true, false, true, false, false, false, // Configuration 3: D0, D1, FR
true, true, true, false, true, true, true, // Configuration 4: All except for FR
true, false, true, true, true, true, true, // Configuration 5: All except for D1
true, true, true, true, false, false, true, // Configuration 6: All except for RB and RG
false, false, false, false, false, false, false, // Configuration 7: Unused
true, true, true, true, true, true, true // Configuration 8: All
);
```

The above has been condensed to two uints for performance reasons.
You can confirm they are the same by running the following:
```c
const uint samplerEnabledBitfields[2] = { 0x7170e645u, 0x7f013fefu };
for (int i = 0; i < 9 * 7; i++) {
unsigned arrayIndex = (i >> 5);
bool b = (samplerEnabledBitfields[arrayIndex] & (1u << (i & 31))) != 0u;
if (samplerEnabled[i] == b) {
printf("%d: happy\n", i);
} else {
printf("%d: unhappy\n", i);
}
}
```

### lightLutLookup
lut_id is one of these values:

- 0 D0
- 1 D1
- 2 SP
- 3 FR
- 4 RB
- 5 RG
- 6 RR

lut_index, on the other hand, represents the actual index of the LUT in the texture.
u_tex_lighting_lut has 24 LUTs and they are used like so:

- 0 D0
- 1 D1
- 2 is missing because SP uses LUTs 8-15
- 3 FR
- 4 RB
- 5 RG
- 6 RR
- 8-15 SP0-7
- 16-23 DA0-7, but this is not handled in this function as the lookup is a bit different

The light environment configuration controls which LUTs are available for use.
If a LUT is not available in the selected configuration, its value will always read a constant 1.0 regardless of the enable state in GPUREG_LIGHTING_CONFIG1.
If RR is enabled but not RG or RB, the output of RR is used for the three components: Red, Green and Blue.

### Distance attenuation
Distance attenuation is computed differently from the other factors. For example,
it doesn't store its scale in GPUREG_LIGHTING_LUTINPUT_SCALE and it doesn't use
GPUREG_LIGHTING_LUTINPUT_SELECT. Instead, it uses the distance from the light to the
fragment and the distance attenuation scale and bias to calculate where in the LUT to look up.
See: https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTi_ATTENUATION_SCALE
83 changes: 17 additions & 66 deletions src/host_shaders/opengl_fragment_shader.frag
Original file line number Diff line number Diff line change
Expand Up @@ -37,38 +37,13 @@ vec4 tevNextPreviousBuffer;
bool tevUnimplementedSourceFlag = false;
vec3 normal;

// Holds the enabled state of the lighting samples for various PICA configurations
// As explained in https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTING_CONFIG0
// const bool samplerEnabled[9 * 7] = bool[9 * 7](
// // D0 D1 SP FR RB RG RR
// true, false, true, false, false, false, true, // Configuration 0: D0, SP, RR
// false, false, true, true, false, false, true, // Configuration 1: FR, SP, RR
// true, true, false, false, false, false, true, // Configuration 2: D0, D1, RR
// true, true, false, true, false, false, false, // Configuration 3: D0, D1, FR
// true, true, true, false, true, true, true, // Configuration 4: All except for FR
// true, false, true, true, true, true, true, // Configuration 5: All except for D1
// true, true, true, true, false, false, true, // Configuration 6: All except for RB and RG
// false, false, false, false, false, false, false, // Configuration 7: Unused
// true, true, true, true, true, true, true // Configuration 8: All
// );

// The above have been condensed to two uints to save space
// You can confirm they are the same by running the following:
// for (int i = 0; i < 9 * 7; i++) {
// unsigned arrayIndex = (i >> 5);
// bool b = (samplerEnabledBitfields[arrayIndex] & (1u << (i & 31))) != 0u;
// if (samplerEnabled[i] == b) {
// printf("%d: happy\n", i);
// } else {
// printf("%d: unhappy\n", i);
// }
// }
// See docs/3ds/lighting.md
const uint samplerEnabledBitfields[2] = uint[2](0x7170e645u, 0x7f013fefu);

bool isSamplerEnabled(uint environment_id, uint lut_id) {
uint index = 7 * environment_id + lut_id;
uint arrayIndex = (index >> 5);
return (samplerEnabledBitfields[arrayIndex] & (1u << (index & 31))) != 0u;
return (samplerEnabledBitfields[arrayIndex] & (1u << (index & 31u))) != 0u;
}

// OpenGL ES 1.1 reference pages for TEVs (this is what the PICA200 implements):
Expand Down Expand Up @@ -182,8 +157,8 @@ uint GPUREG_LIGHTING_CONFIG1;
uint GPUREG_LIGHTING_LUTINPUT_SELECT;
uint GPUREG_LIGHTING_LUTINPUT_SCALE;
uint GPUREG_LIGHTING_LUTINPUT_ABS;
bool error_unimpl;
vec4 unimpl_color;
bool error_unimpl = false;
vec4 unimpl_color = vec4(1.0, 0.0, 1.0, 1.0);

float lutLookup(uint lut, int index) {
return texelFetch(u_tex_lighting_lut, ivec2(index, lut), 0).r;
Expand Down Expand Up @@ -219,27 +194,6 @@ float decodeFP(uint hex, uint E, uint M) {

float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light_vector, vec3 half_vector) {
uint lut_index;
// lut_id is one of these values
// 0 D0
// 1 D1
// 2 SP
// 3 FR
// 4 RB
// 5 RG
// 6 RR

// lut_index on the other hand represents the actual index of the LUT in the texture
// u_tex_lighting_lut has 24 LUTs and they are used like so:
// 0 D0
// 1 D1
// 2 is missing because SP uses LUTs 8-15
// 3 FR
// 4 RB
// 5 RG
// 6 RR
// 8-15 SP0-7
// 16-23 DA0-7, but this is not handled in this function as the lookup is a bit different

int bit_in_config1;
if (lut_id == SP_LUT) {
// These are the spotlight attenuation LUTs
Expand All @@ -252,9 +206,6 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light
error_unimpl = true;
}

// The light environment configuration controls which LUTs are available for use
// If a LUT is not available in the selected configuration, its value will always read a constant 1.0 regardless of the enable state in GPUREG_LIGHTING_CONFIG1
// If RR is enabled but not RG or RB, the output of RR is used for the three components; Red, Green and Blue.
bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment

if (!current_sampler_enabled || (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) {
Expand Down Expand Up @@ -285,14 +236,23 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light
break;
}
case 4u: {
// These are ints so that bitfieldExtract sign extends for us
int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + (light_id << 4u)));
int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + (light_id << 4u)));

// Sign extend them. Normally bitfieldExtract would do that but it's missing on some versions
// of GLSL so we do it manually
int se_x = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13);
int se_y = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13);
int se_z = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13);

if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000;
if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000;
if ((se_z & 0x1000) == 0x1000) se_z |= 0xffffe000;

// These are fixed point 1.1.11 values, so we need to convert them to float
float x = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13)) / 2047.0;
float y = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13)) / 2047.0;
float z = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13)) / 2047.0;
float x = float(se_x) / 2047.0;
float y = float(se_y) / 2047.0;
float z = float(se_z) / 2047.0;
vec3 spotlight_vector = vec3(x, y, z);
delta = dot(light_vector, spotlight_vector); // spotlight direction is negated so we don't negate light_vector
break;
Expand Down Expand Up @@ -335,9 +295,6 @@ vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) {

// Implements the following algorthm: https://mathb.in/26766
void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
error_unimpl = false;
unimpl_color = vec4(1.0, 0.0, 1.0, 1.0);

uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu);
if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) {
primary_color = secondary_color = vec4(0.0);
Expand All @@ -359,7 +316,6 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
uint bump_mode = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 28, 2);

// Bump mode is ignored for now because it breaks some games ie. Toad Treasure Tracker
// Could be because the texture is not sampled correctly, may need the clamp/border color configurations
switch (bump_mode) {
default: {
normal = rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), v_quaternion);
Expand Down Expand Up @@ -424,11 +380,6 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
geometric_factor = geometric_factor == 0.0 ? 0.0 : min(NdotL / geometric_factor, 1.0);
}

// Distance attenuation is computed differently from the other factors, for example
// it doesn't store its scale in GPUREG_LIGHTING_LUTINPUT_SCALE and it doesn't use
// GPUREG_LIGHTING_LUTINPUT_SELECT. Instead, it uses the distance from the light to the
// fragment and the distance attenuation scale and bias to calculate where in the LUT to look up.
// See: https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTi_ATTENUATION_SCALE
float distance_attenuation = 1.0;
if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) {
uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au + (light_id << 4u)), 0, 20);
Expand Down

0 comments on commit c9a4e4e

Please sign in to comment.