From a3886a948fd6d7b54f94f3896e3e7d1ef841b7d3 Mon Sep 17 00:00:00 2001 From: offtkp Date: Tue, 9 Jul 2024 20:51:09 +0300 Subject: [PATCH 01/10] Switch to GL_TEXTURE_2D for lighting LUT --- include/renderer_gl/renderer_gl.hpp | 2 +- src/core/renderer_gl/renderer_gl.cpp | 21 ++++++++++---------- src/host_shaders/opengl_fragment_shader.frag | 8 ++++---- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 92f02662f..057f0d3bc 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -53,7 +53,7 @@ class RendererGL final : public Renderer { OpenGL::VertexBuffer dummyVBO; OpenGL::Texture screenTexture; - GLuint lightLUTTextureArray; + OpenGL::Texture lightLUTTexture; OpenGL::Framebuffer screenFramebuffer; OpenGL::Texture blankTexture; diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index a11a6ffa5..9de9f8d89 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -124,7 +124,10 @@ void RendererGL::initGraphicsContextInternal() { const u32 screenTextureWidth = 400; // Top screen is 400 pixels wide, bottom is 320 const u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall - glGenTextures(1, &lightLUTTextureArray); + lightLUTTexture.create(256, Lights::LUT_Count, GL_R32F); + lightLUTTexture.bind(); + lightLUTTexture.setMinFilter(OpenGL::Linear); + lightLUTTexture.setMagFilter(OpenGL::Linear); auto prevTexture = OpenGL::getTex2D(); @@ -357,26 +360,22 @@ void RendererGL::bindTexturesToSlots() { } glActiveTexture(GL_TEXTURE0 + 3); - glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); + lightLUTTexture.bind(); glActiveTexture(GL_TEXTURE0); } void RendererGL::updateLightingLUT() { gpu.lightingLUTDirty = false; - std::array u16_lightinglut; + std::array lightingLut; for (int i = 0; i < gpu.lightingLUT.size(); i++) { - uint64_t value = 
gpu.lightingLUT[i] & ((1 << 12) - 1); - u16_lightinglut[i] = value * 65535 / 4095; + uint64_t value = gpu.lightingLUT[i] & 0xFFF; + lightingLut[i] = (float)(value << 4) / 65535.0f; } glActiveTexture(GL_TEXTURE0 + 3); - glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); - glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data()); - glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + lightLUTTexture.bind(); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, Lights::LUT_Count, GL_RED, GL_FLOAT, lightingLut.data()); glActiveTexture(GL_TEXTURE0); } diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag index f6fa6c558..6b728ace1 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -27,7 +27,7 @@ uniform bool u_depthmapEnable; uniform sampler2D u_tex0; uniform sampler2D u_tex1; uniform sampler2D u_tex2; -uniform sampler1DArray u_tex_lighting_lut; +uniform sampler2D u_tex_lighting_lut; uniform uint u_picaRegs[0x200 - 0x48]; @@ -145,9 +145,9 @@ vec4 tevCalculateCombiner(int tev_id) { #define RR_LUT 6u float lutLookup(uint lut, uint light, float value) { - if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1; - if (lut == SP_LUT) lut = light + 8; - return texture(u_tex_lighting_lut, vec2(value, lut)).r; + if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1u; + if (lut == SP_LUT) lut = light + 8u; + return texelFetch(u_tex_lighting_lut, ivec2(int(value * 256.0), lut), 0).r; } vec3 regToColor(uint reg) { From fe566e960b17471fa7bcbd11f43c1ca22368d25c Mon Sep 17 00:00:00 2001 From: offtkp Date: Tue, 9 Jul 2024 20:57:56 +0300 Subject: [PATCH 02/10] Update GL ES patch to work 
with latest changes --- .github/gles.patch | 103 +++++++++------------------------------------ 1 file changed, 19 insertions(+), 84 deletions(-) diff --git a/.github/gles.patch b/.github/gles.patch index f1dc2c73d..3d6c96fe3 100644 --- a/.github/gles.patch +++ b/.github/gles.patch @@ -1,52 +1,3 @@ -diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp -index a11a6ffa..77486a09 100644 ---- a/src/core/renderer_gl/renderer_gl.cpp -+++ b/src/core/renderer_gl/renderer_gl.cpp -@@ -357,27 +357,27 @@ void RendererGL::bindTexturesToSlots() { - } - - glActiveTexture(GL_TEXTURE0 + 3); -- glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); -+ // glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); - glActiveTexture(GL_TEXTURE0); - } - - void RendererGL::updateLightingLUT() { -- gpu.lightingLUTDirty = false; -- std::array u16_lightinglut; -- -- for (int i = 0; i < gpu.lightingLUT.size(); i++) { -- uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1); -- u16_lightinglut[i] = value * 65535 / 4095; -- } -- -- glActiveTexture(GL_TEXTURE0 + 3); -- glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); -- glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data()); -- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR); -- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR); -- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); -- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); -- glActiveTexture(GL_TEXTURE0); -+ // gpu.lightingLUTDirty = false; -+ // std::array u16_lightinglut; -+ -+ // for (int i = 0; i < gpu.lightingLUT.size(); i++) { -+ // uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1); -+ // u16_lightinglut[i] = value * 65535 / 4095; -+ // } -+ -+ // glActiveTexture(GL_TEXTURE0 + 3); -+ // glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); -+ // 
glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data()); -+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR); -+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR); -+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); -+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); -+ // glActiveTexture(GL_TEXTURE0); - } - - void RendererGL::drawVertices(PICA::PrimType primType, std::span vertices) { diff --git a/src/host_shaders/opengl_display.frag b/src/host_shaders/opengl_display.frag index 612671c8..1937f711 100644 --- a/src/host_shaders/opengl_display.frag @@ -70,7 +21,7 @@ index 990e2f80..2e7842ac 100644 void main() { diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag -index f6fa6c55..bb88e278 100644 +index 6b728ace..eaac1484 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -1,4 +1,5 @@ @@ -80,34 +31,16 @@ index f6fa6c55..bb88e278 100644 in vec3 v_tangent; in vec3 v_normal; -@@ -27,7 +28,7 @@ uniform bool u_depthmapEnable; - uniform sampler2D u_tex0; - uniform sampler2D u_tex1; - uniform sampler2D u_tex2; --uniform sampler1DArray u_tex_lighting_lut; -+// uniform sampler1DArray u_tex_lighting_lut; - - uniform uint u_picaRegs[0x200 - 0x48]; - -@@ -145,16 +146,23 @@ vec4 tevCalculateCombiner(int tev_id) { - #define RR_LUT 6u +@@ -150,11 +151,17 @@ float lutLookup(uint lut, uint light, float value) { + return texelFetch(u_tex_lighting_lut, ivec2(int(value * 256.0), lut), 0).r; + } - float lutLookup(uint lut, uint light, float value) { -- if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1; -- if (lut == SP_LUT) lut = light + 8; -- return texture(u_tex_lighting_lut, vec2(value, lut)).r; -+ // if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1; -+ // if (lut == SP_LUT) lut = light + 8; -+ // return 
texture(u_tex_lighting_lut, vec2(value, lut)).r; -+ return 0.0; -+} -+ +// some gles versions have bitfieldExtract and complain if you redefine it, some don't and compile error, using this instead +uint bitfieldExtractCompat(uint val, int off, int size) { + uint mask = uint((1 << size) - 1); + return uint(val >> off) & mask; - } - ++} ++ vec3 regToColor(uint reg) { // Normalization scale to convert from [0...255] to [0.0...1.0] const float scale = 1.0 / 255.0; @@ -117,7 +50,7 @@ index f6fa6c55..bb88e278 100644 } // Convert an arbitrary-width floating point literal to an f32 -@@ -189,7 +197,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -189,7 +196,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { vec3 view = normalize(v_view); uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu); @@ -126,7 +59,7 @@ index f6fa6c55..bb88e278 100644 primary_color = secondary_color = vec4(1.0); return; } -@@ -213,7 +221,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -213,7 +220,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { bool error_unimpl = false; for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { @@ -135,7 +68,7 @@ index f6fa6c55..bb88e278 100644 uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id); uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id); -@@ -224,14 +232,14 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -224,14 +231,14 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + 0x10u * light_id); vec3 light_vector = normalize(vec3( @@ -153,7 +86,7 @@ index f6fa6c55..bb88e278 100644 // error_unimpl = true; half_vector = normalize(normalize(light_vector + v_view) + view); } -@@ -242,12 +250,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -242,12 +249,12 @@ void calcLighting(out vec4 
primary_color, out vec4 secondary_color) { } for (int c = 0; c < 7; c++) { @@ -169,7 +102,7 @@ index f6fa6c55..bb88e278 100644 if (input_id == 0u) d[c] = dot(normal, half_vector); else if (input_id == 1u) -@@ -260,9 +268,9 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -260,9 +267,9 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146u + 0x10u * light_id); uint GPUREG_LIGHTi_SPOTDIR_HIGH = readPicaReg(0x0147u + 0x10u * light_id); vec3 spot_light_vector = normalize(vec3( @@ -182,7 +115,7 @@ index f6fa6c55..bb88e278 100644 )); d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP); } else if (input_id == 5u) { -@@ -273,13 +281,13 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -273,13 +280,13 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { } d[c] = lutLookup(uint(c), light_id, d[c] * 0.5 + 0.5) * scale; @@ -198,7 +131,7 @@ index f6fa6c55..bb88e278 100644 if (lookup_config == 0u) { d[D1_LUT] = 0.0; d[FR_LUT] = 0.0; -@@ -310,7 +318,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -310,7 +317,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { float NdotL = dot(normal, light_vector); // Li dot N // Two sided diffuse @@ -207,7 +140,7 @@ index f6fa6c55..bb88e278 100644 NdotL = max(0.0, NdotL); else NdotL = abs(NdotL); -@@ -321,8 +329,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -321,8 +328,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { secondary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] + regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT])); } @@ -249,14 +182,16 @@ index a25d7a6d..7cf40398 100644 + // gl_ClipDistance[1] = dot(clipData, a_coords); } diff --git a/third_party/opengl/opengl.hpp 
b/third_party/opengl/opengl.hpp -index f368f573..5ead7f63 100644 +index 9997e63b..5d9d7804 100644 --- a/third_party/opengl/opengl.hpp +++ b/third_party/opengl/opengl.hpp -@@ -520,21 +520,21 @@ namespace OpenGL { +@@ -561,22 +561,22 @@ namespace OpenGL { + static void disableScissor() { glDisable(GL_SCISSOR_TEST); } static void enableBlend() { glEnable(GL_BLEND); } static void disableBlend() { glDisable(GL_BLEND); } - static void enableLogicOp() { glEnable(GL_COLOR_LOGIC_OP); } +- static void enableLogicOp() { glEnable(GL_COLOR_LOGIC_OP); } - static void disableLogicOp() { glDisable(GL_COLOR_LOGIC_OP); } ++ static void enableLogicOp() { /* glEnable(GL_COLOR_LOGIC_OP); */ } + static void disableLogicOp() { /* glDisable(GL_COLOR_LOGIC_OP); */ } static void enableDepth() { glEnable(GL_DEPTH_TEST); } static void disableDepth() { glDisable(GL_DEPTH_TEST); } From 9b4e5841e7154563a4dda0153c87a46250f46543 Mon Sep 17 00:00:00 2001 From: offtkp Date: Sun, 14 Jul 2024 00:56:55 +0300 Subject: [PATCH 03/10] Summary of the current state of lighting fragment_light.elf: works toon_shading.elf: works Cave story 3d: no longer too dark, but the intro has a bug Rabbids: positional lighting fixes, looks better Mario 3d land: ground is not too bright, mario is not yellow Kirby triple deluxe: Kirby is not shining like before Luigis mansion: better but luigi lighting is way off and spotlight sometimes turns off Captain Toad: bit better, still too bright Omega ruby: looks fine to me Pokemon Super Mystery Dungeon: looks fine to me Lego batman: didn't try but should work? 
--- src/host_shaders/opengl_fragment_shader.frag | 347 +++++++++++++------ 1 file changed, 244 insertions(+), 103 deletions(-) diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag index 6b728ace1..1b8e97511 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -38,6 +38,21 @@ vec4 tevSources[16]; vec4 tevNextPreviousBuffer; bool tevUnimplementedSourceFlag = false; +// Holds the enabled state of the lighting samples for various PICA configurations +// As explained in https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTING_CONFIG0 +const bool samplerEnabled[9 * 7] = bool[9 * 7]( + // D0 D1 SP FR RB RG RR + true, false, true, false, false, false, true, // Configuration 0: D0, SP, RR + false, false, true, true, false, false, true, // Configuration 1: FR, SP, RR + true, true, false, false, false, false, true, // Configuration 2: D0, D1, RR + true, true, false, true, false, false, false, // Configuration 3: D0, D1, FR + true, true, true, false, true, true, true, // Configuration 4: All except for FR + true, false, true, true, true, true, true, // Configuration 5: All except for D1 + true, true, true, true, false, false, true, // Configuration 6: All except for RB and RG + false, false, false, false, false, false, false, // Configuration 7: Unused + true, true, true, true, true, true, true // Configuration 8: All +); + // OpenGL ES 1.1 reference pages for TEVs (this is what the PICA200 implements): // https://registry.khronos.org/OpenGL-Refpages/es1.1/xhtml/glTexEnv.xml @@ -144,10 +159,16 @@ vec4 tevCalculateCombiner(int tev_id) { #define RG_LUT 5u #define RR_LUT 6u -float lutLookup(uint lut, uint light, float value) { - if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1u; - if (lut == SP_LUT) lut = light + 8u; - return texelFetch(u_tex_lighting_lut, ivec2(int(value * 256.0), lut), 0).r; +uint GPUREG_LIGHTi_CONFIG; +uint GPUREG_LIGHTING_CONFIG1; +uint 
GPUREG_LIGHTING_LUTINPUT_SELECT; +uint GPUREG_LIGHTING_LUTINPUT_SCALE; +uint GPUREG_LIGHTING_LUTINPUT_ABS; +bool error_unimpl; +vec4 unimpl_color; + +float lutLookup(uint lut, int index) { + return texelFetch(u_tex_lighting_lut, ivec2(index, lut), 0).r; } vec3 regToColor(uint reg) { @@ -178,42 +199,155 @@ float decodeFP(uint hex, uint E, uint M) { return uintBitsToFloat(hex); } +bool isSamplerEnabled(uint environment_id, uint lut_id) { + return samplerEnabled[7 * environment_id + lut_id]; +} + +float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light_vector, vec3 half_vector) { + uint lut_index; + // lut_id is one of these values + // 0 D0 + // 1 D1 + // 2 SP + // 3 FR + // 4 RB + // 5 RG + // 6 RR + + // lut_index on the other hand represents the actual index of the LUT in the texture + // u_tex_lighting_lut has 24 LUTs and they are used like so: + // 0 D0 + // 1 D1 + // 2 is missing because SP uses LUTs 8-15 + // 3 FR + // 4 RB + // 5 RG + // 6 RR + // 8-15 SP0-7 + // 16-23 DA0-7, but this is not handled in this function as the lookup is a bit different + + int bit_in_config1; + if (lut_id == SP_LUT) { + // These are the spotlight attenuation LUTs + bit_in_config1 = 8 + int(light_id & 7u); + lut_index = 8u + light_id; + } else if (lut_id <= 6) { + bit_in_config1 = 16 + int(lut_id); + lut_index = lut_id; + } else { + error_unimpl = true; + } + + // The light environment configuration controls which LUTs are available for use + // If a LUT is not available in the selected configuration, its value will always read a constant 1.0 regardless of the enable state in GPUREG_LIGHTING_CONFIG1 + // If RR is enabled but not RG or RB, the output of RR is used for the three components; Red, Green and Blue. 
+ bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment + + if (!current_sampler_enabled || (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) { + return 1.0; + } + + uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) * 4, 3); + float scale = float(1u << scale_id); + if (scale_id >= 6u) scale /= 256.0; + + float delta = 1.0; + uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) * 4, 3); + switch (input_id) { + case 0u: { + delta = dot(v_normal, normalize(half_vector)); + break; + } + case 1u: { + delta = dot(normalize(v_view), normalize(half_vector)); + break; + } + case 2u: { + delta = dot(v_normal, normalize(v_view)); + break; + } + case 3u: { + delta = dot(light_vector, v_normal); + break; + } + case 4u: { + // These are ints so that bitfieldExtract sign extends for us + int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + 0x10u * light_id)); + int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + 0x10u * light_id)); + + // These are fixed point 1.1.11 values, so we need to convert them to float + float x = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13)) / 2047.0; + float y = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13)) / 2047.0; + float z = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13)) / 2047.0; + vec3 spotlight_vector = vec3(x, y, z); + delta = dot(light_vector, spotlight_vector); // spotlight direction is negated so we don't negate light_vector + break; + } + case 5u: { + delta = 1.0; // TODO: cos (aka CP); + error_unimpl = true; + break; + } + default: { + delta = 1.0; + error_unimpl = true; + break; + } + } + + // 0 = enabled + if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + 4 * int(lut_id), 1) == 0u) { + // Two sided diffuse + if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) { + delta = max(delta, 0.0); + } else { + delta = abs(delta); + } + int index = int(clamp(floor(delta * 256.0), 0.f, 
255.f)); + return lutLookup(lut_index, index) * scale; + } else { + // Range is [-1, 1] so we need to map it to [0, 1] + int index = int(clamp(floor(delta * 128.0), -128.f, 127.f)); + if (index < 0) index += 256; + return lutLookup(lut_index, index) * scale; + } +} + // Implements the following algorthm: https://mathb.in/26766 void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - // Quaternions describe a transformation from surface-local space to eye space. - // In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1), - // the tangent vector is (1,0,0), and the bitangent vector is (0,1,0). - vec3 normal = normalize(v_normal); - vec3 tangent = normalize(v_tangent); - vec3 bitangent = normalize(v_bitangent); - vec3 view = normalize(v_view); + error_unimpl = false; + unimpl_color = vec4(1.0, 0.0, 1.0, 1.0); uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu); if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) { - primary_color = secondary_color = vec4(1.0); + primary_color = secondary_color = vec4(0.0); return; } - uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0u); uint GPUREG_LIGHTING_NUM_LIGHTS = (readPicaReg(0x01C2u) & 0x7u) + 1u; uint GPUREG_LIGHTING_LIGHT_PERMUTATION = readPicaReg(0x01D9u); primary_color = vec4(vec3(0.0), 1.0); secondary_color = vec4(vec3(0.0), 1.0); - primary_color.rgb += regToColor(GPUREG_LIGHTING_AMBIENT); - - uint GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u); - uint GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u); - uint GPUREG_LIGHTING_CONFIG0 = readPicaReg(0x01C3u); - uint GPUREG_LIGHTING_CONFIG1 = readPicaReg(0x01C4u); uint GPUREG_LIGHTING_LUTINPUT_SCALE = readPicaReg(0x01D2u); - float d[7]; + uint GPUREG_LIGHTING_CONFIG0 = readPicaReg(0x01C3u); + GPUREG_LIGHTING_CONFIG1 = readPicaReg(0x01C4u); + GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u); + GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u); + + vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0); + vec4 
specular_sum = vec4(0.0, 0.0, 0.0, 1.0); + + uint environment_id = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 4, 4); + bool clamp_highlights = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u; - bool error_unimpl = false; + uint light_id; + vec3 light_vector; + vec3 half_vector; for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { - uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); + light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id); uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id); @@ -221,93 +355,29 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143u + 0x10u * light_id); uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144u + 0x10u * light_id); uint GPUREG_LIGHTi_VECTOR_HIGH = readPicaReg(0x0145u + 0x10u * light_id); - uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + 0x10u * light_id); + GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + 0x10u * light_id); - vec3 light_vector = normalize(vec3( + float light_distance; + vec3 light_position = vec3( decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u) - )); - - vec3 half_vector; + ); // Positional Light if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { - // error_unimpl = true; - half_vector = normalize(normalize(light_vector + v_view) + view); + light_vector = light_position + v_view; } // Directional light else { - half_vector = normalize(normalize(light_vector) + view); - } - - for (int c = 0; c < 7; c++) { - if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) { - uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3); - float scale = float(1u << scale_id); - if (scale_id >= 6u) scale /= 256.0; - - uint 
input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3); - if (input_id == 0u) - d[c] = dot(normal, half_vector); - else if (input_id == 1u) - d[c] = dot(view, half_vector); - else if (input_id == 2u) - d[c] = dot(normal, view); - else if (input_id == 3u) - d[c] = dot(light_vector, normal); - else if (input_id == 4u) { - uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146u + 0x10u * light_id); - uint GPUREG_LIGHTi_SPOTDIR_HIGH = readPicaReg(0x0147u + 0x10u * light_id); - vec3 spot_light_vector = normalize(vec3( - decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u), - decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u), - decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u) - )); - d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP); - } else if (input_id == 5u) { - d[c] = 1.0; // TODO: cos (aka CP); - error_unimpl = true; - } else { - d[c] = 1.0; - } - - d[c] = lutLookup(uint(c), light_id, d[c] * 0.5 + 0.5) * scale; - if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]); - } else { - d[c] = 1.0; - } - } - - uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 4, 4); - if (lookup_config == 0u) { - d[D1_LUT] = 0.0; - d[FR_LUT] = 0.0; - d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; - } else if (lookup_config == 1u) { - d[D0_LUT] = 0.0; - d[D1_LUT] = 0.0; - d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; - } else if (lookup_config == 2u) { - d[FR_LUT] = 0.0; - d[SP_LUT] = 0.0; - d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; - } else if (lookup_config == 3u) { - d[SP_LUT] = 0.0; - d[RG_LUT] = d[RB_LUT] = d[RR_LUT] = 1.0; - } else if (lookup_config == 4u) { - d[FR_LUT] = 0.0; - } else if (lookup_config == 5u) { - d[D1_LUT] = 0.0; - } else if (lookup_config == 6u) { - d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; + light_vector = light_position; } - float distance_factor = 1.0; // a - float indirect_factor = 1.0; // fi - float shadow_factor = 1.0; // o + light_distance = 
length(light_vector); + light_vector = normalize(light_vector); + half_vector = light_vector + normalize(v_view); - float NdotL = dot(normal, light_vector); // Li dot N + float NdotL = dot(v_normal, light_vector); // N dot Li // Two sided diffuse if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) @@ -315,20 +385,91 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { else NdotL = abs(NdotL); - float light_factor = distance_factor * d[SP_LUT] * indirect_factor * shadow_factor; + float geometric_factor; + bool use_geo_0 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u; + bool use_geo_1 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u; + if (use_geo_0 || use_geo_1) { + geometric_factor = dot(half_vector, half_vector); + geometric_factor = geometric_factor == 0.0 ? 0.0 : min(NdotL / geometric_factor, 1.0); + } - primary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE) * NdotL); - secondary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] + - regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT])); + // Distance attenuation is computed differently from the other factors, for example + // it doesn't store its scale in GPUREG_LIGHTING_LUTINPUT_SCALE and it doesn't use + // GPUREG_LIGHTING_LUTINPUT_SELECT. Instead, it uses the distance from the light to the + // fragment and the distance attenuation scale and bias to calculate where in the LUT to look up. 
+ // See: https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTi_ATTENUATION_SCALE + float distance_attenuation = 1.0; + if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) { + uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au), 0, 20); + uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu), 0, 20); + + float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u); + float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u); + + float delta = light_distance * distance_attenuation_scale + distance_attenuation_bias; + delta = clamp(delta, 0.0, 1.0); + int index = int(clamp(floor(delta * 255.0), 0.0, 255.0)); + distance_attenuation = lutLookup(16u + light_id, index); + } + + float spotlight_attenuation = lightLutLookup(environment_id, SP_LUT, light_id, light_vector, half_vector); + float specular0_distribution = lightLutLookup(environment_id, D0_LUT, light_id, light_vector, half_vector); + float specular1_distribution = lightLutLookup(environment_id, D1_LUT, light_id, light_vector, half_vector); + vec3 reflected_color; + reflected_color.r = lightLutLookup(environment_id, RR_LUT, light_id, light_vector, half_vector); + + if (isSamplerEnabled(environment_id, RG_LUT)) { + reflected_color.g = lightLutLookup(environment_id, RG_LUT, light_id, light_vector, half_vector); + } else { + reflected_color.g = reflected_color.r; + } + + if (isSamplerEnabled(environment_id, RB_LUT)) { + reflected_color.b = lightLutLookup(environment_id, RB_LUT, light_id, light_vector, half_vector); + } else { + reflected_color.b = reflected_color.r; + } + + vec3 specular0 = regToColor(GPUREG_LIGHTi_SPECULAR0) * specular0_distribution; + vec3 specular1 = regToColor(GPUREG_LIGHTi_SPECULAR1) * specular1_distribution * reflected_color; + + specular0 *= use_geo_0 ? geometric_factor : 1.0; + specular1 *= use_geo_1 ? 
geometric_factor : 1.0; + + float clamp_factor = 1.0; + if (clamp_highlights && NdotL == 0.0) { + clamp_factor = 0.0; + } + + float light_factor = distance_attenuation * spotlight_attenuation; + diffuse_sum.rgb += light_factor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE) * NdotL); + specular_sum.rgb += light_factor * clamp_factor * (specular0 + specular1); } + uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1); uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1); + // Uses parameters from the last light as Fresnel is only applied to the last light + float fresnel_factor; + + if (fresnel_output1 == 1u || fresnel_output2 == 1u) { + fresnel_factor = lightLutLookup(environment_id, FR_LUT, light_id, light_vector, half_vector); + } + + if (fresnel_output1 == 1u) { + diffuse_sum.a = fresnel_factor; + } - if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT]; - if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT]; + if (fresnel_output2 == 1u) { + specular_sum.a = fresnel_factor; + } + + uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0u); + vec4 global_ambient = vec4(regToColor(GPUREG_LIGHTING_AMBIENT), 1.0); + primary_color = clamp(global_ambient + diffuse_sum, vec4(0.0), vec4(1.0)); + secondary_color = clamp(specular_sum, vec4(0.0), vec4(1.0)); if (error_unimpl) { - // secondary_color = primary_color = vec4(1.0, 0., 1.0, 1.0); + secondary_color = primary_color = unimpl_color; } } From f6ebf8398230928a95101de021989e6a64a36804 Mon Sep 17 00:00:00 2001 From: offtkp Date: Tue, 16 Jul 2024 00:18:53 +0300 Subject: [PATCH 04/10] Update gles.patch --- .github/gles.patch | 176 +++++++++++++++++++++++++++------------------ 1 file changed, 106 insertions(+), 70 deletions(-) diff --git a/.github/gles.patch b/.github/gles.patch index 3d6c96fe3..f52705187 100644 --- a/.github/gles.patch +++ b/.github/gles.patch @@ -21,7 +21,7 @@ index 990e2f80..2e7842ac 100644 void main() { diff --git 
a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag -index 6b728ace..eaac1484 100644 +index 1b8e9751..96238000 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -1,4 +1,5 @@ @@ -31,8 +31,8 @@ index 6b728ace..eaac1484 100644 in vec3 v_tangent; in vec3 v_normal; -@@ -150,11 +151,17 @@ float lutLookup(uint lut, uint light, float value) { - return texelFetch(u_tex_lighting_lut, ivec2(int(value * 256.0), lut), 0).r; +@@ -171,11 +172,17 @@ float lutLookup(uint lut, int index) { + return texelFetch(u_tex_lighting_lut, ivec2(index, lut), 0).r; } +// some gles versions have bitfieldExtract and complain if you redefine it, some don't and compile error, using this instead @@ -50,89 +50,103 @@ index 6b728ace..eaac1484 100644 } // Convert an arbitrary-width floating point literal to an f32 -@@ -189,7 +196,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - vec3 view = normalize(v_view); +@@ -243,16 +250,16 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light + // If RR is enabled but not RG or RB, the output of RR is used for the three components; Red, Green and Blue. 
+ bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment + +- if (!current_sampler_enabled || (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) { ++ if (!current_sampler_enabled || (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) { + return 1.0; + } + +- uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) * 4, 3); ++ uint scale_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) * 4, 3); + float scale = float(1u << scale_id); + if (scale_id >= 6u) scale /= 256.0; + + float delta = 1.0; +- uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) * 4, 3); ++ uint input_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) * 4, 3); + switch (input_id) { + case 0u: { + delta = dot(v_normal, normalize(half_vector)); +@@ -271,14 +278,14 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light + break; + } + case 4u: { +- // These are ints so that bitfieldExtract sign extends for us ++ // These are ints so that bitfieldExtractCompat sign extends for us + int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + 0x10u * light_id)); + int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + 0x10u * light_id)); + + // These are fixed point 1.1.11 values, so we need to convert them to float +- float x = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13)) / 2047.0; +- float y = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13)) / 2047.0; +- float z = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13)) / 2047.0; ++ float x = float(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13)) / 2047.0; ++ float y = float(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13)) / 2047.0; ++ float z = float(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13)) / 2047.0; + vec3 spotlight_vector = vec3(x, y, z); + delta = dot(light_vector, spotlight_vector); // 
spotlight direction is negated so we don't negate light_vector + break; +@@ -296,9 +303,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light + } + + // 0 = enabled +- if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + 4 * int(lut_id), 1) == 0u) { ++ if (bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + 4 * int(lut_id), 1) == 0u) { + // Two sided diffuse +- if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) { ++ if (bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) { + delta = max(delta, 0.0); + } else { + delta = abs(delta); +@@ -319,7 +326,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + unimpl_color = vec4(1.0, 0.0, 1.0, 1.0); uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu); - if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) { + if (bitfieldExtractCompat(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) { - primary_color = secondary_color = vec4(1.0); + primary_color = secondary_color = vec4(0.0); return; } -@@ -213,7 +220,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - bool error_unimpl = false; +@@ -339,15 +346,15 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0); + vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0); + +- uint environment_id = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 4, 4); +- bool clamp_highlights = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u; ++ uint environment_id = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 4, 4); ++ bool clamp_highlights = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u; + + uint light_id; + vec3 light_vector; + vec3 half_vector; for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { -- uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); -+ uint light_id = bitfieldExtractCompat(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); +- light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, 
int(i * 3u), 3); ++ light_id = bitfieldExtractCompat(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id); uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id); -@@ -224,14 +231,14 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + 0x10u * light_id); +@@ -359,12 +366,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - vec3 light_vector = normalize(vec3( + float light_distance; + vec3 light_position = vec3( - decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u), - decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u) + decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u), + decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u) - )); - - vec3 half_vector; + ); // Positional Light - if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { + if (bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { - // error_unimpl = true; - half_vector = normalize(normalize(light_vector + v_view) + view); - } -@@ -242,12 +249,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - } - - for (int c = 0; c < 7; c++) { -- if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) { -- uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3); -+ if (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) { -+ uint scale_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3); - float scale = float(1u << scale_id); - if (scale_id >= 6u) scale /= 256.0; - -- uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3); -+ uint input_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3); - if 
(input_id == 0u) - d[c] = dot(normal, half_vector); - else if (input_id == 1u) -@@ -260,9 +267,9 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146u + 0x10u * light_id); - uint GPUREG_LIGHTi_SPOTDIR_HIGH = readPicaReg(0x0147u + 0x10u * light_id); - vec3 spot_light_vector = normalize(vec3( -- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u), -- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u), -- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u) -+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u), -+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u), -+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u) - )); - d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP); - } else if (input_id == 5u) { -@@ -273,13 +280,13 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - } - - d[c] = lutLookup(uint(c), light_id, d[c] * 0.5 + 0.5) * scale; -- if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]); -+ if (bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]); - } else { - d[c] = 1.0; - } + light_vector = light_position + v_view; } -- uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 4, 4); -+ uint lookup_config = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 4, 4); - if (lookup_config == 0u) { - d[D1_LUT] = 0.0; - d[FR_LUT] = 0.0; -@@ -310,7 +317,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - float NdotL = dot(normal, light_vector); // Li dot N +@@ -380,14 +387,14 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + float NdotL = dot(v_normal, light_vector); // N dot Li // Two sided diffuse - if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) @@ -140,17 +154,39 @@ index 6b728ace..eaac1484 
100644 NdotL = max(0.0, NdotL); else NdotL = abs(NdotL); -@@ -321,8 +328,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - secondary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] + - regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT])); + + float geometric_factor; +- bool use_geo_0 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u; +- bool use_geo_1 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u; ++ bool use_geo_0 = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u; ++ bool use_geo_1 = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u; + if (use_geo_0 || use_geo_1) { + geometric_factor = dot(half_vector, half_vector); + geometric_factor = geometric_factor == 0.0 ? 0.0 : min(NdotL / geometric_factor, 1.0); +@@ -399,9 +406,9 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + // fragment and the distance attenuation scale and bias to calculate where in the LUT to look up. 
+ // See: https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTi_ATTENUATION_SCALE + float distance_attenuation = 1.0; +- if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) { +- uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au), 0, 20); +- uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu), 0, 20); ++ if (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) { ++ uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtractCompat(readPicaReg(0x014Au), 0, 20); ++ uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtractCompat(readPicaReg(0x014Bu), 0, 20); + + float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u); + float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u); +@@ -446,8 +453,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + specular_sum.rgb += light_factor * clamp_factor * (specular0 + specular1); } + - uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1); - uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1); + uint fresnel_output1 = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 2, 1); + uint fresnel_output2 = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 3, 1); - - if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT]; - if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT]; + // Uses parameters from the last light as Fresnel is only applied to the last light + float fresnel_factor; + diff --git a/src/host_shaders/opengl_vertex_shader.vert b/src/host_shaders/opengl_vertex_shader.vert index a25d7a6d..7cf40398 100644 --- a/src/host_shaders/opengl_vertex_shader.vert From e5bed23cee4d0223639167edeaa5e17058588ab2 Mon Sep 17 00:00:00 2001 From: offtkp Date: Tue, 16 Jul 2024 15:48:34 +0300 Subject: [PATCH 05/10] Fix Luigi's flashlight in Luigi's Mansion --- src/host_shaders/opengl_fragment_shader.frag | 2 +- 1 file changed, 1 insertion(+), 
1 deletion(-) diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag index 1b8e97511..c3c7cf0bf 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -347,7 +347,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { vec3 half_vector; for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { - light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); + light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i << 2u), 3); uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id); uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id); From 139f35588d160928e37aa25b6de1c846d8543f59 Mon Sep 17 00:00:00 2001 From: offtkp Date: Tue, 16 Jul 2024 16:23:42 +0300 Subject: [PATCH 06/10] Switch to shifts in some places instead of multiplication --- src/host_shaders/opengl_fragment_shader.frag | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag index c3c7cf0bf..32f4c1ec5 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -247,12 +247,12 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light return 1.0; } - uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) * 4, 3); + uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3); float scale = float(1u << scale_id); if (scale_id >= 6u) scale /= 256.0; float delta = 1.0; - uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) * 4, 3); + uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3); switch (input_id) { case 0u: { delta = dot(v_normal, normalize(half_vector)); @@ -296,7 +296,7 @@ float lightLutLookup(uint environment_id, uint lut_id, uint 
light_id, vec3 light } // 0 = enabled - if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + 4 * int(lut_id), 1) == 0u) { + if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) { // Two sided diffuse if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) { delta = max(delta, 0.0); @@ -347,7 +347,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { vec3 half_vector; for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { - light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i << 2u), 3); + light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3); uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id); uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id); From 8b4eacc7b6982c0f254baa223cf3291d50e55e49 Mon Sep 17 00:00:00 2001 From: offtkp Date: Tue, 16 Jul 2024 20:32:35 +0300 Subject: [PATCH 07/10] More luigi mansion fixes --- src/host_shaders/opengl_fragment_shader.frag | 22 ++++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag index 32f4c1ec5..ae43d9937 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -272,8 +272,8 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light } case 4u: { // These are ints so that bitfieldExtract sign extends for us - int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + 0x10u * light_id)); - int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + 0x10u * light_id)); + int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + (light_id << 4u))); + int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + (light_id << 4u))); // These are fixed point 1.1.11 values, so we need to convert them to float float x = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13)) / 2047.0; @@ -349,13 +349,13 @@ void 
calcLighting(out vec4 primary_color, out vec4 secondary_color) { for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3); - uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id); - uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id); - uint GPUREG_LIGHTi_DIFFUSE = readPicaReg(0x0142u + 0x10u * light_id); - uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143u + 0x10u * light_id); - uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144u + 0x10u * light_id); - uint GPUREG_LIGHTi_VECTOR_HIGH = readPicaReg(0x0145u + 0x10u * light_id); - GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + 0x10u * light_id); + uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + (light_id << 4u)); + uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + (light_id << 4u)); + uint GPUREG_LIGHTi_DIFFUSE = readPicaReg(0x0142u + (light_id << 4u)); + uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143u + (light_id << 4u)); + uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144u + (light_id << 4u)); + uint GPUREG_LIGHTi_VECTOR_HIGH = readPicaReg(0x0145u + (light_id << 4u)); + GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + (light_id << 4u)); float light_distance; vec3 light_position = vec3( @@ -400,8 +400,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { // See: https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTi_ATTENUATION_SCALE float distance_attenuation = 1.0; if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) { - uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au), 0, 20); - uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu), 0, 20); + uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au + (light_id << 4u)), 0, 20); + uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20); float distance_attenuation_bias = 
decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u); float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u); From 2ca886f64f56f47fffc9b4125458b728352c9e7e Mon Sep 17 00:00:00 2001 From: offtkp Date: Wed, 17 Jul 2024 22:08:48 +0300 Subject: [PATCH 08/10] Move normal calculation to the fragment shader --- src/host_shaders/opengl_fragment_shader.frag | 30 +++++++++++++++----- src/host_shaders/opengl_vertex_shader.vert | 16 ++--------- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag index ae43d9937..582d6eefe 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -1,8 +1,6 @@ #version 410 core -in vec3 v_tangent; -in vec3 v_normal; -in vec3 v_bitangent; +in vec4 v_quaternion; in vec4 v_colour; in vec3 v_texcoord0; in vec2 v_texcoord1; @@ -37,6 +35,7 @@ uint readPicaReg(uint reg_addr) { return u_picaRegs[reg_addr - 0x48u]; } vec4 tevSources[16]; vec4 tevNextPreviousBuffer; bool tevUnimplementedSourceFlag = false; +vec3 normal; // Holds the enabled state of the lighting samples for various PICA configurations // As explained in https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTING_CONFIG0 @@ -255,7 +254,7 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3); switch (input_id) { case 0u: { - delta = dot(v_normal, normalize(half_vector)); + delta = dot(normal, normalize(half_vector)); break; } case 1u: { @@ -263,11 +262,11 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light break; } case 2u: { - delta = dot(v_normal, normalize(v_view)); + delta = dot(normal, normalize(v_view)); break; } case 3u: { - delta = dot(light_vector, v_normal); + delta = dot(light_vector, normal); break; } case 4u: { @@ -313,6 +312,12 @@ float 
lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light } } +vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { + vec3 u = q.xyz; + float s = q.w; + return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v); +} + // Implements the following algorthm: https://mathb.in/26766 void calcLighting(out vec4 primary_color, out vec4 secondary_color) { error_unimpl = false; @@ -336,6 +341,17 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u); GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u); + uint bump_mode = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 28, 2); + + // Bump mode is ignored for now because it breaks some games ie. Toad Treasure Tracker + // Could be because the texture is not sampled correctly, may need the clamp/border color configurations + switch (bump_mode) { + default: { + normal = rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), v_quaternion); + break; + } + } + vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0); vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0); @@ -377,7 +393,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { light_vector = normalize(light_vector); half_vector = light_vector + normalize(v_view); - float NdotL = dot(v_normal, light_vector); // N dot Li + float NdotL = dot(normal, light_vector); // N dot Li // Two sided diffuse if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) diff --git a/src/host_shaders/opengl_vertex_shader.vert b/src/host_shaders/opengl_vertex_shader.vert index a25d7a6d7..057f9a88b 100644 --- a/src/host_shaders/opengl_vertex_shader.vert +++ b/src/host_shaders/opengl_vertex_shader.vert @@ -9,9 +9,7 @@ layout(location = 5) in float a_texcoord0_w; layout(location = 6) in vec3 a_view; layout(location = 7) in vec2 a_texcoord2; -out vec3 v_normal; -out vec3 v_tangent; -out vec3 v_bitangent; +out vec4 v_quaternion; out vec4 v_colour; out vec3 v_texcoord0; out vec2 v_texcoord1; @@ -35,12 +33,6 @@ vec4 
abgr8888ToVec4(uint abgr) { return scale * vec4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24)); } -vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { - vec3 u = q.xyz; - float s = q.w; - return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v); -} - // Convert an arbitrary-width floating point literal to an f32 float decodeFP(uint hex, uint E, uint M) { uint width = M + E + 1u; @@ -73,10 +65,6 @@ void main() { v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y); v_view = a_view; - v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion)); - v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion)); - v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion)); - for (int i = 0; i < 6; i++) { v_textureEnvColor[i] = abgr8888ToVec4(u_textureEnvColor[i]); } @@ -95,4 +83,6 @@ void main() { // There's also another, always-on clipping plane based on vertex z gl_ClipDistance[0] = -a_coords.z; gl_ClipDistance[1] = dot(clipData, a_coords); + + v_quaternion = a_quaternion; } From 7e7856fa4440b8584895a3ba4ce77e586d62f400 Mon Sep 17 00:00:00 2001 From: offtkp Date: Thu, 18 Jul 2024 02:51:08 +0300 Subject: [PATCH 09/10] Pack sampler configurations in bitfields instead of bool arrays --- src/host_shaders/opengl_fragment_shader.frag | 49 +++++++++++++------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag index 582d6eefe..23c5c4cb2 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -39,18 +39,37 @@ vec3 normal; // Holds the enabled state of the lighting samples for various PICA configurations // As explained in https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTING_CONFIG0 -const bool samplerEnabled[9 * 7] = bool[9 * 7]( - // D0 D1 SP FR RB RG RR - true, false, true, 
false, false, false, true, // Configuration 0: D0, SP, RR - false, false, true, true, false, false, true, // Configuration 1: FR, SP, RR - true, true, false, false, false, false, true, // Configuration 2: D0, D1, RR - true, true, false, true, false, false, false, // Configuration 3: D0, D1, FR - true, true, true, false, true, true, true, // Configuration 4: All except for FR - true, false, true, true, true, true, true, // Configuration 5: All except for D1 - true, true, true, true, false, false, true, // Configuration 6: All except for RB and RG - false, false, false, false, false, false, false, // Configuration 7: Unused - true, true, true, true, true, true, true // Configuration 8: All -); +// const bool samplerEnabled[9 * 7] = bool[9 * 7]( +// // D0 D1 SP FR RB RG RR +// true, false, true, false, false, false, true, // Configuration 0: D0, SP, RR +// false, false, true, true, false, false, true, // Configuration 1: FR, SP, RR +// true, true, false, false, false, false, true, // Configuration 2: D0, D1, RR +// true, true, false, true, false, false, false, // Configuration 3: D0, D1, FR +// true, true, true, false, true, true, true, // Configuration 4: All except for FR +// true, false, true, true, true, true, true, // Configuration 5: All except for D1 +// true, true, true, true, false, false, true, // Configuration 6: All except for RB and RG +// false, false, false, false, false, false, false, // Configuration 7: Unused +// true, true, true, true, true, true, true // Configuration 8: All +// ); + +// The above have been condensed to two uints to save space +// You can confirm they are the same by running the following: +// for (int i = 0; i < 9 * 7; i++) { +// unsigned arrayIndex = (i >> 5); +// bool b = (samplerEnabledBitfields[arrayIndex] & (1u << (i & 31))) != 0u; +// if (samplerEnabled[i] == b) { +// printf("%d: happy\n", i); +// } else { +// printf("%d: unhappy\n", i); +// } +// } +const uint samplerEnabledBitfields[2] = uint[2](0x7170e645u, 0x7f013fefu); 
+ +bool isSamplerEnabled(uint environment_id, uint lut_id) { + uint index = 7 * environment_id + lut_id; + uint arrayIndex = (index >> 5); + return (samplerEnabledBitfields[arrayIndex] & (1u << (index & 31))) != 0u; +} // OpenGL ES 1.1 reference pages for TEVs (this is what the PICA200 implements): // https://registry.khronos.org/OpenGL-Refpages/es1.1/xhtml/glTexEnv.xml @@ -198,10 +217,6 @@ float decodeFP(uint hex, uint E, uint M) { return uintBitsToFloat(hex); } -bool isSamplerEnabled(uint environment_id, uint lut_id) { - return samplerEnabled[7 * environment_id + lut_id]; -} - float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light_vector, vec3 half_vector) { uint lut_index; // lut_id is one of these values @@ -485,7 +500,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { secondary_color = clamp(specular_sum, vec4(0.0), vec4(1.0)); if (error_unimpl) { - secondary_color = primary_color = unimpl_color; + // secondary_color = primary_color = unimpl_color; } } From b51e2fd25f4a983c93a8781fb4847f8b26409d2b Mon Sep 17 00:00:00 2001 From: offtkp Date: Thu, 18 Jul 2024 02:53:54 +0300 Subject: [PATCH 10/10] Update gles.patch --- .github/gles.patch | 81 ++++++++++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 35 deletions(-) diff --git a/.github/gles.patch b/.github/gles.patch index f52705187..99258011d 100644 --- a/.github/gles.patch +++ b/.github/gles.patch @@ -21,7 +21,7 @@ index 990e2f80..2e7842ac 100644 void main() { diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag -index 1b8e9751..96238000 100644 +index 23c5c4cb..a9851a8b 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -1,4 +1,5 @@ @@ -29,13 +29,13 @@ index 1b8e9751..96238000 100644 +#version 300 es +precision mediump float; - in vec3 v_tangent; - in vec3 v_normal; -@@ -171,11 +172,17 @@ float lutLookup(uint lut, int index) { + 
in vec4 v_quaternion; + in vec4 v_colour; +@@ -189,11 +190,17 @@ float lutLookup(uint lut, int index) { return texelFetch(u_tex_lighting_lut, ivec2(index, lut), 0).r; } -+// some gles versions have bitfieldExtract and complain if you redefine it, some don't and compile error, using this instead ++// some gles versions have bitfieldExtract and complain if you redefine it, some don't and compile error, using this instead +uint bitfieldExtractCompat(uint val, int off, int size) { + uint mask = uint((1 << size) - 1); + return uint(val >> off) & mask; @@ -50,7 +50,7 @@ index 1b8e9751..96238000 100644 } // Convert an arbitrary-width floating point literal to an f32 -@@ -243,16 +250,16 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light +@@ -257,16 +264,16 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light // If RR is enabled but not RG or RB, the output of RR is used for the three components; Red, Green and Blue. bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment @@ -59,25 +59,25 @@ index 1b8e9751..96238000 100644 return 1.0; } -- uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) * 4, 3); -+ uint scale_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) * 4, 3); +- uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3); ++ uint scale_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3); float scale = float(1u << scale_id); if (scale_id >= 6u) scale /= 256.0; float delta = 1.0; -- uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) * 4, 3); -+ uint input_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) * 4, 3); +- uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3); ++ uint input_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3); switch
(input_id) { case 0u: { - delta = dot(v_normal, normalize(half_vector)); -@@ -271,14 +278,14 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light + delta = dot(normal, normalize(half_vector)); +@@ -285,14 +292,14 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light break; } case 4u: { - // These are ints so that bitfieldExtract sign extends for us + // These are ints so that bitfieldExtractCompat sign extends for us - int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + 0x10u * light_id)); - int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + 0x10u * light_id)); + int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + (light_id << 4u))); + int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + (light_id << 4u))); // These are fixed point 1.1.11 values, so we need to convert them to float - float x = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13)) / 2047.0; @@ -89,19 +89,19 @@ index 1b8e9751..96238000 100644 vec3 spotlight_vector = vec3(x, y, z); delta = dot(light_vector, spotlight_vector); // spotlight direction is negated so we don't negate light_vector break; -@@ -296,9 +303,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light +@@ -310,9 +317,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light } // 0 = enabled -- if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + 4 * int(lut_id), 1) == 0u) { -+ if (bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + 4 * int(lut_id), 1) == 0u) { +- if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) { ++ if (bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) { // Two sided diffuse - if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) { + if (bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) { delta = max(delta, 0.0); } else { delta = abs(delta); -@@ -319,7 +326,7 @@ void calcLighting(out vec4 
primary_color, out vec4 secondary_color) { +@@ -339,7 +346,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { unimpl_color = vec4(1.0, 0.0, 1.0, 1.0); uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu); @@ -110,7 +110,16 @@ index 1b8e9751..96238000 100644 primary_color = secondary_color = vec4(0.0); return; } -@@ -339,15 +346,15 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -356,7 +363,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u); + GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u); + +- uint bump_mode = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 28, 2); ++ uint bump_mode = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 28, 2); + + // Bump mode is ignored for now because it breaks some games ie. Toad Treasure Tracker + // Could be because the texture is not sampled correctly, may need the clamp/border color configurations +@@ -370,15 +377,15 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0); vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0); @@ -124,12 +133,12 @@ index 1b8e9751..96238000 100644 vec3 half_vector; for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { -- light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); -+ light_id = bitfieldExtractCompat(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); +- light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3); ++ light_id = bitfieldExtractCompat(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3); - uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id); - uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id); -@@ -359,12 +366,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + (light_id << 4u)); + uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 
(light_id << 4u)); +@@ -390,12 +397,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { float light_distance; vec3 light_position = vec3( @@ -145,8 +154,8 @@ index 1b8e9751..96238000 100644 light_vector = light_position + v_view; } -@@ -380,14 +387,14 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - float NdotL = dot(v_normal, light_vector); // N dot Li +@@ -411,14 +418,14 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + float NdotL = dot(normal, light_vector); // N dot Li // Two sided diffuse - if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) @@ -163,20 +172,20 @@ index 1b8e9751..96238000 100644 if (use_geo_0 || use_geo_1) { geometric_factor = dot(half_vector, half_vector); geometric_factor = geometric_factor == 0.0 ? 0.0 : min(NdotL / geometric_factor, 1.0); -@@ -399,9 +406,9 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -430,9 +437,9 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { // fragment and the distance attenuation scale and bias to calculate where in the LUT to look up. 
// See: https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTi_ATTENUATION_SCALE float distance_attenuation = 1.0; - if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) { -- uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au), 0, 20); -- uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu), 0, 20); +- uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au + (light_id << 4u)), 0, 20); +- uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20); + if (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) { -+ uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtractCompat(readPicaReg(0x014Au), 0, 20); -+ uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtractCompat(readPicaReg(0x014Bu), 0, 20); ++ uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtractCompat(readPicaReg(0x014Au + (light_id << 4u)), 0, 20); ++ uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtractCompat(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20); float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u); float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u); -@@ -446,8 +453,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -477,8 +484,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { specular_sum.rgb += light_factor * clamp_factor * (specular0 + specular1); } @@ -188,7 +197,7 @@ index 1b8e9751..96238000 100644 float fresnel_factor; diff --git a/src/host_shaders/opengl_vertex_shader.vert b/src/host_shaders/opengl_vertex_shader.vert -index a25d7a6d..7cf40398 100644 +index 057f9a88..dc735ced 100644 --- a/src/host_shaders/opengl_vertex_shader.vert +++ b/src/host_shaders/opengl_vertex_shader.vert @@ -1,4 +1,6 @@ @@ -199,7 +208,7 @@ index a25d7a6d..7cf40398 100644 layout(location = 0) in vec4 a_coords; layout(location = 1) in vec4 
a_quaternion; -@@ -20,7 +22,7 @@ out vec2 v_texcoord2; +@@ -18,7 +20,7 @@ out vec2 v_texcoord2; flat out vec4 v_textureEnvColor[6]; flat out vec4 v_textureEnvBufferColor; @@ -208,7 +217,7 @@ index a25d7a6d..7cf40398 100644 // TEV uniforms uniform uint u_textureEnvColor[6]; -@@ -93,6 +95,6 @@ void main() { +@@ -81,8 +83,8 @@ void main() { ); // There's also another, always-on clipping plane based on vertex z @@ -216,6 +225,8 @@ index a25d7a6d..7cf40398 100644 - gl_ClipDistance[1] = dot(clipData, a_coords); + // gl_ClipDistance[0] = -a_coords.z; + // gl_ClipDistance[1] = dot(clipData, a_coords); + + v_quaternion = a_quaternion; } diff --git a/third_party/opengl/opengl.hpp b/third_party/opengl/opengl.hpp index 9997e63b..5d9d7804 100644