diff --git a/.github/gles.patch b/.github/gles.patch index 270e336e2..5a922fcf2 100644 --- a/.github/gles.patch +++ b/.github/gles.patch @@ -21,7 +21,7 @@ index 990e2f80..2e7842ac 100644 void main() { diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag -index 9f369e39..b4bb19d3 100644 +index b9f9fe4c..f1cf286f 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -1,4 +1,5 @@ @@ -31,8 +31,8 @@ index 9f369e39..b4bb19d3 100644 in vec4 v_quaternion; in vec4 v_colour; -@@ -164,11 +165,17 @@ float lutLookup(uint lut, int index) { - return texelFetch(u_tex_lighting_lut, ivec2(index, int(lut)), 0).r; +@@ -166,11 +167,17 @@ float lutLookup(uint lut, int index) { + return texelFetch(u_tex_luts, ivec2(index, int(lut)), 0).r; } +// some gles versions have bitfieldExtractCompat and complain if you redefine it, some don't and compile error, using this instead @@ -50,7 +50,7 @@ index 9f369e39..b4bb19d3 100644 } // Convert an arbitrary-width floating point literal to an f32 -@@ -208,16 +215,16 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light +@@ -210,16 +217,16 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment @@ -70,7 +70,7 @@ index 9f369e39..b4bb19d3 100644 switch (input_id) { case 0u: { delta = dot(normal, normalize(half_vector)); -@@ -239,11 +246,11 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light +@@ -241,11 +248,11 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + (light_id << 4u))); int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + (light_id << 4u))); @@ -86,7 +86,7 @@ index 9f369e39..b4bb19d3 100644 if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000; if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000; -@@ -270,9 +277,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light +@@ -272,9 +279,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light } // 0 = enabled @@ -98,7 +98,7 @@ index 9f369e39..b4bb19d3 100644 delta = max(delta, 0.0); } else { delta = abs(delta); -@@ -296,7 +303,7 @@ vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { +@@ -298,7 +305,7 @@ vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { // Implements the following algorthm: https://mathb.in/26766 void calcLighting(out vec4 primary_color, out vec4 secondary_color) { uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu); @@ -107,7 +107,7 @@ index 9f369e39..b4bb19d3 100644 primary_color = secondary_color = vec4(0.0); return; } -@@ -313,7 +320,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -315,7 +322,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u); GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u); @@ -116,7 +116,7 @@ index 9f369e39..b4bb19d3 100644 // Bump mode is ignored for now because it breaks some games ie. Toad Treasure Tracker switch (bump_mode) { -@@ -326,15 +333,15 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -328,15 +335,15 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0); vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0); @@ -135,7 +135,7 @@ index 9f369e39..b4bb19d3 100644 uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + (light_id << 4u)); uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + (light_id << 4u)); -@@ -346,12 +353,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -348,12 +355,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { float light_distance; vec3 light_position = vec3( @@ -151,7 +151,7 @@ index 9f369e39..b4bb19d3 100644 light_vector = light_position + v_view; } -@@ -367,23 +374,23 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -369,23 +376,23 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { float NdotL = dot(normal, light_vector); // N dot Li // Two sided diffuse @@ -181,7 +181,7 @@ index 9f369e39..b4bb19d3 100644 float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u); float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u); -@@ -428,8 +435,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -430,8 +437,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { specular_sum.rgb += light_factor * clamp_factor * (specular0 + specular1); } diff --git a/docs/3ds/lighting.md b/docs/3ds/lighting.md index 9f4ff2f2f..8b6b98855 100644 --- a/docs/3ds/lighting.md +++ b/docs/3ds/lighting.md @@ -56,7 +56,7 @@ lut_id is one of these values 6 RR lut_index on the other hand represents the actual index of the LUT in the texture -u_tex_lighting_lut has 24 LUTs and they are used like so: +u_tex_luts has 24 LUTs for lighting and they are used like so: 0 D0 1 D1 2 is missing because SP uses LUTs 8-15 diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index 61020f768..c4c8db5c1 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -92,6 +92,9 @@ class GPU { // Set to false by the renderer when the lighting_lut is uploaded ot the GPU bool lightingLUTDirty = false; + bool fogLUTDirty = false; + std::array fogLUT; + GPU(Memory& mem, EmulatorConfig& config); void display() { renderer->display(); } void screenshot(const std::string& name) { renderer->screenshot(name); } diff --git a/include/PICA/pica_frag_config.hpp b/include/PICA/pica_frag_config.hpp index f4142ef1e..337fd2116 100644 --- a/include/PICA/pica_frag_config.hpp +++ b/include/PICA/pica_frag_config.hpp @@ -29,6 +29,18 @@ namespace PICA { std::array tevConfigs; }; + struct FogConfig { + union { + u32 raw{}; + + BitField<0, 3, FogMode> mode; + BitField<3, 1, u32> flipDepth; + BitField<8, 8, u32> fogColorR; + BitField<16, 8, u32> fogColorG; + BitField<24, 8, u32> fogColorB; + }; + }; + struct Light { union { u16 raw; @@ -189,6 +201,7 @@ namespace PICA { struct FragmentConfig { OutputConfig outConfig; TextureConfig texConfig; + FogConfig fogConfig; LightingConfig lighting; bool operator==(const FragmentConfig& config) const { @@ -220,12 +233,21 @@ namespace PICA { setupTevStage(4); setupTevStage(5); #undef setupTevStage + + fogConfig.mode = (FogMode)Helpers::getBits<0, 3>(regs[InternalRegs::TexEnvUpdateBuffer]); + + if (fogConfig.mode == FogMode::Fog) { + fogConfig.flipDepth = Helpers::getBit<16>(regs[InternalRegs::TexEnvUpdateBuffer]); + fogConfig.fogColorR = Helpers::getBits<0, 8>(regs[InternalRegs::FogColor]); + fogConfig.fogColorG = Helpers::getBits<8, 8>(regs[InternalRegs::FogColor]); + fogConfig.fogColorB = Helpers::getBits<16, 8>(regs[InternalRegs::FogColor]); + } } }; static_assert( std::has_unique_object_representations() && std::has_unique_object_representations() && - std::has_unique_object_representations() + std::has_unique_object_representations() && std::has_unique_object_representations() ); } // namespace PICA diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index c4d6a5fb4..636e8f7ce 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -51,6 +51,18 @@ namespace PICA { #undef defineTexEnv // clang-format on + // Fog registers + FogColor = 0xE1, + FogLUTIndex = 0xE6, + FogLUTData0 = 0xE8, + FogLUTData1 = 0xE9, + FogLUTData2 = 0xEA, + FogLUTData3 = 0xEB, + FogLUTData4 = 0xEC, + FogLUTData5 = 0xED, + FogLUTData6 = 0xEE, + FogLUTData7 = 0xEF, + // Framebuffer registers ColourOperation = 0x100, BlendFunc = 0x101, @@ -384,6 +396,12 @@ namespace PICA { GreaterOrEqual = 7, }; + enum class FogMode : u32 { + Disabled = 0, + Fog = 5, + Gas = 7, + }; + struct TexEnvConfig { enum class Source : u8 { PrimaryColor = 0x0, diff --git a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp index 085d990a8..215e5adb0 100644 --- a/include/PICA/shader_gen.hpp +++ b/include/PICA/shader_gen.hpp @@ -24,6 +24,8 @@ namespace PICA::ShaderGen { void compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, u32 lightIndex, u32 lutID); bool isSamplerEnabled(u32 environmentID, u32 lutID); + void compileFog(std::string& shader, const PICA::FragmentConfig& config); + public: FragmentGenerator(API api, Language language) : api(api), language(language) {} std::string generate(const PICA::FragmentConfig& config); diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index d00445ac1..f5a964a34 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -63,7 +63,7 @@ class RendererGL final : public Renderer { OpenGL::VertexBuffer dummyVBO; OpenGL::Texture screenTexture; - OpenGL::Texture lightLUTTexture; + OpenGL::Texture LUTTexture; OpenGL::Framebuffer screenFramebuffer; OpenGL::Texture blankTexture; // The "default" vertex shader to use when using specialized shaders but not PICA vertex shader -> GLSL recompilation @@ -90,6 +90,7 @@ class RendererGL final : public Renderer { void setupUbershaderTexEnv(); void bindTexturesToSlots(); void updateLightingLUT(); + void updateFogLUT(); void initGraphicsContextInternal(); public: diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index ace49feac..fe336edc8 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -74,6 +74,9 @@ void GPU::reset() { lightingLUT.fill(0); lightingLUTDirty = true; + fogLUT.fill(0); + fogLUTDirty = true; + totalAttribCount = 0; fixedAttribMask = 0; fixedAttribIndex = 0; diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp index baaa2256c..995192727 100644 --- a/src/core/PICA/regs.cpp +++ b/src/core/PICA/regs.cpp @@ -135,6 +135,21 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { break; } + case FogLUTData0: + case FogLUTData1: + case FogLUTData2: + case FogLUTData3: + case FogLUTData4: + case FogLUTData5: + case FogLUTData6: + case FogLUTData7: { + const uint32_t index = regs[FogLUTIndex] & 0x7F; + fogLUT[index] = value; + fogLUTDirty = true; + regs[FogLUTIndex] = (index + 1) & 0x7F; + break; + } + case LightingLUTData0: case LightingLUTData1: case LightingLUTData2: diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 3d688bd2e..9802be902 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -130,7 +130,7 @@ std::string FragmentGenerator::generate(const FragmentConfig& config) { uniform sampler2D u_tex0; uniform sampler2D u_tex1; uniform sampler2D u_tex2; - uniform sampler2D u_tex_lighting_lut; + uniform sampler2D u_tex_luts; )"; ret += uniformDefinition; @@ -144,7 +144,7 @@ std::string FragmentGenerator::generate(const FragmentConfig& config) { } float lutLookup(uint lut, int index) { - return texelFetch(u_tex_lighting_lut, ivec2(index, int(lut)), 0).r; + return texelFetch(u_tex_luts, ivec2(index, int(lut)), 0).r; } vec3 regToColor(uint reg) { @@ -194,6 +194,8 @@ std::string FragmentGenerator::generate(const FragmentConfig& config) { compileTEV(ret, i, config); } + compileFog(ret, config); + applyAlphaTest(ret, config); ret += "fragColor = combinerOutput;\n}"; // End of main function @@ -652,4 +654,27 @@ void FragmentGenerator::compileLUTLookup(std::string& shader, const PICA::Fragme shader += "lut_lookup_result *= " + std::to_string(scales[scale]) + ";\n"; } } +} + +void FragmentGenerator::compileFog(std::string& shader, const PICA::FragmentConfig& config) { + if (config.fogConfig.mode != FogMode::Fog) { + return; + } + + float r = config.fogConfig.fogColorR / 255.0f; + float g = config.fogConfig.fogColorG / 255.0f; + float b = config.fogConfig.fogColorB / 255.0f; + + if (config.fogConfig.flipDepth) { + shader += "float fog_index = (1.0 - depth) * 128.0;\n"; + } else { + shader += "float fog_index = depth * 128.0;\n"; + } + + shader += "float clamped_index = clamp(floor(fog_index), 0.0, 127.0);"; + shader += "float delta = fog_index - clamped_index;"; + shader += "vec3 fog_color = vec3(" + std::to_string(r) + ", " + std::to_string(g) + ", " + std::to_string(b) + ");"; + shader += "vec2 value = texelFetch(u_tex_luts, ivec2(int(clamped_index), 24), 0).rg;"; // fog LUT is past the light LUTs + shader += "float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0);"; + shader += "combinerOutput.rgb = mix(fog_color, combinerOutput.rgb, fog_factor);"; } \ No newline at end of file diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 36827027f..5e1462b9e 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -115,10 +115,11 @@ void RendererGL::initGraphicsContextInternal() { const u32 screenTextureWidth = 400; // Top screen is 400 pixels wide, bottom is 320 const u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall - lightLUTTexture.create(256, Lights::LUT_Count, GL_R32F); - lightLUTTexture.bind(); - lightLUTTexture.setMinFilter(OpenGL::Linear); - lightLUTTexture.setMagFilter(OpenGL::Linear); + // 24 rows for light, 1 for fog + LUTTexture.create(256, Lights::LUT_Count + 1, GL_RG32F); + LUTTexture.bind(); + LUTTexture.setMinFilter(OpenGL::Linear); + LUTTexture.setMagFilter(OpenGL::Linear); auto prevTexture = OpenGL::getTex2D(); @@ -353,22 +354,49 @@ void RendererGL::bindTexturesToSlots() { } glActiveTexture(GL_TEXTURE0 + 3); - lightLUTTexture.bind(); + LUTTexture.bind(); glActiveTexture(GL_TEXTURE0); } void RendererGL::updateLightingLUT() { gpu.lightingLUTDirty = false; - std::array lightingLut; + std::array lightingLut; - for (int i = 0; i < gpu.lightingLUT.size(); i++) { - uint64_t value = gpu.lightingLUT[i] & 0xFFF; + for (int i = 0; i < lightingLut.size(); i += 2) { + uint64_t value = gpu.lightingLUT[i >> 1] & 0xFFF; lightingLut[i] = (float)(value << 4) / 65535.0f; } glActiveTexture(GL_TEXTURE0 + 3); - lightLUTTexture.bind(); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, Lights::LUT_Count, GL_RED, GL_FLOAT, lightingLut.data()); + LUTTexture.bind(); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, Lights::LUT_Count, GL_RG, GL_FLOAT, lightingLut.data()); + glActiveTexture(GL_TEXTURE0); +} + +void RendererGL::updateFogLUT() { + gpu.fogLUTDirty = false; + + // Fog LUT elements are of this type: + // 0-12 fixed1.1.11, Difference from next element + // 13-23 fixed0.0.11, Value + // We will store them as a 128x1 RG texture with R being the value and G being the difference + std::array fogLut; + + for (int i = 0; i < fogLut.size(); i += 2) { + const uint32_t value = gpu.fogLUT[i >> 1]; + int32_t diff = value & 0x1fff; + diff = (diff << 19) >> 19; // Sign extend the 13-bit value to 32 bits + const float fogDifference = float(diff) / 2048.0f; + const float fogValue = float((value >> 13) & 0x7ff) / 2048.0f; + + fogLut[i] = fogValue; + fogLut[i + 1] = fogDifference; + } + + glActiveTexture(GL_TEXTURE0 + 3); + LUTTexture.bind(); + // The fog LUT exists at the end of the lighting LUT + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, Lights::LUT_Count, 128, 1, GL_RG, GL_FLOAT, fogLut.data()); glActiveTexture(GL_TEXTURE0); } @@ -453,6 +481,10 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v bindTexturesToSlots(); + if (gpu.fogLUTDirty) { + updateFogLUT(); + } + if (gpu.lightingLUTDirty) { updateLightingLUT(); } @@ -811,7 +843,7 @@ OpenGL::Program& RendererGL::getSpecializedShader() { glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0); glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1); glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2); - glUniform1i(OpenGL::uniformLocation(program, "u_tex_lighting_lut"), 3); + glUniform1i(OpenGL::uniformLocation(program, "u_tex_luts"), 3); // Allocate memory for the program UBO glGenBuffers(1, &programEntry.uboBinding); @@ -994,9 +1026,9 @@ void RendererGL::initUbershader(OpenGL::Program& program) { ubershaderData.depthmapEnableLoc = OpenGL::uniformLocation(program, "u_depthmapEnable"); ubershaderData.picaRegLoc = OpenGL::uniformLocation(program, "u_picaRegs"); - // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 + // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2 and the LUTs go in TU 3 glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0); glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1); glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2); - glUniform1i(OpenGL::uniformLocation(program, "u_tex_lighting_lut"), 3); + glUniform1i(OpenGL::uniformLocation(program, "u_tex_luts"), 3); } diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag index 48b55a4ca..b9f9fe4c5 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -25,7 +25,7 @@ uniform bool u_depthmapEnable; uniform sampler2D u_tex0; uniform sampler2D u_tex1; uniform sampler2D u_tex2; -uniform sampler2D u_tex_lighting_lut; +uniform sampler2D u_tex_luts; uniform uint u_picaRegs[0x200 - 0x48]; @@ -152,6 +152,8 @@ vec4 tevCalculateCombiner(int tev_id) { #define RG_LUT 5u #define RR_LUT 6u +#define FOG_INDEX 24 + uint GPUREG_LIGHTi_CONFIG; uint GPUREG_LIGHTING_CONFIG1; uint GPUREG_LIGHTING_LUTINPUT_SELECT; @@ -161,7 +163,7 @@ bool error_unimpl = false; vec4 unimpl_color = vec4(1.0, 0.0, 1.0, 1.0); float lutLookup(uint lut, int index) { - return texelFetch(u_tex_lighting_lut, ivec2(index, int(lut)), 0).r; + return texelFetch(u_tex_luts, ivec2(index, int(lut)), 0).r; } vec3 regToColor(uint reg) { @@ -494,7 +496,7 @@ void main() { if (tevUnimplementedSourceFlag) { // fragColour = vec4(1.0, 0.0, 1.0, 1.0); } - // fragColour.rg = texture(u_tex_lighting_lut,vec2(gl_FragCoord.x/200.,float(int(gl_FragCoord.y/2)%24))).rr; + // fragColour.rg = texture(u_tex_luts,vec2(gl_FragCoord.x/200.,float(int(gl_FragCoord.y/2)%24))).rr; // Get original depth value by converting from [near, far] = [0, 1] to [-1, 1] // We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1] @@ -507,6 +509,28 @@ void main() { // Write final fragment depth gl_FragDepth = depth; + bool enable_fog = (textureEnvUpdateBuffer & 7u) == 5u; + + if (enable_fog) { + bool flip_depth = (textureEnvUpdateBuffer & (1u << 16)) != 0u; + float fog_index = flip_depth ? 1.0 - depth : depth; + fog_index *= 128.0; + float clamped_index = clamp(floor(fog_index), 0.0, 127.0); + float delta = fog_index - clamped_index; + vec2 value = texelFetch(u_tex_luts, ivec2(int(clamped_index), FOG_INDEX), 0).rg; + float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0); + + uint GPUREG_FOG_COLOR = readPicaReg(0x00E1u); + + // Annoyingly color is not encoded in the same way as light color + float r = (GPUREG_FOG_COLOR & 0xFFu) / 255.0; + float g = ((GPUREG_FOG_COLOR >> 8) & 0xFFu) / 255.0; + float b = ((GPUREG_FOG_COLOR >> 16) & 0xFFu) / 255.0; + vec3 fog_color = vec3(r, g, b); + + fragColour.rgb = mix(fog_color, fragColour.rgb, fog_factor); + } + // Perform alpha test uint alphaControl = readPicaReg(0x104u); if ((alphaControl & 1u) != 0u) { // Check if alpha test is on