From d8ef9c6eca972b74a54b90aaa2a081186e2874e7 Mon Sep 17 00:00:00 2001 From: fleroviux Date: Sun, 10 Dec 2023 13:19:21 +0100 Subject: [PATCH 1/2] gpu: sw: Move span rasterization into a separate method --- .../gpu/renderer/software_renderer.hpp | 3 + .../gpu/renderer/software/rasterizer.cpp | 139 +++++++++--------- 2 files changed, 72 insertions(+), 70 deletions(-) diff --git a/src/dual/include/dual/nds/video_unit/gpu/renderer/software_renderer.hpp b/src/dual/include/dual/nds/video_unit/gpu/renderer/software_renderer.hpp index e9551ce..605c021 100644 --- a/src/dual/include/dual/nds/video_unit/gpu/renderer/software_renderer.hpp +++ b/src/dual/include/dual/nds/video_unit/gpu/renderer/software_renderer.hpp @@ -10,6 +10,8 @@ namespace dual::nds::gpu { + struct Span; + class SoftwareRenderer final : public RendererBase { public: SoftwareRenderer( @@ -32,6 +34,7 @@ namespace dual::nds::gpu { void RenderRearPlane(); void RenderPolygons(const Viewport& viewport, std::span polygons); void RenderPolygon(const Viewport& viewport, const Polygon& polygon); + void RenderPolygonSpan(const Polygon& polygon, const Span& span, i32 y, int x0, int x1, int x_min, int x_max, int l, int r); Color4 SampleTexture(TextureParams params, u32 palette_base, Vector2 uv); template diff --git a/src/dual/src/nds/video_unit/gpu/renderer/software/rasterizer.cpp b/src/dual/src/nds/video_unit/gpu/renderer/software/rasterizer.cpp index ab41d57..a6332a7 100644 --- a/src/dual/src/nds/video_unit/gpu/renderer/software/rasterizer.cpp +++ b/src/dual/src/nds/video_unit/gpu/renderer/software/rasterizer.cpp @@ -49,13 +49,9 @@ namespace dual::nds::gpu { void SoftwareRenderer::RenderPolygon(const Viewport& viewport, const Polygon& polygon) { const int vertex_count = (int)polygon.vertices.Size(); - // @todo: use different threshold for w-buffer - const i32 depth_test_threshold = m_enable_w_buffer ? 0xFF : 0x200; - Span span{}; Edge::Point points[10]; Interpolator<9> edge_interp{}; - Interpolator<8> line_interp{}; int initial_vertex; int final_vertex; @@ -200,92 +196,95 @@ namespace dual::nds::gpu { const int xr0 = std::clamp(span.x0[r] >> 18, xl1, 255); const int xr1 = std::min(x_max, 255); - const auto RenderSpan = [&](int x0, int x1) { - Color4 color; - Vector2 uv; + RenderPolygonSpan(polygon, span, y, xl0, xl1, x_min, x_max, l, r); - for(int x = x0; x <= x1; x++) { - line_interp.Setup(span.w_16[l], span.w_16[r], x, x_min, x_max); - - const u32 depth_old = m_depth_buffer[y][x]; - const u32 depth_new = m_enable_w_buffer ? - line_interp.Perp(span.depth[l], span.depth[r]) : line_interp.Lerp(span.depth[l], span.depth[r]); + if(!wireframe || force_render_inner_span) { + RenderPolygonSpan(polygon, span, y, xl1 + 1, xr0 - 1, x_min, x_max, l, r); + } - bool depth_test_passed; + RenderPolygonSpan(polygon, span, y, xr0, xr1, x_min, x_max, l, r); + } + } - if(polygon.attributes.use_equal_depth_test) { - depth_test_passed = std::abs((i32)depth_new - (i32)depth_old) <= depth_test_threshold; - } else { - depth_test_passed = depth_new < depth_old; - } + void SoftwareRenderer::RenderPolygonSpan(const Polygon& polygon, const Span& span, i32 y, int x0, int x1, int x_min, int x_max, int l, int r) { + const i32 depth_test_threshold = m_enable_w_buffer ? 0xFF : 0x200; - if(!depth_test_passed) { - continue; - } + Interpolator<8> line_interp{}; + Color4 color; + Vector2 uv; - line_interp.Perp(span.color[l], span.color[r], color); - line_interp.Perp(span.uv[l], span.uv[r], uv); + for(int x = x0; x <= x1; x++) { + line_interp.Setup(span.w_16[l], span.w_16[r], x, x_min, x_max); - if(m_io.disp3dcnt.enable_texture_mapping && polygon.texture_params.format != TextureParams::Format::Disabled) { - const Color4 texel = SampleTexture(polygon.texture_params, polygon.palette_base, uv); + const u32 depth_old = m_depth_buffer[y][x]; + const u32 depth_new = m_enable_w_buffer ? + line_interp.Perp(span.depth[l], span.depth[r]) : line_interp.Lerp(span.depth[l], span.depth[r]); - // @todo: alpha test + bool depth_test_passed; - switch((Polygon::Mode)polygon.attributes.polygon_mode) { - case Polygon::Mode::Modulation: { - for(const int i : {0, 1, 2, 3}) { - const int a = texel[i].Raw(); - const int b = color[i].Raw(); - color[i] = (i8)(((a + 1) * (b + 1) - 1) >> 6); - } - break; - } - case Polygon::Mode::Shadow: - case Polygon::Mode::Decal: { - // @todo - break; - } - case Polygon::Mode::Shaded: { - // @todo - break; - } - } - } else if(polygon.attributes.polygon_mode == Polygon::Mode::Shaded) { - // @todo - } + if(polygon.attributes.use_equal_depth_test) { + depth_test_passed = std::abs((i32)depth_new - (i32)depth_old) <= depth_test_threshold; + } else { + depth_test_passed = depth_new < depth_old; + } - // @todo: reject translucent pixel if the polygon ID is equal and the destination (old?) pixel isn't opaque. + if(!depth_test_passed) { + continue; + } - const bool opaque_pixel = color.A() == 63; + line_interp.Perp(span.color[l], span.color[r], color); + line_interp.Perp(span.uv[l], span.uv[r], uv); - if(!opaque_pixel && m_io.disp3dcnt.enable_alpha_blend && m_frame_buffer[y][x].A() != 0) { - const Fixed6 a0 = color.A(); - const Fixed6 a1 = Fixed6{63} - a0; - for(const int i : {0, 1, 2}) { - color[i] = color[i] * a0 + m_frame_buffer[y][x][i] * a1; - } - color.A() = std::max(color.A(), m_frame_buffer[y][x].A()); - } + if(m_io.disp3dcnt.enable_texture_mapping && polygon.texture_params.format != TextureParams::Format::Disabled) { + const Color4 texel = SampleTexture(polygon.texture_params, polygon.palette_base, uv); - m_frame_buffer[y][x] = color; + // @todo: alpha test - if(opaque_pixel) { - m_depth_buffer[y][x] = depth_new; - } else { - if(polygon.attributes.enable_translucent_depth_write) { - m_depth_buffer[y][x] = depth_new; + switch((Polygon::Mode)polygon.attributes.polygon_mode) { + case Polygon::Mode::Modulation: { + for(const int i : {0, 1, 2, 3}) { + const int a = texel[i].Raw(); + const int b = color[i].Raw(); + color[i] = (i8)(((a + 1) * (b + 1) - 1) >> 6); } + break; + } + case Polygon::Mode::Shadow: + case Polygon::Mode::Decal: { + // @todo + break; + } + case Polygon::Mode::Shaded: { + // @todo + break; } } - }; + } else if(polygon.attributes.polygon_mode == Polygon::Mode::Shaded) { + // @todo + } - RenderSpan(xl0, xl1); + // @todo: reject translucent pixel if the polygon ID is equal and the destination (old?) pixel isn't opaque. - if(!wireframe || force_render_inner_span) { - RenderSpan(xl1 + 1, xr0 - 1); + const bool opaque_pixel = color.A() == 63; + + if(!opaque_pixel && m_io.disp3dcnt.enable_alpha_blend && m_frame_buffer[y][x].A() != 0) { + const Fixed6 a0 = color.A(); + const Fixed6 a1 = Fixed6{63} - a0; + for(const int i : {0, 1, 2}) { + color[i] = color[i] * a0 + m_frame_buffer[y][x][i] * a1; + } + color.A() = std::max(color.A(), m_frame_buffer[y][x].A()); } - RenderSpan(xr0, xr1); + m_frame_buffer[y][x] = color; + + if(opaque_pixel) { + m_depth_buffer[y][x] = depth_new; + } else { + if(polygon.attributes.enable_translucent_depth_write) { + m_depth_buffer[y][x] = depth_new; + } + } } } From 6d363bcd5febbcbb76bbecc8d18140aae9705aa3 Mon Sep 17 00:00:00 2001 From: fleroviux Date: Sun, 10 Dec 2023 14:43:11 +0100 Subject: [PATCH 2/2] gpu: sw: Reduce the number of arguments RenderPolygonSpan() takes --- .../gpu/renderer/software_renderer.hpp | 12 +++- .../gpu/renderer/software/rasterizer.cpp | 61 +++++++++---------- 2 files changed, 39 insertions(+), 34 deletions(-) diff --git a/src/dual/include/dual/nds/video_unit/gpu/renderer/software_renderer.hpp b/src/dual/include/dual/nds/video_unit/gpu/renderer/software_renderer.hpp index 605c021..32a9d1d 100644 --- a/src/dual/include/dual/nds/video_unit/gpu/renderer/software_renderer.hpp +++ b/src/dual/include/dual/nds/video_unit/gpu/renderer/software_renderer.hpp @@ -10,8 +10,6 @@ namespace dual::nds::gpu { - struct Span; - class SoftwareRenderer final : public RendererBase { public: SoftwareRenderer( @@ -30,11 +28,19 @@ namespace dual::nds::gpu { void CaptureAlpha(int scanline, std::span dst_buffer) override; private: + struct Line { + int x[2]; + Color4 color[2]; + Vector2 uv[2]; + u16 w_16[2]; + u32 depth[2]; + }; + void CopyVRAM(); void RenderRearPlane(); void RenderPolygons(const Viewport& viewport, std::span polygons); void RenderPolygon(const Viewport& viewport, const Polygon& polygon); - void RenderPolygonSpan(const Polygon& polygon, const Span& span, i32 y, int x0, int x1, int x_min, int x_max, int l, int r); + void RenderPolygonSpan(const Polygon& polygon, const Line& line, i32 y, int x0, int x1); Color4 SampleTexture(TextureParams params, u32 palette_base, Vector2 uv); template diff --git a/src/dual/src/nds/video_unit/gpu/renderer/software/rasterizer.cpp b/src/dual/src/nds/video_unit/gpu/renderer/software/rasterizer.cpp index a6332a7..46ca952 100644 --- a/src/dual/src/nds/video_unit/gpu/renderer/software/rasterizer.cpp +++ b/src/dual/src/nds/video_unit/gpu/renderer/software/rasterizer.cpp @@ -8,16 +8,6 @@ namespace dual::nds::gpu { - // @todo: move this into a header file. - struct Span { - i32 x0[2]; - i32 x1[2]; - Color4 color[2]; - Vector2 uv[2]; - u16 w_16[2]; - u32 depth[2]; - }; - void SoftwareRenderer::RenderRearPlane() { if(m_io.disp3dcnt.enable_rear_plane_bitmap) { ATOM_PANIC("gpu: sw: Unimplemented rear plane bitmap"); @@ -49,7 +39,9 @@ namespace dual::nds::gpu { void SoftwareRenderer::RenderPolygon(const Viewport& viewport, const Polygon& polygon) { const int vertex_count = (int)polygon.vertices.Size(); - Span span{}; + i32 x0[2]; + i32 x1[2]; + Line line{}; Edge::Point points[10]; Interpolator<9> edge_interp{}; @@ -140,23 +132,24 @@ namespace dual::nds::gpu { } for(int i = 0; i < 2; i++) { - edge[i].Interpolate(y, span.x0[i], span.x1[i]); + edge[i].Interpolate(y, x0[i], x1[i]); } // Detect when the left and right edges become swapped - if(span.x0[l] >> 18 > span.x1[r] >> 18) { + if(x0[l] >> 18 > x1[r] >> 18) { l ^= 1; r ^= 1; } for(int i = 0; i < 2; i++) { + const int j = i ^ l; const u16 w0 = polygon.w_16[start[i]]; const u16 w1 = polygon.w_16[end[i]]; if(edge[i].IsXMajor()) { const i32 x_min = points[start[i]].x; const i32 x_max = points[end[i]].x; - const i32 x = (i == l ? span.x0[l] : span.x1[r]) >> 18; + const i32 x = (i == l ? x0[l] : x1[r]) >> 18; if(x_min <= x_max) { edge_interp.Setup(w0, w1, x, x_min, x_max); @@ -167,14 +160,14 @@ namespace dual::nds::gpu { edge_interp.Setup(w0, w1, y, points[start[i]].y, points[end[i]].y); } - edge_interp.Perp(points[start[i]].vertex->color, points[end[i]].vertex->color, span.color[i]); - edge_interp.Perp(points[start[i]].vertex->uv, points[end[i]].vertex->uv, span.uv[i]); - span.w_16[i] = edge_interp.Perp(w0, w1); + edge_interp.Perp(points[start[i]].vertex->color, points[end[i]].vertex->color, line.color[j]); + edge_interp.Perp(points[start[i]].vertex->uv, points[end[i]].vertex->uv, line.uv[j]); + line.w_16[j] = edge_interp.Perp(w0, w1); if(m_enable_w_buffer) { - span.depth[i] = (u32)((i32)(i16)span.w_16[i] << polygon.w_l_shift >> polygon.w_r_shift); + line.depth[j] = (u32)((i32)(i16)line.w_16[j] << polygon.w_l_shift >> polygon.w_r_shift); } else { - span.depth[i] = edge_interp.Lerp(points[start[i]].depth, points[end[i]].depth); + line.depth[j] = edge_interp.Lerp(points[start[i]].depth, points[end[i]].depth); } } @@ -188,37 +181,43 @@ namespace dual::nds::gpu { const bool force_render_inner_span = y == y_min || y == y_max - 1; - const int x_min = span.x0[l] >> 18; - const int x_max = span.x1[r] >> 18; + const int x_min = x0[l] >> 18; + const int x_max = x1[r] >> 18; const int xl0 = std::max(x_min, 0); - const int xl1 = std::clamp(span.x1[l] >> 18, xl0, 255); - const int xr0 = std::clamp(span.x0[r] >> 18, xl1, 255); + const int xl1 = std::clamp(x1[l] >> 18, xl0, 255); + const int xr0 = std::clamp(x0[r] >> 18, xl1, 255); const int xr1 = std::min(x_max, 255); - RenderPolygonSpan(polygon, span, y, xl0, xl1, x_min, x_max, l, r); + line.x[0] = x_min; + line.x[1] = x_max; + + RenderPolygonSpan(polygon, line, y, xl0, xl1); if(!wireframe || force_render_inner_span) { - RenderPolygonSpan(polygon, span, y, xl1 + 1, xr0 - 1, x_min, x_max, l, r); + RenderPolygonSpan(polygon, line, y, xl1 + 1, xr0 - 1); } - RenderPolygonSpan(polygon, span, y, xr0, xr1, x_min, x_max, l, r); + RenderPolygonSpan(polygon, line, y, xr0, xr1); } } - void SoftwareRenderer::RenderPolygonSpan(const Polygon& polygon, const Span& span, i32 y, int x0, int x1, int x_min, int x_max, int l, int r) { + void SoftwareRenderer::RenderPolygonSpan(const Polygon& polygon, const Line& line, i32 y, int x0, int x1) { const i32 depth_test_threshold = m_enable_w_buffer ? 0xFF : 0x200; Interpolator<8> line_interp{}; Color4 color; Vector2 uv; + const int x_min = line.x[0]; + const int x_max = line.x[1]; + for(int x = x0; x <= x1; x++) { - line_interp.Setup(span.w_16[l], span.w_16[r], x, x_min, x_max); + line_interp.Setup(line.w_16[0], line.w_16[1], x, x_min, x_max); const u32 depth_old = m_depth_buffer[y][x]; const u32 depth_new = m_enable_w_buffer ? - line_interp.Perp(span.depth[l], span.depth[r]) : line_interp.Lerp(span.depth[l], span.depth[r]); + line_interp.Perp(line.depth[0], line.depth[1]) : line_interp.Lerp(line.depth[0], line.depth[1]); bool depth_test_passed; @@ -232,8 +231,8 @@ namespace dual::nds::gpu { continue; } - line_interp.Perp(span.color[l], span.color[r], color); - line_interp.Perp(span.uv[l], span.uv[r], uv); + line_interp.Perp(line.color[0], line.color[1], color); + line_interp.Perp(line.uv[0], line.uv[1], uv); if(m_io.disp3dcnt.enable_texture_mapping && polygon.texture_params.format != TextureParams::Format::Disabled) { const Color4 texel = SampleTexture(polygon.texture_params, polygon.palette_base, uv);