gpu: sw: revamp vertex attribute interpolation to be more accurate

Previously, the linear interpolation fallback lacked sufficient precision and did not properly model the unsigned 32-bit divider. Both issues are now fixed.
fleroviux · Jan 10, 2024 · b058a52 · b058a52
1 parent 0de8b5a
commit b058a52
Showing 1 changed file with 45 additions and 47 deletions.
diff --git a/src/dual/src/nds/video_unit/gpu/renderer/software/interpolator.hpp b/src/dual/src/nds/video_unit/gpu/renderer/software/interpolator.hpp
@@ -11,21 +11,48 @@ namespace dual::nds::gpu {
     public:
       void Setup(u16 w0, u16 w1, i32 x, i32 x_min, i32 x_max) {
         constexpr u16 mask = bit_precision == 9 ? 0x7Eu : 0x7Fu;
+        m_use_lerp_fallback = w0 == w1 && (w0 & mask) == 0u && (w1 & mask) == 0u;
 
-        m_lerp_factor = CalculateLerpFactor(x, x_min, x_max);
+        const i32 t0 = x - x_min;
+        const i32 t1 = x_max - x;
 
-        if(w0 == w1 && (w0 & mask) == 0u && (w1 & mask) == 0u) {
-          m_perp_factor = m_lerp_factor;
+        if(x_min != x_max) {
+          m_lerp_numer = t0;
+          m_lerp_denom = x_max - x_min;
+
+          if(!m_use_lerp_fallback) {
+            u16 w0_numer = w0;
+            u16 w0_denom = w0;
+            u16 w1_denom = w1;
+
+            if constexpr(bit_precision == 9) {
+              w0_numer >>= 1;
+              w0_denom >>= 1;
+              w1_denom >>= 1;
+
+              if((w0 & 1u) == 1u && (w1 & 1u) == 0u) {
+                w0_denom++;
+              }
+            }
+
+            const u32 numer = (u32)((t0 << bit_precision) * w0_numer);
+            const u32 denom = (u32)(t1 * w1_denom + t0 * w0_denom);
+
+            m_perp_factor = numer / denom;
+          }
         } else {
-          m_perp_factor = CalculatePerpFactor(w0, w1, x, x_min, x_max);
+          m_lerp_numer  = 0;
+          m_lerp_denom  = 1;
+          m_perp_factor = 0u;
         }
       }
 
       template<typename T>
       [[nodiscard]] T Lerp(T a, T b) const {
-        const u32 inv_lerp_factor = (1 << bit_precision) - m_lerp_factor;
-
-        return (a * inv_lerp_factor + b * m_lerp_factor) >> bit_precision;
+        if(b >= a) {
+          return a + (b - a) * m_lerp_numer / m_lerp_denom;
+        }
+        return b + (a - b) * (m_lerp_denom - m_lerp_numer) / m_lerp_denom;
       }
 
       template<typename T>
@@ -41,51 +68,22 @@ namespace dual::nds::gpu {
         const atom::detail::Vector<Derived, T, n>& b,
         atom::detail::Vector<Derived, T, n>& out
       ) const {
-        const u32 inv_perp_factor = (1 << bit_precision) - m_perp_factor;
-
-        for(uint i = 0; i < n; i++) {
-          out[i] = (a[i].Raw() * inv_perp_factor + b[i].Raw() * m_perp_factor) >> bit_precision;
+        if(m_use_lerp_fallback) {
+          for(uint i = 0; i < n; i++) {
+            out[i] = Lerp(a[i].Raw(), b[i].Raw());
+          }
+        } else {
+          for(uint i = 0; i < n; i++) {
+            out[i] = Perp(a[i].Raw(), b[i].Raw());
+          }
         }
       }
 
     private:
-      [[nodiscard]] u32 CalculateLerpFactor(i32 x, i32 x_min, i32 x_max) const {
-        const u32 numer = (x - x_min) << bit_precision;
-        const u32 denom = x_max - x_min;
-
-        if(denom == 0u) {
-          return 0u;
-        }
-        return numer / denom;
-      }
-
-      [[nodiscard]] u32 CalculatePerpFactor(u16 w0, u16 w1, i32 x, i32 x_min, i32 x_max) const {
-        u16 w0_numer = w0;
-        u16 w0_denom = w0;
-        u16 w1_denom = w1;
-
-        if constexpr(bit_precision == 9) {
-          w0_numer >>= 1;
-          w0_denom >>= 1;
-          w1_denom >>= 1;
-
-          if((w0 & 1u) == 1u && (w1 & 1u) == 0u) {
-            w0_denom++;
-          }
-        }
-
-        const u32 t0 = x - x_min;
-        const u32 t1 = x_max - x;
-        const u32 numer = (t0 << bit_precision) * w0_numer;
-        const u32 denom = t1 * w1_denom + t0 * w0_denom;
-
-        if(denom == 0u) {
-          return 0u;
-        }
-        return numer / denom;
-      }
+      bool m_use_lerp_fallback{}; //< whether this is a 2D-polygon using linear interpolation for every attribute.
 
-      u32 m_lerp_factor{}; //< linear interpolation factor
+      u32 m_lerp_numer{};  //< linear interpolator numerator
+      u32 m_lerp_denom{};  //< linear interpolation denominator
       u32 m_perp_factor{}; //< perspective-correct interpolation factor
   };