Skip to content

Commit

Permalink
gpu: sw: revamp vertex attribute interpolation to be more accurate
Browse files Browse the repository at this point in the history
Previously the linear interpolation fallback did not have sufficient precision and
it did not model the unsigned 32-bit divider properly. This is now fixed.
  • Loading branch information
fleroviux committed Jan 10, 2024
1 parent 0de8b5a commit b058a52
Showing 1 changed file with 45 additions and 47 deletions.
92 changes: 45 additions & 47 deletions src/dual/src/nds/video_unit/gpu/renderer/software/interpolator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,48 @@ namespace dual::nds::gpu {
public:
void Setup(u16 w0, u16 w1, i32 x, i32 x_min, i32 x_max) {
constexpr u16 mask = bit_precision == 9 ? 0x7Eu : 0x7Fu;
m_use_lerp_fallback = w0 == w1 && (w0 & mask) == 0u && (w1 & mask) == 0u;

m_lerp_factor = CalculateLerpFactor(x, x_min, x_max);
const i32 t0 = x - x_min;
const i32 t1 = x_max - x;

if(w0 == w1 && (w0 & mask) == 0u && (w1 & mask) == 0u) {
m_perp_factor = m_lerp_factor;
if(x_min != x_max) {
m_lerp_numer = t0;
m_lerp_denom = x_max - x_min;

if(!m_use_lerp_fallback) {
u16 w0_numer = w0;
u16 w0_denom = w0;
u16 w1_denom = w1;

if constexpr(bit_precision == 9) {
w0_numer >>= 1;
w0_denom >>= 1;
w1_denom >>= 1;

if((w0 & 1u) == 1u && (w1 & 1u) == 0u) {
w0_denom++;
}
}

const u32 numer = (u32)((t0 << bit_precision) * w0_numer);
const u32 denom = (u32)(t1 * w1_denom + t0 * w0_denom);

m_perp_factor = numer / denom;
}
} else {
m_perp_factor = CalculatePerpFactor(w0, w1, x, x_min, x_max);
m_lerp_numer = 0;
m_lerp_denom = 1;
m_perp_factor = 0u;
}
}

template<typename T>
[[nodiscard]] T Lerp(T a, T b) const {
const u32 inv_lerp_factor = (1 << bit_precision) - m_lerp_factor;

return (a * inv_lerp_factor + b * m_lerp_factor) >> bit_precision;
if(b >= a) {
return a + (b - a) * m_lerp_numer / m_lerp_denom;
}
return b + (a - b) * (m_lerp_denom - m_lerp_numer) / m_lerp_denom;
}

template<typename T>
Expand All @@ -41,51 +68,22 @@ namespace dual::nds::gpu {
const atom::detail::Vector<Derived, T, n>& b,
atom::detail::Vector<Derived, T, n>& out
) const {
const u32 inv_perp_factor = (1 << bit_precision) - m_perp_factor;

for(uint i = 0; i < n; i++) {
out[i] = (a[i].Raw() * inv_perp_factor + b[i].Raw() * m_perp_factor) >> bit_precision;
if(m_use_lerp_fallback) {
for(uint i = 0; i < n; i++) {
out[i] = Lerp(a[i].Raw(), b[i].Raw());
}
} else {
for(uint i = 0; i < n; i++) {
out[i] = Perp(a[i].Raw(), b[i].Raw());
}
}
}

private:
// Computes the linear interpolation factor for position `x` on the span
// [x_min, x_max] as a fixed-point fraction: (x - x_min) scaled by
// 2^bit_precision, divided by the span length (x_max - x_min).
//
// `bit_precision` is a compile-time constant declared outside this block.
//
// Returns 0 when x_min == x_max (zero-length span); otherwise the truncated
// unsigned quotient.
[[nodiscard]] u32 CalculateLerpFactor(i32 x, i32 x_min, i32 x_max) const {
// The subtraction and shift are evaluated on i32 and only then converted to u32.
const u32 numer = (x - x_min) << bit_precision;
const u32 denom = x_max - x_min;

// Guard the division: a zero-length span yields a factor of zero.
if(denom == 0u) {
return 0u;
}
// Unsigned integer division; any fractional remainder is discarded.
return numer / denom;
}

// Computes the perspective-correct interpolation factor for position `x` on
// the span [x_min, x_max], weighted by `w0` and `w1`:
//
//   t0 = x - x_min, t1 = x_max - x
//   factor = ((t0 << bit_precision) * w0_numer) / (t1 * w1_denom + t0 * w0_denom)
//
// `bit_precision` is a compile-time constant declared outside this block.
// Presumably w0/w1 are the W values at the two ends of the span - confirm
// against the caller.
//
// Returns 0 when the denominator evaluates to zero; otherwise the truncated
// unsigned quotient.
[[nodiscard]] u32 CalculatePerpFactor(u16 w0, u16 w1, i32 x, i32 x_min, i32 x_max) const {
u16 w0_numer = w0;
u16 w0_denom = w0;
u16 w1_denom = w1;

// In the 9-bit precision configuration the weights are halved before use.
if constexpr(bit_precision == 9) {
w0_numer >>= 1;
w0_denom >>= 1;
w1_denom >>= 1;

// When w0 is odd and w1 is even, the halved w0 used in the denominator is
// bumped by one; the numerator copy is left as is.
// NOTE(review): the rationale is not visible here - presumably this mirrors
// the behaviour of the emulated hardware; confirm.
if((w0 & 1u) == 1u && (w1 & 1u) == 0u) {
w0_denom++;
}
}

// Distances from `x` to either end of the span, stored as unsigned values.
const u32 t0 = x - x_min;
const u32 t1 = x_max - x;
// These products and the sum are computed in u32, so overflow wraps around
// (assuming u32 is a 32-bit unsigned alias - confirm).
const u32 numer = (t0 << bit_precision) * w0_numer;
const u32 denom = t1 * w1_denom + t0 * w0_denom;

// Guard the division: a zero denominator yields a factor of zero.
if(denom == 0u) {
return 0u;
}
// Unsigned integer division; any fractional remainder is discarded.
return numer / denom;
}
bool m_use_lerp_fallback{}; //< whether this is a 2D-polygon using linear interpolation for every attribute.

u32 m_lerp_factor{}; //< linear interpolation factor
u32 m_lerp_numer{}; //< linear interpolator numerator
u32 m_lerp_denom{}; //< linear interpolation denominator
u32 m_perp_factor{}; //< perspective-correct interpolation factor
};

Expand Down

0 comments on commit b058a52

Please sign in to comment.