From 06458f11db41b4049524c6ed1ef49026922919c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20J=C3=BAno=C5=A1?= Date: Sat, 6 Nov 2021 20:34:44 +0100 Subject: [PATCH] Avoid GeometricMean overflow by using exp and log This PR complicates a formula a bit to avoid overflows. Geometric mean of 100 numbers around 1 billion gets us overflow as multiplying them yields infinity. Numeric roots from that still return infinity. We can take logarithm of all numbers to avoid most prominent overflows that manifest themselves for just few numbers. Add tests: Test with 0 in input failed in previous implementation. Test with more larger values show the overflow issue. --- mean.go | 43 ++++++++++++++++++++++++++++++++++++------- mean_test.go | 8 ++++++++ 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/mean.go b/mean.go index a78d299..1fc3fc6 100644 --- a/mean.go +++ b/mean.go @@ -22,18 +22,47 @@ func GeometricMean(input Float64Data) (float64, error) { return math.NaN(), EmptyInputErr } - // Get the product of all the numbers + // Traditional definition is: + // geoMean := (a_1 * a_2 * ... * a_n)^(1/n) + // however, that overflows for larger numbers. + // We can rewrite it to this if all numbers are greater than 0; otherwise see (1) + // geoMean := e^(log((a_1 * a_2 * ... * a_n)^1/n)) = + // = e^(1/n * log(a_1 * a_2 * ... * a_n)) = + // = e^(1/n * (log(a_1) + log(a_2) + ... + log(a_n))) + // Using this, we may get less precision, but we'll avoid overflow. + // + // Note (1): + // - if there is 0 in input, result is 0 no matter what. + // - if count of negative numbers is even, we can take absolute value + // of all the numbers as negatives cancel each other. + // - if count of negative numbers is odd: + // -> if there are even data points, we have to return NaN as even root + // of negative number doesn't exist in real numbers. + // -> if there are odd data points, we can take absolute value + // of all the numbers and add sign minus to the result. var p float64 + var negative int64 for _, n := range input { - if p == 0 { - p = n - } else { - p *= n + switch { + case n < 0: + negative++ + p += math.Log(-n) + case n == 0: + return 0, nil + case n > 0: + p += math.Log(n) } } - // Calculate the geometric mean - return math.Pow(p, 1/float64(l)), nil + if negative % 2 == 0 { + return math.Exp(p / float64(input.Len())), nil + } else { + if input.Len() % 2 == 0 { + return math.NaN(), nil + } else { + return -math.Exp(p / float64(input.Len())), nil + } + } } // HarmonicMean gets the harmonic mean for a slice of numbers diff --git a/mean_test.go b/mean_test.go index 78c7b2a..95de72a 100644 --- a/mean_test.go +++ b/mean_test.go @@ -44,6 +44,12 @@ func TestGeometricMean(t *testing.T) { s1 := []float64{2, 18} s2 := []float64{10, 51.2, 8} s3 := []float64{1, 3, 9, 27, 81} + s4 := []float64{1, 0, 2, 3, 4} + + s5 := make([]float64, 100, 100) + for i:=0; i<100; i++ { + s5[i] = 1e9 + } for _, c := range []struct { in []float64 @@ -52,6 +58,8 @@ func TestGeometricMean(t *testing.T) { {s1, 6}, {s2, 16}, {s3, 9}, + {s4, 0}, + {s5, 1e9}, } { gm, err := stats.GeometricMean(c.in) if err != nil {