diff --git a/src/Warden/Data/Numeric.hs b/src/Warden/Data/Numeric.hs index f1a5bdc..76343b5 100644 --- a/src/Warden/Data/Numeric.hs +++ b/src/Warden/Data/Numeric.hs @@ -6,7 +6,7 @@ {-# LANGUAGE DeriveGeneric #-} module Warden.Data.Numeric ( - Count(..) + KAcc(..) , Maximum(..) , Mean(..) , MeanAcc(..) @@ -71,12 +71,14 @@ instance Monoid Maximum where else Maximum prev {-# INLINE mappend #-} -newtype Count = - Count { - getCount :: Int +-- | Counter param for mean/stddev calculation. Equal to one plus the number +-- of records seen. +newtype KAcc = + KAcc { + getKAcc :: Int } deriving (Eq, Show, ToJSON, FromJSON, Generic, Num) -instance NFData Count +instance NFData KAcc -- | Preliminary mean, still accumulating. newtype MeanAcc = @@ -138,7 +140,7 @@ instance NFData NumericSummary data MeanDevAcc = MeanDevInitial - | MeanDevAcc {-# UNPACK #-} !MeanAcc !(Maybe StdDevAcc) {-# UNPACK #-} !Count + | MeanDevAcc {-# UNPACK #-} !MeanAcc !(Maybe StdDevAcc) {-# UNPACK #-} !KAcc deriving (Eq, Show, Generic) instance NFData MeanDevAcc diff --git a/src/Warden/Numeric.hs b/src/Warden/Numeric.hs index f83bd8b..60e2e31 100644 --- a/src/Warden/Numeric.hs +++ b/src/Warden/Numeric.hs @@ -1,4 +1,4 @@ - {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE BangPatterns #-} module Warden.Numeric ( @@ -46,17 +46,17 @@ updateMeanDev :: Real a updateMeanDev !macc x = let x' = (fromRational . toRational) x in case macc of MeanDevInitial -> - let i = Count 1 + let i = KAcc 1 m = MeanAcc 0 s = Nothing in update' m s i x' (MeanDevAcc m s i) -> update' m s i x' where - update' (MeanAcc m) s (Count i) v = + update' (MeanAcc m) s (KAcc i) v = let delta = v - m m' = MeanAcc $ m + delta / (fromIntegral i) - i' = Count $ i + 1 + i' = KAcc $ i + 1 s' = case s of Nothing -> Just $ StdDevAcc 0 @@ -100,7 +100,7 @@ combineMeanDevAcc (MeanDevAcc mu1 s1 c1) (MeanDevAcc mu2 s2 c2) = sda' = combineStdDevAcc mu' (mu1, s1, c1) (mu2, s2, c2) -- KAccs are off-by-one from the actual number of values seen, so -- subtract one from the sum to prevent it becoming off-by-two. - c' = c1 + c2 - (Count 1) in + c' = c1 + c2 - (KAcc 1) in MeanDevAcc mu' sda' c' {-# INLINE combineMeanDevAcc #-} @@ -144,8 +144,8 @@ combineVariance (MeanAcc muHat) (MeanAcc mu1, Variance var1, KAcc c1) (MeanAcc m {-# INLINE combineVariance #-} -- | Combine mean of two subsets, given subset means and size. -combineMeanAcc :: (MeanAcc, Count) -> (MeanAcc, Count) -> MeanAcc -combineMeanAcc (MeanAcc mu1, Count c1) (MeanAcc mu2, Count c2) = +combineMeanAcc :: (MeanAcc, KAcc) -> (MeanAcc, KAcc) -> MeanAcc +combineMeanAcc (MeanAcc mu1, KAcc c1) (MeanAcc mu2, KAcc c2) = let c1' = fromIntegral c1 c2' = fromIntegral c2 in MeanAcc $ ((mu1 * c1') + (mu2 * c2')) / (c1' + c2') diff --git a/test/Test/Warden/Numeric.hs b/test/Test/Warden/Numeric.hs index a4d7211..38a26fa 100644 --- a/test/Test/Warden/Numeric.hs +++ b/test/Test/Warden/Numeric.hs @@ -89,10 +89,10 @@ prop_updateMeanDev (NPlus n) = forAll (vectorOf n (arbitrary :: Gen Double)) $ \ var = textbookVariance mu xs sd = StdDev $ sqrt var uMeanDev = (Mean mu, sd) in - (nsMeanDev, Just (n+1)) ~~~ (uMeanDev, meanDevCount mda) + (nsMeanDev, Just (n+1)) ~~~ (uMeanDev, meanDevKAcc mda) where - meanDevCount MeanDevInitial = Nothing - meanDevCount (MeanDevAcc _ _ (Count c)) = Just c + meanDevKAcc MeanDevInitial = Nothing + meanDevKAcc (MeanDevAcc _ _ (KAcc c)) = Just c prop_updateMeanDev_associative :: Int -> Property prop_updateMeanDev_associative n = forAll (vectorOf n (arbitrary :: Gen Double)) $ \xs ->