diff --git a/CHANGELOG.md b/CHANGELOG.md index a1d6177..68684d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,4 @@ +- V 0.18.0: Implemented calibration with a StudentT distribution to mimic Bchron and established that as the new default. Reimplemented the --method option of the CLI tool to reflect that change. - V 0.17.0: Changed argument order in CalCurve data type to adjust to the order in .14C files - V 0.16.0: Refactoring in the library to simplify and clarify the interface - V 0.15.0: Added another calibration algorithm (following the implementation by Andrew Parnell in Bchron) and a method switch for the CLI diff --git a/currycarbon.cabal b/currycarbon.cabal index 256ac56..b47cca8 100644 --- a/currycarbon.cabal +++ b/currycarbon.cabal @@ -1,5 +1,5 @@ name: currycarbon -version: 0.17.0 +version: 0.18.0 synopsis: A package for simple, fast radiocarbon calibration description: Radiocarbon calibration with the intercept method optimised for fast calibration of many dates. license: MIT @@ -15,7 +15,7 @@ library exposed-modules: Currycarbon.Parsers, Currycarbon.Types, Currycarbon.Utils, Currycarbon.Calibration, Currycarbon.CLI.RunCalibrate, Currycarbon.CalCurves.Intcal20 hs-source-dirs: src - build-depends: base, filepath, parsec, parallel, vector + build-depends: base, filepath, parsec, parallel, vector, statistics default-language: Haskell2010 executable currycarbon diff --git a/src-executables/Main-currycarbon.hs b/src-executables/Main-currycarbon.hs index 5b523e2..7d0e9cb 100644 --- a/src-executables/Main-currycarbon.hs +++ b/src-executables/Main-currycarbon.hs @@ -88,17 +88,17 @@ parseCalCurveFromFile = OP.option (Just <$> OP.str) ( ) parseCalibrationMethod :: OP.Parser CalibrationMethod -parseCalibrationMethod = OP.option (OP.eitherReader readCalibrationMethod) ( +parseCalibrationMethod = OP.option (OP.eitherReader readCalibrationMethodString) ( OP.long "method" <> - OP.help "The calibration algorithm that should be used: Bchron (default) or MatrixMultiplication" <> - OP.value Bchron + OP.help "The calibration algorithm that should be used: \ + \\",,\". \ + \The default setting is equivalent to \"Bchron,StudentT,100\" \ + \which copies the algorithm implemented in the Bchron R package. \ + \Alternatively we implemented \"MatrixMult\", which comes without further arguments. \ + \For the Bchron algorithm with a normal distribution (\"Bchron,Normal\") \ + \the degrees of freedom argument is not relevant" <> + OP.value (Bchron $ StudentTDist 100) ) - where - readCalibrationMethod :: String -> Either String CalibrationMethod - readCalibrationMethod s = case s of - "MatrixMultiplication" -> Right MatrixMultiplication - "Bchron" -> Right Bchron - _ -> Left "must be Bchron or MatrixMultiplication" parseDontInterpolateCalCurve :: OP.Parser (Bool) parseDontInterpolateCalCurve = OP.switch ( diff --git a/src/Currycarbon/Calibration.hs b/src/Currycarbon/Calibration.hs index 7dc35a6..0039ba6 100644 --- a/src/Currycarbon/Calibration.hs +++ b/src/Currycarbon/Calibration.hs @@ -22,6 +22,9 @@ import qualified Data.Vector.Unboxed as VU import qualified Data.Vector as V import Data.Vector.Generic (convert) import Data.Maybe (fromMaybe) +import Statistics.Distribution (density) +import Statistics.Distribution.StudentT (studentT) +--import Statistics.Distribution.Normal (normalDistr) -- | Calibrates a list of dates with the provided calibration curve calibrateDates :: CalibrationMethod -- ^ Calibration method @@ -32,8 +35,8 @@ calibrateDates :: CalibrationMethod -- ^ Calibration method calibrateDates _ _ _ [] = [] calibrateDates MatrixMultiplication interpolate calCurve uncalDates = map (calibrateDateMatrixMult interpolate calCurve) uncalDates `using` parList rpar -calibrateDates Bchron interpolate calCurve uncalDates = - map (calibrateDateBchron interpolate calCurve) uncalDates `using` parList rpar +calibrateDates Bchron{distribution=distr} interpolate calCurve uncalDates = + map (calibrateDateBchron distr interpolate calCurve) uncalDates `using` parList rpar calibrateDateMatrixMult :: Bool -> CalCurve -> UncalC14 -> CalPDF calibrateDateMatrixMult interpolate calCurve uncalC14 = @@ -44,8 +47,8 @@ calibrateDateMatrixMult interpolate calCurve uncalC14 = calPDF = projectUncalOverCalCurve uncalPDF calCurveMatrix in normalizeCalPDF calPDF -calibrateDateBchron :: Bool -> CalCurve -> UncalC14 -> CalPDF -calibrateDateBchron interpolate calCurve uncalC14@(UncalC14 name age ageSd) = +calibrateDateBchron :: CalibrationDistribution -> Bool -> CalCurve -> UncalC14 -> CalPDF +calibrateDateBchron distr interpolate calCurve uncalC14@(UncalC14 name age ageSd) = let rawCalCurveSegment = getRelevantCalCurveSegment uncalC14 calCurve CalCurve cals mus tau1s = prepareCalCurveSegment interpolate True rawCalCurveSegment ageFloat = -(fromIntegral age)+1950 @@ -53,7 +56,9 @@ calibrateDateBchron interpolate calCurve uncalC14@(UncalC14 name age ageSd) = ageSd2Float = fromIntegral ageSd2 musFloat = VU.map fromIntegral mus tau1sFloat = VU.map fromIntegral tau1s - dens = VU.zipWith (\mu tau1 -> dnorm 0 1 ((ageFloat - mu) / sqrt (ageSd2Float + tau1 * tau1))) musFloat tau1sFloat + dens = case distr of + NormalDist -> VU.zipWith (\mu tau1 -> dnorm 0 1 ((ageFloat - mu) / sqrt (ageSd2Float + tau1 * tau1))) musFloat tau1sFloat + StudentTDist numberOfDegreesOfFreedom -> VU.zipWith (\mu tau1 -> dt numberOfDegreesOfFreedom ((ageFloat - mu) / sqrt (ageSd2Float + tau1 * tau1))) musFloat tau1sFloat in normalizeCalPDF $ CalPDF name cals dens -- | Take an uncalibrated date and a raw calibration curve and return @@ -150,6 +155,11 @@ dnorm mu sigma x = c2 = c * c sigma2 = sigma * sigma in a*b + -- alternative implemenation with the statistics package: + -- realToFrac $ density (normalDistr (realToFrac mu) (realToFrac sigma)) (realToFrac x) + +dt :: Double -> Float -> Float +dt dof x = realToFrac $ density (studentT (realToFrac dof)) (realToFrac x) -- dof: number of degrees of freedom normalizeCalPDF :: CalPDF -> CalPDF normalizeCalPDF (CalPDF name cals dens) = @@ -175,7 +185,7 @@ refineCalDate (CalPDF name bps dens) = cumsumDensities = scanl1 (+) $ map snd sortedDensities isIn68 = map (< 0.683) cumsumDensities isIn95 = map (< 0.954) cumsumDensities - contextualizedDensities = reverse $ sort $ zipWith3 (\(year,density) in68 in95 -> (year,density,in68,in95)) sortedDensities isIn68 isIn95 + contextualizedDensities = reverse $ sort $ zipWith3 (\(y,d) in68 in95 -> (y,d,in68,in95)) sortedDensities isIn68 isIn95 in CalC14 name (densities2HDR68 contextualizedDensities) (densities2HDR95 contextualizedDensities) where densities2HDR68 :: [(Int, Float, Bool, Bool)] -> [HDR] diff --git a/src/Currycarbon/Parsers.hs b/src/Currycarbon/Parsers.hs index 07ee724..4fb5af6 100644 --- a/src/Currycarbon/Parsers.hs +++ b/src/Currycarbon/Parsers.hs @@ -15,6 +15,31 @@ import qualified Data.Vector as V -- This module contains a number of functions to manage data input and -- output plumbing for different datatypes +-- CalibrationMethod +readCalibrationMethodString :: String -> Either String CalibrationMethod +readCalibrationMethodString s = + case P.runParser parseCalibrationMethodString () "" s of + Left err -> error $ "Error in parsing method from string: " ++ show err + Right x -> Right x + +parseCalibrationMethodString :: P.Parser CalibrationMethod +parseCalibrationMethodString = do + P.try bchron P.<|> matrixMultiplication + where + bchron = do + _ <- P.string "Bchron," + P.try studentT P.<|> normal + studentT = do + _ <- P.string "StudentT," + dof <- read <$> P.many1 P.digit + return (Bchron $ StudentTDist dof) + normal = do + _ <- P.string "Normal" + return (Bchron NormalDist) + matrixMultiplication = do + _ <- P.string "MatrixMult" + return MatrixMultiplication + -- CalC14 writeCalC14s :: FilePath -> [CalC14] -> IO () writeCalC14s path calC14s = writeFile path $ diff --git a/src/Currycarbon/Types.hs b/src/Currycarbon/Types.hs index 618fcaa..5df16f8 100644 --- a/src/Currycarbon/Types.hs +++ b/src/Currycarbon/Types.hs @@ -14,7 +14,15 @@ import qualified Data.Vector as V -- | Different calibration algorithms implemented in currycarbon data CalibrationMethod = MatrixMultiplication -- ^ Matrix multiplication method - | Bchron -- ^ Algorithm similar to the clever implementation in the R package Bchron by Andrew Parnell + | Bchron { + distribution :: CalibrationDistribution + } -- ^ Algorithm similar to the clever implementation in the R package Bchron by Andrew Parnell + +data CalibrationDistribution = + NormalDist + | StudentTDist { + ndf :: Double + } -- | A data type to represent an uncalibrated radiocarbon date data UncalC14 = UncalC14 {