update of the docu new part for ecdf

ComPlat · Nov 18, 2024 · 53983ff · 53983ff
1 parent c222c64
commit 53983ff
Show file tree

Hide file tree

Showing 5 changed files with 68 additions and 103 deletions.
diff --git a/bs/.development/ECDF.R b/bs/.development/ECDF.R
@@ -0,0 +1,68 @@
+# Calculation of empirical cumulative distribution function (ECDF)
+
+n <- 20
+randoms <- runif(n, 1, 10)
+how_often <- sample.int(3, n, replace = TRUE)
+sample <- numeric(sum(how_often))
+counter <- 1
+for (i in 1:n) {
+  sample[counter:(counter + how_often[i] - 1)] <- rep(randoms[i], how_often[i])
+  counter <- counter + how_often[i]
+}
+sample <- sort(sample)
+unique_sample <- unique(sample)
+total_observations <- length(sample)
+ecdf <- sapply(unique_sample, function(x) {
+  sum(sample <= x) / total_observations
+})
+ecdf
+
+df <- data.frame(x = unique_sample, y = ecdf)
+library(ggplot2)
+ggplot(data = df, aes(x = x, y = y)) +
+  geom_step() +
+  geom_label(aes(label = round(x, 2)))
+
+# If the ECDF value is F_n(5) = 0.5
+# it means 50% of the sample values are less than or equal to 5.
+# The CDF corresponds to a theoretical probability distribution (e.g. normal, exponential).
+# It is the integral of the probability density function (PDF)
+
+# PDF are the d-functions in R
+# CDF are the p-functions in R
+# The q-functions in R compute the quantiles of a distribution.
+# They are the inverse of the CDF.
+# In simple terms, the quantile tells you the value of x
+# below which a proportion p of the data lies.
+
+# Empirical quantiles
+n <- 20
+randoms <- runif(n, 1, 10)
+how_often <- sample.int(3, n, replace = TRUE)
+sample <- numeric(sum(how_often))
+counter <- 1
+for (i in 1:n) {
+  sample[counter:(counter + how_often[i] - 1)] <- rep(randoms[i], how_often[i])
+  counter <- counter + how_often[i]
+}
+sample <- sort(sample)
+n <- length(sample)
+
+ps <- seq(0, 1, 0.1)
+qq <- data.frame(
+  empirical = numeric(length(ps)),
+  theoretical = numeric(length(ps))
+)
+for (i in 1:length(ps)) {
+  # i = p * (n + 1)
+  idx <- ps[i] * (n + 1)
+  if (floor(idx) == 0) {
+    idx <- 1
+  } else if (ceiling(idx) >= n) {
+    idx <- n
+  }
+  qq$empirical[i] <- (sample[ceiling(idx)] + sample[floor(idx)] ) / 2
+  qq$theoretical[i] <- qunif(p = ps[i], min = 1, max = 10)
+}
+plot(qq$theoretical, qq$empirical)
+abline(0, 1, col = "red", lty = 2)
diff --git a/bs/.development/overview_doseresponse.R b/bs/.development/overview_doseresponse.R
diff --git a/bs/.development/run_app.R b/bs/.development/run_app.R
diff --git a/bs/.development/tinyplot_test.R b/bs/.development/tinyplot_test.R
diff --git a/bs/.development/warningPlot.R b/bs/.development/warningPlot.R