Skip to content

Commit

Permalink
update of the docu new part for ecdf
Browse files Browse the repository at this point in the history
  • Loading branch information
Konrad1991 committed Nov 18, 2024
1 parent c222c64 commit 53983ff
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 103 deletions.
68 changes: 68 additions & 0 deletions bs/.development/ECDF.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Calculation of empirical cumulative distribution function (ECDF)

n <- 20
randoms <- runif(n, 1, 10)
how_often <- sample.int(3, n, replace = TRUE)
sample <- numeric(sum(how_often))
counter <- 1
for (i in 1:n) {
sample[counter:(counter + how_often[i] - 1)] <- rep(randoms[i], how_often[i])
counter <- counter + how_often[i]
}
sample <- sort(sample)
unique_sample <- unique(sample)
total_observations <- length(sample)
ecdf <- sapply(unique_sample, function(x) {
sum(sample <= x) / total_observations
})
ecdf

df <- data.frame(x = unique_sample, y = ecdf)
library(ggplot2)
ggplot(data = df, aes(x = x, y = y)) +
geom_step() +
geom_label(aes(label = round(x, 2)))

# If the ECDF value is F_n(5) = 0.5
# it means 50% of the sample values are less than or equal to 5.
# The CDF corresponds to a theoretical probability distribution (e.g. normal, exponential).
# It is the integral of the probability density function (PDF)

# PDF are the d-functions in R
# CDF are the p-functions in R
# The q-functions in R compute the quantiles of a distribution.
# They are the inverse of the CDF.
# In simple terms, the quantile tells you the value of x
# below which a proportion p of the data lies.

# Empirical quantiles
n <- 20
randoms <- runif(n, 1, 10)
how_often <- sample.int(3, n, replace = TRUE)
sample <- numeric(sum(how_often))
counter <- 1
for (i in 1:n) {
sample[counter:(counter + how_often[i] - 1)] <- rep(randoms[i], how_often[i])
counter <- counter + how_often[i]
}
sample <- sort(sample)
n <- length(sample)

ps <- seq(0, 1, 0.1)
qq <- data.frame(
empirical = numeric(length(ps)),
theoretical = numeric(length(ps))
)
for (i in 1:length(ps)) {
# i = p * (n + 1)
idx <- ps[i] * (n + 1)
if (floor(idx) == 0) {
idx <- 1
} else if (ceiling(idx) >= n) {
idx <- n
}
qq$empirical[i] <- (sample[ceiling(idx)] + sample[floor(idx)] ) / 2
qq$theoretical[i] <- qunif(p = ps[i], min = 1, max = 10)
}
plot(qq$theoretical, qq$empirical)
abline(0, 1, col = "red", lty = 2)
22 changes: 0 additions & 22 deletions bs/.development/overview_doseresponse.R

This file was deleted.

9 changes: 0 additions & 9 deletions bs/.development/run_app.R

This file was deleted.

29 changes: 0 additions & 29 deletions bs/.development/tinyplot_test.R

This file was deleted.

43 changes: 0 additions & 43 deletions bs/.development/warningPlot.R

This file was deleted.

0 comments on commit 53983ff

Please sign in to comment.