Skip to content

Commit

Permalink
Merge pull request #30 from elpaco-escience/add_coverage_refactor
Browse files Browse the repository at this point in the history
Increase test coverage and refactor
  • Loading branch information
bvreede authored Oct 7, 2023
2 parents e8d4d26 + 63e3a2d commit a5ed18f
Show file tree
Hide file tree
Showing 4 changed files with 181 additions and 52 deletions.
109 changes: 60 additions & 49 deletions R/summaries.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,61 +5,56 @@
#' @param allsources print all sources
#'
#' @export
report_summaries <- function(data, lang, allsources){
bysource <- summarize_language_data(data, lang)
bylanguage <- summarize_source_data(data, lang)
report_summaries <- function(data, lang = NA, allsources = FALSE){
# select the requested language
if(!is.na(lang)){
data <- data |>
dplyr::filter(.data$language==lang)
}
# ensure a translation column is present
if(!"translation" %in% colnames(data)){
data$translation <- NA
}
data <- data |>
dplyr::mutate(translation = ifelse(is.na(.data$translation),0,1))

nhours <- round(bylanguage$hours,1)
nature <- data |>
dplyr::group_by(nature) |>
dplyr::summarise(n=dplyr::n())
## Overall summary of the utterances
overall_stats <- summarize_overall(data)
nhours <- round(overall_stats$hours,1)
language_header <- paste(nhours,"hours")
print_summary(header = language_header, table = overall_stats)

# To command line
cat("\n")
cat("\n")
nhours <- round(bylanguage$hours,1)
cat("### ",nhours,"hours")
print(knitr::kable(bylanguage,label=lang))

cat("\n")
cat("\n")
cat("### nature")
print(knitr::kable(nature))
## Summarize nature of utterances
nature <- summarize_nature(data)
print_summary(header = "nature", table = nature)

cat("\n")
## Summarize by source
bysource <- summarize_bysource(data, allsources)
nsources <- length(unique(bysource$source))
cat("### ",nsources,"sources")

if(allsources) {
print(knitr::kable(bysource |>
dplyr::select(-"start",
-"finish",
-"talktime",
-"totaltime")))
} else {
if(nsources > 10) {
cat("\n")
cat("Showing only the first 10 sources; use `allsources=T` to show all")
}
print(knitr::kable(bysource |>
dplyr::select(-"start",
-"finish",
-"talktime",
-"totaltime") |>
dplyr::slice(1:10)))
}
source_header <- paste(nsources,"sources")
print_summary(header = source_header, table = bysource)
}


summarize_language_data <- function(data, lang){
if(!"translation" %in% colnames(data)){
data$translation <- NA
summarize_bysource <- function(data, allsources){
summary <- data |>
summarize_conversation() |>
dplyr::select(-"start",
-"finish",
-"talktime",
-"totaltime")
if(!allsources) {
summary <- summary |>
dplyr::slice(1:10)
cat("\n")
cat("Showing only the first 10 sources; use `allsources=T` to show all")
cat("\n")
}
return(summary)
}

data |>
dplyr::filter(.data$language == lang) |>
summarize_conversation <- function(data){
summary <- data |>
dplyr::group_by(.data$source) |>
dplyr::mutate(translation = ifelse(is.na(.data$translation),0,1)) |>
dplyr::summarize(start=min.na(.data$begin),finish=max.na(.data$end),
turns=dplyr::n_distinct(.data$uid),
translated=round(sum(.data$translation)/.data$turns,2),
Expand All @@ -70,12 +65,13 @@ summarize_language_data <- function(data, lang){
talkprop = round(.data$talktime / .data$totaltime,1),
minutes = round((.data$totaltime/1000 / 60),1),
hours = round((.data$totaltime/1000) / 3600,2))
return(summary)
}


summarize_source_data <- function(data, lang){
data |>
summarize_language_data(lang=lang) |> #TODO this uses another function?
summarize_overall <- function(data){
summary <- data |>
summarize_conversation() |>
dplyr::summarize(turns = sum(.data$turns),
translated=round(mean.na(.data$translated),2),
words = sum(.data$words),
Expand All @@ -85,4 +81,19 @@ summarize_source_data <- function(data, lang){
hours = round(sum(.data$hours),2),
turns_per_h = round(.data$turns/.data$hours)) |>
dplyr::arrange(desc(.data$hours))
return(summary)
}

summarize_nature <- function(data){
summary <- data |>
dplyr::group_by(nature) |>
dplyr::summarise(n=dplyr::n())
return(summary)
}

print_summary <- function(header, table){
cat("\n")
cat(paste("###", header))
print(knitr::kable(table))
cat("\n")
}
8 changes: 5 additions & 3 deletions tests/testthat/_snaps/inspect_language.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
cat(inspect_language(data, lang = "dutch"))
Output
### 5 hours
### 5 hours
| turns| translated| words| turnduration| talkprop| people| hours| turns_per_h|
|-----:|----------:|-----:|------------:|--------:|------:|-----:|-----------:|
Expand All @@ -20,9 +19,11 @@
|talk | 13366|
|NA | 57|
### 20 sources
Showing only the first 10 sources; use `allsources=T` to show all
### 10 sources
|source | turns| translated| words| people| talkprop| minutes| hours|
|:---------------|-----:|----------:|-----:|------:|--------:|-------:|-----:|
|/dutch2/DVA10O | 501| 0| 3498| 2| 0.9| 15| 0.25|
Expand All @@ -35,4 +36,5 @@
|/dutch2/DVA17AC | 782| 0| 3888| 2| 1.0| 15| 0.25|
|/dutch2/DVA19AG | 648| 0| 2957| 2| 0.9| 15| 0.25|
|/dutch2/DVA1A | 681| 0| 3432| 2| 1.0| 15| 0.25|

88 changes: 88 additions & 0 deletions tests/testthat/_snaps/summaries.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# summary reports are accurate

Code
cat(report_summaries(data, lang = "dutch", allsources = FALSE))
Output
### 5 hours
| turns| translated| words| turnduration| talkprop| people| hours| turns_per_h|
|-----:|----------:|-----:|------------:|--------:|------:|-----:|-----------:|
| 14022| 0| 69169| 1257| 0.98| 3| 5| 2804|
### nature
|nature | n|
|:------|-----:|
|laugh | 599|
|talk | 13366|
|NA | 57|
Showing only the first 10 sources; use `allsources=T` to show all
### 10 sources
|source | turns| translated| words| people| talkprop| minutes| hours|
|:---------------|-----:|----------:|-----:|------:|--------:|-------:|-----:|
|/dutch2/DVA10O | 501| 0| 3498| 2| 0.9| 15| 0.25|
|/dutch2/DVA11Q | 792| 0| 3318| 2| 1.0| 15| 0.25|
|/dutch2/DVA12S | 640| 0| 3112| 2| 0.9| 15| 0.25|
|/dutch2/DVA13U | 717| 0| 3548| 2| 1.0| 15| 0.25|
|/dutch2/DVA14W | 721| 0| 3099| 2| 0.9| 15| 0.25|
|/dutch2/DVA15Y | 770| 0| 3387| 2| 1.1| 15| 0.25|
|/dutch2/DVA16AA | 604| 0| 3889| 2| 1.1| 15| 0.25|
|/dutch2/DVA17AC | 782| 0| 3888| 2| 1.0| 15| 0.25|
|/dutch2/DVA19AG | 648| 0| 2957| 2| 0.9| 15| 0.25|
|/dutch2/DVA1A | 681| 0| 3432| 2| 1.0| 15| 0.25|

---

Code
cat(report_summaries(data, lang = "dutch", allsources = TRUE))
Output
### 5 hours
| turns| translated| words| turnduration| talkprop| people| hours| turns_per_h|
|-----:|----------:|-----:|------------:|--------:|------:|-----:|-----------:|
| 14022| 0| 69169| 1257| 0.98| 3| 5| 2804|
### nature
|nature | n|
|:------|-----:|
|laugh | 599|
|talk | 13366|
|NA | 57|
### 20 sources
|source | turns| translated| words| people| talkprop| minutes| hours|
|:---------------|-----:|----------:|-----:|------:|--------:|-------:|-----:|
|/dutch2/DVA10O | 501| 0| 3498| 2| 0.9| 15| 0.25|
|/dutch2/DVA11Q | 792| 0| 3318| 2| 1.0| 15| 0.25|
|/dutch2/DVA12S | 640| 0| 3112| 2| 0.9| 15| 0.25|
|/dutch2/DVA13U | 717| 0| 3548| 2| 1.0| 15| 0.25|
|/dutch2/DVA14W | 721| 0| 3099| 2| 0.9| 15| 0.25|
|/dutch2/DVA15Y | 770| 0| 3387| 2| 1.1| 15| 0.25|
|/dutch2/DVA16AA | 604| 0| 3889| 2| 1.1| 15| 0.25|
|/dutch2/DVA17AC | 782| 0| 3888| 2| 1.0| 15| 0.25|
|/dutch2/DVA19AG | 648| 0| 2957| 2| 0.9| 15| 0.25|
|/dutch2/DVA1A | 681| 0| 3432| 2| 1.0| 15| 0.25|
|/dutch2/DVA20AI | 758| 0| 3328| 2| 1.0| 15| 0.25|
|/dutch2/DVA22AL | 745| 0| 3549| 2| 1.0| 15| 0.25|
|/dutch2/DVA24AK | 685| 0| 3087| 2| 1.0| 15| 0.25|
|/dutch2/DVA2C | 765| 0| 4448| 2| 1.2| 15| 0.25|
|/dutch2/DVA3E | 753| 0| 3135| 2| 0.9| 15| 0.25|
|/dutch2/DVA4C | 719| 0| 3298| 2| 0.9| 15| 0.25|
|/dutch2/DVA6H | 683| 0| 3499| 2| 1.0| 15| 0.25|
|/dutch2/DVA7B | 691| 0| 3754| 2| 1.0| 15| 0.25|
|/dutch2/DVA8K | 612| 0| 2998| 2| 0.9| 15| 0.25|
|/dutch2/DVA9M | 755| 0| 3945| 2| 0.9| 15| 0.25|

28 changes: 28 additions & 0 deletions tests/testthat/test-summaries.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
## set up the test environment
# Install ifadv and devtools only if required
if (!requireNamespace("ifadv")){
if (!requireNamespace("devtools")){
install.packages("devtools")
}
devtools::install_github("elpaco-escience/ifadv")
}

data <- ifadv::ifadv

test_that("summary reports are accurate", {
expect_snapshot(cat(
report_summaries(
data,
lang="dutch",
allsources = FALSE
))
)

expect_snapshot(cat(
report_summaries(
data,
lang="dutch",
allsources = TRUE
))
)
})

0 comments on commit a5ed18f

Please sign in to comment.