Skip to content

Commit

Permalink
Merge pull request #28 from elpaco-escience/refactor_helper
Browse files Browse the repository at this point in the history
Refactor to fix CI issues
  • Loading branch information
bvreede authored Oct 6, 2023
2 parents f4f0f3f + 60cdf81 commit 1bae9c3
Show file tree
Hide file tree
Showing 10 changed files with 156 additions and 50 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ License: Apache License (>= 2)
Encoding: UTF-8
LazyData: true
Suggests:
devtools,
rmarkdown,
testthat (>= 3.0.0)
VignetteBuilder: knitr
Expand All @@ -25,7 +26,6 @@ Imports:
ggrepel,
ggthemes,
knitr,
magrittr,
stats,
stringx,
tidyr,
Expand Down
2 changes: 0 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,10 @@ export(inspect_corpus)
export(inspect_language)
export(report_summaries)
export(theme_turnPlot)
import(cowplot)
import(dplyr)
import(ggplot2)
import(ggrepel)
import(ggthemes)
import(knitr)
import(magrittr)
import(tidyr)
import(viridis)
3 changes: 3 additions & 0 deletions R/geom_turn.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#' @param inherit.aes If FALSE, overrides the default aesthetics, rather than combining with them. This is most useful for helper functions that define both data and aesthetics and shouldn't inherit behaviour from the default plot specification, e.g. borders().
#' @param ... Other arguments passed on to layer(). These are often aesthetics, used to set an aesthetic to a fixed value, like colour = "red" or size = 3. They may also be parameters to the paired geom/stat.
#' @export
#' @rdname geom_turn
geom_turn <- function(mapping = NULL, data = NULL,
stat = "identity", position = "identity",
..., na.rm = FALSE, show.legend = NA, inherit.aes = TRUE) {
Expand All @@ -25,6 +26,8 @@ geom_turn <- function(mapping = NULL, data = NULL,
)
}

#' GeomTurn
#'
#' @rdname ggplot2-ggproto
#' @format NULL
#' @usage NULL
Expand Down
4 changes: 1 addition & 3 deletions R/helper-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
#' @import viridis
#' @import tidyr
#' @import ggrepel
#' @import magrittr
#' @import cowplot
#' @import knitr
inspect_corpus <- function(d, d.tokens, lang=NULL,saveplot=F,allsources=F) {

Expand Down Expand Up @@ -72,7 +70,7 @@ inspect_corpus <- function(d, d.tokens, lang=NULL,saveplot=F,allsources=F) {
direction="y",nudge_y = -0.2,size=3,
max.overlaps=Inf)

panel <- plot_grid(pA,pB,pC,labels=c("A","B","C"),rel_widths = c(1,1,2),nrow=1)
panel <- cowplot::plot_grid(pA,pB,pC,labels=c("A","B","C"),rel_widths = c(1,1,2),nrow=1)
print(panel)
cat("\n")

Expand Down
64 changes: 46 additions & 18 deletions R/inspect_language.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,20 @@
#' @param data_conv conversation dataset
#' @param data_tokens tokens dataset
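#' @param lang language to inspect; only rows whose `language` column equals this value are used
#' @param returnplots boolean indicating whether the plots should be printed
#' @param saveplot boolean indicating whether the combined plot panel should be saved as a PNG file (only has an effect when `returnplots` is TRUE)
#' @param allsources boolean indicating whether the per-source summary should list all sources instead of only the first 10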
#'
#' @export
inspect_language <- function(data_conv,
data_tokens,
lang,
data_tokens = NULL,
lang = NULL,
returnplots = FALSE,
saveplot=FALSE,
allsources=FALSE) {
# conversation data
dp <- data_conv |>
dplyr::filter(language == lang)
dplyr::filter(.data$language == lang)

nturns <- sum(!is.na(dp$FTO)) # QUESTION: does this make sense?

Expand All @@ -23,34 +25,60 @@ inspect_language <- function(data_conv,
pC <- plot_turn_duration(data=dp)
pD <- plot_top_turn_types(data=dp)

# token data
dt <- data_tokens |>
dplyr::filter(language==lang)
nwords <- dt$total[1]

pE <- plot_token_rank(data=dt, nwords)

# combine the plots
top_row <- cowplot::plot_grid(pA,pB,pC,labels=c("A","B","C"),rel_widths = c(1,1,1),nrow=1)
bottom_row <- cowplot::plot_grid(pD,pE,labels=c("D","E"),rel_widths = c(1,1),nrow=1)
panel <- cowplot::plot_grid(top_row,bottom_row,ncol=1)
print(panel)
if(!is.null(data_tokens)){
# token data
dt <- data_tokens |>
dplyr::filter(.data$language==lang)
nwords <- dt$total[1]
pE <- plot_token_rank(data=dt, nwords)
}

if(saveplot) {
filename <- paste0('qc-panel-',lang,'.png')
ggplot2::ggsave(filename,bg="white",width=2400,height=1200,units="px")

# generate plots
if(returnplots){
if(!is.null(data_tokens)){
generate_plots(saveplot, pA, pB, pC, pD, pE)
} else {
generate_plots(saveplot, pA, pB, pC, pD, pE = NULL)
}
}

# sample conversation
data_convplot <- prepare_convplot(data_conv, lang)
pconv <- plot_conversation(data_convplot)
print(pconv)

if(returnplots){
pconv <- plot_conversation(data_convplot)
print(pconv)

}

# print summary stats
report_summaries(data_conv, lang, allsources)
}


#' Combine the individual quality-control plots into one panel
#'
#' Arranges the plots in a two-row grid with cowplot, prints the resulting
#' panel and optionally saves it as a PNG file in the working directory.
#'
#' @param saveplot if TRUE, the panel is written to `qc-panel-<lang>.png`
#'   (or to `qc-panel.png` when `lang` is not supplied)
#' @param pA,pB,pC,pD plots that are always placed in the panel
#' @param pE optional fifth plot; when NULL, a 2 x 2 layout of pA-pD is used
#'   instead of the 3 + 2 layout
#' @param lang optional language label used in the file name of the saved
#'   panel
#'
#' @return the combined panel, invisibly
#' @noRd
generate_plots <- function(saveplot = FALSE, pA, pB, pC, pD, pE = NULL,
                           lang = NULL) {
  if (is.null(pE)) {
    # Without the fifth plot: two rows of two plots each.
    top_row <- cowplot::plot_grid(
      pA, pB,
      labels = c("A", "B"), rel_widths = c(1, 1), nrow = 1
    )
    bottom_row <- cowplot::plot_grid(
      pC, pD,
      labels = c("C", "D"), rel_widths = c(1, 1), nrow = 1
    )
  } else {
    # With the fifth plot: three plots on top, two below.
    top_row <- cowplot::plot_grid(
      pA, pB, pC,
      labels = c("A", "B", "C"), rel_widths = c(1, 1, 1), nrow = 1
    )
    bottom_row <- cowplot::plot_grid(
      pD, pE,
      labels = c("D", "E"), rel_widths = c(1, 1), nrow = 1
    )
  }

  panel <- cowplot::plot_grid(top_row, bottom_row, ncol = 1)
  print(panel)

  if (saveplot) {
    # `lang` used to be read as a free variable here, which is not defined in
    # this function; it is now an explicit argument, and the language suffix
    # is simply left out of the file name when it is not supplied.
    suffix <- if (is.null(lang)) "" else paste0("-", lang)
    filename <- paste0("qc-panel", suffix, ".png")
    # Pass the panel explicitly instead of relying on ggplot2::last_plot().
    ggplot2::ggsave(
      filename,
      plot = panel,
      bg = "white", width = 2400, height = 1200, units = "px"
    )
  }

  invisible(panel)
}




57 changes: 33 additions & 24 deletions R/summaries.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,48 +32,57 @@ report_summaries <- function(data, lang, allsources){

if(allsources) {
print(knitr::kable(bysource |>
dplyr::select(-start,-finish,-talktime,-totaltime)))
dplyr::select(-"start",
-"finish",
-"talktime",
-"totaltime")))
} else {
if(nsources > 10) {
cat("\n")
cat("Showing only the first 10 sources; use `allsources=T` to show all")
}
print(knitr::kable(bysource |>
dplyr::select(-start,-finish,-talktime,-totaltime) |>
dplyr::select(-"start",
-"finish",
-"talktime",
-"totaltime") |>
dplyr::slice(1:10)))
}
}



summarize_language_data <- function(data, lang){
if(!"translation" %in% colnames(data)){
data$translation <- NA
}

data |>
dplyr::filter(language == lang) |>
dplyr::group_by(source) |>
dplyr::mutate(translation = ifelse(is.na(translation),0,1)) |>
dplyr::summarize(start=min.na(begin),finish=max.na(end),
turns=dplyr::n_distinct(uid),
translated=round(sum(translation)/turns,2),
words=sum(nwords,na.rm=T),
people=dplyr::n_distinct(participant),
talktime = sum(duration),
totaltime = finish - start,
talkprop = round(talktime / totaltime,1),
minutes = round((totaltime/1000 / 60),1),
hours = round((totaltime/1000) / 3600,2))
dplyr::filter(.data$language == lang) |>
dplyr::group_by(.data$source) |>
dplyr::mutate(translation = ifelse(is.na(.data$translation),0,1)) |>
dplyr::summarize(start=min.na(.data$begin),finish=max.na(.data$end),
turns=dplyr::n_distinct(.data$uid),
translated=round(sum(.data$translation)/.data$turns,2),
words=sum(.data$nwords,na.rm=T),
people=dplyr::n_distinct(.data$participant),
talktime = sum(.data$duration),
totaltime = .data$finish - .data$start,
talkprop = round(.data$talktime / .data$totaltime,1),
minutes = round((.data$totaltime/1000 / 60),1),
hours = round((.data$totaltime/1000) / 3600,2))
}


summarize_source_data <- function(data, lang){
data |>
summarize_language_data(lang=lang) |> #TODO this uses another function?
dplyr::summarize(turns = sum(turns),
translated=round(mean.na(translated),2),
words = sum(words),
turnduration=round(mean.na(sum(talktime)/turns)),
talkprop = round(mean.na(talkprop),2),
dplyr::summarize(turns = sum(.data$turns),
translated=round(mean.na(.data$translated),2),
words = sum(.data$words),
turnduration=round(mean.na(sum(.data$talktime)/.data$turns)),
talkprop = round(mean.na(.data$talkprop),2),
people = dplyr::n_distinct(data$participant),
hours = round(sum(hours),2),
turns_per_h = round(turns/hours)) |>
dplyr::arrange(desc(hours))
hours = round(sum(.data$hours),2),
turns_per_h = round(.data$turns/.data$hours)) |>
dplyr::arrange(desc(.data$hours))
}
10 changes: 10 additions & 0 deletions man/ggplot2-ggproto.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 5 additions & 2 deletions man/inspect_language.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 38 additions & 0 deletions tests/testthat/_snaps/inspect_language.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# language inspection yields stats

Code
cat(inspect_language(data, lang = "dutch"))
Output
### 5 hours
| turns| translated| words| turnduration| talkprop| people| hours| turns_per_h|
|-----:|----------:|-----:|------------:|--------:|------:|-----:|-----------:|
| 14022| 0| 69169| 1257| 0.98| 3| 5| 2804|
### nature
|nature | n|
|:------|-----:|
|laugh | 599|
|talk | 13366|
|NA | 57|
### 20 sources
Showing only the first 10 sources; use `allsources=T` to show all
|source | turns| translated| words| people| talkprop| minutes| hours|
|:---------------|-----:|----------:|-----:|------:|--------:|-------:|-----:|
|/dutch2/DVA10O | 501| 0| 3498| 2| 0.9| 15| 0.25|
|/dutch2/DVA11Q | 792| 0| 3318| 2| 1.0| 15| 0.25|
|/dutch2/DVA12S | 640| 0| 3112| 2| 0.9| 15| 0.25|
|/dutch2/DVA13U | 717| 0| 3548| 2| 1.0| 15| 0.25|
|/dutch2/DVA14W | 721| 0| 3099| 2| 0.9| 15| 0.25|
|/dutch2/DVA15Y | 770| 0| 3387| 2| 1.1| 15| 0.25|
|/dutch2/DVA16AA | 604| 0| 3889| 2| 1.1| 15| 0.25|
|/dutch2/DVA17AC | 782| 0| 3888| 2| 1.0| 15| 0.25|
|/dutch2/DVA19AG | 648| 0| 2957| 2| 0.9| 15| 0.25|
|/dutch2/DVA1A | 681| 0| 3432| 2| 1.0| 15| 0.25|

19 changes: 19 additions & 0 deletions tests/testthat/test-inspect_language.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
## Set up the test environment
# The test data come from the `ifadv` package. If it cannot be loaded it is
# installed from GitHub with devtools; devtools itself is installed first when
# it is missing as well.
# NOTE(review): installing at test time needs network access (and presumably a
# configured CRAN mirror for install.packages()) -- confirm this holds wherever
# the tests are run.
if (!requireNamespace("ifadv")){
if (!requireNamespace("devtools")){
install.packages("devtools")
}
devtools::install_github("elpaco-escience/ifadv")
}

# Dataset passed to inspect_language() as the conversation data.
data <- ifadv::ifadv

# Snapshot test: the printed output of inspect_language() for lang = "dutch" is
# compared with the stored snapshot. The call leaves `data_tokens` and
# `returnplots` unset. The expression inside expect_snapshot() is recorded in
# the snapshot file together with its output, so rewording it changes the
# snapshot.
test_that("language inspection yields stats", {
expect_snapshot(cat(
inspect_language(
data, lang="dutch"
))
)

})

0 comments on commit 1bae9c3

Please sign in to comment.