Apply auto-formatting

thackl · Jun 25, 2024 · bfe920f · bfe920f
1 parent 5dbeffc
commit bfe920f
Show file tree

Hide file tree

Showing 18 changed files with 432 additions and 336 deletions.
diff --git a/R/add_tracks.R b/R/add_tracks.R
@@ -1,5 +1,5 @@
 #' Add different types of tracks
-#' 
+#'
 #' @name add_tracks
 #' @param x object to add the tracks to (e.g. gggenomes, gggenomes_layout)
 #' @param ... named data.frames, i.e. genes=gene_df, snps=snp_df

diff --git a/R/aes.R b/R/aes.R
@@ -1,23 +1,24 @@
 # thomasp85/ggraph https://github.com/thomasp85/ggraph/blob/master/R/aaa.R
 aes_intersect <- function(aes1, aes2) {
-    structure(
-        c(as.list(aes1), aes2[!names(aes2) %in% names(aes1)]),
-        class = 'uneval'
-    )
+  structure(
+    c(as.list(aes1), aes2[!names(aes2) %in% names(aes1)]),
+    class = "uneval"
+  )
 }
 
-aes_nudge_by_strand <- function(mapping, nudge_by_strand, keys = c("y", "yend")){
-  if(is.null(nudge_by_strand))
+aes_nudge_by_strand <- function(mapping, nudge_by_strand, keys = c("y", "yend")) {
+  if (is.null(nudge_by_strand)) {
     return(mapping)
+  }
 
   # modify y/yend aes
   is_integer(nudge_by_strand) || is_double(nudge_by_strand) ||
     stop("nudge_by_strand needs to be numeric")
 
-  for(k in keys){
-    if(is.null(mapping[[k]])) stop("'", k, "' not defined, cannot modify")
+  for (k in keys) {
+    if (is.null(mapping[[k]])) stop("'", k, "' not defined, cannot modify")
 
-    mapping[[k]] <- rlang::parse_expr(paste0(rlang::quo_text(mapping[[k]]),'+ display_strand * ',-nudge_by_strand))
+    mapping[[k]] <- rlang::parse_expr(paste0(rlang::quo_text(mapping[[k]]), "+ display_strand * ", -nudge_by_strand))
   }
   mapping
 }
diff --git a/R/clusters.R b/R/clusters.R
@@ -10,47 +10,50 @@
 #' # add clusters
 #' gggenomes(emale_genes, emale_seqs) %>%
 #'   add_clusters(emale_cogs) %>%
-#'   sync() +             # works because clusters
-#'   geom_link() +        # become links
+#'   sync() + # works because clusters
+#'   geom_link() + # become links
 #'   geom_seq() +
 #'   # works because cluster info is joined to gene track
-#'   geom_gene(aes(fill=ifelse(is.na(cluster_id), NA,
-#'       stringr::str_glue("{cluster_id} [{cluster_size}]")))) +
+#'   geom_gene(aes(fill = ifelse(is.na(cluster_id), NA,
+#'     stringr::str_glue("{cluster_id} [{cluster_size}]")
+#'   ))) +
 #'   scale_fill_discrete("COGs")
-#' 
-add_clusters <- function(x, ..., .track_id = "genes"){
+#'
+add_clusters <- function(x, ..., .track_id = "genes") {
   UseMethod("add_clusters")
 }
 
 #' @export
-add_clusters.gggenomes <- function(x, ..., .track_id = "genes"){
+add_clusters.gggenomes <- function(x, ..., .track_id = "genes") {
   x$data <- add_clusters(x$data, ..., .track_id = {{ .track_id }})
   x
 }
 
 #' @importFrom rlang .data
 #' @export
-add_clusters.gggenomes_layout <- function(x, ..., .track_id = "genes"){
-  if(!has_dots()){
+add_clusters.gggenomes_layout <- function(x, ..., .track_id = "genes") {
+  if (!has_dots()) {
     warn("No clusters data provided - check your arguments")
     return(x)
   }
 
-  pid <- tidyselect::vars_pull(track_ids(x), {{.track_id}})
+  pid <- tidyselect::vars_pull(track_ids(x), {{ .track_id }})
   dot_exprs <- enexprs(...) # defuse before list(...)
   tracks <- as_tracks(list(...), dot_exprs, track_ids(x))
 
-  tracks <- purrr::map(tracks, function(track){
+  tracks <- purrr::map(tracks, function(track) {
     require_vars(track, c("feat_id", "cluster_id"))
 
     track <- dplyr::filter(track, .data$feat_id %in% x$feats[[pid]]$feat_id)
-    if(nrow(track) < 1){
-      warn(str_glue("No matches between clusters and parent track based on ",
-          "`track_id`. Check your IDs and arguments"))
+    if (nrow(track) < 1) {
+      warn(str_glue(
+        "No matches between clusters and parent track based on ",
+        "`track_id`. Check your IDs and arguments"
+      ))
       return(x)
     }
 
-    if(any(duplicated(track$feat_id))){
+    if (any(duplicated(track$feat_id))) {
       dup_ids <- track$feat_id[duplicated(track$feat_id)][1:5]
       abort(c("Duplicated `feat_id`s not allowed:", str_glue("{dup_ids}")))
     }
@@ -60,30 +63,33 @@ add_clusters.gggenomes_layout <- function(x, ..., .track_id = "genes"){
 
   sublinks <- purrr::map(tracks, cluster2sublinks, x$feats[[pid]]) %>%
     purrr::compact() # can be empty tibble of all clusters were singletons
-  if(length(sublinks) < length(tracks)){
+  if (length(sublinks) < length(tracks)) {
     warn("At least one cluster table had only singletons, so no links were produced")
   }
-  if(length(sublinks)){
-    x <- add_sublink_tracks(x, {{.track_id}}, sublinks, transform="none")
+  if (length(sublinks)) {
+    x <- add_sublink_tracks(x, {{ .track_id }}, sublinks, transform = "none")
   }
 
   # this is just q&d - only adds the ids of the first cluster track. Not sure,
   # how to handle adding multiple ones
-  if(length(tracks) > 1){
-    warn(str_glue("If adding multiple cluster tables, all are added as ",
-        "individual link tracks, but only the first table is joined with the ",
-        "parent feat table"))
+  if (length(tracks) > 1) {
+    warn(str_glue(
+      "If adding multiple cluster tables, all are added as ",
+      "individual link tracks, but only the first table is joined with the ",
+      "parent feat table"
+    ))
   }
 
   x$feats[[pid]] <- left_join(x$feats[[pid]], tracks[[1]])
   x
 }
 
-cluster2sublinks <- function(x, parent_track){
-  x %>% split_by(.data$cluster_id) %>%
-    purrr::keep(~nrow(.) > 1) %>% # links need >2 members, ignore singletons
-    purrr::map_df(.id = "cluster_id", function(g){
-      mat <- utils::combn(g$feat_id, 2, simplify=TRUE)
-      tibble(feat_id = mat[1,], feat_id2 = mat[2,])
+cluster2sublinks <- function(x, parent_track) {
+  x %>%
+    split_by(.data$cluster_id) %>%
+    purrr::keep(~ nrow(.) > 1) %>% # links need >2 members, ignore singletons
+    purrr::map_df(.id = "cluster_id", function(g) {
+      mat <- utils::combn(g$feat_id, 2, simplify = TRUE)
+      tibble(feat_id = mat[1, ], feat_id2 = mat[2, ])
     })
 }
diff --git a/R/flip.R b/R/flip.R
@@ -4,7 +4,7 @@
 #' sequences and their features. `sync` automatically flips bins using a
 #' heuristic that maximizes the amount of forward strand links between
 #' neighboring bins.
-#' 
+#'
 #' For more details see the help vignette:
 #' \code{vignette("flip", package = "gggenomes")}
 #'
@@ -21,136 +21,152 @@
 #'
 #' @examples
 #' library(patchwork)
-#' p <- gggenomes(genes=emale_genes) +
-#'   geom_seq(aes(color=strand), arrow=TRUE) +
-#'   geom_link(aes(fill=strand)) +
-#'   expand_limits(color=c("-")) +
-#'   labs(caption="not flipped")
+#' p <- gggenomes(genes = emale_genes) +
+#'   geom_seq(aes(color = strand), arrow = TRUE) +
+#'   geom_link(aes(fill = strand)) +
+#'   expand_limits(color = c("-")) +
+#'   labs(caption = "not flipped")
 #'
 #' # nothing flipped
 #' p0 <- p %>% add_links(emale_ava)
 #'
 #' # flip manually
-#' p1 <- p %>% add_links(emale_ava) %>%
-#'   flip(4:6) + labs(caption="manually")
+#' p1 <- p %>%
+#'   add_links(emale_ava) %>%
+#'   flip(4:6) + labs(caption = "manually")
 #'
 #' # flip automatically based on genome-genome links
-#' p2 <- p %>% add_links(emale_ava) %>%
-#'   sync() + labs(caption="genome alignments")
+#' p2 <- p %>%
+#'   add_links(emale_ava) %>%
+#'   sync() + labs(caption = "genome alignments")
 #'
 #' # flip automatically based on protein-protein links
-#' p3 <- p %>% add_sublinks(emale_prot_ava) %>%
-#'   sync() + labs(caption="protein alignments")
+#' p3 <- p %>%
+#'   add_sublinks(emale_prot_ava) %>%
+#'   sync() + labs(caption = "protein alignments")
 #'
 #' # flip automatically based on genes linked implicitly by belonging
 #' # to the same clusters of orthologs (or any grouping of your choice)
-#' p4 <- p %>% add_clusters(emale_cogs) %>%
-#'   sync() + labs(caption="shared orthologs")
+#' p4 <- p %>%
+#'   add_clusters(emale_cogs) %>%
+#'   sync() + labs(caption = "shared orthologs")
 #'
-#' p0 + p1 + p2 + p3 + p4 + plot_layout(nrow=1, guides="collect")
+#' p0 + p1 + p2 + p3 + p4 + plot_layout(nrow = 1, guides = "collect")
 #' @export
-flip <- function(x, ..., .bin_track=seqs){
+flip <- function(x, ..., .bin_track = seqs) {
   UseMethod("flip")
 }
 #' @export
-flip.gggenomes <- function(x, ..., .bin_track=seqs){
-  x$data <- flip(x$data, ..., .bin_track={{ .bin_track }})
+flip.gggenomes <- function(x, ..., .bin_track = seqs) {
+  x$data <- flip(x$data, ..., .bin_track = {{ .bin_track }})
   x
 }
 #' @export
-flip.gggenomes_layout <- function(x, ..., .bin_track=seqs){
-  if(!has_dots()) return(x)
-  flip_impl(x, bins=c(...), bin_track={{.bin_track}})
+flip.gggenomes_layout <- function(x, ..., .bin_track = seqs) {
+  if (!has_dots()) {
+    return(x)
+  }
+  flip_impl(x, bins = c(...), bin_track = {{ .bin_track }})
 }
 #' @rdname flip
 #' @export
-flip_seqs <- function(x, ..., .bins=everything(), .seq_track=seqs, .bin_track=seqs){
+flip_seqs <- function(x, ..., .bins = everything(), .seq_track = seqs, .bin_track = seqs) {
   UseMethod("flip_seqs")
 }
 #' @export
-flip_seqs.gggenomes <- function(x, ..., .bins=everything(), .seq_track=seqs, .bin_track=seqs){
-  x$data <- flip_seqs(x$data, ..., .bins={{.bins}}, .bin_track={{.bin_track}},
-                      .seq_track={{.seq_track}})
+flip_seqs.gggenomes <- function(x, ..., .bins = everything(), .seq_track = seqs, .bin_track = seqs) {
+  x$data <- flip_seqs(x$data, ...,
+    .bins = {{ .bins }}, .bin_track = {{ .bin_track }},
+    .seq_track = {{ .seq_track }}
+  )
   x
 }
 #' @export
-flip_seqs.gggenomes_layout <- function(x, ..., .bins=everything(), .seq_track=seqs, .bin_track=seqs){
-  if(!has_dots()) return(x)
-  flip_impl(x, bins={{.bins}}, seqs=c(...), bin_track={{.bin_track}}, seq_track={{.seq_track}})
+flip_seqs.gggenomes_layout <- function(x, ..., .bins = everything(), .seq_track = seqs, .bin_track = seqs) {
+  if (!has_dots()) {
+    return(x)
+  }
+  flip_impl(x, bins = {{ .bins }}, seqs = c(...), bin_track = {{ .bin_track }}, seq_track = {{ .seq_track }})
 }
 #' @rdname flip
 #' @param link_track the link track to use for flipping bins nicely
 #' @param min_support only flip a bin if at least this many more nucleotides
 #'   support an inversion over the given orientation
 #' @export
-sync <- function(x, link_track=1, min_support=0){
+sync <- function(x, link_track = 1, min_support = 0) {
   UseMethod("sync")
 }
 #' @export
-sync.gggenomes <- function(x, link_track=1, min_support=0){
-  x$data <- sync(x$data, link_track={{link_track}}, min_support=min_support)
+sync.gggenomes <- function(x, link_track = 1, min_support = 0) {
+  x$data <- sync(x$data, link_track = {{ link_track }}, min_support = min_support)
   x
 }
 #' @export
-sync.gggenomes_layout <- function(x, link_track=1, min_support=0){
-  if(length(x$links) < 1)
-   rlang::abort("Links are required to `sync`")
-  l0 <- pull_links(x, {{link_track}})
+sync.gggenomes_layout <- function(x, link_track = 1, min_support = 0) {
+  if (length(x$links) < 1) {
+    rlang::abort("Links are required to `sync`")
+  }
+  l0 <- pull_links(x, {{ link_track }})
   s0 <- ungroup(pull_seqs(x))
 
   f0 <- l0 |>
-    dplyr::left_join(select(s0, .data$seq_id, seq_strand=.data$strand), by = "seq_id") |>
-    dplyr::left_join(select(s0, seq_id2=.data$seq_id, seq_strand2=.data$strand), by = "seq_id2") |>
+    dplyr::left_join(select(s0, .data$seq_id, seq_strand = .data$strand), by = "seq_id") |>
+    dplyr::left_join(select(s0, seq_id2 = .data$seq_id, seq_strand2 = .data$strand), by = "seq_id2") |>
     dplyr::mutate(
-      bin_id = ifelse(.data$y<.data$yend, .data$bin_id, .data$bin_id2), # chose the lower bin id
-      bin_id2 = ifelse(.data$y<.data$yend, .data$bin_id2, .data$bin_id), # chose the lower bin id
-      y = (.data$y+.data$yend)/2, # use mean y for sort
+      bin_id = ifelse(.data$y < .data$yend, .data$bin_id, .data$bin_id2), # chose the lower bin id
+      bin_id2 = ifelse(.data$y < .data$yend, .data$bin_id2, .data$bin_id), # chose the lower bin id
+      y = (.data$y + .data$yend) / 2, # use mean y for sort
       support = link_width(.data$start, .data$end, .data$start2, .data$end2) *
-        strand_int(combine_strands(.data$strand, .data$seq_strand, .data$seq_strand2))) |>
+        strand_int(combine_strands(.data$strand, .data$seq_strand, .data$seq_strand2))
+    ) |>
     dplyr::group_by(.data$bin_id, .data$y) |>
     dplyr::summarize(support = sum(.data$support)) |>
     dplyr::ungroup() |>
     dplyr::filter(abs(.data$support) >= min_support) |>
     dplyr::arrange(-.data$y) |>
-    dplyr::mutate(needs_flip=cumprod(strand_int(.data$support >= 0)) < 0)
+    dplyr::mutate(needs_flip = cumprod(strand_int(.data$support >= 0)) < 0)
 
-  bins_to_flip <- f0 |> dplyr::filter(.data$needs_flip) |> dplyr::pull(.data$bin_id)
+  bins_to_flip <- f0 |>
+    dplyr::filter(.data$needs_flip) |>
+    dplyr::pull(.data$bin_id)
 
-  if(!length(bins_to_flip)){
-    inform(str_glue("All bins appear to be flipped nicely based on the given",
-                    "links. Maybe change `min_coverage` or flip manually"))
+  if (!length(bins_to_flip)) {
+    inform(str_glue(
+      "All bins appear to be flipped nicely based on the given",
+      "links. Maybe change `min_coverage` or flip manually"
+    ))
     return(x)
-  }else{
+  } else {
     inform(paste("Flipping:", comma(bins_to_flip)))
   }
 
   x %>% flip(all_of(bins_to_flip))
 }
 
-flip_impl <- function(x, bins=everything(), seqs=NULL, bin_track=seqs, seq_track=seqs){
+flip_impl <- function(x, bins = everything(), seqs = NULL, bin_track = seqs, seq_track = seqs) {
   # split by bin_id and select bins
   seq_tbl <- pull_seqs(x)
   seq_lst <- split_by(seq_tbl, .data$bin_id)
   # in case we want to compute selections based on a track other than seqs
-  bin_sel_lst <- split_by(pull_track(x, {{bin_track}}), .data$bin_id)
+  bin_sel_lst <- split_by(pull_track(x, {{ bin_track }}), .data$bin_id)
   bin_i <- tidyselect::eval_select(expr({{ bins }}), bin_sel_lst)
-  if(length(bin_i) == 0) rlang::abort("no bins selected")
+  if (length(bin_i) == 0) rlang::abort("no bins selected")
   # select bins to operate on
   flip_tbl <- bind_rows(seq_lst[names(bin_i)])
 
   # flip seqs in bins
   seqs <- enquo(seqs)
-  if(!quo_is_null(seqs)){
-    seq_sel_lst <- split_by(pull_track(x, {{seq_track}}), .data$seq_id)
+  if (!quo_is_null(seqs)) {
+    seq_sel_lst <- split_by(pull_track(x, {{ seq_track }}), .data$seq_id)
     seq_sel_lst <- seq_sel_lst[names(seq_sel_lst) %in% flip_tbl$seq_id]
-    seq_i <- tidyselect::eval_select(expr(!! seqs ), seq_sel_lst)
+    seq_i <- tidyselect::eval_select(expr(!!seqs), seq_sel_lst)
     seq_i <- flip_tbl$seq_id %in% names(seq_i)
     flip_tbl$strand[seq_i] <- flip_strand(flip_tbl$strand[seq_i])
     # flip entire bins
-  }else{
+  } else {
     flip_tbl %<>% dplyr::group_by(.data$bin_id) %>%
       dplyr::mutate(strand = flip_strand(.data$strand)) %>%
-      dplyr::arrange(-row_number(), .by_group=TRUE)
+      dplyr::arrange(-row_number(), .by_group = TRUE)
   }
 
   # splice modified bins back into rest
@@ -162,6 +178,6 @@ flip_impl <- function(x, bins=everything(), seqs=NULL, bin_track=seqs, seq_track
   layout(x)
 }
 
-link_width <- function(start, end, start2, end2){
-  (width(start,end) + width(start2, end2))/2
+link_width <- function(start, end, start2, end2) {
+  (width(start, end) + width(start2, end2)) / 2
 }