Skip to content

Commit

Permalink
Apply auto-formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
iimog committed Jun 25, 2024
1 parent 5dbeffc commit bfe920f
Show file tree
Hide file tree
Showing 18 changed files with 432 additions and 336 deletions.
2 changes: 1 addition & 1 deletion R/add_tracks.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#' Add different types of tracks
#'
#'
#' @name add_tracks
#' @param x object to add the tracks to (e.g. gggenomes, gggenomes_layout)
#' @param ... named data.frames, i.e. genes=gene_df, snps=snp_df
Expand Down
19 changes: 10 additions & 9 deletions R/aes.R
Original file line number Diff line number Diff line change
@@ -1,23 +1,24 @@
# thomasp85/ggraph https://github.com/thomasp85/ggraph/blob/master/R/aaa.R
aes_intersect <- function(aes1, aes2) {
structure(
c(as.list(aes1), aes2[!names(aes2) %in% names(aes1)]),
class = 'uneval'
)
structure(
c(as.list(aes1), aes2[!names(aes2) %in% names(aes1)]),
class = "uneval"
)
}

aes_nudge_by_strand <- function(mapping, nudge_by_strand, keys = c("y", "yend")){
if(is.null(nudge_by_strand))
aes_nudge_by_strand <- function(mapping, nudge_by_strand, keys = c("y", "yend")) {
if (is.null(nudge_by_strand)) {
return(mapping)
}

# modify y/yend aes
is_integer(nudge_by_strand) || is_double(nudge_by_strand) ||
stop("nudge_by_strand needs to be numeric")

for(k in keys){
if(is.null(mapping[[k]])) stop("'", k, "' not defined, cannot modify")
for (k in keys) {
if (is.null(mapping[[k]])) stop("'", k, "' not defined, cannot modify")

mapping[[k]] <- rlang::parse_expr(paste0(rlang::quo_text(mapping[[k]]),'+ display_strand * ',-nudge_by_strand))
mapping[[k]] <- rlang::parse_expr(paste0(rlang::quo_text(mapping[[k]]), "+ display_strand * ", -nudge_by_strand))
}
mapping
}
62 changes: 34 additions & 28 deletions R/clusters.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,47 +10,50 @@
#' # add clusters
#' gggenomes(emale_genes, emale_seqs) %>%
#' add_clusters(emale_cogs) %>%
#' sync() + # works because clusters
#' geom_link() + # become links
#' sync() + # works because clusters
#' geom_link() + # become links
#' geom_seq() +
#' # works because cluster info is joined to gene track
#' geom_gene(aes(fill=ifelse(is.na(cluster_id), NA,
#' stringr::str_glue("{cluster_id} [{cluster_size}]")))) +
#' geom_gene(aes(fill = ifelse(is.na(cluster_id), NA,
#' stringr::str_glue("{cluster_id} [{cluster_size}]")
#' ))) +
#' scale_fill_discrete("COGs")
#'
add_clusters <- function(x, ..., .track_id = "genes"){
#'
add_clusters <- function(x, ..., .track_id = "genes") {
UseMethod("add_clusters")
}

#' @export
add_clusters.gggenomes <- function(x, ..., .track_id = "genes"){
add_clusters.gggenomes <- function(x, ..., .track_id = "genes") {
x$data <- add_clusters(x$data, ..., .track_id = {{ .track_id }})
x
}

#' @importFrom rlang .data
#' @export
add_clusters.gggenomes_layout <- function(x, ..., .track_id = "genes"){
if(!has_dots()){
add_clusters.gggenomes_layout <- function(x, ..., .track_id = "genes") {
if (!has_dots()) {
warn("No clusters data provided - check your arguments")
return(x)
}

pid <- tidyselect::vars_pull(track_ids(x), {{.track_id}})
pid <- tidyselect::vars_pull(track_ids(x), {{ .track_id }})
dot_exprs <- enexprs(...) # defuse before list(...)
tracks <- as_tracks(list(...), dot_exprs, track_ids(x))

tracks <- purrr::map(tracks, function(track){
tracks <- purrr::map(tracks, function(track) {
require_vars(track, c("feat_id", "cluster_id"))

track <- dplyr::filter(track, .data$feat_id %in% x$feats[[pid]]$feat_id)
if(nrow(track) < 1){
warn(str_glue("No matches between clusters and parent track based on ",
"`track_id`. Check your IDs and arguments"))
if (nrow(track) < 1) {
warn(str_glue(
"No matches between clusters and parent track based on ",
"`track_id`. Check your IDs and arguments"
))
return(x)
}

if(any(duplicated(track$feat_id))){
if (any(duplicated(track$feat_id))) {
dup_ids <- track$feat_id[duplicated(track$feat_id)][1:5]
abort(c("Duplicated `feat_id`s not allowed:", str_glue("{dup_ids}")))
}
Expand All @@ -60,30 +63,33 @@ add_clusters.gggenomes_layout <- function(x, ..., .track_id = "genes"){

sublinks <- purrr::map(tracks, cluster2sublinks, x$feats[[pid]]) %>%
purrr::compact() # can be empty tibble of all clusters were singletons
if(length(sublinks) < length(tracks)){
if (length(sublinks) < length(tracks)) {
warn("At least one cluster table had only singletons, so no links were produced")
}
if(length(sublinks)){
x <- add_sublink_tracks(x, {{.track_id}}, sublinks, transform="none")
if (length(sublinks)) {
x <- add_sublink_tracks(x, {{ .track_id }}, sublinks, transform = "none")
}

# this is just q&d - only adds the ids of the first cluster track. Not sure,
# how to handle adding multiple ones
if(length(tracks) > 1){
warn(str_glue("If adding multiple cluster tables, all are added as ",
"individual link tracks, but only the first table is joined with the ",
"parent feat table"))
if (length(tracks) > 1) {
warn(str_glue(
"If adding multiple cluster tables, all are added as ",
"individual link tracks, but only the first table is joined with the ",
"parent feat table"
))
}

x$feats[[pid]] <- left_join(x$feats[[pid]], tracks[[1]])
x
}

cluster2sublinks <- function(x, parent_track){
x %>% split_by(.data$cluster_id) %>%
purrr::keep(~nrow(.) > 1) %>% # links need >2 members, ignore singletons
purrr::map_df(.id = "cluster_id", function(g){
mat <- utils::combn(g$feat_id, 2, simplify=TRUE)
tibble(feat_id = mat[1,], feat_id2 = mat[2,])
cluster2sublinks <- function(x, parent_track) {
x %>%
split_by(.data$cluster_id) %>%
purrr::keep(~ nrow(.) > 1) %>% # links need >2 members, ignore singletons
purrr::map_df(.id = "cluster_id", function(g) {
mat <- utils::combn(g$feat_id, 2, simplify = TRUE)
tibble(feat_id = mat[1, ], feat_id2 = mat[2, ])
})
}
130 changes: 73 additions & 57 deletions R/flip.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#' sequences and their features. `sync` automatically flips bins using a
#' heuristic that maximizes the amount of forward strand links between
#' neighboring bins.
#'
#'
#' For more details see the help vignette:
#' \code{vignette("flip", package = "gggenomes")}
#'
Expand All @@ -21,136 +21,152 @@
#'
#' @examples
#' library(patchwork)
#' p <- gggenomes(genes=emale_genes) +
#' geom_seq(aes(color=strand), arrow=TRUE) +
#' geom_link(aes(fill=strand)) +
#' expand_limits(color=c("-")) +
#' labs(caption="not flipped")
#' p <- gggenomes(genes = emale_genes) +
#' geom_seq(aes(color = strand), arrow = TRUE) +
#' geom_link(aes(fill = strand)) +
#' expand_limits(color = c("-")) +
#' labs(caption = "not flipped")
#'
#' # nothing flipped
#' p0 <- p %>% add_links(emale_ava)
#'
#' # flip manually
#' p1 <- p %>% add_links(emale_ava) %>%
#' flip(4:6) + labs(caption="manually")
#' p1 <- p %>%
#' add_links(emale_ava) %>%
#' flip(4:6) + labs(caption = "manually")
#'
#' # flip automatically based on genome-genome links
#' p2 <- p %>% add_links(emale_ava) %>%
#' sync() + labs(caption="genome alignments")
#' p2 <- p %>%
#' add_links(emale_ava) %>%
#' sync() + labs(caption = "genome alignments")
#'
#' # flip automatically based on protein-protein links
#' p3 <- p %>% add_sublinks(emale_prot_ava) %>%
#' sync() + labs(caption="protein alignments")
#' p3 <- p %>%
#' add_sublinks(emale_prot_ava) %>%
#' sync() + labs(caption = "protein alignments")
#'
#' # flip automatically based on genes linked implicitly by belonging
#' # to the same clusters of orthologs (or any grouping of your choice)
#' p4 <- p %>% add_clusters(emale_cogs) %>%
#' sync() + labs(caption="shared orthologs")
#' p4 <- p %>%
#' add_clusters(emale_cogs) %>%
#' sync() + labs(caption = "shared orthologs")
#'
#' p0 + p1 + p2 + p3 + p4 + plot_layout(nrow=1, guides="collect")
#' p0 + p1 + p2 + p3 + p4 + plot_layout(nrow = 1, guides = "collect")
#' @export
flip <- function(x, ..., .bin_track=seqs){
flip <- function(x, ..., .bin_track = seqs) {
UseMethod("flip")
}
#' @export
flip.gggenomes <- function(x, ..., .bin_track=seqs){
x$data <- flip(x$data, ..., .bin_track={{ .bin_track }})
flip.gggenomes <- function(x, ..., .bin_track = seqs) {
x$data <- flip(x$data, ..., .bin_track = {{ .bin_track }})
x
}
#' @export
flip.gggenomes_layout <- function(x, ..., .bin_track=seqs){
if(!has_dots()) return(x)
flip_impl(x, bins=c(...), bin_track={{.bin_track}})
flip.gggenomes_layout <- function(x, ..., .bin_track = seqs) {
if (!has_dots()) {
return(x)
}
flip_impl(x, bins = c(...), bin_track = {{ .bin_track }})
}
#' @rdname flip
#' @export
flip_seqs <- function(x, ..., .bins=everything(), .seq_track=seqs, .bin_track=seqs){
flip_seqs <- function(x, ..., .bins = everything(), .seq_track = seqs, .bin_track = seqs) {
UseMethod("flip_seqs")
}
#' @export
flip_seqs.gggenomes <- function(x, ..., .bins=everything(), .seq_track=seqs, .bin_track=seqs){
x$data <- flip_seqs(x$data, ..., .bins={{.bins}}, .bin_track={{.bin_track}},
.seq_track={{.seq_track}})
flip_seqs.gggenomes <- function(x, ..., .bins = everything(), .seq_track = seqs, .bin_track = seqs) {
x$data <- flip_seqs(x$data, ...,
.bins = {{ .bins }}, .bin_track = {{ .bin_track }},
.seq_track = {{ .seq_track }}
)
x
}
#' @export
flip_seqs.gggenomes_layout <- function(x, ..., .bins=everything(), .seq_track=seqs, .bin_track=seqs){
if(!has_dots()) return(x)
flip_impl(x, bins={{.bins}}, seqs=c(...), bin_track={{.bin_track}}, seq_track={{.seq_track}})
flip_seqs.gggenomes_layout <- function(x, ..., .bins = everything(), .seq_track = seqs, .bin_track = seqs) {
if (!has_dots()) {
return(x)
}
flip_impl(x, bins = {{ .bins }}, seqs = c(...), bin_track = {{ .bin_track }}, seq_track = {{ .seq_track }})
}
#' @rdname flip
#' @param link_track the link track to use for flipping bins nicely
#' @param min_support only flip a bin if at least this many more nucleotides
#' support an inversion over the given orientation
#' @export
sync <- function(x, link_track=1, min_support=0){
sync <- function(x, link_track = 1, min_support = 0) {
UseMethod("sync")
}
#' @export
sync.gggenomes <- function(x, link_track=1, min_support=0){
x$data <- sync(x$data, link_track={{link_track}}, min_support=min_support)
sync.gggenomes <- function(x, link_track = 1, min_support = 0) {
x$data <- sync(x$data, link_track = {{ link_track }}, min_support = min_support)
x
}
#' @export
sync.gggenomes_layout <- function(x, link_track=1, min_support=0){
if(length(x$links) < 1)
rlang::abort("Links are required to `sync`")
l0 <- pull_links(x, {{link_track}})
sync.gggenomes_layout <- function(x, link_track = 1, min_support = 0) {
if (length(x$links) < 1) {
rlang::abort("Links are required to `sync`")
}
l0 <- pull_links(x, {{ link_track }})
s0 <- ungroup(pull_seqs(x))

f0 <- l0 |>
dplyr::left_join(select(s0, .data$seq_id, seq_strand=.data$strand), by = "seq_id") |>
dplyr::left_join(select(s0, seq_id2=.data$seq_id, seq_strand2=.data$strand), by = "seq_id2") |>
dplyr::left_join(select(s0, .data$seq_id, seq_strand = .data$strand), by = "seq_id") |>
dplyr::left_join(select(s0, seq_id2 = .data$seq_id, seq_strand2 = .data$strand), by = "seq_id2") |>
dplyr::mutate(
bin_id = ifelse(.data$y<.data$yend, .data$bin_id, .data$bin_id2), # chose the lower bin id
bin_id2 = ifelse(.data$y<.data$yend, .data$bin_id2, .data$bin_id), # chose the lower bin id
y = (.data$y+.data$yend)/2, # use mean y for sort
bin_id = ifelse(.data$y < .data$yend, .data$bin_id, .data$bin_id2), # chose the lower bin id
bin_id2 = ifelse(.data$y < .data$yend, .data$bin_id2, .data$bin_id), # chose the lower bin id
y = (.data$y + .data$yend) / 2, # use mean y for sort
support = link_width(.data$start, .data$end, .data$start2, .data$end2) *
strand_int(combine_strands(.data$strand, .data$seq_strand, .data$seq_strand2))) |>
strand_int(combine_strands(.data$strand, .data$seq_strand, .data$seq_strand2))
) |>
dplyr::group_by(.data$bin_id, .data$y) |>
dplyr::summarize(support = sum(.data$support)) |>
dplyr::ungroup() |>
dplyr::filter(abs(.data$support) >= min_support) |>
dplyr::arrange(-.data$y) |>
dplyr::mutate(needs_flip=cumprod(strand_int(.data$support >= 0)) < 0)
dplyr::mutate(needs_flip = cumprod(strand_int(.data$support >= 0)) < 0)

bins_to_flip <- f0 |> dplyr::filter(.data$needs_flip) |> dplyr::pull(.data$bin_id)
bins_to_flip <- f0 |>
dplyr::filter(.data$needs_flip) |>
dplyr::pull(.data$bin_id)

if(!length(bins_to_flip)){
inform(str_glue("All bins appear to be flipped nicely based on the given",
"links. Maybe change `min_coverage` or flip manually"))
if (!length(bins_to_flip)) {
inform(str_glue(
"All bins appear to be flipped nicely based on the given",
"links. Maybe change `min_coverage` or flip manually"
))
return(x)
}else{
} else {
inform(paste("Flipping:", comma(bins_to_flip)))
}

x %>% flip(all_of(bins_to_flip))
}

flip_impl <- function(x, bins=everything(), seqs=NULL, bin_track=seqs, seq_track=seqs){
flip_impl <- function(x, bins = everything(), seqs = NULL, bin_track = seqs, seq_track = seqs) {
# split by bin_id and select bins
seq_tbl <- pull_seqs(x)
seq_lst <- split_by(seq_tbl, .data$bin_id)
# in case we want to compute selections based on a track other than seqs
bin_sel_lst <- split_by(pull_track(x, {{bin_track}}), .data$bin_id)
bin_sel_lst <- split_by(pull_track(x, {{ bin_track }}), .data$bin_id)
bin_i <- tidyselect::eval_select(expr({{ bins }}), bin_sel_lst)
if(length(bin_i) == 0) rlang::abort("no bins selected")
if (length(bin_i) == 0) rlang::abort("no bins selected")
# select bins to operate on
flip_tbl <- bind_rows(seq_lst[names(bin_i)])

# flip seqs in bins
seqs <- enquo(seqs)
if(!quo_is_null(seqs)){
seq_sel_lst <- split_by(pull_track(x, {{seq_track}}), .data$seq_id)
if (!quo_is_null(seqs)) {
seq_sel_lst <- split_by(pull_track(x, {{ seq_track }}), .data$seq_id)
seq_sel_lst <- seq_sel_lst[names(seq_sel_lst) %in% flip_tbl$seq_id]
seq_i <- tidyselect::eval_select(expr(!! seqs ), seq_sel_lst)
seq_i <- tidyselect::eval_select(expr(!!seqs), seq_sel_lst)
seq_i <- flip_tbl$seq_id %in% names(seq_i)
flip_tbl$strand[seq_i] <- flip_strand(flip_tbl$strand[seq_i])
# flip entire bins
}else{
} else {
flip_tbl %<>% dplyr::group_by(.data$bin_id) %>%
dplyr::mutate(strand = flip_strand(.data$strand)) %>%
dplyr::arrange(-row_number(), .by_group=TRUE)
dplyr::arrange(-row_number(), .by_group = TRUE)
}

# splice modified bins back into rest
Expand All @@ -162,6 +178,6 @@ flip_impl <- function(x, bins=everything(), seqs=NULL, bin_track=seqs, seq_track
layout(x)
}

link_width <- function(start, end, start2, end2){
(width(start,end) + width(start2, end2))/2
link_width <- function(start, end, start2, end2) {
(width(start, end) + width(start2, end2)) / 2
}
Loading

0 comments on commit bfe920f

Please sign in to comment.