Skip to content

Commit

Permalink
Add compare_motifs and find_motifs
Browse files Browse the repository at this point in the history
  • Loading branch information
HDash committed Jul 4, 2024
1 parent 9a8a9b7 commit 8e52686
Show file tree
Hide file tree
Showing 12 changed files with 488 additions and 40 deletions.
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ export(calc_frip)
export(check_ENCODE)
export(check_JASPAR)
export(check_genome_build)
export(compare_motifs)
export(denovo_motifs)
export(find_motifs)
export(format_exptype)
export(get_JASPARCORE)
export(get_df_distances)
Expand Down Expand Up @@ -58,12 +60,14 @@ importFrom(magrittr,"%>%")
importFrom(memes,meme_is_installed)
importFrom(memes,runAme)
importFrom(memes,runFimo)
importFrom(memes,runTomTom)
importFrom(plotly,ggplotly)
importFrom(purrr,map_df)
importFrom(rmarkdown,render)
importFrom(tidyr,pivot_longer)
importFrom(tools,file_ext)
importFrom(tools,file_path_sans_ext)
importFrom(universalmotif,compare_motifs)
importFrom(universalmotif,read_homer)
importFrom(universalmotif,read_jaspar)
importFrom(universalmotif,read_meme)
Expand Down
7 changes: 1 addition & 6 deletions R/MotifPeeker.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,6 @@
#' file. (optional) Creates additional comparisons based on cell counts.
#' @param denovo_motif_discovery A logical indicating whether to perform
#' de-novo motif discovery for the third section of the report. (default = TRUE)
#' @param motif_db Path to \code{.meme} format file to use as reference
#' database, or a list of \code{\link[universalmotif]{universalmotif-class}}
#' objects. (optional) Results from de-novo motif discovery are searched against
#' this database to find similar motifs. If not provided, JASPAR CORE database
#' will be used. \strong{NOTE}: p-value estimates are inaccurate when the
#' database has fewer than 50 entries.
#' @param download_buttons A logical indicating whether to include download
#' buttons for various files within the HTML report. (default = TRUE)
#' @param output_dir A character string specifying the directory to save the
Expand All @@ -87,6 +81,7 @@
#' @inheritParams get_bpparam
#' @inheritParams memes::runFimo
#' @inheritParams denovo_motifs
#' @inheritParams find_motifs
#'
#' @import ggplot2
#' @import dplyr
Expand Down
99 changes: 99 additions & 0 deletions R/compare_motifs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#' Compare motifs from segregated sequences
#'
#' Compute motif similarity scores between motifs discovered from segregated
#' sequences. Wrapper around \code{\link[universalmotif]{compare_motifs}} to
#' compare motifs from different groups of sequences. To see the possible
#' similarity measures available, refer to details.
#'
#' @inheritParams universalmotif::compare_motifs
#' @inheritParams find_motifs
#' @inheritDotParams universalmotif::compare_motifs
#'
#' @importFrom universalmotif compare_motifs
#'
#' @inherit universalmotif::compare_motifs details
#'
#' @returns A list of matrices containing the similarity scores between motifs
#' from different groups of sequences. The order of comparison is as follows,
#' with first element representing the rows and second element representing the
#' columns of the matrix:
#' \itemize{
#' \item 1. \strong{Common motifs comparison:} Common seqs from reference
#' (1) <-> comparison (2)
#' \item 2. \strong{Unique motifs comparison:} Unique seqs from reference
#' (1) <-> comparison (2)
#' \item 3. \strong{Cross motifs comparison 1:} Unique seqs from reference
#' (1) <-> comparison (1)
#' \item 4. \strong{Cross motifs comparison 2:} Unique seqs from comparison
#' (2) <-> reference (1)
#' }
#' The list is repeated for each set of comparison groups in input.
#'
#' @examples
#' data("CTCF_TIP_peaks", package = "MotifPeeker")
#' data("CTCF_ChIP_peaks", package = "MotifPeeker")
#'
#' if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38")) {
#' genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
#' segregated_peaks <- segregate_seqs(CTCF_TIP_peaks, CTCF_ChIP_peaks)
#' denovo_motifs <- denovo_motifs(unlist(segregated_peaks),
#' trim_seq_width = 100,
#' genome_build = genome_build,
#' denovo_motifs = 2,
#' filter_n = 6,
#' out_dir = tempdir(),
#' workers = 1)
#' similarity_matrices <- compare_motifs(denovo_motifs)
#' print(similarity_matrices)
#' }
#'
#'
#' @export
compare_motifs <- function(streme_out,
method = "PCC",
normalise.scores = TRUE,
workers = 1,
verbose = FALSE,
...) {
## Motif group sequence - #1 Common seqs - Reference (1)
## (4 Groups per #2 Common seqs - Comparison (2)
## comparison pair) #3 Unique seqs - Reference (1)
## #4 Unique seqs - Comparison (2)
group_indices <- rep(seq_len(length(streme_out) / 4), each = 4)
seg_indices <- rep(seq_len(4), length.out = length(streme_out))
comparison_groups <- list(
"1" = c(1, 2),
"2" = c(3, 4),
"3" = c(3, 2),
"4" = c(4, 1)
)

res <- bpapply(
seq_along(streme_out), function(x) {
comparison_group <- comparison_groups[[seg_indices[x]]]
.motifsx <- function(m) streme_out[[comparison_group[m] *
group_indices[[x]]]]$motif
m1 <- .motifsx(1)
m2 <- .motifsx(2)
if (length(m1) == 0 || length(m2) == 0) return(matrix(NA))
res_x <- universalmotif::compare_motifs(
list(m1, m2),
method = method,
normalise.scores = normalise.scores,
...
)
row_indices <- seq(1, length(m1))
col_indices <- seq(length(m1) + 1, length(m1) + length(m2))

res_x <- res_x[row_indices, col_indices, drop = FALSE]
return(res_x)
}, workers = workers, verbose = verbose
)

## Output: Comparison Pair 1 - 1. common_(1) <-> common_(2)
## 2. unique_(1) <-> unique_(2)
## 3. unique_(1) <-> common_(2) [cross1]
## 4. unique_(2) <-> common_(1) [cross2]
## Comparison Pair 2 - 5. common_(1) <-> common_(2)...
return(res)
}
2 changes: 2 additions & 0 deletions R/denovo_motifs.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ denovo_motifs <- function(seqs,
maxw = 25,
filter_n = 6,
out_dir = tempdir(),
meme_path = NULL,
workers = 1,
verbose = FALSE,
debug = FALSE,
Expand Down Expand Up @@ -86,6 +87,7 @@ denovo_motifs <- function(seqs,
minw = 8,
maxw = 25,
nmotifs = denovo_motifs,
meme_path = meme_path,
...
)

Expand Down
66 changes: 66 additions & 0 deletions R/find_motifs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#' Find similar motifs
#'
#' Search through provided motif database to find similar motifs to the input.
#' Light wrapper around \code{TOMTOM} from MEME Suite.
#'
#' @param streme_out Output from \code{\link{denovo_motifs}}.
#' @param motif_db Path to \code{.meme} format file to use as reference
#' database, or a list of \code{\link[universalmotif]{universalmotif-class}}
#' objects. (optional) Results from de-novo motif discovery are searched against
#' this database to find similar motifs. If not provided, JASPAR CORE database
#' will be used. \strong{NOTE}: p-value estimates are inaccurate when the
#' database has fewer than 50 entries.
#' @param ... Additional arguments to pass to \code{TOMTOM}. For more
#' information, refer to the official MEME Suite documentation on
#' \href{https://meme-suite.org/meme/doc/tomtom.html}{TOMTOM}.
#' @inheritParams denovo_motifs
#'
#' @importFrom memes runTomTom
#'
#' @inherit memes::runTomTom return
#'
#' @examples
#' data("CTCF_TIP_peaks", package = "MotifPeeker")
#' if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38", quietly = TRUE)) {
#' genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
#'
#' res <- denovo_motifs(list(CTCF_TIP_peaks),
#' trim_seq_width = 100,
#' genome_build = genome_build,
#' denovo_motifs = 2,
#' filter_n = 6,
#' out_dir = tempdir())
#' res2 <- find_motifs(res, motif_db = get_JASPARCORE(),
#' out_dir = tempdir())
#' print(res2)
#' }
#'
#' @export
find_motifs <- function(streme_out,
motif_db,
out_dir = tempdir(),
meme_path = NULL,
workers = 1,
verbose = FALSE,
debug = FALSE,
...) {
out_dir <- file.path(out_dir, "tomtom")
if (!dir.exists(out_dir)) dir.create(out_dir, recursive = TRUE)

res <- bpapply(
seq_along(streme_out), function(x) {
motifs <- streme_out[[x]]$motif
lapply(seq_along(motifs), function(y) {
out_dir_x <- file.path(out_dir, x, y)
if (!dir.exists(out_dir_x)) dir.create(out_dir_x,
recursive = TRUE)
res_x <- memes::runTomTom(
motifs[[y]], motif_db, outdir = out_dir_x, silent = !debug,
meme_path = meme_path, ...
)
return(res_x)
})
}, workers = workers, verbose = verbose
)
return(res)
}
27 changes: 16 additions & 11 deletions inst/markdown/MotifPeeker.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,6 @@ motif_summit_dist_df <- get_df_distances(
params$meme_path, params$debug
)
if (comparison_metrics) {
### Known-motif Analysis ###
comparison_indices <- setdiff(seq_along(result$peaks),
Expand All @@ -175,19 +174,25 @@ if (comparison_metrics) {
params$meme_path, params$verbose
)
}
}
### De-Novo Motif Analysis ###
## Run STREME
if (params$denovo_motif_discovery) {
result$streme <- denovo_motifs(
unlist(segregated_peaks), params$trim_seq_width, genome_build,
params$denovo_motifs, filter_n = params$filter_n,
out_dir = out_dir_extra, workers = params$workers,
verbose = params$verbose, debug = params$debug
)
}
### De-Novo Motif Analysis ###
## Run STREME
if (params$denovo_motif_discovery && comparison_metrics) {
result$streme <- denovo_motifs(
unlist(segregated_peaks), params$trim_seq_width, genome_build,
params$denovo_motifs, filter_n = params$filter_n,
out_dir = out_dir_extra, meme_path = params$meme_path,
workers = params$workers, verbose = params$verbose, debug = params$debug
)
result$streme_similar <- find_motifs(
result$streme, motif_db, out_dir = out_dir_extra,
meme_path = params$meme_path, workers = params$workers,
verbose = params$verbose, debug = params$debug
)
}
```

```{r setup_misc, include = FALSE}
Expand Down
Loading

0 comments on commit 8e52686

Please sign in to comment.