Add compare_motifs and find_motifs

neurogenomics · Jul 4, 2024 · 8e52686 · 8e52686
1 parent 9a8a9b7
commit 8e52686
Show file tree

Hide file tree

Showing 12 changed files with 488 additions and 40 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -7,7 +7,9 @@ export(calc_frip)
 export(check_ENCODE)
 export(check_JASPAR)
 export(check_genome_build)
+export(compare_motifs)
 export(denovo_motifs)
+export(find_motifs)
 export(format_exptype)
 export(get_JASPARCORE)
 export(get_df_distances)
@@ -58,12 +60,14 @@ importFrom(magrittr,"%>%")
 importFrom(memes,meme_is_installed)
 importFrom(memes,runAme)
 importFrom(memes,runFimo)
+importFrom(memes,runTomTom)
 importFrom(plotly,ggplotly)
 importFrom(purrr,map_df)
 importFrom(rmarkdown,render)
 importFrom(tidyr,pivot_longer)
 importFrom(tools,file_ext)
 importFrom(tools,file_path_sans_ext)
+importFrom(universalmotif,compare_motifs)
 importFrom(universalmotif,read_homer)
 importFrom(universalmotif,read_jaspar)
 importFrom(universalmotif,read_meme)

diff --git a/R/MotifPeeker.R b/R/MotifPeeker.R
@@ -55,12 +55,6 @@
 #' file. (optional) Creates additional comparisons based on cell counts.
 #' @param denovo_motif_discovery A logical indicating whether to perform
 #' de-novo motif discovery for the third section of the report. (default = TRUE)
-#' @param motif_db Path to \code{.meme} format file to use as reference
-#' database, or a list of \code{\link[universalmotif]{universalmotif-class}}
-#' objects. (optional) Results from de-novo motif discovery are searched against
-#' this database to find similar motifs. If not provided, JASPAR CORE database
-#' will be used. \strong{NOTE}: p-value estimates are inaccurate when the
-#' database has fewer than 50 entries.
 #' @param download_buttons A logical indicating whether to include download
 #' buttons for various files within the HTML report. (default = TRUE)
 #' @param output_dir A character string specifying the directory to save the
@@ -87,6 +81,7 @@
 #' @inheritParams get_bpparam
 #' @inheritParams memes::runFimo
 #' @inheritParams denovo_motifs
+#' @inheritParams find_motifs
 #' 
 #' @import ggplot2
 #' @import dplyr

diff --git a/R/compare_motifs.R b/R/compare_motifs.R
@@ -0,0 +1,99 @@
+#' Compare motifs from segregated sequences
+#' 
+#' Compute motif similarity scores between motifs discovered from segregated
+#' sequences. Wrapper around \code{\link[universalmotif]{compare_motifs}} to
+#' compare motifs from different groups of sequences. To see the possible
+#' similarity measures available, refer to details.
+#' 
+#' @inheritParams universalmotif::compare_motifs
+#' @inheritParams find_motifs
+#' @inheritDotParams universalmotif::compare_motifs
+#' 
+#' @importFrom universalmotif compare_motifs
+#' 
+#' @inherit universalmotif::compare_motifs details
+#' 
+#' @returns A list of matrices containing the similarity scores between motifs
+#' from different groups of sequences. The order of comparison is as follows,
+#' with first element representing the rows and second element representing the
+#' columns of the matrix:
+#' \itemize{
+#'     \item 1. \strong{Common motifs comparison:} Common seqs from reference
+#'     (1) <-> comparison (2)
+#'     \item 2. \strong{Unique motifs comparison:} Unique seqs from reference
+#'     (1) <-> comparison (2)
+#'     \item 3. \strong{Cross motifs comparison 1:} Unique seqs from reference
+#'     (1) <-> comparison (1)
+#'     \item 4. \strong{Cross motifs comparison 2:} Unique seqs from comparison
+#'     (2) <-> reference (1)
+#' }
+#' The list is repeated for each set of comparison groups in input.
+#' 
+#' @examples
+#' data("CTCF_TIP_peaks", package = "MotifPeeker")
+#' data("CTCF_ChIP_peaks", package = "MotifPeeker")
+#' 
+#' if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38")) {
+#'     genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
+#'     segregated_peaks <- segregate_seqs(CTCF_TIP_peaks, CTCF_ChIP_peaks)
+#'     denovo_motifs <- denovo_motifs(unlist(segregated_peaks),
+#'                         trim_seq_width = 100,
+#'                         genome_build = genome_build,
+#'                         denovo_motifs = 2,
+#'                         filter_n = 6,
+#'                         out_dir = tempdir(),
+#'                         workers = 1)
+#'     similarity_matrices <- compare_motifs(denovo_motifs)
+#'     print(similarity_matrices)
+#' }
+#' 
+#' 
+#' @export
+compare_motifs <- function(streme_out,
+                            method = "PCC",
+                            normalise.scores = TRUE,
+                            workers = 1,
+                            verbose = FALSE,
+                            ...) {
+    ## Motif group sequence -   #1 Common seqs - Reference (1)
+    ## (4 Groups per            #2 Common seqs - Comparison (2)
+    ## comparison pair)         #3 Unique seqs - Reference (1)
+    ##                          #4 Unique seqs - Comparison (2)
+    group_indices <- rep(seq_len(length(streme_out) / 4), each = 4)
+    seg_indices <- rep(seq_len(4), length.out = length(streme_out))
+    comparison_groups <- list(
+        "1" = c(1, 2),
+        "2" = c(3, 4),
+        "3" = c(3, 2),
+        "4" = c(4, 1)
+    )
+
+    res <- bpapply(
+        seq_along(streme_out), function(x) {
+            comparison_group <- comparison_groups[[seg_indices[x]]]
+            .motifsx <- function(m) streme_out[[comparison_group[m] *
+                                                    group_indices[[x]]]]$motif
+            m1 <- .motifsx(1)
+            m2 <- .motifsx(2)
+            if (length(m1) == 0 || length(m2) == 0) return(matrix(NA))
+            res_x <- universalmotif::compare_motifs(
+                        list(m1, m2),
+                        method = method,
+                        normalise.scores = normalise.scores,
+                        ...
+                    )
+            row_indices <- seq(1, length(m1))
+            col_indices <- seq(length(m1) + 1, length(m1) + length(m2))
+
+            res_x <- res_x[row_indices, col_indices, drop = FALSE]
+            return(res_x)
+        }, workers = workers, verbose = verbose
+    )
+
+    ## Output: Comparison Pair 1 - 1. common_(1) <-> common_(2)
+    ##                             2. unique_(1) <-> unique_(2)
+    ##                             3. unique_(1) <-> common_(2) [cross1]
+    ##                             4. unique_(2) <-> common_(1) [cross2]
+    ##         Comparison Pair 2 - 5. common_(1) <-> common_(2)...
+    return(res)
+}
diff --git a/R/denovo_motifs.R b/R/denovo_motifs.R
@@ -57,6 +57,7 @@ denovo_motifs <- function(seqs,
                             maxw = 25,
                             filter_n = 6,
                             out_dir = tempdir(),
+                            meme_path = NULL,
                             workers = 1,
                             verbose = FALSE,
                             debug = FALSE,
@@ -86,6 +87,7 @@ denovo_motifs <- function(seqs,
                 minw = 8,
                 maxw = 25,
                 nmotifs = denovo_motifs,
+                meme_path = meme_path,
                 ...
             )
 

diff --git a/R/find_motifs.R b/R/find_motifs.R
@@ -0,0 +1,66 @@
+#' Find similar motifs
+#' 
+#' Search through provided motif database to find similar motifs to the input.
+#' Light wrapper around \code{TOMTOM} from MEME Suite.
+#' 
+#' @param streme_out Output from \code{\link{denovo_motifs}}.
+#' @param motif_db Path to \code{.meme} format file to use as reference
+#' database, or a list of \code{\link[universalmotif]{universalmotif-class}}
+#' objects. (optional) Results from de-novo motif discovery are searched against
+#' this database to find similar motifs. If not provided, JASPAR CORE database
+#' will be used. \strong{NOTE}: p-value estimates are inaccurate when the
+#' database has fewer than 50 entries.
+#' @param ... Additional arguments to pass to \code{TOMTOM}. For more
+#' information, refer to the official MEME Suite documentation on
+#' \href{https://meme-suite.org/meme/doc/tomtom.html}{TOMTOM}.
+#' @inheritParams denovo_motifs
+#' 
+#' @importFrom memes runTomTom
+#' 
+#' @inherit memes::runTomTom return
+#' 
+#' @examples
+#' data("CTCF_TIP_peaks", package = "MotifPeeker")
+#' if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38", quietly = TRUE)) {
+#'     genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
+#'    
+#'     res <- denovo_motifs(list(CTCF_TIP_peaks),
+#'                         trim_seq_width = 100,
+#'                         genome_build = genome_build,
+#'                         denovo_motifs = 2,
+#'                         filter_n = 6,
+#'                         out_dir = tempdir())
+#'     res2 <- find_motifs(res, motif_db = get_JASPARCORE(),
+#'                         out_dir = tempdir())
+#'     print(res2)
+#' }
+#' 
+#' @export
+find_motifs <- function(streme_out,
+                        motif_db,
+                        out_dir = tempdir(),
+                        meme_path = NULL,
+                        workers = 1,
+                        verbose = FALSE,
+                        debug = FALSE,
+                        ...) {
+    out_dir <- file.path(out_dir, "tomtom")
+    if (!dir.exists(out_dir)) dir.create(out_dir, recursive = TRUE)
+
+    res <- bpapply(
+        seq_along(streme_out), function(x) {
+            motifs <- streme_out[[x]]$motif
+            lapply(seq_along(motifs), function(y) {
+                out_dir_x <- file.path(out_dir, x, y)
+                if (!dir.exists(out_dir_x)) dir.create(out_dir_x,
+                                                        recursive = TRUE)
+                res_x <- memes::runTomTom(
+                    motifs[[y]], motif_db, outdir = out_dir_x, silent = !debug,
+                    meme_path = meme_path, ...
+                )
+                return(res_x)
+            })
+        }, workers = workers, verbose = verbose
+    )
+    return(res)
+}
diff --git a/inst/markdown/MotifPeeker.Rmd b/inst/markdown/MotifPeeker.Rmd
@@ -157,7 +157,6 @@ motif_summit_dist_df <- get_df_distances(
         params$meme_path, params$debug
     )
 
-
 if (comparison_metrics) {
     ### Known-motif Analysis ###
     comparison_indices <- setdiff(seq_along(result$peaks),
@@ -175,19 +174,25 @@ if (comparison_metrics) {
               params$meme_path, params$verbose
           )
     }
+}
     
-    ### De-Novo Motif Analysis ###
-    ## Run STREME
-    if (params$denovo_motif_discovery) {
-        result$streme <- denovo_motifs(
-          unlist(segregated_peaks), params$trim_seq_width, genome_build,
-          params$denovo_motifs, filter_n = params$filter_n,
-          out_dir = out_dir_extra, workers = params$workers,
-          verbose = params$verbose, debug = params$debug
-        )
-    }
+### De-Novo Motif Analysis ###
+## Run STREME
+if (params$denovo_motif_discovery && comparison_metrics) {
+    result$streme <- denovo_motifs(
+      unlist(segregated_peaks), params$trim_seq_width, genome_build,
+      params$denovo_motifs, filter_n = params$filter_n,
+      out_dir = out_dir_extra, meme_path = params$meme_path,
+      workers = params$workers, verbose = params$verbose, debug = params$debug
+    )
+    result$streme_similar <- find_motifs(
+        result$streme, motif_db, out_dir = out_dir_extra,
+        meme_path = params$meme_path, workers = params$workers,
+        verbose = params$verbose, debug = params$debug
+    )
 }
 
+
 ```
 
 ```{r setup_misc, include = FALSE}