From 8e52686fd65cad488f3db3ce3e576751608916ca Mon Sep 17 00:00:00 2001
From: HDash <16350928+HDash@users.noreply.github.com>
Date: Thu, 4 Jul 2024 17:44:45 +0100
Subject: [PATCH] Add compare_motifs and find_motifs

---
 NAMESPACE                                |   4 +
 R/MotifPeeker.R                          |   7 +-
 R/compare_motifs.R                       |  99 +++++++++++++
 R/denovo_motifs.R                        |   2 +
 R/find_motifs.R                          |  66 +++++++++
 inst/markdown/MotifPeeker.Rmd            |  27 ++--
 man/compare_motifs.Rd                    | 177 +++++++++++++++++++++++
 man/denovo_motifs.Rd                     |   4 +
 man/find_motifs.Rd                       |  76 ++++++++++
 man/pretty_number.Rd                     |   5 +-
 tests/testthat/test-denovo_motif_funcs.R |  40 +++++
 tests/testthat/test-denovo_motifs.R      |  21 ---
 12 files changed, 488 insertions(+), 40 deletions(-)
 create mode 100644 R/compare_motifs.R
 create mode 100644 R/find_motifs.R
 create mode 100644 man/compare_motifs.Rd
 create mode 100644 man/find_motifs.Rd
 create mode 100644 tests/testthat/test-denovo_motif_funcs.R
 delete mode 100644 tests/testthat/test-denovo_motifs.R

diff --git a/NAMESPACE b/NAMESPACE
index 1d6e52d..9047a17 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -7,7 +7,9 @@ export(calc_frip)
 export(check_ENCODE)
 export(check_JASPAR)
 export(check_genome_build)
+export(compare_motifs)
 export(denovo_motifs)
+export(find_motifs)
 export(format_exptype)
 export(get_JASPARCORE)
 export(get_df_distances)
@@ -58,12 +60,14 @@ importFrom(magrittr,"%>%")
 importFrom(memes,meme_is_installed)
 importFrom(memes,runAme)
 importFrom(memes,runFimo)
+importFrom(memes,runTomTom)
 importFrom(plotly,ggplotly)
 importFrom(purrr,map_df)
 importFrom(rmarkdown,render)
 importFrom(tidyr,pivot_longer)
 importFrom(tools,file_ext)
 importFrom(tools,file_path_sans_ext)
+importFrom(universalmotif,compare_motifs)
 importFrom(universalmotif,read_homer)
 importFrom(universalmotif,read_jaspar)
 importFrom(universalmotif,read_meme)
diff --git a/R/MotifPeeker.R b/R/MotifPeeker.R
index 3a81312..8690f80 100644
--- a/R/MotifPeeker.R
+++ b/R/MotifPeeker.R
@@ -55,12 +55,6 @@
 #' file. (optional) Creates additional comparisons based on cell counts.
 #' @param denovo_motif_discovery A logical indicating whether to perform
 #' de-novo motif discovery for the third section of the report. (default = TRUE)
-#' @param motif_db Path to \code{.meme} format file to use as reference
-#' database, or a list of \code{\link[universalmotif]{universalmotif-class}}
-#' objects. (optional) Results from de-novo motif discovery are searched against
-#' this database to find similar motifs. If not provided, JASPAR CORE database
-#' will be used. \strong{NOTE}: p-value estimates are inaccurate when the
-#' database has fewer than 50 entries.
 #' @param download_buttons A logical indicating whether to include download
 #' buttons for various files within the HTML report. (default = TRUE)
 #' @param output_dir A character string specifying the directory to save the
@@ -87,6 +81,7 @@
 #' @inheritParams get_bpparam
 #' @inheritParams memes::runFimo
 #' @inheritParams denovo_motifs
+#' @inheritParams find_motifs
 #' 
 #' @import ggplot2
 #' @import dplyr
diff --git a/R/compare_motifs.R b/R/compare_motifs.R
new file mode 100644
index 0000000..085e10e
--- /dev/null
+++ b/R/compare_motifs.R
@@ -0,0 +1,99 @@
+#' Compare motifs from segregated sequences
+#' 
+#' Compute motif similarity scores between motifs discovered from segregated
+#' sequences. Wrapper around \code{\link[universalmotif]{compare_motifs}} to
+#' compare motifs from different groups of sequences. To see the possible
+#' similarity measures available, refer to details.
+#' 
+#' @inheritParams universalmotif::compare_motifs
+#' @inheritParams find_motifs
+#' @inheritDotParams universalmotif::compare_motifs
+#' 
+#' @importFrom universalmotif compare_motifs
+#' 
+#' @inherit universalmotif::compare_motifs details
+#' 
+#' @returns A list of matrices containing the similarity scores between motifs
+#' from different groups of sequences. The order of comparison is as follows,
+#' with first element representing the rows and second element representing the
+#' columns of the matrix:
+#' \itemize{
+#'     \item 1. \strong{Common motifs comparison:} Common seqs from reference
+#'     (1) <-> comparison (2)
+#'     \item 2. \strong{Unique motifs comparison:} Unique seqs from reference
+#'     (1) <-> comparison (2)
+#'     \item 3. \strong{Cross motifs comparison 1:} Unique seqs from reference
+#'     (1) <-> comparison (1)
+#'     \item 4. \strong{Cross motifs comparison 2:} Unique seqs from comparison
+#'     (2) <-> reference (1)
+#' }
+#' The list is repeated for each set of comparison groups in input.
+#' 
+#' @examples
+#' data("CTCF_TIP_peaks", package = "MotifPeeker")
+#' data("CTCF_ChIP_peaks", package = "MotifPeeker")
+#' 
+#' if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38")) {
+#'     genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
+#'     segregated_peaks <- segregate_seqs(CTCF_TIP_peaks, CTCF_ChIP_peaks)
+#'     denovo_motifs <- denovo_motifs(unlist(segregated_peaks),
+#'                         trim_seq_width = 100,
+#'                         genome_build = genome_build,
+#'                         denovo_motifs = 2,
+#'                         filter_n = 6,
+#'                         out_dir = tempdir(),
+#'                         workers = 1)
+#'     similarity_matrices <- compare_motifs(denovo_motifs)
+#'     print(similarity_matrices)
+#' }
+#' 
+#' 
+#' @export
+compare_motifs <- function(streme_out,
+                            method = "PCC",
+                            normalise.scores = TRUE,
+                            workers = 1,
+                            verbose = FALSE,
+                            ...) {
+    ## Motif group sequence -   #1 Common seqs - Reference (1)
+    ## (4 Groups per            #2 Common seqs - Comparison (2)
+    ## comparison pair)         #3 Unique seqs - Reference (1)
+    ##                          #4 Unique seqs - Comparison (2)
+    group_indices <- rep(seq_len(length(streme_out) / 4), each = 4)
+    seg_indices <- rep(seq_len(4), length.out = length(streme_out))
+    comparison_groups <- list(
+        "1" = c(1, 2),
+        "2" = c(3, 4),
+        "3" = c(3, 2),
+        "4" = c(4, 1)
+    )
+    
+    res <- bpapply(
+        seq_along(streme_out), function(x) {
+            comparison_group <- comparison_groups[[seg_indices[x]]]
+            .motifsx <- function(m) streme_out[[comparison_group[m] *
+                                                    group_indices[[x]]]]$motif
+            m1 <- .motifsx(1)
+            m2 <- .motifsx(2)
+            if (length(m1) == 0 || length(m2) == 0) return(matrix(NA))
+            res_x <- universalmotif::compare_motifs(
+                        list(m1, m2),
+                        method = method,
+                        normalise.scores = normalise.scores,
+                        ...
+                    )
+            row_indices <- seq(1, length(m1))
+            col_indices <- seq(length(m1) + 1, length(m1) + length(m2))
+            
+            res_x <- res_x[row_indices, col_indices, drop = FALSE]
+            return(res_x)
+        }, workers = workers, verbose = verbose
+    )
+    
+    ## Output: Comparison Pair 1 - 1. common_(1) <-> common_(2)
+    ##                             2. unique_(1) <-> unique_(2)
+    ##                             3. unique_(1) <-> common_(2) [cross1]
+    ##                             4. unique_(2) <-> common_(1) [cross2]
+    ##         Comparison Pair 2 - 5. common_(1) <-> common_(2)...
+    return(res)
+}
diff --git a/R/denovo_motifs.R b/R/denovo_motifs.R
index fed23ae..b3e4a7a 100644
--- a/R/denovo_motifs.R
+++ b/R/denovo_motifs.R
@@ -57,6 +57,7 @@ denovo_motifs <- function(seqs,
                             maxw = 25,
                             filter_n = 6,
                             out_dir = tempdir(),
+                            meme_path = NULL,
                             workers = 1,
                             verbose = FALSE,
                             debug = FALSE,
@@ -86,6 +87,7 @@ denovo_motifs <- function(seqs,
                 minw = 8,
                 maxw = 25,
                 nmotifs = denovo_motifs,
+                meme_path = meme_path,
                 ...
             )
             
diff --git a/R/find_motifs.R b/R/find_motifs.R
new file mode 100644
index 0000000..4d273d8
--- /dev/null
+++ b/R/find_motifs.R
@@ -0,0 +1,66 @@
+#' Find similar motifs
+#' 
+#' Search through provided motif database to find similar motifs to the input.
+#' Light wrapper around \code{TOMTOM} from MEME Suite.
+#' 
+#' @param streme_out Output from \code{\link{denovo_motifs}}.
+#' @param motif_db Path to \code{.meme} format file to use as reference
+#' database, or a list of \code{\link[universalmotif]{universalmotif-class}}
+#' objects. (optional) Results from de-novo motif discovery are searched against
+#' this database to find similar motifs. If not provided, JASPAR CORE database
+#' will be used. \strong{NOTE}: p-value estimates are inaccurate when the
+#' database has fewer than 50 entries.
+#' @param ... Additional arguments to pass to \code{TOMTOM}. For more
+#' information, refer to the official MEME Suite documentation on
+#' \href{https://meme-suite.org/meme/doc/tomtom.html}{TOMTOM}.
+#' @inheritParams denovo_motifs
+#' 
+#' @importFrom memes runTomTom
+#' 
+#' @inherit memes::runTomTom return
+#' 
+#' @examples
+#' data("CTCF_TIP_peaks", package = "MotifPeeker")
+#' if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38", quietly = TRUE)) {
+#'     genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
+#'    
+#'     res <- denovo_motifs(list(CTCF_TIP_peaks),
+#'                         trim_seq_width = 100,
+#'                         genome_build = genome_build,
+#'                         denovo_motifs = 2,
+#'                         filter_n = 6,
+#'                         out_dir = tempdir())
+#'     res2 <- find_motifs(res, motif_db = get_JASPARCORE(),
+#'                         out_dir = tempdir())
+#'     print(res2)
+#' }
+#' 
+#' @export
+find_motifs <- function(streme_out,
+                        motif_db,
+                        out_dir = tempdir(),
+                        meme_path = NULL,
+                        workers = 1,
+                        verbose = FALSE,
+                        debug = FALSE,
+                        ...) {
+    out_dir <- file.path(out_dir, "tomtom")
+    if (!dir.exists(out_dir)) dir.create(out_dir, recursive = TRUE)
+    
+    res <- bpapply(
+        seq_along(streme_out), function(x) {
+            motifs <- streme_out[[x]]$motif
+            lapply(seq_along(motifs), function(y) {
+                out_dir_x <- file.path(out_dir, x, y)
+                if (!dir.exists(out_dir_x)) dir.create(out_dir_x,
+                                                        recursive = TRUE)
+                res_x <- memes::runTomTom(
+                    motifs[[y]], motif_db, outdir = out_dir_x, silent = !debug,
+                    meme_path = meme_path, ...
+                )
+                return(res_x)
+            })
+        }, workers = workers, verbose = verbose
+    )
+    return(res)
+}
diff --git a/inst/markdown/MotifPeeker.Rmd b/inst/markdown/MotifPeeker.Rmd
index 49057d0..1d7c5c8 100644
--- a/inst/markdown/MotifPeeker.Rmd
+++ b/inst/markdown/MotifPeeker.Rmd
@@ -157,7 +157,6 @@ motif_summit_dist_df <- get_df_distances(
         params$meme_path, params$debug
     )
 
-
 if (comparison_metrics) {
     ### Known-motif Analysis ###
     comparison_indices <- setdiff(seq_along(result$peaks),
@@ -175,19 +174,25 @@ if (comparison_metrics) {
               params$meme_path, params$verbose
           )
     }
+}
     
-    ### De-Novo Motif Analysis ###
-    ## Run STREME
-    if (params$denovo_motif_discovery) {
-        result$streme <- denovo_motifs(
-          unlist(segregated_peaks), params$trim_seq_width, genome_build,
-          params$denovo_motifs, filter_n = params$filter_n,
-          out_dir = out_dir_extra, workers = params$workers,
-          verbose = params$verbose, debug = params$debug
-        )
-    }
+### De-Novo Motif Analysis ###
+## Run STREME
+if (params$denovo_motif_discovery && comparison_metrics) {
+    result$streme <- denovo_motifs(
+      unlist(segregated_peaks), params$trim_seq_width, genome_build,
+      params$denovo_motifs, filter_n = params$filter_n,
+      out_dir = out_dir_extra, meme_path = params$meme_path,
+      workers = params$workers, verbose = params$verbose, debug = params$debug
+    )
+    result$streme_similar <- find_motifs(
+        result$streme, motif_db, out_dir = out_dir_extra,
+        meme_path = params$meme_path, workers = params$workers,
+        verbose = params$verbose, debug = params$debug
+    )
 }
 
+
 ```
 
 ```{r setup_misc, include = FALSE}
diff --git a/man/compare_motifs.Rd b/man/compare_motifs.Rd
new file mode 100644
index 0000000..761c5a9
--- /dev/null
+++ b/man/compare_motifs.Rd
@@ -0,0 +1,177 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/compare_motifs.R
+\name{compare_motifs}
+\alias{compare_motifs}
+\title{Compare motifs from segregated sequences}
+\usage{
+compare_motifs(
+  streme_out,
+  method = "PCC",
+  normalise.scores = TRUE,
+  workers = 1,
+  verbose = FALSE,
+  ...
+)
+}
+\arguments{
+\item{streme_out}{Output from \code{\link{denovo_motifs}}.}
+
+\item{method}{\code{character(1)} One of PCC, EUCL, SW, KL, ALLR, BHAT, HELL,
+SEUCL, MAN, ALLR_LL, WEUCL, WPCC. See details.}
+
+\item{normalise.scores}{\code{logical(1)} Favour alignments which leave fewer
+unaligned positions, as well as alignments between motifs of similar length.
+Similarity scores are multiplied by the ratio of
+aligned positions to the total number of positions in the larger motif,
+and the inverse for distance scores.}
+
+\item{workers}{The number of workers to use for parallel processing.}
+
+\item{verbose}{A logical indicating whether to print verbose messages while
+running the function. (default = FALSE)}
+
+\item{...}{
+  Arguments passed on to \code{\link[universalmotif:compare_motifs]{universalmotif::compare_motifs}}
+  \describe{
+    \item{\code{motifs}}{See \code{\link[universalmotif:convert_motifs]{convert_motifs()}} for acceptable motif formats.}
+    \item{\code{compare.to}}{\code{numeric} If missing, compares all motifs to all other motifs.
+Otherwise compares all motifs to the specified motif(s).}
+    \item{\code{db.scores}}{\code{data.frame} or \code{DataFrame}. See \code{details}.}
+    \item{\code{use.freq}}{\code{numeric(1)}. For comparing the \code{multifreq} slot.}
+    \item{\code{use.type}}{\code{character(1)} One of \code{'PPM'} and \code{'ICM'}.
+The latter allows for taking into account the background
+frequencies if \code{relative_entropy = TRUE}. Note that \code{'ICM'} is not
+allowed when \code{method = c("ALLR", "ALLR_LL")}.}
+    \item{\code{tryRC}}{\code{logical(1)} Try the reverse complement of the motifs as well,
+report the best score.}
+    \item{\code{min.overlap}}{\code{numeric(1)} Minimum overlap required when aligning the
+motifs. Setting this to a number higher then the width of the motifs
+will not allow any overhangs. Can also be a number between 0 and 1,
+representing the minimum fraction that the motifs must overlap.}
+    \item{\code{min.mean.ic}}{\code{numeric(1)} Minimum mean information content between the
+two motifs for an alignment to be scored. This helps prevent scoring
+alignments between low information content regions of two motifs. Note that
+this can result in some comparisons failing if no alignment passes the
+mean IC threshold. Use \code{\link[universalmotif:average_ic]{average_ic()}} to filter out low IC motifs to get around
+this if you want to avoid getting \code{NA}s in your output.}
+    \item{\code{min.position.ic}}{\code{numeric(1)} Minimum information content required between
+individual alignment positions for it to be counted in the final alignment
+score. It is recommended to use this together with \code{normalise.scores = TRUE},
+as this will help punish scores resulting from only a fraction of an
+alignment.}
+    \item{\code{relative_entropy}}{\code{logical(1)} Change the ICM calculation affecting
+\code{min.position.ic} and \code{min.mean.ic}. See \code{\link[universalmotif:convert_type]{convert_type()}}.}
+    \item{\code{max.p}}{\code{numeric(1)} Maximum P-value allowed in reporting matches.
+Only used if \code{compare.to} is set.}
+    \item{\code{max.e}}{\code{numeric(1)} Maximum E-value allowed in reporting matches.
+Only used if \code{compare.to} is set. The E-value is the P-value multiplied
+by the number of input motifs times two.}
+    \item{\code{nthreads}}{\code{numeric(1)} Run \code{\link[universalmotif:compare_motifs]{compare_motifs()}} in parallel with \code{nthreads}
+threads. \code{nthreads = 0} uses all available threads.}
+    \item{\code{score.strat}}{\code{character(1)} How to handle column scores calculated from
+motif alignments. "sum": add up all scores. "a.mean": take the arithmetic
+mean. "g.mean": take the geometric mean. "median": take the median.
+"wa.mean", "wg.mean": weighted arithmetic/geometric mean. "fzt": Fisher
+Z-transform. Weights are the
+total information content shared between aligned columns.}
+    \item{\code{output.report}}{\code{character(1)} Provide a filename for \code{\link[universalmotif:compare_motifs]{compare_motifs()}}
+to write an html ouput report to. The top matches are shown alongside
+figures of the match alignments. This requires the \code{knitr} and \code{rmarkdown}
+packages. (Note: still in development.)}
+    \item{\code{output.report.max.print}}{\code{numeric(1)} Maximum number of top matches to
+print.}
+  }}
+}
+\value{
+A list of matrices containing the similarity scores between motifs
+from different groups of sequences. The order of comparison is as follows,
+with first element representing the rows and second element representing the
+columns of the matrix:
+\itemize{
+    \item 1. \strong{Common motifs comparison:} Common seqs from reference
+    (1) <-> comparison (2)
+    \item 2. \strong{Unique motifs comparison:} Unique seqs from reference
+    (1) <-> comparison (2)
+    \item 3. \strong{Cross motifs comparison 1:} Unique seqs from reference
+    (1) <-> comparison (1)
+    \item 4. \strong{Cross motifs comparison 2:} Unique seqs from comparison
+    (2) <-> reference (1)
+}
+The list is repeated for each set of comparison groups in input.
+}
+\description{
+Compute motif similarity scores between motifs discovered from segregated
+sequences. Wrapper around \code{\link[universalmotif]{compare_motifs}} to
+compare motifs from different groups of sequences. To see the possible
+similarity measures available, refer to details.
+}
+\details{
+\subsection{Available metrics}{
+
+The following metrics are available:
+\itemize{
+\item Euclidean distance (\code{EUCL}) (Choi et al. 2004)
+\item Weighted Euclidean distance (\code{WEUCL})
+\item Kullback-Leibler divergence (\code{KL}) (Kullback and Leibler 1951; Roepcke et al. 2005)
+\item Hellinger distance (\code{HELL}) (Hellinger 1909)
+\item Squared Euclidean distance (\code{SEUCL})
+\item Manhattan distance (\code{MAN})
+\item Pearson correlation coefficient (\code{PCC})
+\item Weighted Pearson correlation coefficient (\code{WPCC})
+\item Sandelin-Wasserman similarity (\code{SW}), or sum of squared distances (Sandelin and Wasserman 2004)
+\item Average log-likelihood ratio (\code{ALLR}) (Wang and Stormo 2003)
+\item Lower limit ALLR (\code{ALLR_LL}) (Mahony et al. 2007)
+\item Bhattacharyya coefficient (\code{BHAT}) (Bhattacharyya 1943)
+}
+
+Comparisons are calculated between two motifs at a time. All possible alignments
+are scored, and the best score is reported. In an alignment scores are calculated
+individually between columns. How those scores are combined to generate the final
+alignment scores depends on \code{score.strat}.
+
+See the "Motif comparisons and P-values" vignette for a description of the
+various metrics. Note that \code{PCC}, \code{WPCC}, \code{SW}, \code{ALLR}, \code{ALLR_LL} and \code{BHAT}
+are similarities;
+higher values mean more similar motifs. For the remaining metrics, values closer
+to zero represent more similar motifs.
+
+Small pseudocounts are automatically added when one of the following methods
+is used: \code{KL}, \code{ALLR}, \code{ALLR_LL}, \code{IS}. This is avoid
+zeros in the calculations.
+}
+
+\subsection{Calculating P-values}{
+
+To note regarding p-values: P-values are pre-computed using the
+\code{\link[universalmotif:make_DBscores]{make_DBscores()}} function. If not given, then uses a set of internal
+precomputed P-values from the JASPAR2018 CORE motifs. These precalculated
+scores are dependent on the length of the motifs being compared. This takes
+into account that comparing small motifs with larger motifs leads to higher
+scores, since the probability of finding a higher scoring alignment is
+higher.
+
+The default P-values have been precalculated for regular DNA motifs. They
+are of little use for motifs with a different number of alphabet letters
+(or even the \code{multifreq} slot).
+}
+}
+\examples{
+data("CTCF_TIP_peaks", package = "MotifPeeker")
+data("CTCF_ChIP_peaks", package = "MotifPeeker")
+
+if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38")) {
+    genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
+    segregated_peaks <- segregate_seqs(CTCF_TIP_peaks, CTCF_ChIP_peaks)
+    denovo_motifs <- denovo_motifs(unlist(segregated_peaks),
+                        trim_seq_width = 100,
+                        genome_build = genome_build,
+                        denovo_motifs = 2,
+                        filter_n = 6,
+                        out_dir = tempdir(),
+                        workers = 1)
+    similarity_matrices <- compare_motifs(denovo_motifs)
+    print(similarity_matrices)
+}
+
+
+}
diff --git a/man/denovo_motifs.Rd b/man/denovo_motifs.Rd
index 43a1e1a..628c77d 100644
--- a/man/denovo_motifs.Rd
+++ b/man/denovo_motifs.Rd
@@ -13,6 +13,7 @@ denovo_motifs(
   maxw = 25,
   filter_n = 6,
   out_dir = tempdir(),
+  meme_path = NULL,
   workers = 1,
   verbose = FALSE,
   debug = FALSE,
@@ -50,6 +51,9 @@ repeats a de-novo discovered motif must contain to be filtered out.
 \item{out_dir}{A \code{character} vector of output directory to save STREME
 results to. (default = \code{tempdir()})}
 
+\item{meme_path}{path to "meme/bin/" (default: \code{NULL}). Will use default
+search behavior as described in \code{check_meme_install()} if unset.}
+
 \item{workers}{The number of workers to use for parallel processing.}
 
 \item{verbose}{A logical indicating whether to print verbose messages while
diff --git a/man/find_motifs.Rd b/man/find_motifs.Rd
new file mode 100644
index 0000000..3ed32e7
--- /dev/null
+++ b/man/find_motifs.Rd
@@ -0,0 +1,76 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/find_motifs.R
+\name{find_motifs}
+\alias{find_motifs}
+\title{Find similar motifs}
+\usage{
+find_motifs(
+  streme_out,
+  motif_db,
+  out_dir = tempdir(),
+  meme_path = NULL,
+  workers = 1,
+  verbose = FALSE,
+  debug = FALSE,
+  ...
+)
+}
+\arguments{
+\item{streme_out}{Output from \code{\link{denovo_motifs}}.}
+
+\item{motif_db}{Path to \code{.meme} format file to use as reference
+database, or a list of \code{\link[universalmotif]{universalmotif-class}}
+objects. (optional) Results from de-novo motif discovery are searched against
+this database to find similar motifs. If not provided, JASPAR CORE database
+will be used. \strong{NOTE}: p-value estimates are inaccurate when the
+database has fewer than 50 entries.}
+
+\item{out_dir}{A \code{character} vector of output directory to save STREME
+results to. (default = \code{tempdir()})}
+
+\item{meme_path}{path to "meme/bin/" (default: \code{NULL}). Will use default
+search behavior as described in \code{check_meme_install()} if unset.}
+
+\item{workers}{The number of workers to use for parallel processing.}
+
+\item{verbose}{A logical indicating whether to print verbose messages while
+running the function. (default = FALSE)}
+
+\item{debug}{A logical indicating whether to print debug/error messages in
+the HTML report. (default = FALSE)}
+
+\item{...}{Additional arguments to pass to \code{TOMTOM}. For more
+information, refer to the official MEME Suite documentation on
+\href{https://meme-suite.org/meme/doc/tomtom.html}{TOMTOM}.}
+}
+\value{
+data.frame of match results. Contains \code{best_match_motif} column of
+\code{universalmotif} objects with the matched PWM from the database, a series
+of \verb{best_match_*} columns describing the TomTom results of the match, and a
+\code{tomtom} list column storing the ranked list of possible matches to each
+motif. If a universalmotif data.frame is used as input, these columns are
+appended to the data.frame. If no matches are returned, \code{tomtom} and
+\code{best_match_motif} columns will be set to \code{NA} and a message indicating
+this will print.
+}
+\description{
+Search through provided motif database to find similar motifs to the input.
+Light wrapper around \code{TOMTOM} from MEME Suite.
+}
+\examples{
+data("CTCF_TIP_peaks", package = "MotifPeeker")
+if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38", quietly = TRUE)) {
+    genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
+   
+    res <- denovo_motifs(list(CTCF_TIP_peaks),
+                        trim_seq_width = 100,
+                        genome_build = genome_build,
+                        denovo_motifs = 2,
+                        filter_n = 6,
+                        out_dir = tempdir())
+    res2 <- find_motifs(res, motif_db = get_JASPARCORE(),
+                        out_dir = tempdir())
+    print(res2)
+}
+
+}
diff --git a/man/pretty_number.Rd b/man/pretty_number.Rd
index a1df551..fb2dda4 100644
--- a/man/pretty_number.Rd
+++ b/man/pretty_number.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/pretty_number.R
 \name{pretty_number}
 \alias{pretty_number}
-\title{Convert integers to more readable strings}
+\title{Convert numbers to more readable strings}
 \usage{
 pretty_number(x, decimal_digits = 2)
 }
@@ -12,7 +12,8 @@ pretty_number(x, decimal_digits = 2)
 \item{decimal_digits}{Number of decimal digits to round to.}
 }
 \value{
-A character string of the formatted number.
+A character string of the formatted number. \code{NA} is returned as
+"NA".
 }
 \description{
 Format raw numbers to more readable strings. For example, 1000 will be
diff --git a/tests/testthat/test-denovo_motif_funcs.R b/tests/testthat/test-denovo_motif_funcs.R
new file mode 100644
index 0000000..208bebe
--- /dev/null
+++ b/tests/testthat/test-denovo_motif_funcs.R
@@ -0,0 +1,40 @@
+test_that("De-novo motif enrichment functions works", {
+    data("CTCF_TIP_peaks", package = "MotifPeeker")
+    data("CTCF_ChIP_peaks", package = "MotifPeeker")
+    genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
+    
+    segregated_peaks <- segregate_seqs(CTCF_TIP_peaks, CTCF_ChIP_peaks)
+    
+    ### denovo_motifs and filter_repeats ###
+    res <- denovo_motifs(unlist(segregated_peaks),
+                        trim_seq_width = 100,
+                        genome_build = genome_build,
+                        denovo_motifs = 2,
+                        filter_n = 6,
+                        out_dir = tempdir(),
+                        workers = 1,
+                        verbose = FALSE,
+                        debug = FALSE)
+    
+    expect_length(res[[1]]$consensus, 2)
+    expect_warning(denovo_motifs(list(CTCF_TIP_peaks),
+                                trim_seq_width = 100,
+                                genome_build = genome_build,
+                                denovo_motifs = 1,
+                                filter_n = 2))
+    
+    ### find_motifs ###
+    motif_db <- get_JASPARCORE()
+    res2 <- find_motifs(res,
+                        motif_db = motif_db,
+                        workers = 1,
+                        verbose = FALSE,
+                        debug = FALSE)
+    expect_equal(res2[[1]][[1]]$motif[[1]]@alphabet, "DNA")
+    
+    ### compare_motifs ###
+    res3 <- compare_motifs(res,
+                            workers = 1,
+                            verbose = FALSE)
+    expect_true(all(vapply(res3, is.matrix, logical(1))))
+})
diff --git a/tests/testthat/test-denovo_motifs.R b/tests/testthat/test-denovo_motifs.R
deleted file mode 100644
index 61c5a1f..0000000
--- a/tests/testthat/test-denovo_motifs.R
+++ /dev/null
@@ -1,21 +0,0 @@
-test_that("denovo_motifs and filter_repeats works", {
-    data("CTCF_TIP_peaks", package = "MotifPeeker")
-    genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
-    
-    res <- denovo_motifs(list(CTCF_TIP_peaks),
-                        trim_seq_width = 100,
-                        genome_build = genome_build,
-                        denovo_motifs = 2,
-                        filter_n = 6,
-                        out_dir = tempdir(),
-                        workers = 1,
-                        verbose = FALSE,
-                        debug = FALSE)
-    
-    expect_length(res[[1]]$consensus, 2)
-    expect_warning(denovo_motifs(list(CTCF_TIP_peaks),
-                                trim_seq_width = 100,
-                                genome_build = genome_build,
-                                denovo_motifs = 1,
-                                filter_n = 2))
-})