From 8e52686fd65cad488f3db3ce3e576751608916ca Mon Sep 17 00:00:00 2001 From: HDash <16350928+HDash@users.noreply.github.com> Date: Thu, 4 Jul 2024 17:44:45 +0100 Subject: [PATCH] Add compare_motifs and find_motifs --- NAMESPACE | 4 + R/MotifPeeker.R | 7 +- R/compare_motifs.R | 99 +++++++++++++ R/denovo_motifs.R | 2 + R/find_motifs.R | 66 +++++++++ inst/markdown/MotifPeeker.Rmd | 27 ++-- man/compare_motifs.Rd | 177 +++++++++++++++++++++++ man/denovo_motifs.Rd | 4 + man/find_motifs.Rd | 76 ++++++++++ man/pretty_number.Rd | 5 +- tests/testthat/test-denovo_motif_funcs.R | 40 +++++ tests/testthat/test-denovo_motifs.R | 21 --- 12 files changed, 488 insertions(+), 40 deletions(-) create mode 100644 R/compare_motifs.R create mode 100644 R/find_motifs.R create mode 100644 man/compare_motifs.Rd create mode 100644 man/find_motifs.Rd create mode 100644 tests/testthat/test-denovo_motif_funcs.R delete mode 100644 tests/testthat/test-denovo_motifs.R diff --git a/NAMESPACE b/NAMESPACE index 1d6e52d..9047a17 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,7 +7,9 @@ export(calc_frip) export(check_ENCODE) export(check_JASPAR) export(check_genome_build) +export(compare_motifs) export(denovo_motifs) +export(find_motifs) export(format_exptype) export(get_JASPARCORE) export(get_df_distances) @@ -58,12 +60,14 @@ importFrom(magrittr,"%>%") importFrom(memes,meme_is_installed) importFrom(memes,runAme) importFrom(memes,runFimo) +importFrom(memes,runTomTom) importFrom(plotly,ggplotly) importFrom(purrr,map_df) importFrom(rmarkdown,render) importFrom(tidyr,pivot_longer) importFrom(tools,file_ext) importFrom(tools,file_path_sans_ext) +importFrom(universalmotif,compare_motifs) importFrom(universalmotif,read_homer) importFrom(universalmotif,read_jaspar) importFrom(universalmotif,read_meme) diff --git a/R/MotifPeeker.R b/R/MotifPeeker.R index 3a81312..8690f80 100644 --- a/R/MotifPeeker.R +++ b/R/MotifPeeker.R @@ -55,12 +55,6 @@ #' file. (optional) Creates additional comparisons based on cell counts. #' @param denovo_motif_discovery A logical indicating whether to perform #' de-novo motif discovery for the third section of the report. (default = TRUE) -#' @param motif_db Path to \code{.meme} format file to use as reference -#' database, or a list of \code{\link[universalmotif]{universalmotif-class}} -#' objects. (optional) Results from de-novo motif discovery are searched against -#' this database to find similar motifs. If not provided, JASPAR CORE database -#' will be used. \strong{NOTE}: p-value estimates are inaccurate when the -#' database has fewer than 50 entries. #' @param download_buttons A logical indicating whether to include download #' buttons for various files within the HTML report. (default = TRUE) #' @param output_dir A character string specifying the directory to save the @@ -87,6 +81,7 @@ #' @inheritParams get_bpparam #' @inheritParams memes::runFimo #' @inheritParams denovo_motifs +#' @inheritParams find_motifs #' #' @import ggplot2 #' @import dplyr diff --git a/R/compare_motifs.R b/R/compare_motifs.R new file mode 100644 index 0000000..085e10e --- /dev/null +++ b/R/compare_motifs.R @@ -0,0 +1,99 @@ +#' Compare motifs from segregated sequences +#' +#' Compute motif similarity scores between motifs discovered from segregated +#' sequences. Wrapper around \code{\link[universalmotif]{compare_motifs}} to +#' compare motifs from different groups of sequences. To see the possible +#' similarity measures available, refer to details. +#' +#' @inheritParams universalmotif::compare_motifs +#' @inheritParams find_motifs +#' @inheritDotParams universalmotif::compare_motifs +#' +#' @importFrom universalmotif compare_motifs +#' +#' @inherit universalmotif::compare_motifs details +#' +#' @returns A list of matrices containing the similarity scores between motifs +#' from different groups of sequences. The order of comparison is as follows, +#' with first element representing the rows and second element representing the +#' columns of the matrix: +#' \itemize{ +#' \item 1. \strong{Common motifs comparison:} Common seqs from reference +#' (1) <-> comparison (2) +#' \item 2. \strong{Unique motifs comparison:} Unique seqs from reference +#' (1) <-> comparison (2) +#' \item 3. \strong{Cross motifs comparison 1:} Unique seqs from reference +#' (1) <-> comparison (1) +#' \item 4. \strong{Cross motifs comparison 2:} Unique seqs from comparison +#' (2) <-> reference (1) +#' } +#' The list is repeated for each set of comparison groups in input. +#' +#' @examples +#' data("CTCF_TIP_peaks", package = "MotifPeeker") +#' data("CTCF_ChIP_peaks", package = "MotifPeeker") +#' +#' if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38")) { +#' genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38 +#' segregated_peaks <- segregate_seqs(CTCF_TIP_peaks, CTCF_ChIP_peaks) +#' denovo_motifs <- denovo_motifs(unlist(segregated_peaks), +#' trim_seq_width = 100, +#' genome_build = genome_build, +#' denovo_motifs = 2, +#' filter_n = 6, +#' out_dir = tempdir(), +#' workers = 1) +#' similarity_matrices <- compare_motifs(denovo_motifs) +#' print(similarity_matrices) +#' } +#' +#' +#' @export +compare_motifs <- function(streme_out, + method = "PCC", + normalise.scores = TRUE, + workers = 1, + verbose = FALSE, + ...) { + ## Motif group sequence - #1 Common seqs - Reference (1) + ## (4 Groups per #2 Common seqs - Comparison (2) + ## comparison pair) #3 Unique seqs - Reference (1) + ## #4 Unique seqs - Comparison (2) + group_indices <- rep(seq_len(length(streme_out) / 4), each = 4) + seg_indices <- rep(seq_len(4), length.out = length(streme_out)) + comparison_groups <- list( + "1" = c(1, 2), + "2" = c(3, 4), + "3" = c(3, 2), + "4" = c(4, 1) + ) + + res <- bpapply( + seq_along(streme_out), function(x) { + comparison_group <- comparison_groups[[seg_indices[x]]] + .motifsx <- function(m) streme_out[[comparison_group[m] * + group_indices[[x]]]]$motif + m1 <- .motifsx(1) + m2 <- .motifsx(2) + if (length(m1) == 0 || length(m2) == 0) return(matrix(NA)) + res_x <- universalmotif::compare_motifs( + list(m1, m2), + method = method, + normalise.scores = normalise.scores, + ... + ) + row_indices <- seq(1, length(m1)) + col_indices <- seq(length(m1) + 1, length(m1) + length(m2)) + + res_x <- res_x[row_indices, col_indices, drop = FALSE] + return(res_x) + }, workers = workers, verbose = verbose + ) + + ## Output: Comparison Pair 1 - 1. common_(1) <-> common_(2) + ## 2. unique_(1) <-> unique_(2) + ## 3. unique_(1) <-> common_(2) [cross1] + ## 4. unique_(2) <-> common_(1) [cross2] + ## Comparison Pair 2 - 5. common_(1) <-> common_(2)... + return(res) +} diff --git a/R/denovo_motifs.R b/R/denovo_motifs.R index fed23ae..b3e4a7a 100644 --- a/R/denovo_motifs.R +++ b/R/denovo_motifs.R @@ -57,6 +57,7 @@ denovo_motifs <- function(seqs, maxw = 25, filter_n = 6, out_dir = tempdir(), + meme_path = NULL, workers = 1, verbose = FALSE, debug = FALSE, @@ -86,6 +87,7 @@ denovo_motifs <- function(seqs, minw = 8, maxw = 25, nmotifs = denovo_motifs, + meme_path = meme_path, ... ) diff --git a/R/find_motifs.R b/R/find_motifs.R new file mode 100644 index 0000000..4d273d8 --- /dev/null +++ b/R/find_motifs.R @@ -0,0 +1,66 @@ +#' Find similar motifs +#' +#' Search through provided motif database to find similar motifs to the input. +#' Light wrapper around \code{TOMTOM} from MEME Suite. +#' +#' @param streme_out Output from \code{\link{denovo_motifs}}. +#' @param motif_db Path to \code{.meme} format file to use as reference +#' database, or a list of \code{\link[universalmotif]{universalmotif-class}} +#' objects. (optional) Results from de-novo motif discovery are searched against +#' this database to find similar motifs. If not provided, JASPAR CORE database +#' will be used. \strong{NOTE}: p-value estimates are inaccurate when the +#' database has fewer than 50 entries. +#' @param ... Additional arguments to pass to \code{TOMTOM}. For more +#' information, refer to the official MEME Suite documentation on +#' \href{https://meme-suite.org/meme/doc/tomtom.html}{TOMTOM}. +#' @inheritParams denovo_motifs +#' +#' @importFrom memes runTomTom +#' +#' @inherit memes::runTomTom return +#' +#' @examples +#' data("CTCF_TIP_peaks", package = "MotifPeeker") +#' if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38", quietly = TRUE)) { +#' genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38 +#' +#' res <- denovo_motifs(list(CTCF_TIP_peaks), +#' trim_seq_width = 100, +#' genome_build = genome_build, +#' denovo_motifs = 2, +#' filter_n = 6, +#' out_dir = tempdir()) +#' res2 <- find_motifs(res, motif_db = get_JASPARCORE(), +#' out_dir = tempdir()) +#' print(res2) +#' } +#' +#' @export +find_motifs <- function(streme_out, + motif_db, + out_dir = tempdir(), + meme_path = NULL, + workers = 1, + verbose = FALSE, + debug = FALSE, + ...) { + out_dir <- file.path(out_dir, "tomtom") + if (!dir.exists(out_dir)) dir.create(out_dir, recursive = TRUE) + + res <- bpapply( + seq_along(streme_out), function(x) { + motifs <- streme_out[[x]]$motif + lapply(seq_along(motifs), function(y) { + out_dir_x <- file.path(out_dir, x, y) + if (!dir.exists(out_dir_x)) dir.create(out_dir_x, + recursive = TRUE) + res_x <- memes::runTomTom( + motifs[[y]], motif_db, outdir = out_dir_x, silent = !debug, + meme_path = meme_path, ... + ) + return(res_x) + }) + }, workers = workers, verbose = verbose + ) + return(res) +} diff --git a/inst/markdown/MotifPeeker.Rmd b/inst/markdown/MotifPeeker.Rmd index 49057d0..1d7c5c8 100644 --- a/inst/markdown/MotifPeeker.Rmd +++ b/inst/markdown/MotifPeeker.Rmd @@ -157,7 +157,6 @@ motif_summit_dist_df <- get_df_distances( params$meme_path, params$debug ) - if (comparison_metrics) { ### Known-motif Analysis ### comparison_indices <- setdiff(seq_along(result$peaks), @@ -175,19 +174,25 @@ if (comparison_metrics) { params$meme_path, params$verbose ) } +} - ### De-Novo Motif Analysis ### - ## Run STREME - if (params$denovo_motif_discovery) { - result$streme <- denovo_motifs( - unlist(segregated_peaks), params$trim_seq_width, genome_build, - params$denovo_motifs, filter_n = params$filter_n, - out_dir = out_dir_extra, workers = params$workers, - verbose = params$verbose, debug = params$debug - ) - } +### De-Novo Motif Analysis ### +## Run STREME +if (params$denovo_motif_discovery && comparison_metrics) { + result$streme <- denovo_motifs( + unlist(segregated_peaks), params$trim_seq_width, genome_build, + params$denovo_motifs, filter_n = params$filter_n, + out_dir = out_dir_extra, meme_path = params$meme_path, + workers = params$workers, verbose = params$verbose, debug = params$debug + ) + result$streme_similar <- find_motifs( + result$streme, motif_db, out_dir = out_dir_extra, + meme_path = params$meme_path, workers = params$workers, + verbose = params$verbose, debug = params$debug + ) } + ``` ```{r setup_misc, include = FALSE} diff --git a/man/compare_motifs.Rd b/man/compare_motifs.Rd new file mode 100644 index 0000000..761c5a9 --- /dev/null +++ b/man/compare_motifs.Rd @@ -0,0 +1,177 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/compare_motifs.R +\name{compare_motifs} +\alias{compare_motifs} +\title{Compare motifs from segregated sequences} +\usage{ +compare_motifs( + streme_out, + method = "PCC", + normalise.scores = TRUE, + workers = 1, + verbose = FALSE, + ... +) +} +\arguments{ +\item{streme_out}{Output from \code{\link{denovo_motifs}}.} + +\item{method}{\code{character(1)} One of PCC, EUCL, SW, KL, ALLR, BHAT, HELL, +SEUCL, MAN, ALLR_LL, WEUCL, WPCC. See details.} + +\item{normalise.scores}{\code{logical(1)} Favour alignments which leave fewer +unaligned positions, as well as alignments between motifs of similar length. +Similarity scores are multiplied by the ratio of +aligned positions to the total number of positions in the larger motif, +and the inverse for distance scores.} + +\item{workers}{The number of workers to use for parallel processing.} + +\item{verbose}{A logical indicating whether to print verbose messages while +running the function. (default = FALSE)} + +\item{...}{ + Arguments passed on to \code{\link[universalmotif:compare_motifs]{universalmotif::compare_motifs}} + \describe{ + \item{\code{motifs}}{See \code{\link[universalmotif:convert_motifs]{convert_motifs()}} for acceptable motif formats.} + \item{\code{compare.to}}{\code{numeric} If missing, compares all motifs to all other motifs. +Otherwise compares all motifs to the specified motif(s).} + \item{\code{db.scores}}{\code{data.frame} or \code{DataFrame}. See \code{details}.} + \item{\code{use.freq}}{\code{numeric(1)}. For comparing the \code{multifreq} slot.} + \item{\code{use.type}}{\code{character(1)} One of \code{'PPM'} and \code{'ICM'}. +The latter allows for taking into account the background +frequencies if \code{relative_entropy = TRUE}. Note that \code{'ICM'} is not +allowed when \code{method = c("ALLR", "ALLR_LL")}.} + \item{\code{tryRC}}{\code{logical(1)} Try the reverse complement of the motifs as well, +report the best score.} + \item{\code{min.overlap}}{\code{numeric(1)} Minimum overlap required when aligning the +motifs. Setting this to a number higher then the width of the motifs +will not allow any overhangs. Can also be a number between 0 and 1, +representing the minimum fraction that the motifs must overlap.} + \item{\code{min.mean.ic}}{\code{numeric(1)} Minimum mean information content between the +two motifs for an alignment to be scored. This helps prevent scoring +alignments between low information content regions of two motifs. Note that +this can result in some comparisons failing if no alignment passes the +mean IC threshold. Use \code{\link[universalmotif:average_ic]{average_ic()}} to filter out low IC motifs to get around +this if you want to avoid getting \code{NA}s in your output.} + \item{\code{min.position.ic}}{\code{numeric(1)} Minimum information content required between +individual alignment positions for it to be counted in the final alignment +score. It is recommended to use this together with \code{normalise.scores = TRUE}, +as this will help punish scores resulting from only a fraction of an +alignment.} + \item{\code{relative_entropy}}{\code{logical(1)} Change the ICM calculation affecting +\code{min.position.ic} and \code{min.mean.ic}. See \code{\link[universalmotif:convert_type]{convert_type()}}.} + \item{\code{max.p}}{\code{numeric(1)} Maximum P-value allowed in reporting matches. +Only used if \code{compare.to} is set.} + \item{\code{max.e}}{\code{numeric(1)} Maximum E-value allowed in reporting matches. +Only used if \code{compare.to} is set. The E-value is the P-value multiplied +by the number of input motifs times two.} + \item{\code{nthreads}}{\code{numeric(1)} Run \code{\link[universalmotif:compare_motifs]{compare_motifs()}} in parallel with \code{nthreads} +threads. \code{nthreads = 0} uses all available threads.} + \item{\code{score.strat}}{\code{character(1)} How to handle column scores calculated from +motif alignments. "sum": add up all scores. "a.mean": take the arithmetic +mean. "g.mean": take the geometric mean. "median": take the median. +"wa.mean", "wg.mean": weighted arithmetic/geometric mean. "fzt": Fisher +Z-transform. Weights are the +total information content shared between aligned columns.} + \item{\code{output.report}}{\code{character(1)} Provide a filename for \code{\link[universalmotif:compare_motifs]{compare_motifs()}} +to write an html ouput report to. The top matches are shown alongside +figures of the match alignments. This requires the \code{knitr} and \code{rmarkdown} +packages. (Note: still in development.)} + \item{\code{output.report.max.print}}{\code{numeric(1)} Maximum number of top matches to +print.} + }} +} +\value{ +A list of matrices containing the similarity scores between motifs +from different groups of sequences. The order of comparison is as follows, +with first element representing the rows and second element representing the +columns of the matrix: +\itemize{ + \item 1. \strong{Common motifs comparison:} Common seqs from reference + (1) <-> comparison (2) + \item 2. \strong{Unique motifs comparison:} Unique seqs from reference + (1) <-> comparison (2) + \item 3. \strong{Cross motifs comparison 1:} Unique seqs from reference + (1) <-> comparison (1) + \item 4. \strong{Cross motifs comparison 2:} Unique seqs from comparison + (2) <-> reference (1) +} +The list is repeated for each set of comparison groups in input. +} +\description{ +Compute motif similarity scores between motifs discovered from segregated +sequences. Wrapper around \code{\link[universalmotif]{compare_motifs}} to +compare motifs from different groups of sequences. To see the possible +similarity measures available, refer to details. +} +\details{ +\subsection{Available metrics}{ + +The following metrics are available: +\itemize{ +\item Euclidean distance (\code{EUCL}) (Choi et al. 2004) +\item Weighted Euclidean distance (\code{WEUCL}) +\item Kullback-Leibler divergence (\code{KL}) (Kullback and Leibler 1951; Roepcke et al. 2005) +\item Hellinger distance (\code{HELL}) (Hellinger 1909) +\item Squared Euclidean distance (\code{SEUCL}) +\item Manhattan distance (\code{MAN}) +\item Pearson correlation coefficient (\code{PCC}) +\item Weighted Pearson correlation coefficient (\code{WPCC}) +\item Sandelin-Wasserman similarity (\code{SW}), or sum of squared distances (Sandelin and Wasserman 2004) +\item Average log-likelihood ratio (\code{ALLR}) (Wang and Stormo 2003) +\item Lower limit ALLR (\code{ALLR_LL}) (Mahony et al. 2007) +\item Bhattacharyya coefficient (\code{BHAT}) (Bhattacharyya 1943) +} + +Comparisons are calculated between two motifs at a time. All possible alignments +are scored, and the best score is reported. In an alignment scores are calculated +individually between columns. How those scores are combined to generate the final +alignment scores depends on \code{score.strat}. + +See the "Motif comparisons and P-values" vignette for a description of the +various metrics. Note that \code{PCC}, \code{WPCC}, \code{SW}, \code{ALLR}, \code{ALLR_LL} and \code{BHAT} +are similarities; +higher values mean more similar motifs. For the remaining metrics, values closer +to zero represent more similar motifs. + +Small pseudocounts are automatically added when one of the following methods +is used: \code{KL}, \code{ALLR}, \code{ALLR_LL}, \code{IS}. This is avoid +zeros in the calculations. +} + +\subsection{Calculating P-values}{ + +To note regarding p-values: P-values are pre-computed using the +\code{\link[universalmotif:make_DBscores]{make_DBscores()}} function. If not given, then uses a set of internal +precomputed P-values from the JASPAR2018 CORE motifs. These precalculated +scores are dependent on the length of the motifs being compared. This takes +into account that comparing small motifs with larger motifs leads to higher +scores, since the probability of finding a higher scoring alignment is +higher. + +The default P-values have been precalculated for regular DNA motifs. They +are of little use for motifs with a different number of alphabet letters +(or even the \code{multifreq} slot). +} +} +\examples{ +data("CTCF_TIP_peaks", package = "MotifPeeker") +data("CTCF_ChIP_peaks", package = "MotifPeeker") + +if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38")) { + genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38 + segregated_peaks <- segregate_seqs(CTCF_TIP_peaks, CTCF_ChIP_peaks) + denovo_motifs <- denovo_motifs(unlist(segregated_peaks), + trim_seq_width = 100, + genome_build = genome_build, + denovo_motifs = 2, + filter_n = 6, + out_dir = tempdir(), + workers = 1) + similarity_matrices <- compare_motifs(denovo_motifs) + print(similarity_matrices) +} + + +} diff --git a/man/denovo_motifs.Rd b/man/denovo_motifs.Rd index 43a1e1a..628c77d 100644 --- a/man/denovo_motifs.Rd +++ b/man/denovo_motifs.Rd @@ -13,6 +13,7 @@ denovo_motifs( maxw = 25, filter_n = 6, out_dir = tempdir(), + meme_path = NULL, workers = 1, verbose = FALSE, debug = FALSE, @@ -50,6 +51,9 @@ repeats a de-novo discovered motif must contain to be filtered out. \item{out_dir}{A \code{character} vector of output directory to save STREME results to. (default = \code{tempdir()})} +\item{meme_path}{path to "meme/bin/" (default: \code{NULL}). Will use default +search behavior as described in \code{check_meme_install()} if unset.} + \item{workers}{The number of workers to use for parallel processing.} \item{verbose}{A logical indicating whether to print verbose messages while diff --git a/man/find_motifs.Rd b/man/find_motifs.Rd new file mode 100644 index 0000000..3ed32e7 --- /dev/null +++ b/man/find_motifs.Rd @@ -0,0 +1,76 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/find_motifs.R +\name{find_motifs} +\alias{find_motifs} +\title{Find similar motifs} +\usage{ +find_motifs( + streme_out, + motif_db, + out_dir = tempdir(), + meme_path = NULL, + workers = 1, + verbose = FALSE, + debug = FALSE, + ... +) +} +\arguments{ +\item{streme_out}{Output from \code{\link{denovo_motifs}}.} + +\item{motif_db}{Path to \code{.meme} format file to use as reference +database, or a list of \code{\link[universalmotif]{universalmotif-class}} +objects. (optional) Results from de-novo motif discovery are searched against +this database to find similar motifs. If not provided, JASPAR CORE database +will be used. \strong{NOTE}: p-value estimates are inaccurate when the +database has fewer than 50 entries.} + +\item{out_dir}{A \code{character} vector of output directory to save STREME +results to. (default = \code{tempdir()})} + +\item{meme_path}{path to "meme/bin/" (default: \code{NULL}). Will use default +search behavior as described in \code{check_meme_install()} if unset.} + +\item{workers}{The number of workers to use for parallel processing.} + +\item{verbose}{A logical indicating whether to print verbose messages while +running the function. (default = FALSE)} + +\item{debug}{A logical indicating whether to print debug/error messages in +the HTML report. (default = FALSE)} + +\item{...}{Additional arguments to pass to \code{TOMTOM}. For more +information, refer to the official MEME Suite documentation on +\href{https://meme-suite.org/meme/doc/tomtom.html}{TOMTOM}.} +} +\value{ +data.frame of match results. Contains \code{best_match_motif} column of +\code{universalmotif} objects with the matched PWM from the database, a series +of \verb{best_match_*} columns describing the TomTom results of the match, and a +\code{tomtom} list column storing the ranked list of possible matches to each +motif. If a universalmotif data.frame is used as input, these columns are +appended to the data.frame. If no matches are returned, \code{tomtom} and +\code{best_match_motif} columns will be set to \code{NA} and a message indicating +this will print. +} +\description{ +Search through provided motif database to find similar motifs to the input. +Light wrapper around \code{TOMTOM} from MEME Suite. +} +\examples{ +data("CTCF_TIP_peaks", package = "MotifPeeker") +if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38", quietly = TRUE)) { + genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38 + + res <- denovo_motifs(list(CTCF_TIP_peaks), + trim_seq_width = 100, + genome_build = genome_build, + denovo_motifs = 2, + filter_n = 6, + out_dir = tempdir()) + res2 <- find_motifs(res, motif_db = get_JASPARCORE(), + out_dir = tempdir()) + print(res2) +} + +} diff --git a/man/pretty_number.Rd b/man/pretty_number.Rd index a1df551..fb2dda4 100644 --- a/man/pretty_number.Rd +++ b/man/pretty_number.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/pretty_number.R \name{pretty_number} \alias{pretty_number} -\title{Convert integers to more readable strings} +\title{Convert numbers to more readable strings} \usage{ pretty_number(x, decimal_digits = 2) } @@ -12,7 +12,8 @@ pretty_number(x, decimal_digits = 2) \item{decimal_digits}{Number of decimal digits to round to.} } \value{ -A character string of the formatted number. +A character string of the formatted number. \code{NA} is returned as +"NA". } \description{ Format raw numbers to more readable strings. For example, 1000 will be diff --git a/tests/testthat/test-denovo_motif_funcs.R b/tests/testthat/test-denovo_motif_funcs.R new file mode 100644 index 0000000..208bebe --- /dev/null +++ b/tests/testthat/test-denovo_motif_funcs.R @@ -0,0 +1,40 @@ +test_that("De-novo motif enrichment functions works", { + data("CTCF_TIP_peaks", package = "MotifPeeker") + data("CTCF_ChIP_peaks", package = "MotifPeeker") + genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38 + + segregated_peaks <- segregate_seqs(CTCF_TIP_peaks, CTCF_ChIP_peaks) + + ### denovo_motifs and filter_repeats ### + res <- denovo_motifs(unlist(segregated_peaks), + trim_seq_width = 100, + genome_build = genome_build, + denovo_motifs = 2, + filter_n = 6, + out_dir = tempdir(), + workers = 1, + verbose = FALSE, + debug = FALSE) + + expect_length(res[[1]]$consensus, 2) + expect_warning(denovo_motifs(list(CTCF_TIP_peaks), + trim_seq_width = 100, + genome_build = genome_build, + denovo_motifs = 1, + filter_n = 2)) + + ### find_motifs ### + motif_db <- get_JASPARCORE() + res2 <- find_motifs(res, + motif_db = motif_db, + workers = 1, + verbose = FALSE, + debug = FALSE) + expect_equal(res2[[1]][[1]]$motif[[1]]@alphabet, "DNA") + + ### compare_motifs ### + res3 <- compare_motifs(res, + workers = 1, + verbose = FALSE) + expect_true(all(vapply(res3, is.matrix, logical(1)))) +}) diff --git a/tests/testthat/test-denovo_motifs.R b/tests/testthat/test-denovo_motifs.R deleted file mode 100644 index 61c5a1f..0000000 --- a/tests/testthat/test-denovo_motifs.R +++ /dev/null @@ -1,21 +0,0 @@ -test_that("denovo_motifs and filter_repeats works", { - data("CTCF_TIP_peaks", package = "MotifPeeker") - genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38 - - res <- denovo_motifs(list(CTCF_TIP_peaks), - trim_seq_width = 100, - genome_build = genome_build, - denovo_motifs = 2, - filter_n = 6, - out_dir = tempdir(), - workers = 1, - verbose = FALSE, - debug = FALSE) - - expect_length(res[[1]]$consensus, 2) - expect_warning(denovo_motifs(list(CTCF_TIP_peaks), - trim_seq_width = 100, - genome_build = genome_build, - denovo_motifs = 1, - filter_n = 2)) -})