From c3ce9073c24d1de53c1c98e9fb804bee4180de22 Mon Sep 17 00:00:00 2001 From: "Brian M. Schilder" <34280215+bschilder@users.noreply.github.com> Date: Mon, 8 Apr 2024 15:39:57 +0100 Subject: [PATCH] Fix prepare_ funcs. Add run_integration --- DESCRIPTION | 7 +- NAMESPACE | 7 ++ R/add_mixingmetric.R | 41 ++++++++ R/color_nightlight.R | 33 +++++- R/get_correlated_factors.R | 21 ++++ R/get_variance_explained.R | 40 ++++++++ R/map_xref.R | 3 +- R/plot_dag_enrich.R | 169 +++++++++++++++++++------------ R/plot_factors_sankey.R | 179 +++++++++++++++++++++++++++++++++ R/plot_feature_density.R | 6 +- R/plot_hpo_pseudotime.R | 81 +++++++++++---- R/plot_ontological_velocity.R | 6 +- R/plot_preservation_histo.R | 38 +++++++ R/plot_top_celltypes.R | 10 +- R/prepare_HPO.R | 41 +++++--- R/prepare_opentargets.R | 49 ++++++--- R/replace_char_fun.R | 13 ++- R/run_dag_enrich.R | 25 ++++- R/run_dag_enrich_obj.R | 47 ++++++--- R/run_dag_enrich_obsm.R | 66 ++++++++---- R/run_dag_enrich_postprocess.R | 26 +++++ R/run_hpo_pseudotime.R | 122 +++++++++++++++++----- R/run_integration.R | 138 +++++++++++++++++++++++++ man/add_mixingmetric.Rd | 47 +++++++++ man/get_variance_explained.Rd | 22 ++++ man/map_id_sep.Rd | 14 +++ man/plot_dag_enrich.Rd | 18 ++++ man/plot_factors_sankey.Rd | 45 +++++++++ man/plot_feature_density.Rd | 20 ++-- man/plot_hpo_pseudotime.Rd | 31 +++++- man/plot_preservation_histo.Rd | 18 ++++ man/plot_top_celltypes.Rd | 3 +- man/prepare_hpo.Rd | 35 ++++++- man/prepare_opentargets.Rd | 2 +- man/run_dag_enrich.Rd | 11 +- man/run_integration.Rd | 74 ++++++++++++++ man/scale_color_nightlight.Rd | 47 +++++++++ 37 files changed, 1340 insertions(+), 215 deletions(-) create mode 100644 R/add_mixingmetric.R create mode 100644 R/get_correlated_factors.R create mode 100644 R/get_variance_explained.R create mode 100644 R/plot_factors_sankey.R create mode 100644 R/plot_preservation_histo.R create mode 100644 R/run_dag_enrich_postprocess.R create mode 100644 R/run_integration.R create mode 100644 man/add_mixingmetric.Rd create mode 100644 man/get_variance_explained.Rd create mode 100644 man/map_id_sep.Rd create mode 100644 man/plot_factors_sankey.Rd create mode 100644 man/plot_preservation_histo.Rd create mode 100644 man/run_integration.Rd create mode 100644 man/scale_color_nightlight.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 9f1f916..83d82a5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -81,7 +81,9 @@ Suggests: downloadR, HPOExplorer, Nebulosa, - arrow + arrow, + ggsankey, + Hmisc Remotes: github::neurogenomics/scNLP, github::neurogenomics/scKirby, @@ -89,7 +91,8 @@ Remotes: github::neurogenomics/HPOExplorer, github::RajLabMSSM/echotabix, github::RajLabMSSM/downloadR, - github::satijalab/seurat-wrappers + github::satijalab/seurat-wrappers, + github::davidsjoberg/ggsankey RoxygenNote: 7.3.1 VignetteBuilder: knitr Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index e89f35f..38e555b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand export(add_cluster_colors) +export(add_mixingmetric) export(adjust_zstat) export(dt_to_granges) export(find_neighbors) @@ -15,11 +16,13 @@ export(get_mhc_genes) export(get_top_factors) export(get_top_features) export(get_top_traits) +export(get_variance_explained) export(import_abc) export(is_granges) export(iterate_gsea) export(iterate_lm) export(magma_matrix) +export(map_id_sep) export(map_phenotypes) export(map_snps2genes) export(opengwas_meta) @@ -28,10 +31,12 @@ export(phenomix_query) export(phenomix_query_batched) export(plot_dag_enrich) export(plot_enrichment) +export(plot_factors_sankey) export(plot_feature_density) export(plot_highlights) export(plot_hpo_pseudotime) export(plot_ontological_velocity) +export(plot_preservation_histo) export(plot_reduction) export(plot_top_celltypes) export(plot_trait_cor) @@ -44,12 +49,14 @@ export(run_autoencoder) export(run_dag_enrich) export(run_gprofiler) export(run_imputation) +export(run_integration) export(run_lda) export(run_mofa2) export(run_pca) export(run_sparsesvd) export(run_umap) export(run_variancePartition) +export(scale_color_nightlight) export(theme_nightlight) import(GenomicFiles) import(HPOExplorer) diff --git a/R/add_mixingmetric.R b/R/add_mixingmetric.R new file mode 100644 index 0000000..f7b23f2 --- /dev/null +++ b/R/add_mixingmetric.R @@ -0,0 +1,41 @@ +#' Add mixing metric +#' +#' Compute the mixing metric for a given reduction and add it to the metadata +#' of a \link{Seurat} object. See \link[Seurat]{MixingMetric} for more details. +#' @param invert Invert the mixing metric such that higher values +#' indicate better mixing. +#' @param normalise Normalise the mixing metric to the range [0,1]. +#' @inheritParams Seurat::MixingMetric +#' @inheritDotParams Seurat::MixingMetric +#' @export +add_mixingmetric <- function(obj, + reduction, + new_col=paste0("MixingMetric_",reduction), + grouping.var = "source", + k = 5, + max.k = 300, + dims = NULL, + invert = TRUE, + normalise = FALSE, + ...){ + if(is.null(dims)){ + dims <- seq(ncol(obj[[reduction]])) + } + message("Computing mixing metric for reduction: ",reduction, + " (",max(dims)," dimensions)") + obj@meta.data[[new_col]] <- Seurat::MixingMetric( + obj, + reduction = reduction, + grouping.var = grouping.var, + k = k, + max.k = max.k, + dims = dims, + ...) + if(isTRUE(invert)){ + obj@meta.data[[new_col]] <- max.k - obj@meta.data[[new_col]] + } + if(isTRUE(normalise)){ + obj@meta.data[[new_col]] <- obj@meta.data[[new_col]] / max.k + } + return(obj) +} \ No newline at end of file diff --git a/R/color_nightlight.R b/R/color_nightlight.R index c1c218e..606500b 100644 --- a/R/color_nightlight.R +++ b/R/color_nightlight.R @@ -1,4 +1,31 @@ -color_nightlight <- function(n = 40){ - ggplot2::scale_color_gradientn( - colors=pals::gnuplot(n = n)[-(seq(as.integer(n*0.2)))]) +#' Scale color nightlight +#' +#' Add color gradient with nightlight colors. +#' @export +#' @param reverse Reverse the color gradient. +#' @param alpha Transparency of the colors. +#' @inheritParams pals::gnuplot +#' @inheritParams ggplot2::scale_color_gradientn +#' @inheritDotParams ggplot2::scale_color_gradientn +scale_color_nightlight <- function(n = 3, + colors=pals::gnuplot(n = n+1)[ + -(seq(as.integer((n+1)*0.2))) + ], + reverse=FALSE, + alpha=NULL, + ...){ + if(!is.null(alpha)) { + if(length(alpha)==1){ + colors <- ggplot2::alpha(colors,alpha) + } else if(length(alpha)==length(colors)){ + for(i in seq_along(colors)){ + colors[[i]] <- ggplot2::alpha(colors[[i]],alpha[i]) + } + } else { + stopper("Length of alpha must be 1 or equal to length of colors.") + } + } + if(isTRUE(reverse)) colors <- rev(colors) + ggplot2::scale_color_gradientn(colors=colors, + ...) } \ No newline at end of file diff --git a/R/get_correlated_factors.R b/R/get_correlated_factors.R new file mode 100644 index 0000000..61aabbb --- /dev/null +++ b/R/get_correlated_factors.R @@ -0,0 +1,21 @@ +get_correlated_factors <- function(obj, + keys, + metadata_var, + p_threshold=.05, + r_threshold=.1){ + requireNamespace("Hmisc") + obsm <- scKirby::get_obsm(obj, + keys = keys, + n=1) + obsm <- cbind(obsm, + nFeature_score=obj$nFeature_score[rownames(obsm)]) + obsm_rcor <- Hmisc::rcorr(obsm) + obsm_cor_sig <- sort( + abs(obsm_rcor$r[metadata_var,][obsm_rcor$P[metadata_var,]r_threshold])) + ) + return(omit_dims) +} \ No newline at end of file diff --git a/R/get_variance_explained.R b/R/get_variance_explained.R new file mode 100644 index 0000000..1639069 --- /dev/null +++ b/R/get_variance_explained.R @@ -0,0 +1,40 @@ +#' Get variance explained +#' +#' Get the proportion of total variance explained +#' by a given dimensionality reduction +#' (0-1 where 1 indicates 100% of variance explained). +#' @export +#' @examples +#' obj <- get_HPO() +#' out <- get_variance_explained(obj) +get_variance_explained <- function(obj, + reduction = names(obj@reductions)[1], + layer = "scale.data", + dims=NULL + ){ + dr <- obj[[reduction]] + mat <- Seurat::GetAssayData(obj, layer = layer) + total_variance <- sum(matrixStats::rowVars(mat)) + ## EigenValues + if(length(dr@stdev)==0){ + messager("Computing stdev for reduction.") + dr@stdev <- apply(obj[[reduction]]@cell.embeddings,2, + stats::sd)|>unname() + } + eigValues <- (dr@stdev)^2|>`names<-`(colnames(dr@cell.embeddings)) + if(!is.null(dims)) { + if(length(dims)>length(eigValues) || + max(dims)>length(eigValues) ){ + stopper( + "The length of dims must be less than or equal to", + "the number of dimensions." + ) + } + eigValues <- eigValues[dims] + } + varExplained <- eigValues / total_variance + messager("Proportion of total variance explained by", + length(eigValues),"dimensions:", + round(sum(varExplained),3)) + return(varExplained) +} \ No newline at end of file diff --git a/R/map_xref.R b/R/map_xref.R index 9f5f7c4..7666837 100644 --- a/R/map_xref.R +++ b/R/map_xref.R @@ -12,7 +12,8 @@ map_xref <- function(dat, if(length(r)==0) NA else unlist(r) })] } + dat[,(new_col):=map_id_sep(get(new_col))] dat[get(new_col)=="NA",(new_col):=NA] messager("% of non-NA rows:", round(sum(!is.na(dat[[new_col]]))/nrow(dat)*100,2),v=verbose) -} \ No newline at end of file +} diff --git a/R/plot_dag_enrich.R b/R/plot_dag_enrich.R index de57099..a32a4e5 100644 --- a/R/plot_dag_enrich.R +++ b/R/plot_dag_enrich.R @@ -1,6 +1,7 @@ #' Plot enrichment of terms in a DAG ontology #' #' Plot enrichment of terms in a DAG ontology. +#' @inheritParams ggrepel::geom_label_repel #' @inheritParams Seurat::LabelClusters #' @inheritDotParams Seurat::LabelClusters #' @export @@ -15,11 +16,16 @@ plot_dag_enrich <- function(obj, min_depth=NULL, exact_depth=NULL, exact_ont_ancestors=FALSE, + sort_by=c("log2_fold_enrichment"=-1), cluster_col="seurat_clusters", + id_col=NULL, label_col="dag_enrich.name_wrap", + cluster_id_alpha=.9, + cluster_id_size=3, color_col=cluster_col, replace_char=list("."=":", "_"=":"), + p_threshold=.05, q_threshold=.05, preferred_palettes="kovesi.cyclic_mygbm_30_95_c78", add_labels=TRUE, @@ -32,79 +38,112 @@ plot_dag_enrich <- function(obj, repel = TRUE, box = TRUE, box.padding = 1, + max.overlaps = 100, + na.value = "grey50", + force_new=FALSE, + return_obj=FALSE, show_plot=TRUE, ...){ - term <- depth <- NULL; - - cluster_enrich <- run_dag_enrich_obj(obj=obj, - ont=ont, - cluster_col=cluster_col, - min_hits=min_hits, - min_offspring=min_offspring, - replace_char=replace_char, - q_threshold=q_threshold) - #### Constrain my ancestors in ontology #### - if(isTRUE(exact_ont_ancestors)){ - cluster_enrich <- cluster_enrich[term %in% ont@elementMetadata$ancestor] - } - #### Filter on min_depth #### - if(!is.null(min_depth)){ - cluster_enrich <- cluster_enrich[depth<=min_depth] - } - #### Filter on exact_depth #### - if(!is.null(exact_depth)){ - cluster_enrich <- cluster_enrich[depth==exact_depth] - } - #### Take only the top hit per cluster #### - # cluster_enrich|>data.table::setorderv("z_score",-1) - cluster_enrich_top <- cluster_enrich[,.SD[1],by=c("group")] - names(cluster_enrich_top) <- paste0("dag_enrich.",names(cluster_enrich_top)) - obj@meta.data$id <- colnames(obj) - meta <- merge(obj@meta.data, - cluster_enrich_top, - by.x="seurat_clusters", - by.y="dag_enrich.group", - all.x=TRUE) - rownames(meta) <- meta$id - meta$dag_enrich.name_wrap <- stringr::str_wrap(meta$dag_enrich.name, - width = label_width) - obj@meta.data <- meta - na.value <- "grey50" + term <- depth <- dag_enrich.name <- dag_enrich.name_wrap <- NULL; + if("dag_enrich.name" %in% colnames(obj@meta.data) && + isFALSE(force_new)){ + messager("Using existing enrichment results in obj.", + "To rerun, set force_new=TRUE.") + #### Reconstruct cluster_enrich results from metadata #### + dag_cols <- grep("^dag_enrich\\.",colnames(obj@meta.data), value = TRUE) + cluster_enrich <- data.table::data.table( + unique(obj@meta.data[,c(cluster_col,dag_cols)]) + ) + } else { + #### Remove any existing "dag_enrich." columns #### + dag_cols <- grep("^dag_enrich\\.",colnames(obj@meta.data), value = TRUE) + if(length(dag_cols)>0) obj@meta.data[,dag_cols] <- NULL + #### Run enrichment #### + cluster_enrich <- run_dag_enrich_obj(obj=obj, + ont=ont, + id_col=id_col, + cluster_col=cluster_col, + min_hits=min_hits, + min_offspring=min_offspring, + replace_char=replace_char, + p_threshold=p_threshold, + q_threshold=q_threshold, + sort_by=sort_by) + #### Constrain my ancestors in ontology #### + if(isTRUE(exact_ont_ancestors)){ + cluster_enrich <- cluster_enrich[term %in% ont@elementMetadata$ancestor] + } + #### Filter on min_depth #### + if(!is.null(min_depth)){ + cluster_enrich <- cluster_enrich[depth<=min_depth] + } + #### Filter on exact_depth #### + if(!is.null(exact_depth)){ + cluster_enrich <- cluster_enrich[depth==exact_depth] + } + #### Take only the top hit per cluster #### + cluster_enrich <- cluster_enrich[,.SD[1],by=c("group")] + names(cluster_enrich) <- paste0("dag_enrich.",names(cluster_enrich)) + cluster_enrich[,dag_enrich.name_wrap:=stringr::str_wrap( + dag_enrich.name, + width = label_width)] + obj@meta.data$id <- colnames(obj) + meta <- merge(obj@meta.data, + cluster_enrich, + by.x="seurat_clusters", + by.y="dag_enrich.group", + all.x=TRUE) + rownames(meta) <- meta$id + obj@meta.data <- meta[colnames(obj),] + } + obj <- add_cluster_colors(obj) + cluster_colors <- add_cluster_colors(obj, return_dict = TRUE) dr <- Seurat::DimPlot(obj, - cols = KGExplorer::map_colors( - obj@meta.data, - as = "dict", - columns = label_col, - preferred_palettes = preferred_palettes)[[1]], - alpha=point_alpha, + alpha = point_alpha, pt.size = point_size, na.value = na.value, - group.by = label_col, + label = TRUE, + label.color = ggplot2::alpha("white",cluster_id_alpha), + label.size = cluster_id_size, + group.by = color_col, reduction = reduction) + Seurat::NoLegend() + - theme_nightlight() + ggplot2::scale_color_manual(values = cluster_colors) + + theme_nightlight() if(isTRUE(add_labels)){ - dark_theme <- dr[[1]]$theme$plot.background$fill=="black" - dr <- Seurat::LabelClusters( - dr, - id = label_col, - box = box, - fill = ggplot2::alpha( - setdiff(unique(ggplot2::ggplot_build(dr)$data[[1]]$colour), - na.value), - label_alpha), - color=if(dark_theme) "white" else "black", - arrow = ggplot2::arrow(length = ggplot2::unit(0.01, "npc"), - type="closed"), - repel = repel, - size = label_size, - box.padding = box.padding, - ...) + dark_theme <- dr[[1]]$theme$plot.background$fill=="black" + cluster_enrich2 <- merge(dr[[1]]$layers[[2]]$data, + cluster_enrich, + by.x=cluster_col, + by.y="dag_enrich.group") + # Seurat::LabelClusters ### <-- Not very flexible and has uninformative errors. + dr <- dr + + ggrepel::geom_label_repel( + data = cluster_enrich2, + ggplot2::aes(x=!!ggplot2::sym(names(cluster_enrich2)[[2]]), + y=!!ggplot2::sym(names(cluster_enrich2)[[3]]), + label=!!ggplot2::sym(label_col), + fill=!!ggplot2::sym(color_col) + ), + color=if(dark_theme) "white" else "black", + arrow = ggplot2::arrow(length = ggplot2::unit(0.01, "npc"), + type="closed"), + size = label_size, + box.padding = box.padding, + point.padding = ggplot2::unit(0.02, "npc"), + max.overlaps = max.overlaps, + min.segment.length = 0, + ... + ) + + ggplot2::scale_fill_manual( + values = ggplot2::alpha(cluster_colors,label_alpha) + ) } + #### Show #### if(isTRUE(show_plot)) methods::show(dr) - return( - list(obj=obj, - plot=dr, - data=cluster_enrich) - ) + #### Return #### + out <- list(plot=dr, + data=cluster_enrich) + if(isTRUE(return_obj)) out$obj <- obj + return(out) } diff --git a/R/plot_factors_sankey.R b/R/plot_factors_sankey.R new file mode 100644 index 0000000..646ffb8 --- /dev/null +++ b/R/plot_factors_sankey.R @@ -0,0 +1,179 @@ +#' Plot factors: Sankey +#' +#' Create a Sankey plot from enrichment results performed on the +#' feature weights and sample loadings from a factorization model. +#' @inheritParams ggsankey::geom_sankey +#' @inheritDotParams ggsankey::geom_sankey +#' @inheritParams ggplot2::scale_fill_discrete +#' @export +plot_factors_sankey <- function(factor.traits, + factor.celltypes, + varm = NULL, + max_factors = NULL, + q_threshold=0.05, + logfc_threshold=NULL, + value_var="q", + drop=TRUE, + flow.alpha = .8, + label_size = 3, + label_color="white", + label_fill=ggplot2::alpha("black",.7), + xtext_size=12, + show.legend=FALSE, + ... + ){ + + p_adjust_all <- factor_num <- NULL; + + X.traits <- data.table::dcast.data.table(factor.traits, + formula = factor ~ name, + value.var = "p_adjust_all", + fun.aggregate = mean, + fill=1)|> + KGExplorer::dt_to_matrix() + hc.traits <- X.traits|> + Matrix::t()|> + stats::dist()|> + stats::hclust() + X.celltypes <- data.table::dcast.data.table(factor.celltypes, + formula = factor ~ cl_name, + value.var = "q", + fun.aggregate = mean, + fill=1)|> + KGExplorer::dt_to_matrix() + hc.celltypes <- X.celltypes|> + Matrix::t() |> + stats::dist()|> + stats::hclust() + if(!is.null(varm)){ + hc.factors <- varm |> + Matrix::t() |> + stats::dist()|> + stats::hclust() + } else { + hc.factors <- + Seurat::RowMergeSparseMatrices(X.traits,X.celltypes)|> + stats::dist()|> + stats::hclust() + } + factor.annot <- merge(factor.traits[p_adjust_alllogfc_threshold & + is.finite(logFC)] + } + dt <- ggsankey::make_long(factor.annot, + cl_name, factor_num, name, + value = value_var) + dt$value <- 1-dt$value + lvls <- c(stringr::str_split(hc.factors$labels[hc.factors$order], + "_",simplify = TRUE)[,2], + hc.traits$labels[hc.traits$order], + hc.celltypes$labels[hc.celltypes$order] + ) + dt$node <- factor(dt$node, levels = lvls, ordered = TRUE) + dt$next_node <- factor(dt$next_node, levels = lvls, ordered = TRUE) + #### Create plot #### + gg_sankey <- ggplot2::ggplot(dt, + ggplot2::aes(x = x, + next_x = next_x, + node = node, + next_node = next_node, + fill = factor(node), + label = node) + ) + + ggsankey::geom_sankey(show.legend = show.legend, + flow.alpha = flow.alpha, + ...) + + ggsankey::geom_sankey_label(size = label_size, + color = label_color, + fill = label_fill) + + ggplot2::scale_fill_discrete(drop=drop) + + ggplot2::labs(x=NULL) + + ggplot2::scale_x_discrete(labels=c( + expression("Cell types " %<-% " factor gene weights " %<-% ""), + "Factor", + expression("" %->% " factor trait loadings " %->% " Traits" )) + ) + + ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = c(0,.5,1), + size = xtext_size)) + theme_nightlight() + #### Return #### + return(list( + data=dt, + plot=gg_sankey + )) + + # meta <- merge( + # data.table::data.table(obj@meta.data|>dplyr::select(!id), + # keep.rownames = "id"), + # scKirby::get_obsm(obj, keys = reduction_md, n=1)^2|> + # data.table::as.data.table(keep.rownames = "id")|> + # data.table::melt.data.table( + # id.vars = "id", + # variable.name = "factor", + # value.name = "loading"), + # by="id" + # ) + + # id_to_cluster <- tidygraph::as_tbl_graph( + # data.table::data.table(colnames(obj), + # cluster=paste0("cluster",obj$seurat_clusters), + # value=1, + # edge_type="id_to_cluster") + # ) + # id_to_factor <- tidygraph::as_tbl_graph( + # (scKirby::get_obsm(obj, keys = reduction_md, n=1)^2|> + # data.table::as.data.table(keep.rownames = "id")|> + # data.table::melt.data.table( + # id.vars = "id", + # variable.name = "factor", + # value.name = "value") + # )[,edge_type:="id_to_factor"] + # ) + # factor_to_celltype <- tidygraph::as_tbl_graph( + # factor.celltypes[q<.05,c("factor","cl_name","p")][,edge_type:="factor_to_celltype"] |> + # data.table::setnames("p","value") + # ) + # + # visNetwork::visIgraph(factor_to_celltype) |> + # # visNetwork::visHierarchicalLayout()|> + # visNetwork::visIgraphLayout(layout = "layout_with_sugiyama")|> + # visNetwork::visNodes() |> + # visNetwork::visEdges(color = list(opacity=.1))|> + # visNetwork::visInteraction(hover = TRUE) + # + # g <- tidygraph::graph_join(id_to_cluster, + # id_to_factor)|> + # tidygraph::graph_join(factor_to_celltype) + # # g <- tidygraph::sample_n(g, 1000) + # g.dt <- KGExplorer::graph_to_dt(g) + # nodes <- KGExplorer::graph_to_dt(g, what = "nodes") + # edges <- KGExplorer::graph_to_dt(g, what = "edges") + # edges[,c("from","to"):=list(from-1,to-1)] # networkD3 requirees 0-indexing + # p <- networkD3::sankeyNetwork(Nodes = nodes, + # Links = edges, + # Source = "from", + # Target = "to", + # Value = "weight", + # NodeID = "name") + # p + # + # + # plotly::plot_ly(type="sankey",g) + + # X <- data.table::dcast.data.table(data = factor.celltypes, + # formula = factor~cl_name, + # value.var = "p", + # fun.aggregate = mean, na.rm = TRUE)|> + # KGExplorer::dt_to_matrix() + # + # heatmaply::heatmaply(as.matrix(X)) + +} \ No newline at end of file diff --git a/R/plot_feature_density.R b/R/plot_feature_density.R index 78811fd..cb8f047 100644 --- a/R/plot_feature_density.R +++ b/R/plot_feature_density.R @@ -12,6 +12,7 @@ plot_feature_density <- function(obj, features=sample(rownames(obj),3), alpha=.5, + method=c("wkde","ks")[1], joint=TRUE, joint_only=FALSE, joint_title=if(length(unique(features))>10){ @@ -32,14 +33,15 @@ plot_feature_density <- function(obj, messager(length(features2),"/",length(features),"features found in object.") plts <- Nebulosa::plot_density(obj, + method=method, alpha=alpha, - features = features2, + features = features2, joint = joint, ...) ### Add theme #### plts <- lapply(plts,function(p){ p + theme_nightlight() + - color_nightlight() + scale_color_nightlight() }) if(!is.null(joint_title)){ plts[[length(plts)]] <- plts[[length(plts)]] + diff --git a/R/plot_hpo_pseudotime.R b/R/plot_hpo_pseudotime.R index d11a940..c5811d9 100644 --- a/R/plot_hpo_pseudotime.R +++ b/R/plot_hpo_pseudotime.R @@ -4,18 +4,24 @@ #' then compute pseudotime trajectories between a subset of phenotypes that #' are symptoms of a given disease (or set of diseases). #' @param obj \link{Seurat} object of HPO terms generated by \link{prepare_hpo}. -#' @param disease_ids One or more disease IDs found within \code{dt_genes}. +#' @param disease_ids One or more disease IDs found within \code{p2d}. #' @inheritParams prepare_hpo #' @inheritParams monocle3::learn_graph +#' @inheritParams monocle3::order_cells #' @inheritDotParams monocle3::plot_cells #' @export #' @examples #' obj <- get_HPO() #' out <- plot_hpo_pseudotime(obj) plot_hpo_pseudotime <- function(obj, - dt_genes = HPOExplorer::load_phenotype_to_genes(1), - disease_ids=dt_genes$disease_id[1:3], - learn_graph_control=list(prune_graph=FALSE), + p2d = HPOExplorer::load_phenotype_to_genes(1), + disease_ids=p2d$disease_id[1:3], + root_cells = NULL, + id_col = NULL, + learn_graph_control=list( + prune_graph=FALSE, + nn.cores=KGExplorer::set_cores()$workers), + use_partition=TRUE, merge_trajectories = TRUE, symptom_color="red", bg_colors=c("#5000ff","white"), @@ -23,42 +29,67 @@ plot_hpo_pseudotime <- function(obj, title_col=NULL, title_width=100, color_by_symptoms=TRUE, + color_cells_by = if(color_by_symptoms){ + "is_symptom" }else {"cluster"}, point_alpha=.5, + add_density=TRUE, + density_filled=FALSE, + density_adjust=1, + density_alpha=point_alpha, + title=stringr::str_wrap( + paste0(unique(disease_ids), + collapse = "; "), + 20*2.5 + ), show_plot=TRUE, ... ){ + # obj=readRDS("../thesis/inst/pages/chapter2/data/hpo_ot_integratedV5.rds"); id_col="hp_id"; + # disease_ids <- c( "OMIM:104300","OMIM:104310","OMIM:606889","OMIM:607822","OMIM:608907","ORPHA:1020","ORPHA:280397") disease_id <- NULL; - colnames(obj) <- replace_char_fun(colnames(obj)) disease_ids <- unique(disease_ids) - #### Add disease metadata - ## Using gene overlap (670998 p2d pairs) ## - # nrow(unique(dt_genes[,c("hpo_id","disease_id")])) + #### Map p2d to Mondo IDs #### + p2d <- data.table::copy(p2d)[,.SD[1],by=c("hpo_id","disease_id")] + p2d <- HPOExplorer::add_mondo(phenos = p2d, + allow.cartesian=TRUE) + #### Map disease IDs to Mondo IDs #### + disease_map <- HPOExplorer::add_mondo( + phenos=data.table::data.table(disease_id=disease_ids), + allow.cartesian=TRUE) + disease_ids <- unique(c(disease_ids,unique(disease_map$mondo_id))) + disease_ids <- map_id_sep(unique(disease_ids)) + #### Run pseudotime: merged #### if(isTRUE(merge_trajectories)){ out <- run_hpo_pseudotime(obj = obj, disease_ids = disease_ids, - dt_genes = dt_genes, + p2d = p2d, + id_col = id_col, + root_cells = root_cells, learn_graph_control = learn_graph_control, - color_by_symptoms=color_by_symptoms, - title=stringr::str_wrap( - paste0(unique(disease_ids), - collapse = "; "), - 20*2.5 - ), + use_partition=use_partition, + color_by_symptoms=color_by_symptoms, + color_cells_by=color_cells_by, point_alpha=point_alpha, + title=title, symptom_color=symptom_color, bg_colors=bg_colors, trajectory_graph_color = trajectory_graph_color, + add_density=add_density, + density_filled=density_filled, + density_adjust=density_adjust, + density_alpha=density_alpha, ...) - if(show_plot) methods::show(out$plot) + if(isTRUE(show_plot)) methods::show(out$plot) } else { + #### Run pseudotime: split #### out <- list() out[["subplots"]] <- lapply(stats::setNames(disease_ids, disease_ids), function(d){ if(!is.null(title_col) && - title_col %in% names(dt_genes)){ - # dt_genes <- HPOExplorer::add_disease(dt_genes) - title <- dt_genes[!is.na(get(title_col)) & + title_col %in% names(p2d)){ + # p2d <- HPOExplorer::add_disease(p2d) + title <- p2d[!is.na(get(title_col)) & disease_id==d][[title_col]][1] subtitle <- d } else { @@ -67,13 +98,21 @@ plot_hpo_pseudotime <- function(obj, } run_hpo_pseudotime(obj = obj, disease_ids = d, - dt_genes = dt_genes, + p2d = p2d, + id_col = id_col, + root_cells = root_cells, learn_graph_control = learn_graph_control, + use_partition=use_partition, title = stringr::str_wrap(title, width = title_width), subtitle=subtitle, point_alpha=point_alpha, color_by_symptoms=color_by_symptoms, + color_cells_by=color_cells_by, + add_density=add_density, + density_filled=density_filled, + density_adjust=density_adjust, + density_alpha=density_alpha, ...) }) out[["plot"]] <- patchwork::wrap_plots(lapply(out$subplots, @@ -121,4 +160,4 @@ plot_hpo_pseudotime <- function(obj, # ggplot2::geom_step(data = highlight_df, # ggplot2::aes(x=umap_1, # y=umap_2)) -} \ No newline at end of file +} diff --git a/R/plot_ontological_velocity.R b/R/plot_ontological_velocity.R index f9eb23d..662f47b 100644 --- a/R/plot_ontological_velocity.R +++ b/R/plot_ontological_velocity.R @@ -35,7 +35,7 @@ plot_ontological_velocity <- function(obj, colnames(obj),NA)) } obj <- obj[,!is.na(obj@meta.data[[id_col]])] - colnames(obj) <- make.unique(replace_char_fun(obj$hp_id)) + colnames(obj) <- make.unique(map_id_sep(obj$hp_id)) } dist_lca <- simona::longest_distances_via_LCA(ont, terms = ont@terms) @@ -63,12 +63,12 @@ plot_ontological_velocity <- function(obj, if(length(nns)<2) return(NULL) emb <- (obj@reductions$umap@cell.embeddings |>data.frame())[nns,] #### remove suffixes from make.unique #### - nns <- replace_char_fun( + nns <- map_id_sep( stringr::str_split(nns,"[.]", simplify = TRUE)[,1] ) nns <- intersect(nns,ont@terms) if(length(nns)<2) return(NULL) - emb$id <- replace_char_fun( + emb$id <- map_id_sep( stringr::str_split(rownames(emb),"[.]", simplify = TRUE)[,1] ) combos <- expand.grid(id1=nns, diff --git a/R/plot_preservation_histo.R b/R/plot_preservation_histo.R new file mode 100644 index 0000000..2db3839 --- /dev/null +++ b/R/plot_preservation_histo.R @@ -0,0 +1,38 @@ +#' Plot preservation histogram +#' +#' Plot the correlation between equivalent terms in +#' high-, mid-, and low-dimensional space. +#' @export +plot_preservation_histo <- function(consist_dt, + lvl="hd", + key=c("hd"="High-dimensional", + "md"="Mid-dimensional", + "ld"="Low-dimensional"), + title=paste(key[lvl],"space"), + subtitle=paste0( + "(",consist_dt[[paste0("dim_",lvl)]][1], + " dimensions)") + ){ + ## histograms of high-dimensional cor + ggplot2::ggplot(consist_dt, + ggplot2::aes(x=!!ggplot2::sym(paste0("cor_",lvl)), + fill=id_type, color=id_type))+ + ggplot2::geom_density(ggplot2::aes(y=ggplot2::after_stat(scaled)), + alpha=.5)+ + ## baseline mean + ggplot2::geom_vline(xintercept = mean(consist_dt[[paste0("baseline_cor_",lvl)]]), + linetype="dashed", color="grey40") + + ggplot2::geom_text(x = mean(consist_dt[[paste0("baseline_cor_",lvl)]]), y=.8, + label="baseline", angle=90, color="grey40",vjust=1.5, + check_overlap = TRUE) + + ## group means + ggplot2::geom_vline(data=consist_dt[,.(mean_cor=mean(get(paste0("cor_",lvl)), + na.rm=TRUE)), + by=id_type], + ggplot2::aes(xintercept=mean_cor, color=id_type))+ + ggplot2::xlim(0,1)+ + ggplot2::theme_minimal() + + ggplot2::labs(title=title, + subtitle=subtitle, + x="Pearson correlation") +} diff --git a/R/plot_top_celltypes.R b/R/plot_top_celltypes.R index 1a3d047..09c25b6 100644 --- a/R/plot_top_celltypes.R +++ b/R/plot_top_celltypes.R @@ -9,10 +9,7 @@ plot_top_celltypes <- function(obj, types=c("radial","pie","reduction"), reduction="umap", prefix="^q[.]", - cluster_vars=c("seurat_clusters", - "dag_enrich.name", - "dag_enrich.name_wrap", - "cluster_colors"), + cluster_vars=c("seurat_clusters"), color_col=cluster_vars[1], label_col=cluster_vars[1], enriched_proportion_threshold=.2, @@ -52,6 +49,9 @@ plot_top_celltypes <- function(obj, n=1)|> data.table::data.table(keep.rownames = "rn") embed_cols <- names(embed)[-1] + for(e in embed_cols){ + if(e %in% names(meta)) meta <- meta[,-c(e), with=FALSE] + } meta <- merge(meta,embed, by="rn") } meta_melt <- ( @@ -82,7 +82,7 @@ plot_top_celltypes <- function(obj, umap_1=mean(get(embed_cols[1])), umap_2=mean(get(embed_cols[2])) ), - by=c(cluster_vars,"celltype")]|> + by=c(unique(c(cluster_vars,label_col,"celltype")))]|> data.table::setorderv(c(cluster_vars,"celltype_sig_count","mean_q"), c(rep(1,length(cluster_vars)),-1,1)) diff --git a/R/prepare_HPO.R b/R/prepare_HPO.R index 90648ae..d53b400 100644 --- a/R/prepare_HPO.R +++ b/R/prepare_HPO.R @@ -10,6 +10,8 @@ #' disease and HPO terms. #' @param min_quantile \link{numeric} minimum quantile for filtering #' cell type enrichment results. +#' @param min_genes The minimum number of genes of phenotypes/diseases +#' to include from the data matrix. #' @param min_value The minimum sum of columns to include from the data matrix. #' @param celltype_col Name of the cell type column. #' @param run_nlp Label clusters using natural language processing via @@ -19,12 +21,13 @@ #' @inheritParams HPOExplorer::hpo_to_matrix #' @inheritParams HPOExplorer::make_phenos_dataframe #' @inheritParams scKirby::process_seurat +#' @inheritDotParams scKirby::process_seurat #' #' @export #' @import data.table #' @import HPOExplorer #' @examples -#' obj <- prepare_hpo(id_types="hpo_id") +#' obj <- prepare_hpo(id_types="hpo_id", min_genes=10) prepare_hpo <- function(dt_genes = HPOExplorer::load_phenotype_to_genes(1), dt_annot = HPOExplorer::load_phenotype_to_genes(3), hpo = HPOExplorer::get_hpo(), @@ -34,26 +37,30 @@ prepare_hpo <- function(dt_genes = HPOExplorer::load_phenotype_to_genes(1), MSTExplorer::map_celltype(), id_types = c("hpo_id"),#c("disease_id","hpo_id"), min_quantile = 35, + min_genes = NULL, min_value = NULL, celltype_col=c("cl_name","cl_id","CellType"), + celltype_value="q", value.var = "evidence_score_sum", default_assay = "score", nfeatures = NULL, - vars.to.regress = paste0("nFeature_", - default_assay), + vars.to.regress = NULL, # paste0("nFeature_",default_assay), run_nlp = FALSE, run_impute = FALSE, workers = 1, seed = 2023, verbose = TRUE, save_path=NULL, - force_new=FALSE){ + force_new=FALSE, + ...){ top_celltype <- gene_symbol <- disease_id <- disease_db <- id <- ancestor_name <- ancestor_name_abnormality <- NULL celltype_col <- celltype_col[1] #### Check for existing file #### - if(file.exists(save_path) && !force_new){ + if(!is.null(save_path) && + file.exists(save_path) && + isFALSE(force_new)){ messager("Loading precomputed data:",save_path) return(readRDS(save_path)) } @@ -137,8 +144,8 @@ prepare_hpo <- function(dt_genes = HPOExplorer::load_phenotype_to_genes(1), dt_annot_melt <- dt_annot_melt[, head(.SD, 1), by="id"][colnames(X_ref),] ## Add id_name column for labeling id_names <- gsub("_id$","_name",id_types) - id_names <- id_names[id_names %in% names(dt_annot)] - dt_annot_melt[,id_name:=data.table::fcoalesce(as.list(get(id_names),"id"))] + id_names <- id_names[id_names %in% names(dt_annot)] + dt_annot_melt[,name:=data.table::fcoalesce(.SD),.SDcols=c(id_names,"id")] } { #### Add per sample gene_symbol counts #### @@ -166,15 +173,15 @@ prepare_hpo <- function(dt_genes = HPOExplorer::load_phenotype_to_genes(1), # dplyr::slice_head(n = 1) |> data.table::data.table() ct_results_cast <- data.table::dcast.data.table( ct_results, - formula = hpo_id ~ paste("q",get(celltype_col),sep="."), + formula = hpo_id ~ paste(celltype_value,get(celltype_col),sep="."), fun.aggregate = mean, - value.var = "q", + value.var = celltype_value, na.rm = TRUE) #### Add - ct_cols <- grep("^q\\.",names(ct_results_cast), value = TRUE) + ct_cols <- grep(paste0("^",celltype_value,"\\."),names(ct_results_cast), value = TRUE) ct_results_cast[, top_celltype:=gsub( - "^q\\.","", + paste0("^",celltype_value,"\\."),"", ct_cols[apply(ct_results_cast[,ct_cols, with=FALSE],1, which.min)]) ] @@ -187,6 +194,9 @@ prepare_hpo <- function(dt_genes = HPOExplorer::load_phenotype_to_genes(1), by.y = "hpo_id", all.x = TRUE) #### Filter out any terms with min_genes] + } if(!is.null(min_value)){ X_ref <- X_ref[,Matrix::colSums(X_ref)>=min_value] } @@ -203,7 +213,9 @@ prepare_hpo <- function(dt_genes = HPOExplorer::load_phenotype_to_genes(1), nfeatures = nfeatures, vars.to.regress = vars.to.regress, default_assay = default_assay, - workers = workers) + workers = workers, + ...) + ref$orig.ident <- "HPO" #### Impute #### if(isTRUE(run_impute)){ messager("Running imputation.",v=verbose) @@ -216,11 +228,10 @@ prepare_hpo <- function(dt_genes = HPOExplorer::load_phenotype_to_genes(1), workers = workers) } #### Save #### - if(!is.null(save_path)) cache_save(ref,save_path) + KGExplorer::cache_save(ref,save_path) #### Run scNLP #### if(isTRUE(run_nlp)){ - ref$name_definition <- paste(ref$id_name, - ref$ancestor_name, + ref$name_definition <- paste(ref$name, ref$definition) scnlp_res <- scNLP::plot_tfidf( ref, diff --git a/R/prepare_opentargets.R b/R/prepare_opentargets.R index 90c13cd..0f0bf33 100644 --- a/R/prepare_opentargets.R +++ b/R/prepare_opentargets.R @@ -10,34 +10,57 @@ prepare_opentargets <- function(data_type="associationByOverallDirect", formula = "approvedSymbol ~ diseaseId", nfeatures=NULL, default_assay = "score", - vars.to.regress = paste0("nFeature_", - default_assay), + vars.to.regress = NULL, #paste0("nFeature_",default_assay), save_path=NULL, force_new=FALSE, ...){ targetId <- id <- NULL; - if(file.exists(save_path) && !force_new){ + if(!is.null(save_path) && + file.exists(save_path) && + isFALSE(force_new)){ messager("Loading precomputed data:",save_path) return(readRDS(save_path)) } #### Get gene-disease relationship data #### - d <- KGExplorer::get_opentargets(data_type=data_type) + d <- KGExplorer::get_opentargets(data_type=data_type, + force_new = force_new) #### Get sample metadata #### - obs <- KGExplorer::get_opentargets(data_type = "diseases") + obs <- KGExplorer::get_opentargets(data_type = "diseases", + force_new = force_new) obs$db <- stringr::str_split(obs$id,"_",simplify = TRUE)[,1] + obs|> data.table::setnames("description","definition") messager("Mapping xref IDs.") for(x in unique(obs$db)){ suppressWarnings( - map_xref(obs,prefix=x,verbose=FALSE) + map_xref(dat = obs, + prefix=x, + verbose=FALSE) ) } #### Get feature metadata #### - var <- KGExplorer::get_opentargets(data_type = "targets") - var <- var[id %in% unique(d$targetId)] |> - data.table::setkeyv("id") - #### Convert ensembl IDs to gene symbols #### - d[,approvedSymbol:=var[targetId]$approvedSymbol] + ## Mapping with orthogene zero difference when converting to gene symbols + # if(isTRUE(run_map_genes)){ + # var <- KGExplorer::get_opentargets(data_type = "targets") + # var <- var[id %in% unique(d$targetId)] + # gene_map <- orthogene::map_genes(genes = unique(var$approvedSymbol)) + # gene_map <- data.table::data.table(gene_map)[,.SD[1], by="input"] + # gene_map[,approvedSymbol:=data.table::fcoalesce(name,input)] + # data.table::setkeyv(gene_map,"input") + # var[,approvedSymbol_standard:=data.table::fcoalesce( + # gene_map[approvedSymbol]$name,approvedSymbol)] + # data.table::setkeyv(var,"id") + # #### Convert ensembl IDs to gene symbols #### + # d[,approvedSymbol:=var[targetId]$approvedSymbol_standard] + # } else{ + #### Get feature metadata #### + var <- KGExplorer::get_opentargets(data_type = "targets", + force_new = force_new) + var <- var[id %in% unique(d$targetId)] |> + data.table::setkeyv("id") + #### Convert ensembl IDs to gene symbols #### + d[,approvedSymbol:=var[targetId]$approvedSymbol] + # } #### Convert to sparse matrix #### messager("Constructing matrix:",formula) X <- data.table::dcast.data.table(d, @@ -46,7 +69,7 @@ prepare_opentargets <- function(data_type="associationByOverallDirect", fun.aggregate = mean, fill = 0, na.rm = TRUE) |> - scKirby::to_sparse() + KGExplorer::dt_to_matrix(as_sparse = TRUE) #### Subset by intersecting IDs #### ids <- intersect(obs$id, d$diseaseId) obs <- obs[id %in% ids,] @@ -68,7 +91,7 @@ prepare_opentargets <- function(data_type="associationByOverallDirect", default_assay = default_assay, ...) #### Save #### - if(!is.null(save_path)) cache_save(obj,save_path) + KGExplorer::cache_save(obj,save_path) #### Return #### return(obj) } \ No newline at end of file diff --git a/R/replace_char_fun.R b/R/replace_char_fun.R index d2f92df..7ad4116 100644 --- a/R/replace_char_fun.R +++ b/R/replace_char_fun.R @@ -1,9 +1,16 @@ -replace_char_fun <- function(nms, - replace_char=list("."=":")){ +#' Map ID separators +#' +#' Replace the separator in an ID in order to standardise it. +#' @export +#' @examples +#' map_id_sep(c("HP:0001","HP.0001","HP_0001")) +map_id_sep <- function(nms, + replace_char=list("."=":", + "_"=":")){ if(length(replace_char)>0){ for(r in names(replace_char)){ nms <- gsub(r,replace_char[[r]],nms, fixed = TRUE) } } return(nms) -} \ No newline at end of file +} diff --git a/R/run_dag_enrich.R b/R/run_dag_enrich.R index e6e3882..b7fa496 100644 --- a/R/run_dag_enrich.R +++ b/R/run_dag_enrich.R @@ -2,6 +2,9 @@ #' #' Run enrichment of terms in a DAG ontology. #' +#' @param value_threshold Minimum weight to include a term +#' (after applying \code{truns_fun}). +#' Only used when \code{reduction} is provided. #' @export #' @examples #' ont <- KGExplorer::get_ontology("hp") @@ -9,6 +12,7 @@ #' res <- run_dag_enrich(obj, ont, reduction="pca") run_dag_enrich <- function(obj, ont, + id_col=NULL, reduction=NULL, cluster_col = NULL, min_hits = 3, @@ -16,8 +20,12 @@ run_dag_enrich <- function(obj, trans_fun=NULL, replace_char=list("."=":", "_"=":"), + value_threshold=NULL, + p_threshold=.05, q_threshold=.05, - top_n=100){ + top_n=100, + sort_by= c("log2_fold_enrichment"=-1) + ){ if(is.null(reduction)){ if(is.null(cluster_col)){ @@ -25,20 +33,27 @@ run_dag_enrich <- function(obj, } run_dag_enrich_obj(obj=obj, ont=ont, + id_col=id_col, cluster_col=cluster_col, min_hits=min_hits, min_offspring=min_offspring, replace_char=replace_char, - q_threshold=q_threshold) + p_threshold=p_threshold, + q_threshold=q_threshold, + sort_by=sort_by) } else { - obsm <- scKirby::get_obsm(obj, keys=reduction, n = 1) - run_dag_enrich_obsm(obsm=obsm, + run_dag_enrich_obsm(obj=obj, ont=ont, + id_col=id_col, + reduction=reduction, min_hits=min_hits, min_offspring=min_offspring, replace_char=replace_char, trans_fun=trans_fun, top_n=top_n, - q_threshold=q_threshold) + value_threshold=value_threshold, + p_threshold=p_threshold, + q_threshold=q_threshold, + sort_by=sort_by) } } \ No newline at end of file diff --git a/R/run_dag_enrich_obj.R b/R/run_dag_enrich_obj.R index deeca44..51a7b16 100644 --- a/R/run_dag_enrich_obj.R +++ b/R/run_dag_enrich_obj.R @@ -1,19 +1,34 @@ run_dag_enrich_obj <- function(obj, ont, + id_col, cluster_col, min_hits, min_offspring, replace_char, - q_threshold=.05){ - z_score <- NULL; + p_threshold, + q_threshold, + sort_by){ messager("Running dag_enrich_on_offsprings.") clusters <- sort(unique(obj[[cluster_col]][,1])) - lapply(seq(length(clusters)), function(i){ - nms <- colnames(obj)[obj[[cluster_col]][,1]==i] - nms <- replace_char_fun(nms,replace_char) + all_ids <- if(is.null(id_col)) { + colnames(obj) + } else { + if(!id_col %in% colnames(obj@meta.data)){ + messager("Could not find",paste0("id_col=",shQuote(id_col)), + "Defaulting to colnames(obj).") + colnames(obj) + } + obj@meta.data[[id_col]] + } + BPPARAM <- KGExplorer::set_cores() + RES <- BiocParallel::bplapply(seq(length(clusters)), + BPPARAM = BPPARAM, + function(i){ + nms <- all_ids[obj@meta.data[[cluster_col]]==i] + nms <- map_id_sep(nms,replace_char) nms <- intersect(nms,ont@terms) if(length(nms) data.table::data.table(key="term") res <- cbind(group=clusters[i],res) - res[,combined_score:=(1-p_adjust)*z_score] - if(is.null(q_threshold)) return(res) - res_sig <- res[p_adjust - data.table::setorderv(c("combined_score"),c(-1)) - res_sig - }) |> data.table::rbindlist() -} \ No newline at end of file + res$input_ids <- paste(nms,collapse = ";") + res$input_names <- paste(KGExplorer::map_ontology_terms(ont = ont, + terms = nms, + to = "name", + verbose = FALSE), + collapse = ";") + return(res) + }) |> data.table::rbindlist(fill=TRUE) + run_dag_enrich_postprocess(RES=RES, + p_threshold=p_threshold, + q_threshold=q_threshold, + sort_by=sort_by) +} diff --git a/R/run_dag_enrich_obsm.R b/R/run_dag_enrich_obsm.R index 72c2033..a947b2c 100644 --- a/R/run_dag_enrich_obsm.R +++ b/R/run_dag_enrich_obsm.R @@ -1,29 +1,51 @@ -run_dag_enrich_obsm <- function(obsm, +run_dag_enrich_obsm <- function(obj, ont, + id_col, + reduction, min_hits, min_offspring, replace_char, - top_n=100, - trans_fun=NULL, - q_threshold=.05){ - z_score <- NULL; + top_n, + trans_fun, + value_threshold, + p_threshold, + q_threshold, + sort_by){ + obsm <- scKirby::get_obsm(obj, keys=reduction, n = 1) ont_prefixes <- unique(stringr::str_split(ont@terms,pattern = ":", simplify = TRUE)[,1]) obsm <- obsm[grepl(paste(paste0("^",ont_prefixes),collapse = "|"), rownames(obsm)),] if(!is.null(trans_fun)) obsm <- trans_fun(obsm) if(nrow(obsm)==0) stopper("No matching terms found in obsm.") - messager("Running dag_enrich_on_offsprings.") - lapply(stats::setNames(seq(ncol(obsm)), - colnames(obsm)), - function(i){ + id_dict <- if(is.null(id_col)) { + stats::setNames(colnames(obj), + colnames(obj)) + } else { + if(!id_col %in% colnames(obj@meta.data)){ + messager("Could not find",paste0("id_col=",shQuote(id_col)), + "Defaulting to colnames(obj).") + stats::setNames(colnames(obj), + colnames(obj)) + } + stats::setNames(obj[[id_col]][,1], + rownames(obj[[id_col]])) + } + messager("Running dag_enrich_on_offsprings.") + BPPARAM <- KGExplorer::set_cores() + RES <- BiocParallel::bplapply(stats::setNames(seq(ncol(obsm)), + colnames(obsm)), + BPPARAM = BPPARAM, + function(i){ val <- obsm[,i] - nms <- names(tail(sort(val),top_n)) - nms <- replace_char_fun(nms,replace_char) + if(!is.null(value_threshold)) val <- val[val>value_threshold] + nms <- id_dict[names(tail(sort(val),top_n))]|>unname() + nms <- map_id_sep(nms,replace_char) nms <- intersect(nms,ont@terms) if(length(nms) data.table::data.table(key="term") res <- cbind(group=colnames(obsm)[i],res) - res[,combined_score:=(1-p_adjust)*z_score] - if(is.null(q_threshold)) return(res) - res_sig <- res[p_adjust - data.table::setorderv(c("combined_score"),c(-1)) - res_sig - }) |> data.table::rbindlist() -} \ No newline at end of file + res$input_ids <- paste(nms,collapse = ";") + res$input_names <- paste(KGExplorer::map_ontology_terms(ont = ont, + terms = nms, + to = "name", + verbose = FALSE), + collapse = ";") + return(res) + }) |> data.table::rbindlist(fill=TRUE) + run_dag_enrich_postprocess(RES=RES, + p_threshold=p_threshold, + q_threshold=q_threshold, + sort_by=sort_by) +} diff --git a/R/run_dag_enrich_postprocess.R b/R/run_dag_enrich_postprocess.R new file mode 100644 index 0000000..b04d490 --- /dev/null +++ b/R/run_dag_enrich_postprocess.R @@ -0,0 +1,26 @@ +run_dag_enrich_postprocess <- function(RES, + p_threshold, + q_threshold, + sort_by){ + combined_score <- p_value <- z_score <- NULL; + RES[,p_adjust_all:=stats::p.adjust(p_value, method = "fdr")] + RES[,combined_score:=(1-p_value)*z_score] + if(!is.null(p_threshold)) { + RES <- RES[p_value + data.table::setorderv(names(sort_by), + unname(sort_by)) + } + return(RES) +} \ No newline at end of file diff --git a/R/run_hpo_pseudotime.R b/R/run_hpo_pseudotime.R index 77a62ad..7d9f06a 100644 --- a/R/run_hpo_pseudotime.R +++ b/R/run_hpo_pseudotime.R @@ -1,7 +1,11 @@ run_hpo_pseudotime <- function(obj, disease_ids, - dt_genes, + root_cells, + p2d, + id_col, learn_graph_control, + use_partition=TRUE, + color_cells_by = "is_symptom", title=NULL, subtitle=NULL, color_by_symptoms=TRUE, @@ -9,20 +13,20 @@ run_hpo_pseudotime <- function(obj, bg_colors=c("#5000ff","white"), trajectory_graph_color=ggplot2::alpha("white",.8), point_alpha=.5, + add_density=TRUE, + density_filled=FALSE, + density_adjust=1, + density_alpha=point_alpha, ...){ requireNamespace("monocle3") requireNamespace("SeuratWrappers") - library(dplyr) - gene_symbol <- disease_id <- NULL; + requireNamespace("dplyr") + gene_symbol <- disease_id <- NULL; - disease_ids <- unique(disease_ids) messager("Running pseudotime analysis for:", - paste(disease_ids, collapse = "; ")) - p2d <- dt_genes[disease_id %in% disease_ids, - list(count=data.table::uniqueN(gene_symbol)), - by=c("hpo_id","disease_id")]#|> - # data.table::dcast.data.table(formula = hpo_id~disease_id, - # value.var = "count") + paste(disease_ids, collapse = "; ")) + p2d <- p2d[disease_id %in% disease_ids| + mondo_id %in% disease_ids,] if(nrow(p2d)==0) { stopper("No hpo_ids found for the given disease_ids.") } else { @@ -42,43 +46,77 @@ run_hpo_pseudotime <- function(obj, # by.x = "id", # by.y = "hpo_id", # all.x = TRUE) + hpo_ids <- if(!is.null(id_col) && + id_col %in% colnames(obj@meta.data)) { + obj@meta.data[[id_col]] + } else { + colnames(obj) + } hpo_ids <- intersect(unique(p2d$hpo_id), - colnames(obj)) + map_id_sep(unique(hpo_ids)) + ) if(length(hpo_ids)==0) { stopper("No hpo_ids overlapping with samples (colnames) in obj.") } else { messager("Found", length(hpo_ids), "matching hpo_ids within obj.") } + #### Convert to CDS format #### cds <- suppressWarnings( SeuratWrappers::as.cell_data_set(obj) - ) - - k <- min(ncol(cds[,hpo_ids]), - max(as.integer(cds@colData$seurat_clusters))) -2 + ) + #### Choose root cells #### + if(is.null(root_cells)) { + messager("Setting disease_ids as root cells.") + root_cells <- disease_ids + } + if(length(root_cells)>0){ + root_cells <- c( + colnames(obj)[map_id_sep(colnames(obj)) %in% map_id_sep(root_cells)], + if(!is.null(id_col) && id_col %in% colnames(obj@meta.data)){ + colnames(obj)[map_id_sep(obj@meta.data[[id_col]]) %in% map_id_sep(root_cells)] + }, + if("mondo_id" %in% colnames(obj@meta.data)){ + colnames(obj)[map_id_sep(obj@meta.data[["mondo_id"]]) %in% map_id_sep(root_cells)] + } + )|> unique() + if(length(root_cells)>0){ + messager("Using",length(root_cells),"root cells.") + } + } + #### Choose k #### + k <- min(ncol(cds[,unique(c(hpo_ids,root_cells))]), + max(as.integer(cds@colData$seurat_clusters), na.rm = TRUE) + ) - 2 + k <- max(k,2,na.rm = TRUE) + messager(paste0("Running monocle3::cluster_cells with k=",k)) cds_sub <- monocle3::cluster_cells( - cds = cds[,hpo_ids], + cds = cds[,unique(c(hpo_ids,root_cells))], k = k) + messager("Running monocle3::learn_graph.") cds_sub <- monocle3::learn_graph(cds_sub, + use_partition=use_partition, learn_graph_control=learn_graph_control) - cds_sub <- monocle3::order_cells(cds_sub, root_cells = colnames(cds_sub)) + if(length(root_cells)==0){ + root_cells <- colnames(cds_sub) + } + cds@colData$is_root <- as.factor(colnames(cds) %in% root_cells) + messager("Running monocle3::order_cells with",ncol(cds_sub),"cells and", + length(root_cells),"root cells. ") + cds_sub <- monocle3::order_cells(cds_sub, + root_cells = root_cells) monocle3::principal_graph(cds) <- monocle3::principal_graph(cds_sub) monocle3::principal_graph_aux(cds) <- monocle3::principal_graph_aux(cds_sub) - #### Contellation plot (dark) #### + #### Constellation plot (dark) #### if(isTRUE(color_by_symptoms)){ cds@colData$is_symptom <- as.factor(colnames(cds) %in% hpo_ids) plt <- monocle3::plot_cells(cds, - color_cells_by = "is_symptom", + color_cells_by = color_cells_by, label_cell_groups = FALSE, trajectory_graph_color = trajectory_graph_color, alpha = point_alpha, ... ) + ggplot2::scale_colour_manual(values = bg_colors) + - # ggplot2::geom_density_2d_filled(data = . %>% dplyr::filter(is_symptom==TRUE), - # contour_var = "ndensity")+ - # ggplot2::scale_fill_manual( - # values = c("transparent", - # ggplot2::alpha(pals::gnuplot(15),.7))) + ggplot2::geom_point(data = . %>% dplyr::filter(is_symptom==TRUE), color=symptom_color, alpha=point_alpha, @@ -87,10 +125,46 @@ run_hpo_pseudotime <- function(obj, ggplot2::theme(legend.position = "none") } else{ plt <- monocle3::plot_cells(cds, + color_cells_by=color_cells_by, ... ) } + # plt$layers <- plt$layers[ 1:10] + # plt + graph_layers <- seq(3,9) + if(add_density){ + if(density_filled){ + plt <- plt + + ggplot2::geom_density2d_filled(data = . %>% dplyr::filter(is_symptom==TRUE), + adjust=density_adjust) + + ggplot2::scale_fill_manual( + values = c("transparent", + ggplot2::alpha(pals::gnuplot(15),density_alpha)) + ) + } else { + plt <- plt + + ggplot2::geom_density2d(data = . %>% dplyr::filter(is_symptom==TRUE), + alpha=density_alpha, + color=symptom_color, + adjust=density_adjust) + } + + } + if(length(root_cells)>0){ + plt <- plt + + ggplot2::geom_point(data = . %>% dplyr::filter(is_root==TRUE), + color="green", + shape=23, + alpha=point_alpha*1.5, + stroke=1.5, + size=3) + + } plt <- plt + ggplot2::labs(title = title, subtitle = subtitle) + #### Reorder layers so that graph and nodes are on top #### + plt$layers <- c(plt$layers[-graph_layers],plt$layers[graph_layers]) + ## Increase alpha of graph + plt$layers[[6]]$aes_params$alpha <- .9 return( list(data=cds, plot=plt) diff --git a/R/run_integration.R b/R/run_integration.R new file mode 100644 index 0000000..84b2cba --- /dev/null +++ b/R/run_integration.R @@ -0,0 +1,138 @@ +#' Run integration +#' +#' Run integration pipeline on a \link{Seurat} object. +#' @inheritParams Seurat::IntegrateLayers +#' @inheritParams Seurat::SplitObject +#' @inheritParams Seurat::ScaleData +#' @inheritParams Seurat::CCAIntegration +#' @inheritParams scKirby::process_seurat +#' @inheritDotParams Seurat::IntegrateLayers +#' @export +#' @examples +#' obj <- scKirby::example_obj("seurat") +#' obj2 <- run_integration(obj, split.by="groups") +run_integration <- function(obj, + split.by, + assay = Seurat::DefaultAssay(obj), + nfeatures = nrow(obj), + vars.to.regress = NULL, + dims = seq(100), + save_path = tempfile(fileext = ".rds"), + pipeline = c("seuratv5","seuratv4"), + method = Seurat::CCAIntegration, + k.weight = 100, + orig.reduction = "pca", + new.reduction = "cca", + cluster_reduction = new.reduction, + verbose = TRUE, + ...){ + # SeuratWrappers::scVIIntegration() + # SeuratWrappers::FastMNNIntegration() + + pipeline <- match.arg(pipeline) + if(file.exists(save_path)){ + message("Loading cached file: ",save_path) + obj <- readRDS(save_path) + } else { + force(obj) + force(split.by) + ## remove traits with no gene associations + # obj <- obj[,obj$nFeature_score!=0] + + # obj_hpo <- Seurat::FindVariableFeatures(obj_hpo, nfeatures = nrow(obj_hpo)) + # var_features <- lapply(Seurat::SplitObject(obj_ot, split.by = "orig.ident"), + # function(x){ + # cat(dim(x),"\n") + # if(ncol(x)<2) return(NULL) + # x <- Seurat::FindVariableFeatures(x, nfeatures = nrow(x)) + # Seurat::VariableFeatures(x) + # }) + # ### Get union of top 2000 genes from each gene lists ### + # var_features <- Reduce(union, lapply(c(var_features, + # HPO=Seurat::VariableFeatures(obj_hpo) + # ), + # head, 1000)) + + max_dim <- min( + data.table::data.table(obj@meta.data)[,.N,by=c(split.by)]$N + ) - 1 + if(max(dims)>max_dim){ + messager("Reducing number of dimensions to match split data:", + max_dim) + dims <- dims[seq(max_dim)] + } + #### Seurat v5 strategy #### + if(pipeline=="seuratv5"){ + obj[[assay]] <- split(obj[[assay]], + f = obj@meta.data[[split.by]]) + obj <- Seurat::NormalizeData(obj) + obj <- Seurat::FindVariableFeatures(obj, + nfeatures = nfeatures) + obj <- Seurat::ScaleData(obj, + vars.to.regress = vars.to.regress) + # obj <- Seurat::RunPCA(obj, npcs=max(dims)) + obj <- Seurat::IntegrateLayers(object = obj, + method = method, + orig.reduction = orig.reduction, + new.reduction = new.reduction, + features = rownames(obj), + dims = dims, + k.weight = min(k.weight,max_dim), + verbose = verbose) + # re-join layers after integration + obj[[assay]] <- SeuratObject::JoinLayers(obj[[assay]]) + obj <- Seurat::FindNeighbors(obj, + reduction = cluster_reduction, + dims = dims) + obj <- Seurat::FindClusters(obj) + max_dims <- ncol(obj@reductions[[new.reduction]]) + if(max(dims)>max_dims){ + messager("Reducing number of dimensions to match reduced data:", + max_dims) + dims <- dims[seq(max_dims)] + } + obj <- Seurat::RunUMAP(obj, + reduction = new.reduction, + dims = dims, + return.model = TRUE) + } else if(pipeline=="seuratv4"){ + #### Seurat 10) { @@ -23,6 +24,16 @@ plot_feature_density( \arguments{ \item{features}{Features (e.g. genes) to visualize} +\item{method}{Kernel density estimation method: +\itemize{ +\item \code{ks}: Computes density using the \code{kde} function from the + \code{ks} package. +\item \code{wkde}: Computes density using a modified version of the +\code{kde2d} function from the \code{MASS} +package to allow weights. Bandwidth selection from the \code{ks} package + is used instead. +}} + \item{joint}{Return joint density plot? By default \code{FALSE}} \item{joint_only}{Only return the joint density plot.} @@ -59,15 +70,6 @@ patchwork (see example).} computed reduction is visualized} \item{\code{dims}}{Vector of length 2 specifying the dimensions to be plotted. By default, the first two dimensions are considered.} - \item{\code{method}}{Kernel density estimation method: -\itemize{ -\item \code{ks}: Computes density using the \code{kde} function from the - \code{ks} package. -\item \code{wkde}: Computes density using a modified version of the -\code{kde2d} function from the \code{MASS} -package to allow weights. Bandwidth selection from the \code{ks} package - is used instead. -}} \item{\code{adjust}}{Numeric value to adjust to bandwidth. Default: 1. Not available for \code{ks} method} \item{\code{size}}{Size of the geom to be plotted (e.g. point size)} diff --git a/man/plot_hpo_pseudotime.Rd b/man/plot_hpo_pseudotime.Rd index 7e4d190..0b25a36 100644 --- a/man/plot_hpo_pseudotime.Rd +++ b/man/plot_hpo_pseudotime.Rd @@ -6,9 +6,13 @@ \usage{ plot_hpo_pseudotime( obj, - dt_genes = HPOExplorer::load_phenotype_to_genes(1), - disease_ids = dt_genes$disease_id[1:3], - learn_graph_control = list(prune_graph = FALSE), + p2d = HPOExplorer::load_phenotype_to_genes(1), + disease_ids = p2d$disease_id[1:3], + root_cells = NULL, + id_col = NULL, + learn_graph_control = list(prune_graph = FALSE, nn.cores = + KGExplorer::set_cores()$workers), + use_partition = TRUE, merge_trajectories = TRUE, symptom_color = "red", bg_colors = c("#5000ff", "white"), @@ -16,7 +20,17 @@ plot_hpo_pseudotime( title_col = NULL, title_width = 100, color_by_symptoms = TRUE, + color_cells_by = if (color_by_symptoms) { + "is_symptom" + } else { + "cluster" + }, point_alpha = 0.5, + add_density = TRUE, + density_filled = FALSE, + density_adjust = 1, + density_alpha = point_alpha, + title = stringr::str_wrap(paste0(unique(disease_ids), collapse = "; "), 20 * 2.5), show_plot = TRUE, ... ) @@ -24,14 +38,21 @@ plot_hpo_pseudotime( \arguments{ \item{obj}{\link{Seurat} object of HPO terms generated by \link{prepare_hpo}.} -\item{dt_genes}{\link{data.table} of phenotype to gene associations.} +\item{disease_ids}{One or more disease IDs found within \code{p2d}.} -\item{disease_ids}{One or more disease IDs found within \code{dt_genes}.} +\item{root_cells}{NULL or a vector of starting cells. If provided, +pseudotime will start (i.e. be zero) at these cells. Both +\code{root_pr_nodes} and \code{root_cells} cannot be provided.} \item{learn_graph_control}{NULL or a list of control parameters to be passed to the reversed graph embedding function. Default is NULL. A list of potential control parameters is provided in details.} +\item{use_partition}{logical parameter that determines whether to use +partitions calculated during \code{cluster_cells} and therefore to learn +disjoint graph in each partition. When \code{use_partition = FALSE}, a +single graph is learned across all partitions. Default is TRUE.} + \item{...}{ Arguments passed on to \code{\link[monocle3:plot_cells]{monocle3::plot_cells}} \describe{ diff --git a/man/plot_preservation_histo.Rd b/man/plot_preservation_histo.Rd new file mode 100644 index 0000000..110e12c --- /dev/null +++ b/man/plot_preservation_histo.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_preservation_histo.R +\name{plot_preservation_histo} +\alias{plot_preservation_histo} +\title{Plot preservation histogram} +\usage{ +plot_preservation_histo( + consist_dt, + lvl = "hd", + key = c(hd = "High-dimensional", md = "Mid-dimensional", ld = "Low-dimensional"), + title = paste(key[lvl], "space"), + subtitle = paste0("(", consist_dt[[paste0("dim_", lvl)]][1], " dimensions)") +) +} +\description{ +Plot the correlation between equivalent terms in +high-, mid-, and low-dimensional space. +} diff --git a/man/plot_top_celltypes.Rd b/man/plot_top_celltypes.Rd index ebf062c..5e05c3c 100644 --- a/man/plot_top_celltypes.Rd +++ b/man/plot_top_celltypes.Rd @@ -9,8 +9,7 @@ plot_top_celltypes( types = c("radial", "pie", "reduction"), reduction = "umap", prefix = "^q[.]", - cluster_vars = c("seurat_clusters", "dag_enrich.name", "dag_enrich.name_wrap", - "cluster_colors"), + cluster_vars = c("seurat_clusters"), color_col = cluster_vars[1], label_col = cluster_vars[1], enriched_proportion_threshold = 0.2, diff --git a/man/prepare_hpo.Rd b/man/prepare_hpo.Rd index e4263f4..209f38d 100644 --- a/man/prepare_hpo.Rd +++ b/man/prepare_hpo.Rd @@ -12,19 +12,22 @@ prepare_hpo( ct_results = MSTExplorer::map_celltype(MSTExplorer::load_example_results()), id_types = c("hpo_id"), min_quantile = 35, + min_genes = NULL, min_value = NULL, celltype_col = c("cl_name", "cl_id", "CellType"), + celltype_value = "q", value.var = "evidence_score_sum", default_assay = "score", nfeatures = NULL, - vars.to.regress = paste0("nFeature_", default_assay), + vars.to.regress = NULL, run_nlp = FALSE, run_impute = FALSE, workers = 1, seed = 2023, verbose = TRUE, save_path = NULL, - force_new = FALSE + force_new = FALSE, + ... ) } \arguments{ @@ -45,6 +48,9 @@ disease and HPO terms.} \item{min_quantile}{\link{numeric} minimum quantile for filtering cell type enrichment results.} +\item{min_genes}{The minimum number of genes of phenotypes/diseases +to include from the data matrix.} + \item{min_value}{The minimum sum of columns to include from the data matrix.} \item{celltype_col}{Name of the cell type column.} @@ -74,11 +80,34 @@ using \pkg{future}.} \item{seed}{Random seed to set.} \item{verbose}{Print messages.} + +\item{...}{ + Arguments passed on to \code{\link[scKirby:process_seurat]{scKirby::process_seurat}} + \describe{ + \item{\code{obj}}{A single-cell object supported by \pkg{scKirby}. +See \link[scKirby]{converters} for a table of all supported conversions.} + \item{\code{meta.data}}{Additional cell-level metadata to add to the Seurat object. +Should be a \code{\link[base]{data.frame}} where the rows are cell names and +the columns are additional metadata fields. Row names in the metadata need +to match the column names of the counts matrix.} + \item{\code{dims}}{Which dimensions to use as input features, used only if +\code{features} is NULL} + \item{\code{add_specificity}}{Add a new assay called "specificity" by deriving +specificity scores from the input data.} + \item{\code{n.components}}{The dimension of the space to embed into.} + \item{\code{cluster_reduction}}{Recompute neighbors graph based on UMAP +to get clusters that best reflect UMAP space. +For this same reason, only cluster in two dimensions, +because this is the view we most often use. +That said, this may reduce the generalisability of these clusters/graph.} + \item{\code{max_mem}}{Max limit on memory usage, to be passed to environmental +variable \code{future.globals.maxSize}.} + }} } \description{ Convert the Human Phenotype Ontology (HPO) to a \link{Seurat} object with extensive metadata. } \examples{ -obj <- prepare_hpo(id_types="hpo_id") +obj <- prepare_hpo(id_types="hpo_id", min_genes=10) } diff --git a/man/prepare_opentargets.Rd b/man/prepare_opentargets.Rd index bfb2ec4..62a14bb 100644 --- a/man/prepare_opentargets.Rd +++ b/man/prepare_opentargets.Rd @@ -9,7 +9,7 @@ prepare_opentargets( formula = "approvedSymbol ~ diseaseId", nfeatures = NULL, default_assay = "score", - vars.to.regress = paste0("nFeature_", default_assay), + vars.to.regress = NULL, save_path = NULL, force_new = FALSE, ... diff --git a/man/run_dag_enrich.Rd b/man/run_dag_enrich.Rd index 835b797..65d8cb8 100644 --- a/man/run_dag_enrich.Rd +++ b/man/run_dag_enrich.Rd @@ -7,16 +7,25 @@ run_dag_enrich( obj, ont, + id_col = NULL, reduction = NULL, cluster_col = NULL, min_hits = 3, min_offspring = 100, trans_fun = NULL, replace_char = list(. = ":", `_` = ":"), + value_threshold = NULL, + p_threshold = 0.05, q_threshold = 0.05, - top_n = 100 + top_n = 100, + sort_by = c(log2_fold_enrichment = -1) ) } +\arguments{ +\item{value_threshold}{Minimum weight to include a term +(after applying \code{truns_fun}). + Only used when \code{reduction} is provided.} +} \description{ Run enrichment of terms in a DAG ontology. } diff --git a/man/run_integration.Rd b/man/run_integration.Rd new file mode 100644 index 0000000..24786c2 --- /dev/null +++ b/man/run_integration.Rd @@ -0,0 +1,74 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/run_integration.R +\name{run_integration} +\alias{run_integration} +\title{Run integration} +\usage{ +run_integration( + obj, + split.by, + assay = Seurat::DefaultAssay(obj), + nfeatures = nrow(obj), + vars.to.regress = NULL, + dims = seq(100), + save_path = tempfile(fileext = ".rds"), + pipeline = c("seuratv5", "seuratv4"), + method = Seurat::CCAIntegration, + k.weight = 100, + orig.reduction = "pca", + new.reduction = "cca", + cluster_reduction = new.reduction, + verbose = TRUE, + ... +) +} +\arguments{ +\item{obj}{A single-cell object supported by \pkg{scKirby}. +See \link[scKirby]{converters} for a table of all supported conversions.} + +\item{split.by}{Attribute for splitting. Default is "ident". Currently +only supported for class-level (i.e. non-quantitative) attributes.} + +\item{assay}{Name of assay for integration} + +\item{nfeatures}{Number of features to select as top variable features; +only used when \code{selection.method} is set to \code{'dispersion'} or +\code{'vst'}} + +\item{vars.to.regress}{Variables to regress out (previously latent.vars in +RegressOut). For example, nUMI, or percent.mito.} + +\item{dims}{Dimensions of dimensional reduction to use for integration} + +\item{method}{Integration method function} + +\item{k.weight}{Number of neighbors to consider when weighting anchors} + +\item{orig.reduction}{Name of dimensional reduction for correction} + +\item{new.reduction}{Name of new integrated dimensional reduction} + +\item{cluster_reduction}{Recompute neighbors graph based on UMAP +to get clusters that best reflect UMAP space. +For this same reason, only cluster in two dimensions, +because this is the view we most often use. +That said, this may reduce the generalisability of these clusters/graph.} + +\item{verbose}{Displays a progress bar for scaling procedure} + +\item{...}{ + Arguments passed on to \code{\link[Seurat:IntegrateLayers]{Seurat::IntegrateLayers}} + \describe{ + \item{\code{object}}{A \code{\link[SeuratObject]{Seurat}} object} + \item{\code{features}}{A vector of features to use for integration} + \item{\code{layers}}{Names of normalized layers in \code{assay}} + \item{\code{scale.layer}}{Name(s) of scaled layer(s) in \code{assay}} + }} +} +\description{ +Run integration pipeline on a \link{Seurat} object. +} +\examples{ +obj <- scKirby::example_obj("seurat") +obj2 <- run_integration(obj, split.by="groups") +} diff --git a/man/scale_color_nightlight.Rd b/man/scale_color_nightlight.Rd new file mode 100644 index 0000000..c5ea271 --- /dev/null +++ b/man/scale_color_nightlight.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/color_nightlight.R +\name{scale_color_nightlight} +\alias{scale_color_nightlight} +\title{Scale color nightlight} +\usage{ +scale_color_nightlight( + n = 3, + colors = pals::gnuplot(n = n + 1)[-(seq(as.integer((n + 1) * 0.2)))], + reverse = FALSE, + alpha = NULL, + ... +) +} +\arguments{ +\item{n}{Number of colors to return.} + +\item{reverse}{Reverse the color gradient.} + +\item{alpha}{Transparency of the colors.} + +\item{...}{ + Arguments passed on to \code{\link[ggplot2:scale_gradient]{ggplot2::scale_color_gradientn}} + \describe{ + \item{\code{name}}{The name of the scale. Used as the axis or legend title. If +\code{waiver()}, the default, the name of the scale is taken from the first +mapping used for that aesthetic. If \code{NULL}, the legend title will be +omitted.} + \item{\code{space}}{colour space in which to calculate gradient. Must be "Lab" - +other values are deprecated.} + \item{\code{na.value}}{Colour to use for missing values} + \item{\code{guide}}{Type of legend. Use \code{"colourbar"} for continuous +colour bar, or \code{"legend"} for discrete colour legend.} + \item{\code{aesthetics}}{Character string or vector of character strings listing the +name(s) of the aesthetic(s) that this scale works with. This can be useful, for +example, to apply colour settings to the \code{colour} and \code{fill} aesthetics at the +same time, via \code{aesthetics = c("colour", "fill")}.} + \item{\code{colours,colors}}{Vector of colours to use for n-colour gradient.} + \item{\code{values}}{if colours should not be evenly positioned along the gradient +this vector gives the position (between 0 and 1) for each colour in the +\code{colours} vector. See \code{\link[scales:rescale]{rescale()}} for a convenience function +to map an arbitrary range to between 0 and 1.} + }} +} +\description{ +Add color gradient with nightlight colors. +}