From c3ce9073c24d1de53c1c98e9fb804bee4180de22 Mon Sep 17 00:00:00 2001
From: "Brian M. Schilder" <34280215+bschilder@users.noreply.github.com>
Date: Mon, 8 Apr 2024 15:39:57 +0100
Subject: [PATCH] Fix prepare_ funcs. Add run_integration

---
 DESCRIPTION                    |   7 +-
 NAMESPACE                      |   7 ++
 R/add_mixingmetric.R           |  41 ++++++++
 R/color_nightlight.R           |  33 +++++-
 R/get_correlated_factors.R     |  21 ++++
 R/get_variance_explained.R     |  40 ++++++++
 R/map_xref.R                   |   3 +-
 R/plot_dag_enrich.R            | 169 +++++++++++++++++++------------
 R/plot_factors_sankey.R        | 179 +++++++++++++++++++++++++++++++++
 R/plot_feature_density.R       |   6 +-
 R/plot_hpo_pseudotime.R        |  81 +++++++++++----
 R/plot_ontological_velocity.R  |   6 +-
 R/plot_preservation_histo.R    |  38 +++++++
 R/plot_top_celltypes.R         |  10 +-
 R/prepare_HPO.R                |  41 +++++---
 R/prepare_opentargets.R        |  49 ++++++---
 R/replace_char_fun.R           |  13 ++-
 R/run_dag_enrich.R             |  25 ++++-
 R/run_dag_enrich_obj.R         |  47 ++++++---
 R/run_dag_enrich_obsm.R        |  66 ++++++++----
 R/run_dag_enrich_postprocess.R |  26 +++++
 R/run_hpo_pseudotime.R         | 122 +++++++++++++++++-----
 R/run_integration.R            | 138 +++++++++++++++++++++++++
 man/add_mixingmetric.Rd        |  47 +++++++++
 man/get_variance_explained.Rd  |  22 ++++
 man/map_id_sep.Rd              |  14 +++
 man/plot_dag_enrich.Rd         |  18 ++++
 man/plot_factors_sankey.Rd     |  45 +++++++++
 man/plot_feature_density.Rd    |  20 ++--
 man/plot_hpo_pseudotime.Rd     |  31 +++++-
 man/plot_preservation_histo.Rd |  18 ++++
 man/plot_top_celltypes.Rd      |   3 +-
 man/prepare_hpo.Rd             |  35 ++++++-
 man/prepare_opentargets.Rd     |   2 +-
 man/run_dag_enrich.Rd          |  11 +-
 man/run_integration.Rd         |  74 ++++++++++++++
 man/scale_color_nightlight.Rd  |  47 +++++++++
 37 files changed, 1340 insertions(+), 215 deletions(-)
 create mode 100644 R/add_mixingmetric.R
 create mode 100644 R/get_correlated_factors.R
 create mode 100644 R/get_variance_explained.R
 create mode 100644 R/plot_factors_sankey.R
 create mode 100644 R/plot_preservation_histo.R
 create mode 100644 R/run_dag_enrich_postprocess.R
 create mode 100644 R/run_integration.R
 create mode 100644 man/add_mixingmetric.Rd
 create mode 100644 man/get_variance_explained.Rd
 create mode 100644 man/map_id_sep.Rd
 create mode 100644 man/plot_factors_sankey.Rd
 create mode 100644 man/plot_preservation_histo.Rd
 create mode 100644 man/run_integration.Rd
 create mode 100644 man/scale_color_nightlight.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 9f1f916..83d82a5 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -81,7 +81,9 @@ Suggests:
     downloadR,
     HPOExplorer,
     Nebulosa,
-    arrow
+    arrow,
+    ggsankey,
+    Hmisc
 Remotes:
     github::neurogenomics/scNLP,
     github::neurogenomics/scKirby,
@@ -89,7 +91,8 @@ Remotes:
     github::neurogenomics/HPOExplorer,
     github::RajLabMSSM/echotabix,
     github::RajLabMSSM/downloadR,
-    github::satijalab/seurat-wrappers
+    github::satijalab/seurat-wrappers,
+    github::davidsjoberg/ggsankey
 RoxygenNote: 7.3.1
 VignetteBuilder: knitr 
 Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
index e89f35f..38e555b 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,6 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
 export(add_cluster_colors)
+export(add_mixingmetric)
 export(adjust_zstat)
 export(dt_to_granges)
 export(find_neighbors)
@@ -15,11 +16,13 @@ export(get_mhc_genes)
 export(get_top_factors)
 export(get_top_features)
 export(get_top_traits)
+export(get_variance_explained)
 export(import_abc)
 export(is_granges)
 export(iterate_gsea)
 export(iterate_lm)
 export(magma_matrix)
+export(map_id_sep)
 export(map_phenotypes)
 export(map_snps2genes)
 export(opengwas_meta)
@@ -28,10 +31,12 @@ export(phenomix_query)
 export(phenomix_query_batched)
 export(plot_dag_enrich)
 export(plot_enrichment)
+export(plot_factors_sankey)
 export(plot_feature_density)
 export(plot_highlights)
 export(plot_hpo_pseudotime)
 export(plot_ontological_velocity)
+export(plot_preservation_histo)
 export(plot_reduction)
 export(plot_top_celltypes)
 export(plot_trait_cor)
@@ -44,12 +49,14 @@ export(run_autoencoder)
 export(run_dag_enrich)
 export(run_gprofiler)
 export(run_imputation)
+export(run_integration)
 export(run_lda)
 export(run_mofa2)
 export(run_pca)
 export(run_sparsesvd)
 export(run_umap)
 export(run_variancePartition)
+export(scale_color_nightlight)
 export(theme_nightlight)
 import(GenomicFiles)
 import(HPOExplorer)
diff --git a/R/add_mixingmetric.R b/R/add_mixingmetric.R
new file mode 100644
index 0000000..f7b23f2
--- /dev/null
+++ b/R/add_mixingmetric.R
@@ -0,0 +1,41 @@
+#' Add mixing metric
+#' 
+#' Compute the mixing metric for a given reduction and add it to the metadata 
+#' of a \link{Seurat} object. See \link[Seurat]{MixingMetric} for more details.
+#' @param invert Invert the mixing metric such that higher values 
+#' indicate better mixing.
+#' @param normalise Normalise the mixing metric to the range [0,1].
+#' @inheritParams Seurat::MixingMetric
+#' @inheritDotParams Seurat::MixingMetric
+#' @export
+add_mixingmetric <- function(obj,
+                             reduction,
+                             new_col=paste0("MixingMetric_",reduction),
+                             grouping.var = "source",
+                             k = 5,
+                             max.k = 300, 
+                             dims = NULL,
+                             invert = TRUE,
+                             normalise = FALSE,
+                             ...){
+    if(is.null(dims)){
+        dims <- seq(ncol(obj[[reduction]]))
+    }
+    message("Computing mixing metric for reduction: ",reduction,
+            " (",max(dims)," dimensions)")
+    obj@meta.data[[new_col]] <- Seurat::MixingMetric(
+        obj,
+        reduction = reduction,
+        grouping.var = grouping.var,
+        k = k,
+        max.k = max.k, 
+        dims = dims,
+        ...) 
+    if(isTRUE(invert)){
+        obj@meta.data[[new_col]] <- max.k - obj@meta.data[[new_col]]
+    }
+    if(isTRUE(normalise)){
+        obj@meta.data[[new_col]] <- obj@meta.data[[new_col]] / max.k
+    } 
+    return(obj)
+}
\ No newline at end of file
diff --git a/R/color_nightlight.R b/R/color_nightlight.R
index c1c218e..606500b 100644
--- a/R/color_nightlight.R
+++ b/R/color_nightlight.R
@@ -1,4 +1,31 @@
-color_nightlight <- function(n = 40){
-    ggplot2::scale_color_gradientn(
-        colors=pals::gnuplot(n = n)[-(seq(as.integer(n*0.2)))])
+#' Scale color nightlight
+#' 
+#' Add color gradient with nightlight colors.
+#' @export
+#' @param reverse Reverse the color gradient.
+#' @param alpha Transparency of the colors.
+#' @inheritParams pals::gnuplot
+#' @inheritParams ggplot2::scale_color_gradientn
+#' @inheritDotParams ggplot2::scale_color_gradientn
+scale_color_nightlight <- function(n = 3,
+                                   colors=pals::gnuplot(n = n+1)[
+                                       -(seq(as.integer((n+1)*0.2)))
+                                       ],
+                                   reverse=FALSE,
+                                   alpha=NULL,
+                                   ...){
+    if(!is.null(alpha)) {
+        if(length(alpha)==1){
+            colors <- ggplot2::alpha(colors,alpha)    
+        } else if(length(alpha)==length(colors)){
+            for(i in seq_along(colors)){
+                colors[[i]] <- ggplot2::alpha(colors[[i]],alpha[i])
+            }
+        } else {
+            stopper("Length of alpha must be 1 or equal to length of colors.")
+        }
+    }
+    if(isTRUE(reverse)) colors <- rev(colors)
+    ggplot2::scale_color_gradientn(colors=colors,
+                                   ...)
 }
\ No newline at end of file
diff --git a/R/get_correlated_factors.R b/R/get_correlated_factors.R
new file mode 100644
index 0000000..61aabbb
--- /dev/null
+++ b/R/get_correlated_factors.R
@@ -0,0 +1,21 @@
+get_correlated_factors <- function(obj,
+                                   keys,
+                                   metadata_var,
+                                   p_threshold=.05,
+                                   r_threshold=.1){
+    requireNamespace("Hmisc")
+    obsm <- scKirby::get_obsm(obj,
+                              keys = keys,
+                              n=1)
+    obsm <- cbind(obsm, 
+                  nFeature_score=obj$nFeature_score[rownames(obsm)])
+    obsm_rcor <- Hmisc::rcorr(obsm)
+    obsm_cor_sig <- sort(
+        abs(obsm_rcor$r[metadata_var,][obsm_rcor$P[metadata_var,]<p_threshold]), 
+        decreasing = TRUE
+    ) 
+    omit_dims <- as.numeric(gsub(".*_","",
+                                 names(obsm_cor_sig[obsm_cor_sig>r_threshold]))
+                            )
+    return(omit_dims)
+}
\ No newline at end of file
diff --git a/R/get_variance_explained.R b/R/get_variance_explained.R
new file mode 100644
index 0000000..1639069
--- /dev/null
+++ b/R/get_variance_explained.R
@@ -0,0 +1,40 @@
+#' Get variance explained
+#' 
+#' Get the proportion of total variance explained
+#'  by a given dimensionality reduction 
+#'  (0-1 where 1 indicates 100% of variance explained). 
+#' @export
+#' @examples
+#' obj <- get_HPO()
+#' out <- get_variance_explained(obj)
+get_variance_explained <- function(obj,
+                                   reduction = names(obj@reductions)[1],
+                                   layer = "scale.data",
+                                   dims=NULL
+                                   ){
+    dr <- obj[[reduction]] 
+    mat <- Seurat::GetAssayData(obj, layer = layer)
+    total_variance <- sum(matrixStats::rowVars(mat)) 
+    ## EigenValues
+    if(length(dr@stdev)==0){
+        messager("Computing stdev for reduction.")
+        dr@stdev <- apply(obj[[reduction]]@cell.embeddings,2,
+                          stats::sd)|>unname()
+    }
+    eigValues <- (dr@stdev)^2|>`names<-`(colnames(dr@cell.embeddings)) 
+    if(!is.null(dims)) {
+        if(length(dims)>length(eigValues) || 
+           max(dims)>length(eigValues) ){
+            stopper(
+                "The length of dims must be less than or equal to",
+                "the number of dimensions."
+                )
+        }  
+        eigValues <- eigValues[dims]
+    }
+    varExplained <- eigValues / total_variance
+    messager("Proportion of total variance explained by",
+             length(eigValues),"dimensions:",
+             round(sum(varExplained),3))
+    return(varExplained)
+}
\ No newline at end of file
diff --git a/R/map_xref.R b/R/map_xref.R
index 9f5f7c4..7666837 100644
--- a/R/map_xref.R
+++ b/R/map_xref.R
@@ -12,7 +12,8 @@ map_xref <- function(dat,
             if(length(r)==0) NA else unlist(r)
         })]
     }
+    dat[,(new_col):=map_id_sep(get(new_col))]
     dat[get(new_col)=="NA",(new_col):=NA]
     messager("% of non-NA rows:",
              round(sum(!is.na(dat[[new_col]]))/nrow(dat)*100,2),v=verbose)
-}
\ No newline at end of file
+}
diff --git a/R/plot_dag_enrich.R b/R/plot_dag_enrich.R
index de57099..a32a4e5 100644
--- a/R/plot_dag_enrich.R
+++ b/R/plot_dag_enrich.R
@@ -1,6 +1,7 @@
 #' Plot enrichment of terms in a DAG ontology
 #' 
 #' Plot enrichment of terms in a DAG ontology.
+#' @inheritParams ggrepel::geom_label_repel
 #' @inheritParams Seurat::LabelClusters
 #' @inheritDotParams Seurat::LabelClusters
 #' @export
@@ -15,11 +16,16 @@ plot_dag_enrich <- function(obj,
                             min_depth=NULL,
                             exact_depth=NULL,
                             exact_ont_ancestors=FALSE,
+                            sort_by=c("log2_fold_enrichment"=-1),
                             cluster_col="seurat_clusters",
+                            id_col=NULL,
                             label_col="dag_enrich.name_wrap",
+                            cluster_id_alpha=.9,
+                            cluster_id_size=3,
                             color_col=cluster_col,
                             replace_char=list("."=":",
                                               "_"=":"),
+                            p_threshold=.05,
                             q_threshold=.05,
                             preferred_palettes="kovesi.cyclic_mygbm_30_95_c78",
                             add_labels=TRUE,
@@ -32,79 +38,112 @@ plot_dag_enrich <- function(obj,
                             repel = TRUE, 
                             box = TRUE,
                             box.padding = 1,
+                            max.overlaps = 100,
+                            na.value = "grey50",
+                            force_new=FALSE,
+                            return_obj=FALSE,
                             show_plot=TRUE,
                             ...){
-    term <- depth <- NULL;
-    
-    cluster_enrich <- run_dag_enrich_obj(obj=obj, 
-                                         ont=ont,
-                                         cluster_col=cluster_col,
-                                         min_hits=min_hits,
-                                         min_offspring=min_offspring,
-                                         replace_char=replace_char,
-                                         q_threshold=q_threshold) 
-    #### Constrain my ancestors in ontology ####
-    if(isTRUE(exact_ont_ancestors)){
-        cluster_enrich <- cluster_enrich[term %in% ont@elementMetadata$ancestor]    
-    }
-    #### Filter on min_depth ####
-    if(!is.null(min_depth)){
-        cluster_enrich <- cluster_enrich[depth<=min_depth]
-    }
-    #### Filter on exact_depth ####
-    if(!is.null(exact_depth)){
-        cluster_enrich <- cluster_enrich[depth==exact_depth]
-    }
-    #### Take only the top hit per cluster ####
-    # cluster_enrich|>data.table::setorderv("z_score",-1)
-    cluster_enrich_top <- cluster_enrich[,.SD[1],by=c("group")]
-    names(cluster_enrich_top) <- paste0("dag_enrich.",names(cluster_enrich_top))
-    obj@meta.data$id <- colnames(obj)
-    meta <- merge(obj@meta.data, 
-                  cluster_enrich_top,
-                  by.x="seurat_clusters",
-                  by.y="dag_enrich.group", 
-                  all.x=TRUE)
-    rownames(meta) <- meta$id
-    meta$dag_enrich.name_wrap <- stringr::str_wrap(meta$dag_enrich.name,
-                                                   width = label_width)
-    obj@meta.data <- meta
-    na.value <- "grey50"
+    term <- depth <- dag_enrich.name <- dag_enrich.name_wrap <- NULL;
+    if("dag_enrich.name" %in% colnames(obj@meta.data) && 
+       isFALSE(force_new)){
+        messager("Using existing enrichment results in obj.",
+                 "To rerun, set force_new=TRUE.")
+        #### Reconstruct cluster_enrich results from metadata ####
+        dag_cols <- grep("^dag_enrich\\.",colnames(obj@meta.data), value = TRUE)
+        cluster_enrich <- data.table::data.table(
+            unique(obj@meta.data[,c(cluster_col,dag_cols)])
+        )
+    } else {
+        #### Remove any existing "dag_enrich." columns ####
+        dag_cols <- grep("^dag_enrich\\.",colnames(obj@meta.data), value = TRUE)
+        if(length(dag_cols)>0) obj@meta.data[,dag_cols] <- NULL
+        #### Run enrichment ####
+        cluster_enrich <- run_dag_enrich_obj(obj=obj, 
+                                             ont=ont,
+                                             id_col=id_col,
+                                             cluster_col=cluster_col,
+                                             min_hits=min_hits,
+                                             min_offspring=min_offspring,
+                                             replace_char=replace_char,
+                                             p_threshold=p_threshold,
+                                             q_threshold=q_threshold,
+                                             sort_by=sort_by) 
+        #### Constrain my ancestors in ontology ####
+        if(isTRUE(exact_ont_ancestors)){
+            cluster_enrich <- cluster_enrich[term %in% ont@elementMetadata$ancestor]    
+        }
+        #### Filter on min_depth ####
+        if(!is.null(min_depth)){
+            cluster_enrich <- cluster_enrich[depth<=min_depth]
+        }
+        #### Filter on exact_depth ####
+        if(!is.null(exact_depth)){
+            cluster_enrich <- cluster_enrich[depth==exact_depth]
+        }
+        #### Take only the top hit per cluster #### 
+        cluster_enrich <- cluster_enrich[,.SD[1],by=c("group")]
+        names(cluster_enrich) <- paste0("dag_enrich.",names(cluster_enrich))
+        cluster_enrich[,dag_enrich.name_wrap:=stringr::str_wrap(
+            dag_enrich.name,
+            width = label_width)]
+        obj@meta.data$id <- colnames(obj)
+        meta <- merge(obj@meta.data, 
+                      cluster_enrich,
+                      by.x="seurat_clusters",
+                      by.y="dag_enrich.group", 
+                      all.x=TRUE)
+        rownames(meta) <- meta$id
+        obj@meta.data <- meta[colnames(obj),]
+    }  
+    obj <- add_cluster_colors(obj)
+    cluster_colors <- add_cluster_colors(obj, return_dict = TRUE)
     dr <- Seurat::DimPlot(obj,
-                          cols = KGExplorer::map_colors(
-                              obj@meta.data,
-                              as = "dict",
-                              columns = label_col,
-                              preferred_palettes = preferred_palettes)[[1]],
-                          alpha=point_alpha,
+                          alpha = point_alpha,
                           pt.size = point_size,
                           na.value = na.value,
-                          group.by = label_col, 
+                          label = TRUE,
+                          label.color = ggplot2::alpha("white",cluster_id_alpha),
+                          label.size = cluster_id_size,
+                          group.by = color_col, 
                           reduction = reduction) +
         Seurat::NoLegend() +
-        theme_nightlight()
+        ggplot2::scale_color_manual(values = cluster_colors) +
+        theme_nightlight() 
     if(isTRUE(add_labels)){
-        dark_theme <- dr[[1]]$theme$plot.background$fill=="black"
-        dr <- Seurat::LabelClusters(
-            dr,
-            id = label_col,
-            box = box,
-            fill = ggplot2::alpha(
-                setdiff(unique(ggplot2::ggplot_build(dr)$data[[1]]$colour),
-                        na.value),
-                label_alpha),
-            color=if(dark_theme) "white" else "black",
-            arrow = ggplot2::arrow(length = ggplot2::unit(0.01, "npc"),
-                                   type="closed"),
-            repel = repel, 
-            size = label_size, 
-            box.padding = box.padding,
-            ...) 
+        dark_theme <- dr[[1]]$theme$plot.background$fill=="black" 
+        cluster_enrich2 <- merge(dr[[1]]$layers[[2]]$data, 
+                                 cluster_enrich, 
+                                 by.x=cluster_col, 
+                                 by.y="dag_enrich.group")
+        # Seurat::LabelClusters ### <-- Not very flexible and has uninformative errors.
+        dr <- dr +
+         ggrepel::geom_label_repel(
+             data = cluster_enrich2, 
+             ggplot2::aes(x=!!ggplot2::sym(names(cluster_enrich2)[[2]]),
+                          y=!!ggplot2::sym(names(cluster_enrich2)[[3]]),
+                          label=!!ggplot2::sym(label_col),
+                          fill=!!ggplot2::sym(color_col)
+                          ),
+             color=if(dark_theme) "white" else "black",
+             arrow = ggplot2::arrow(length = ggplot2::unit(0.01, "npc"),
+                                    type="closed"), 
+             size = label_size,
+             box.padding = box.padding,
+             point.padding = ggplot2::unit(0.02, "npc"),
+             max.overlaps = max.overlaps,
+             min.segment.length = 0,
+             ...
+             ) +
+            ggplot2::scale_fill_manual(
+                values = ggplot2::alpha(cluster_colors,label_alpha)
+            ) 
     }
+    #### Show #### 
     if(isTRUE(show_plot)) methods::show(dr)
-    return(
-        list(obj=obj,
-             plot=dr,
-             data=cluster_enrich)
-    )
+    #### Return ####
+    out <- list(plot=dr,
+                data=cluster_enrich)
+    if(isTRUE(return_obj)) out$obj <- obj
+    return(out)
 }
diff --git a/R/plot_factors_sankey.R b/R/plot_factors_sankey.R
new file mode 100644
index 0000000..646ffb8
--- /dev/null
+++ b/R/plot_factors_sankey.R
@@ -0,0 +1,179 @@
+#' Plot factors: Sankey
+#' 
+#' Create a Sankey plot from enrichment results performed on the 
+#' feature weights and sample loadings from a factorization model.
+#' @inheritParams ggsankey::geom_sankey
+#' @inheritDotParams ggsankey::geom_sankey
+#' @inheritParams ggplot2::scale_fill_discrete
+#' @export
+plot_factors_sankey <- function(factor.traits,
+                                factor.celltypes,
+                                varm = NULL, 
+                                max_factors = NULL,
+                                q_threshold=0.05,
+                                logfc_threshold=NULL,
+                                value_var="q",
+                                drop=TRUE,
+                                flow.alpha = .8,
+                                label_size = 3,
+                                label_color="white",
+                                label_fill=ggplot2::alpha("black",.7),
+                                xtext_size=12,
+                                show.legend=FALSE,
+                                ...
+                                ){
+    
+    p_adjust_all <- factor_num <- NULL;
+    
+    X.traits <- data.table::dcast.data.table(factor.traits,
+                                             formula = factor ~ name, 
+                                             value.var = "p_adjust_all", 
+                                             fun.aggregate = mean,
+                                             fill=1)|> 
+        KGExplorer::dt_to_matrix()
+    hc.traits <- X.traits|>
+        Matrix::t()|>
+        stats::dist()|>
+        stats::hclust()
+    X.celltypes <- data.table::dcast.data.table(factor.celltypes,
+                                                formula = factor ~  cl_name, 
+                                                value.var = "q",
+                                                fun.aggregate = mean,
+                                                fill=1)|> 
+        KGExplorer::dt_to_matrix()
+    hc.celltypes <- X.celltypes|>
+        Matrix::t() |>
+        stats::dist()|>
+        stats::hclust()
+    if(!is.null(varm)){
+        hc.factors <- varm |>
+            Matrix::t() |>
+            stats::dist()|>
+            stats::hclust()
+    } else {
+        hc.factors <-
+            Seurat::RowMergeSparseMatrices(X.traits,X.celltypes)|>
+            stats::dist()|>
+            stats::hclust()
+    } 
+    factor.annot <- merge(factor.traits[p_adjust_all<q_threshold,], 
+                        factor.celltypes[q<q_threshold & mean_q<q_threshold,], 
+                        by=c("factor","factor_num"), all.x = TRUE, 
+                        allow.cartesian = TRUE) 
+    MSTExplorer::add_logfc(factor.annot)
+    if(!is.null(max_factors)){
+        factor.annot <- factor.annot[factor_num<=max_factors]
+    }
+    if(!is.null(logfc_threshold)){
+        factor.annot <- factor.annot[abs(logFC)>logfc_threshold &
+                                     is.finite(logFC)]
+    }
+    dt <- ggsankey::make_long(factor.annot,
+                              cl_name, factor_num, name, 
+                              value = value_var)
+    dt$value <- 1-dt$value 
+    lvls <- c(stringr::str_split(hc.factors$labels[hc.factors$order],
+                                 "_",simplify = TRUE)[,2],
+              hc.traits$labels[hc.traits$order],
+              hc.celltypes$labels[hc.celltypes$order]
+    ) 
+    dt$node <- factor(dt$node, levels = lvls, ordered = TRUE)
+    dt$next_node <- factor(dt$next_node, levels = lvls, ordered = TRUE)
+    #### Create plot ####
+    gg_sankey <- ggplot2::ggplot(dt,
+                    ggplot2::aes(x = x,
+                                 next_x = next_x,
+                                 node = node,
+                                 next_node = next_node,
+                                 fill = factor(node),
+                                 label = node)
+    ) +
+        ggsankey::geom_sankey(show.legend = show.legend,
+                              flow.alpha = flow.alpha,
+                              ...) +
+        ggsankey::geom_sankey_label(size = label_size, 
+                                    color = label_color, 
+                                    fill = label_fill) +
+        ggplot2::scale_fill_discrete(drop=drop) +
+        ggplot2::labs(x=NULL) +
+        ggplot2::scale_x_discrete(labels=c(
+            expression("Cell types   " %<-% "   factor gene weights   " %<-% ""),
+            "Factor",
+            expression("" %->% "   factor trait loadings   " %->% "   Traits" ))
+        ) +
+        ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = c(0,.5,1),
+                                                           size = xtext_size)) 
+        theme_nightlight()
+    #### Return ####
+    return(list(
+        data=dt,
+        plot=gg_sankey
+    ))
+    
+    # meta <- merge(
+    #   data.table::data.table(obj@meta.data|>dplyr::select(!id),
+    #                          keep.rownames = "id"),
+    #   scKirby::get_obsm(obj, keys = reduction_md, n=1)^2|>
+    #     data.table::as.data.table(keep.rownames = "id")|>
+    #     data.table::melt.data.table(
+    #       id.vars = "id",
+    #       variable.name = "factor",
+    #       value.name = "loading"),
+    #   by="id"
+    # )
+    
+    # id_to_cluster <- tidygraph::as_tbl_graph(
+    #     data.table::data.table(colnames(obj),
+    #                            cluster=paste0("cluster",obj$seurat_clusters),
+    #                            value=1,
+    #                            edge_type="id_to_cluster")
+    # )
+    # id_to_factor <- tidygraph::as_tbl_graph(
+    #     (scKirby::get_obsm(obj, keys = reduction_md, n=1)^2|>
+    #          data.table::as.data.table(keep.rownames = "id")|>
+    #          data.table::melt.data.table(
+    #              id.vars = "id",
+    #              variable.name = "factor",
+    #              value.name = "value")
+    #     )[,edge_type:="id_to_factor"]
+    # )
+    # factor_to_celltype <- tidygraph::as_tbl_graph(
+    #     factor.celltypes[q<.05,c("factor","cl_name","p")][,edge_type:="factor_to_celltype"] |>
+    #         data.table::setnames("p","value")
+    # )
+    # 
+    # visNetwork::visIgraph(factor_to_celltype) |>
+    #     # visNetwork::visHierarchicalLayout()|> 
+    #     visNetwork::visIgraphLayout(layout = "layout_with_sugiyama")|>
+    #     visNetwork::visNodes() |>
+    #     visNetwork::visEdges(color = list(opacity=.1))|>
+    #     visNetwork::visInteraction(hover = TRUE)
+    # 
+    # g <- tidygraph::graph_join(id_to_cluster,
+    #                            id_to_factor)|>
+    #     tidygraph::graph_join(factor_to_celltype)
+    # # g <- tidygraph::sample_n(g, 1000)
+    # g.dt <- KGExplorer::graph_to_dt(g)
+    # nodes <- KGExplorer::graph_to_dt(g, what = "nodes")
+    # edges <- KGExplorer::graph_to_dt(g, what = "edges")
+    # edges[,c("from","to"):=list(from-1,to-1)] # networkD3 requirees 0-indexing
+    # p <- networkD3::sankeyNetwork(Nodes = nodes,
+    #                               Links = edges, 
+    #                               Source = "from",
+    #                               Target = "to", 
+    #                               Value = "weight", 
+    #                               NodeID = "name")
+    # p
+    # 
+    # 
+    # plotly::plot_ly(type="sankey",g)
+    
+    # X <- data.table::dcast.data.table(data = factor.celltypes, 
+    #                                   formula = factor~cl_name,
+    #                                   value.var = "p",
+    #                                   fun.aggregate = mean, na.rm = TRUE)|>
+    #   KGExplorer::dt_to_matrix()
+    # 
+    # heatmaply::heatmaply(as.matrix(X))
+    
+}
\ No newline at end of file
diff --git a/R/plot_feature_density.R b/R/plot_feature_density.R
index 78811fd..cb8f047 100644
--- a/R/plot_feature_density.R
+++ b/R/plot_feature_density.R
@@ -12,6 +12,7 @@
 plot_feature_density <- function(obj,
                                  features=sample(rownames(obj),3),
                                  alpha=.5,
+                                 method=c("wkde","ks")[1],
                                  joint=TRUE,
                                  joint_only=FALSE,
                                  joint_title=if(length(unique(features))>10){
@@ -32,14 +33,15 @@ plot_feature_density <- function(obj,
     messager(length(features2),"/",length(features),"features found in object.")
     
     plts <- Nebulosa::plot_density(obj,
+                                   method=method,
                                    alpha=alpha,
-                                   features =  features2,
+                                   features = features2,
                                    joint = joint,
                                    ...) 
     ### Add theme ####
     plts <- lapply(plts,function(p){
         p + theme_nightlight() + 
-            color_nightlight()
+            scale_color_nightlight()
     })
     if(!is.null(joint_title)){
         plts[[length(plts)]] <- plts[[length(plts)]] + 
diff --git a/R/plot_hpo_pseudotime.R b/R/plot_hpo_pseudotime.R
index d11a940..c5811d9 100644
--- a/R/plot_hpo_pseudotime.R
+++ b/R/plot_hpo_pseudotime.R
@@ -4,18 +4,24 @@
 #' then compute pseudotime trajectories between a subset of phenotypes that
 #' are symptoms of a given disease (or set of diseases).
 #' @param obj \link{Seurat} object of HPO terms generated by \link{prepare_hpo}.
-#' @param disease_ids One or more disease IDs found within \code{dt_genes}.
+#' @param disease_ids One or more disease IDs found within \code{p2d}.
 #' @inheritParams prepare_hpo
 #' @inheritParams monocle3::learn_graph
+#' @inheritParams monocle3::order_cells
 #' @inheritDotParams monocle3::plot_cells
 #' @export
 #' @examples
 #' obj <- get_HPO()
 #' out <- plot_hpo_pseudotime(obj)
 plot_hpo_pseudotime <- function(obj,
-                                dt_genes = HPOExplorer::load_phenotype_to_genes(1),
-                                disease_ids=dt_genes$disease_id[1:3],
-                                learn_graph_control=list(prune_graph=FALSE),
+                                p2d = HPOExplorer::load_phenotype_to_genes(1),
+                                disease_ids=p2d$disease_id[1:3],
+                                root_cells = NULL,
+                                id_col = NULL,
+                                learn_graph_control=list(
+                                    prune_graph=FALSE,
+                                    nn.cores=KGExplorer::set_cores()$workers),
+                                use_partition=TRUE,
                                 merge_trajectories = TRUE,
                                 symptom_color="red",
                                 bg_colors=c("#5000ff","white"),
@@ -23,42 +29,67 @@ plot_hpo_pseudotime <- function(obj,
                                 title_col=NULL,
                                 title_width=100,
                                 color_by_symptoms=TRUE,
+                                color_cells_by = if(color_by_symptoms){
+                                    "is_symptom" }else {"cluster"},
                                 point_alpha=.5,
+                                add_density=TRUE,
+                                density_filled=FALSE,
+                                density_adjust=1,
+                                density_alpha=point_alpha,
+                                title=stringr::str_wrap(
+                                    paste0(unique(disease_ids),
+                                           collapse = "; "),
+                                    20*2.5
+                                ),
                                 show_plot=TRUE,
                                 ...
                                 ){
+    # obj=readRDS("../thesis/inst/pages/chapter2/data/hpo_ot_integratedV5.rds"); id_col="hp_id";
+    # disease_ids <- c( "OMIM:104300","OMIM:104310","OMIM:606889","OMIM:607822","OMIM:608907","ORPHA:1020","ORPHA:280397")
     disease_id <- NULL;
-    colnames(obj) <- replace_char_fun(colnames(obj))
     disease_ids <- unique(disease_ids)
-    #### Add disease metadata
-    ## Using gene overlap (670998 p2d pairs) ##
-    # nrow(unique(dt_genes[,c("hpo_id","disease_id")]))
+    #### Map p2d to Mondo IDs ####
+    p2d <- data.table::copy(p2d)[,.SD[1],by=c("hpo_id","disease_id")]
+    p2d <- HPOExplorer::add_mondo(phenos = p2d, 
+                                  allow.cartesian=TRUE) 
+    #### Map disease IDs to Mondo IDs ####
+    disease_map <- HPOExplorer::add_mondo(
+        phenos=data.table::data.table(disease_id=disease_ids),
+        allow.cartesian=TRUE)
+    disease_ids <- unique(c(disease_ids,unique(disease_map$mondo_id))) 
+    disease_ids <- map_id_sep(unique(disease_ids))
+    #### Run pseudotime: merged ####
     if(isTRUE(merge_trajectories)){
         out <- run_hpo_pseudotime(obj = obj, 
                                   disease_ids = disease_ids,
-                                  dt_genes = dt_genes, 
+                                  p2d = p2d, 
+                                  id_col = id_col,
+                                  root_cells = root_cells,
                                   learn_graph_control = learn_graph_control,
-                                  color_by_symptoms=color_by_symptoms,
-                                  title=stringr::str_wrap(
-                                      paste0(unique(disease_ids),
-                                             collapse = "; "),
-                                      20*2.5
-                                  ),
+                                  use_partition=use_partition,
+                                  color_by_symptoms=color_by_symptoms, 
+                                  color_cells_by=color_cells_by,
                                   point_alpha=point_alpha,
+                                  title=title,
                                   symptom_color=symptom_color,
                                   bg_colors=bg_colors,
                                   trajectory_graph_color = trajectory_graph_color,
+                                  add_density=add_density,
+                                  density_filled=density_filled,
+                                  density_adjust=density_adjust,
+                                  density_alpha=density_alpha,
                                   ...)
-        if(show_plot) methods::show(out$plot)
+        if(isTRUE(show_plot)) methods::show(out$plot)
     } else {
+    #### Run pseudotime: split ####
         out <- list()
         out[["subplots"]] <- lapply(stats::setNames(disease_ids,
                                                     disease_ids),
                                     function(d){
             if(!is.null(title_col) && 
-                         title_col %in% names(dt_genes)){
-                # dt_genes <- HPOExplorer::add_disease(dt_genes) 
-                title <- dt_genes[!is.na(get(title_col)) &
+                         title_col %in% names(p2d)){
+                # p2d <- HPOExplorer::add_disease(p2d) 
+                title <- p2d[!is.na(get(title_col)) &
                                   disease_id==d][[title_col]][1]
                 subtitle <- d
             } else {
@@ -67,13 +98,21 @@ plot_hpo_pseudotime <- function(obj,
             }
           run_hpo_pseudotime(obj = obj, 
                              disease_ids = d,
-                             dt_genes = dt_genes, 
+                             p2d = p2d, 
+                             id_col = id_col,
+                             root_cells = root_cells,
                              learn_graph_control = learn_graph_control,
+                             use_partition=use_partition,
                              title = stringr::str_wrap(title,
                                                        width = title_width),
                              subtitle=subtitle,
                              point_alpha=point_alpha,
                              color_by_symptoms=color_by_symptoms,
+                             color_cells_by=color_cells_by,
+                             add_density=add_density,
+                             density_filled=density_filled,
+                             density_adjust=density_adjust,
+                             density_alpha=density_alpha,
                              ...)
         })
         out[["plot"]] <- patchwork::wrap_plots(lapply(out$subplots,
@@ -121,4 +160,4 @@ plot_hpo_pseudotime <- function(obj,
     #     ggplot2::geom_step(data = highlight_df, 
     #                        ggplot2::aes(x=umap_1,
     #                                     y=umap_2)) 
-}
\ No newline at end of file
+}
diff --git a/R/plot_ontological_velocity.R b/R/plot_ontological_velocity.R
index f9eb23d..662f47b 100644
--- a/R/plot_ontological_velocity.R
+++ b/R/plot_ontological_velocity.R
@@ -35,7 +35,7 @@ plot_ontological_velocity <- function(obj,
                        colnames(obj),NA))   
         }
         obj <- obj[,!is.na(obj@meta.data[[id_col]])]
-        colnames(obj) <- make.unique(replace_char_fun(obj$hp_id))
+        colnames(obj) <- make.unique(map_id_sep(obj$hp_id))
     }  
     dist_lca <- simona::longest_distances_via_LCA(ont,
                                                   terms = ont@terms)
@@ -63,12 +63,12 @@ plot_ontological_velocity <- function(obj,
         if(length(nns)<2) return(NULL)
         emb <- (obj@reductions$umap@cell.embeddings |>data.frame())[nns,]
         #### remove suffixes from make.unique ####
-        nns <- replace_char_fun(
+        nns <- map_id_sep(
             stringr::str_split(nns,"[.]", simplify = TRUE)[,1]
         ) 
         nns <- intersect(nns,ont@terms)
         if(length(nns)<2) return(NULL)
-        emb$id <- replace_char_fun(
+        emb$id <- map_id_sep(
                 stringr::str_split(rownames(emb),"[.]", simplify = TRUE)[,1]
             ) 
         combos <- expand.grid(id1=nns,
diff --git a/R/plot_preservation_histo.R b/R/plot_preservation_histo.R
new file mode 100644
index 0000000..2db3839
--- /dev/null
+++ b/R/plot_preservation_histo.R
@@ -0,0 +1,38 @@
+#' Plot preservation histogram
+#' 
+#' Plot the correlation between equivalent terms in 
+#' high-, mid-, and low-dimensional space.
+#' @export
+plot_preservation_histo <- function(consist_dt,
+                                    lvl="hd",
+                                    key=c("hd"="High-dimensional",
+                                          "md"="Mid-dimensional",
+                                          "ld"="Low-dimensional"),
+                                    title=paste(key[lvl],"space"), 
+                                    subtitle=paste0(
+                                        "(",consist_dt[[paste0("dim_",lvl)]][1],
+                                        " dimensions)")
+                                    ){
+    ## histograms of high-dimensional cor 
+    ggplot2::ggplot(consist_dt,
+                    ggplot2::aes(x=!!ggplot2::sym(paste0("cor_",lvl)),
+                                 fill=id_type, color=id_type))+ 
+        ggplot2::geom_density(ggplot2::aes(y=ggplot2::after_stat(scaled)),
+                              alpha=.5)+ 
+        ## baseline mean
+        ggplot2::geom_vline(xintercept = mean(consist_dt[[paste0("baseline_cor_",lvl)]]), 
+                            linetype="dashed", color="grey40") + 
+        ggplot2::geom_text(x = mean(consist_dt[[paste0("baseline_cor_",lvl)]]), y=.8,
+                           label="baseline", angle=90, color="grey40",vjust=1.5,
+                           check_overlap = TRUE) + 
+        ## group means
+        ggplot2::geom_vline(data=consist_dt[,.(mean_cor=mean(get(paste0("cor_",lvl)),
+                                                             na.rm=TRUE)),
+                                            by=id_type],
+                            ggplot2::aes(xintercept=mean_cor, color=id_type))+  
+        ggplot2::xlim(0,1)+
+        ggplot2::theme_minimal() +
+        ggplot2::labs(title=title,
+                      subtitle=subtitle,
+                      x="Pearson correlation")
+}
diff --git a/R/plot_top_celltypes.R b/R/plot_top_celltypes.R
index 1a3d047..09c25b6 100644
--- a/R/plot_top_celltypes.R
+++ b/R/plot_top_celltypes.R
@@ -9,10 +9,7 @@ plot_top_celltypes <- function(obj,
                                types=c("radial","pie","reduction"),
                                reduction="umap",
                                prefix="^q[.]",
-                               cluster_vars=c("seurat_clusters",
-                                              "dag_enrich.name",
-                                              "dag_enrich.name_wrap",
-                                              "cluster_colors"),
+                               cluster_vars=c("seurat_clusters"),
                                color_col=cluster_vars[1],
                                label_col=cluster_vars[1],
                                enriched_proportion_threshold=.2,
@@ -52,6 +49,9 @@ plot_top_celltypes <- function(obj,
                                    n=1)|>
             data.table::data.table(keep.rownames = "rn") 
         embed_cols <- names(embed)[-1]
+        for(e in embed_cols){
+            if(e %in% names(meta)) meta <- meta[,-c(e), with=FALSE]
+        }
         meta <- merge(meta,embed, by="rn")
     }
     meta_melt <- (
@@ -82,7 +82,7 @@ plot_top_celltypes <- function(obj,
         umap_1=mean(get(embed_cols[1])),
         umap_2=mean(get(embed_cols[2]))
     ),
-    by=c(cluster_vars,"celltype")]|>
+    by=c(unique(c(cluster_vars,label_col,"celltype")))]|>
         data.table::setorderv(c(cluster_vars,"celltype_sig_count","mean_q"),
                               c(rep(1,length(cluster_vars)),-1,1))
     
diff --git a/R/prepare_HPO.R b/R/prepare_HPO.R
index 90648ae..d53b400 100644
--- a/R/prepare_HPO.R
+++ b/R/prepare_HPO.R
@@ -10,6 +10,8 @@
 #' disease and HPO terms.
 #' @param min_quantile \link{numeric} minimum quantile for filtering
 #' cell type enrichment results.
+#' @param min_genes The minimum number of genes of phenotypes/diseases
+#' to include from the data matrix.
 #' @param min_value The minimum sum of columns to include from the data matrix.
 #' @param celltype_col Name of the cell type column. 
 #' @param run_nlp Label clusters using natural language processing via 
@@ -19,12 +21,13 @@
 #' @inheritParams HPOExplorer::hpo_to_matrix
 #' @inheritParams HPOExplorer::make_phenos_dataframe
 #' @inheritParams scKirby::process_seurat
+#' @inheritDotParams scKirby::process_seurat
 #' 
 #' @export
 #' @import data.table
 #' @import HPOExplorer
 #' @examples
-#' obj <- prepare_hpo(id_types="hpo_id")
+#' obj <- prepare_hpo(id_types="hpo_id", min_genes=10)
 prepare_hpo <- function(dt_genes = HPOExplorer::load_phenotype_to_genes(1),
                         dt_annot = HPOExplorer::load_phenotype_to_genes(3),
                         hpo = HPOExplorer::get_hpo(),
@@ -34,26 +37,30 @@ prepare_hpo <- function(dt_genes = HPOExplorer::load_phenotype_to_genes(1),
                             MSTExplorer::map_celltype(),
                         id_types = c("hpo_id"),#c("disease_id","hpo_id"),
                         min_quantile = 35,
+                        min_genes = NULL,
                         min_value = NULL,
                         celltype_col=c("cl_name","cl_id","CellType"),
+                        celltype_value="q",
                         value.var = "evidence_score_sum",
                         default_assay = "score",
                         nfeatures = NULL,
-                        vars.to.regress = paste0("nFeature_",
-                                                 default_assay),
+                        vars.to.regress = NULL, # paste0("nFeature_",default_assay),
                         run_nlp = FALSE,
                         run_impute = FALSE,
                         workers = 1,
                         seed = 2023,
                         verbose = TRUE,
                         save_path=NULL,
-                        force_new=FALSE){ 
+                        force_new=FALSE,
+                        ...){ 
     top_celltype <- gene_symbol <- disease_id <- disease_db <- id <- 
         ancestor_name <- ancestor_name_abnormality <- NULL
     
     celltype_col <- celltype_col[1]
     #### Check for existing file ####
-    if(file.exists(save_path) && !force_new){
+    if(!is.null(save_path) &&
+       file.exists(save_path) && 
+       isFALSE(force_new)){
         messager("Loading precomputed data:",save_path)
         return(readRDS(save_path))
     }
@@ -137,8 +144,8 @@ prepare_hpo <- function(dt_genes = HPOExplorer::load_phenotype_to_genes(1),
         dt_annot_melt <- dt_annot_melt[, head(.SD, 1), by="id"][colnames(X_ref),]
         ## Add id_name column for labeling
         id_names <- gsub("_id$","_name",id_types)
-        id_names <- id_names[id_names %in% names(dt_annot)]
-        dt_annot_melt[,id_name:=data.table::fcoalesce(as.list(get(id_names),"id"))]
+        id_names <- id_names[id_names %in% names(dt_annot)] 
+        dt_annot_melt[,name:=data.table::fcoalesce(.SD),.SDcols=c(id_names,"id")]
     }
     {
         #### Add per sample gene_symbol counts ####
@@ -166,15 +173,15 @@ prepare_hpo <- function(dt_genes = HPOExplorer::load_phenotype_to_genes(1),
     #     dplyr::slice_head(n = 1) |> data.table::data.table() 
     ct_results_cast <- data.table::dcast.data.table(
         ct_results,
-        formula = hpo_id ~ paste("q",get(celltype_col),sep="."),
+        formula = hpo_id ~ paste(celltype_value,get(celltype_col),sep="."),
         fun.aggregate = mean,
-        value.var = "q",
+        value.var = celltype_value,
         na.rm = TRUE)
     #### Add
-    ct_cols <- grep("^q\\.",names(ct_results_cast), value = TRUE)
+    ct_cols <- grep(paste0("^",celltype_value,"\\."),names(ct_results_cast), value = TRUE)
     ct_results_cast[,
                     top_celltype:=gsub(
-                        "^q\\.","",
+                        paste0("^",celltype_value,"\\."),"",
                         ct_cols[apply(ct_results_cast[,ct_cols, with=FALSE],1,
                                       which.min)])
     ]
@@ -187,6 +194,9 @@ prepare_hpo <- function(dt_genes = HPOExplorer::load_phenotype_to_genes(1),
                                                   by.y = "hpo_id",
                                                   all.x = TRUE) 
     #### Filter out any terms with <x genes ####
+    if(!is.null(min_genes)){
+        X_ref <- X_ref[,Matrix::colSums(X_ref!=0)>min_genes]
+    }
     if(!is.null(min_value)){
         X_ref <- X_ref[,Matrix::colSums(X_ref)>=min_value]
     }
@@ -203,7 +213,9 @@ prepare_hpo <- function(dt_genes = HPOExplorer::load_phenotype_to_genes(1),
                                    nfeatures = nfeatures,
                                    vars.to.regress = vars.to.regress,
                                    default_assay = default_assay,
-                                   workers = workers)
+                                   workers = workers,
+                                   ...)
+    ref$orig.ident <- "HPO"
     #### Impute ####
     if(isTRUE(run_impute)){
         messager("Running imputation.",v=verbose)
@@ -216,11 +228,10 @@ prepare_hpo <- function(dt_genes = HPOExplorer::load_phenotype_to_genes(1),
                                        workers = workers)
     }
     #### Save ####
-    if(!is.null(save_path)) cache_save(ref,save_path)
+    KGExplorer::cache_save(ref,save_path)
     #### Run scNLP ####
     if(isTRUE(run_nlp)){ 
-        ref$name_definition <- paste(ref$id_name,
-                                     ref$ancestor_name,
+        ref$name_definition <- paste(ref$name,
                                      ref$definition)
         scnlp_res <- scNLP::plot_tfidf(
             ref,
diff --git a/R/prepare_opentargets.R b/R/prepare_opentargets.R
index 90c13cd..0f0bf33 100644
--- a/R/prepare_opentargets.R
+++ b/R/prepare_opentargets.R
@@ -10,34 +10,57 @@ prepare_opentargets <- function(data_type="associationByOverallDirect",
                                 formula = "approvedSymbol ~ diseaseId",
                                 nfeatures=NULL,
                                 default_assay = "score",
-                                vars.to.regress = paste0("nFeature_",
-                                                         default_assay),
+                                vars.to.regress = NULL, #paste0("nFeature_",default_assay),
                                 save_path=NULL,
                                 force_new=FALSE,
                                 ...){
     targetId <- id <- NULL;
     
-    if(file.exists(save_path) && !force_new){
+    if(!is.null(save_path) && 
+       file.exists(save_path) && 
+       isFALSE(force_new)){
         messager("Loading precomputed data:",save_path)
         return(readRDS(save_path))
     } 
     #### Get gene-disease relationship data ####
-    d <- KGExplorer::get_opentargets(data_type=data_type)
+    d <- KGExplorer::get_opentargets(data_type=data_type,
+                                     force_new = force_new)
     #### Get sample metadata ####
-    obs <- KGExplorer::get_opentargets(data_type = "diseases")
+    obs <- KGExplorer::get_opentargets(data_type = "diseases",
+                                       force_new = force_new)
     obs$db <- stringr::str_split(obs$id,"_",simplify = TRUE)[,1]
+    obs|> data.table::setnames("description","definition")
     messager("Mapping xref IDs.")
     for(x in unique(obs$db)){
         suppressWarnings(
-            map_xref(obs,prefix=x,verbose=FALSE)    
+            map_xref(dat = obs,
+                     prefix=x,
+                     verbose=FALSE)    
         )
     }
     #### Get feature metadata ####
-    var <- KGExplorer::get_opentargets(data_type = "targets")
-    var <- var[id %in% unique(d$targetId)] |>
-        data.table::setkeyv("id")
-    #### Convert ensembl IDs to gene symbols ####
-    d[,approvedSymbol:=var[targetId]$approvedSymbol]
+    ## Mapping with orthogene zero difference when converting to gene symbols
+    # if(isTRUE(run_map_genes)){
+    #     var <- KGExplorer::get_opentargets(data_type = "targets")
+    #     var <- var[id %in% unique(d$targetId)] 
+    #     gene_map <- orthogene::map_genes(genes = unique(var$approvedSymbol))
+    #     gene_map <- data.table::data.table(gene_map)[,.SD[1], by="input"]
+    #     gene_map[,approvedSymbol:=data.table::fcoalesce(name,input)]
+    #     data.table::setkeyv(gene_map,"input")
+    #     var[,approvedSymbol_standard:=data.table::fcoalesce(
+    #         gene_map[approvedSymbol]$name,approvedSymbol)] 
+    #     data.table::setkeyv(var,"id")
+    #     #### Convert ensembl IDs to gene symbols ####
+    #     d[,approvedSymbol:=var[targetId]$approvedSymbol_standard]   
+    # } else{
+        #### Get feature metadata ####
+        var <- KGExplorer::get_opentargets(data_type = "targets", 
+                                           force_new = force_new)
+        var <- var[id %in% unique(d$targetId)] |>
+            data.table::setkeyv("id")
+        #### Convert ensembl IDs to gene symbols ####
+        d[,approvedSymbol:=var[targetId]$approvedSymbol]
+    # }
     #### Convert to sparse matrix ####
     messager("Constructing matrix:",formula)
     X <- data.table::dcast.data.table(d,
@@ -46,7 +69,7 @@ prepare_opentargets <- function(data_type="associationByOverallDirect",
                                       fun.aggregate = mean, 
                                       fill = 0,
                                       na.rm = TRUE) |>
-        scKirby::to_sparse()
+        KGExplorer::dt_to_matrix(as_sparse = TRUE)
     #### Subset by intersecting IDs ####
     ids <- intersect(obs$id, d$diseaseId)
     obs <- obs[id %in% ids,]
@@ -68,7 +91,7 @@ prepare_opentargets <- function(data_type="associationByOverallDirect",
         default_assay = default_assay,
         ...)
     #### Save ####
-    if(!is.null(save_path)) cache_save(obj,save_path)
+    KGExplorer::cache_save(obj,save_path)
     #### Return ####
     return(obj)
 }
\ No newline at end of file
diff --git a/R/replace_char_fun.R b/R/replace_char_fun.R
index d2f92df..7ad4116 100644
--- a/R/replace_char_fun.R
+++ b/R/replace_char_fun.R
@@ -1,9 +1,16 @@
-replace_char_fun <- function(nms,
-                             replace_char=list("."=":")){
+#' Map ID separators 
+#' 
+#' Replace the separator in an ID in order to standardise it.
+#' @export
+#' @examples
+#' map_id_sep(c("HP:0001","HP.0001","HP_0001"))
+map_id_sep <- function(nms,
+                             replace_char=list("."=":",
+                                               "_"=":")){
     if(length(replace_char)>0){
         for(r in names(replace_char)){
             nms <- gsub(r,replace_char[[r]],nms, fixed = TRUE)
         }
     }
     return(nms)
-}
\ No newline at end of file
+}
diff --git a/R/run_dag_enrich.R b/R/run_dag_enrich.R
index e6e3882..b7fa496 100644
--- a/R/run_dag_enrich.R
+++ b/R/run_dag_enrich.R
@@ -2,6 +2,9 @@
 #' 
 #' Run enrichment of terms in a DAG ontology.
 #' 
+#' @param value_threshold Minimum weight to include a term 
+#' (after applying \code{truns_fun}).
+#'  Only used when \code{reduction} is provided.
 #' @export
 #' @examples
 #' ont <- KGExplorer::get_ontology("hp")
@@ -9,6 +12,7 @@
 #' res <- run_dag_enrich(obj, ont, reduction="pca")
 run_dag_enrich <- function(obj, 
                            ont,
+                           id_col=NULL,
                            reduction=NULL,
                            cluster_col = NULL,
                            min_hits = 3, 
@@ -16,8 +20,12 @@ run_dag_enrich <- function(obj,
                            trans_fun=NULL,
                            replace_char=list("."=":",
                                              "_"=":"),
+                           value_threshold=NULL,
+                           p_threshold=.05,
                            q_threshold=.05,
-                           top_n=100){
+                           top_n=100,
+                           sort_by= c("log2_fold_enrichment"=-1)
+                           ){
     
     if(is.null(reduction)){
         if(is.null(cluster_col)){
@@ -25,20 +33,27 @@ run_dag_enrich <- function(obj,
         }
         run_dag_enrich_obj(obj=obj, 
                            ont=ont,
+                           id_col=id_col,
                            cluster_col=cluster_col,
                            min_hits=min_hits,
                            min_offspring=min_offspring,
                            replace_char=replace_char,
-                           q_threshold=q_threshold)
+                           p_threshold=p_threshold,
+                           q_threshold=q_threshold,
+                           sort_by=sort_by)
     } else {
-        obsm <- scKirby::get_obsm(obj, keys=reduction, n = 1)
-        run_dag_enrich_obsm(obsm=obsm, 
+        run_dag_enrich_obsm(obj=obj, 
                             ont=ont,
+                            id_col=id_col,
+                            reduction=reduction,
                             min_hits=min_hits,
                             min_offspring=min_offspring,
                             replace_char=replace_char,
                             trans_fun=trans_fun,
                             top_n=top_n,
-                            q_threshold=q_threshold)
+                            value_threshold=value_threshold,
+                            p_threshold=p_threshold,
+                            q_threshold=q_threshold,
+                            sort_by=sort_by)
     }
 }
\ No newline at end of file
diff --git a/R/run_dag_enrich_obj.R b/R/run_dag_enrich_obj.R
index deeca44..51a7b16 100644
--- a/R/run_dag_enrich_obj.R
+++ b/R/run_dag_enrich_obj.R
@@ -1,19 +1,34 @@
 run_dag_enrich_obj <- function(obj, 
                                ont,
+                               id_col,
                                cluster_col,
                                min_hits,
                                min_offspring,
                                replace_char,
-                               q_threshold=.05){
-    z_score <- NULL;
+                               p_threshold,
+                               q_threshold,
+                               sort_by){
     messager("Running dag_enrich_on_offsprings.")
     clusters <- sort(unique(obj[[cluster_col]][,1]))
-    lapply(seq(length(clusters)), function(i){
-        nms <- colnames(obj)[obj[[cluster_col]][,1]==i]
-        nms <- replace_char_fun(nms,replace_char)
+    all_ids <- if(is.null(id_col)) {
+        colnames(obj)
+    } else {
+        if(!id_col %in% colnames(obj@meta.data)){
+            messager("Could not find",paste0("id_col=",shQuote(id_col)),
+                     "Defaulting to colnames(obj).")
+            colnames(obj)
+        }
+        obj@meta.data[[id_col]]
+    }
+    BPPARAM <- KGExplorer::set_cores()
+    RES <- BiocParallel::bplapply(seq(length(clusters)),
+                           BPPARAM = BPPARAM, 
+                           function(i){ 
+        nms <- all_ids[obj@meta.data[[cluster_col]]==i]
+        nms <- map_id_sep(nms,replace_char)
         nms <- intersect(nms,ont@terms)
         if(length(nms)<min_hits) {
-            messager("Not enough terms found for cluster:",i)
+            warning("Not enough terms found for cluster:",i)
             return(NULL)
         }
         res <- simona::dag_enrich_on_offsprings(dag = ont, 
@@ -22,10 +37,16 @@ run_dag_enrich_obj <- function(obj,
                                                 min_offspring = min_offspring) |>
             data.table::data.table(key="term")
         res <- cbind(group=clusters[i],res)
-        res[,combined_score:=(1-p_adjust)*z_score]
-        if(is.null(q_threshold)) return(res)
-        res_sig <- res[p_adjust<q_threshold,][!is.na(combined_score)]|>
-            data.table::setorderv(c("combined_score"),c(-1))
-        res_sig
-    }) |> data.table::rbindlist()
-}
\ No newline at end of file
+        res$input_ids <- paste(nms,collapse = ";")
+        res$input_names <- paste(KGExplorer::map_ontology_terms(ont = ont, 
+                                                                terms = nms,
+                                                                to = "name",
+                                                                verbose = FALSE),
+                                 collapse = ";")
+        return(res)
+    }) |> data.table::rbindlist(fill=TRUE) 
+    run_dag_enrich_postprocess(RES=RES,
+                               p_threshold=p_threshold,
+                               q_threshold=q_threshold,
+                               sort_by=sort_by)
+}
diff --git a/R/run_dag_enrich_obsm.R b/R/run_dag_enrich_obsm.R
index 72c2033..a947b2c 100644
--- a/R/run_dag_enrich_obsm.R
+++ b/R/run_dag_enrich_obsm.R
@@ -1,29 +1,51 @@
-run_dag_enrich_obsm <- function(obsm, 
+run_dag_enrich_obsm <- function(obj, 
                                 ont,
+                                id_col,
+                                reduction,
                                 min_hits,
                                 min_offspring,
                                 replace_char,
-                                top_n=100,
-                                trans_fun=NULL,
-                                q_threshold=.05){
-    z_score <- NULL;
+                                top_n,
+                                trans_fun,
+                                value_threshold,
+                                p_threshold,
+                                q_threshold,
+                                sort_by){
     
+    obsm <- scKirby::get_obsm(obj, keys=reduction, n = 1)
     ont_prefixes <- unique(stringr::str_split(ont@terms,pattern = ":",
                                               simplify = TRUE)[,1])
     obsm <- obsm[grepl(paste(paste0("^",ont_prefixes),collapse = "|"),
                        rownames(obsm)),]
     if(!is.null(trans_fun)) obsm <- trans_fun(obsm)
     if(nrow(obsm)==0) stopper("No matching terms found in obsm.")
-    messager("Running dag_enrich_on_offsprings.") 
-    lapply(stats::setNames(seq(ncol(obsm)),
-                           colnames(obsm)), 
-           function(i){
+    id_dict <- if(is.null(id_col)) {
+        stats::setNames(colnames(obj),
+                        colnames(obj))
+    } else {
+        if(!id_col %in% colnames(obj@meta.data)){
+            messager("Could not find",paste0("id_col=",shQuote(id_col)),
+                     "Defaulting to colnames(obj).")
+            stats::setNames(colnames(obj),
+                            colnames(obj))
+        }
+        stats::setNames(obj[[id_col]][,1],
+                        rownames(obj[[id_col]]))
+    }
+    messager("Running dag_enrich_on_offsprings.")
+    BPPARAM <- KGExplorer::set_cores()
+    RES <- BiocParallel::bplapply(stats::setNames(seq(ncol(obsm)),
+                                                  colnames(obsm)), 
+                                  BPPARAM = BPPARAM, 
+                                  function(i){
         val <- obsm[,i]
-        nms <- names(tail(sort(val),top_n))
-        nms <- replace_char_fun(nms,replace_char)
+        if(!is.null(value_threshold)) val <- val[val>value_threshold]
+        nms <- id_dict[names(tail(sort(val),top_n))]|>unname()
+        nms <- map_id_sep(nms,replace_char)
         nms <- intersect(nms,ont@terms)
         if(length(nms)<min_hits) {
-            messager("Not enough terms found for",colnames(obsm)[i])
+            messager("Not enough terms found for",colnames(obsm)[i],
+                     v=!BPPARAM$progressbar)
             return(NULL)
         }
         res <- simona::dag_enrich_on_offsprings(dag = ont, 
@@ -32,10 +54,16 @@ run_dag_enrich_obsm <- function(obsm,
                                                 min_offspring = min_offspring) |>
             data.table::data.table(key="term")
         res <- cbind(group=colnames(obsm)[i],res)
-        res[,combined_score:=(1-p_adjust)*z_score]
-        if(is.null(q_threshold)) return(res)
-        res_sig <- res[p_adjust<q_threshold,][!is.na(combined_score)]|>
-            data.table::setorderv(c("combined_score"),c(-1))
-        res_sig
-    }) |> data.table::rbindlist()
-}
\ No newline at end of file
+        res$input_ids <- paste(nms,collapse = ";")
+        res$input_names <- paste(KGExplorer::map_ontology_terms(ont = ont, 
+                                                                terms = nms,
+                                                                to = "name",
+                                                                verbose = FALSE),
+                                 collapse = ";")
+        return(res)
+    }) |> data.table::rbindlist(fill=TRUE)
+    run_dag_enrich_postprocess(RES=RES,
+                               p_threshold=p_threshold,
+                               q_threshold=q_threshold,
+                               sort_by=sort_by)
+}
diff --git a/R/run_dag_enrich_postprocess.R b/R/run_dag_enrich_postprocess.R
new file mode 100644
index 0000000..b04d490
--- /dev/null
+++ b/R/run_dag_enrich_postprocess.R
@@ -0,0 +1,26 @@
+run_dag_enrich_postprocess <- function(RES,
+                                       p_threshold,
+                                       q_threshold,
+                                       sort_by){
+    combined_score <- p_value <- z_score <- NULL;
+    RES[,p_adjust_all:=stats::p.adjust(p_value, method = "fdr")]
+    RES[,combined_score:=(1-p_value)*z_score]
+    if(!is.null(p_threshold)) {
+        RES <- RES[p_value<p_threshold,]
+        if(nrow(RES)==0){
+            stopper(paste0("No results left @ p_threshold<",p_threshold))
+        }
+    }
+    if(!is.null(q_threshold)) {
+        RES <- RES[p_adjust_all<q_threshold,]
+        if(nrow(RES)==0){
+            stopper(paste0("No results left @ q_threshold<",q_threshold))
+        }
+    }
+    if(all(names(sort_by) %in% names(RES))){ 
+        RES|>
+            data.table::setorderv(names(sort_by),
+                                  unname(sort_by))
+    }
+    return(RES)
+}
\ No newline at end of file
diff --git a/R/run_hpo_pseudotime.R b/R/run_hpo_pseudotime.R
index 77a62ad..7d9f06a 100644
--- a/R/run_hpo_pseudotime.R
+++ b/R/run_hpo_pseudotime.R
@@ -1,7 +1,11 @@
 run_hpo_pseudotime <- function(obj,
                                disease_ids,
-                               dt_genes,
+                               root_cells,
+                               p2d,
+                               id_col,
                                learn_graph_control,
+                               use_partition=TRUE,
+                               color_cells_by = "is_symptom",
                                title=NULL,
                                subtitle=NULL,
                                color_by_symptoms=TRUE,
@@ -9,20 +13,20 @@ run_hpo_pseudotime <- function(obj,
                                bg_colors=c("#5000ff","white"),
                                trajectory_graph_color=ggplot2::alpha("white",.8),
                                point_alpha=.5,
+                               add_density=TRUE,
+                               density_filled=FALSE,
+                               density_adjust=1,
+                               density_alpha=point_alpha,
                                ...){
     requireNamespace("monocle3")
     requireNamespace("SeuratWrappers")
-    library(dplyr)
-    gene_symbol <- disease_id <- NULL;
+    requireNamespace("dplyr")
+    gene_symbol <- disease_id <- NULL; 
     
-    disease_ids <- unique(disease_ids)
     messager("Running pseudotime analysis for:", 
-             paste(disease_ids, collapse = "; "))
-    p2d <- dt_genes[disease_id %in% disease_ids,
-                    list(count=data.table::uniqueN(gene_symbol)),
-                    by=c("hpo_id","disease_id")]#|>
-    # data.table::dcast.data.table(formula = hpo_id~disease_id,
-    #                              value.var = "count")
+             paste(disease_ids, collapse = "; ")) 
+    p2d <- p2d[disease_id %in% disease_ids|  
+               mondo_id %in% disease_ids,]
     if(nrow(p2d)==0) {
         stopper("No hpo_ids found for the given disease_ids.")
     } else {
@@ -42,43 +46,77 @@ run_hpo_pseudotime <- function(obj,
     #                                               by.x = "id",
     #                                               by.y = "hpo_id",
     #                                               all.x = TRUE)  
+    hpo_ids <- if(!is.null(id_col) && 
+       id_col %in% colnames(obj@meta.data)) {
+        obj@meta.data[[id_col]]
+    } else {
+        colnames(obj)
+    }
     hpo_ids <- intersect(unique(p2d$hpo_id),
-                         colnames(obj))
+                         map_id_sep(unique(hpo_ids))
+                         )
     if(length(hpo_ids)==0) {
         stopper("No hpo_ids overlapping with samples (colnames) in obj.")
     } else {
         messager("Found", length(hpo_ids), "matching hpo_ids within obj.")
     }
+    #### Convert to CDS format ####
     cds <- suppressWarnings(
         SeuratWrappers::as.cell_data_set(obj)
-    )
-    
-    k <- min(ncol(cds[,hpo_ids]),
-             max(as.integer(cds@colData$seurat_clusters))) -2
+    ) 
+    #### Choose root cells ####
+    if(is.null(root_cells)) {
+        messager("Setting disease_ids as root cells.")
+        root_cells <- disease_ids
+    }
+    if(length(root_cells)>0){ 
+        root_cells <- c(
+            colnames(obj)[map_id_sep(colnames(obj)) %in% map_id_sep(root_cells)],
+            if(!is.null(id_col) && id_col %in% colnames(obj@meta.data)){
+                colnames(obj)[map_id_sep(obj@meta.data[[id_col]]) %in% map_id_sep(root_cells)]    
+            },
+            if("mondo_id" %in% colnames(obj@meta.data)){
+                colnames(obj)[map_id_sep(obj@meta.data[["mondo_id"]]) %in% map_id_sep(root_cells)]
+            }
+        )|> unique()
+        if(length(root_cells)>0){
+            messager("Using",length(root_cells),"root cells.")
+        }
+    } 
+    #### Choose k ####
+    k <- min(ncol(cds[,unique(c(hpo_ids,root_cells))]),
+             max(as.integer(cds@colData$seurat_clusters), na.rm = TRUE)
+             ) - 2
+    k <- max(k,2,na.rm = TRUE)
+    messager(paste0("Running monocle3::cluster_cells with k=",k))
     cds_sub <- monocle3::cluster_cells(
-        cds = cds[,hpo_ids],
+        cds = cds[,unique(c(hpo_ids,root_cells))],
         k = k)
+    messager("Running monocle3::learn_graph.")
     cds_sub <- monocle3::learn_graph(cds_sub, 
+                                     use_partition=use_partition,
                                      learn_graph_control=learn_graph_control)
-    cds_sub <- monocle3::order_cells(cds_sub, root_cells = colnames(cds_sub))
+    if(length(root_cells)==0){
+        root_cells <- colnames(cds_sub)
+    }
+    cds@colData$is_root <- as.factor(colnames(cds) %in% root_cells)
+    messager("Running monocle3::order_cells with",ncol(cds_sub),"cells and",
+             length(root_cells),"root cells. ") 
+    cds_sub <- monocle3::order_cells(cds_sub, 
+                                     root_cells = root_cells)
     monocle3::principal_graph(cds) <- monocle3::principal_graph(cds_sub)
     monocle3::principal_graph_aux(cds) <- monocle3::principal_graph_aux(cds_sub) 
-    #### Contellation plot (dark) ####
+    #### Constellation plot (dark) ####
     if(isTRUE(color_by_symptoms)){
         cds@colData$is_symptom <-  as.factor(colnames(cds) %in% hpo_ids)
         plt <- monocle3::plot_cells(cds,
-                                    color_cells_by = "is_symptom",
+                                    color_cells_by = color_cells_by,
                                     label_cell_groups = FALSE,
                                     trajectory_graph_color = trajectory_graph_color,
                                     alpha = point_alpha,
                                     ...
         ) +
             ggplot2::scale_colour_manual(values = bg_colors) +
-            # ggplot2::geom_density_2d_filled(data = . %>% dplyr::filter(is_symptom==TRUE), 
-            #                                 contour_var = "ndensity")+
-            # ggplot2::scale_fill_manual(
-            #     values = c("transparent",
-            #                ggplot2::alpha(pals::gnuplot(15),.7))) +
             ggplot2::geom_point(data = . %>% dplyr::filter(is_symptom==TRUE),
                                 color=symptom_color,
                                 alpha=point_alpha,
@@ -87,10 +125,46 @@ run_hpo_pseudotime <- function(obj,
             ggplot2::theme(legend.position = "none")
     } else{
         plt <- monocle3::plot_cells(cds,
+                                    color_cells_by=color_cells_by,
                                     ...
         ) 
     }
+    # plt$layers <- plt$layers[ 1:10]
+    # plt
+    graph_layers <- seq(3,9)
+    if(add_density){ 
+        if(density_filled){
+            plt <- plt + 
+                ggplot2::geom_density2d_filled(data = . %>% dplyr::filter(is_symptom==TRUE),
+                             adjust=density_adjust) +
+                ggplot2::scale_fill_manual(
+                    values = c("transparent",
+                               ggplot2::alpha(pals::gnuplot(15),density_alpha))
+                ) 
+        } else {
+            plt <- plt + 
+                ggplot2::geom_density2d(data = . %>% dplyr::filter(is_symptom==TRUE),
+                             alpha=density_alpha,
+                             color=symptom_color,
+                             adjust=density_adjust)
+        }
+        
+    }
+    if(length(root_cells)>0){
+        plt <- plt + 
+            ggplot2::geom_point(data = . %>% dplyr::filter(is_root==TRUE),
+                                color="green",
+                                shape=23,
+                                alpha=point_alpha*1.5,
+                                stroke=1.5,
+                                size=3) 
+            
+    }
     plt <- plt + ggplot2::labs(title = title, subtitle = subtitle)
+    #### Reorder layers so that graph and nodes are on top ####
+    plt$layers <- c(plt$layers[-graph_layers],plt$layers[graph_layers])
+    ## Increase alpha of graph
+    plt$layers[[6]]$aes_params$alpha <- .9
     return(
         list(data=cds,
              plot=plt)
diff --git a/R/run_integration.R b/R/run_integration.R
new file mode 100644
index 0000000..84b2cba
--- /dev/null
+++ b/R/run_integration.R
@@ -0,0 +1,138 @@
+#' Run integration
+#' 
+#' Run integration pipeline on a \link{Seurat} object.
+#' @inheritParams Seurat::IntegrateLayers
+#' @inheritParams Seurat::SplitObject
+#' @inheritParams Seurat::ScaleData
+#' @inheritParams Seurat::CCAIntegration
+#' @inheritParams scKirby::process_seurat
+#' @inheritDotParams Seurat::IntegrateLayers
+#' @export
+#' @examples
+#' obj <- scKirby::example_obj("seurat")
+#' obj2 <- run_integration(obj, split.by="groups")
+run_integration <- function(obj, 
+                            split.by,
+                            assay = Seurat::DefaultAssay(obj),
+                            nfeatures = nrow(obj),
+                            vars.to.regress = NULL,
+                            dims = seq(100),
+                            save_path = tempfile(fileext = ".rds"),
+                            pipeline = c("seuratv5","seuratv4"),
+                            method = Seurat::CCAIntegration, 
+                            k.weight = 100,
+                            orig.reduction = "pca", 
+                            new.reduction = "cca",
+                            cluster_reduction = new.reduction,
+                            verbose = TRUE,
+                            ...){
+    # SeuratWrappers::scVIIntegration()
+    # SeuratWrappers::FastMNNIntegration()
+    
+    pipeline <- match.arg(pipeline)
+    if(file.exists(save_path)){
+        message("Loading cached file: ",save_path)
+        obj <- readRDS(save_path)
+    } else {
+        force(obj)
+        force(split.by)
+        ## remove traits with no gene associations 
+        # obj <- obj[,obj$nFeature_score!=0]
+        
+        # obj_hpo <- Seurat::FindVariableFeatures(obj_hpo, nfeatures = nrow(obj_hpo)) 
+        # var_features <- lapply(Seurat::SplitObject(obj_ot, split.by = "orig.ident"),
+        #                        function(x){
+        #   cat(dim(x),"\n")
+        #   if(ncol(x)<2) return(NULL)
+        #   x <- Seurat::FindVariableFeatures(x, nfeatures = nrow(x))
+        #   Seurat::VariableFeatures(x)
+        # })
+        # ### Get union of top 2000 genes from each gene lists ###
+        # var_features <- Reduce(union, lapply(c(var_features,
+        #                                         HPO=Seurat::VariableFeatures(obj_hpo)
+        #                                        ),
+        #                                      head, 1000)) 
+       
+        max_dim <- min(
+            data.table::data.table(obj@meta.data)[,.N,by=c(split.by)]$N
+            ) - 1 
+        if(max(dims)>max_dim){
+            messager("Reducing number of dimensions to match split data:",
+                     max_dim)
+            dims <- dims[seq(max_dim)]
+        }
+        #### Seurat v5 strategy ####
+        if(pipeline=="seuratv5"){ 
+            obj[[assay]] <- split(obj[[assay]],
+                                  f = obj@meta.data[[split.by]])
+            obj <- Seurat::NormalizeData(obj)
+            obj <- Seurat::FindVariableFeatures(obj, 
+                                                nfeatures = nfeatures)
+            obj <- Seurat::ScaleData(obj, 
+                                     vars.to.regress = vars.to.regress)
+            # obj <- Seurat::RunPCA(obj, npcs=max(dims))
+            obj <- Seurat::IntegrateLayers(object = obj, 
+                                           method = method, 
+                                           orig.reduction = orig.reduction, 
+                                           new.reduction = new.reduction,
+                                           features = rownames(obj),
+                                           dims = dims,
+                                           k.weight = min(k.weight,max_dim),
+                                           verbose = verbose)
+            # re-join layers after integration
+            obj[[assay]] <- SeuratObject::JoinLayers(obj[[assay]]) 
+            obj <- Seurat::FindNeighbors(obj, 
+                                         reduction = cluster_reduction,
+                                         dims = dims)
+            obj <- Seurat::FindClusters(obj)
+            max_dims <- ncol(obj@reductions[[new.reduction]])
+            if(max(dims)>max_dims){
+                messager("Reducing number of dimensions to match reduced data:",
+                         max_dims)
+                dims <- dims[seq(max_dims)]
+            }
+            obj <- Seurat::RunUMAP(obj, 
+                                   reduction = new.reduction, 
+                                   dims = dims, 
+                                   return.model = TRUE)
+        } else if(pipeline=="seuratv4"){ 
+            #### Seurat <v5 strategy ####
+            ## Merge then split to ensure the union of all genes is used during integration 
+            object.list <- Seurat::SplitObject(obj,
+                                               split.by = split.by)
+            object.list <- lapply(object.list,
+                                  Seurat::FindVariableFeatures,
+                                  nfeatures = nfeatures)
+            anchor.features <- Reduce(f = union,
+                                      lapply(object.list, 
+                                             Seurat::VariableFeatures) 
+                                      )
+            #### Integrate data with Seurat ####
+            anchors <- Seurat::FindIntegrationAnchors( 
+                object.list = object.list,
+                dims = dims,
+                anchor.features = anchor.features
+            )
+            obj <- Seurat::IntegrateData(
+                anchorset = anchors,
+                dims = dims,
+                features = anchors@anchor.features,
+                features.to.integrate = anchors@anchor.features
+            )
+            obj <- Seurat::FindVariableFeatures(obj,  nfeatures = nrow(obj)) 
+            #### Process Seurat obj ####
+            obj <- scKirby::process_seurat(obj,
+                                           reduction = new.reduction,
+                                           cluster_reduction = cluster_reduction,
+                                           ## Regressing out source/orig.ident 
+                                           ## seems to give a MUCH worse integration in UMAP space. 
+                                           vars.to.regress = vars.to.regress,
+                                           nfeatures = NULL,
+                                           dims = dims,#setdiff(dims,omit_dims),
+                                           normalize_data = FALSE) 
+        }  
+        #### Save obj ####
+        KGExplorer::cache_save(obj = obj, save_path = save_path) 
+    }
+    return(obj)
+}
\ No newline at end of file
diff --git a/man/add_mixingmetric.Rd b/man/add_mixingmetric.Rd
new file mode 100644
index 0000000..b5145dc
--- /dev/null
+++ b/man/add_mixingmetric.Rd
@@ -0,0 +1,47 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/add_mixingmetric.R
+\name{add_mixingmetric}
+\alias{add_mixingmetric}
+\title{Add mixing metric}
+\usage{
+add_mixingmetric(
+  obj,
+  reduction,
+  new_col = paste0("MixingMetric_", reduction),
+  grouping.var = "source",
+  k = 5,
+  max.k = 300,
+  dims = NULL,
+  invert = TRUE,
+  normalise = FALSE,
+  ...
+)
+}
+\arguments{
+\item{reduction}{Which dimensionally reduced space to use}
+
+\item{grouping.var}{Grouping variable for dataset}
+
+\item{k}{Neighbor number to examine per group}
+
+\item{max.k}{Maximum size of local neighborhood to compute}
+
+\item{dims}{Dimensions to use}
+
+\item{invert}{Invert the mixing metric such that higher values 
+indicate better mixing.}
+
+\item{normalise}{Normalise the mixing metric to the range [0,1].}
+
+\item{...}{
+  Arguments passed on to \code{\link[Seurat:MixingMetric]{Seurat::MixingMetric}}
+  \describe{
+    \item{\code{object}}{Seurat object}
+    \item{\code{eps}}{Error bound on the neighbor finding algorithm (from RANN)}
+    \item{\code{verbose}}{Displays progress bar}
+  }}
+}
+\description{
+Compute the mixing metric for a given reduction and add it to the metadata 
+of a \link{Seurat} object. See \link[Seurat]{MixingMetric} for more details.
+}
diff --git a/man/get_variance_explained.Rd b/man/get_variance_explained.Rd
new file mode 100644
index 0000000..fdf3551
--- /dev/null
+++ b/man/get_variance_explained.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get_variance_explained.R
+\name{get_variance_explained}
+\alias{get_variance_explained}
+\title{Get variance explained}
+\usage{
+get_variance_explained(
+  obj,
+  reduction = names(obj@reductions)[1],
+  layer = "scale.data",
+  dims = NULL
+)
+}
+\description{
+Get the proportion of total variance explained
+ by a given dimensionality reduction 
+ (0-1 where 1 indicates 100% of variance explained).
+}
+\examples{
+obj <- get_HPO()
+out <- get_variance_explained(obj)
+}
diff --git a/man/map_id_sep.Rd b/man/map_id_sep.Rd
new file mode 100644
index 0000000..bc1e4a0
--- /dev/null
+++ b/man/map_id_sep.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/replace_char_fun.R
+\name{map_id_sep}
+\alias{map_id_sep}
+\title{Map ID separators}
+\usage{
+map_id_sep(nms, replace_char = list(. = ":", `_` = ":"))
+}
+\description{
+Replace the separator in an ID in order to standardise it.
+}
+\examples{
+map_id_sep(c("HP:0001","HP.0001","HP_0001"))
+}
diff --git a/man/plot_dag_enrich.Rd b/man/plot_dag_enrich.Rd
index e66a2da..b89ab0b 100644
--- a/man/plot_dag_enrich.Rd
+++ b/man/plot_dag_enrich.Rd
@@ -12,10 +12,15 @@ plot_dag_enrich(
   min_depth = NULL,
   exact_depth = NULL,
   exact_ont_ancestors = FALSE,
+  sort_by = c(log2_fold_enrichment = -1),
   cluster_col = "seurat_clusters",
+  id_col = NULL,
   label_col = "dag_enrich.name_wrap",
+  cluster_id_alpha = 0.9,
+  cluster_id_size = 3,
   color_col = cluster_col,
   replace_char = list(. = ":", `_` = ":"),
+  p_threshold = 0.05,
   q_threshold = 0.05,
   preferred_palettes = "kovesi.cyclic_mygbm_30_95_c78",
   add_labels = TRUE,
@@ -28,6 +33,10 @@ plot_dag_enrich(
   repel = TRUE,
   box = TRUE,
   box.padding = 1,
+  max.overlaps = 100,
+  na.value = "grey50",
+  force_new = FALSE,
+  return_obj = FALSE,
   show_plot = TRUE,
   ...
 )
@@ -38,6 +47,15 @@ plot_dag_enrich(
 \item{box}{Use geom_label/geom_label_repel (includes a box around the text
 labels)}
 
+\item{box.padding}{Amount of padding around bounding box, as unit or number.
+Defaults to 0.25. (Default unit is lines, but other units can be specified
+by passing \code{unit(x, "units")}).}
+
+\item{max.overlaps}{Exclude text labels when they overlap too many other
+things. For each text label, we count how many other text labels or other
+data points it overlaps, and exclude the text label if it has too many overlaps.
+Defaults to 10.}
+
 \item{...}{
   Arguments passed on to \code{\link[Seurat:LabelClusters]{Seurat::LabelClusters}}
   \describe{
diff --git a/man/plot_factors_sankey.Rd b/man/plot_factors_sankey.Rd
new file mode 100644
index 0000000..850c68d
--- /dev/null
+++ b/man/plot_factors_sankey.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_factors_sankey.R
+\name{plot_factors_sankey}
+\alias{plot_factors_sankey}
+\title{Plot factors: Sankey}
+\usage{
+plot_factors_sankey(
+  factor.traits,
+  factor.celltypes,
+  varm = NULL,
+  max_factors = NULL,
+  q_threshold = 0.05,
+  logfc_threshold = NULL,
+  value_var = "q",
+  drop = TRUE,
+  flow.alpha = 0.8,
+  label_size = 3,
+  label_color = "white",
+  label_fill = ggplot2::alpha("black", 0.7),
+  xtext_size = 12,
+  show.legend = FALSE,
+  ...
+)
+}
+\arguments{
+\item{show.legend}{show legend in plot}
+
+\item{...}{
+  Arguments passed on to \code{\link[ggsankey:geom_sankey]{ggsankey::geom_sankey}}
+  \describe{
+    \item{\code{mapping}}{provide you own mapping. both x and y need to be numeric.}
+    \item{\code{data}}{provide you own data}
+    \item{\code{position}}{change position}
+    \item{\code{na.rm}}{remove missing values}
+    \item{\code{space}}{space between nodes in the y-direction}
+    \item{\code{type}}{either 'sankey' or 'alluvial'}
+    \item{\code{width}}{width of nodes}
+    \item{\code{smooth}}{how much smooth should the curve have? More means steeper curve.}
+    \item{\code{inherit.aes}}{should the geom inherit aesthetics}
+  }}
+}
+\description{
+Create a Sankey plot from enrichment results performed on the 
+feature weights and sample loadings from a factorization model.
+}
diff --git a/man/plot_feature_density.Rd b/man/plot_feature_density.Rd
index f440658..eeff7f3 100644
--- a/man/plot_feature_density.Rd
+++ b/man/plot_feature_density.Rd
@@ -8,6 +8,7 @@ plot_feature_density(
   obj,
   features = sample(rownames(obj), 3),
   alpha = 0.5,
+  method = c("wkde", "ks")[1],
   joint = TRUE,
   joint_only = FALSE,
   joint_title = if (length(unique(features)) > 10) {
@@ -23,6 +24,16 @@ plot_feature_density(
 \arguments{
 \item{features}{Features (e.g. genes) to visualize}
 
+\item{method}{Kernel density estimation method:
+\itemize{
+\item \code{ks}: Computes density using the \code{kde} function from the
+ \code{ks} package.
+\item \code{wkde}: Computes density using a modified version of the
+\code{kde2d} function from the \code{MASS}
+package to allow weights. Bandwidth selection from the \code{ks} package
+ is used instead.
+}}
+
 \item{joint}{Return joint density plot? By default \code{FALSE}}
 
 \item{joint_only}{Only return the joint density plot.}
@@ -59,15 +70,6 @@ patchwork (see example).}
 computed reduction is visualized}
     \item{\code{dims}}{Vector of length 2 specifying the dimensions to be plotted. By
 default, the first two dimensions are considered.}
-    \item{\code{method}}{Kernel density estimation method:
-\itemize{
-\item \code{ks}: Computes density using the \code{kde} function from the
- \code{ks} package.
-\item \code{wkde}: Computes density using a modified version of the
-\code{kde2d} function from the \code{MASS}
-package to allow weights. Bandwidth selection from the \code{ks} package
- is used instead.
-}}
     \item{\code{adjust}}{Numeric value to adjust to bandwidth. Default: 1. Not available
 for \code{ks} method}
     \item{\code{size}}{Size of the geom to be plotted (e.g. point size)}
diff --git a/man/plot_hpo_pseudotime.Rd b/man/plot_hpo_pseudotime.Rd
index 7e4d190..0b25a36 100644
--- a/man/plot_hpo_pseudotime.Rd
+++ b/man/plot_hpo_pseudotime.Rd
@@ -6,9 +6,13 @@
 \usage{
 plot_hpo_pseudotime(
   obj,
-  dt_genes = HPOExplorer::load_phenotype_to_genes(1),
-  disease_ids = dt_genes$disease_id[1:3],
-  learn_graph_control = list(prune_graph = FALSE),
+  p2d = HPOExplorer::load_phenotype_to_genes(1),
+  disease_ids = p2d$disease_id[1:3],
+  root_cells = NULL,
+  id_col = NULL,
+  learn_graph_control = list(prune_graph = FALSE, nn.cores =
+    KGExplorer::set_cores()$workers),
+  use_partition = TRUE,
   merge_trajectories = TRUE,
   symptom_color = "red",
   bg_colors = c("#5000ff", "white"),
@@ -16,7 +20,17 @@ plot_hpo_pseudotime(
   title_col = NULL,
   title_width = 100,
   color_by_symptoms = TRUE,
+  color_cells_by = if (color_by_symptoms) {
+     "is_symptom"
+ } else {
+     "cluster"
+ },
   point_alpha = 0.5,
+  add_density = TRUE,
+  density_filled = FALSE,
+  density_adjust = 1,
+  density_alpha = point_alpha,
+  title = stringr::str_wrap(paste0(unique(disease_ids), collapse = "; "), 20 * 2.5),
   show_plot = TRUE,
   ...
 )
@@ -24,14 +38,21 @@ plot_hpo_pseudotime(
 \arguments{
 \item{obj}{\link{Seurat} object of HPO terms generated by \link{prepare_hpo}.}
 
-\item{dt_genes}{\link{data.table} of phenotype to gene associations.}
+\item{disease_ids}{One or more disease IDs found within \code{p2d}.}
 
-\item{disease_ids}{One or more disease IDs found within \code{dt_genes}.}
+\item{root_cells}{NULL or a vector of starting cells. If provided,
+pseudotime will start (i.e. be zero) at these cells. Both
+\code{root_pr_nodes} and \code{root_cells} cannot be provided.}
 
 \item{learn_graph_control}{NULL or a list of control parameters to be
 passed to the reversed graph embedding function. Default is NULL. A list
 of potential control parameters is provided in details.}
 
+\item{use_partition}{logical parameter that determines whether to use
+partitions calculated during \code{cluster_cells} and therefore to learn
+disjoint graph in each partition. When \code{use_partition = FALSE}, a
+single graph is learned across all partitions. Default is TRUE.}
+
 \item{...}{
   Arguments passed on to \code{\link[monocle3:plot_cells]{monocle3::plot_cells}}
   \describe{
diff --git a/man/plot_preservation_histo.Rd b/man/plot_preservation_histo.Rd
new file mode 100644
index 0000000..110e12c
--- /dev/null
+++ b/man/plot_preservation_histo.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_preservation_histo.R
+\name{plot_preservation_histo}
+\alias{plot_preservation_histo}
+\title{Plot preservation histogram}
+\usage{
+plot_preservation_histo(
+  consist_dt,
+  lvl = "hd",
+  key = c(hd = "High-dimensional", md = "Mid-dimensional", ld = "Low-dimensional"),
+  title = paste(key[lvl], "space"),
+  subtitle = paste0("(", consist_dt[[paste0("dim_", lvl)]][1], " dimensions)")
+)
+}
+\description{
+Plot the correlation between equivalent terms in 
+high-, mid-, and low-dimensional space.
+}
diff --git a/man/plot_top_celltypes.Rd b/man/plot_top_celltypes.Rd
index ebf062c..5e05c3c 100644
--- a/man/plot_top_celltypes.Rd
+++ b/man/plot_top_celltypes.Rd
@@ -9,8 +9,7 @@ plot_top_celltypes(
   types = c("radial", "pie", "reduction"),
   reduction = "umap",
   prefix = "^q[.]",
-  cluster_vars = c("seurat_clusters", "dag_enrich.name", "dag_enrich.name_wrap",
-    "cluster_colors"),
+  cluster_vars = c("seurat_clusters"),
   color_col = cluster_vars[1],
   label_col = cluster_vars[1],
   enriched_proportion_threshold = 0.2,
diff --git a/man/prepare_hpo.Rd b/man/prepare_hpo.Rd
index e4263f4..209f38d 100644
--- a/man/prepare_hpo.Rd
+++ b/man/prepare_hpo.Rd
@@ -12,19 +12,22 @@ prepare_hpo(
   ct_results = MSTExplorer::map_celltype(MSTExplorer::load_example_results()),
   id_types = c("hpo_id"),
   min_quantile = 35,
+  min_genes = NULL,
   min_value = NULL,
   celltype_col = c("cl_name", "cl_id", "CellType"),
+  celltype_value = "q",
   value.var = "evidence_score_sum",
   default_assay = "score",
   nfeatures = NULL,
-  vars.to.regress = paste0("nFeature_", default_assay),
+  vars.to.regress = NULL,
   run_nlp = FALSE,
   run_impute = FALSE,
   workers = 1,
   seed = 2023,
   verbose = TRUE,
   save_path = NULL,
-  force_new = FALSE
+  force_new = FALSE,
+  ...
 )
 }
 \arguments{
@@ -45,6 +48,9 @@ disease and HPO terms.}
 \item{min_quantile}{\link{numeric} minimum quantile for filtering
 cell type enrichment results.}
 
+\item{min_genes}{The minimum number of genes of phenotypes/diseases
+to include from the data matrix.}
+
 \item{min_value}{The minimum sum of columns to include from the data matrix.}
 
 \item{celltype_col}{Name of the cell type column.}
@@ -74,11 +80,34 @@ using \pkg{future}.}
 \item{seed}{Random seed to set.}
 
 \item{verbose}{Print messages.}
+
+\item{...}{
+  Arguments passed on to \code{\link[scKirby:process_seurat]{scKirby::process_seurat}}
+  \describe{
+    \item{\code{obj}}{A single-cell object supported by \pkg{scKirby}.
+See \link[scKirby]{converters} for a table of all supported conversions.}
+    \item{\code{meta.data}}{Additional cell-level metadata to add to the Seurat object.
+Should be a \code{\link[base]{data.frame}} where the rows are cell names and
+the columns are additional metadata fields. Row names in the metadata need
+to match the column names of the counts matrix.}
+    \item{\code{dims}}{Which dimensions to use as input features, used only if
+\code{features} is NULL}
+    \item{\code{add_specificity}}{Add a new assay called "specificity" by deriving
+specificity scores from the input data.}
+    \item{\code{n.components}}{The dimension of the space to embed into.}
+    \item{\code{cluster_reduction}}{Recompute neighbors graph based on UMAP
+to get clusters that best reflect UMAP space.
+For this same reason, only cluster in two dimensions,
+because this is the view we most often use.
+That said, this may reduce the generalisability of these clusters/graph.}
+    \item{\code{max_mem}}{Max limit on memory usage, to be passed to environmental
+variable \code{future.globals.maxSize}.}
+  }}
 }
 \description{
 Convert the Human Phenotype Ontology (HPO) to a \link{Seurat} object 
 with extensive metadata.
 }
 \examples{
-obj <- prepare_hpo(id_types="hpo_id")
+obj <- prepare_hpo(id_types="hpo_id", min_genes=10)
 }
diff --git a/man/prepare_opentargets.Rd b/man/prepare_opentargets.Rd
index bfb2ec4..62a14bb 100644
--- a/man/prepare_opentargets.Rd
+++ b/man/prepare_opentargets.Rd
@@ -9,7 +9,7 @@ prepare_opentargets(
   formula = "approvedSymbol ~ diseaseId",
   nfeatures = NULL,
   default_assay = "score",
-  vars.to.regress = paste0("nFeature_", default_assay),
+  vars.to.regress = NULL,
   save_path = NULL,
   force_new = FALSE,
   ...
diff --git a/man/run_dag_enrich.Rd b/man/run_dag_enrich.Rd
index 835b797..65d8cb8 100644
--- a/man/run_dag_enrich.Rd
+++ b/man/run_dag_enrich.Rd
@@ -7,16 +7,25 @@
 run_dag_enrich(
   obj,
   ont,
+  id_col = NULL,
   reduction = NULL,
   cluster_col = NULL,
   min_hits = 3,
   min_offspring = 100,
   trans_fun = NULL,
   replace_char = list(. = ":", `_` = ":"),
+  value_threshold = NULL,
+  p_threshold = 0.05,
   q_threshold = 0.05,
-  top_n = 100
+  top_n = 100,
+  sort_by = c(log2_fold_enrichment = -1)
 )
 }
+\arguments{
+\item{value_threshold}{Minimum weight to include a term 
+(after applying \code{truns_fun}).
+ Only used when \code{reduction} is provided.}
+}
 \description{
 Run enrichment of terms in a DAG ontology.
 }
diff --git a/man/run_integration.Rd b/man/run_integration.Rd
new file mode 100644
index 0000000..24786c2
--- /dev/null
+++ b/man/run_integration.Rd
@@ -0,0 +1,74 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/run_integration.R
+\name{run_integration}
+\alias{run_integration}
+\title{Run integration}
+\usage{
+run_integration(
+  obj,
+  split.by,
+  assay = Seurat::DefaultAssay(obj),
+  nfeatures = nrow(obj),
+  vars.to.regress = NULL,
+  dims = seq(100),
+  save_path = tempfile(fileext = ".rds"),
+  pipeline = c("seuratv5", "seuratv4"),
+  method = Seurat::CCAIntegration,
+  k.weight = 100,
+  orig.reduction = "pca",
+  new.reduction = "cca",
+  cluster_reduction = new.reduction,
+  verbose = TRUE,
+  ...
+)
+}
+\arguments{
+\item{obj}{A single-cell object supported by \pkg{scKirby}.
+See \link[scKirby]{converters} for a table of all supported conversions.}
+
+\item{split.by}{Attribute for splitting. Default is "ident". Currently
+only supported for class-level (i.e. non-quantitative) attributes.}
+
+\item{assay}{Name of assay for integration}
+
+\item{nfeatures}{Number of features to select as top variable features;
+only used when \code{selection.method} is set to \code{'dispersion'} or
+\code{'vst'}}
+
+\item{vars.to.regress}{Variables to regress out (previously latent.vars in
+RegressOut). For example, nUMI, or percent.mito.}
+
+\item{dims}{Dimensions of dimensional reduction to use for integration}
+
+\item{method}{Integration method function}
+
+\item{k.weight}{Number of neighbors to consider when weighting anchors}
+
+\item{orig.reduction}{Name of dimensional reduction for correction}
+
+\item{new.reduction}{Name of new integrated dimensional reduction}
+
+\item{cluster_reduction}{Recompute neighbors graph based on UMAP
+to get clusters that best reflect UMAP space.
+For this same reason, only cluster in two dimensions,
+because this is the view we most often use.
+That said, this may reduce the generalisability of these clusters/graph.}
+
+\item{verbose}{Displays a progress bar for scaling procedure}
+
+\item{...}{
+  Arguments passed on to \code{\link[Seurat:IntegrateLayers]{Seurat::IntegrateLayers}}
+  \describe{
+    \item{\code{object}}{A \code{\link[SeuratObject]{Seurat}} object}
+    \item{\code{features}}{A vector of features to use for integration}
+    \item{\code{layers}}{Names of normalized layers in \code{assay}}
+    \item{\code{scale.layer}}{Name(s) of scaled layer(s) in \code{assay}}
+  }}
+}
+\description{
+Run integration pipeline on a \link{Seurat} object.
+}
+\examples{
+obj <- scKirby::example_obj("seurat")
+obj2 <- run_integration(obj, split.by="groups")
+}
diff --git a/man/scale_color_nightlight.Rd b/man/scale_color_nightlight.Rd
new file mode 100644
index 0000000..c5ea271
--- /dev/null
+++ b/man/scale_color_nightlight.Rd
@@ -0,0 +1,47 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/color_nightlight.R
+\name{scale_color_nightlight}
+\alias{scale_color_nightlight}
+\title{Scale color nightlight}
+\usage{
+scale_color_nightlight(
+  n = 3,
+  colors = pals::gnuplot(n = n + 1)[-(seq(as.integer((n + 1) * 0.2)))],
+  reverse = FALSE,
+  alpha = NULL,
+  ...
+)
+}
+\arguments{
+\item{n}{Number of colors to return.}
+
+\item{reverse}{Reverse the color gradient.}
+
+\item{alpha}{Transparency of the colors.}
+
+\item{...}{
+  Arguments passed on to \code{\link[ggplot2:scale_gradient]{ggplot2::scale_color_gradientn}}
+  \describe{
+    \item{\code{name}}{The name of the scale. Used as the axis or legend title. If
+\code{waiver()}, the default, the name of the scale is taken from the first
+mapping used for that aesthetic. If \code{NULL}, the legend title will be
+omitted.}
+    \item{\code{space}}{colour space in which to calculate gradient. Must be "Lab" -
+other values are deprecated.}
+    \item{\code{na.value}}{Colour to use for missing values}
+    \item{\code{guide}}{Type of legend. Use \code{"colourbar"} for continuous
+colour bar, or \code{"legend"} for discrete colour legend.}
+    \item{\code{aesthetics}}{Character string or vector of character strings listing the
+name(s) of the aesthetic(s) that this scale works with. This can be useful, for
+example, to apply colour settings to the \code{colour} and \code{fill} aesthetics at the
+same time, via \code{aesthetics = c("colour", "fill")}.}
+    \item{\code{colours,colors}}{Vector of colours to use for n-colour gradient.}
+    \item{\code{values}}{if colours should not be evenly positioned along the gradient
+this vector gives the position (between 0 and 1) for each colour in the
+\code{colours} vector. See \code{\link[scales:rescale]{rescale()}} for a convenience function
+to map an arbitrary range to between 0 and 1.}
+  }}
+}
+\description{
+Add color gradient with nightlight colors.
+}