Merge pull request #94 from saezlab/development

Development
saezlab · Feb 21, 2025 · 7e0d037 · 7e0d037
2 parents 6223131 + d4230f2
commit 7e0d037
Show file tree

Hide file tree

Showing 40 changed files with 2,012 additions and 11,323 deletions.
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 2.1.4
+current_version = 2.1.5
 commit = True
 tag = True
 files = DESCRIPTION README.md README.Rmd

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: MetaProViz
 Type: Package
 Title: METabolomics pre-PRocessing, functiOnal analysis and VIZualisation
-Version: 2.1.4
+Version: 2.1.5
 Authors@R: c(
     person("Christina", "Schmidt", , "christina.schmidt@uni-heidelberg.de", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0002-3867-0881")),

diff --git a/NAMESPACE b/NAMESPACE
@@ -2,6 +2,7 @@
 
 export(ClusterORA)
 export(DMA)
+export(EquivalentIDs)
 export(LoadGaude)
 export(LoadHallmarks)
 export(LoadKEGG)
@@ -31,11 +32,14 @@ export(metaproviz_config_path)
 export(metaproviz_load_config)
 export(metaproviz_reset_config)
 export(metaproviz_save_config)
+importFrom(KEGGREST,keggGet)
+importFrom(KEGGREST,keggList)
 importFrom(OmnipathR,ambiguity)
 importFrom(OmnipathR,config_path)
 importFrom(OmnipathR,id_types)
 importFrom(OmnipathR,load_config)
 importFrom(OmnipathR,logfile)
+importFrom(OmnipathR,ramp_table)
 importFrom(OmnipathR,read_log)
 importFrom(OmnipathR,reset_config)
 importFrom(OmnipathR,save_config)
@@ -134,6 +138,7 @@ importFrom(purrr,map_int)
 importFrom(purrr,partial)
 importFrom(purrr,reduce)
 importFrom(qcc,mqcc)
+importFrom(rappdirs,user_cache_dir)
 importFrom(readr,cols)
 importFrom(readr,read_csv)
 importFrom(reshape2,melt)
@@ -154,6 +159,7 @@ importFrom(stats,lm)
 importFrom(stats,p.adjust)
 importFrom(stats,shapiro.test)
 importFrom(stringr,str_match)
+importFrom(stringr,str_remove)
 importFrom(stringr,str_sub)
 importFrom(stringr,str_to_lower)
 importFrom(stringr,str_trim)

diff --git a/R/DifferentialMetaboliteAnalysis.R b/R/DifferentialMetaboliteAnalysis.R
@@ -55,6 +55,7 @@
 #' @importFrom dplyr rename
 #' @importFrom magrittr %>%
 #' @importFrom tibble rownames_to_column column_to_rownames
+#' @importFrom purrr map reduce
 #' @importFrom logger log_info
 #'
 #' @export
@@ -261,6 +262,38 @@ DMA <-function(InputData,
       })
     }
 
+  ################################################################################################################################################################################################
+  ###############  For CoRe=TRUE create summary of Feature_metadata ###############
+  if(CoRe==TRUE){
+    df_list_selected <- purrr::map(names(DMA_Output), function(df_name) {
+    df <- DMA_Output[[df_name]]  # Extract the dataframe
+
+    # Extract the dynamic column name
+    core_col <- grep("^CoRe_", names(df), value = TRUE)  # Find the column that starts with "CoRe_"
+    # Keep only the "CoRe_" columns whose suffix (the part after "CoRe_") is one of the conditions found in SettingsFile_Sample[[SettingsInfo[["Conditions"]]]]
+    core_col <- core_col[str_remove(core_col, "^CoRe_") %in% unique(SettingsFile_Sample[[SettingsInfo[["Conditions"]]]])]
+
+
+    # Select only the relevant columns
+    df_selected <- df %>%
+      select(Metabolite, all_of(core_col))
+
+    return(df_selected)
+  })
+
+  # Merge all dataframes by "Metabolite"
+  merged_df <- purrr::reduce(df_list_selected, full_join, by = "Metabolite")
+  names(merged_df) <- gsub("\\.x$", "", names(merged_df))#It is likely we have duplications that cause .x, .y, .x.x, .y.y, etc. to be added to the column names. We only keep one column (.x)
+
+  Feature_Metadata <- merged_df %>%
+    select(-all_of(grep("\\.[xy]+$", names(merged_df), value = TRUE)))#Now we remove all other columns with .x.x, .y.y, etc.
+
+  if(is.null(SettingsFile_Metab) == FALSE){ #Add to Metadata file:
+    Feature_Metadata <- merge(SettingsFile_Metab%>%tibble::rownames_to_column("Metabolite"), Feature_Metadata , by = "Metabolite", all.x = TRUE)
+  }
+  }
+
+
   ################################################################################################################################################################################################
   ###############  Plots ###############
   if(CoRe==TRUE){
@@ -354,6 +387,20 @@ DMA <-function(InputData,
                          CoRe=CoRe,
                          PrintPlot=PrintPlot)))
 
+
+  suppressMessages(suppressWarnings(
+    SaveRes(InputList_DF=list("Feature_Metadata"=Feature_Metadata),#This needs to be a list, also for single comparisons
+            InputList_Plot= NULL,
+            SaveAs_Table=SaveAs_Table,
+            SaveAs_Plot=NULL,
+            FolderPath= Folder,
+            FileName= "DMA",
+            CoRe=CoRe,
+            PrintPlot=PrintPlot)))
+
+  DMA_Output_List <- c(DMA_Output_List, list("Feature_Metadata"=Feature_Metadata))
+
+
   DMA_Output_List <- c(DMA_Output_List, list("DMA"=DMA_Output, "VolcanoPlot"=volplotList))
 
   return(invisible(DMA_Output_List))

diff --git a/R/HelperSave.R b/R/HelperSave.R
@@ -170,7 +170,7 @@ SaveRes<- function(InputList_DF= NULL,
         PlotUnit <- "cm"
       }
 
-      ggsave(filename = paste0(FileName_Save, ".",SaveAs_Plot, sep=""), plot = InputList_Plot[[Plot]], width = PlotWidth,  height = PlotHeight, unit=PlotUnit)
+      ggplot2::ggsave(filename = paste0(FileName_Save, ".",SaveAs_Plot, sep=""), plot = InputList_Plot[[Plot]], width = PlotWidth,  height = PlotHeight, unit=PlotUnit)
 
       if(PrintPlot==TRUE){
         suppressMessages(suppressWarnings(plot(InputList_Plot[[Plot]])))

diff --git a/R/MetaDataAnalysis.R b/R/MetaDataAnalysis.R
@@ -28,8 +28,9 @@
 #' @param InputData DF with unique sample identifiers as row names and metabolite numerical values in columns with metabolite identifiers as column names. Use NA for metabolites that were not detected. includes experimental design and outlier column.
 #' @param SettingsFile_Sample \emph{Optional: } DF which contains information about the samples, which will be combined with your input data based on the unique sample identifiers used as rownames. Column "Conditions" with information about the sample conditions (e.g. "N" and "T" or "Normal" and "Tumor"), can be used for feature filtering and colour coding in the PCA. Column "AnalyticalReplicate" including numerical values, defines technical repetitions of measurements, which will be summarised. Column "BiologicalReplicates" including numerical values. Please use the following names: "Conditions", "Biological_Replicates", "Analytical_Replicates".\strong{Default = NULL}
 #' @param Scaling \emph{Optional: } TRUE or FALSE for whether a data scaling is used \strong{Default = TRUE}
-#' @param Percentage \emph{Optional: } Percentage of top and bottom features to be displayed in the results. \strong{Default = 0.1}
-#' @param StatCutoff \emph{Optional: } Cutoff for the adjusted p-value of the ANOVA test. \strong{Default = 0.05}
+#' @param Percentage \emph{Optional: } Percentage of top and bottom features to be displayed in the results summary. \strong{Default = 0.1}
+#' @param StatCutoff \emph{Optional: } Cutoff for the adjusted p-value of the ANOVA test: only results with an adjusted p-value below this cutoff are included in the results summary and displayed on the heatmap. \strong{Default = 0.05}
+#' @param VarianceCutoff \emph{Optional: } Cutoff for the explained variance of a principal component (PC): only term-PC combinations whose explained variance exceeds this cutoff are displayed on the heatmap. \strong{Default = 1}
 #' @param SaveAs_Plot \emph{Optional: } Select the file type of output plots. Options are svg, png, pdf. \strong{Default = svg}
 #' @param SaveAs_Table \emph{Optional: } File types for the analysis results are: "csv", "xlsx", "txt". \strong{Default = "csv"}
 #' @param PrintPlot \emph{Optional: } TRUE or FALSE, if TRUE Volcano plot is saved as an overview of the results. \strong{Default = TRUE}
@@ -59,6 +60,7 @@ MetaAnalysis <- function(InputData,
                          Scaling = TRUE,
                          Percentage = 0.1,
                          StatCutoff= 0.05,
+                         VarianceCutoff=1,
                          SaveAs_Table = "csv",
                          SaveAs_Plot = "svg",
                          PrintPlot= TRUE,
@@ -210,7 +212,7 @@ MetaAnalysis <- function(InputData,
 
    ## ---------- DF 2: Metabolites as row names ------------##
    Res_Top <- Stat_results%>%
-     dplyr::filter(tukeyHSD_p.adjusted< StatCutoff)%>%
+     dplyr::filter(tukeyHSD_p.adjusted < StatCutoff)%>%
      tidyr::separate_rows(paste("Features_", "(Top", Percentage, "%)", sep=""), sep = ", ")%>% # Separate 'Features (Top 0.1%)'
      dplyr::rename("FeatureID":= paste("Features_", "(Top", Percentage, "%)", sep=""))%>%
      dplyr::select(- paste("Features_", "(Bottom", Percentage, "%)", sep=""))
@@ -254,22 +256,31 @@ MetaAnalysis <- function(InputData,
    ## ---------- Plot ------------##
    # Plot DF
    Data_Heat <- Stat_results %>%
-     dplyr::filter(tukeyHSD_p.adjusted < 0.05)%>%#Filter for significant results
-     dplyr::filter(Explained_Variance > 0.1)%>%#Exclude Residuals row
+     dplyr::filter(tukeyHSD_p.adjusted < StatCutoff)%>%#Filter for significant results
+     dplyr::filter(Explained_Variance > VarianceCutoff)%>%#Exclude Residuals row
      dplyr::distinct(term, PC, .keep_all = TRUE)%>%#only keep unique term~PC combinations AND STATS
      dplyr::select(term, PC, Explained_Variance)
 
-   Data_Heat <- reshape2::dcast( Data_Heat, term ~ PC, value.var = "Explained_Variance")%>%
-     tibble::column_to_rownames("term")%>%
-     dplyr::mutate_all(~replace(., is.na(.), 0))
-
-   #Plot
-   invisible(VizHeatmap(InputData = Data_Heat,
-                                    PlotName = "ExplainedVariance-bigger-0.1Percent_AND_p.adj-smaller0.05",
-                                    Scale = "none",
-                                    SaveAs_Plot = SaveAs_Plot,
-                                    PrintPlot = PrintPlot,
-                                    FolderPath = Folder))
+  Data_Heat <- reshape2::dcast( Data_Heat, term ~ PC, value.var = "Explained_Variance")%>%
+       tibble::column_to_rownames("term")%>%
+       dplyr::mutate_all(~replace(., is.na(.), 0))
+
+   if(nrow(Data_Heat) > 2){
+
+     #Plot
+     invisible(VizHeatmap(InputData = Data_Heat,
+                          PlotName = paste0("ExplainedVariance-bigger-", VarianceCutoff , "Percent_AND_p.adj-smaller", StatCutoff, sep=""),
+                          Scale = "none",
+                          SaveAs_Plot = SaveAs_Plot,
+                          PrintPlot = PrintPlot,
+                          FolderPath = Folder))
+
+   }else{
+     message <- paste0("StatCutoff of ", StatCutoff, " and VarianceCutoff of ", VarianceCutoff, " do only return <= 2 cases, hence no heatmap is plotted.")
+     logger::log_info("warning: ", message)
+     warning(message)
+   }
+
 
 
    ###############################################################################################################################################################################################################

diff --git a/R/Processing.R b/R/Processing.R
@@ -213,7 +213,11 @@ PreProcessing <- function(InputData,
   }
 
   if(CoRe ==TRUE){
-     DFList_CoRe <- list( "CV_CoRe_blank"= data_CoReNorm[["DF"]][["CV_CoRe_blank"]],"Variation_ContigencyTable_CoRe_blank"=data_CoReNorm[["DF"]][["Contigency_table_CoRe_blank"]])
+    if(is.null(data_CoReNorm[["DF"]][["Contigency_table_CoRe_blank"]])){
+      DFList_CoRe <- list( "CV_CoRe_blank"= data_CoReNorm[["DF"]][["CV_CoRe_blank"]])
+    }else{
+      DFList_CoRe <- list( "CV_CoRe_blank"= data_CoReNorm[["DF"]][["CV_CoRe_blank"]],"Variation_ContigencyTable_CoRe_blank"=data_CoReNorm[["DF"]][["Contigency_table_CoRe_blank"]])
+    }
      DFList <- c(DFList, DFList_CoRe)
   }
 
@@ -1177,6 +1181,11 @@ CoReNorm <-function(InputData,
       }
       # Filter the CoRe_media samples
       CoRe_medias <- CoRe_medias %>% dplyr::filter(!rownames(CoRe_medias) %in% different_samples)
+    }else{
+      message <- paste0("Only >=2 blank samples available. Thus,we can not perform outlier testing for the blank samples.")
+      logger::log_trace(message)
+      message(message)
+
     }
     CoRe_media_df <- as.data.frame(data.frame("CoRe_mediaMeans"=  colMeans(CoRe_medias, na.rm = TRUE)))
   }