Skip to content

Commit

Permalink
Merge pull request #94 from saezlab/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
ChristinaSchmidt1 authored Feb 21, 2025
2 parents 6223131 + d4230f2 commit 7e0d037
Show file tree
Hide file tree
Showing 40 changed files with 2,012 additions and 11,323 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 2.1.4
current_version = 2.1.5
commit = True
tag = True
files = DESCRIPTION README.md README.Rmd
Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: MetaProViz
Type: Package
Title: METabolomics pre-PRocessing, functiOnal analysis and VIZualisation
Version: 2.1.4
Version: 2.1.5
Authors@R: c(
person("Christina", "Schmidt", , "christina.schmidt@uni-heidelberg.de", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-3867-0881")),
Expand Down
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

export(ClusterORA)
export(DMA)
export(EquivalentIDs)
export(LoadGaude)
export(LoadHallmarks)
export(LoadKEGG)
Expand Down Expand Up @@ -31,11 +32,14 @@ export(metaproviz_config_path)
export(metaproviz_load_config)
export(metaproviz_reset_config)
export(metaproviz_save_config)
importFrom(KEGGREST,keggGet)
importFrom(KEGGREST,keggList)
importFrom(OmnipathR,ambiguity)
importFrom(OmnipathR,config_path)
importFrom(OmnipathR,id_types)
importFrom(OmnipathR,load_config)
importFrom(OmnipathR,logfile)
importFrom(OmnipathR,ramp_table)
importFrom(OmnipathR,read_log)
importFrom(OmnipathR,reset_config)
importFrom(OmnipathR,save_config)
Expand Down Expand Up @@ -134,6 +138,7 @@ importFrom(purrr,map_int)
importFrom(purrr,partial)
importFrom(purrr,reduce)
importFrom(qcc,mqcc)
importFrom(rappdirs,user_cache_dir)
importFrom(readr,cols)
importFrom(readr,read_csv)
importFrom(reshape2,melt)
Expand All @@ -154,6 +159,7 @@ importFrom(stats,lm)
importFrom(stats,p.adjust)
importFrom(stats,shapiro.test)
importFrom(stringr,str_match)
importFrom(stringr,str_remove)
importFrom(stringr,str_sub)
importFrom(stringr,str_to_lower)
importFrom(stringr,str_trim)
Expand Down
47 changes: 47 additions & 0 deletions R/DifferentialMetaboliteAnalysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
#' @importFrom dplyr rename
#' @importFrom magrittr %>%
#' @importFrom tibble rownames_to_column column_to_rownames
#' @importFrom purrr map reduce
#' @importFrom logger log_info
#'
#' @export
Expand Down Expand Up @@ -261,6 +262,38 @@ DMA <-function(InputData,
})
}

################################################################################################################################################################################################
############### For CoRe=TRUE create summary of Feature_metadata ###############
if(CoRe==TRUE){
df_list_selected <- purrr::map(names(DMA_Output), function(df_name) {
df <- DMA_Output[[df_name]] # Extract the dataframe

# Extract the dynamic column name
core_col <- grep("^CoRe_", names(df), value = TRUE) # Find the column that starts with "CoRe_"
# Keep only the columns whose suffix after "CoRe_" matches one of the conditions listed in SettingsFile_Sample
core_col <- core_col[str_remove(core_col, "^CoRe_") %in% unique(SettingsFile_Sample[[SettingsInfo[["Conditions"]]]])]


# Select only the relevant columns
df_selected <- df %>%
select(Metabolite, all_of(core_col))

return(df_selected)
})

# Merge all dataframes by "Metabolite"
merged_df <- purrr::reduce(df_list_selected, full_join, by = "Metabolite")
names(merged_df) <- gsub("\\.x$", "", names(merged_df))#It is likely we have duplications that cause .x, .y, .x.x, .y.y, etc. to be added to the column names. We only keep one column (.x)

Feature_Metadata <- merged_df %>%
select(-all_of(grep("\\.[xy]+$", names(merged_df), value = TRUE)))#Now we remove all other columns with .x.x, .y.y, etc.

if(is.null(SettingsFile_Metab) == FALSE){ #Add to Metadata file:
Feature_Metadata <- merge(SettingsFile_Metab%>%tibble::rownames_to_column("Metabolite"), Feature_Metadata , by = "Metabolite", all.x = TRUE)
}
}


################################################################################################################################################################################################
############### Plots ###############
if(CoRe==TRUE){
Expand Down Expand Up @@ -354,6 +387,20 @@ DMA <-function(InputData,
CoRe=CoRe,
PrintPlot=PrintPlot)))


suppressMessages(suppressWarnings(
SaveRes(InputList_DF=list("Feature_Metadata"=Feature_Metadata),#This needs to be a list, also for single comparisons
InputList_Plot= NULL,
SaveAs_Table=SaveAs_Table,
SaveAs_Plot=NULL,
FolderPath= Folder,
FileName= "DMA",
CoRe=CoRe,
PrintPlot=PrintPlot)))

DMA_Output_List <- c(DMA_Output_List, list("Feature_Metadata"=Feature_Metadata))


DMA_Output_List <- c(DMA_Output_List, list("DMA"=DMA_Output, "VolcanoPlot"=volplotList))

return(invisible(DMA_Output_List))
Expand Down
2 changes: 1 addition & 1 deletion R/HelperSave.R
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ SaveRes<- function(InputList_DF= NULL,
PlotUnit <- "cm"
}

ggsave(filename = paste0(FileName_Save, ".",SaveAs_Plot, sep=""), plot = InputList_Plot[[Plot]], width = PlotWidth, height = PlotHeight, unit=PlotUnit)
ggplot2::ggsave(filename = paste0(FileName_Save, ".",SaveAs_Plot, sep=""), plot = InputList_Plot[[Plot]], width = PlotWidth, height = PlotHeight, unit=PlotUnit)

if(PrintPlot==TRUE){
suppressMessages(suppressWarnings(plot(InputList_Plot[[Plot]])))
Expand Down
43 changes: 27 additions & 16 deletions R/MetaDataAnalysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@
#' @param InputData DF with unique sample identifiers as row names and metabolite numerical values in columns with metabolite identifiers as column names. Use NA for metabolites that were not detected. Includes experimental design and outlier column.
#' @param SettingsFile_Sample \emph{Optional: } DF which contains information about the samples, which will be combined with your input data based on the unique sample identifiers used as rownames. Column "Conditions" with information about the sample conditions (e.g. "N" and "T" or "Normal" and "Tumor") can be used for feature filtering and colour coding in the PCA. Column "Analytical_Replicates", containing numerical values, defines technical repetitions of measurements, which will be summarised. Column "Biological_Replicates" contains numerical values. Please use the following column names: "Conditions", "Biological_Replicates", "Analytical_Replicates". \strong{Default = NULL}
#' @param Scaling \emph{Optional: } TRUE or FALSE for whether a data scaling is used \strong{Default = TRUE}
#' @param Percentage \emph{Optional: } Percentage of top and bottom features to be displayed in the results. \strong{Default = 0.1}
#' @param StatCutoff \emph{Optional: } Cutoff for the adjusted p-value of the ANOVA test. \strong{Default = 0.05}
#' @param Percentage \emph{Optional: } Percentage of top and bottom features to be displayed in the results summary. \strong{Default = 0.1}
#' @param StatCutoff \emph{Optional: } Cutoff for the adjusted p-value of the ANOVA test for the results summary and on the heatmap. \strong{Default = 0.05}
#' @param VarianceCutoff \emph{Optional: } Cutoff for the explained variance: only PCs whose explained variance is greater than this value are displayed on the heatmap. \strong{Default = 1}
#' @param SaveAs_Plot \emph{Optional: } Select the file type of output plots. Options are svg, png, pdf. \strong{Default = svg}
#' @param SaveAs_Table \emph{Optional: } File types for the analysis results are: "csv", "xlsx", "txt". \strong{Default = "csv"}
#' @param PrintPlot \emph{Optional: } TRUE or FALSE, if TRUE Volcano plot is saved as an overview of the results. \strong{Default = TRUE}
Expand Down Expand Up @@ -59,6 +60,7 @@ MetaAnalysis <- function(InputData,
Scaling = TRUE,
Percentage = 0.1,
StatCutoff= 0.05,
VarianceCutoff=1,
SaveAs_Table = "csv",
SaveAs_Plot = "svg",
PrintPlot= TRUE,
Expand Down Expand Up @@ -210,7 +212,7 @@ MetaAnalysis <- function(InputData,

## ---------- DF 2: Metabolites as row names ------------##
Res_Top <- Stat_results%>%
dplyr::filter(tukeyHSD_p.adjusted< StatCutoff)%>%
dplyr::filter(tukeyHSD_p.adjusted < StatCutoff)%>%
tidyr::separate_rows(paste("Features_", "(Top", Percentage, "%)", sep=""), sep = ", ")%>% # Separate 'Features (Top 0.1%)'
dplyr::rename("FeatureID":= paste("Features_", "(Top", Percentage, "%)", sep=""))%>%
dplyr::select(- paste("Features_", "(Bottom", Percentage, "%)", sep=""))
Expand Down Expand Up @@ -254,22 +256,31 @@ MetaAnalysis <- function(InputData,
## ---------- Plot ------------##
# Plot DF
Data_Heat <- Stat_results %>%
dplyr::filter(tukeyHSD_p.adjusted < 0.05)%>%#Filter for significant results
dplyr::filter(Explained_Variance > 0.1)%>%#Exclude Residuals row
dplyr::filter(tukeyHSD_p.adjusted < StatCutoff)%>%#Filter for significant results
dplyr::filter(Explained_Variance > VarianceCutoff)%>%#Exclude Residuals row
dplyr::distinct(term, PC, .keep_all = TRUE)%>%#only keep unique term~PC combinations AND STATS
dplyr::select(term, PC, Explained_Variance)

Data_Heat <- reshape2::dcast( Data_Heat, term ~ PC, value.var = "Explained_Variance")%>%
tibble::column_to_rownames("term")%>%
dplyr::mutate_all(~replace(., is.na(.), 0))

#Plot
invisible(VizHeatmap(InputData = Data_Heat,
PlotName = "ExplainedVariance-bigger-0.1Percent_AND_p.adj-smaller0.05",
Scale = "none",
SaveAs_Plot = SaveAs_Plot,
PrintPlot = PrintPlot,
FolderPath = Folder))
Data_Heat <- reshape2::dcast( Data_Heat, term ~ PC, value.var = "Explained_Variance")%>%
tibble::column_to_rownames("term")%>%
dplyr::mutate_all(~replace(., is.na(.), 0))

if(nrow(Data_Heat) > 2){

#Plot
invisible(VizHeatmap(InputData = Data_Heat,
PlotName = paste0("ExplainedVariance-bigger-", VarianceCutoff , "Percent_AND_p.adj-smaller", StatCutoff, sep=""),
Scale = "none",
SaveAs_Plot = SaveAs_Plot,
PrintPlot = PrintPlot,
FolderPath = Folder))

}else{
message <- paste0("StatCutoff of ", StatCutoff, " and VarianceCutoff of ", VarianceCutoff, " do only return <= 2 cases, hence no heatmap is plotted.")
logger::log_info("warning: ", message)
warning(message)
}



###############################################################################################################################################################################################################
Expand Down
11 changes: 10 additions & 1 deletion R/Processing.R
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,11 @@ PreProcessing <- function(InputData,
}

if(CoRe ==TRUE){
DFList_CoRe <- list( "CV_CoRe_blank"= data_CoReNorm[["DF"]][["CV_CoRe_blank"]],"Variation_ContigencyTable_CoRe_blank"=data_CoReNorm[["DF"]][["Contigency_table_CoRe_blank"]])
if(is.null(data_CoReNorm[["DF"]][["Contigency_table_CoRe_blank"]])){
DFList_CoRe <- list( "CV_CoRe_blank"= data_CoReNorm[["DF"]][["CV_CoRe_blank"]])
}else{
DFList_CoRe <- list( "CV_CoRe_blank"= data_CoReNorm[["DF"]][["CV_CoRe_blank"]],"Variation_ContigencyTable_CoRe_blank"=data_CoReNorm[["DF"]][["Contigency_table_CoRe_blank"]])
}
DFList <- c(DFList, DFList_CoRe)
}

Expand Down Expand Up @@ -1177,6 +1181,11 @@ CoReNorm <-function(InputData,
}
# Filter the CoRe_media samples
CoRe_medias <- CoRe_medias %>% dplyr::filter(!rownames(CoRe_medias) %in% different_samples)
}else{
message <- paste0("Only >=2 blank samples available. Thus,we can not perform outlier testing for the blank samples.")
logger::log_trace(message)
message(message)

}
CoRe_media_df <- as.data.frame(data.frame("CoRe_mediaMeans"= colMeans(CoRe_medias, na.rm = TRUE)))
}
Expand Down
Loading

0 comments on commit 7e0d037

Please sign in to comment.