Skip to content

Commit

Permalink
Merge branch 'tree-mix'
Browse files Browse the repository at this point in the history
  • Loading branch information
thomaskf committed Feb 22, 2024
2 parents f049984 + 29bc4ee commit d68b0b6
Show file tree
Hide file tree
Showing 36 changed files with 4,906 additions and 959 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ endif()
# The version number.
set (iqtree_VERSION_MAJOR 2)
set (iqtree_VERSION_MINOR 2)
set (iqtree_VERSION_PATCH ".6")
set (iqtree_VERSION_PATCH ".6.1")

option(BUILD_SHARED_LIBS "Build Shared Libraries" OFF)

Expand Down
113 changes: 95 additions & 18 deletions main/phyloanalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "alignment/superalignmentunlinked.h"
#include "tree/iqtree.h"
#include "tree/iqtreemix.h"
#include "tree/iqtreemixhmm.h"
#include "tree/phylotreemixlen.h"
#include "model/modelmarkov.h"
#include "model/modeldna.h"
Expand Down Expand Up @@ -452,14 +453,16 @@ void reportModel(ostream &out, PhyloTree &tree) {
reportModel(out, *treemix->at(i));
}
}
// show the tree weights
out << "Tree weights: ";
for (i=0; i<treemix->size(); i++) {
if (i>0)
out << ", ";
out << treemix->weights[i];
}
out << endl << endl;
// if (!tree.isHMM()) {
// show the tree weights
out << "Tree weights: ";
for (i=0; i<treemix->size(); i++) {
if (i>0)
out << ", ";
out << treemix->weights[i];
}
out << endl << endl;
// }
} else if (tree.getModel()->isMixture() && !tree.getModel()->isPolymorphismAware()) {
out << "Mixture model of substitution: " << tree.getModelName() << endl;
// out << "Full name: " << tree.getModelName() << endl;
Expand Down Expand Up @@ -597,12 +600,12 @@ void reportTree(ofstream &out, Params &params, PhyloTree &tree, double tree_lh,
double totalLen;
vector<double> totalLens; // for tree mixture
int df;
IQTreeMix* treemix = NULL;
size_t i;
IQTreeMix* treemix;

if (tree.isTreeMix()) {
treemix = (IQTreeMix*) &tree;
df = ((IQTreeMix*) &tree)->getNParameters();
df = treemix->getNParameters();
for (i=0; i<treemix->size(); i++) {
totalLens.push_back(treemix->at(i)->treeLength());
}
Expand Down Expand Up @@ -954,6 +957,9 @@ void printOutfilesInfo(Params &params, IQTree &tree) {
if (params.print_site_prob)
cout << " Site probability per rate/mix: " << params.out_prefix << ".siteprob"
<< endl;

if (params.print_marginal_prob && params.optimize_params_use_hmm)
cout << " Marginal probability: " << params.out_prefix << ".mprob" << endl;

if (params.print_ancestral_sequence) {
cout << " Ancestral state: " << params.out_prefix << ".state" << endl;
Expand Down Expand Up @@ -1003,6 +1009,11 @@ void printOutfilesInfo(Params &params, IQTree &tree) {
/* if (params.model_name == "WHTEST")
cout <<" WH-TEST report: " << params.out_prefix << ".whtest" << endl;*/

if (params.optimize_params_use_hmm) {
cout << " HMM result file: " << params.out_prefix << ".hmm" << endl;
cout << " " << params.out_prefix << ".pp.hmm" << endl;
}

cout << endl;

}
Expand Down Expand Up @@ -1808,7 +1819,7 @@ void checkZeroDist(Alignment *aln, double *dist) {
void printAnalysisInfo(int model_df, IQTree& iqtree, Params& params) {
// if (!params.raxmllib) {
cout << "Model of evolution: ";
if (iqtree.isSuperTree()) {
if (iqtree.isSuperTree() || iqtree.isTreeMix()) {
cout << iqtree.getModelName() << " (" << model_df << " free parameters)" << endl;
} else {
cout << iqtree.getModelName() << " with ";
Expand Down Expand Up @@ -2212,7 +2223,25 @@ void printMiscInfo(Params &params, IQTree &iqtree, double *pattern_lh) {
else
printSiteLhCategory(site_lh_file.c_str(), &iqtree, params.print_site_lh);
}

if (params.optimize_params_use_hmm){
string hmm_file = string(params.out_prefix) + ".hmm";
int cat_assign_method = 0;
// the categories along sites is assigned according to the path with maximum probability (default)
printHMMResult(hmm_file.c_str(), &iqtree, cat_assign_method);

hmm_file = string(params.out_prefix) + ".pp.hmm";
cat_assign_method = 1;
// the categories along sites is assigned according to the max posterior probability
printHMMResult(hmm_file.c_str(), &iqtree, cat_assign_method);

if (params.print_marginal_prob) {
string mp_file = params.out_prefix;
mp_file += ".mprob";
printMarginalProb(mp_file.c_str(), &iqtree);
}
}

if (params.print_partition_lh && !iqtree.isSuperTree()) {
outWarning("-wpl does not work with non-partition model");
params.print_partition_lh = false;
Expand Down Expand Up @@ -2491,6 +2520,18 @@ bool isTreeMixture(Params& params) {
return (params.model_name.find("+T") != string::npos);
}

// get the number after "+T" for tree-mixture model
int getTreeMixNum(Params& params) {
int n = 0;
size_t p = params.model_name.find("+T");
string str_n;
if (p != string::npos && p < params.model_name.length()-2) {
str_n = params.model_name.substr(p+2);
n = atoi(str_n.c_str());
}
return n;
}

void runTreeReconstruction(Params &params, IQTree* &iqtree) {

// string dist_file;
Expand Down Expand Up @@ -2599,7 +2640,12 @@ void runTreeReconstruction(Params &params, IQTree* &iqtree) {
cout << endl;
if (verbose_mode >= VB_MED) {
cout << "ML-TREE SEARCH START WITH THE FOLLOWING PARAMETERS:" << endl;
int model_df = iqtree->getModelFactory()->getNParameters(BRLEN_OPTIMIZE);
int model_df;
if (iqtree->isTreeMix()) {
model_df = ((IQTreeMix*) iqtree)->getNParameters();
} else {
model_df = iqtree->getModelFactory()->getNParameters(BRLEN_OPTIMIZE);
}
printAnalysisInfo(model_df, *iqtree, params);
}

Expand Down Expand Up @@ -2653,6 +2699,12 @@ void runTreeReconstruction(Params &params, IQTree* &iqtree) {

bool finishedInitTree = false;
double initEpsilon = params.min_iterations == 0 ? params.modelEps : (params.modelEps*10);
if (iqtree->isTreeMix()) {
if (iqtree->isHMM())
initEpsilon = params.treemixhmm_eps;
else
initEpsilon = params.treemix_eps;
}
string initTree;

//None of his will work until there is actually a tree
Expand Down Expand Up @@ -3854,20 +3906,24 @@ int checkCharInFile(char* infile, char c) {
return k;
}

IQTree *newIQTreeMix(Params &params, Alignment *alignment) {
int i, numTree;
IQTree *newIQTreeMix(Params &params, Alignment *alignment, int numTree = 0) {
int i;
vector<IQTree*> trees;

// check how many trees inside the user input file
numTree = checkCharInFile(params.user_file, ';');
if (numTree == 0)
numTree = checkCharInFile(params.user_file, ';');
cout << "Number of input trees: " << numTree << endl;
if (numTree <= 1) {
outError("For using the tree mixture model, there must be at least 2 trees inside the tree file: " + string(params.user_file) + ", and each tree must be followed by the character ';'.");
}
for (i=0; i<numTree; i++) {
trees.push_back(newIQTree(params,alignment));
}
return new IQTreeMix(params, alignment, trees);
// if (params.optimize_params_use_hmm)
return new IQTreeMixHmm(params, alignment, trees);
// else
// return new IQTreeMix(params, alignment, trees);
}

/** get ID of bad or good symtest results */
Expand Down Expand Up @@ -4151,11 +4207,32 @@ void runPhyloAnalysis(Params &params, Checkpoint *checkpoint, IQTree *&tree, Ali
/*************** initialize tree ********************/
bool isTreeMix = isTreeMixture(params);

if (params.optimize_params_use_hmm && !isTreeMix) {
outError("option '-hmmster' is only available for tree mixture model");
}

if (isTreeMix) {
cout << "Tree-mixture model" << endl;
if (params.optimize_params_use_hmm)
cout << "HMMSTER ";
// tree-mixture model
cout << "Tree-mixture model" << endl;

// the minimum gamma shape should be greater than MIN_GAMMA_SHAPE_TREEMIX for tree mixture model
if (params.min_gamma_shape < MIN_GAMMA_SHAPE_TREEMIX) {
if (params.min_gamma_shape != MIN_GAMMA_SHAPE)
cout << "The minimum value for Gamma shape is changed to " << MIN_GAMMA_SHAPE_TREEMIX << endl;
params.min_gamma_shape = MIN_GAMMA_SHAPE_TREEMIX;
}

if (params.user_file == NULL) {
outError("Tree file has to be inputed (using the option -te) for tree-mixture model");
// get the number after "+T" for tree-mixture model
int treeNum = getTreeMixNum(params);
if (treeNum == 0) {
outError("Specify the number of trees in the model or input the tree file using the option '-te' for tree-mixture model");
}
tree = newIQTreeMix(params, alignment, treeNum); // tree mixture model
} else {
tree = newIQTreeMix(params, alignment); // tree mixture model
}
if (params.compute_ml_tree_only) {
outError("option compute_ml_tree_only cannot be set for tree-mixture model");
Expand Down
2 changes: 2 additions & 0 deletions main/phylotesting.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ string getSeqTypeName(SeqType seq_type) {
case SEQ_UNKNOWN: return "unknown";
case SEQ_MULTISTATE: return "MultiState";
}
return "unknown";
}

string getUsualModelSubst(SeqType seq_type) {
Expand Down Expand Up @@ -268,6 +269,7 @@ double CandidateModel::getScore(ModelTestCriterion mtc) {
ASSERT(0 && "Unhandled case");
return 0.0;
}
return 0.0;
}

double CandidateModel::getScore() {
Expand Down
99 changes: 56 additions & 43 deletions main/treetesting.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "tree/phylotree.h"
#include "tree/phylosupertree.h"
#include "tree/iqtreemix.h"
#include "tree/iqtreemixhmm.h"
#include "gsl/mygsl.h"
#include "utils/timeutil.h"

Expand All @@ -24,13 +25,11 @@ void printSiteLh(const char*filename, PhyloTree *tree, double *ptn_lh,
bool append, const char *linename) {
double *pattern_lh;

if (!tree->isTreeMix()) {
if (!ptn_lh) {
pattern_lh = new double[tree->getAlnNPattern()];
tree->computePatternLikelihood(pattern_lh);
} else
pattern_lh = ptn_lh;
}
if (!ptn_lh) {
pattern_lh = new double[tree->getAlnNPattern()];
tree->computePatternLikelihood(pattern_lh);
} else
pattern_lh = ptn_lh;

try {
ofstream out;
Expand All @@ -40,42 +39,20 @@ void printSiteLh(const char*filename, PhyloTree *tree, double *ptn_lh,
} else {
out.open(filename);
}

if (tree->isTreeMix()) {
// tree mixture model
IQTreeMix* treeMix = (IQTreeMix*) tree;
treeMix->showLhProb(out);
/*
// also print out the pattern's likelihoods
string pfilename = filename;
pfilename += ".ptn";
ofstream pout;
pout.exceptions(ios::failbit | ios::badbit);
if (append) {
pout.open(pfilename.c_str(), ios::out | ios::app);
} else {
pout.open(pfilename.c_str());
}
treeMix->showPatternLhProb(pout);
pout.close();
if (!append)
cout << "pattern log-likelihoods printed to " << pfilename << endl;
*/
} else {
if (!append)
out << 1 << " " << tree->getAlnNSite() << endl;
if (!linename)
out << "Site_Lh ";
else {
out.width(10);
out << left << linename;
}
IntVector pattern_index;
tree->aln->getSitePatternIndex(pattern_index);
for (size_t i = 0; i < tree->getAlnNSite(); i++)
out << " " << pattern_lh[pattern_index[i]];
out << endl;

if (!append)
out << 1 << " " << tree->getAlnNSite() << endl;
if (!linename)
out << "Site_Lh ";
else {
out.width(10);
out << left << linename;
}
IntVector pattern_index;
tree->aln->getSitePatternIndex(pattern_index);
for (size_t i = 0; i < tree->getAlnNSite(); i++)
out << " " << pattern_lh[pattern_index[i]];
out << endl;
out.close();
if (!append)
cout << "Site log-likelihoods printed to " << filename << endl;
Expand All @@ -87,6 +64,28 @@ void printSiteLh(const char*filename, PhyloTree *tree, double *ptn_lh,
delete[] pattern_lh;
}

void printHMMResult(const char* filename, PhyloTree *tree, int cat_assign_method) {
// For HMM model, to show the assignment of the categories along sites with max likelihood
// cat_assign_method:
// 0 - the categories along sites is assigned according to the path with maximum probability (default)
// 1 - the categories along sites is assigned according to the max posterior probability
IQTreeMixHmm* iqtreehmm = dynamic_cast<IQTreeMixHmm*>(tree);
iqtreehmm->printResults(filename, cat_assign_method);
/*
if (cat_assign_method == 0)
cout << "HMM results printed to " << filename << endl;
else
cout << "HMM results with max-posterior prob printed to " << filename << endl;
*/
}

void printMarginalProb(const char* filename, PhyloTree *tree) {
// For HMM model, to show the assignment of the categories along sites with max likelihood
IQTreeMixHmm* iqtreehmm = dynamic_cast<IQTreeMixHmm*>(tree);
iqtreehmm->printMarginalProb(filename);
cout << "Marginal probabilities printed to " << filename << endl;
}

void printPartitionLh(const char*filename, PhyloTree *tree, double *ptn_lh,
bool append, const char *linename) {

Expand Down Expand Up @@ -142,6 +141,18 @@ void printSiteLhCategory(const char*filename, PhyloTree *tree, SiteLoglType wsl)

if (wsl == WSL_NONE || wsl == WSL_SITE)
return;
if (tree->isTreeMix()) {
wsl = WSL_TMIXTURE; // tree mixture model
} else if (!tree->getModel()->isMixture()) {
if (wsl != WSL_RATECAT) {
wsl = WSL_RATECAT;
}
} else {
// mixture model
if (wsl == WSL_MIXTURE_RATECAT && tree->getModelFactory()->fused_mix_rate) {
wsl = WSL_MIXTURE;
}
}
int ncat = tree->getNumLhCat(wsl);
if (tree->isSuperTree()) {
PhyloSuperTree *stree = (PhyloSuperTree*)tree;
Expand Down Expand Up @@ -329,7 +340,9 @@ void printSiteProbCategory(const char*filename, PhyloTree *tree, SiteLoglType ws
if (wsl == WSL_NONE || wsl == WSL_SITE)
return;
// error checking
if (!tree->getModel()->isMixture()) {
if (tree->isTreeMix())
wsl = WSL_TMIXTURE; // tree mixture model
else if (!tree->getModel()->isMixture()) {
if (wsl != WSL_RATECAT) {
outWarning("Switch now to '-wspr' as it is the only option for non-mixture model");
wsl = WSL_RATECAT;
Expand Down
Loading

0 comments on commit d68b0b6

Please sign in to comment.