Skip to content
This repository was archived by the owner on Feb 6, 2024. It is now read-only.

Commit

Permalink
New option --out-csv to print .rfdist file in csv format
Browse files Browse the repository at this point in the history
  • Loading branch information
bqminh committed Aug 31, 2019
1 parent 370d684 commit 494d375
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 54 deletions.
107 changes: 53 additions & 54 deletions main/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1364,23 +1364,55 @@ void calcDistribution(Params &params) {
}
}

/**
    Write Robinson-Foulds distances to the file named by `filename`.

    NOTE(review): this span is a rendered diff without +/- markers. The ostream-based
    signature and the first adjacent-pair / all-pairs fragment directly below appear to be
    the replaced version; the description here applies to the filename-based version.

    Output layout:
    - If Params::getInstance().output_format == FORMAT_CSV: '#'-prefixed header comments,
      a "ID1,ID2,Dist" column line, then one "id1,id2,dist" row per distance, with 1-based IDs.
    - Otherwise, if rf_dist_mode == RF_ADJACENT_PAIR or rf_same_pair is set: a line
      "XXX 1 <n>" followed by one line holding the n values rfdist[0..n-1].
    - Otherwise (all pairs): a line "<n> <m>", then n rows "Tree<i>" (0-based i), each
      followed by the m values rfdist[i*m .. i*m+m-1].
    Only FORMAT_CSV is special-cased; any other output_format value takes the plain-text branches.

    @param filename path of the output file; also echoed in the CSV header and in messages
    @param rfdist distances, read as rfdist[i] (i < n) or as a row-major n x m array rfdist[i*m+j]
    @param n number of rows (or number of values in the single-row modes)
    @param m number of columns in all-pairs mode
    @param rf_dist_mode compared against RF_ADJACENT_PAIR to choose the layout
    @param print_msg if true, report the output file name on cout after writing

    Stream failures (failbit/badbit raise ios::failure) are passed to
    outError(ERR_WRITE_OUTPUT, filename).
*/
void printRFDist(ostream &out, double *rfdist, int n, int m, int rf_dist_mode) {
void printRFDist(string filename, double *rfdist, int n, int m, int rf_dist_mode, bool print_msg = true) {
int i, j;
if (rf_dist_mode == RF_ADJACENT_PAIR || Params::getInstance().rf_same_pair) {
out << "XXX ";
out << 1 << " " << n << endl;
for (i = 0; i < n; i++)
out << " " << rfdist[i];
out << endl;
} else {
// all pairs
out << n << " " << m << endl;
for (i = 0; i < n; i++) {
out << "Tree" << i << " ";
for (j = 0; j < m; j++)
out << " " << rfdist[i*m+j];

try {
ofstream out;
// make open/write failures throw ios::failure so they reach the catch below
out.exceptions(ios::failbit | ios::badbit);
out.open(filename);
if (Params::getInstance().output_format == FORMAT_CSV) {
// CSV: '#' comment header (skippable via comment.char='#' in R), then the column names
out << "# Robinson-Foulds distances" << endl
<< "# This file can be read in MS Excel or in R with command:" << endl
<< "# dat=read.csv('" << filename << "',comment.char='#')" << endl
<< "# Columns are comma-separated with following meanings:" << endl
<< "# ID1: Tree 1 ID" << endl
<< "# ID2: Tree 2 ID" << endl
<< "# Dist: Robinson-Foulds distance" << endl
<< "ID1,ID2,Dist" << endl;
if (rf_dist_mode == RF_ADJACENT_PAIR) {
// adjacent pairs: entry i is labelled with IDs i+1 and i+2
for (i = 0; i < n; i++)
out << i+1 << ',' << i+2 << ',' << rfdist[i] << endl;
} else if (Params::getInstance().rf_same_pair) {
// same pair: entry i is labelled with ID i+1 in both columns
for (i = 0; i < n; i++)
out << i+1 << ',' << i+1 << ',' << rfdist[i] << endl;
} else {
// all pairs: one row per (i, j) cell of the row-major n x m array
for (i = 0; i < n; i++) {
for (j = 0; j < m; j++)
out << i+1 << ',' << j+1 << ',' << rfdist[i*m+j] << endl;
}
}
} else if (rf_dist_mode == RF_ADJACENT_PAIR || Params::getInstance().rf_same_pair) {
// plain text, single row: "XXX 1 <n>" header line, then the n values on one line
out << "XXX ";
out << 1 << " " << n << endl;
for (i = 0; i < n; i++)
out << " " << rfdist[i];
out << endl;
} else {
// all pairs
out << n << " " << m << endl;
for (i = 0; i < n; i++) {
// row label uses the 0-based index here, unlike the 1-based IDs of the CSV branch
out << "Tree" << i << " ";
for (j = 0; j < m; j++)
out << " " << rfdist[i*m+j];
out << endl;
}
}
out.close();
if (print_msg)
cout << "Robinson-Foulds distances printed to " << filename << endl;
} catch (ios::failure) {
outError(ERR_WRITE_OUTPUT, filename);
}
}

Expand Down Expand Up @@ -1423,16 +1455,7 @@ void computeRFDistExtended(const char *trees1, const char *trees2, const char *f
outError(ERR_READ_INPUT, trees1);
}

try {
ofstream out;
out.exceptions(ios::failbit | ios::badbit);
out.open(filename);
printRFDist(out, rfdist_raw, ntrees, ntrees2, RF_TWO_TREE_SETS_EXTENDED);
out.close();
cout << "Robinson-Foulds distances printed to " << filename << endl;
} catch (ios::failure) {
outError(ERR_WRITE_OUTPUT, filename);
}
printRFDist(filename, rfdist_raw, ntrees, ntrees2, RF_TWO_TREE_SETS_EXTENDED);
delete [] rfdist_raw;
}

Expand Down Expand Up @@ -1481,16 +1504,8 @@ void computeRFDistSamePair(const char *trees1, const char *trees2, const char *f
outError(ERR_READ_INPUT, trees1);
}

try {
ofstream out;
out.exceptions(ios::failbit | ios::badbit);
out.open(filename);
printRFDist(out, rfdist_raw, ntrees, ntrees2, RF_TWO_TREE_SETS_EXTENDED);
out.close();
cout << "Robinson-Foulds distances printed to " << filename << endl;
} catch (ios::failure) {
outError(ERR_WRITE_OUTPUT, filename);
}
printRFDist(filename, rfdist_raw, ntrees, ntrees2, RF_TWO_TREE_SETS_EXTENDED);

delete [] rfdist_raw;
}

Expand Down Expand Up @@ -1546,31 +1561,15 @@ void computeRFDist(Params &params) {
trees.computeRFDist(rfdist, params.rf_dist_mode, params.split_weight_threshold);
}

if (verbose_mode >= VB_MED) printRFDist(cout, rfdist, n, m, params.rf_dist_mode);
//if (verbose_mode >= VB_MED) printRFDist(cout, rfdist, n, m, params.rf_dist_mode);

try {
ofstream out;
out.exceptions(ios::failbit | ios::badbit);
out.open(filename.c_str());
printRFDist(out, rfdist, n, m, params.rf_dist_mode);
out.close();
cout << "Robinson-Foulds distances printed to " << filename << endl;
} catch (ios::failure) {
outError(ERR_WRITE_OUTPUT, filename);
}
printRFDist(filename, rfdist, n, m, params.rf_dist_mode);

if (incomp_splits)
try {
if (incomp_splits) {
filename = params.out_prefix;
filename += ".incomp";
ofstream out;
out.exceptions(ios::failbit | ios::badbit);
out.open(filename.c_str());
printRFDist(out, incomp_splits, n, m, params.rf_dist_mode);
out.close();
printRFDist(filename, incomp_splits, n, m, params.rf_dist_mode, false);
cout << "Number of incompatible splits in printed to " << filename << endl;
} catch (ios::failure) {
outError(ERR_WRITE_OUTPUT, filename);
}

if (incomp_splits) delete [] incomp_splits;
Expand Down
12 changes: 12 additions & 0 deletions utils/tools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -854,6 +854,7 @@ void parseArg(int argc, char *argv[], Params &params) {
params.aln_output = NULL;
params.aln_site_list = NULL;
params.aln_output_format = ALN_PHYLIP;
params.output_format = FORMAT_NORMAL;
params.newick_extended_format = false;
params.gap_masked_aln = NULL;
params.concatenate_aln = NULL;
Expand Down Expand Up @@ -2091,6 +2092,17 @@ void parseArg(int argc, char *argv[], Params &params) {
continue;
}


if (strcmp(argv[cnt], "--out-csv") == 0) {
params.output_format = FORMAT_CSV;
continue;
}

if (strcmp(argv[cnt], "--out-tsv") == 0) {
params.output_format = FORMAT_TSV;
continue;
}

if (strcmp(argv[cnt], "--figtree") == 0) {
params.newick_extended_format = true;
continue;
Expand Down
15 changes: 15 additions & 0 deletions utils/tools.h
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,16 @@ enum AlnFormat {
ALN_PHYLIP, ALN_FASTA
};

/*
output file format
FORMAT_NORMAL: usual file format used so far
FORMAT_CSV: comma-separated values (csv) file format
FORMAT_TSV: tab-separated values (tsv) file format
*/
enum FileFormat {
FORMAT_NORMAL, FORMAT_CSV, FORMAT_TSV
};

enum ModelTestCriterion {
MTC_AIC, MTC_AICC, MTC_BIC, MTC_ALL
};
Expand Down Expand Up @@ -962,6 +972,11 @@ class Params {
alignment output format
*/
AlnFormat aln_output_format;

/**
output file format; parseArg() initialises it to FORMAT_NORMAL,
and the options --out-csv / --out-tsv set it to FORMAT_CSV / FORMAT_TSV
*/
FileFormat output_format;

/**
tree in extended newick format with node label like [&label=""]
Expand Down

0 comments on commit 494d375

Please sign in to comment.