diff --git a/.gitignore b/.gitignore index b0cf483c8..410b29d7b 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,7 @@ build/ /GTAGS /.dir-locals.el /iqtree_config.h +.DS_Store +*.ipch +*.log +/.vs/ diff --git a/.gitmodules b/.gitmodules index e69de29bb..146ec759c 100644 --- a/.gitmodules +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "lsd2"] + path = lsd2 + url = https://github.com/tothuhien/lsd2.git diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 000000000..3fb634102 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,19 @@ +language: cpp +compiler: + - clang + - gcc + +before_script: + - sudo apt-get install libeigen3-dev + - sudo apt-get install libboost1.69-all-dev + - mkdir -p build + - cd build + - cmake .. + +script: +############################################################################ +# Build main and tests +############################################################################ + - make + - ./iqtree + diff --git a/CMakeLists.txt b/CMakeLists.txt index 5bdff1f1e..42e607032 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,714 +1,899 @@ -################################################################## -# IQ-TREE cmake build definition -# Copyright (c) 2012-2015 Bui Quang Minh, Lam-Tung Nguyen -################################################################## - -# Windows example usages: -#------------------------ -# cmake -G "Visual Studio 12" (32-bit version, compiled with MSVC) -# cmake -G "Visual Studio 12 Win64" (64-bit version, compiled with MSVC) -# cmake -G "Visual Studio 12 Win64" -T "Intel C++ Compiler XE 15.0" (64-bit version, compiled with ICC) -# cmake -G "MinGW Makefiles" (TDM-GCC) -# cmake -G "Unix Makefiles" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_MAKE_PROGRAM=mingw32-make (TDM-GCC and clang) - -# Linux example usages: -#---------------------- -# cmake (sequential version) -# cmake (OpenMP version) -# cmake -DIQTREE_FLAGS="m32-single" (32-bit sequential version) -# cmake 
-DIQTREE_FLAGS="m32" (32-bit OpenMP version) -# -# To compile with CLANG on Linux: -# export CC=/usr/bin/clang -# export CXX=/usr/bin/clang++ -# Best practices for setting up CMAKE for diffrent compiler can be found here: -# http://stackoverflow.com/questions/7031126/switching-between-gcc-and-clang-llvm-using-cmake -# -# Mac OSX example usages: -#------------------------ -# -# To build OpenMP version one needs to download Clang version 3.7 or later (as of November 2015) -# Then assuming clang3.7 and clang++3.7 are the newly built compilers, then: -# cmake -DCMAKE_C_COMPILER=clang3.7 -DCMAKE_CXX_COMPILER=clang++3.7 (OpenMP version) -# -# cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DIQTREE_FLAGS=single (sequential version) -# - - -# Compile OpenMP version: cmake .... -# Compile 32-bit version: cmake -DIQTREE_FLAGS=m32 .... -# Compile static version: cmake -DIQTREE_FLAGS=static .... -# Compile static OpenMP version: cmake -DIQTREE_FLAGS="static" .... - -#NOTE: Static linking with clang windows: make a symlink libgcc_eh.a to libgcc.a (administrator required) -# C:\TDM-GCC-64\lib\gcc\x86_64-w64-mingw32\5.1.0>mklink libgcc_eh.a libgcc.a - -cmake_minimum_required(VERSION 2.8.10 FATAL_ERROR) -set(CMAKE_LEGACY_CYGWIN_WIN32 0) - -project(iqtree) -add_definitions(-DIQ_TREE) -# The version number. 
-set (iqtree_VERSION_MAJOR 1) -set (iqtree_VERSION_MINOR 6) -set (iqtree_VERSION_PATCH "rc2") - -set(BUILD_SHARED_LIBS OFF) - -if (CMAKE_C_COMPILER MATCHES "mpic") - set(IQTREE_FLAGS "${IQTREE_FLAGS} mpi") -endif() - -message("IQ-TREE flags : ${IQTREE_FLAGS}") - -if (NOT CMAKE_BUILD_TYPE) - set(CMAKE_BUILD_TYPE "Release") -endif() - -if (CMAKE_BUILD_TYPE STREQUAL "Release") - message("Builde mode : Release") -endif() - -if (CMAKE_GENERATOR MATCHES "Xcode") - set(CMAKE_XCODE_ATTRIBUTE_DEBUG_INFORMATION_FORMAT "dwarf-with-dsym") -endif() - -include_directories("${PROJECT_SOURCE_DIR}") - -################################################################## -# Detect target platforms -################################################################## -if (WIN32) - message("Target OS : Windows") - # build as static binary to run on most machines - if (IQTREE_FLAGS MATCHES "static") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static") - endif() - SET(CMAKE_FIND_LIBRARY_SUFFIXES .lib .a ${CMAKE_FIND_LIBRARY_SUFFIXES}) - add_definitions(-DWIN32) -elseif (APPLE) - message("Target OS : Mac OS X") - # to be compatible back to Mac OS X 10.7 - if (IQTREE_FLAGS MATCHES "oldmac") - add_definitions("-mmacosx-version-min=10.5") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -mmacosx-version-min=10.5") - else() - add_definitions("-mmacosx-version-min=10.7") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -mmacosx-version-min=10.7") - endif() - SET(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES}) -elseif (UNIX) - message("Target OS : Unix") - # build as static binary to run on most machines - if (NOT IQTREE_FLAGS MATCHES "static") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic") - else() - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static") - endif() -else() - # Note that IQ-TREE has NOT been tested on other platforms - message("Target OS : Unknown and untested yet") -endif() - 
-################################################################## -# Setup compiler, currently supported GCC, CLANG, MSVC, and ICC -################################################################## - -set(GCC "FALSE") # GNU compiler -set(CLANG "FALSE") # Clang compiler -set(ICC "FALSE") # Intel compiler -set(VCC "FALSE") # MS Visual C Compiler, note that it is different from MSVC variable -# using C++11 standard -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") - -if (CMAKE_COMPILER_IS_GNUCXX) - message("Compiler : GNU Compiler (gcc)") - set(GCC "TRUE") -# set(COMBINED_FLAGS "-Wall -Wno-unused-function -Wno-sign-compare -pedantic -D_GNU_SOURCE -fms-extensions -Wno-deprecated") - set(CMAKE_CXX_FLAGS_RELEASE "-O3 -g -ffunction-sections -fdata-sections") - set(CMAKE_C_FLAGS_RELEASE "-O3 -g -ffunction-sections -fdata-sections") - if (APPLE) - set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -Wl,-dead_strip") - else() - set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -Wl,--gc-sections") - endif() - # require at least gcc 4.8 - if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8) - message(FATAL_ERROR "GCC version must be at least 4.8!") - endif() - if (WIN32) - # disable AVX on Windows due to memory alignment - set(IQTREE_FLAGS "${IQTREE_FLAGS} novx") - message("WARNING: AVX is disabled on Windows as GCC does not properly suport memory alignment") - endif() -elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang") - message("Compiler : Clang") - set(CLANG "TRUE") -# set(COMBINED_FLAGS "-Wall -Wno-unused-function -Wno-sign-compare -pedantic -D_GNU_SOURCE -Wno-nested-anon-types") - set(CMAKE_CXX_FLAGS_RELEASE "-O3 -ffunction-sections -fdata-sections") - set(CMAKE_C_FLAGS_RELEASE "-O3 -ffunction-sections -fdata-sections") - if (APPLE) - set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -Wl,-dead_strip") - else() - set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -Wl,--gc-sections") - endif() - - # 
use libc++ per default in MacOS - if (APPLE) - SET(CMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY "libc++") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") - endif() - - #remove -rdynamic for Clang under Linux - if (UNIX AND IQTREE_FLAGS MATCHES "static") - SET(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS) - endif() -elseif (CMAKE_CXX_COMPILER_ID MATCHES "MSVC") - set(VCC "TRUE") - message("Compiler : MS Visual C++ Compiler") -elseif (CMAKE_CXX_COMPILER_ID MATCHES "Intel") - message("Compiler : Intel C++ Compiler (icc)") - set(ICC "TRUE") - #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qstd=c99") -else() - message("Compiler : Unknown and untested yet") -endif() - -set(EXE_SUFFIX "") - -if (MSVC) - # MS Visual Studio environment - message("Exporting MS Visual Studio projects...") - add_definitions(/MP) # enable multi-processor compilation - if (CMAKE_BUILD_TYPE STREQUAL "Release") - add_definitions(/Ot /Oi) - if (VCC) - add_definitions(/O2) - elseif (ICC) - #add_definitions(/O3) - add_definitions(/O3) - endif() - endif() -endif() - -# enable link time optimization -if (IQTREE_FLAGS MATCHES "lto") - #if (CLANG) - # set(COMBINED_FLAGS "${COMBINED_FLAGS} -flto=thin") - #else() - set(COMBINED_FLAGS "${COMBINED_FLAGS} -flto") - #endif() -endif() - -################################################################## -# configure MPI compilation -################################################################## - -if (IQTREE_FLAGS MATCHES "mpi") - add_definitions(-D_IQTREE_MPI) - if (NOT CMAKE_CXX_COMPILER MATCHES "mpi") - # if not using the MPI compiler wrapper, set own options manually - find_package(MPI REQUIRED) - set(CMAKE_CXX_COMPILE_FLAGS "${CMAKE_CXX_COMPILE_FLAGS} ${MPI_CXX_COMPILE_FLAGS}") - set(CMAKE_C_COMPILE_FLAGS "${CMAKE_C_COMPILE_FLAGS} ${MPI_C_COMPILE_FLAGS}") - set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} ${MPI_CXX_LINK_FLAGS}") - set(CMAKE_C_LINK_FLAGS "${CMAKE_C_LINK_FLAGS} ${MPI_C_LINK_FLAGS}") - include_directories(${MPI_C_INCLUDE_PATH}) - 
include_directories(${MPI_CXX_INCLUDE_PATH}) - endif() -endif() - - -################################################################## -# Configure PLL build -################################################################## -if (IQTREE_FLAGS MATCHES "pll") - add_definitions(-DUSING_PLL) - set(EXE_SUFFIX "${EXE_SUFFIX}-pll") -endif() - -################################################################## -# detect 32 or 64 bit binary -################################################################## -set (BINARY32 "FALSE") -if(CMAKE_SIZEOF_VOID_P EQUAL 4 OR IQTREE_FLAGS MATCHES "m32") - set(BINARY32 "TRUE") - message("Target binary : 32-bit") - if (CMAKE_GENERATOR MATCHES "Win64") - error("Both 32-bit and 64-bit mode cannot be specified") - endif() - #SET(EXE_SUFFIX "${EXE_SUFFIX}32") - if (GCC OR CLANG) - set(COMBINED_FLAGS "${COMBINED_FLAGS} -m32") - endif() - add_definitions(-DBINARY32) -else() - message("Target binary : 64-bit") -endif() - -if(IQTREE_FLAGS MATCHES "novx") - add_definitions(-D__NOAVX__) -endif() - -################################################################## -# configure OpenMP/PThreads compilation -# change the executable name if compiled for OpenMP parallel version -################################################################## -if (NOT IQTREE_FLAGS MATCHES "single") - message("OpenMP : Yes") - #SET(EXE_SUFFIX "${EXE_SUFFIX}-omp") - add_definitions(-D_USE_PTHREADS) - if (MSVC) - add_definitions(/MT) - endif() - - if (VCC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /openmp") - include_directories("${PROJECT_SOURCE_DIR}/pll") # for PThreads headers - elseif (ICC) - if (WIN32) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qopenmp") - include_directories("${PROJECT_SOURCE_DIR}/pll") # for PThreads headers - else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -qopenmp") - endif() - elseif (GCC) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp -pthread") - elseif (CLANG) - set(CMAKE_C_FLAGS 
"${CMAKE_C_FLAGS} -pthread") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp=libomp") - endif() -else() - message("OpenMP : NONE") -endif() - - -if (IQTREE_FLAGS MATCHES "mpi") - message("MPI : Yes") - SET(EXE_SUFFIX "${EXE_SUFFIX}-mpi") -else() - message("MPI : NONE") -endif() - -################################################################## -# configure SSE/AVX/FMA instructions -################################################################## - -SET(AVX_FLAGS "-D__SSE3 -D__AVX") -if (VCC) - set(AVX_FLAGS "${AVX_FLAGS} /arch:AVX") -elseif (CLANG) - set(AVX_FLAGS "${AVX_FLAGS} -mavx") -elseif (GCC) - set(AVX_FLAGS "${AVX_FLAGS} -mavx -fabi-version=0") -elseif (ICC) - if (WIN32) - set(AVX_FLAGS "${AVX_FLAGS} /arch:avx") - else() - set(AVX_FLAGS "${AVX_FLAGS} -mavx") - endif() -endif() - -SET(SSE_FLAGS "-D__SSE3") -if (VCC) - set(SSE_FLAGS "${SSE_FLAGS} /arch:SSE2 -D__SSE3__") -elseif (GCC OR CLANG) - set(SSE_FLAGS "${SSE_FLAGS} -msse3") -elseif (ICC) - if (WIN32) - set(SSE_FLAGS "${SSE_FLAGS} /arch:sse3") - else() - set(SSE_FLAGS "${SSE_FLAGS} -msse3") - endif() -endif() - -SET(FMA_FLAGS "-D__SSE3 -D__AVX") -if (VCC) - set(FMA_FLAGS "${FMA_FLAGS} /arch:AVX2") -elseif (CLANG) - set(FMA_FLAGS "${FMA_FLAGS} -mavx -mfma") -elseif (GCC) - set(FMA_FLAGS "${FMA_FLAGS} -mavx -fabi-version=0 -mfma") -elseif (ICC) - if (WIN32) - set(FMA_FLAGS "${FMA_FLAGS} /arch:core-avx2") - else() - set(FMA_FLAGS "${FMA_FLAGS} -march=core-avx2") - endif() -endif() - -SET(AVX512_FLAGS "-D__SSE3 -D__AVX") -if (VCC) - message("AVX512 not available in Visual C++") - #set(AVX512_FLAGS "${AVX512_FLAGS} /arch:AVX512") -elseif (CLANG) - set(AVX512_FLAGS "${AVX512_FLAGS} -mavx512f -mfma") -elseif (GCC) - set(AVX512_FLAGS "${AVX512_FLAGS} -mavx512f -mfma") -elseif (ICC) - if (WIN32) - set(AVX512_FLAGS "${AVX512_FLAGS} /QxMIC-AVX512") - else() - set(AVX512_FLAGS "${AVX512_FLAGS} -xMIC-AVX512") - endif() -endif() - - -# further flag to improve performance - -if (IQTREE_FLAGS MATCHES "fma") # 
AVX+FMA instruction set - message("Vectorization : AVX+FMA") - add_definitions(-D__SSE3 -D__AVX) # define both SSE3 and AVX directive - set(COMBINED_FLAGS "${COMBINED_FLAGS} ${FMA_FLAGS}") - #SET(EXE_SUFFIX "${EXE_SUFFIX}-fma") -elseif (IQTREE_FLAGS MATCHES "avx") # AVX instruction set - message("Vectorization : AVX") - add_definitions(-D__SSE3 -D__AVX) # define both SSE3 and AVX directive - set(COMBINED_FLAGS "${COMBINED_FLAGS} ${AVX_FLAGS}") - #SET(EXE_SUFFIX "${EXE_SUFFIX}-avx") -elseif (NOT IQTREE_FLAGS MATCHES "nosse") #SSE intruction set - if (IQTREE_FLAGS MATCHES "KNL") - message("Vectorization : SSE3/AVX/AVX2/AVX-512") - add_definitions(-D__AVX512KNL) - else() - message("Vectorization : SSE3/AVX/AVX2") - endif() - #add_definitions(-D__SSE3) - #set(COMBINED_FLAGS "${COMBINED_FLAGS} ${SSE_FLAGS}") -endif() - - -################################################################## -# Setup compiler flags -################################################################## - -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMBINED_FLAGS}") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMBINED_FLAGS}") -set(CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS} -fno-inline-functions -fno-inline-functions-called-once -fno-optimize-sibling-calls -fno-default-inline -fno-inline -O2 -fno-omit-frame-pointer -g") -set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS} -fno-inline-functions -fno-inline-functions-called-once -fno-optimize-sibling-calls -O2 -fno-omit-frame-pointer -g") - -if(CLANG AND IQTREE_FLAGS MATCHES "static") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread -Wl,--allow-multiple-definition") -endif() - -if (IQTREE_FLAGS MATCHES "libcxx") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") -endif() - - -if (CMAKE_BUILD_TYPE STREQUAL "Release") - message("C flags : ${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_RELEASE}") - message("CXX flags : ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE}") -endif() - -if (CMAKE_BUILD_TYPE STREQUAL "Debug") - message("C flags : 
${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_DEBUG}") - message("CXX flags : ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG}") -endif() - -if (CMAKE_BUILD_TYPE STREQUAL "Profile") - message("C flags : ${CMAKE_C_FLAGS_PROFILE} ") - message("CXX flags : ${CMAKE_CXX_FLAGS_PROFILE} ") -endif() - -message("LINKER flags : ${CMAKE_EXE_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS_RELEASE}") - - -if (GCC) - set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -fno-inline-functions -fno-inline-functions-called-once -fno-default-inline -fno-inline") - set(CMAKE_C_FLAGS_DEBUG "-O0 -g -fno-inline-functions -fno-inline-functions-called-once -fno-default-inline -fno-inline") - set(CMAKE_CXX_FLAGS_MEM "-g -O1") - set(CMAKE_C_FLAGS_MEM "-g -O1") -elseif (CLANG) - set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -fno-inline-functions -fno-inline") - set(CMAKE_C_FLAGS_DEBUG "-O0 -g -fno-inline-functions -fno-inline") - set(CMAKE_CXX_FLAGS_MEM "-g -O1") - set(CMAKE_C_FLAGS_MEM "-g -O1") -endif() - -################################################################## -# check existence of a few basic functions -################################################################## -include (${CMAKE_ROOT}/Modules/CheckFunctionExists.cmake) -check_function_exists (gettimeofday HAVE_GETTIMEOFDAY) -check_function_exists (getrusage HAVE_GETRUSAGE) -check_function_exists (GlobalMemoryStatusEx HAVE_GLOBALMEMORYSTATUSEX) -check_function_exists (strndup HAVE_STRNDUP) -find_package(Backtrace) - -# configure a header file to pass some of the CMake settings -# to the source code -configure_file ( - "${PROJECT_SOURCE_DIR}/iqtree_config.h.in" - "${PROJECT_BINARY_DIR}/iqtree_config.h" - ) - -# add the binary tree to the search path for include files -# so that we will find iqtree_config.h -include_directories("${PROJECT_BINARY_DIR}") - -#zlib will be detected for appearance -#include_directories("${PROJECT_BINARY_DIR}/zlib-1.2.7") - - -if (NOT IQTREE_FLAGS MATCHES "nozlib") - find_package(ZLIB) -endif() - -if(ZLIB_FOUND) - message ("Using system zlib") - 
include_directories(${ZLIB_INCLUDE_DIRS}) -else(ZLIB_FOUND) - message ("Using own zlib-1.2.7") - include_directories("${PROJECT_BINARY_DIR}/zlib-1.2.7" "${PROJECT_SOURCE_DIR}/zlib-1.2.7") - add_subdirectory(zlib-1.2.7) -endif(ZLIB_FOUND) - -################################################################## -# subdirectories containing necessary libraries for the build -################################################################## -add_subdirectory(pll) -add_subdirectory(ncl) -add_subdirectory(nclextra) -add_subdirectory(utils) -add_subdirectory(pda) -add_subdirectory(lbfgsb) -add_subdirectory(whtest) -add_subdirectory(sprng) -#add_subdirectory(zlib-1.2.7) -add_subdirectory(vectorclass) -LIST(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}") -if (NOT EIGEN3_INCLUDE_DIR) - find_package(Eigen3) - if(NOT EIGEN3_FOUND) - message(FATAL_ERROR "Eigen3 library not found. Either install it or rerun cmake with -DEIGEN3_INCLUDE_DIR=") - endif() -endif() -add_definitions("-I${EIGEN3_INCLUDE_DIR} -DUSE_EIGEN3") -add_subdirectory(model) -add_subdirectory(gsl) -add_subdirectory(alignment) -add_subdirectory(tree) - -################################################################## -# the main executable -################################################################## - -add_library(kernelsse tree/phylokernelsse.cpp) - -if (NOT BINARY32 AND NOT IQTREE_FLAGS MATCHES "novx") -add_library(kernelavx tree/phylotreeavx.cpp) -add_library(kernelfma tree/phylokernelfma.cpp) - if (IQTREE_FLAGS MATCHES "KNL") - add_library(kernelavx512 tree/phylokernelavx512.cpp) - endif() -endif() - -if (IQTREE_FLAGS MATCHES "mpi") - add_library(mympi utils/TreeCollection.cpp utils/ObjectStream.cpp) -endif() - -add_executable(iqtree -main/main.cpp -main/phyloanalysis.cpp -main/phyloanalysis.h -main/phylotesting.cpp -main/phylotesting.h -) - -if(Backtrace_FOUND) - include_directories(${Backtrace_INCLUDE_DIR}) - target_link_libraries(iqtree ${Backtrace_LIBRARY}) -endif(Backtrace_FOUND) - - -if (NOT 
IQTREE_FLAGS MATCHES "avx" AND NOT IQTREE_FLAGS MATCHES "fma") - if (NOT IQTREE_FLAGS MATCHES "nosse") - set_target_properties(iqtree ncl nclextra utils pda lbfgsb whtest sprng vectorclass model alignment tree PROPERTIES COMPILE_FLAGS "${SSE_FLAGS}") - endif() - set_target_properties(kernelsse pll PROPERTIES COMPILE_FLAGS "${SSE_FLAGS}") - if (NOT BINARY32 AND NOT IQTREE_FLAGS MATCHES "novx") - set_target_properties(kernelavx pllavx PROPERTIES COMPILE_FLAGS "${AVX_FLAGS}") - set_target_properties(kernelfma PROPERTIES COMPILE_FLAGS "${FMA_FLAGS}") - if (IQTREE_FLAGS MATCHES "KNL") - set_target_properties(kernelavx512 PROPERTIES COMPILE_FLAGS "${AVX512_FLAGS}") - endif() - endif() -endif() - -################################################################## -# setup linking flags -################################################################## - -# link special lib for WIN32 -if (WIN32) - set(PLATFORM_LIB "ws2_32") -else() - set(PLATFORM_LIB "m") -endif() - -if (IQTREE_FLAGS MATCHES "libcxx") - set(STD_LIB "c++abi") -endif() - -set(THREAD_LIB "") -if (NOT IQTREE_FLAGS MATCHES "single") - link_directories(${PROJECT_SOURCE_DIR}/lib) - if (MSVC) - if (BINARY32) - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LIBPATH:${PROJECT_SOURCE_DIR}/lib32") - set(THREAD_LIB "pthreadVC2") - else() - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LIBPATH:${PROJECT_SOURCE_DIR}/lib") - set(THREAD_LIB "pthreadVC2") - endif() - elseif(CLANG AND APPLE) - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${PROJECT_SOURCE_DIR}/libmac") - elseif(CLANG AND WIN32) - if (BINARY32) - target_link_libraries(iqtree ${PROJECT_SOURCE_DIR}/lib32/libiomp5md.dll) - else() - target_link_libraries(iqtree ${PROJECT_SOURCE_DIR}/lib/libiomp5md.dll) - endif() -# set(THREAD_LIB "ompstatic") - endif() - - if (CLANG AND BINARY32) - set (ATOMIC_LIB "atomic") - endif() - -endif() - -# basic linking librararies -target_link_libraries(iqtree pll ncl nclextra utils pda lbfgsb whtest sprng 
vectorclass model gsl alignment tree ${PLATFORM_LIB} ${STD_LIB} ${THREAD_LIB} ${ATOMIC_LIB}) - -if (NOT IQTREE_FLAGS MATCHES "nosse") - target_link_libraries(iqtree kernelsse) -endif() - -# MPI libraries -if (IQTREE_FLAGS MATCHES "mpi") - target_link_libraries(iqtree mympi) - if (NOT CMAKE_CXX_COMPILER MATCHES "mpi") - target_link_libraries(iqtree ${MPI_CXX_LIBRARIES}) - endif() -endif() - -# SSE, AVX etc. libraries -if (NOT BINARY32 AND NOT IQTREE_FLAGS MATCHES "novx") - target_link_libraries(iqtree pllavx kernelavx kernelfma) - if (IQTREE_FLAGS MATCHES "KNL") - target_link_libraries(iqtree kernelavx512) - endif() -endif() - -# setup the executable name -################################################################## -set_target_properties(iqtree PROPERTIES OUTPUT_NAME "iqtree${EXE_SUFFIX}") - -# strip the release build -if (CMAKE_BUILD_TYPE STREQUAL "Release" AND (GCC OR CLANG) AND NOT APPLE) # strip is not necessary for MSVC - if (WIN32) - ADD_CUSTOM_COMMAND(TARGET iqtree POST_BUILD COMMAND strip $) - elseif (NOT APPLE) - ADD_CUSTOM_COMMAND(TARGET iqtree POST_BUILD COMMAND strip $) - endif() -endif() - -if (MSVC) - set (BINARY_DIR "${PROJECT_BINARY_DIR}/Release") -else() - set (BINARY_DIR "${PROJECT_BINARY_DIR}") -endif() - -if (WIN32) - if (MSVC) - ADD_CUSTOM_COMMAND(TARGET iqtree POST_BUILD COMMAND copy "Release\\iqtree${EXE_SUFFIX}.exe" "Release\\iqtree${EXE_SUFFIX}-click.exe") - else() - ADD_CUSTOM_COMMAND(TARGET iqtree POST_BUILD COMMAND copy "iqtree${EXE_SUFFIX}.exe" "iqtree${EXE_SUFFIX}-click.exe") - endif() -endif() - -############################################################## -# add the install targets -############################################################## -install (TARGETS iqtree DESTINATION bin) -install (FILES "${PROJECT_SOURCE_DIR}/example/models.nex" DESTINATION .) -install (FILES "${PROJECT_SOURCE_DIR}/example/example.phy" DESTINATION .) -install (FILES "${PROJECT_SOURCE_DIR}/example/example.nex" DESTINATION .) 
-install (FILES "${PROJECT_SOURCE_DIR}/example/example.cf" DESTINATION .) - -if (WIN32) - install (FILES "${BINARY_DIR}/iqtree${EXE_SUFFIX}-click.exe" DESTINATION bin) - if (NOT IQTREE_FLAGS MATCHES "single" AND MSVC) - if (BINARY32) - install(FILES "${PROJECT_SOURCE_DIR}/lib32/pthreadVC2.dll" DESTINATION bin) - install(FILES "${PROJECT_SOURCE_DIR}/lib32/libiomp5md.dll" DESTINATION bin) - else() - install(FILES "${PROJECT_SOURCE_DIR}/lib/pthreadVC2.dll" DESTINATION bin) - install(FILES "${PROJECT_SOURCE_DIR}/lib/libiomp5md.dll" DESTINATION bin) - endif() -# install(FILES "${PROJECT_SOURCE_DIR}/lib/pthreadGC2.dll" DESTINATION bin) -# install(FILES "${PROJECT_SOURCE_DIR}/lib/pthreadGC2_64.dll" DESTINATION bin) - endif() - - if (NOT IQTREE_FLAGS MATCHES "single" AND CLANG) - if (BINARY32) - install(FILES "${PROJECT_SOURCE_DIR}/lib32/libiomp5md.dll" DESTINATION bin) - else() - install(FILES "${PROJECT_SOURCE_DIR}/lib/libiomp5md.dll" DESTINATION bin) - endif() - endif() - -endif() - -############################################################## -# build a CPack driven installer package -############################################################## -include (InstallRequiredSystemLibraries) -set (CPACK_RESOURCE_FILE_LICENSE - "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE") -set (CPACK_PACKAGE_VERSION_MAJOR "${iqtree_VERSION_MAJOR}") -set (CPACK_PACKAGE_VERSION_MINOR "${iqtree_VERSION_MINOR}") -set (CPACK_PACKAGE_VERSION_PATCH "${iqtree_VERSION_PATCH}") -if(WIN32 OR APPLE) - set(CPACK_GENERATOR "ZIP") - set(CPACK_SOURCE_GENERATOR "ZIP") -else() - set(CPACK_GENERATOR "TGZ") - set(CPACK_SOURCE_GENERATOR "TGZ") -endif() - -#set(CPACK_SOURCE_PACKAGE_FILE_NAME -# "${CMAKE_PROJECT_NAME}-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}") -set(CPACK_SOURCE_IGNORE_FILES - "/build.*/;/debug.*/;/examples/;/test_scripts/;/manual/;/.bzr/;~$;/\\\\.svn/;/\\\\.git/;/pllrepo/;${CPACK_SOURCE_IGNORE_FILES}") - -set (SYSTEM_NAME "${CMAKE_SYSTEM_NAME}") -if (${CMAKE_SYSTEM_NAME} STREQUAL 
"Darwin") - if (IQTREE_FLAGS MATCHES "oldmac") - set (SYSTEM_NAME "MacOS10.5") - else() - set (SYSTEM_NAME "MacOSX") - endif() -endif() - -if (BINARY32) - set (SYSTEM_NAME "${SYSTEM_NAME}32") -endif() - -if (IQTREE_FLAGS MATCHES "KNL") - set (SYSTEM_NAME "${SYSTEM_NAME}KNL") -endif() - -set(CPACK_PACKAGE_FILE_NAME - "${CMAKE_PROJECT_NAME}${EXE_SUFFIX}-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}-${SYSTEM_NAME}") - -if (NOT APPLE) - set(CPACK_STRIP_FILES TRUE) -endif() - -include (CPack) - -#add_custom_target(dist COMMAND ${CMAKE_MAKE_PROGRAM} package_source) +################################################################## +# IQ-TREE cmake build definition +# Copyright (c) 2012-2015 Bui Quang Minh, Lam-Tung Nguyen +################################################################## + +# Windows example usages: +#------------------------ +# cmake -G "Visual Studio 12" (32-bit version, compiled with MSVC) +# cmake -G "Visual Studio 12 Win64" (64-bit version, compiled with MSVC) +# cmake -G "Visual Studio 12 Win64" -T "Intel C++ Compiler XE 15.0" (64-bit version, compiled with ICC) +# cmake -G "MinGW Makefiles" (TDM-GCC) +# cmake -G "Unix Makefiles" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_MAKE_PROGRAM=mingw32-make (TDM-GCC and clang) + +# Linux example usages: +#---------------------- +# cmake (sequential version) +# cmake (OpenMP version) +# cmake -DIQTREE_FLAGS="m32-single" (32-bit sequential version) +# cmake -DIQTREE_FLAGS="m32" (32-bit OpenMP version) +# +# To compile with CLANG on Linux: +# export CC=/usr/bin/clang +# export CXX=/usr/bin/clang++ +# Best practices for setting up CMAKE for diffrent compiler can be found here: +# http://stackoverflow.com/questions/7031126/switching-between-gcc-and-clang-llvm-using-cmake +# +# Mac OSX example usages: +#------------------------ +# +# To build OpenMP version one needs to download Clang version 3.7 or later (as of November 2015) +# Then assuming 
clang3.7 and clang++3.7 are the newly built compilers, then: +# cmake -DCMAKE_C_COMPILER=clang3.7 -DCMAKE_CXX_COMPILER=clang++3.7 (OpenMP version) +# +# cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DIQTREE_FLAGS=single (sequential version) +# +# Xcode project example usages: +#------------------------------ +# +# To generate Xcode project without OpenMP: +# cmake -G Xcode -DIQTREE_FLAGS=single +# +# To generate Xcode project with OpenMP support, +# assuming you installed LLVM via Homebrew: brew install --with-toolchain llvm +# cmake -G XCode -DCMAKE_XCODE_ATTRIBUTE_CC=/usr/local/opt/llvm/bin/clang -DCMAKE_XCODE_ATTRIBUTE_CXX=/usr/local/opt/llvm/bin/clang++ +# +# +# Compile OpenMP version: cmake .... +# Compile 32-bit version: cmake -DIQTREE_FLAGS=m32 .... +# Compile static version: cmake -DIQTREE_FLAGS=static .... +# Compile static OpenMP version: cmake -DIQTREE_FLAGS="static" .... +# + +#NOTE: Static linking with clang windows: make a symlink libgcc_eh.a to libgcc.a (administrator required) +# C:\TDM-GCC-64\lib\gcc\x86_64-w64-mingw32\5.1.0>mklink libgcc_eh.a libgcc.a +# +# + +cmake_minimum_required(VERSION 2.8.10 FATAL_ERROR) +set(CMAKE_LEGACY_CYGWIN_WIN32 0) + +set(GCC_MIN_VERSION "4.8") + +project(iqtree) +add_definitions(-DIQ_TREE) + +# Find Eigen3 library +if (NOT EIGEN3_INCLUDE_DIR) + find_package(Eigen3) + if(NOT EIGEN3_FOUND) + message(FATAL_ERROR "Eigen3 library not found. Either install it or rerun cmake with -DEIGEN3_INCLUDE_DIR=") + endif() +endif() +add_definitions(-I${EIGEN3_INCLUDE_DIR}) + +# Find Boost library +find_package(Boost REQUIRED) +if(Boost_FOUND) + add_definitions(-I${Boost_INCLUDE_DIRS}) + add_definitions(-DUSE_BOOST) +endif() + + +# The version number. 
+set (iqtree_VERSION_MAJOR 2)
+set (iqtree_VERSION_MINOR 1)
+set (iqtree_VERSION_PATCH ".0")
+
+option(BUILD_SHARED_LIBS "Build Shared Libraries" OFF)
+
+# An MPI compiler wrapper (mpicc, mpicxx, ...) implies the "mpi" flag.
+if (CMAKE_C_COMPILER MATCHES "mpic")
+    set(IQTREE_FLAGS "${IQTREE_FLAGS} mpi")
+endif()
+
+message("IQ-TREE flags : ${IQTREE_FLAGS}")
+
+# Default to a Release build when the user did not choose a build type.
+if (NOT CMAKE_BUILD_TYPE)
+    set(CMAKE_BUILD_TYPE "Release")
+endif()
+
+if (CMAKE_BUILD_TYPE STREQUAL "Release")
+    message("Build mode : Release")
+endif()
+
+if (CMAKE_GENERATOR MATCHES "Xcode")
+    set(CMAKE_XCODE_ATTRIBUTE_DEBUG_INFORMATION_FORMAT "dwarf-with-dsym")
+#    if (NOT CMAKE_XCODE_ATTRIBUTE_COMPILER_INDEX_STORE_ENABLE)
+#        set(CMAKE_XCODE_ATTRIBUTE_COMPILER_INDEX_STORE_ENABLE "No")
+#    endif()
+endif()
+
+include_directories("${PROJECT_SOURCE_DIR}")
+include_directories("${PROJECT_SOURCE_DIR}/yaml-cpp/include")
+
+##################################################################
+# Include the Terraphast library
+##################################################################
+option(USE_TERRAPHAST "Use phylogentic terraces library (terraphast)" ON)
+
+if (USE_TERRAPHAST)
+    # GCC_MIN_VERSION is only defined here; the GCC check below is skipped when it is unset.
+    set(GCC_MIN_VERSION "5.4")
+    add_definitions(-DIQTREE_TERRAPHAST)
+
+    option(TERRAPHAST_USE_GMP "" OFF)
+    option(TERRAPHAST_BUILD_CLIB "" OFF)
+    option(TERRAPHAST_BUILD_APPS "" OFF)
+    option(TERRAPHAST_BUILD_TESTS "" OFF)
+endif()
+
+##################################################################
+# Include the LSD2 library
+##################################################################
+option(USE_LSD2 "Use least square dating (lsd2)" OFF)
+
+if (USE_LSD2)
+    add_definitions(-DUSE_LSD2)
+endif()
+
+##################################################################
+# Detect target platforms
+##################################################################
+if (WIN32)
+    message("Target OS : Windows")
+    # build as static binary to run on most machines
+    if (IQTREE_FLAGS MATCHES "static")
+        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static")
+    endif()
+    set(CMAKE_FIND_LIBRARY_SUFFIXES .lib .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
+elseif (APPLE)
+    message("Target OS : Mac OS X")
+    # to be compatible back to Mac OS X 10.7 (or 10.5 with the "oldmac" flag)
+    if (IQTREE_FLAGS MATCHES "oldmac")
+        add_definitions("-mmacosx-version-min=10.5")
+        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -mmacosx-version-min=10.5")
+    else()
+        add_definitions("--target=x86_64-apple-macos10.7")
+        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --target=x86_64-apple-macos10.7")
+    endif()
+    set(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
+elseif (UNIX)
+    message("Target OS : Unix")
+    # build as static binary to run on most machines
+    if (NOT IQTREE_FLAGS MATCHES "static")
+        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic")
+    else()
+        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static")
+    endif()
+else()
+    # Note that IQ-TREE has NOT been tested on other platforms
+    message("Target OS : Unknown and untested yet")
+endif()
+
+##################################################################
+# Setup compiler, currently supported GCC, CLANG, MSVC, and ICC
+##################################################################
+
+set(GCC "FALSE")   # GNU compiler
+set(CLANG "FALSE") # Clang compiler
+set(ICC "FALSE")   # Intel compiler
+set(VCC "FALSE")   # MS Visual C Compiler, note that it is different from MSVC variable
+set(CLANG_UNDER_VS "FALSE") # Clang compiler, used from inside Visual Studio
+# using C++11 standard
+# NOTE(review): MATCHES is case-sensitive, so this only fires when the compiler
+# path literally contains "VISUAL STUDIO" in upper case - confirm on the target machine.
+if (CMAKE_CXX_COMPILER MATCHES "VISUAL STUDIO")
+    set(CLANG_UNDER_VS "TRUE")
+    #it won't recognize the -std=c++11 parameter.
+    #Todo: don't hard-code this; figure out some way it can be passed in (though ideally, not the whole shebang).
+    include_directories("C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\Llvm\\lib\\clang\\10.0.0\\include")
+else()
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+endif()
+
+if (CMAKE_COMPILER_IS_GNUCXX)
+    message("Compiler : GNU Compiler (gcc)")
+    set(GCC "TRUE")
+#    set(COMBINED_FLAGS "-Wall -Wno-unused-function -Wno-sign-compare -pedantic -D_GNU_SOURCE -fms-extensions -Wno-deprecated")
+    set(CMAKE_CXX_FLAGS_RELEASE "-O2 -g -ffunction-sections -fdata-sections")
+    set(CMAKE_C_FLAGS_RELEASE "-O2 -g -ffunction-sections -fdata-sections")
+    if (APPLE)
+        set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -Wl,-dead_strip")
+    else()
+        set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -Wl,--gc-sections")
+    endif()
+    # require at least gcc ${GCC_MIN_VERSION}; only checked when a minimum was
+    # actually set (it is defined only when USE_TERRAPHAST is ON)
+    if (DEFINED GCC_MIN_VERSION AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "${GCC_MIN_VERSION}")
+        message(FATAL_ERROR "GCC version must be at least ${GCC_MIN_VERSION}!")
+    endif()
+    if (WIN32)
+        # disable AVX on Windows due to memory alignment
+        set(IQTREE_FLAGS "${IQTREE_FLAGS} novx")
+        message("WARNING: AVX is disabled on Windows as GCC does not properly suport memory alignment")
+    endif()
+elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+    message("Compiler : Clang")
+    set(CLANG "TRUE")
+#    set(COMBINED_FLAGS "-Wall -Wno-unused-function -Wno-sign-compare -pedantic -D_GNU_SOURCE -Wno-nested-anon-types")
+    set(CMAKE_CXX_FLAGS_RELEASE "-O3 -ffunction-sections -fdata-sections")
+    set(CMAKE_C_FLAGS_RELEASE "-O3 -ffunction-sections -fdata-sections")
+    if (APPLE)
+        set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -Wl,-dead_strip")
+    else()
+        set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -Wl,--gc-sections")
+    endif()
+
+    # use libc++ per default in MacOS
+    if (APPLE)
+        set(CMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY "libc++")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
+    endif()
+
+    #remove -rdynamic for Clang under Linux
+    if (UNIX AND IQTREE_FLAGS MATCHES "static")
+        set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS)
+    endif()
+elseif (CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
+    set(VCC "TRUE")
+    message("Compiler : MS Visual C++ Compiler")
+elseif (CMAKE_CXX_COMPILER_ID MATCHES "Intel")
+    message("Compiler : Intel C++ Compiler (icc)")
+    set(ICC "TRUE")
+    #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qstd=c99")
+else()
+    message("Compiler : Unknown and untested yet")
+endif()
+message("Compiler version: ${CMAKE_CXX_COMPILER_VERSION}")
+
+# Suffix appended to the executable name ("-pll", "-mpi", ...).
+set(EXE_SUFFIX "")
+
+if (MSVC)
+    # MS Visual Studio environment
+    message("Exporting MS Visual Studio projects...")
+    if (CLANG_UNDER_VS)
+        #see https://clang.llvm.org/docs/UsersManual.html#clang-cl
+        #note .GX is how you say -fexceptions
+        add_definitions(/D_UWIN)
+        set(CMAKE_C_FLAGS_RELEASE "/O2 /GX")
+        set(CMAKE_C_FLAGS_DEBUG "/D_UWIN /GX")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /I${EIGEN3_INCLUDE_DIR}")
+    else()
+        add_definitions(/MP) # enable multi-processor compilation
+    endif()
+    if (CMAKE_BUILD_TYPE STREQUAL "Release")
+        add_definitions(/Ot /Oi)
+        if (VCC)
+            add_definitions(/O2)
+        elseif (ICC)
+            add_definitions(/O3)
+        endif()
+    endif()
+endif()
+
+# enable link time optimization
+if (IQTREE_FLAGS MATCHES "lto")
+    #if (CLANG)
+    #    set(COMBINED_FLAGS "${COMBINED_FLAGS} -flto=thin")
+    #else()
+    set(COMBINED_FLAGS "${COMBINED_FLAGS} -flto")
+    #endif()
+endif()
+
+##################################################################
+# onnxruntime (added by TD)
+##################################################################
+
+# NOTE(review): ONNXRUNTIME_ROOTDIR is set on Windows but never read again in
+# this file, and a user-supplied value adds no include or link directory here.
+# Confirm how the onnxruntime headers and library are expected to be found.
+if(NOT ONNXRUNTIME_ROOTDIR)
+    if(WIN32)
+        set(ONNXRUNTIME_ROOTDIR "C:/Program Files (x86)/onnxruntime")
+    else()
+        include_directories("/usr/local/include/onnxruntime")
+    endif()
+endif()
+
+##################################################################
+# detect 32 or 64 bit binary
+##################################################################
+set (BINARY32 "FALSE")
+if(CMAKE_SIZEOF_VOID_P EQUAL 4 OR IQTREE_FLAGS MATCHES "m32")
+    set(BINARY32 "TRUE")
+    message("Target binary : 32-bit")
+    if (CMAKE_GENERATOR MATCHES "Win64")
+        # message(FATAL_ERROR) stops configuration with this text
+        message(FATAL_ERROR "Both 32-bit and 64-bit mode cannot be specified")
+    endif()
+    if (GCC OR CLANG)
+        set(COMBINED_FLAGS "${COMBINED_FLAGS} -m32")
+    endif()
+    add_definitions(-DBINARY32)
+    if (WIN32)
+        add_definitions(-DWIN32)
+    endif()
+else()
+    message("Target binary : 64-bit")
+    if (CLANG_UNDER_VS)
+        set(COMBINED_FLAGS "${COMBINED_FLAGS} -m64")
+    endif()
+    if (WIN32)
+        add_definitions(-DWIN64)
+    endif()
+endif()
+
+##################################################################
+# configure MPI compilation
+##################################################################
+
+if (IQTREE_FLAGS MATCHES "mpi")
+    add_definitions(-D_IQTREE_MPI)
+    if (NOT CMAKE_CXX_COMPILER MATCHES "mpi")
+        # if not using the MPI compiler wrapper, set own options manually
+        find_package(MPI)
+        # FindMPI reports success through MPI_FOUND (variable names are case-sensitive)
+        if (MPI_FOUND)
+            # NOTE(review): CMAKE_<LANG>_COMPILE_FLAGS are kept as written; confirm
+            # they are the variables the build is meant to consume.
+            set(CMAKE_CXX_COMPILE_FLAGS "${CMAKE_CXX_COMPILE_FLAGS} ${MPI_CXX_COMPILE_FLAGS}")
+            set(CMAKE_C_COMPILE_FLAGS "${CMAKE_C_COMPILE_FLAGS} ${MPI_C_COMPILE_FLAGS}")
+            set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} ${MPI_CXX_LINK_FLAGS}")
+            set(CMAKE_C_LINK_FLAGS "${CMAKE_C_LINK_FLAGS} ${MPI_C_LINK_FLAGS}")
+            include_directories(${MPI_C_INCLUDE_PATH})
+            include_directories(${MPI_CXX_INCLUDE_PATH})
+        else()
+            if(CLANG_UNDER_VS)
+                #Under Visual Studio, the MPI package isn't found, if you haven't installed FORTRAN
+                set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /I${MPI_DIR}/Include")
+                set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /I${MPI_DIR}/Include")
+                #Zork. Linker
+                #message("MPI DIR was ${MPI_DIR}")
+                if (BINARY32)
+                    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LIBPATH:${MPI_DIR}\\lib\\x86 msmpi.lib /PROFILE")
+                else()
+                    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LIBPATH:${MPI_DIR}\\lib\\x64 msmpi.lib /PROFILE")
+                endif()
+            endif()
+        endif()
+    endif()
+endif()
+
+
+##################################################################
+# Configure PLL build
+##################################################################
+if (IQTREE_FLAGS MATCHES "pll")
+    add_definitions(-DUSING_PLL)
+    set(EXE_SUFFIX "${EXE_SUFFIX}-pll")
+endif()
+
+if(IQTREE_FLAGS MATCHES "novx")
+    add_definitions(-D__NOAVX__)
+endif()
+
+##################################################################
+# configure OpenMP/PThreads compilation
+# change the executable name if compiled for OpenMP parallel version
+##################################################################
+if (NOT IQTREE_FLAGS MATCHES "single")
+    message("OpenMP : Yes")
+    #SET(EXE_SUFFIX "${EXE_SUFFIX}-omp")
+    if (NOT CLANG_UNDER_VS)
+        add_definitions(-D_USE_PTHREADS)
+    endif()
+    if (MSVC)
+        if (NOT CLANG_UNDER_VS)
+            add_definitions(/MT)
+        endif()
+    endif()
+
+    # Pre-built threading/OpenMP libraries shipped in the source tree.
+    if(CLANG AND APPLE)
+        link_directories(${PROJECT_SOURCE_DIR}/libmac)
+    elseif (WIN32 OR UNIX)
+        if (BINARY32)
+            link_directories(${PROJECT_SOURCE_DIR}/lib32)
+        else()
+            link_directories(${PROJECT_SOURCE_DIR}/lib)
+        endif()
+    endif()
+
+    if (VCC)
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /openmp")
+        include_directories("${PROJECT_SOURCE_DIR}/pll") # for PThreads headers
+    elseif (ICC)
+        if (WIN32)
+            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qopenmp")
+            include_directories("${PROJECT_SOURCE_DIR}/pll") # for PThreads headers
+        else()
+            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -qopenmp")
+        endif()
+    elseif (GCC)
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp -pthread")
+    elseif (CLANG)
+        if (CLANG_UNDER_VS)
+            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /openmp /D_CRT_SECURE_NO_WARNINGS /D_CRT_NONSTDC_NO_WARNINGS /DCLANG_UNDER_VS=1")
+            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /openmp /D_CRT_SECURE_NO_WARNINGS /D_CRT_NONSTDC_NO_WARNINGS /DCLANG_UNDER_VS=1")
+            include_directories("${PROJECT_SOURCE_DIR}/pll") # for PThreads headers
+
+            #Next two lines don't work for me, as I only have VS 2019 Community
+            #set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /implib:vcomp.lib")
+            #set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS_DEBUG} /implib:vcompd.lib")
+
+            #The problem here is that LLVM installs either 64 bit or 32 bit libraries, but not both.
+            #Though perhaps in debug builds this should be libiomp5md.lib
+            set (LLVM_DIR "C:\\Projects\\LLVM_10")
+            set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LIBPATH:${LLVM_DIR}\\lib libomp.lib")
+
+            #Also need to ensure libomp.dll is in the path (or copied into the output directory).
+            #(You want the one in ${LLVM_DIR}\\bin
+            #But I don't as yet know how to tell CMake to do that. -James B. 23-Jul-2020
+        else()
+            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")
+            if (APPLE OR WIN32 )
+                set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xpreprocessor -fopenmp -pthread")
+                set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lomp")
+            else()
+                set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp -pthread")
+            endif()
+        endif()
+    endif()
+else()
+    message("OpenMP : NONE")
+endif()
+
+
+if (IQTREE_FLAGS MATCHES "mpi")
+    message("MPI : Yes")
+    set(EXE_SUFFIX "${EXE_SUFFIX}-mpi")
+else()
+    message("MPI : NONE")
+endif()
+
+##################################################################
+# configure SSE/AVX/FMA instructions
+##################################################################
+
+# Per-instruction-set compile flags; they are attached to individual targets
+# further below via set_target_properties(... COMPILE_FLAGS ...).
+set(AVX_FLAGS "-D__SSE3 -D__AVX")
+if (VCC)
+    set(AVX_FLAGS "${AVX_FLAGS} /arch:AVX")
+elseif (CLANG)
+    set(AVX_FLAGS "${AVX_FLAGS} -mavx")
+elseif (GCC)
+    set(AVX_FLAGS "${AVX_FLAGS} -mavx -fabi-version=0")
+elseif (ICC)
+    if (WIN32)
+        set(AVX_FLAGS "${AVX_FLAGS} /arch:avx")
+    else()
+        set(AVX_FLAGS "${AVX_FLAGS} -mavx")
+    endif()
+endif()
+
+set(SSE_FLAGS "-D__SSE3")
+if (VCC)
+    set(SSE_FLAGS "${SSE_FLAGS} /arch:SSE2 -D__SSE3__")
+elseif (GCC OR CLANG)
+    set(SSE_FLAGS "${SSE_FLAGS} -msse3")
+elseif (ICC)
+    if (WIN32)
+        set(SSE_FLAGS "${SSE_FLAGS} /arch:sse3")
+    else()
+        set(SSE_FLAGS "${SSE_FLAGS} -msse3")
+    endif()
+endif()
+
+set(FMA_FLAGS "-D__SSE3 -D__AVX")
+if (VCC)
+    set(FMA_FLAGS "${FMA_FLAGS} /arch:AVX2")
+elseif (CLANG)
+    set(FMA_FLAGS "${FMA_FLAGS} -mavx -mfma")
+elseif (GCC)
+    set(FMA_FLAGS "${FMA_FLAGS} -mavx -fabi-version=0 -mfma")
+elseif (ICC)
+    if (WIN32)
+        set(FMA_FLAGS "${FMA_FLAGS} /arch:core-avx2")
+    else()
+        set(FMA_FLAGS "${FMA_FLAGS} -march=core-avx2")
+    endif()
+endif()
+
+set(AVX512_FLAGS "-D__SSE3 -D__AVX")
+if (VCC)
+    message("AVX512 not available in Visual C++")
+    #set(AVX512_FLAGS "${AVX512_FLAGS} /arch:AVX512")
+elseif (CLANG)
+    set(AVX512_FLAGS "${AVX512_FLAGS} -mavx512f -mfma")
+elseif (GCC)
+    set(AVX512_FLAGS "${AVX512_FLAGS} -mavx512f -mfma")
+elseif (ICC)
+    if (WIN32)
+        set(AVX512_FLAGS "${AVX512_FLAGS} /QxMIC-AVX512")
+    else()
+        set(AVX512_FLAGS "${AVX512_FLAGS} -xMIC-AVX512")
+    endif()
+endif()
+
+# further flag to improve performance
+
+if (IQTREE_FLAGS MATCHES "fma") # AVX+FMA instruction set
+    message("Vectorization : AVX+FMA")
+    add_definitions(-D__SSE3 -D__AVX) # define both SSE3 and AVX directive
+    set(COMBINED_FLAGS "${COMBINED_FLAGS} ${FMA_FLAGS}")
+    #SET(EXE_SUFFIX "${EXE_SUFFIX}-fma")
+elseif (IQTREE_FLAGS MATCHES "avx") # AVX instruction set
+    message("Vectorization : AVX")
+    add_definitions(-D__SSE3 -D__AVX) # define both SSE3 and AVX directive
+    set(COMBINED_FLAGS "${COMBINED_FLAGS} ${AVX_FLAGS}")
+    #SET(EXE_SUFFIX "${EXE_SUFFIX}-avx")
+elseif (NOT IQTREE_FLAGS MATCHES "nosse") # SSE instruction set
+    if (IQTREE_FLAGS MATCHES "KNL")
+        message("Vectorization : SSE3/AVX/AVX2/AVX-512")
+        add_definitions(-D__AVX512KNL)
+    else()
+        message("Vectorization : SSE3/AVX/AVX2")
+    endif()
+    #add_definitions(-D__SSE3)
+    #set(COMBINED_FLAGS "${COMBINED_FLAGS} ${SSE_FLAGS}")
+endif()
+
+
+##################################################################
+# Setup compiler flags
+##################################################################
+
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMBINED_FLAGS}")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMBINED_FLAGS}")
+set(CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS} -fno-inline-functions -fno-inline-functions-called-once -fno-optimize-sibling-calls -fno-default-inline -fno-inline -O2 -fno-omit-frame-pointer -g")
+set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS} -fno-inline-functions -fno-inline-functions-called-once -fno-optimize-sibling-calls -O2 -fno-omit-frame-pointer -g")
+
+if(CLANG AND IQTREE_FLAGS MATCHES "static")
+    set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread -Wl,--allow-multiple-definition")
+endif()
+
+if (IQTREE_FLAGS MATCHES "libcxx")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
+endif()
+
+
+# Report the effective flags for the selected build type.
+if (CMAKE_BUILD_TYPE STREQUAL "Release")
+    message("C flags : ${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_RELEASE}")
+    message("CXX flags : ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE}")
+endif()
+
+if (CMAKE_BUILD_TYPE STREQUAL "Debug")
+    message("C flags : ${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_DEBUG}")
+    message("CXX flags : ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG}")
+endif()
+
+if (CMAKE_BUILD_TYPE STREQUAL "Profile")
+    message("C flags : ${CMAKE_C_FLAGS_PROFILE} ")
+    message("CXX flags : ${CMAKE_CXX_FLAGS_PROFILE} ")
+endif()
+
+message("LINKER flags : ${CMAKE_EXE_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS_RELEASE}")
+
+
+if (GCC)
+    set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -fno-inline-functions -fno-inline-functions-called-once -fno-default-inline -fno-inline")
+    set(CMAKE_C_FLAGS_DEBUG "-O0 -g -fno-inline-functions -fno-inline-functions-called-once -fno-default-inline -fno-inline")
+    set(CMAKE_CXX_FLAGS_MEM "-g -O1")
+    set(CMAKE_C_FLAGS_MEM "-g -O1")
+elseif (CLANG AND NOT CLANG_UNDER_VS)
+    set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -fno-inline-functions -fno-inline")
+    set(CMAKE_C_FLAGS_DEBUG "-O0 -g -fno-inline-functions -fno-inline")
+    set(CMAKE_CXX_FLAGS_MEM "-g -O1")
+    set(CMAKE_C_FLAGS_MEM "-g -O1")
+endif()
+
+##################################################################
+# check existence of a few basic functions
+##################################################################
+include (CheckFunctionExists)
+check_function_exists (gettimeofday HAVE_GETTIMEOFDAY)
+check_function_exists (getrusage HAVE_GETRUSAGE)
+check_function_exists (GlobalMemoryStatusEx HAVE_GLOBALMEMORYSTATUSEX)
+check_function_exists (strndup HAVE_STRNDUP)
+check_function_exists (strtok_r HAVE_STRTOK_R)
+
+find_package(Backtrace)
+
+# configure a header file to pass some of the CMake settings
+# to the source code
+configure_file (
+    "${PROJECT_SOURCE_DIR}/iqtree_config.h.in"
+    "${PROJECT_BINARY_DIR}/iqtree_config.h"
+    )
+
+# add the binary tree to the search path for include files
+# so that we will find iqtree_config.h
+include_directories("${PROJECT_BINARY_DIR}")
+
+#zlib will be detected for appearance
+#include_directories("${PROJECT_BINARY_DIR}/zlib-1.2.7")
+
+
+# Prefer the system zlib unless the "nozlib" flag is given; otherwise build the bundled copy.
+if (NOT IQTREE_FLAGS MATCHES "nozlib")
+    find_package(ZLIB)
+endif()
+
+if(ZLIB_FOUND)
+    message ("Using system zlib")
+    include_directories(${ZLIB_INCLUDE_DIRS})
+else()
+    message ("Using own zlib-1.2.7")
+    include_directories("${PROJECT_BINARY_DIR}/zlib-1.2.7" "${PROJECT_SOURCE_DIR}/zlib-1.2.7")
+    add_subdirectory(zlib-1.2.7)
+endif()
+
+##################################################################
+# subdirectories containing necessary libraries for the build
+##################################################################
+
+option(USE_BOOSTER "Use Booster for transfer bootstrap expectation" ON)
+if (USE_BOOSTER)
+    add_subdirectory(booster)
+    add_definitions(-DUSE_BOOSTER)
+endif()
+
+add_subdirectory(main)
+add_subdirectory(pll)
+add_subdirectory(ncl)
+add_subdirectory(nclextra)
+add_subdirectory(utils)
+add_subdirectory(pda)
+add_subdirectory(lbfgsb)
+add_subdirectory(whtest)
+add_subdirectory(sprng)
+#add_subdirectory(zlib-1.2.7)
+add_subdirectory(vectorclass)
+list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}")
+add_subdirectory(model)
+add_subdirectory(gsl)
+add_subdirectory(alignment)
+add_subdirectory(tree)
+
+# YAML library
+option(YAML_CPP_BUILD_TESTS "Enable testing" OFF)
+option(YAML_CPP_BUILD_TOOLS "Enable parse tools" OFF)
+option(YAML_CPP_BUILD_CONTRIB "Enable contrib stuff in library" OFF)
+option(YAML_CPP_INSTALL "Enable generation of install target" OFF)
+add_subdirectory(yaml-cpp)
+add_subdirectory(phylo-yaml)
+
+if (USE_TERRAPHAST)
+    add_subdirectory(terraphast)
+    add_subdirectory(terrace)
+endif()
+
+if (USE_LSD2)
+    add_subdirectory(lsd2)
+endif()
+
+##################################################################
+# the main executable
+##################################################################
+
+# Likelihood kernels are built as separate libraries so each can be compiled
+# with its own instruction-set flags (see set_target_properties below).
+add_library(kernelsse tree/phylokernelsse.cpp)
+
+if (NOT BINARY32 AND NOT IQTREE_FLAGS MATCHES "novx")
+    add_library(kernelavx tree/phylotreeavx.cpp)
+    add_library(kernelfma tree/phylokernelfma.cpp)
+    if (IQTREE_FLAGS MATCHES "KNL")
+        add_library(kernelavx512 tree/phylokernelavx512.cpp)
+    endif()
+endif()
+
+add_executable(iqtree2
+    obsolete/parsmultistate.cpp
+    obsolete/parsmultistate.h
+    nn/neuralnetwork.cpp nn/neuralnetwork.h) # added by TD (TODO: move into cmake in nn folder?)
+
+if(Backtrace_FOUND)
+    include_directories(${Backtrace_INCLUDE_DIR})
+    target_link_libraries(iqtree2 ${Backtrace_LIBRARY})
+endif()
+
+if (USE_BOOSTER)
+    target_link_libraries(iqtree2 booster)
+endif()
+
+# Without a global "avx"/"fma" flag, every target gets the SSE baseline and
+# only the dedicated kernel libraries use the wider instruction sets.
+if (NOT IQTREE_FLAGS MATCHES "avx" AND NOT IQTREE_FLAGS MATCHES "fma")
+    if (NOT IQTREE_FLAGS MATCHES "nosse")
+        set_target_properties(iqtree2 ncl nclextra utils pda lbfgsb whtest sprng vectorclass model gsl alignment tree yaml-cpp phyloYAML main PROPERTIES COMPILE_FLAGS "${SSE_FLAGS}")
+        if (USE_TERRAPHAST)
+            set_target_properties(terrace terraphast PROPERTIES COMPILE_FLAGS "${SSE_FLAGS}")
+        endif()
+        if (USE_LSD2)
+            set_target_properties(lsd2 PROPERTIES COMPILE_FLAGS "${SSE_FLAGS}")
+        endif()
+        if (USE_BOOSTER)
+            set_target_properties(booster PROPERTIES COMPILE_FLAGS "${SSE_FLAGS}")
+        endif()
+    endif()
+    set_target_properties(kernelsse pll PROPERTIES COMPILE_FLAGS "${SSE_FLAGS}")
+    if (NOT BINARY32 AND NOT IQTREE_FLAGS MATCHES "novx")
+        set_target_properties(kernelavx pllavx PROPERTIES COMPILE_FLAGS "${AVX_FLAGS}")
+        set_target_properties(kernelfma PROPERTIES COMPILE_FLAGS "${FMA_FLAGS}")
+        if (IQTREE_FLAGS MATCHES "KNL")
+            set_target_properties(kernelavx512 PROPERTIES COMPILE_FLAGS "${AVX512_FLAGS}")
+        endif()
+    endif()
+endif()
+
+##################################################################
+# setup linking flags
+##################################################################
+
+# link special lib for WIN32
+if (WIN32)
+    set(PLATFORM_LIB "ws2_32")
+else()
+    set(PLATFORM_LIB "m")
+endif()
+
+if (IQTREE_FLAGS MATCHES "libcxx")
+    set(STD_LIB "c++abi")
+endif()
+
+set(THREAD_LIB "")
+if (NOT IQTREE_FLAGS MATCHES "single")
+    if (MSVC)
+        if (BINARY32)
+            set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LIBPATH:${PROJECT_SOURCE_DIR}/lib32")
+            set(THREAD_LIB "pthreadVC2")
+        else()
+            set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LIBPATH:${PROJECT_SOURCE_DIR}/lib")
+            set(THREAD_LIB "pthreadVC2")
+        endif()
+    elseif(CLANG AND WIN32)
+        if (BINARY32)
+            target_link_libraries(iqtree2 ${PROJECT_SOURCE_DIR}/lib32/libiomp5md.dll)
+        else()
+            target_link_libraries(iqtree2 ${PROJECT_SOURCE_DIR}/lib/libiomp5md.dll)
+        endif()
+#        set(THREAD_LIB "ompstatic")
+    endif()
+
+    if (CLANG AND BINARY32)
+        set (ATOMIC_LIB "atomic")
+    endif()
+
+endif()
+
+# basic linking libraries
+target_link_libraries(iqtree2 pll ncl nclextra utils pda lbfgsb whtest sprng vectorclass model
+    gsl alignment tree yaml-cpp phyloYAML main ${PLATFORM_LIB} ${STD_LIB} ${THREAD_LIB} ${ATOMIC_LIB})
+
+# added by TD
+target_link_libraries(iqtree2 onnxruntime)
+
+if (USE_TERRAPHAST)
+    target_link_libraries(iqtree2 terrace)
+endif()
+
+if (USE_LSD2)
+    target_link_libraries(iqtree2 lsd2)
+endif()
+
+if (NOT IQTREE_FLAGS MATCHES "nosse")
+    target_link_libraries(iqtree2 kernelsse)
+endif()
+
+# MPI libraries
+if (IQTREE_FLAGS MATCHES "mpi")
+    if (NOT CMAKE_CXX_COMPILER MATCHES "mpi")
+        target_link_libraries(iqtree2 ${MPI_CXX_LIBRARIES})
+    endif()
+endif()
+
+# SSE, AVX etc. libraries
+if (NOT BINARY32 AND NOT IQTREE_FLAGS MATCHES "novx")
+    target_link_libraries(iqtree2 pllavx kernelavx kernelfma)
+    if (IQTREE_FLAGS MATCHES "KNL")
+        target_link_libraries(iqtree2 kernelavx512)
+    endif()
+endif()
+
+# setup the executable name
+##################################################################
+set_target_properties(iqtree2 PROPERTIES OUTPUT_NAME "iqtree2${EXE_SUFFIX}")
+
+# strip the release build; the path of the built binary comes from the
+# $<TARGET_FILE:...> generator expression
+if (NOT IQTREE_FLAGS MATCHES "nostrip" AND CMAKE_BUILD_TYPE STREQUAL "Release" AND (GCC OR CLANG) AND NOT APPLE) # strip is not necessary for MSVC
+    add_custom_command(TARGET iqtree2 POST_BUILD COMMAND strip $<TARGET_FILE:iqtree2>)
+endif()
+
+# Directory holding the built executable; used by the install rules for the "-click" copy.
+if (MSVC)
+    set (BINARY_DIR "${PROJECT_BINARY_DIR}/Release")
+else()
+    set (BINARY_DIR "${PROJECT_BINARY_DIR}")
+endif()
+
+# On Windows, keep a second copy of the executable named "...-click.exe".
+if (WIN32)
+    if (MSVC)
+        if (CLANG_UNDER_VS)
+            add_custom_command(TARGET iqtree2 POST_BUILD COMMAND copy "iqtree2${EXE_SUFFIX}.exe" "iqtree2${EXE_SUFFIX}-click.exe")
+        else()
+            add_custom_command(TARGET iqtree2 POST_BUILD COMMAND copy "Release\\iqtree2${EXE_SUFFIX}.exe" "Release\\iqtree2${EXE_SUFFIX}-click.exe")
+        endif()
+    else()
+        add_custom_command(TARGET iqtree2 POST_BUILD COMMAND copy "iqtree2${EXE_SUFFIX}.exe" "iqtree2${EXE_SUFFIX}-click.exe")
+    endif()
+endif()
+
+##############################################################
+# add the install targets
+##############################################################
+install (TARGETS iqtree2 DESTINATION bin)
+install (FILES "${PROJECT_SOURCE_DIR}/example/models.nex" DESTINATION .)
+install (FILES "${PROJECT_SOURCE_DIR}/example/example.phy" DESTINATION .)
+install (FILES "${PROJECT_SOURCE_DIR}/example/example.nex" DESTINATION .)
+install (FILES "${PROJECT_SOURCE_DIR}/example/example.cf" DESTINATION .)
+
+# Windows-only extras: the "-click" copy of the executable and the runtime
+# DLLs needed by the multi-threaded build.
+if (WIN32)
+    install (FILES "${BINARY_DIR}/iqtree2${EXE_SUFFIX}-click.exe" DESTINATION bin)
+    if (NOT IQTREE_FLAGS MATCHES "single")
+        if (MSVC)
+            if (BINARY32)
+                install(FILES "${PROJECT_SOURCE_DIR}/lib32/pthreadVC2.dll" DESTINATION bin)
+                install(FILES "${PROJECT_SOURCE_DIR}/lib32/libiomp5md.dll" DESTINATION bin)
+            else()
+                install(FILES "${PROJECT_SOURCE_DIR}/lib/pthreadVC2.dll" DESTINATION bin)
+                install(FILES "${PROJECT_SOURCE_DIR}/lib/libiomp5md.dll" DESTINATION bin)
+            endif()
+#            install(FILES "${PROJECT_SOURCE_DIR}/lib/pthreadGC2.dll" DESTINATION bin)
+#            install(FILES "${PROJECT_SOURCE_DIR}/lib/pthreadGC2_64.dll" DESTINATION bin)
+        endif()
+
+        if (CLANG)
+            if (BINARY32)
+                install(FILES "${PROJECT_SOURCE_DIR}/lib32/libiomp5md.dll" DESTINATION bin)
+            else()
+                install(FILES "${PROJECT_SOURCE_DIR}/lib/libiomp5md.dll" DESTINATION bin)
+            endif()
+        endif()
+    endif()
+endif()
+
+##############################################################
+# build a CPack driven installer package
+##############################################################
+include (InstallRequiredSystemLibraries)
+set (CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
+set (CPACK_PACKAGE_VERSION_MAJOR "${iqtree_VERSION_MAJOR}")
+set (CPACK_PACKAGE_VERSION_MINOR "${iqtree_VERSION_MINOR}")
+set (CPACK_PACKAGE_VERSION_PATCH "${iqtree_VERSION_PATCH}")
+
+# ZIP archives on Windows and macOS, gzipped tarballs elsewhere; the source
+# package uses the same format as the binary package.
+if(WIN32 OR APPLE)
+    set(CPACK_GENERATOR "ZIP")
+else()
+    set(CPACK_GENERATOR "TGZ")
+endif()
+set(CPACK_SOURCE_GENERATOR "${CPACK_GENERATOR}")
+
+#set(CPACK_SOURCE_PACKAGE_FILE_NAME
+#    "${CMAKE_PROJECT_NAME}-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}")
+set(CPACK_SOURCE_IGNORE_FILES
+    "/build.*/;/debug.*/;/examples/;/test_scripts/;/manual/;/.bzr/;~$;/\\\\.svn/;/\\\\.git/;/pllrepo/;${CPACK_SOURCE_IGNORE_FILES}")
+
+# Platform tag used in the package file name.
+set (SYSTEM_NAME "${CMAKE_SYSTEM_NAME}")
+if (${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
+    set (SYSTEM_NAME "MacOSX")
+    if (IQTREE_FLAGS MATCHES "oldmac")
+        set (SYSTEM_NAME "MacOS10.5")
+    endif()
+endif()
+
+if (BINARY32)
+    set (SYSTEM_NAME "${SYSTEM_NAME}32")
+endif()
+
+if (IQTREE_FLAGS MATCHES "KNL")
+    set (SYSTEM_NAME "${SYSTEM_NAME}KNL")
+endif()
+
+set(CPACK_PACKAGE_FILE_NAME
+    "${CMAKE_PROJECT_NAME}${EXE_SUFFIX}-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}${CPACK_PACKAGE_VERSION_PATCH}-${SYSTEM_NAME}")
+
+# Packaged binaries are stripped everywhere except on macOS.
+if (NOT APPLE)
+    set(CPACK_STRIP_FILES TRUE)
+endif()
+
+include (CPack)
+
+#add_custom_target(dist COMMAND ${CMAKE_MAKE_PROGRAM} package_source)
diff --git a/CMakeSettings.json b/CMakeSettings.json
new file mode 100644
index 000000000..5731f9dcc
--- /dev/null
+++ b/CMakeSettings.json
@@ -0,0 +1,28 @@
+{
+  "configurations": [
+    {
+      "name": "x64-Debug",
+      "generator": "Ninja",
+      "configurationType": "Debug",
+      "inheritEnvironments": [ "clang_cl_x64" ],
+      "buildRoot": "${projectDir}\\out\\build\\${name}",
+      "installRoot": "${projectDir}\\out\\install\\${name}",
+      "cmakeCommandArgs": "-DEIGEN3_INCLUDE_DIR=C:\\Projects\\eigen-3.3.7 -DBoost_INCLUDE_DIR=C:\\Projects\\boost_1_73_0\\boost_1_73_0 -DIQTREE_FLAGS=mpi,fma -DMPI_DIR=c:\\Projects\\Microsoft_MPI_10_1_2",
+      "buildCommandArgs": "",
+      "ctestCommandArgs": "",
+      "variables": []
+    },
+    {
+      "name": "x64-Release",
+      "generator": "Ninja",
+      "configurationType": "RelWithDebInfo",
+      "buildRoot": "${projectDir}\\out\\build\\${name}",
+      "installRoot": "${projectDir}\\out\\install\\${name}",
+      "cmakeCommandArgs": "-DEIGEN3_INCLUDE_DIR=C:\\Projects\\eigen-3.3.7 -DBoost_INCLUDE_DIR=C:\\Projects\\boost_1_73_0\\boost_1_73_0 -DIQTREE_FLAGS=mpi,fma -DMPI_DIR=c:\\Projects\\Microsoft_MPI_10_1_2",
+      "buildCommandArgs": "",
+      "ctestCommandArgs": "",
+      "inheritEnvironments": [ "clang_cl_x64" ],
+      "variables": []
+    }
+  ]
+}
\ No newline at end of file
diff --git a/README.md b/README.md
index a2ea33b1a..bb086b2b1 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,12 @@
 IQ-TREE
 =======
+
+[![Github IQ-TREE 1 
Releases](https://img.shields.io/github/downloads/Cibiv/IQ-TREE/total.svg?style=social&logo=github&label=iqtree1%20download)](https://github.com/Cibiv/IQ-TREE/releases) +[![Github IQ-TREE 2 Releases](https://img.shields.io/github/downloads/iqtree/iqtree2/total.svg?style=social&logo=github&label=iqtree2%20download)](https://github.com/iqtree/iqtree2/releases) +[![BioConda downloads](https://img.shields.io/conda/dn/bioconda/iqtree.svg?style=flag&label=BioConda%20install)](https://anaconda.org/bioconda/iqtree) +[![Build Status](https://travis-ci.org/bqminh/IQ-TREE.svg?branch=master)](https://travis-ci.org/bqminh/IQ-TREE) +[![License: GPL v2](https://img.shields.io/badge/License-GPL%20v2-blue.svg)](https://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html) + Efficient and versatile phylogenomic software by maximum likelihood Introduction @@ -25,42 +31,7 @@ The strength of IQ-TREE is the availability of a wide variety of phylogenetic mo * __Common models__: All [common substitution models](http://www.iqtree.org/doc/Substitution-Models) for DNA, protein, codon, binary and morphological data with [rate heterogeneity among sites](http://www.iqtree.org/doc/Substitution-Models/#rate-heterogeneity-across-sites) and [ascertainment bias correction](http://www.iqtree.org/doc/Substitution-Models/#ascertainment-bias-correction) for e.g. SNP data. * __[Partition models](http://www.iqtree.org/doc/Complex-Models/#partition-models)__: Allowing individual models for different genomic loci (e.g. genes or codon positions), mixed data types, mixed rate heterogeneity types, linked or unlinked branch lengths between partitions. * __Mixture Models__: [fully customizable mixture models](http://www.iqtree.org/doc/Complex-Models/#mixture-models) and [empirical protein mixture models](http://www.iqtree.org/doc/Substitution-Models/#protein-models) and. - -IQ-TREE PoMo ------------- - -IQ-TREE+PoMo is still under development. 
Please check out - - iqtree --help - -Especially, the section titled `POLYMORPHISM AWARE MODELS (PoMo)`. - -``` -POLYMORPHISM AWARE MODELS (PoMo): -PoMo is run when -- a Counts File is used as input file, and/or when -- it is specified in the model string (see below). - -st C[FR] or C[FR]ps Counts File (automatically detected). - Useful to customize the virtual population size `ps` - 3 <= ps <= 19; ps has to be an odd number, 2 or 10. - F: Sum over partial likelihoods at the tip of the tree (weighted). - R: Random binomial sampling of PoMo states from data (sampled). - Default is `CF9`. - -m ++ Default: `HKY+rP+FO`. - : Substitution model. - DNA: HKY (default), JC, F81, K2P, K3P, K81uf, TN/TrN, TNef, - TIM, TIMef, TVM, TVMef, SYM, GTR, or a 6-digit model - specification (e.g., 010010 = HKY). - : PoMo model. - - rP (default; reversible PoMo with tree inference). - : Frequency type (optional; default: +F, counted). - F or +FO or +FU or +FQ. - Counted, optimized, user-defined, equal state frequency. - This overwrites the specifications of the DNA model. - The default model string is: -m HKY+rP+F. - Until now, only DNA models work with PoMo. - Model testing and rate heterogeneity do not work with PoMo yet. -``` +* __Polymorphism-aware models (PoMo)__: IQ-TREE web service @@ -95,9 +66,17 @@ For the ultrafast bootstrap (UFBoot) please cite: * D.T. Hoang, O. Chernomor, A. von Haeseler, B.Q. Minh, and L.S. Vinh (2017) UFBoot2: Improving the ultrafast bootstrap approximation. *Mol. Biol. Evol.*, in press. +When using the posterior mean site frequency (PMSF) model please cite: + +* H.C. Wang, B.Q. Minh, E. Susko, A.J. Roger (in press) Modeling site heterogeneity with posterior mean site frequency profiles accelerates accurate phylogenomic estimation. *Syst. Biol.* + When using partition models please cite: -O. Chernomor, A. von Haeseler, B.Q. Minh (2016) Terrace aware data structure for phylogenomic inference from supermatrices. *Syst. Biol.*, 65:997-1008. +* O. Chernomor, A. 
von Haeseler, B.Q. Minh (2016) Terrace aware data structure for phylogenomic inference from supermatrices. *Syst. Biol.*, 65:997-1008. + +When using polymorphism-aware models please cite: + +* D. Schrempf, B.Q. Minh, N. De Maio, A. von Haeseler, C. Kosiol (2016) Reversible polymorphism-aware phylogenetic models and their application to tree inference. *J. Theor. Biol.*, 407:362-370. #### Credits and Acknowledgements diff --git a/alignment/CMakeLists.txt b/alignment/CMakeLists.txt index 2efbfa40c..2a3d3394d 100644 --- a/alignment/CMakeLists.txt +++ b/alignment/CMakeLists.txt @@ -5,6 +5,8 @@ alignment.cpp alignment.h alignmentpairwise.cpp alignmentpairwise.h +alignmentsummary.cpp +alignmentsummary.h maalignment.cpp maalignment.h superalignment.cpp @@ -13,6 +15,8 @@ superalignmentpairwise.cpp superalignmentpairwise.h superalignmentpairwiseplen.cpp superalignmentpairwiseplen.h +superalignmentunlinked.cpp +superalignmentunlinked.h ) target_link_libraries(alignment ncl gsl) diff --git a/alignment/alignment.cpp b/alignment/alignment.cpp index 9d1f7e199..5b35af899 100644 --- a/alignment/alignment.cpp +++ b/alignment/alignment.cpp @@ -17,8 +17,21 @@ #include "model/rategamma.h" #include "gsl/mygsl.h" #include "utils/gzstream.h" +#include "utils/timeutil.h" //for getRealTime() +#include "utils/progress.h" //for progress_display +#include "alignmentsummary.h" + +#include +#include + +#include +#ifdef USE_BOOST +#include +#endif + using namespace std; +using namespace Eigen; char symbols_protein[] = "ARNDCQEGHILKMFPSTWYVX"; // X for unknown AA char symbols_dna[] = "ACGT"; @@ -76,14 +89,14 @@ vector& Alignment::getSeqNames() { } int Alignment::getSeqID(string &seq_name) { - for (int i = 0; i < getNSeq(); i++) + for (size_t i = 0; i < getNSeq(); i++) if (seq_name == getSeqName(i)) return i; return -1; } int Alignment::getMaxSeqNameLength() { int len = 0; - for (int i = 0; i < getNSeq(); i++) + for (size_t i = 0; i < getNSeq(); i++) if (getSeqName(i).length() > len) len = 
getSeqName(i).length(); return len; @@ -118,12 +131,12 @@ int Alignment::checkAbsentStates(string msg) { absent_states += ", "; absent_states += convertStateBackStr(i); count++; - } else if (state_freq[i] <= MIN_FREQUENCY) { + } else if (state_freq[i] <= Params::getInstance().min_state_freq) { if (!rare_states.empty()) rare_states += ", "; rare_states += convertStateBackStr(i); } - if (absent_states.size() >= num_states-1) + if (count >= num_states-1 && Params::getInstance().fixed_branch_length != BRLEN_FIX) outError("Only one state is observed in " + msg); if (!absent_states.empty()) cout << "NOTE: State(s) " << absent_states << " not present in " << msg << " and thus removed from Markov process to prevent numerical problems" << endl; @@ -138,15 +151,10 @@ void Alignment::checkSeqName() { StrVector::iterator it; for (it = seq_names.begin(); it != seq_names.end(); it++) { string orig_name = (*it); - for (string::iterator i = it->begin(); i != it->end(); i++) { - if (!isalnum(*i) && (*i) != '_' && (*i) != '-' && (*i) != '.') { - (*i) = '_'; - } - } - if (orig_name != (*it)) + if (renameString(*it)) warn_str << orig_name << " -> " << (*it) << endl; } - if (warn_str.str() != "") { + if (!warn_str.str().empty() && Params::getInstance().compute_seq_composition) { string str = "Some sequence names are changed as follows:\n"; outWarning(str + warn_str.str()); } @@ -164,124 +172,171 @@ void Alignment::checkSeqName() { } if (!ok) outError("Please rename sequences listed above!"); - double *state_freq = new double[num_states]; -// double *freq_per_sequence = new double[num_states*getNSeq()]; - double *freq_per_sequence = new double[num_states]; + if (!Params::getInstance().compute_seq_composition) { + return; + } + + double state_freq[num_states]; unsigned *count_per_seq = new unsigned[num_states*getNSeq()]; computeStateFreq(state_freq); -// computeStateFreqPerSequence(freq_per_sequence); countStatePerSequence(count_per_seq); - int i, df = -1; - for (i = 0; i < num_states; 
i++) - if (state_freq[i] > 0.0) df++; - - if (seq_type == SEQ_POMO) + int df = -1; //degrees of freedom (for a chi-squared test) + for (int i = 0; i < num_states; i++) { + if (state_freq[i] > 0.0) { + df++; + } + } + if (seq_type == SEQ_POMO) { cout << "NOTE: The composition test for PoMo only tests the proportion of fixed states!" << endl; - + } + bool listSequences = !Params::getInstance().suppress_list_of_sequences; int max_len = getMaxSeqNameLength()+1; - cout.width(max_len+14); - cout << right << "Gap/Ambiguity" << " Composition p-value"<< endl; + if (listSequences) { + cout.width(max_len+14); + cout << right << "Gap/Ambiguity" << " Composition p-value"<< endl; + } int num_problem_seq = 0; int total_gaps = 0; cout.precision(2); int num_failed = 0; - for (i = 0; i < seq_names.size(); i++) { - int j; - int num_gaps = getNSite() - countProperChar(i); + + size_t numSequences = seq_names.size(); + size_t numSites = getNSite(); + char maxProperState = static_cast(num_states + pomo_sampled_states.size()); + AlignmentSummary s(this, true, true); + + //The progress bar, displayed by s.constructSequenceMatrixNoisily, + //lies a bit here. We're not counting gap characters, + //we are constructing the sequences (so we can count gap characters quickly). + s.constructSequenceMatrixNoisily(false, "Analyzing sequences", "counted gaps in"); + + struct SequenceInfo { + double percent_gaps; + bool failed; + double pvalue; + }; + SequenceInfo* seqInfo = new SequenceInfo[numSequences]; + + #ifdef _OPENMP + #pragma omp parallel for reduction(+:total_gaps,num_problem_seq,num_failed) + #endif + for (size_t i = 0; i < numSequences; i++) { + size_t num_gaps = numSites; + if (s.sequenceMatrix!=nullptr) { + //Discount the non-gap characters with a (not-yet-vectorized) + //sweep over the sequence. 
+ const char* sequence = s.sequenceMatrix + i * s.sequenceLength; + for (size_t scan = 0; scan 50) { + seqInfo[i].percent_gaps = ((double)num_gaps / getNSite()) * 100.0; + if ( 50.0 < seqInfo[i].percent_gaps ) { num_problem_seq++; } - + size_t iRow = i * num_states; + double freq_per_sequence[num_states]; double chi2 = 0.0; unsigned sum_count = 0; - + double pvalue; if (seq_type == SEQ_POMO) { - // FIXME: Number of nucleotides hardcoded here. - int nnuc = 4; - df = nnuc-1; // Have to normalize allele frequencies. - double state_freq_norm[nnuc]; + double state_freq_norm[num_states]; double sum_freq = 0.0; - for (j = 0; j < nnuc; j++) { + for (int j = 0; j < num_states; j++) { sum_freq += state_freq[j]; state_freq_norm[j] = state_freq[j]; } - for (j = 0; j < nnuc; j++) { + for (int j = 0; j < num_states; j++) { state_freq_norm[j] /= sum_freq; } - - for (j = 0; j < nnuc; j++) - sum_count += count_per_seq[i*num_states+j]; - double sum_inv = 1.0/sum_count; - for (j = 0; j < nnuc; j++) - freq_per_sequence[j] = count_per_seq[i*num_states+j]*sum_inv; - for (j = 0; j < nnuc; j++) + for (int j = 0; j < num_states; j++) { + sum_count += count_per_seq[iRow + j]; + } + double sum_inv = 1.0 / sum_count; + for (int j = 0; j < num_states; j++) { + freq_per_sequence[j] = count_per_seq[iRow + j] * sum_inv; + } + for (int j = 0; j < num_states; j++) { chi2 += (state_freq_norm[j] - freq_per_sequence[j]) * (state_freq_norm[j] - freq_per_sequence[j]) / state_freq_norm[j]; - - // chi2 *= getNSite(); - chi2 *= sum_count; - double pvalue = chi2prob(nnuc-1, chi2); - if (pvalue < 0.05) { - cout << " failed "; - num_failed++; - } else - cout << " passed "; - cout.width(9); - cout << right << pvalue*100 << "%"; - } else { - for (j = 0; j < num_states; j++) - sum_count += count_per_seq[i*num_states+j]; - double sum_inv = 1.0/sum_count; - for (j = 0; j < num_states; j++) - freq_per_sequence[j] = count_per_seq[i*num_states+j]*sum_inv; - for (j = 0; j < num_states; j++) - if (state_freq[j] > 0.0) 
+ } + chi2 *= sum_count; + pvalue = chi2prob(num_states - 1, chi2); + } + else { + for (int j = 0; j < num_states; j++) { + sum_count += count_per_seq[iRow + j]; + } + double sum_inv = 1.0 / sum_count; + for (int j = 0; j < num_states; j++) { + freq_per_sequence[j] = count_per_seq[iRow + j] * sum_inv; + } + for (int j = 0; j < num_states; j++) { + if (state_freq[j] > 0.0) { chi2 += (state_freq[j] - freq_per_sequence[j]) * (state_freq[j] - freq_per_sequence[j]) / state_freq[j]; - - chi2 *= sum_count; - double pvalue = chi2prob(df, chi2); - if (pvalue < 0.05) { + } + } + chi2 *= sum_count; + pvalue = chi2prob(df, chi2); + + } + seqInfo[i].pvalue = pvalue; + seqInfo[i].failed = (pvalue < 0.05); + num_failed += seqInfo[i].failed ? 1 : 0; + } + if (listSequences) { + for (size_t i = 0; i < numSequences; i++) { + cout.width(4); + cout << right << i + 1 << " "; + cout.width(max_len); + cout << left << seq_names[i] << " "; + cout.width(6); + cout << right << seqInfo[i].percent_gaps << "%"; + if (seqInfo[i].failed) { cout << " failed "; - num_failed++; - } else + } + else { cout << " passed "; + } cout.width(9); - cout << right << pvalue*100 << "%"; + cout << right << (seqInfo[i].pvalue * 100) << "%"; + cout << endl; } - cout << endl; } - if (num_problem_seq) cout << "WARNING: " << num_problem_seq << " sequences contain more than 50% gaps/ambiguity" << endl; - cout << "**** "; - cout.width(max_len+2); - cout << left << " TOTAL "; - cout.width(6); - cout << right << ((double)total_gaps/getNSite())/getNSeq()*100 << "% "; - cout << " " << num_failed << " sequences failed composition chi2 test (p-value<5%; df=" << df << ")" << endl; - cout.precision(3); + delete[] seqInfo; + if (num_problem_seq) { + cout << "WARNING: " << num_problem_seq << " sequences contain more than 50% gaps/ambiguity" << endl; + } + if (listSequences) { + cout << "**** "; + cout.width(max_len+2); + cout << left << " TOTAL "; + cout.width(6); + cout << right << ((double)total_gaps/getNSite())/getNSeq()*100 
<< "% "; + cout << " " << num_failed << " sequences failed composition chi2 test (p-value<5%; df=" << df << ")" << endl; + cout.precision(3); + } delete [] count_per_seq; - delete [] freq_per_sequence; - delete [] state_freq; } int Alignment::checkIdenticalSeq() { + //Todo: This should use sequence hashing. int num_identical = 0; IntVector checked; checked.resize(getNSeq(), 0); - for (int seq1 = 0; seq1 < getNSeq(); seq1++) { + for (size_t seq1 = 0; seq1 < getNSeq(); ++seq1) { if (checked[seq1]) continue; bool first = true; - for (int seq2 = seq1+1; seq2 < getNSeq(); seq2++) { + for (size_t seq2 = seq1+1; seq2 < getNSeq(); ++seq2) { bool equal_seq = true; for (iterator it = begin(); it != end(); it++) if ((*it)[seq1] != (*it)[seq2]) { @@ -307,52 +362,110 @@ int Alignment::checkIdenticalSeq() Alignment *Alignment::removeIdenticalSeq(string not_remove, bool keep_two, StrVector &removed_seqs, StrVector &target_seqs) { + auto n = getNSeq(); IntVector checked; vector removed; - checked.resize(getNSeq(), 0); - removed.resize(getNSeq(), false); - int seq1; + checked.resize(n, 0); + removed.resize(n, false); + + //JB2020-06-17 Begin : Determine hashes for all the sequences + auto startHash = getRealTime(); + vector hashes; + hashes.resize(n, 0); + progress_display progress(n*2, "Checking for duplicate sequences"); + #ifdef _OPENMP + #pragma omp parallel for schedule(static,100) + #endif + for (int seq1=0; seq1= VB_MED && !progress_display::getProgressDisplay()) { + auto hashTime = getRealTime() - startHash; + cout << "Hashing sequences took " << hashTime << " wall-clock seconds" << endl; + } + //JB2020-06-17 Finish - for (seq1 = 0; seq1 < getNSeq(); seq1++) { + bool listIdentical = !Params::getInstance().suppress_duplicate_sequence_warnings; + + auto startCheck = getRealTime(); + for (size_t seq1 = 0; seq1 < getNSeq(); ++seq1) { if (checked[seq1]) continue; bool first_ident_seq = true; - for (int seq2 = seq1+1; seq2 < getNSeq(); seq2++) { - if (getSeqName(seq2) == 
not_remove) continue; + for (size_t seq2 = seq1+1; seq2 < getNSeq(); ++seq2) { + if (getSeqName(seq2) == not_remove || removed[seq2]) continue; + if (hashes[seq1] != hashes[seq2]) continue; //JB2020-06-17 bool equal_seq = true; - for (iterator it = begin(); it != end(); it++) - if ((*it)[seq1] != (*it)[seq2]) { - equal_seq = false; - break; - } - if (equal_seq) { - if (removed_seqs.size() < getNSeq()-3 && (!keep_two || !first_ident_seq)) { - removed_seqs.push_back(getSeqName(seq2)); - target_seqs.push_back(getSeqName(seq1)); - removed[seq2] = true; - } else { + for (iterator it = begin(); it != end(); it++) { + if ((*it)[seq1] != (*it)[seq2]) { + equal_seq = false; + break; + } + } + if (!equal_seq) continue; + if (removed_seqs.size()+3 < getNSeq() && (!keep_two || !first_ident_seq)) { + removed_seqs.push_back(getSeqName(seq2)); + target_seqs.push_back(getSeqName(seq1)); + removed[seq2] = true; + } else { + if (listIdentical) { cout << "NOTE: " << getSeqName(seq2) << " is identical to " << getSeqName(seq1) << " but kept for subsequent analysis" << endl; } - checked[seq2] = 1; - first_ident_seq = false; - } + } + checked[seq2] = 1; + first_ident_seq = false; } checked[seq1] = 1; + ++progress; } - - if (removed_seqs.size() > 0) { - if (removed_seqs.size() >= getNSeq()-3) - outWarning("Your alignment contains too many identical sequences!"); - IntVector keep_seqs; - for (seq1 = 0; seq1 < getNSeq(); seq1++) - if (!removed[seq1]) keep_seqs.push_back(seq1); - Alignment *aln = new Alignment; - aln->extractSubAlignment(this, keep_seqs, 0); - return aln; - } else return this; + if (verbose_mode >= VB_MED && !progress_display::getProgressDisplay()) { + auto checkTime = getRealTime() - startCheck; + cout << "Checking for duplicate sequences took " << checkTime + << " wall-clock seconds" << endl; + } + progress.done(); + if (removed_seqs.size() > 0) { + double removeDupeStart = getRealTime(); + if (removed_seqs.size() + 3 >= getNSeq()) { + outWarning("Your alignment contains 
too many identical sequences!"); + } + IntVector keep_seqs; + for (size_t seq1 = 0; seq1 < getNSeq(); seq1++) { + if (!removed[seq1]) { + keep_seqs.emplace_back(seq1); + } + } + Alignment *aln = new Alignment; + aln->extractSubAlignment(this, keep_seqs, 0); + //cout << "NOTE: Identified " << removed_seqs.size() + // << " sequences as duplicates." << endl; + if (verbose_mode >= VB_MED) { + cout << "Removing " << removed_seqs.size() << " duplicated sequences took " + << (getRealTime() - removeDupeStart) << " sec." << endl; + } + return aln; + } else return this; } +void Alignment::adjustHash(StateType v, size_t& hash) const { + //Based on what boost::hash_combine() does. + //For now there's no need for a templated version + //in a separate header file. But if other classes start + //wanting to "roll their own hashing" this should move + //to, say, utils/hashing.h. + hash ^= std::hash()(v) + 0x9e3779b9 + + (hash<<6) + (hash>>2); +} +void Alignment::adjustHash(bool v, size_t& hash) const { + hash ^= std::hash()(v) + 0x9e3779b9 + + (hash<<6) + (hash>>2); +} -bool Alignment::isGapOnlySeq(int seq_id) { +bool Alignment::isGapOnlySeq(size_t seq_id) { ASSERT(seq_id < getNSeq()); for (iterator it = begin(); it != end(); it++) if ((*it)[seq_id] != STATE_UNKNOWN) { @@ -361,10 +474,233 @@ bool Alignment::isGapOnlySeq(int seq_id) { return true; } +// added by TD +vector Alignment::computeSummaryStats(int seq1_idx, int seq2_idx) { + ASSERT(seq1_idx < getNSeq()); + ASSERT(seq2_idx < getNSeq()); + + vector stats(26); + //vector freqs_seq1(4); + //vector freqs_seq2(4); + //vector titv_rates(16); + + map bitshift_map = {{0, 1}, {1, 3}, {2, 5}, {3, 7}}; + + for (iterator it = begin(); it != end(); it++) { + // check if contains gaps, if yes discard position + /*if ((*it)[seq1_idx] == 18 || (*it)[seq2_idx] == 18) { // 18 stands for a gap + continue; + }*/ + // count nucleotides for sequence 1 + switch ((*it)[seq1_idx]) { + case 0: stats[4]++; + break; + case 1: stats[5]++; + break; + 
case 2: stats[6]++; + break; + case 3: stats[7]++; + break; + default: + throw "Sequence contains other characters than A, C, G, T"; + } + // count nucleotides for sequence 2 + switch ((*it)[seq2_idx]) { + case 0: stats[8]++; + break; + case 1: stats[9]++; + break; + case 2: stats[10]++; + break; + case 3: stats[11]++; + break; + default: + throw "Sequence contains other characters than A, C, G, T"; + } + // count transitions and transversions + switch(bitshift_map[(*it)[seq1_idx]] << bitshift_map[(*it)[seq2_idx]]) { + case 2: stats[0]++; // AA + break; + case 8: stats[14]++; // AC + break; + case 32: stats[15]++; // AG + break; + case 128: stats[16]++; // AT + break; + case 6: stats[20]++; // CA + break; + case 24: stats[1]++; // CC + break; + case 96: stats[18]++; // CG + break; + case 384: stats[17]++; // CT + break; + case 10: stats[21]++; // GA + break; + case 40: stats[24]++; // GC + break; + case 160: stats[2]++; // GG + break; + case 640: stats[19]++; // GT + break; + case 14: stats[22]++; // TA + break; + case 56: stats[23]++; // TC + break; + case 224: stats[25]++; // TG + break; + case 896: stats[3]++; // TT + break; + default: + throw "Bitshift result not known!"; + } + } + stats[12] = stats[14] + stats[16] + stats[24] + stats[19] + + stats[25] + stats[22] + stats[18] + stats[20]; // transversion counts + stats[13] = stats[15] + stats[21] + stats[17] + stats[23]; // transition counts + + size_t n_sites = getNSite(); + // todo: check how to make it work with transform + //std::transform(stats.begin(), stats.end(), stats.begin(), [n_sites](int &c){return c/n_sites;}); + + for (size_t i = 0; i < 26; i++) { + stats[i] /= n_sites; + } + return stats; +} + +Alignment *Alignment::replaceAmbiguousChars() { + + IntVector patterns; + + for (size_t idx = 0; idx < getNPattern(); idx++) { + patterns.push_back(idx); + } + + Alignment *aln = new Alignment; + aln->extractPatterns(this, patterns); + + for (size_t idx = 0; idx < boost::size(patterns); idx++) { + for 
(size_t i = 0; i < getNSeq(); i++) { + if (aln->at(idx)[i] > 3) { + uint32_t base; + mt19937 rng(chrono::steady_clock::now().time_since_epoch().count()); + switch(aln->at(idx)[i]) { + case 6: { // M: A or C + std::uniform_int_distribution dist(0, 1); + base = dist(rng); + aln->at(idx)[i] = (StateType) base; + } + break; + case 8: { // R: A or G + std::uniform_int_distribution dist(0, 1); + base = dist(rng); + aln->at(idx)[i] = base == 0 ? (StateType) base: (StateType) 2; + } + break; + case 9: { // S: C or G + std::uniform_int_distribution dist(1, 2); + base = dist(rng); + aln->at(idx)[i] = base; + } + break; + case 10: { // V: A or C or G + std::uniform_int_distribution dist(0, 2); + base = dist(rng); + aln->at(idx)[i] = base; + } + break; + case 12: { // W: A or T + std::uniform_int_distribution dist(0, 1); + base = dist(rng); + aln->at(idx)[i] = base == 0 ? (StateType) base: (StateType) 3; + } + break; + case 13: { // Y: C or T + std::uniform_int_distribution dist(1, 2); + base = dist(rng); + aln->at(idx)[i] = base == 1 ? (StateType) base: (StateType) 3; + } + break; + case 14: { // H: A or C or T + std::uniform_int_distribution dist(0, 2); + base = dist(rng); + aln->at(idx)[i] = base == 2 ? (StateType) 3 : (StateType) base; + } + break; + case 15: { // K: G or T + std::uniform_int_distribution dist(2, 3); + base = dist(rng); + aln->at(idx)[i] = (StateType) base; + } + break; + case 16: { // D: A or G or T + std::uniform_int_distribution dist(1, 3); + base = dist(rng); + aln->at(idx)[i] = base == 1 ? 
(StateType) 0: (StateType) base; + } + break; + case 17: { // B: C or G or T + std::uniform_int_distribution dist(1, 3); + base = dist(rng); + aln->at(idx)[i] = (StateType) base; + } + case 18: { // N + std::uniform_int_distribution dist(0, 3); + base = dist(rng); + aln->at(idx)[i] = (StateType) base; + } + break; + default: + throw "Ambiguous character not known!"; + } + } + } + } + return aln; + +} + +// added by TD +Alignment *Alignment::removeAndFillUpGappySites() { + + IntVector keep_patterns; + + // remove all sites with > 70% gaps + for (size_t idx = 0; idx < getNPattern(); idx++) { + size_t count_gaps = 0; + Pattern pattern = getPattern(idx); + for (size_t i = 0; i < getNSeq(); i++) { + if (pattern[i] == STATE_UNKNOWN) + count_gaps++; + } + if (count_gaps / getNSeq() <= 0.7) { + keep_patterns.push_back(idx); + } + } + + Alignment *aln = new Alignment; + aln->extractPatterns(this, keep_patterns); + + for (size_t idx = 0; idx < boost::size(keep_patterns); idx++) { + vector freqs = aln->at(idx).freqs; + uint32_t most_frequent_base = std::max_element(freqs.begin(), freqs.end()) - freqs.begin(); + for (size_t i = 0; i < getNSeq(); i++) { + if (aln->at(idx)[i] == STATE_UNKNOWN) { + // fill up gap with most frequent base + aln->at(idx)[i] = (StateType)most_frequent_base; + } + } + } + + return aln; + +} + Alignment *Alignment::removeGappySeq() { IntVector keep_seqs; - int i, nseq = getNSeq(); - for (i = 0; i < nseq; i++) + size_t nseq = getNSeq(); + for (size_t i = 0; i < nseq; i++) if (! 
isGapOnlySeq(i)) { keep_seqs.push_back(i); } @@ -372,7 +708,7 @@ Alignment *Alignment::removeGappySeq() { return this; // 2015-12-03: if resulting alignment has too few seqs, try to add some back if (keep_seqs.size() < 3 && getNSeq() >= 3) { - for (i = 0; i < nseq && keep_seqs.size() < 3; i++) + for (size_t i = 0; i < nseq && keep_seqs.size() < 3; i++) if (isGapOnlySeq(i)) keep_seqs.push_back(i); } @@ -382,9 +718,9 @@ Alignment *Alignment::removeGappySeq() { } void Alignment::checkGappySeq(bool force_error) { - int nseq = getNSeq(), i; + size_t nseq = getNSeq(); int wrong_seq = 0; - for (i = 0; i < nseq; i++) + for (size_t i = 0; i < nseq; i++) if (isGapOnlySeq(i)) { outWarning("Sequence " + getSeqName(i) + " contains only gaps or missing data"); wrong_seq++; @@ -394,7 +730,12 @@ void Alignment::checkGappySeq(bool force_error) { } } -Alignment::Alignment(char *filename, char *sequence_type, InputType &intype) : vector() { +Alignment::Alignment(char *filename, char *sequence_type, InputType &intype, string model) : vector() { + name = "Noname"; + this->model_name = model; + if (sequence_type) + this->sequence_type = sequence_type; + aln_file = filename; num_states = 0; frac_const_sites = 0.0; frac_invariant_sites = 0.0; @@ -404,11 +745,11 @@ Alignment::Alignment(char *filename, char *sequence_type, InputType &intype) : v seq_type = SEQ_UNKNOWN; STATE_UNKNOWN = 126; pars_lower_bound = NULL; + double readStart = getRealTime(); cout << "Reading alignment file " << filename << " ... "; intype = detectInputFile(filename); try { - if (intype == IN_NEXUS) { cout << "Nexus format detected" << endl; readNexus(filename); @@ -440,18 +781,27 @@ Alignment::Alignment(char *filename, char *sequence_type, InputType &intype) : v } catch (string str) { outError(str); } - + if (verbose_mode >= VB_MED) { + cout << "Time to read input file was " << (getRealTime() - readStart) << " sec." 
<< endl; + } if (getNSeq() < 3) + { outError("Alignment must have at least 3 sequences"); - + } + double constCountStart = getRealTime(); countConstSite(); - - cout << "Alignment has " << getNSeq() << " sequences with " << getNSite() - << " columns, " << getNPattern() << " distinct patterns" << endl - << num_informative_sites << " parsimony-informative, " - << num_variant_sites-num_informative_sites << " singleton sites, " - << (int)(frac_const_sites*getNSite()) << " constant sites" << endl; - buildSeqStates(); + if (verbose_mode >= VB_MED) { + cout << "Time to count constant sites was " << (getRealTime() - constCountStart) << " sec." << endl; + } + if (Params::getInstance().compute_seq_composition) + { + cout << "Alignment has " << getNSeq() << " sequences with " << getNSite() + << " columns, " << getNPattern() << " distinct patterns" << endl + << num_informative_sites << " parsimony-informative, " + << num_variant_sites-num_informative_sites << " singleton sites, " + << (int)(frac_const_sites*getNSite()) << " constant sites" << endl; + } + //buildSeqStates(); checkSeqName(); // OBSOLETE: identical sequences are handled later // checkIdenticalSeq(); @@ -461,6 +811,45 @@ Alignment::Alignment(char *filename, char *sequence_type, InputType &intype) : v } +Alignment::Alignment(NxsDataBlock *data_block, char *sequence_type, string model) : vector() { + name = "Noname"; + this->model_name = model; + if (sequence_type) + this->sequence_type = sequence_type; + num_states = 0; + frac_const_sites = 0.0; + frac_invariant_sites = 0.0; + codon_table = NULL; + genetic_code = NULL; + non_stop_codon = NULL; + seq_type = SEQ_UNKNOWN; + STATE_UNKNOWN = 126; + pars_lower_bound = NULL; + + extractDataBlock(data_block); + if (verbose_mode >= VB_DEBUG) + data_block->Report(cout); + + if (getNSeq() < 3) + outError("Alignment must have at least 3 sequences"); + + countConstSite(); + + if (Params::getInstance().compute_seq_composition) + cout << "Alignment has " << getNSeq() << " 
sequences with " << getNSite() + << " columns, " << getNPattern() << " distinct patterns" << endl + << num_informative_sites << " parsimony-informative, " + << num_variant_sites-num_informative_sites << " singleton sites, " + << (int)(frac_const_sites*getNSite()) << " constant sites" << endl; + //buildSeqStates(); + checkSeqName(); + // OBSOLETE: identical sequences are handled later + // checkIdenticalSeq(); + //cout << "Number of character states is " << num_states << endl; + //cout << "Number of patterns = " << size() << endl; + //cout << "Fraction of constant sites: " << frac_const_sites << endl; + +} bool Alignment::isStopCodon(int state) { // 2017-05-27: all stop codon removed from Markov process return false; @@ -484,9 +873,14 @@ bool Alignment::isStandardGeneticCode() { return (genetic_code == genetic_code1 || genetic_code == genetic_code11); } -void Alignment::buildSeqStates(bool add_unobs_const) { - string unobs_const; - if (add_unobs_const) unobs_const = getUnobservedConstPatterns(); +/* +void Alignment::buildSeqStates(vector > &seq_states, bool add_unobs_const) { + vector unobs_const; + if (add_unobs_const) { + unobs_const.resize(num_states); + for (StateType state = 0; state < num_states; state++) + unobs_const[state] = state; + } seq_states.clear(); seq_states.resize(getNSeq()); for (int seq = 0; seq < getNSeq(); seq++) { @@ -494,14 +888,15 @@ void Alignment::buildSeqStates(bool add_unobs_const) { has_state.resize(STATE_UNKNOWN+1, false); for (int site = 0; site < getNPattern(); site++) has_state[at(site)[seq]] = true; - for (string::iterator it = unobs_const.begin(); it != unobs_const.end(); it++) - has_state[*it] = true; + for (StateType it : unobs_const) + has_state[it] = true; seq_states[seq].clear(); for (int state = 0; state < STATE_UNKNOWN; state++) if (has_state[state]) seq_states[seq].push_back(state); } } +*/ int Alignment::readNexus(char *filename) { NxsTaxaBlock *taxa_block; @@ -532,18 +927,26 @@ int Alignment::readNexus(char *filename) { 
return 0; } - if (char_block->GetNTax() == 0) { char_block = data_block; } - - if (char_block->GetNTax() == 0) { - outError("No data is given in the input file"); + if (data_block->GetNTax() == 0 && char_block->GetNTax() == 0) { + outError("No DATA or CHARACTERS blocks found"); return 0; } - if (verbose_mode >= VB_DEBUG) - char_block->Report(cout); - - extractDataBlock(char_block); + if (char_block->GetNTax() > 0) { + extractDataBlock(char_block); + if (verbose_mode >= VB_DEBUG) + char_block->Report(cout); + } else { + extractDataBlock(data_block); + if (verbose_mode >= VB_DEBUG) + data_block->Report(cout); + } + delete trees_block; + delete char_block; + delete data_block; + delete assumptions_block; + delete taxa_block; return 1; } @@ -553,7 +956,7 @@ void Alignment::computeUnknownState() { case SEQ_PROTEIN: STATE_UNKNOWN = 23; break; case SEQ_POMO: { if (pomo_sampling_method == SAMPLING_SAMPLED) STATE_UNKNOWN = num_states; - else STATE_UNKNOWN = 0xffffffff; + else STATE_UNKNOWN = 0xffffffff; // only dummy, will be initialized later break; } default: STATE_UNKNOWN = num_states; break; @@ -567,8 +970,8 @@ int getDataBlockMorphStates(NxsCharactersBlock *data_block) { char ch; int nstates = 0; - for (site = 0; site < nsite; site++) - for (seq = 0; seq < nseq; seq++) { + for (seq = 0; seq < nseq; seq++) + for (site = 0; site < nsite; site++) { int nstate = data_block->GetNumStates(seq, site); if (nstate == 0) continue; @@ -580,23 +983,11 @@ int getDataBlockMorphStates(NxsCharactersBlock *data_block) { else if (ch >= 'A' && ch <= 'Z') ch = ch - 'A' + 11; else - outError(data_block->GetTaxonLabel(seq) + " has invalid state at site " + convertIntToString(site)); + outError(data_block->GetTaxonLabel(seq) + " has invalid single state " + ch + " at site " + convertIntToString(site+1)); if (ch > nstates) nstates = ch; continue; } - for (int state = 0; state < nstate; state++) { - ch = data_block->GetState(seq, site, state); - if (!isalnum(ch)) continue; - if (ch >= '0' && 
ch <= '9') ch = ch - '0' + 1; - if (ch >= 'A' && ch <= 'Z') ch = ch - 'A' + 11; - if (ch >= '0' && ch <= '9') - ch = ch - '0' + 1; - else if (ch >= 'A' && ch <= 'Z') - ch = ch - 'A' + 11; - else - outError(data_block->GetTaxonLabel(seq) + " has invalid state at site " + convertIntToString(site)); - if (ch > nstates) nstates = ch; - } + //cout << "NOTE: " << data_block->GetTaxonLabel(seq) << " has ambiguous state at site " << site+1 << " which is treated as unknown" << endl; } return nstates; } @@ -608,7 +999,10 @@ void Alignment::extractDataBlock(NxsCharactersBlock *data_block) { //num_states = strlen(symbols); char char_to_state[NUM_CHAR]; char state_to_char[NUM_CHAR]; - + + if (!data_block->GetMatrix()) + outError("MATRIX command undeclared or invalid"); + NxsCharactersBlock::DataTypesEnum data_type = (NxsCharactersBlock::DataTypesEnum)data_block->GetDataType(); if (data_type == NxsCharactersBlock::continuous) { outError("Continuous characters not supported"); @@ -674,20 +1068,31 @@ void Alignment::extractDataBlock(NxsCharactersBlock *data_block) { pat.push_back(STATE_UNKNOWN); else if (nstate == 1) { pat.push_back(char_to_state[(int)data_block->GetState(seq, site, 0)]); - } else { - ASSERT(data_type != NxsCharactersBlock::dna || data_type != NxsCharactersBlock::rna || data_type != NxsCharactersBlock::nucleotide); + } else if (data_type == NxsCharactersBlock::dna || data_type == NxsCharactersBlock::rna || data_type == NxsCharactersBlock::nucleotide) { + // 2018-06-07: correctly interpret ambiguous nucleotide char pat_ch = 0; for (int state = 0; state < nstate; state++) { pat_ch |= (1 << char_to_state[(int)data_block->GetState(seq, site, state)]); } pat_ch += 3; pat.push_back(pat_ch); + } else { + // other ambiguous characters are treated as unknown + stringstream str; + str << "Sequence " << seq_names[seq] << " site " << site+1 << ": {"; + for (int state = 0; state < nstate; state++) { + str << data_block->GetState(seq, site, state); + } + str << "} treated as 
unknown character"; + outWarning(str.str()); + pat.push_back(STATE_UNKNOWN); } } num_gaps_only += addPattern(pat, site); } - if (num_gaps_only) + if ( 0 < num_gaps_only ) { cout << "WARNING: " << num_gaps_only << " sites contain only gaps or ambiguous characters." << endl; + } if (verbose_mode >= VB_MAX) for (site = 0; site < size(); site++) { for (seq = 0; seq < nseq; seq++) @@ -713,8 +1118,9 @@ void Alignment::computeConst(Pattern &pat) { StateBitset state_app; state_app.reset(); int j; - for (j = 0; j < num_states; j++) - state_app[j] = 1; + for (j = 0; j < num_states; j++) { + state_app[j] = 1; + } // number of appearance for each state, to compute is_informative size_t num_app[num_states]; @@ -741,6 +1147,10 @@ void Alignment::computeConst(Pattern &pat) { } } + // added by TD + vector vec(num_app, num_app + num_states); + pat.freqs = vec; + // at least 2 states, each appearing at least twice is_informative = (count >= 2); @@ -815,8 +1225,8 @@ void Alignment::computeConst(Pattern &pat) { void Alignment::printSiteInfo(ostream &out, int part_id) { - int nsite = getNSite(); - for (int site = 0; site != nsite; site++) { + size_t nsite = getNSite(); + for (size_t site = 0; site != nsite; site++) { Pattern ptn = getPattern(site); if (part_id >= 0) out << part_id << "\t"; @@ -865,61 +1275,90 @@ void Alignment::printSiteInfo(const char* filename) { } } -bool Alignment::addPattern(Pattern &pat, int site, int freq) { +bool Alignment::addPatternLazy(Pattern &pat, int site, int freq, bool& gaps_only) { + //Returns true if the pattern was actually added, false + //if it was identified as a duplicate (and handled by + //increasing he frequency of an existing pattern) // check if pattern contains only gaps - bool gaps_only = true; + gaps_only = true; for (Pattern::iterator it = pat.begin(); it != pat.end(); it++) if ((*it) != STATE_UNKNOWN) { gaps_only = false; break; } if (gaps_only) { - if (verbose_mode >= VB_DEBUG) + if (verbose_mode >= VB_DEBUG) { cout << "Site " << site 
<< " contains only gaps or ambiguous characters" << endl; - //return true; + } } PatternIntMap::iterator pat_it = pattern_index.find(pat); if (pat_it == pattern_index.end()) { // not found pat.frequency = freq; - computeConst(pat); + //We don't do computeConst(pat); here, that's why + //there's a "Lazy" in this member function's name! + //We do that in addPattern... push_back(pat); pattern_index[back()] = size()-1; site_pattern[site] = size()-1; + return true; } else { int index = pat_it->second; at(index).frequency += freq; site_pattern[site] = index; + return false; + } +} + +bool Alignment::addPattern(Pattern &pat, int site, int freq) { + bool gaps_only = false; + if (addPatternLazy(pat, site, freq, gaps_only)) { + computeConst(back()); } return gaps_only; } +void Alignment::updatePatterns(size_t oldPatternCount) { + size_t patternCount = size(); + #ifdef _OPENMP + #pragma omp parallel for + #endif + for (size_t patIndex = oldPatternCount; patIndex < patternCount; ++patIndex ) { + computeConst(at(patIndex)); + } +} + void Alignment::addConstPatterns(char *freq_const_patterns) { IntVector vec; convert_int_vec(freq_const_patterns, vec); if (vec.size() != num_states) outError("Const pattern frequency vector has different number of states: ", freq_const_patterns); - int nsite = getNSite(), orig_nsite = getNSite(); - int i; - for (i = 0; i < vec.size(); i++) { + size_t nsite = getNSite(); + size_t orig_nsite = getNSite(); + for (size_t i = 0; i < vec.size(); i++) { nsite += vec[i]; if (vec[i] < 0) outError("Const pattern frequency must be non-negative"); } site_pattern.resize(nsite, -1); - int nseq = getNSeq(); + size_t nseq = getNSeq(); nsite = orig_nsite; - for (i = 0; i < vec.size(); i++) if (vec[i] > 0) { - Pattern pat; - pat.resize(nseq, i); -// if (pattern_index.find(pat) != pattern_index.end()) { -// outWarning("Constant pattern of all " + convertStateBackStr(i) + " already exists"); -// } - for (int j = 0; j < vec[i]; j++) - addPattern(pat, nsite++, 1); - } + 
size_t oldPatternCount = size(); //JB 27-Jul-2020 (Parallelize) + for (size_t i = 0; i < vec.size(); i++) { + if (vec[i] > 0) { + Pattern pat; + pat.resize(nseq, i); + //if (pattern_index.find(pat) != pattern_index.end()) { + // outWarning("Constant pattern of all " + convertStateBackStr(i) + " already exists"); + //} + for (int j = 0; j < vec[i]; j++) { + bool gaps_only; + addPatternLazy(pat, nsite++, 1, gaps_only); //JB 27-Jul-2020 (Parallelize) + } + } + } + updatePatterns(oldPatternCount); //JB 27-Jul-20 (Parallelize) countConstSite(); - buildSeqStates(); } void Alignment::orderPatternByNumChars(int pat_type) { @@ -963,6 +1402,15 @@ void Alignment::orderPatternByNumChars(int pat_type) { pars_lower_bound[i] += num; } } + + // fill up to vectoclass with dummy pattern + int maxnptn = get_safe_upper_limit_float(ordered_pattern.size()); + while (ordered_pattern.size() < maxnptn) { + Pattern pat; + pat.resize(getNSeq(), STATE_UNKNOWN); + pat.frequency = 0; + ordered_pattern.push_back(pat); + } sum += pars_lower_bound[i]; // now transform lower_bound // assert(i == maxi-1); @@ -990,7 +1438,7 @@ void Alignment::ungroupSitePattern() { vector stored_pat = (*this); clear(); - for (int i = 0; i < getNSite(); i++) { + for (size_t i = 0; i < getNSite(); ++i) { Pattern pat = stored_pat[getPatternID(i)]; pat.frequency = 1; push_back(pat); @@ -1006,10 +1454,10 @@ void Alignment::regroupSitePattern(int groups, IntVector& site_group) clear(); site_pattern.clear(); site_pattern.resize(stored_site_pattern.size(), -1); - int count = 0; + size_t count = 0; for (int g = 0; g < groups; g++) { pattern_index.clear(); - for (int i = 0; i < site_group.size(); i++) + for (size_t i = 0; i < site_group.size(); ++i) if (site_group[i] == g) { count++; Pattern pat = stored_pat[stored_site_pattern[i]]; @@ -1018,7 +1466,7 @@ void Alignment::regroupSitePattern(int groups, IntVector& site_group) } ASSERT(count == stored_site_pattern.size()); count = 0; - for (iterator it = begin(); it != end(); 
it++) + for (iterator it = begin(); it != end(); ++it) count += it->frequency; ASSERT(count == getNSite()); pattern_index.clear(); @@ -1032,22 +1480,45 @@ void Alignment::regroupSitePattern(int groups, IntVector& site_group) @return the data type of the input sequences */ SeqType Alignment::detectSequenceType(StrVector &sequences) { - int num_nuc = 0; - int num_ungap = 0; - int num_bin = 0; - int num_alpha = 0; - int num_digit = 0; - - for (StrVector::iterator it = sequences.begin(); it != sequences.end(); it++) - for (string::iterator i = it->begin(); i != it->end(); i++) { - if ((*i) != '?' && (*i) != '-' && (*i) != '.' && *i != 'N' && *i != 'X' && (*i) != '~') num_ungap++; - if ((*i) == 'A' || (*i) == 'C' || (*i) == 'G' || (*i) == 'T' || (*i) == 'U') - num_nuc++; - if ((*i) == '0' || (*i) == '1') - num_bin++; - if (isalpha(*i)) num_alpha++; - if (isdigit(*i)) num_digit++; + size_t num_nuc = 0; + size_t num_ungap = 0; + size_t num_bin = 0; + size_t num_alpha = 0; + size_t num_digit = 0; + double detectStart = getRealTime(); + size_t sequenceCount = sequences.size(); +#ifdef _OPENMP +#pragma omp parallel for reduction(+:num_nuc,num_ungap,num_bin,num_alpha,num_digit) +#endif + for (size_t seqNum = 0; seqNum < sequenceCount; ++seqNum) { + auto start = sequences.at(seqNum).data(); + auto stop = start + sequences.at(seqNum).size(); + for (auto i = start; i!=stop; ++i) { + if ((*i) == 'A' || (*i) == 'C' || (*i) == 'G' || (*i) == 'T' || (*i) == 'U') { + ++num_nuc; + ++num_ungap; + continue; + } + if ((*i)=='?' || (*i)=='-' || (*i) == '.' ) { + continue; + } + if (*i != 'N' && *i != 'X' && (*i) != '~') { + num_ungap++; + if (isdigit(*i)) { + num_digit++; + if ((*i) == '0' || (*i) == '1') { + num_bin++; + } + } + } + if (isalpha(*i)) { + num_alpha++; + } } + } + if (verbose_mode >= VB_MED) { + cout << "Sequence Type detection took " << (getRealTime()-detectStart) << " seconds." 
<< endl; + } if (((double)num_nuc) / num_ungap > 0.9) return SEQ_DNA; if (((double)num_bin) / num_ungap > 0.9) @@ -1103,6 +1574,7 @@ void Alignment::buildStateMap(char *map, SeqType seq_type) { map[(unsigned char)'J'] = 22; // I or L map[(unsigned char)'*'] = STATE_UNKNOWN; // stop codon map[(unsigned char)'U'] = STATE_UNKNOWN; // 21st amino acid + map[(unsigned char)'O'] = STATE_UNKNOWN; // 22nd amino acid return; case SEQ_MULTISTATE: @@ -1126,7 +1598,7 @@ void Alignment::buildStateMap(char *map, SeqType seq_type) { @param seq_type data type (SEQ_DNA, etc.) @return state ID */ -char Alignment::convertState(char state, SeqType seq_type) { +StateType Alignment::convertState(char state, SeqType seq_type) { if (state == '?' || state == '-' || state == '.' || state == '~') return STATE_UNKNOWN; @@ -1191,6 +1663,7 @@ char Alignment::convertState(char state, SeqType seq_type) { if (state == 'J') return 22; if (state == '*') return STATE_UNKNOWN; // stop codon if (state == 'U') return STATE_UNKNOWN; // 21st amino-acid + if (state == 'O') return STATE_UNKNOWN; // 22nd amino-acid loc = strchr(symbols_protein, state); if (!loc) return STATE_INVALID; // unrecognize character @@ -1211,7 +1684,7 @@ char Alignment::convertState(char state, SeqType seq_type) { } // TODO: state should int -char Alignment::convertState(char state) { +StateType Alignment::convertState(char state) { return convertState(state, seq_type); } @@ -1287,29 +1760,36 @@ char Alignment::convertStateBack(char state) { } } -string Alignment::convertStateBackStr(char state) { +string Alignment::convertStateBackStr(StateType state) { string str; - if (seq_type == SEQ_POMO) - return string("POMO")+convertIntToString(state); - if (seq_type != SEQ_CODON) { - str = convertStateBack(state); - } else { - // codon data - if (state >= num_states) return "???"; - assert(codon_table); - state = codon_table[(int)state]; - str = symbols_dna[state/16]; - str += symbols_dna[(state%16)/4]; - str += symbols_dna[state%4]; + if 
(seq_type == SEQ_POMO) { + return string("POMO") + convertIntToString(state); + } + if (seq_type == SEQ_MULTISTATE) { + return " " + convertIntToString(state); + } + if (seq_type == SEQ_CODON) { + // codon data + if (state >= num_states) return "???"; + assert(codon_table); + state = codon_table[(int)state]; + str = symbols_dna[state/16]; + str += symbols_dna[(state%16)/4]; + str += symbols_dna[state%4]; + return str; } + // all other data types + str = convertStateBack(state); return str; } +/* void Alignment::convertStateStr(string &str, SeqType seq_type) { for (string::iterator it = str.begin(); it != str.end(); it++) (*it) = convertState(*it, seq_type); } - +*/ + void Alignment::initCodon(char *gene_code_id) { // build index from 64 codons to non-stop codons int transl_table = 1; @@ -1395,9 +1875,9 @@ SeqType Alignment::getSeqType(const char *sequence_type) { user_seq_type = SEQ_PROTEIN; } else if (strncmp(sequence_type, "NT2AA", 5) == 0) { user_seq_type = SEQ_PROTEIN; - } else if (strcmp(sequence_type, "NUM") == 0 || strcmp(sequence_type, "MORPH") == 0 || strcmp(sequence_type, "MULTI") == 0) { + } else if (strcmp(sequence_type, "NUM") == 0 || strcmp(sequence_type, "MORPH") == 0) { user_seq_type = SEQ_MORPH; - } else if (strcmp(sequence_type, "TINA") == 0) { + } else if (strcmp(sequence_type, "TINA") == 0 || strcmp(sequence_type, "MULTI") == 0) { user_seq_type = SEQ_MULTISTATE; } else if (strncmp(sequence_type, "CODON", 5) == 0) { user_seq_type = SEQ_CODON; @@ -1412,23 +1892,29 @@ int Alignment::buildPattern(StrVector &sequences, char *sequence_type, int nseq, genetic_code = NULL; non_stop_codon = NULL; - - if (nseq != seq_names.size()) throw "Different number of sequences than specified"; - + if (nseq != seq_names.size()) { + throw "Different number of sequences than specified"; + } + unordered_set namesSeen; + double seqCheckStart = getRealTime(); /* now check that all sequence names are correct */ for (seq_id = 0; seq_id < nseq; seq_id ++) { ostringstream 
err_str; if (seq_names[seq_id] == "") err_str << "Sequence number " << seq_id+1 << " has no names\n"; // check that all the names are different - for (int i = 0; i < seq_id; i++) - if (seq_names[i] == seq_names[seq_id]) - err_str << "The sequence name " << seq_names[seq_id] << " is dupplicated\n"; + if (!namesSeen.insert(seq_names[seq_id]).second) { + err_str << "The sequence name " << seq_names[seq_id] << " is duplicated\n"; + } } if (err_str.str() != "") + { throw err_str.str(); - - + } + if (verbose_mode >= VB_MED) { + cout.precision(6); + cout << "Duplicate sequence name check took " << (getRealTime()-seqCheckStart) << " seconds." << endl; + } /* now check that all sequences have the same length */ for (seq_id = 0; seq_id < nseq; seq_id ++) { if (sequences[seq_id].length() != nsite) { @@ -1447,6 +1933,7 @@ int Alignment::buildPattern(StrVector &sequences, char *sequence_type, int nseq, /* now check data type */ seq_type = detectSequenceType(sequences); + switch (seq_type) { case SEQ_BINARY: num_states = 2; @@ -1492,11 +1979,11 @@ int Alignment::buildPattern(StrVector &sequences, char *sequence_type, int nseq, num_states = 20; nt2aa = true; cout << "Translating to amino-acid sequences with genetic code " << &sequence_type[5] << " ..." 
<< endl; - } else if (strcmp(sequence_type, "NUM") == 0 || strcmp(sequence_type, "MORPH") == 0 || strcmp(sequence_type, "MULTI") == 0) { + } else if (strcmp(sequence_type, "NUM") == 0 || strcmp(sequence_type, "MORPH") == 0) { num_states = getMorphStates(sequences); if (num_states < 2 || num_states > 32) throw "Invalid number of states"; user_seq_type = SEQ_MORPH; - } else if (strcmp(sequence_type, "TINA") == 0) { + } else if (strcmp(sequence_type, "TINA") == 0 || strcmp(sequence_type, "MULTI") == 0) { cout << "Multi-state data with " << num_states << " alphabets" << endl; user_seq_type = SEQ_MULTISTATE; } else if (strncmp(sequence_type, "CODON", 5) == 0) { @@ -1512,6 +1999,13 @@ int Alignment::buildPattern(StrVector &sequences, char *sequence_type, int nseq, seq_type = user_seq_type; } + // added by TD + if (Params::getInstance().use_nn_model && seq_type != SEQ_DNA) { + throw "Can't use option use-nn-model with non DNA/RNA alignments!"; + } + + //initStateSpace(seq_type); + // now convert to patterns int site, seq, num_gaps_only = 0; @@ -1533,6 +2027,8 @@ int Alignment::buildPattern(StrVector &sequences, char *sequence_type, int nseq, clear(); pattern_index.clear(); int num_error = 0; + + progress_display progress(nsite, "Constructing alignment", "examined", "site"); for (site = 0; site < nsite; site+=step) { for (seq = 0; seq < nseq; seq++) { //char state = convertState(sequences[seq][site], seq_type); @@ -1581,12 +2077,21 @@ int Alignment::buildPattern(StrVector &sequences, char *sequence_type, int nseq, pat[seq] = state; } if (!num_error) - num_gaps_only += addPattern(pat, site/step); + { + bool gaps_only; + addPatternLazy(pat, site/step, 1, gaps_only); + num_gaps_only += gaps_only ? 1 : 0; + } + progress += step; } - if (num_gaps_only) + progress.done(); + updatePatterns(0); + if (num_gaps_only) { cout << "WARNING: " << num_gaps_only << " sites contain only gaps or ambiguous characters." 
<< endl; - if (err_str.str() != "") + } + if (err_str.str() != "") { throw err_str.str(); + } return 1; } @@ -1623,7 +2128,7 @@ int Alignment::readPhylip(char *filename, char *sequence_type) { string line; // remove the failbit in.exceptions(ios::badbit); - bool tina_state = (sequence_type && strcmp(sequence_type,"TINA") == 0); + bool tina_state = (sequence_type && (strcmp(sequence_type,"TINA") == 0 || strcmp(sequence_type,"MULTI") == 0)); num_states = 0; for (; !in.eof(); line_num++) { @@ -1748,7 +2253,6 @@ int Alignment::readPhylipSequential(char *filename, char *sequence_type) { } int Alignment::readFasta(char *filename, char *sequence_type) { - StrVector sequences; ostringstream err_str; igzstream in; @@ -1769,35 +2273,46 @@ int Alignment::readFasta(char *filename, char *sequence_type) { // remove the failbit in.exceptions(ios::badbit); - for (; !in.eof(); line_num++) { - safeGetline(in, line); - if (line == "") continue; - - //cout << line << endl; - if (line[0] == '>') { // next sequence - string::size_type pos = line.find_first_of("\n\r"); - seq_names.push_back(line.substr(1, pos-1)); - trimString(seq_names.back()); - sequences.push_back(""); - continue; + { + progress_display progress(in.getCompressedLength(), "Reading fasta file", "", ""); + for (; !in.eof(); line_num++) { + safeGetline(in, line); + if (line == "") { + continue; + } + //cout << line << endl; + if (line[0] == '>') { // next sequence + string::size_type pos = line.find_first_of("\n\r"); + seq_names.push_back(line.substr(1, pos-1)); + trimString(seq_names.back()); + sequences.push_back(""); + continue; + } + // read sequence contents + if (sequences.empty()) { + throw "First line must begin with '>' to define sequence name"; + } + processSeq(sequences.back(), line, line_num); + progress = (double)in.getCompressedPosition(); } - // read sequence contents - if (sequences.empty()) throw "First line must begin with '>' to define sequence name"; - processSeq(sequences.back(), line, line_num); } + 
in.clear(); // set the failbit again in.exceptions(ios::failbit | ios::badbit); in.close(); // now try to cut down sequence name if possible - int i, j, step = 0; + int i, step = 0; StrVector new_seq_names, remain_seq_names; new_seq_names.resize(seq_names.size()); remain_seq_names = seq_names; + double startShorten = getRealTime(); for (step = 0; step < 4; step++) { bool duplicated = false; + unordered_set namesSeenThisTime; + //Set of shorted names seen so far, this iteration for (i = 0; i < seq_names.size(); i++) { if (remain_seq_names[i].empty()) continue; size_t pos = remain_seq_names[i].find_first_of(" \t"); @@ -1808,17 +2323,19 @@ int Alignment::readFasta(char *filename, char *sequence_type) { new_seq_names[i] += remain_seq_names[i].substr(0, pos); remain_seq_names[i] = "_" + remain_seq_names[i].substr(pos+1); } - // now check for duplication - if (!duplicated) - for (j = 0; j < i-1; j++) - if (new_seq_names[j] == new_seq_names[i]) { - duplicated = true; - break; - } + if (!duplicated) { + //add the shortened name for sequence i to the + //set of shortened names seen so far, and set + //duplicated to true if it was already there. + duplicated = !namesSeenThisTime.insert(new_seq_names[i]).second; + } } if (!duplicated) break; } - + if (verbose_mode >= VB_MED) { + cout.precision(6); + cout << "Name shortening took " << (getRealTime() - startShorten) << " seconds." 
<< endl; + } if (step > 0) { for (i = 0; i < seq_names.size(); i++) if (seq_names[i] != new_seq_names[i]) { @@ -1833,14 +2350,12 @@ int Alignment::readFasta(char *filename, char *sequence_type) { int Alignment::readClustal(char *filename, char *sequence_type) { - StrVector sequences; igzstream in; int line_num = 1; string line; num_states = 0; - // set the failbit and badbit in.exceptions(ios::failbit | ios::badbit); in.open(filename); @@ -2491,7 +3006,7 @@ bool Alignment::getSiteFromResidue(int seq_id, int &residue_left, int &residue_r } int Alignment::buildRetainingSites(const char *aln_site_list, IntVector &kept_sites, - bool exclude_gaps, bool exclude_const_sites, const char *ref_seq_name) + int exclude_sites, const char *ref_seq_name) { if (aln_site_list) { int seq_id = -1; @@ -2534,19 +3049,26 @@ int Alignment::buildRetainingSites(const char *aln_site_list, IntVector &kept_si } int j; - if (exclude_gaps) { + if (exclude_sites & EXCLUDE_GAP) { for (j = 0; j < kept_sites.size(); j++) if (kept_sites[j] && at(site_pattern[j]).computeAmbiguousChar(num_states) > 0) { kept_sites[j] = 0; } } - if (exclude_const_sites) { + if (exclude_sites & EXCLUDE_INVAR) { for (j = 0; j < kept_sites.size(); j++) if (at(site_pattern[j]).isInvariant()) kept_sites[j] = 0; } + if (exclude_sites & EXCLUDE_UNINF) { + for (j = 0; j < kept_sites.size(); j++) + if (!at(site_pattern[j]).isInformative()) + kept_sites[j] = 0; + + } + int final_length = 0; for (j = 0; j < kept_sites.size(); j++) if (kept_sites[j]) final_length++; @@ -2554,88 +3076,184 @@ int Alignment::buildRetainingSites(const char *aln_site_list, IntVector &kept_si } void Alignment::printPhylip(ostream &out, bool append, const char *aln_site_list, - bool exclude_gaps, bool exclude_const_sites, const char *ref_seq_name, bool print_taxid) { + int exclude_sites, const char *ref_seq_name, bool print_taxid) { IntVector kept_sites; - int final_length = buildRetainingSites(aln_site_list, kept_sites, exclude_gaps, 
exclude_const_sites, ref_seq_name); - if (seq_type == SEQ_CODON) + int final_length = buildRetainingSites(aln_site_list, kept_sites, exclude_sites, ref_seq_name); + if (seq_type == SEQ_CODON) { final_length *= 3; + } + out << getNSeq() << " " << final_length << endl; + int max_len = getMaxSeqNameLength(); + if (print_taxid) max_len = 10; + if (max_len < 10) max_len = 10; + + auto seq_count = seq_names.size(); + + //Precalculate state representation strings + vector stateStrings; + stateStrings.resize(this->num_states); + for (int i=0; i seq_data; + seq_data.resize(seq_count); + #ifdef _OPENMP + #pragma omp parallel for + #endif + for (size_t seq_id = 0; seq_id < seq_count; seq_id++) { + std::string& str = seq_data[seq_id]; + auto patterns = site_pattern.data(); + auto patternCount = site_pattern.size(); + for (int i=0; i" << (*it) << endl; + int j = 0; + for (IntVector::iterator i = site_pattern.begin(); i != site_pattern.end(); i++, j++) + if (kept_sites[j]) + out << convertStateBackStr(at(*i)[seq_id]); + out << endl; + } +} + +void Alignment::printNexus(ostream &out, bool append, const char *aln_site_list, + int exclude_sites, const char *ref_seq_name, bool print_taxid) { + IntVector kept_sites; + int final_length = buildRetainingSites(aln_site_list, kept_sites, exclude_sites, ref_seq_name); + if (seq_type == SEQ_CODON) + final_length *= 3; + + out << "#nexus" << endl << "begin data;" << endl; + out << " dimensions ntax=" << getNSeq() << " nchar=" << final_length << ";" << endl; + out << " format datatype="; + switch (seq_type) { + case SEQ_DNA: + case SEQ_CODON: + out << "nucleotide"; break; + case SEQ_MORPH: + case SEQ_BINARY: + case SEQ_MULTISTATE: + out << "standard"; break; + case SEQ_PROTEIN: + out << "protein"; break; + default: + outError("Unspported datatype for NEXUS file"); + } + out << " missing=? 
gap=-;" << endl; + out << " matrix" << endl; + int max_len = getMaxSeqNameLength(); if (print_taxid) max_len = 10; - if (max_len < 10) max_len = 10; - int seq_id; - for (seq_id = 0; seq_id < seq_names.size(); seq_id++) { - out.width(max_len); + if (max_len < 10) max_len = 10; + int seq_id; + for (seq_id = 0; seq_id < seq_names.size(); seq_id++) { + out << " "; + out.width(max_len); if (print_taxid) out << left << seq_id << " "; else out << left << seq_names[seq_id] << " "; - int j = 0; - for (IntVector::iterator i = site_pattern.begin(); i != site_pattern.end(); i++, j++) - if (kept_sites[j]) - out << convertStateBackStr(at(*i)[seq_id]); - out << endl; - } + int j = 0; + for (IntVector::iterator i = site_pattern.begin(); i != site_pattern.end(); i++, j++) + if (kept_sites[j]) + out << convertStateBackStr(at(*i)[seq_id]); + out << endl; + } + out << " ;" << endl; + out << "end;" << endl; + } -void Alignment::printPhylip(const char *file_name, bool append, const char *aln_site_list, - bool exclude_gaps, bool exclude_const_sites, const char *ref_seq_name) { +void Alignment::printAlignment(InputType format, const char *file_name, bool append, const char *aln_site_list, + int exclude_sites, const char *ref_seq_name) { try { ofstream out; out.exceptions(ios::failbit | ios::badbit); - + if (append) out.open(file_name, ios_base::out | ios_base::app); else out.open(file_name); - - printPhylip(out, append, aln_site_list, exclude_gaps, exclude_const_sites, ref_seq_name); + + printAlignment(format, out, file_name, append, aln_site_list, exclude_sites, ref_seq_name); out.close(); - if (verbose_mode >= VB_MED) - cout << "Alignment was printed to " << file_name << endl; + if (verbose_mode >= VB_MED || !append) + cout << "Alignment was printed to " << file_name << endl; } catch (ios::failure) { outError(ERR_WRITE_OUTPUT, file_name); } } -void Alignment::printFasta(const char *file_name, bool append, const char *aln_site_list - , bool exclude_gaps, bool exclude_const_sites, const 
char *ref_seq_name) -{ - IntVector kept_sites; - buildRetainingSites(aln_site_list, kept_sites, exclude_gaps, exclude_const_sites, ref_seq_name); - try { - ofstream out; - out.exceptions(ios::failbit | ios::badbit); - if (append) - out.open(file_name, ios_base::out | ios_base::app); - else - out.open(file_name); - StrVector::iterator it; - int seq_id = 0; - for (it = seq_names.begin(); it != seq_names.end(); it++, seq_id++) { - out << ">" << (*it) << endl; - int j = 0; - for (IntVector::iterator i = site_pattern.begin(); i != site_pattern.end(); i++, j++) - if (kept_sites[j]) - out << convertStateBackStr(at(*i)[seq_id]); - out << endl; - } - out.close(); - cout << "Alignment was printed to " << file_name << endl; - } catch (ios::failure) { - outError(ERR_WRITE_OUTPUT, file_name); +void Alignment::printAlignment(InputType format, ostream &out, const char* file_name + , bool append, const char *aln_site_list + , int exclude_sites, const char *ref_seq_name) { + double printStart = getRealTime(); + const char* formatName = "phylip"; + switch (format) { + case IN_PHYLIP: + printPhylip(out, append, aln_site_list, exclude_sites, ref_seq_name); + break; + case IN_FASTA: + formatName = "fasta"; + printFasta(out, append, aln_site_list, exclude_sites, ref_seq_name); + break; + case IN_NEXUS: + formatName = "nexus"; + printNexus(out, append, aln_site_list, exclude_sites, ref_seq_name); + break; + default: + ASSERT(0 && "Unsupported alignment output format"); + } + if (verbose_mode >= VB_MED) { + std::cout << "Printing alignment to " << formatName << " file " + << file_name << " took " << (getRealTime()-printStart) + << " sec" << std::endl; } } - void Alignment::extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_true_char, int min_taxa, IntVector *kept_partitions) { IntVector::iterator it; for (it = seq_id.begin(); it != seq_id.end(); it++) { ASSERT(*it >= 0 && *it < aln->getNSeq()); seq_names.push_back(aln->getSeqName(*it)); } + name = aln->name; + model_name = 
aln->model_name; + sequence_type = aln->sequence_type; + position_spec = aln->position_spec; + aln_file = aln->aln_file; num_states = aln->num_states; seq_type = aln->seq_type; STATE_UNKNOWN = aln->STATE_UNKNOWN; @@ -2649,30 +3267,39 @@ void Alignment::extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_t site_pattern.resize(aln->getNSite(), -1); clear(); pattern_index.clear(); - int site = 0, removed_sites = 0; + size_t removed_sites = 0; VerboseMode save_mode = verbose_mode; verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern -// for (iterator pit = aln->begin(); pit != aln->end(); pit++) { - for (site = 0; site < aln->getNSite(); site++) { + + progress_display progress(aln->getNSite(), "Identifying sites to remove", "examined", "site"); + size_t oldPatternCount = size(); //JB 27-Jul-2020 Parallelized + int siteMod = 0; //site # modulo 100. + for (size_t site = 0; site < aln->getNSite(); ++site) { iterator pit = aln->begin() + (aln->getPatternID(site)); Pattern pat; - int true_char = 0; - for (it = seq_id.begin(); it != seq_id.end(); it++) { - char ch = (*pit)[*it]; - if (ch != STATE_UNKNOWN) true_char++; - pat.push_back(ch); + for (it = seq_id.begin(); it != seq_id.end(); ++it) { + pat.push_back ( (*pit)[*it] ); } - if (true_char < min_true_char) + int true_char = pat.computeGapChar(num_states, STATE_UNKNOWN); //JB 27-Jul-2020 Vectorized + if (true_char < min_true_char) { removed_sites++; - else - addPattern(pat, site-removed_sites); -// for (int i = 0; i < (*pit).frequency; i++) -// site_pattern[site++] = size()-1; + } + else { + bool gaps_only = false; + addPatternLazy(pat, site-removed_sites, 1, gaps_only); //JB 27-Jul-2020 Parallelized + } + if (siteMod == 100 ) { + progress += 100; + siteMod = 0; + } + ++siteMod; } + progress.done(); + updatePatterns(oldPatternCount); //JB 27-Jul-2020 Parallelized site_pattern.resize(aln->getNSite() - removed_sites); verbose_mode = save_mode; countConstSite(); - 
buildSeqStates(); +// buildSeqStates(); ASSERT(size() <= aln->size()); if (kept_partitions) kept_partitions->push_back(0); @@ -2680,19 +3307,23 @@ void Alignment::extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_t void Alignment::extractPatterns(Alignment *aln, IntVector &ptn_id) { - int i; - for (i = 0; i < aln->getNSeq(); i++) { + for (size_t i = 0; i < aln->getNSeq(); ++i) { seq_names.push_back(aln->getSeqName(i)); } + name = aln->name; + model_name = aln->model_name; + sequence_type = aln->sequence_type; + position_spec = aln->position_spec; + aln_file = aln->aln_file; num_states = aln->num_states; seq_type = aln->seq_type; STATE_UNKNOWN = aln->STATE_UNKNOWN; genetic_code = aln->genetic_code; if (seq_type == SEQ_CODON) { - codon_table = new char[num_states]; - memcpy(codon_table, aln->codon_table, num_states); - non_stop_codon = new char[strlen(genetic_code)]; - memcpy(non_stop_codon, aln->non_stop_codon, strlen(genetic_code)); + codon_table = new char[num_states]; + memcpy(codon_table, aln->codon_table, num_states); + non_stop_codon = new char[strlen(genetic_code)]; + memcpy(non_stop_codon, aln->non_stop_codon, strlen(genetic_code)); } site_pattern.resize(aln->getNSite(), -1); clear(); @@ -2700,7 +3331,7 @@ void Alignment::extractPatterns(Alignment *aln, IntVector &ptn_id) { int site = 0; VerboseMode save_mode = verbose_mode; verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern - for (i = 0; i != ptn_id.size(); i++) { + for (size_t i = 0; i != ptn_id.size(); ++i) { ASSERT(ptn_id[i] >= 0 && ptn_id[i] < aln->getNPattern()); Pattern pat = aln->at(ptn_id[i]); addPattern(pat, site, aln->at(ptn_id[i]).frequency); @@ -2710,16 +3341,20 @@ void Alignment::extractPatterns(Alignment *aln, IntVector &ptn_id) { site_pattern.resize(site); verbose_mode = save_mode; countConstSite(); - buildSeqStates(); +// buildSeqStates(); ASSERT(size() <= aln->size()); } void Alignment::extractPatternFreqs(Alignment *aln, IntVector 
&ptn_freq) { - int i; ASSERT(ptn_freq.size() <= aln->getNPattern()); - for (i = 0; i < aln->getNSeq(); i++) { + for (size_t i = 0; i < aln->getNSeq(); ++i) { seq_names.push_back(aln->getSeqName(i)); } + name = aln->name; + model_name = aln->model_name; + sequence_type = aln->sequence_type; + position_spec = aln->position_spec; + aln_file = aln->aln_file; num_states = aln->num_states; seq_type = aln->seq_type; genetic_code = aln->genetic_code; @@ -2736,7 +3371,7 @@ void Alignment::extractPatternFreqs(Alignment *aln, IntVector &ptn_freq) { int site = 0; VerboseMode save_mode = verbose_mode; verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern - for (i = 0; i != ptn_freq.size(); i++) + for (size_t i = 0; i != ptn_freq.size(); ++i) if (ptn_freq[i]) { ASSERT(ptn_freq[i] > 0); Pattern pat = aln->at(i); @@ -2747,42 +3382,45 @@ void Alignment::extractPatternFreqs(Alignment *aln, IntVector &ptn_freq) { site_pattern.resize(site); verbose_mode = save_mode; countConstSite(); - buildSeqStates(); ASSERT(size() <= aln->size()); } void Alignment::extractSites(Alignment *aln, IntVector &site_id) { - int i; - for (i = 0; i < aln->getNSeq(); i++) { + for (size_t i = 0; i < aln->getNSeq(); ++i) { seq_names.push_back(aln->getSeqName(i)); } + name = aln->name; + model_name = aln->model_name; + sequence_type = aln->sequence_type; + position_spec = aln->position_spec; + aln_file = aln->aln_file; num_states = aln->num_states; seq_type = aln->seq_type; STATE_UNKNOWN = aln->STATE_UNKNOWN; genetic_code = aln->genetic_code; if (seq_type == SEQ_CODON) { - codon_table = new char[num_states]; - memcpy(codon_table, aln->codon_table, num_states); - non_stop_codon = new char[strlen(genetic_code)]; - memcpy(non_stop_codon, aln->non_stop_codon, strlen(genetic_code)); + codon_table = new char[num_states]; + memcpy(codon_table, aln->codon_table, num_states); + non_stop_codon = new char[strlen(genetic_code)]; + memcpy(non_stop_codon, aln->non_stop_codon, 
strlen(genetic_code)); } site_pattern.resize(site_id.size(), -1); clear(); pattern_index.clear(); VerboseMode save_mode = verbose_mode; verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern - for (i = 0; i != site_id.size(); i++) { + for (size_t i = 0; i != site_id.size(); i++) { Pattern pat = aln->getPattern(site_id[i]); addPattern(pat, i); } verbose_mode = save_mode; countConstSite(); - buildSeqStates(); // sanity check - for (iterator it = begin(); it != end(); it++) - if (it->at(0) == -1) - ASSERT(0); - + for (iterator it = begin(); it != end(); it++) { + if (it->at(0) == -1) { + ASSERT(0); + } + } //cout << getNSite() << " positions were extracted" << endl; //cout << __func__ << " " << num_states << endl; } @@ -2794,11 +3432,15 @@ void Alignment::convertToCodonOrAA(Alignment *aln, char *gene_code_id, bool nt2a outError("Cannot convert non-DNA alignment into codon alignment"); if (aln->getNSite() % 3 != 0) outError("Sequence length is not divisible by 3 when converting to codon sequences"); - int i, site; char AA_to_state[NUM_CHAR]; - for (i = 0; i < aln->getNSeq(); i++) { + for (size_t i = 0; i < aln->getNSeq(); i++) { seq_names.push_back(aln->getSeqName(i)); } + name = aln->name; + model_name = aln->model_name; + sequence_type = aln->sequence_type; + position_spec = aln->position_spec; + aln_file = aln->aln_file; // num_states = aln->num_states; seq_type = SEQ_CODON; initCodon(gene_code_id); @@ -2820,15 +3462,15 @@ void Alignment::convertToCodonOrAA(Alignment *aln, char *gene_code_id, bool nt2a VerboseMode save_mode = verbose_mode; verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern - int nsite = aln->getNSite(); - int nseq = aln->getNSeq(); + size_t nsite = aln->getNSite(); + size_t nseq = aln->getNSeq(); Pattern pat; pat.resize(nseq); int num_error = 0; ostringstream err_str; - for (site = 0; site < nsite; site+=step) { - for (int seq = 0; seq < nseq; seq++) { + for (size_t site = 0; 
site < nsite; site+=step) { + for (size_t seq = 0; seq < nseq; ++seq) { //char state = convertState(sequences[seq][site], seq_type); char state = aln->at(aln->getPatternID(site))[seq]; // special treatment for codon @@ -2868,24 +3510,121 @@ void Alignment::convertToCodonOrAA(Alignment *aln, char *gene_code_id, bool nt2a } pat[seq] = state; } - if (!num_error) - addPattern(pat, site/step); + if (!num_error) + addPattern(pat, site/step); + } + if (num_error) + outError(err_str.str()); + verbose_mode = save_mode; + countConstSite(); +// buildSeqStates(); + // sanity check + for (iterator it = begin(); it != end(); it++) + if (it->at(0) == -1) + ASSERT(0); + +} + +Alignment *Alignment::convertCodonToAA() { + Alignment *res = new Alignment; + if (seq_type != SEQ_CODON) + outError("Cannot convert non-codon alignment into AA"); + char AA_to_state[NUM_CHAR]; + for (size_t i = 0; i < getNSeq(); ++i) { + res->seq_names.push_back(getSeqName(i)); + } + res->name = name; + res->model_name = model_name; + res->sequence_type = sequence_type; + res->position_spec = position_spec; + res->aln_file = aln_file; + res->seq_type = SEQ_PROTEIN; + res->num_states = 20; + + res->computeUnknownState(); + + res->buildStateMap(AA_to_state, SEQ_PROTEIN); + + res->site_pattern.resize(getNSite(), -1); + res->clear(); + res->pattern_index.clear(); + + VerboseMode save_mode = verbose_mode; + verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern + size_t nsite = getNSite(); + size_t nseq = getNSeq(); + Pattern pat; + pat.resize(nseq); + + for (size_t site = 0; site < nsite; ++site) { + for (size_t seq = 0; seq < nseq; ++seq) { + StateType state = at(getPatternID(site))[seq]; + if (state == STATE_UNKNOWN) + state = res->STATE_UNKNOWN; + else + state = AA_to_state[(int)genetic_code[(int)codon_table[state]]]; + pat[seq] = state; + } + res->addPattern(pat, site); } - if (num_error) - outError(err_str.str()); verbose_mode = save_mode; - countConstSite(); - 
buildSeqStates(); - // sanity check - for (iterator it = begin(); it != end(); it++) - if (it->at(0) == -1) - ASSERT(0); + res->countConstSite(); + return res; +} +Alignment *Alignment::convertCodonToDNA() { + Alignment *res = new Alignment; + if (seq_type != SEQ_CODON) + outError("Cannot convert non-codon alignment into DNA"); + for (size_t i = 0; i < getNSeq(); ++i) { + res->seq_names.push_back(getSeqName(i)); + } + res->name = name; + res->model_name = model_name; + res->sequence_type = sequence_type; + res->position_spec = position_spec; + res->aln_file = aln_file; + res->seq_type = SEQ_DNA; + res->num_states = 4; + + res->computeUnknownState(); + + res->site_pattern.resize(getNSite()*3, -1); + res->clear(); + res->pattern_index.clear(); + + VerboseMode save_mode = verbose_mode; + verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern + size_t nsite = getNSite(); + size_t nseq = getNSeq(); + Pattern pat[3]; + pat[0].resize(nseq); + pat[1].resize(nseq); + pat[2].resize(nseq); + + for (size_t site = 0; site < nsite; ++site) { + for (size_t seq = 0; seq < nseq; ++seq) { + StateType state = at(getPatternID(site))[seq]; + if (state == STATE_UNKNOWN) { + for (int i = 0; i < 3; ++i) + pat[i][seq] = res->STATE_UNKNOWN; + } else { + state = codon_table[state]; + pat[0][seq] = state/16; + pat[1][seq] = (state%16)/4; + pat[2][seq] = state%4; + } + } + for (int i = 0; i < 3; ++i) + res->addPattern(pat[i], site*3+i); + } + verbose_mode = save_mode; + res->countConstSite(); +// res->buildSeqStates(); + return res; } void convert_range(const char *str, int &lower, int &upper, int &step_size, char* &endptr) throw (string) { - //char *endptr; - char *beginptr = (char*) str; // parse the lower bound of the range int d = strtol(str, &endptr, 10); @@ -2899,20 +3638,34 @@ void convert_range(const char *str, int &lower, int &upper, int &step_size, char //int d_save = d; upper = d; step_size = 1; + // skip blank chars + for (; *endptr == ' '; 
endptr++) {} if (*endptr != '-') return; // parse the upper bound of the range - str = endptr+1; + endptr++; + // skip blank chars + for (; *endptr == ' '; endptr++) {} + str = endptr; d = strtol(str, &endptr, 10); if ((d == 0 && endptr == str) || abs(d) == HUGE_VALL) { - string err = "Expecting integer, but found \""; - err += str; - err += "\" instead"; - throw err; + if (str[0] == '.') { + // 2019-06-03: special character '.' for whatever ending position + d = lower-1; + endptr++; + } else { + string err = "Expecting integer, but found \""; + err += str; + err += "\" instead"; + throw err; + } } //lower = d_save; upper = d; + // skip blank chars + for (; *endptr == ' '; endptr++) {} + if (*endptr != '\\') return; // parse the step size of the range @@ -2924,10 +3677,7 @@ void convert_range(const char *str, int &lower, int &upper, int &step_size, char err += "\" instead"; throw err; } - step_size = d; - str = beginptr; - } void extractSiteID(Alignment *aln, const char* spec, IntVector &site_id) { @@ -2938,6 +3688,9 @@ void extractSiteID(Alignment *aln, const char* spec, IntVector &site_id) { for (; *str != 0; ) { int lower, upper, step; convert_range(str, lower, upper, step, str); + // 2019-06-03: special '.' 
character + if (upper == lower-1) + upper = aln->getNSite(); lower--; upper--; nchars += (upper-lower+1)/step; @@ -2952,7 +3705,7 @@ void extractSiteID(Alignment *aln, const char* spec, IntVector &site_id) { for (i = lower; i <= upper; i+=step) site_id.push_back(i); if (*str == ',' || *str == ' ') str++; - else break; + //else break; } if (aln->seq_type == SEQ_CODON && nchars % 3 != 0) throw (string)"Range " + spec + " length is not multiple of 3 (necessary for codon data)"; @@ -2971,7 +3724,12 @@ void Alignment::extractSites(Alignment *aln, const char* spec) { void Alignment::createBootstrapAlignment(Alignment *aln, IntVector* pattern_freq, const char *spec) { if (aln->isSuperAlignment()) outError("Internal error: ", __func__); - int site, nsite = aln->getNSite(); + name = aln->name; + model_name = aln->model_name; + sequence_type = aln->sequence_type; + position_spec = aln->position_spec; + aln_file = aln->aln_file; + size_t nsite = aln->getNSite(); seq_names.insert(seq_names.begin(), aln->seq_names.begin(), aln->seq_names.end()); num_states = aln->num_states; seq_type = aln->seq_type; @@ -3005,30 +3763,41 @@ void Alignment::createBootstrapAlignment(Alignment *aln, IntVector* pattern_freq if (aln->site_state_freq.size() != aln->getNPattern() || spec) outError("Unsupported bootstrap feature, pls contact the developers"); } + + if (Params::getInstance().jackknife_prop > 0.0 && spec) { + outError((string)"Unsupported jackknife with sampling " + spec); + } IntVector site_vec; if (!spec) { // standard bootstrap - for (site = 0; site < nsite; site++) { - int site_id = random_int(nsite); - int ptn_id = aln->getPatternID(site_id); - Pattern pat = aln->at(ptn_id); - int nptn = getNPattern(); - addPattern(pat, site); - if (!aln->site_state_freq.empty() && getNPattern() > nptn) { - // a new pattern is added, copy state frequency vector - double *state_freq = new double[num_states]; - memcpy(state_freq, aln->site_state_freq[ptn_id], num_states*sizeof(double)); - 
site_state_freq.push_back(state_freq); + int added_sites = 0; + IntVector sample; + random_resampling(nsite, sample); + for (size_t site = 0; site < nsite; ++site) { + for (int rep = 0; rep < sample[site]; ++rep) { + int ptn_id = aln->getPatternID(site); + Pattern pat = aln->at(ptn_id); + int nptn = getNPattern(); + addPattern(pat, added_sites); + if (!aln->site_state_freq.empty() && getNPattern() > nptn) { + // a new pattern is added, copy state frequency vector + double *state_freq = new double[num_states]; + memcpy(state_freq, aln->site_state_freq[ptn_id], num_states*sizeof(double)); + site_state_freq.push_back(state_freq); + } + if (pattern_freq) ((*pattern_freq)[ptn_id])++; + added_sites++; } - if (pattern_freq) ((*pattern_freq)[ptn_id])++; - } + } + if (added_sites < nsite) + site_pattern.resize(added_sites); } else if (strncmp(spec, "GENESITE,", 9) == 0) { // resampling genes, then resampling sites within resampled genes convert_int_vec(spec+9, site_vec); - int i; IntVector begin_site; - for (i = 0, site = 0; i < site_vec.size(); i++) { + size_t site = 0; + for (size_t i = 0; i < site_vec.size(); ++i) { begin_site.push_back(site); site += site_vec[i]; //cout << "site = " << site_vec[i] << endl; @@ -3036,9 +3805,9 @@ void Alignment::createBootstrapAlignment(Alignment *aln, IntVector* pattern_freq if (site > getNSite()) outError("Sum of lengths exceeded alignment length"); - for (i = 0; i < site_vec.size(); i++) { + for (size_t i = 0; i < site_vec.size(); ++i) { int part = random_int(site_vec.size()); - for (int j = 0; j < site_vec[part]; j++) { + for (int j = 0; j < site_vec[part]; ++j) { site = random_int(site_vec[part]) + begin_site[part]; int ptn = aln->getPatternID(site); Pattern pat = aln->at(ptn); @@ -3049,9 +3818,9 @@ void Alignment::createBootstrapAlignment(Alignment *aln, IntVector* pattern_freq } else if (strncmp(spec, "GENE,", 5) == 0) { // resampling genes instead of sites convert_int_vec(spec+5, site_vec); - int i; + size_t site = 0; IntVector 
begin_site; - for (i = 0, site = 0; i < site_vec.size(); i++) { + for (size_t i = 0; i < site_vec.size(); ++i) { begin_site.push_back(site); site += site_vec[i]; //cout << "site = " << site_vec[i] << endl; @@ -3059,7 +3828,7 @@ void Alignment::createBootstrapAlignment(Alignment *aln, IntVector* pattern_freq if (site > getNSite()) outError("Sum of lengths exceeded alignment length"); - for (i = 0; i < site_vec.size(); i++) { + for (size_t i = 0; i < site_vec.size(); ++i) { int part = random_int(site_vec.size()); for (site = begin_site[part]; site < begin_site[part] + site_vec[part]; site++) { int ptn = aln->getPatternID(site); @@ -3074,14 +3843,14 @@ void Alignment::createBootstrapAlignment(Alignment *aln, IntVector* pattern_freq if (site_vec.size() % 2 != 0) outError("Bootstrap specification length is not divisible by 2"); nsite = 0; - int part, begin_site = 0, out_site = 0; - for (part = 0; part < site_vec.size(); part+=2) + int begin_site = 0, out_site = 0; + for (size_t part = 0; part < site_vec.size(); part+=2) nsite += site_vec[part+1]; site_pattern.resize(nsite, -1); - for (part = 0; part < site_vec.size(); part += 2) { + for (size_t part = 0; part < site_vec.size(); part+=2) { if (begin_site + site_vec[part] > aln->getNSite()) outError("Sum of lengths exceeded alignment length"); - for (site = 0; site < site_vec[part+1]; site++) { + for (size_t site = 0; site < site_vec[part+1]; ++site) { int site_id = random_int(site_vec[part]) + begin_site; int ptn_id = aln->getPatternID(site_id); Pattern pat = aln->at(ptn_id); @@ -3098,7 +3867,7 @@ void Alignment::createBootstrapAlignment(Alignment *aln, IntVector* pattern_freq } verbose_mode = save_mode; countConstSite(); - buildSeqStates(); +// buildSeqStates(); } void Alignment::createBootstrapAlignment(IntVector &pattern_freq, const char *spec) { @@ -3112,33 +3881,43 @@ void Alignment::createBootstrapAlignment(IntVector &pattern_freq, const char *sp } void Alignment::createBootstrapAlignment(int *pattern_freq, const 
char *spec, int *rstream) { - int site, nsite = getNSite(); + size_t nsite = getNSite(); memset(pattern_freq, 0, getNPattern()*sizeof(int)); IntVector site_vec; - if (!spec || strncmp(spec, "SCALE=", 6) == 0) { - - if (spec) { - double scale = convert_double(spec+6); - nsite = (int)round(scale * nsite); + if (Params::getInstance().jackknife_prop > 0.0 && spec) + outError((string)"Unsupported jackknife with " + spec); + + if (spec && strncmp(spec, "SCALE=", 6) == 0) { + // multi-scale bootstrapping called by AU test + int orig_nsite = nsite; + double scale = convert_double(spec+6); + nsite = (size_t)round(scale * nsite); + for (size_t site = 0; site < nsite; site++) { + int site_id = random_int(orig_nsite, rstream); + int ptn_id = getPatternID(site_id); + pattern_freq[ptn_id]++; } - int nptn = getNPattern(); - - if (nsite/8 < nptn) { - int orig_nsite = getNSite(); - for (site = 0; site < nsite; site++) { - int site_id = random_int(orig_nsite, rstream); - int ptn_id = getPatternID(site_id); - pattern_freq[ptn_id]++; + } else if (!spec) { + + size_t nptn = getNPattern(); + + if (nsite/8 < nptn || Params::getInstance().jackknife_prop > 0.0) { + IntVector sample; + random_resampling(nsite, sample, rstream); + for (size_t site = 0; site < nsite; site++) { + for (int rep = 0; rep < sample[site]; rep++) { + int ptn_id = getPatternID(site); + pattern_freq[ptn_id]++; + } } } else { // BQM 2015-12-27: use multinomial sampling for faster generation if #sites is much larger than #patterns - int ptn; double *prob = new double[nptn]; - for (ptn = 0; ptn < nptn; ptn++) + for (size_t ptn = 0; ptn < nptn; ++ptn) prob[ptn] = at(ptn).frequency; gsl_ran_multinomial(nptn, nsite, prob, (unsigned int*)pattern_freq, rstream); int sum = 0; - for (ptn = 0; ptn < nptn; ptn++) + for (size_t ptn = 0; ptn < nptn; ++ptn) sum += pattern_freq[ptn]; ASSERT(sum == nsite); delete [] prob; @@ -3146,17 +3925,17 @@ void Alignment::createBootstrapAlignment(int *pattern_freq, const char *spec, in } else if 
(strncmp(spec, "GENESITE,", 9) == 0) { // resampling genes, then resampling sites within resampled genes convert_int_vec(spec+9, site_vec); - int i; IntVector begin_site; - for (i = 0, site = 0; i < site_vec.size(); i++) { + size_t site = 0; + for (size_t i = 0; i < site_vec.size(); ++i) { begin_site.push_back(site); site += site_vec[i]; //cout << "site = " << site_vec[i] << endl; } - if (site > getNSite()) + if (site > getNSite()) { outError("Sum of lengths exceeded alignment length"); - - for (i = 0; i < site_vec.size(); i++) { + } + for (size_t i = 0; i < site_vec.size(); ++i) { int part = random_int(site_vec.size(), rstream); for (int j = 0; j < site_vec[part]; j++) { site = random_int(site_vec[part], rstream) + begin_site[part]; @@ -3167,33 +3946,37 @@ void Alignment::createBootstrapAlignment(int *pattern_freq, const char *spec, in } else if (strncmp(spec, "GENE,", 5) == 0) { // resampling genes instead of sites convert_int_vec(spec+5, site_vec); - int i; IntVector begin_site; - for (i = 0, site = 0; i < site_vec.size(); i++) { - begin_site.push_back(site); + size_t site = 0; + for (size_t i = 0; i < site_vec.size(); ++i) { + begin_site.emplace_back(site); site += site_vec[i]; //cout << "site = " << site_vec[i] << endl; } - if (site > getNSite()) + if (site > getNSite()) { outError("Sum of lengths exceeded alignment length"); - - for (i = 0; i < site_vec.size(); i++) { + } + for (size_t i = 0; i < site_vec.size(); ++i) { int part = random_int(site_vec.size(), rstream); - for (site = begin_site[part]; site < begin_site[part] + site_vec[part]; site++) { + for (size_t site = begin_site[part]; site < begin_site[part] + site_vec[part]; ++site) { int ptn = getPatternID(site); pattern_freq[ptn]++; } } } else { // resampling sites within genes - convert_int_vec(spec, site_vec); + try { + convert_int_vec(spec, site_vec); + } catch (...) 
{ + outError("-bsam not allowed for non-partition model"); + } if (site_vec.size() % 2 != 0) outError("Bootstrap specification length is not divisible by 2"); - int part, begin_site = 0, out_site = 0; - for (part = 0; part < site_vec.size(); part += 2) { + int begin_site = 0, out_site = 0; + for (size_t part = 0; part < site_vec.size(); part += 2) { if (begin_site + site_vec[part] > getNSite()) outError("Sum of lengths exceeded alignment length"); - for (site = 0; site < site_vec[part+1]; site++) { + for (size_t site = 0; site < site_vec[part+1]; ++site) { int site_id = random_int(site_vec[part], rstream) + begin_site; int ptn_id = getPatternID(site_id); pattern_freq[ptn_id]++; @@ -3201,13 +3984,18 @@ void Alignment::createBootstrapAlignment(int *pattern_freq, const char *spec, in begin_site += site_vec[part]; out_site += site_vec[part+1]; } - } + } } void Alignment::buildFromPatternFreq(Alignment & aln, IntVector new_pattern_freqs){ - int nsite = aln.getNSite(); + size_t nsite = aln.getNSite(); seq_names.insert(seq_names.begin(), aln.seq_names.begin(), aln.seq_names.end()); + name = aln.name; + model_name = aln.model_name; + sequence_type = aln.sequence_type; + position_spec = aln.position_spec; + aln_file = aln.aln_file; num_states = aln.num_states; seq_type = aln.seq_type; @@ -3236,17 +4024,26 @@ void Alignment::buildFromPatternFreq(Alignment & aln, IntVector new_pattern_freq } countConstSite(); - buildSeqStates(); +// buildSeqStates(); // checkSeqName(); } void Alignment::createGapMaskedAlignment(Alignment *masked_aln, Alignment *aln) { - if (masked_aln->getNSeq() != aln->getNSeq()) outError("Different number of sequences in masked alignment"); - if (masked_aln->getNSite() != aln->getNSite()) outError("Different number of sites in masked alignment"); - - int site, nsite = aln->getNSite(), nseq = aln->getNSeq(); + if (masked_aln->getNSeq() != aln->getNSeq()) { + outError("Different number of sequences in masked alignment"); + } + if (masked_aln->getNSite() != 
aln->getNSite()) { + outError("Different number of sites in masked alignment"); + } + size_t nsite = aln->getNSite(); + size_t nseq = aln->getNSeq(); seq_names.insert(seq_names.begin(), aln->seq_names.begin(), aln->seq_names.end()); + name = aln->name; + model_name = aln->model_name; + sequence_type = aln->sequence_type; + position_spec = aln->position_spec; + aln_file = aln->aln_file; num_states = aln->num_states; seq_type = aln->seq_type; genetic_code = aln->genetic_code; @@ -3268,17 +4065,19 @@ void Alignment::createGapMaskedAlignment(Alignment *masked_aln, Alignment *aln) } VerboseMode save_mode = verbose_mode; verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern - for (site = 0; site < nsite; site++) { + for (size_t site = 0; site < nsite; ++site) { int ptn_id = aln->getPatternID(site); Pattern pat = aln->at(ptn_id); Pattern masked_pat = masked_aln->at(masked_aln->getPatternID(site)); - for (int seq = 0; seq < nseq; seq++) - if (masked_pat[name_map[seq]] == STATE_UNKNOWN) pat[seq] = STATE_UNKNOWN; + for (size_t seq = 0; seq < nseq; ++seq) { + if (masked_pat[name_map[seq]] == STATE_UNKNOWN) { + pat[seq] = STATE_UNKNOWN; + } + } addPattern(pat, site); } verbose_mode = save_mode; countConstSite(); - buildSeqStates(); } void Alignment::shuffleAlignment() { @@ -3288,34 +4087,48 @@ void Alignment::shuffleAlignment() { void Alignment::concatenateAlignment(Alignment *aln) { - if (getNSeq() != aln->getNSeq()) outError("Different number of sequences in two alignments"); - if (num_states != aln->num_states) outError("Different number of states in two alignments"); - if (seq_type != aln->seq_type) outError("Different data type in two alignments"); - int site, nsite = aln->getNSite(); - int cur_sites = getNSite(); + if (getNSeq() != aln->getNSeq()) { + outError("Different number of sequences in two alignments"); + } + if (num_states != aln->num_states) { + outError("Different number of states in two alignments"); + } + if (seq_type != 
aln->seq_type) { + outError("Different data type in two alignments"); + } + size_t nsite = aln->getNSite(); + size_t cur_sites = getNSite(); site_pattern.resize(cur_sites + nsite , -1); IntVector name_map; for (StrVector::iterator it = seq_names.begin(); it != seq_names.end(); it++) { int seq_id = aln->getSeqID(*it); - if (seq_id < 0) outError("The other alignment does not contain taxon ", *it); + if (seq_id < 0) { + outError("The other alignment does not contain taxon ", *it); + } name_map.push_back(seq_id); } VerboseMode save_mode = verbose_mode; verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern - for (site = 0; site < nsite; site++) { + for (size_t site = 0; site < nsite; site++) { Pattern pat = aln->at(aln->getPatternID(site)); Pattern new_pat = pat; - for (int i = 0; i < name_map.size(); i++) new_pat[i] = pat[name_map[i]]; + for (size_t i = 0; i < name_map.size(); i++) { + new_pat[i] = pat[name_map[i]]; + } addPattern(new_pat, site + cur_sites); } verbose_mode = save_mode; countConstSite(); - buildSeqStates(); } void Alignment::copyAlignment(Alignment *aln) { - int site, nsite = aln->getNSite(); + size_t nsite = aln->getNSite(); seq_names.insert(seq_names.begin(), aln->seq_names.begin(), aln->seq_names.end()); + name = aln->name; + model_name = aln->model_name; + sequence_type = aln->sequence_type; + position_spec = aln->position_spec; + aln_file = aln->aln_file; num_states = aln->num_states; seq_type = aln->seq_type; genetic_code = aln->genetic_code; @@ -3331,7 +4144,7 @@ void Alignment::copyAlignment(Alignment *aln) { pattern_index.clear(); VerboseMode save_mode = verbose_mode; verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern - for (site = 0; site < nsite; site++) { + for (size_t site = 0; site < nsite; ++site) { int site_id = site; int ptn_id = aln->getPatternID(site_id); Pattern pat = aln->at(ptn_id); @@ -3339,7 +4152,7 @@ void Alignment::copyAlignment(Alignment *aln) { } 
verbose_mode = save_mode; countConstSite(); - buildSeqStates(); +// buildSeqStates(); } void Alignment::countConstSite() { @@ -3362,103 +4175,190 @@ void Alignment::countConstSite() { frac_invariant_sites = ((double)num_invariant_sites) / getNSite(); } -string Alignment::getUnobservedConstPatterns() { - string ret = ""; - for (char state = 0; state < num_states; state++) - if (!isStopCodon(state)) - { - Pattern pat; - pat.resize(getNSeq(), state); - if (pattern_index.find(pat) == pattern_index.end()) { - // constant pattern is unobserved - ret.push_back(state); - } - } - return ret; +/** + * generate all subsets of a set + * @param inset input set + * @param[out] subsets vector of all subsets of inset + */ +template +void generateSubsets(vector &inset, vector > &subsets) { + if (inset.size() > 30) + outError("Cannot work with more than 31 states"); + uint64_t total = ((uint64_t)1 << inset.size()); + for (uint64_t binrep = 0; binrep < total; binrep++) { + vector subset; + for (uint64_t i = 0; i < inset.size(); i++) + if (binrep & (1 << i)) + subset.push_back(inset[i]); + subsets.push_back(subset); + } +} + +void Alignment::generateUninfPatterns(StateType repeat, vector &singleton, vector &seq_pos, vector &unobserved_ptns) { + size_t seqs = getNSeq(); + if (seq_pos.size() == singleton.size()) { + Pattern pat; + pat.resize(seqs, repeat); + for (int i = 0; i < seq_pos.size(); i++) + pat[seq_pos[i]] = singleton[i]; + unobserved_ptns.push_back(pat); + return; + } + for (size_t seq = 0; seq < seqs; seq++) { + bool dup = false; + for (auto s: seq_pos) + if (seq == s) { dup = true; break; } + if (dup) continue; + vector seq_pos_new = seq_pos; + seq_pos_new.push_back(seq); + generateUninfPatterns(repeat, singleton, seq_pos_new, unobserved_ptns); + } +} + +void Alignment::getUnobservedConstPatterns(ASCType ASC_type, vector &unobserved_ptns) { + switch (ASC_type) { + case ASC_NONE: break; + case ASC_VARIANT: { + // Lewis's correction for variant sites + 
unobserved_ptns.reserve(num_states); + for (StateType state = 0; state < num_states; state++) { + if (!isStopCodon(state)) { + Pattern pat; + pat.resize(getNSeq(), state); + if (pattern_index.find(pat) == pattern_index.end()) { + // constant pattern is unobserved + unobserved_ptns.push_back(pat); + } + } + } + break; + } + case ASC_VARIANT_MISSING: { + // Holder's correction for variant sites with missing data + size_t orig_nptn = getNPattern(); + size_t max_orig_nptn = get_safe_upper_limit(orig_nptn); + unobserved_ptns.reserve(max_orig_nptn*num_states); + int nseq = getNSeq(); + for (StateType state = 0; state < num_states; state++) + for (size_t ptn = 0; ptn < max_orig_nptn; ptn++) { + Pattern new_ptn; + if (ptn < orig_nptn) { + new_ptn.reserve(nseq); + for (auto state_ptn: at(ptn)) { + if (state_ptn < num_states) + new_ptn.push_back(state); + else + new_ptn.push_back(STATE_UNKNOWN); + } + } else + new_ptn.resize(nseq, STATE_UNKNOWN); + unobserved_ptns.push_back(new_ptn); + } + break; + } + case ASC_INFORMATIVE: { + // Holder correction for informative sites + for (StateType repeat = 0; repeat < num_states; repeat++) { + vector rest; + rest.reserve(num_states-1); + for (StateType s = 0; s < num_states; s++) + if (s != repeat) rest.push_back(s); + vector > singletons; + generateSubsets(rest, singletons); + for (auto singleton : singletons) + if (singleton.size() < getNSeq()-1 || (singleton.size() == getNSeq()-1 && repeat == 0)) { + vector seq_pos; + generateUninfPatterns(repeat, singleton, seq_pos, unobserved_ptns); + } + } + break; + } + case ASC_INFORMATIVE_MISSING: { + // Holder correction for informative sites with missing data + ASSERT(0 && "Not supported yet"); + break; + } + } } int Alignment::countProperChar(int seq_id) { int num_proper_chars = 0; for (iterator it = begin(); it != end(); it++) { - if ((*it)[seq_id] < num_states + pomo_sampled_states.size()) num_proper_chars+=(*it).frequency; + if ((*it)[seq_id] < num_states + pomo_sampled_states.size()) { 
+ num_proper_chars+=(*it).frequency; + } } return num_proper_chars; } Alignment::~Alignment() { - if (codon_table) { - delete [] codon_table; - codon_table = NULL; - } - if (non_stop_codon) { - delete [] non_stop_codon; - non_stop_codon = NULL; - } - if (pars_lower_bound) { - delete [] pars_lower_bound; - pars_lower_bound = NULL; + delete [] codon_table; + codon_table = nullptr; + delete [] non_stop_codon; + non_stop_codon = nullptr; + delete [] pars_lower_bound; + pars_lower_bound = nullptr; + for (auto it = site_state_freq.rbegin(); it != site_state_freq.rend(); ++it) { + delete [] (*it); } - for (vector::reverse_iterator it = site_state_freq.rbegin(); it != site_state_freq.rend(); it++) - if (*it) delete [] (*it); site_state_freq.clear(); site_model.clear(); } double Alignment::computeObsDist(int seq1, int seq2) { int diff_pos = 0, total_pos = 0; + total_pos = getNSite() - num_variant_sites; // initialize with number of constant sites for (iterator it = begin(); it != end(); it++) { + if ((*it).isConst()) + continue; int state1 = convertPomoState((*it)[seq1]); int state2 = convertPomoState((*it)[seq2]); if (state1 < num_states && state2 < num_states) { - //if ((*it)[seq1] != STATE_UNKNOWN && (*it)[seq2] != STATE_UNKNOWN) { total_pos += (*it).frequency; - if ((*it)[seq1] != (*it)[seq2] ) + if (state1 != state2 ) diff_pos += (*it).frequency; } } if (!total_pos) { if (verbose_mode >= VB_MED) - outWarning("No overlapping characters between " + getSeqName(seq1) + " and " + getSeqName(seq2)); + { + outWarning("No overlapping characters between " + + getSeqName(seq1) + " and " + getSeqName(seq2)); + } return MAX_GENETIC_DIST; // return +INF if no overlap between two sequences } return ((double)diff_pos) / total_pos; } -double Alignment::computeJCDist(int seq1, int seq2) { - double obs_dist = computeObsDist(seq1, seq2); +double Alignment::computeJCDistanceFromObservedDistance(double obs_dist) const +{ double z = (double)num_states / (num_states-1); double x = 1.0 - (z * 
obs_dist); - if (x <= 0) { - // string str = "Too long distance between two sequences "; - // str += getSeqName(seq1); - // str += " and "; - // str += getSeqName(seq2); - // outWarning(str); return MAX_GENETIC_DIST; } - return -log(x) / z; } +double Alignment::computeJCDist(int seq1, int seq2) { + double obs_dist = computeObsDist(seq1, seq2); + return computeJCDistanceFromObservedDistance(obs_dist); +} + void Alignment::printDist(ostream &out, double *dist_mat) { - int nseqs = getNSeq(); + size_t nseqs = getNSeq(); int max_len = getMaxSeqNameLength(); if (max_len < 10) max_len = 10; out << nseqs << endl; - int pos = 0; out.precision(max((int)ceil(-log10(Params::getInstance().min_branch_length))+1, 6)); out << fixed; - for (int seq1 = 0; seq1 < nseqs; seq1 ++) { + size_t pos = 0; + for (size_t seq1 = 0; seq1 < nseqs; ++seq1) { out.width(max_len); out << left << getSeqName(seq1) << " "; - for (int seq2 = 0; seq2 < nseqs; seq2 ++) { + for (size_t seq2 = 0; seq2 < nseqs; ++seq2) { out << dist_mat[pos++]; - /*if (seq2 % 7 == 6) { - out << endl; - out.width(max_len+1); - } */ out << " "; } out << endl; @@ -3480,7 +4380,7 @@ void Alignment::printDist(const char *file_name, double *dist_mat) { double Alignment::readDist(istream &in, double *dist_mat) { double longest_dist = 0.0; - int nseqs; + size_t nseqs; in >> nseqs; if (nseqs != getNSeq()) throw "Distance file has different number of taxa"; @@ -3570,36 +4470,72 @@ double Alignment::readDist(const char *file_name, double *dist_mat) { return longest_dist; } -// TODO DS: This only works when the sampling method is SAMPLING_SAMPLED or when -// the virtual population size is also the sample size (for every species and -// every site). 
-void Alignment::computeStateFreq (double *state_freq, size_t num_unknown_states) { +void Alignment::countStatesForSites(size_t startPattern, size_t stopPattern, size_t *state_count) { + memset(state_count, 0, sizeof(size_t)*(STATE_UNKNOWN+1)); + for (size_t patternIndex = startPattern; patternIndex < stopPattern; ++patternIndex ) { + const Pattern& pat = at(patternIndex); + int freq = pat.frequency; + const Pattern::value_type *stateArray = pat.data(); + size_t stateCount = pat.size(); + for (int i=0; iSTATE_UNKNOWN+1]; + memset(localStateCount, 0, sizeof(size_t)*(STATE_UNKNOWN+1)); + countStatesForSites(start, stop, localStateCount); + #pragma omp critical (sum_states) + { + for (size_t state=0; state<=STATE_UNKNOWN; ++state) { + state_count[state] += localStateCount[state]; + } + } + } + } else +#endif + { + for (iterator it = begin(); it != end(); it++) { + int freq = it->frequency; + for (Pattern::iterator it2 = it->begin(); it2 != it->end(); it2++) { + state_count[convertPomoState((int)*it2)] += freq; + } + } + } + if (verbose_mode >= VB_MED) { + cout << "Alignment state count time was " << (getRealTime()-countStart) << " seconds." 
<< endl; + } +} + +void Alignment::convertCountToFreq(size_t *state_count, double *state_freq) { int i, j; double *states_app = new double[num_states*(STATE_UNKNOWN+1)]; double *new_freq = new double[num_states]; - unsigned *state_count = new unsigned[STATE_UNKNOWN+1]; double *new_state_freq = new double[num_states]; - - - memset(state_count, 0, sizeof(unsigned)*(STATE_UNKNOWN+1)); - state_count[(int)STATE_UNKNOWN] = num_unknown_states; - - for (i = 0; i <= STATE_UNKNOWN; i++) + + for (i = 0; i <= STATE_UNKNOWN; i++) { getAppearance(i, &states_app[i*num_states]); - - size_t aln_len = 0; - - for (iterator it = begin(); it != end(); it++) { - aln_len += it->frequency; - for (Pattern::iterator it2 = it->begin(); it2 != it->end(); it2++) - state_count[convertPomoState((int)*it2)] += it->frequency; } - - for (i = 0; i < num_states; i++) + for (i = 0; i < num_states; i++) { state_freq[i] = 1.0/num_states; - + } const int NUM_TIME = 8; - if (aln_len > 0) for (int k = 0; k < NUM_TIME; k++) { memset(new_state_freq, 0, sizeof(double)*num_states); @@ -3615,29 +4551,40 @@ void Alignment::computeStateFreq (double *state_freq, size_t num_unknown_states) new_state_freq[j] += new_freq[j]*sum_freq*state_count[i]; } } - double sum_freq = 0.0; for (j = 0; j < num_states; j++) sum_freq += new_state_freq[j]; + if (sum_freq == 0.0) + break; sum_freq = 1.0/sum_freq; for (j = 0; j < num_states; j++) state_freq[j] = new_state_freq[j]*sum_freq; } - convfreq(state_freq); + convfreq(state_freq); + delete [] new_state_freq; + delete [] new_freq; + delete [] states_app; +} + +// TODO DS: This only works when the sampling method is SAMPLING_SAMPLED or when +// the virtual population size is also the sample size (for every species and +// every site). 
+void Alignment::computeStateFreq (double *state_freq, size_t num_unknown_states) { + size_t *state_count = new size_t[STATE_UNKNOWN+1]; + + countStates(state_count, num_unknown_states); + convertCountToFreq(state_count, state_freq); if (verbose_mode >= VB_MED) { cout << "Empirical state frequencies: "; cout << setprecision(10); - for (i = 0; i < num_states; i++) + for (int i = 0; i < num_states; i++) cout << state_freq[i] << " "; cout << endl; } - delete [] new_state_freq; delete [] state_count; - delete [] new_freq; - delete [] states_app; } int Alignment::convertPomoState(int state) { @@ -3706,17 +4653,17 @@ void Alignment::computeAbsoluteStateFreq(unsigned int *abs_state_freq) { } else { for (iterator it = begin(); it != end(); it++) for (Pattern::iterator it2 = it->begin(); it2 != it->end(); it2++) - abs_state_freq[(int)*it2] += it->frequency; + if ((*it2) < num_states) + abs_state_freq[(int)*it2] += it->frequency; } } void Alignment::countStatePerSequence (unsigned *count_per_sequence) { - int i; - int nseqs = getNSeq(); + size_t nseqs = getNSeq(); memset(count_per_sequence, 0, sizeof(unsigned)*num_states*nseqs); for (iterator it = begin(); it != end(); it++) - for (i = 0; i != nseqs; i++) { + for (size_t i = 0; i != nseqs; ++i) { int state = convertPomoState(it->at(i)); if (state < num_states) { count_per_sequence[i*num_states + state] += it->frequency; @@ -3725,50 +4672,47 @@ void Alignment::countStatePerSequence (unsigned *count_per_sequence) { } void Alignment::computeStateFreqPerSequence (double *freq_per_sequence) { - int i, j; - int nseqs = getNSeq(); + size_t nseqs = getNSeq(); double *states_app = new double[num_states*(STATE_UNKNOWN+1)]; double *new_freq = new double[num_states]; unsigned *state_count = new unsigned[(STATE_UNKNOWN+1)*nseqs]; double *new_state_freq = new double[num_states]; - - memset(state_count, 0, sizeof(unsigned)*(STATE_UNKNOWN+1)*nseqs); - for (i = 0; i <= STATE_UNKNOWN; i++) + for (int i = 0; i <= STATE_UNKNOWN; i++) { 
getAppearance(i, &states_app[i*num_states]); - - for (iterator it = begin(); it != end(); it++) - for (i = 0; i != nseqs; i++) { + } + for (iterator it = begin(); it != end(); it++) { + for (size_t i = 0; i != nseqs; i++) { state_count[i*(STATE_UNKNOWN+1) + it->at(i)] += it->frequency; } + } double equal_freq = 1.0/num_states; - for (i = 0; i < num_states*nseqs; i++) + for (size_t i = 0; i < num_states*nseqs; i++) { freq_per_sequence[i] = equal_freq; - + } const int NUM_TIME = 8; for (int k = 0; k < NUM_TIME; k++) { for (int seq = 0; seq < nseqs; seq++) { double *state_freq = &freq_per_sequence[seq*num_states]; memset(new_state_freq, 0, sizeof(double)*num_states); - for (i = 0; i <= STATE_UNKNOWN; i++) { + for (int i = 0; i <= STATE_UNKNOWN; i++) { if (state_count[seq*(STATE_UNKNOWN+1)+i] == 0) continue; double sum_freq = 0.0; - for (j = 0; j < num_states; j++) { + for (int j = 0; j < num_states; j++) { new_freq[j] = state_freq[j] * states_app[i*num_states+j]; sum_freq += new_freq[j]; } sum_freq = 1.0/sum_freq; - for (j = 0; j < num_states; j++) { + for (int j = 0; j < num_states; j++) { new_state_freq[j] += new_freq[j]*sum_freq*state_count[seq*(STATE_UNKNOWN+1)+i]; } } - double sum_freq = 0.0; - for (j = 0; j < num_states; j++) + for (int j = 0; j < num_states; j++) sum_freq += new_state_freq[j]; sum_freq = 1.0/sum_freq; - for (j = 0; j < num_states; j++) + for (int j = 0; j < num_states; j++) state_freq[j] = new_state_freq[j]*sum_freq; } } @@ -3944,8 +4888,7 @@ void Alignment::getAppearance(StateType state, StateBitset &state_app) { } void Alignment::computeCodonFreq(StateFreqType freq, double *state_freq, double *ntfreq) { - int nseqs = getNSeq(); - int i, j; + size_t nseqs = getNSeq(); if (freq == FREQ_CODON_1x4) { memset(ntfreq, 0, sizeof(double)*4); @@ -3962,35 +4905,35 @@ void Alignment::computeCodonFreq(StateFreqType freq, double *state_freq, double } } double sum = 0; - for (i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) sum += ntfreq[i]; - for (i = 0; i 
< 4; i++) + for (int i = 0; i < 4; i++) ntfreq[i] /= sum; if (verbose_mode >= VB_MED) { - for (i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) cout << " " << symbols_dna[i] << ": " << ntfreq[i]; cout << endl; } memcpy(ntfreq+4, ntfreq, sizeof(double)*4); memcpy(ntfreq+8, ntfreq, sizeof(double)*4); sum = 0.0; - for (i = 0; i < num_states; i++) { + for (int i = 0; i < num_states; i++) { int codon = codon_table[i]; state_freq[i] = ntfreq[codon/16] * ntfreq[(codon%16)/4] * ntfreq[codon%4]; if (isStopCodon(i)) { // sum_stop += state_freq[i]; - state_freq[i] = MIN_FREQUENCY; + state_freq[i] = Params::getInstance().min_state_freq; } else { sum += state_freq[i]; } } // sum = (1.0-sum)/(1.0-sum_stop); sum = 1.0/sum; - for (i = 0; i < num_states; i++) + for (int i = 0; i < num_states; i++) if (!isStopCodon(i)) state_freq[i] *= sum; sum = 0.0; - for (i = 0; i < num_states; i++) + for (int i = 0; i < num_states; i++) sum += state_freq[i]; ASSERT(fabs(sum-1.0)<1e-5); } else if (freq == FREQ_CODON_3x4) { @@ -4008,14 +4951,14 @@ void Alignment::computeCodonFreq(StateFreqType freq, double *state_freq, double ntfreq[8+nt3] += (*it).frequency; } } - for (j = 0; j < 12; j+=4) { + for (int j = 0; j < 12; j+=4) { double sum = 0; - for (i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) sum += ntfreq[i+j]; - for (i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) ntfreq[i+j] /= sum; if (verbose_mode >= VB_MED) { - for (i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) cout << " " << symbols_dna[i] << ": " << ntfreq[i+j]; cout << endl; } @@ -4023,23 +4966,23 @@ void Alignment::computeCodonFreq(StateFreqType freq, double *state_freq, double // double sum_stop=0.0; double sum = 0.0; - for (i = 0; i < num_states; i++) { + for (int i = 0; i < num_states; i++) { int codon = codon_table[i]; state_freq[i] = ntfreq[codon/16] * ntfreq[4+(codon%16)/4] * ntfreq[8+codon%4]; if (isStopCodon(i)) { // sum_stop += state_freq[i]; - state_freq[i] = MIN_FREQUENCY; + state_freq[i] = 
Params::getInstance().min_state_freq; } else { sum += state_freq[i]; } } // sum = (1.0-sum)/(1.0-sum_stop); sum = 1.0 / sum; - for (i = 0; i < num_states; i++) + for (int i = 0; i < num_states; i++) if (!isStopCodon(i)) state_freq[i] *= sum; sum = 0.0; - for (i = 0; i < num_states; i++) + for (int i = 0; i < num_states; i++) sum += state_freq[i]; ASSERT(fabs(sum-1.0)<1e-5); @@ -4083,9 +5026,9 @@ void Alignment::computeCodonFreq(StateFreqType freq, double *state_freq, double outError("F3X4C not yet implemented. Contact authors if you really need it."); } else if (freq == FREQ_EMPIRICAL || freq == FREQ_ESTIMATE) { memset(state_freq, 0, num_states*sizeof(double)); - i = 0; - for (iterator it = begin(); it != end(); it++, i++) - for (int seq = 0; seq < nseqs; seq++) { + int i = 0; + for (iterator it = begin(); it != end(); ++it, ++i) + for (size_t seq = 0; seq < nseqs; seq++) { int state = it->at(seq); if (state >= num_states) continue; state_freq[state] += it->frequency; @@ -4101,86 +5044,249 @@ void Alignment::computeCodonFreq(StateFreqType freq, double *state_freq, double convfreq(state_freq); } -void Alignment::computeDivergenceMatrix(double *rates) { - int i, j, k; - ASSERT(rates); - int nseqs = getNSeq(); - unsigned *pair_rates = new unsigned[num_states*num_states]; - memset(pair_rates, 0, sizeof(unsigned)*num_states*num_states); -// for (i = 0; i < num_states; i++) { -// pair_rates[i] = new double[num_states]; -// memset(pair_rates[i], 0, sizeof(double)*num_states); -// } +void Alignment::computeDivergenceMatrix(double *pair_freq, double *state_freq, bool normalize) { + int i, j; + ASSERT(pair_freq); + size_t nseqs = getNSeq(); + memset(pair_freq, 0, sizeof(double)*num_states*num_states); + memset(state_freq, 0, sizeof(double)*num_states); - unsigned *state_freq = new unsigned[STATE_UNKNOWN+1]; + uint64_t *site_state_freq = new uint64_t[STATE_UNKNOWN+1]; + // count pair_freq over all sites for (iterator it = begin(); it != end(); it++) { - memset(state_freq, 0, 
sizeof(unsigned)*(STATE_UNKNOWN+1)); + memset(site_state_freq, 0, sizeof(uint64_t)*(STATE_UNKNOWN+1)); for (i = 0; i < nseqs; i++) { - state_freq[(int)it->at(i)]++; + site_state_freq[it->at(i)]++; } for (i = 0; i < num_states; i++) { - if (state_freq[i] == 0) continue; - pair_rates[i*num_states+i] += (state_freq[i]*(state_freq[i]-1)/2)*it->frequency; + if (site_state_freq[i] == 0) continue; + state_freq[i] += site_state_freq[i]; + double *pair_freq_ptr = pair_freq + (i*num_states); + pair_freq_ptr[i] += (site_state_freq[i]*(site_state_freq[i]-1)/2)*it->frequency; for (j = i+1; j < num_states; j++) - pair_rates[i*num_states+j] += state_freq[i]*state_freq[j]*it->frequency; + pair_freq_ptr[j] += site_state_freq[i]*site_state_freq[j]*it->frequency; + } + } + + // symmerize pair_freq + for (i = 0; i < num_states; i++) + for (j = 0; j < num_states; j++) + pair_freq[j*num_states+i] = pair_freq[i*num_states+j]; + + if (normalize) { + double sum = 0.0; + for (i = 0; i < num_states; i++) + sum += state_freq[i]; + sum = 1.0/sum; + for (i = 0; i < num_states; i++) + state_freq[i] *= sum; + for (i = 0; i < num_states; i++) { + sum = 0.0; + double *pair_freq_ptr = pair_freq + (i*num_states); + for (j = 0; j < num_states; j++) + sum += pair_freq_ptr[j]; + sum = 1.0/sum; + for (j = 0; j < num_states; j++) + pair_freq_ptr[j] *= sum; } -// int state1 = it->at(i); -// if (state1 >= num_states) continue; -// int *this_pair = pair_rates + state1*num_states; -// for (j = i+1; j < nseqs; j++) { -// int state2 = it->at(j); -// if (state2 < num_states) this_pair[state2] += it->frequency; -// } -// } } + delete [] site_state_freq; +} - k = 0; - double last_rate = pair_rates[(num_states-2)*num_states+num_states-1] + pair_rates[(num_states-1)*num_states+num_states-2]; - if (last_rate == 0) last_rate = 1; - for (i = 0; i < num_states-1; i++) - for (j = i+1; j < num_states; j++) { - rates[k++] = (pair_rates[i*num_states+j] + pair_rates[j*num_states+i]) / last_rate; - // BIG WARNING: zero rates 
might cause numerical instability! -// if (rates[k-1] <= 0.0001) rates[k-1] = 0.01; -// if (rates[k-1] > 100.0) rates[k-1] = 50.0; +double binomial_cdf(int x, int n, double p) { + ASSERT(p > 0.0 && p < 1.0 && x <= n && x >= 0); + double cdf = 0.0; + double b = 0; + double logp = log(p), log1p = log(1-p); + for (int k = 0; k < x; k++) { + if (k > 0) { + b += log(n-k+1) - log(k); } - rates[k-1] = 1; - if (verbose_mode >= VB_MAX) { - cout << "Empirical rates: "; - for (k = 0; k < num_states*(num_states-1)/2; k++) - cout << rates[k] << " "; - cout << endl; + double log_pmf_k = b + k * logp + (n-k) * log1p; + cdf += exp(log_pmf_k); } + if (cdf > 1.0) cdf = 1.0; + return 1.0-cdf; +} -// for (i = num_states-1; i >= 0; i--) { -// delete [] pair_rates[i]; -// } - delete [] state_freq; - delete [] pair_rates; +void SymTestResult::computePvalue() { + if (significant_pairs <= 0) { + pvalue_binom = 1.0; + return; + } +#ifdef USE_BOOST + boost::math::binomial binom(included_pairs, Params::getInstance().symtest_pcutoff); + pvalue_binom = cdf(complement(binom, significant_pairs-1)); +#else + pvalue_binom = binomial_cdf(significant_pairs, included_pairs, Params::getInstance().symtest_pcutoff); +#endif } -void Alignment::computeDivergenceMatrixNonRev (double *rates) { - double *rates_mat = new double[num_states*num_states]; - int i, j, k; +std::ostream& operator<<(std::ostream& stream, const SymTestResult& res) { + stream << res.significant_pairs << "," + << res.included_pairs - res.significant_pairs << ","; + if (Params::getInstance().symtest == SYMTEST_BINOM) + stream << res.pvalue_binom; + else + stream << res.pvalue_maxdiv; + if (Params::getInstance().symtest_shuffle > 1) + stream << "," << res.max_stat << ',' << res.pvalue_perm; + return stream; +} - computeDivergenceMatrix(rates); +void Alignment::doSymTest(size_t vecid, vector &vec_sym, vector &vec_marsym, + vector &vec_intsym, int *rstream, vector *stats) +{ + size_t nseq = getNSeq(); - for (i = 0, k = 0; i < num_states-1; 
i++) - for (j = i+1; j < num_states; j++) - rates_mat[i*num_states+j] = rates_mat[j*num_states+i] = rates[k++]; + const double chi2_cutoff = Params::getInstance().symtest_pcutoff; + + SymTestResult sym, marsym, intsym; + sym.max_stat = -1.0; + marsym.max_stat = -1.0; + intsym.max_stat = -1.0; + sym.pvalue_maxdiv = 1.0; + marsym.pvalue_maxdiv = 1.0; + intsym.pvalue_maxdiv = 1.0; + + vector ptn_shuffled; + + if (rstream) { + // random shuffle alignment columns + int nsite = getNSite(); + for (int site = 0; site < nsite; site++) { + Pattern ptn = getPattern(site); + my_random_shuffle(ptn.begin(), ptn.end(), rstream); + ptn_shuffled.push_back(ptn); + } + } + if (stats) + { + stats->reserve(nseq*(nseq-1)/2); + } + double max_divergence = 0.0; + + for (int seq1 = 0; seq1 < nseq; seq1++) { + for (int seq2 = seq1+1; seq2 < nseq; seq2++) { + MatrixXd pair_freq = MatrixXd::Zero(num_states, num_states); + if (rstream) { + for (auto it = ptn_shuffled.begin(); it != ptn_shuffled.end(); it++) + if (it->at(seq1) < num_states && it->at(seq2) < num_states) + pair_freq(it->at(seq1), it->at(seq2))++; - for (i = 0, k = 0; i < num_states; i++) - for (j = 0; j < num_states; j++) - if (j != i) rates[k++] = rates_mat[i*num_states+j]; - delete [] rates_mat; + } else { + for (auto it = begin(); it != end(); it++) { + if (it->at(seq1) < num_states && it->at(seq2) < num_states) + pair_freq(it->at(seq1), it->at(seq2)) += it->frequency; + } + } + + // 2020-06-03: Bug fix found by Peter Foster + double sum_elems = pair_freq.sum(); + double divergence = (sum_elems == 0.0) ? 
0.0 : (sum_elems - pair_freq.diagonal().sum()) / sum_elems; + + // performing test of symmetry + int i, j; + + SymTestStat stat; + stat.seq1 = seq1; + stat.seq2 = seq2; + stat.pval_sym = nan(""); + stat.pval_marsym = nan(""); + stat.pval_intsym = nan(""); + + int df_sym = num_states*(num_states-1)/2; + bool applicable = true; + MatrixXd sum = (pair_freq + pair_freq.transpose()); + ArrayXXd res = (pair_freq - pair_freq.transpose()).array().square() / sum.array(); + + for (i = 0; i < num_states; i++) + for (j = i+1; j < num_states; j++) { + if (!std::isnan(res(i,j))) { + stat.chi2_sym += res(i,j); + } else { + if (Params::getInstance().symtest_keep_zero) + applicable = false; + df_sym--; + } + } + if (df_sym == 0) + applicable = false; + + if (applicable) { + stat.pval_sym = chi2prob(df_sym, stat.chi2_sym); + if (stat.pval_sym < chi2_cutoff) + sym.significant_pairs++; + sym.included_pairs++; + if (sym.max_stat < stat.chi2_sym) + sym.max_stat = stat.chi2_sym; + } else { + sym.excluded_pairs++; + } + // performing test of marginal symmetry + VectorXd row_sum = pair_freq.rowwise().sum().head(num_states-1); + VectorXd col_sum = pair_freq.colwise().sum().head(num_states-1); + VectorXd U = (row_sum - col_sum); + MatrixXd V = (row_sum + col_sum).asDiagonal(); + V -= sum.topLeftCorner(num_states-1, num_states-1); + + FullPivLU lu(V); + + if (lu.isInvertible()) { + stat.chi2_marsym = U.transpose() * lu.inverse() * U; + int df_marsym = num_states-1; + stat.pval_marsym = chi2prob(df_marsym, stat.chi2_marsym); + if (stat.pval_marsym < chi2_cutoff) + marsym.significant_pairs++; + marsym.included_pairs++; + if (marsym.max_stat < stat.chi2_marsym) + marsym.max_stat = stat.chi2_marsym; + + // internal symmetry + stat.chi2_intsym = stat.chi2_sym - stat.chi2_marsym; + int df_intsym = df_sym - df_marsym; + if (df_intsym > 0 && applicable) { + stat.pval_intsym = chi2prob(df_intsym, stat.chi2_intsym); + if (stat.pval_intsym < chi2_cutoff) + intsym.significant_pairs++; + 
intsym.included_pairs++; + if (intsym.max_stat < stat.chi2_intsym) + intsym.max_stat = stat.chi2_intsym; + } else + intsym.excluded_pairs++; + } else { + marsym.excluded_pairs++; + intsym.excluded_pairs++; + } + if (stats) + stats->push_back(stat); + if (divergence > max_divergence) { + sym.pvalue_maxdiv = stat.pval_sym; + intsym.pvalue_maxdiv = stat.pval_intsym; + marsym.pvalue_maxdiv = stat.pval_marsym; + max_divergence = divergence; + } else if (divergence == max_divergence && random_double(rstream) < 0.5) { + sym.pvalue_maxdiv = stat.pval_sym; + intsym.pvalue_maxdiv = stat.pval_intsym; + marsym.pvalue_maxdiv = stat.pval_marsym; + } + } + } + sym.computePvalue(); + marsym.computePvalue(); + intsym.computePvalue(); + vec_sym[vecid] = sym; + vec_marsym[vecid] = marsym; + vec_intsym[vecid] = intsym; } void Alignment::convfreq(double *stateFrqArr) { - if (Params::getInstance().keep_zero_freq) + if (Params::getInstance().keep_zero_freq) { return; + } int i, maxi=0; double freq, maxfreq, sum; int zero_states = 0; @@ -4192,9 +5298,9 @@ void Alignment::convfreq(double *stateFrqArr) { freq = stateFrqArr[i]; // Do not check for a minimum frequency with PoMo because very // low frequencies are expected for polymorphic sites. 
- if ((freq < MIN_FREQUENCY) && + if ((freq < Params::getInstance().min_state_freq) && (seq_type != SEQ_POMO)) { - stateFrqArr[i] = MIN_FREQUENCY; + stateFrqArr[i] = Params::getInstance().min_state_freq; } if (freq > maxfreq) { maxfreq = freq; @@ -4224,8 +5330,9 @@ double Alignment::computeUnconstrainedLogL() { double logl = 0.0; int nsite = getNSite(), i; double lognsite = log(nsite); - for (i = 0; i < nptn; i++) + for (i = 0; i < nptn; i++) { logl += (log(at(i).frequency) - lognsite) * at(i).frequency; + } return logl; } @@ -4237,11 +5344,11 @@ void Alignment::printSiteGaps(const char *filename) { out.open(filename); int nsite = getNSite(); out << nsite << endl << "Site_Gap "; - for (int site = 0; site < getNSite(); site++) { + for (size_t site = 0; site < getNSite(); ++site) { out << " " << at(getPatternID(site)).computeGapChar(num_states, STATE_UNKNOWN); } out << endl << "Site_Ambi "; - for (int site = 0; site < getNSite(); site++) { + for (size_t site = 0; site < getNSite(); ++site) { out << " " << at(getPatternID(site)).computeAmbiguousChar(num_states); } out << endl; @@ -4253,10 +5360,18 @@ void Alignment::printSiteGaps(const char *filename) { } void Alignment::getPatternFreq(IntVector &freq) { - freq.resize(getNPattern()); - int cnt = 0; - for (iterator it = begin(); it < end(); it++, cnt++) - freq[cnt] = (*it).frequency; + freq.resize(getNPattern()); + int cnt = 0; + for (iterator it = begin(); it < end(); it++, cnt++) { + freq[cnt] = (*it).frequency; + } +} + +void Alignment::getPatternFreq(int *freq) { + int cnt = 0; + for (iterator it = begin(); it < end(); it++, cnt++) { + freq[cnt] = (*it).frequency; + } } //added by MA @@ -4265,14 +5380,13 @@ void Alignment::multinomialProb(Alignment refAlign, double &prob) // cout << "Computing the probability of this alignment given the multinomial distribution determined by a reference alignment ..." 
<< endl; //should we check for compatibility of sequence's names and sequence's order in THIS alignment and in the objectAlign?? //check alignment length - int nsite = getNSite(); + size_t nsite = getNSite(); ASSERT(nsite == refAlign.getNSite()); double sumFac = 0; double sumProb = 0; double fac = logFac(nsite); int index; - for ( iterator it = begin(); it != end() ; it++) - { + for ( iterator it = begin(); it != end() ; it++) { PatternIntMap::iterator pat_it = refAlign.pattern_index.find((*it)); if ( pat_it == refAlign.pattern_index.end() ) //not found ==> error outError("Pattern in the current alignment is not found in the reference alignment!"); @@ -4297,7 +5411,7 @@ void Alignment::multinomialProb (DoubleVector logLL, double &prob) ASSERT(logLL.size() == patNum); - int alignLen = getNSite(); + size_t alignLen = getNSite(); //resize the expectedNorFre vector expectedNorFre.resize(patNum,-1); @@ -4321,7 +5435,6 @@ void Alignment::multinomialProb (DoubleVector logLL, double &prob) ell[i] = (double)alignLen * LL[i] / sumLL; } - //Vector containing r_i where r_0 = ell_0; r_{i+1} = ell_{i+1} + r_i - ordinaryRounding(r_i) DoubleVector r(patNum, -1.0); //Compute r_i and the expected normalized frequencies @@ -4372,8 +5485,7 @@ void Alignment::multinomialProb (double *logLL, double &prob) double sumLL = 0; //sum of the likelihood of the patterns in the alignment double max_logl = *max_element(logLL, logLL + patNum); // to rescale the log-likelihood //Compute the `relative' (to the first pattern) likelihood from the logLL - for ( int i = 0; i < patNum; i++ ) - { + for ( int i = 0; i < patNum; i++ ) { LL[i] = exp(logLL[i]-max_logl); //LL[i] = exp(logLL[i]); sumLL += LL[i]; @@ -4382,8 +5494,7 @@ void Alignment::multinomialProb (double *logLL, double &prob) //Vector containing l_i = p_i*ell/sum_i(p_i) DoubleVector ell(patNum, -1.0); //Compute l_i - for ( int i = 0; i < patNum; i++ ) - { + for ( int i = 0; i < patNum; i++ ) { ell[i] = (double)alignLen * LL[i] / sumLL; } @@ 
-4394,8 +5505,7 @@ void Alignment::multinomialProb (double *logLL, double &prob) r[0] = ell[0]; expectedNorFre[0] = (int)floor(ell[0]+0.5); //note that floor(_number+0.5) returns the ordinary rounding of _number //int sum = expectedNorFre[0]; - for (int j = 1; j < patNum; j++ ) - { + for (int j = 1; j < patNum; j++ ) { r[j] = ell[j] + r[j-1] - floor(r[j-1]+0.5); expectedNorFre[j] = (int)floor(r[j]+0.5); //sum += expectedNorFre[j]; @@ -4425,8 +5535,8 @@ double Alignment::multinomialProb (IntVector &pattern_freq) //return expectedNorFre; //compute the probability of having expectedNorFre given the observed pattern frequencies of THIS alignment ASSERT(size() == pattern_freq.size()); - int patNum = getNPattern(); - int alignLen = getNSite(); + size_t patNum = getNPattern(); + size_t alignLen = getNSite(); double sumFac = 0; double sumProb = 0; double fac = logFac(alignLen); @@ -4440,15 +5550,15 @@ double Alignment::multinomialProb (IntVector &pattern_freq) bool Alignment::readSiteStateFreq(const char* site_freq_file) { - cout << endl << "Reading site-specific state frequency file " << site_freq_file << " ..." << endl; - site_model.resize(getNSite(), -1); - int i; + cout << endl << "Reading site-specific state frequency file " << site_freq_file << " ..." 
<< endl; + site_model.resize(getNSite(), -1); IntVector pattern_to_site; // vector from pattern to the first site pattern_to_site.resize(getNPattern(), -1); - for (i = 0; i < getNSite(); i++) - if (pattern_to_site[getPatternID(i)] == -1) + for (size_t i = 0; i < getNSite(); ++i) { + if (pattern_to_site[getPatternID(i)] == -1) { pattern_to_site[getPatternID(i)] = i; - + } + } bool aln_changed = false; try { @@ -4473,7 +5583,7 @@ bool Alignment::readSiteStateFreq(const char* site_freq_file) } double *site_freq_entry = new double[num_states]; double sum = 0; - for (i = 0; i < num_states; i++) { + for (int i = 0; i < num_states; ++i) { in >> freq; if (freq <= 0.0 || freq >= 1.0) throw "Frequencies must be strictly positive and smaller than 1"; site_freq_entry[i] = freq; @@ -4483,7 +5593,7 @@ bool Alignment::readSiteStateFreq(const char* site_freq_file) if (fabs(sum-1.0) > 1e-3) outWarning("Frequencies of site " + site_spec + " do not sum up to 1 and will be normalized"); sum = 1.0/sum; - for (i = 0; i < num_states; i++) + for (int i = 0; i < num_states; ++i) site_freq_entry[i] *= sum; } convfreq(site_freq_entry); // regularize frequencies (eg if some freq = 0) @@ -4494,7 +5604,7 @@ bool Alignment::readSiteStateFreq(const char* site_freq_file) // compare freq with prev_site bool matched_freq = true; double *prev_freq = site_state_freq[site_model[prev_site]]; - for (i = 0; i < num_states; i++) { + for (int i = 0; i < num_states; ++i) { if (site_freq_entry[i] != prev_freq[i]) { matched_freq = false; break; @@ -4515,7 +5625,7 @@ bool Alignment::readSiteStateFreq(const char* site_freq_file) aln_changed = true; // there are some unspecified sites cout << site_model.size() - specified_sites << " unspecified sites will get default frequencies" << endl; - for (i = 0; i < site_model.size(); i++) + for (size_t i = 0; i < site_model.size(); ++i) if (site_model[i] == -1) site_model[i] = site_state_freq.size(); site_state_freq.push_back(NULL); @@ -4531,7 +5641,6 @@ bool 
Alignment::readSiteStateFreq(const char* site_freq_file) } catch(ios::failure) { outError(ERR_READ_INPUT); } - if (aln_changed) { cout << "Regrouping alignment sites..." << endl; regroupSitePattern(site_state_freq.size(), site_model); diff --git a/alignment/alignment.h b/alignment/alignment.h index 53604d883..e1163eb3e 100644 --- a/alignment/alignment.h +++ b/alignment/alignment.h @@ -12,61 +12,91 @@ #ifndef ALIGNMENT_H #define ALIGNMENT_H +#include #include #include #include "pattern.h" #include "ncl/ncl.h" -#include "utils/tools.h" - -// IMPORTANT: refactor STATE_UNKNOWN -//const char STATE_UNKNOWN = 126; - -// TODO DS: This seems like a significant restriction. -/* PoMo: STATE_INVALID is not handled in PoMo. Set STATE_INVALID to - 127 to remove warning about comparison to char in alignment.cpp. - This is important if the maximum N will be increased above 21 - because then the state space is larger than 127 and we have to - think about something else. */ -/* const unsigned char STATE_INVALID = 255; */ -const unsigned char STATE_INVALID = 127; -const int NUM_CHAR = 256; + const double MIN_FREQUENCY = 0.0001; const double MIN_FREQUENCY_DIFF = 0.00001; +const int NUM_CHAR = 256; typedef bitset StateBitset; -enum SeqType { - SEQ_DNA, SEQ_PROTEIN, SEQ_BINARY, SEQ_MORPH, SEQ_MULTISTATE, SEQ_CODON, SEQ_POMO, SEQ_UNKNOWN +/** class storing results of symmetry tests */ +class SymTestResult { +public: + SymTestResult() { + significant_pairs = included_pairs = excluded_pairs = 0; + pvalue_binom = -1.0; + max_stat = pvalue_maxdiv = pvalue_perm = 0.0; + } + + /** compute pvalue using bionomial test */ + void computePvalue(); + + int significant_pairs; // number of significant sequence pairs + int included_pairs; // total number of included sequence pairs + int excluded_pairs; // number of excluded sequence pairs + double max_stat; // maximum of the pair statistics + double pvalue_binom; // pvalue of binomial test of symmetry + double pvalue_maxdiv; // p-value of the sequence 
pair with maximum divergence + double pvalue_perm; // p-value of permutation test of symmetry }; +/** class storing all pairwise statistics */ +class SymTestStat { +public: + SymTestStat() { + part = 0; + seq1 = seq2 = 0; + chi2_sym = 0.0; + chi2_marsym = std::numeric_limits::quiet_NaN(); + chi2_intsym = std::numeric_limits::quiet_NaN(); + pval_sym = std::numeric_limits::quiet_NaN(); + pval_marsym = std::numeric_limits::quiet_NaN(); + pval_intsym = std::numeric_limits::quiet_NaN(); + } + int part; // partition ID + int seq1, seq2; // ID of sequence 1 and 2 + double chi2_sym; // chi2 statistic test of symmetry + double chi2_marsym; // chi2 statistic test of marginal symmetry + double chi2_intsym; // chi2 statistic test of internal symmetry + double pval_sym; // chi2 p-value test of symmetry + double pval_marsym; // chi2 p-value test of marginal symmetry + double pval_intsym; // chi2 p-value test of internal symmetry +}; + +std::ostream& operator<< (std::ostream& stream, const SymTestResult& res); #ifdef USE_HASH_MAP struct hashPattern { - size_t operator()(const vector &sp) const { - size_t sum = 0; - for (Pattern::const_iterator it = sp.begin(); it != sp.end(); it++) - sum = (*it) + (sum << 6) + (sum << 16) - sum; - return sum; - } + size_t operator()(const vector &sp) const { + size_t sum = 0; + for (Pattern::const_iterator it = sp.begin(); it != sp.end(); it++) + sum = (*it) + (sum << 6) + (sum << 16) - sum; + return sum; + } }; -typedef unordered_map StringIntMap; -typedef unordered_map StringDoubleHashMap; typedef unordered_map, int, hashPattern> PatternIntMap; -typedef unordered_map IntIntMap; #else -typedef map StringIntMap; -typedef map StringDoubleHashMap; typedef map, int> PatternIntMap; -typedef map IntIntMap; #endif + +constexpr int EXCLUDE_GAP = 1; // exclude gaps +constexpr int EXCLUDE_INVAR = 2; // exclude invariant sites +constexpr int EXCLUDE_UNINF = 4; // exclude uninformative sites + /** Multiple Sequence Alignment. 
Stored by a vector of site-patterns @author BUI Quang Minh, Steffen Klaere, Arndt von Haeseler */ -class Alignment : public vector { +class Alignment : public vector, public CharSet, public StateSpace { friend class SuperAlignment; + friend class SuperAlignmentUnlinked; public: @@ -81,7 +111,14 @@ class Alignment : public vector { @param sequence_type type of the sequence, either "BIN", "DNA", "AA", or NULL @param intype (OUT) input format of the file */ - Alignment(char *filename, char *sequence_type, InputType &intype); + Alignment(char *filename, char *sequence_type, InputType &intype, string model); + + /** + constructor + @param data_block nexus DATA block + @param sequence_type type of the sequence, either "BIN", "DNA", "AA", or NULL + */ + Alignment(NxsDataBlock *data_block, char *sequence_type, string model); /** destructor @@ -96,21 +133,40 @@ class Alignment : public vector { /** get the SeqType for a given string */ static SeqType getSeqType(const char *sequence_type); + /** + add a pattern into the alignment + @param pat the pattern + @param site the site index of the pattern from the alignment + @param freq frequency of pattern + @return TRUE if this pattern hadn't already been seen. + */ + + bool addPatternLazy(Pattern &pat, int site, int freq, bool& gaps_only); + /** add a pattern into the alignment @param pat the pattern @param site the site index of the pattern from the alignment @param freq frequency of pattern @return TRUE if pattern contains only gaps or unknown char. - In that case, the pattern won't be added. */ bool addPattern(Pattern &pat, int site, int freq = 1); + + /** + Update a bunch of patterns that have been added via addPatternLazy + (by calling + */ + + void updatePatterns(size_t oldPatternCount); + + + /** determine if the pattern is constant. update the is_const variable. 
*/ - void computeConst(Pattern &pat); + virtual void computeConst(Pattern &pat); void printSiteInfoHeader(ostream& out, const char* filename, bool partition = false); @@ -233,16 +289,16 @@ class Alignment : public vector { void buildStateMap(char *map, SeqType seq_type); - virtual char convertState(char state, SeqType seq_type); + virtual StateType convertState(char state, SeqType seq_type); /** * convert state if the number of states (num_states is known) * @param state input char to convert * @return output char from 0 to 0-num_states or STATE_INVALID or STATE_UNKNOWN */ - char convertState(char state); + StateType convertState(char state); - virtual void convertStateStr(string &str, SeqType seq_type); + //virtual void convertStateStr(string &str, SeqType seq_type); /** * convert from internal state to user-readable state (e.g., to ACGT for DNA) @@ -258,7 +314,7 @@ class Alignment : public vector { * @param state internal state code * @return user-readable state string */ - string convertStateBackStr(char state); + string convertStateBackStr(StateType state); /** get alignment site range from the residue range relative to a sequence @@ -270,17 +326,23 @@ class Alignment : public vector { bool getSiteFromResidue(int seq_id, int &residue_left, int &residue_right); int buildRetainingSites(const char *aln_site_list, IntVector &kept_sites, - bool exclude_gaps, bool exclude_const_sites, const char *ref_seq_name); + int exclude_sites, const char *ref_seq_name); - void printPhylip(const char *filename, bool append = false, const char *aln_site_list = NULL, - bool exclude_gaps = false, bool exclude_const_sites = false, const char *ref_seq_name = NULL); + void printAlignment(InputType format, const char *filename, bool append = false, const char *aln_site_list = NULL, + int exclude_sites = 0, const char *ref_seq_name = NULL); + + virtual void printAlignment(InputType format, ostream &out, const char* file_name + , bool append = false, const char *aln_site_list = NULL + , 
int exclude_sites = 0, const char *ref_seq_name = NULL); void printPhylip(ostream &out, bool append = false, const char *aln_site_list = NULL, - bool exclude_gaps = false, bool exclude_const_sites = false, const char *ref_seq_name = NULL, bool print_taxid = false); + int exclude_sites = 0, const char *ref_seq_name = NULL, bool print_taxid = false); - void printFasta(const char *filename, bool append = false, const char *aln_site_list = NULL, - bool exclude_gaps = false, bool exclude_const_sites = false, const char *ref_seq_name = NULL); + void printFasta(ostream &out, bool append = false, const char *aln_site_list = NULL, + int exclude_sites = 0, const char *ref_seq_name = NULL); + void printNexus(ostream &out, bool append = false, const char *aln_site_list = NULL, + int exclude_sites = 0, const char *ref_seq_name = NULL, bool print_taxid = false); /** Print the number of gaps per site @param filename output file name @@ -294,21 +356,21 @@ class Alignment : public vector { /** @return number of sequences */ - inline int getNSeq() { + inline size_t getNSeq() const { return seq_names.size(); } /** @return number of sites (alignment columns) */ - inline int getNSite() { + inline size_t getNSite() { return site_pattern.size(); } /** @return number of patterns */ - inline int getNPattern() { + inline size_t getNPattern() { return size(); } @@ -332,6 +394,11 @@ class Alignment : public vector { */ virtual void getPatternFreq(IntVector &freq); + /** + * @param[out] freq vector of site-pattern frequencies + */ + virtual void getPatternFreq(int *freq); + /** @param i sequence index @return sequence name @@ -383,6 +450,14 @@ class Alignment : public vector { */ virtual Alignment *removeIdenticalSeq(string not_remove, bool keep_two, StrVector &removed_seqs, StrVector &target_seqs); + /** + * calculating hashes for sequences + * @param v state at a given site, in the sequence being hashed + * @param hash running hash value for the sequence (modified) + */ + void 
adjustHash(StateType v, size_t& hash) const; + void adjustHash(bool v, size_t& hash) const; + /** Quit if some sequences contain only gaps or missing data */ @@ -397,7 +472,7 @@ class Alignment : public vector { @return TRUE if seq_id contains only gaps or missing characters @param seq_id sequence ID */ - bool isGapOnlySeq(int seq_id); + bool isGapOnlySeq(size_t seq_id); virtual bool isSuperAlignment() { return false; @@ -509,11 +584,26 @@ class Alignment : public vector { */ void convertToCodonOrAA(Alignment *aln, char *gene_code_id, bool nt2aa = false); + /** + convert this codon alignment to AA + */ + Alignment *convertCodonToAA(); + + /** + convert this codon alignment to DNA + */ + Alignment *convertCodonToDNA(); + + /** + @param quartet ID of four taxa + @param[out] support number of sites supporting 12|34, 13|24 and 14|23 + */ + virtual void computeQuartetSupports(IntVector &quartet, vector &support); + /**************************************************************************** Distance functions ****************************************************************************/ - /** compute the observed distance (number of different pairs of positions per site) between two sequences @@ -523,10 +613,16 @@ class Alignment : public vector { */ virtual double computeObsDist(int seq1, int seq2); + /** + @param obs_dist the observed distance between two sequences + @return Jukes-Cantor corrected distance between those sequences + */ + double computeJCDistanceFromObservedDistance(double obs_dist) const; + /** @param seq1 index of sequence 1 @param seq2 index of sequence 2 - @return Juke-Cantor correction distance between seq1 and seq2 + @return Jukes-Cantor correction distance between seq1 and seq2 */ double computeJCDist(int seq1, int seq2); @@ -576,10 +672,33 @@ class Alignment : public vector { some statistics ****************************************************************************/ + /** + count occurrences for each state from 0 to STATE_UNKNOWN + @param 
startSite ordinal of first site (assumed 0 and <= stopSite) + @param stopSite ordinal of last site (assumed +ve and <= size()) + @param[out] state_count counts for all states (for a subset of sites) + */ + void countStatesForSites(size_t startSite, size_t stopSite, size_t *state_count); + + /** + count occurrences for each state from 0 to STATE_UNKNOWN + @param[out] state_count counts for all states + @param num_unknown_states number of unknown states e.g. for missing data + */ + void countStates(size_t *state_count, size_t num_unknown_states); + + /** + convert counts to frequencies using EM algorithm + @param[in] state_count counts for all states + @paramp[out] state_freq normalized state frequency vector + */ + void convertCountToFreq(size_t *state_count, double *state_freq); + /** compute empirical state frequencies from the alignment @param state_freq (OUT) is filled with state frequencies, assuming state_freq was allocated with at least num_states entries. + @param num_unknown_states number of unknown states e.g. 
for missing data */ virtual void computeStateFreq(double *state_freq, size_t num_unknown_states = 0); @@ -619,16 +738,24 @@ class Alignment : public vector { void computeCodonFreq(StateFreqType freq, double *state_freq, double *ntfreq); /** - compute empirical rates between state pairs - @param rates (OUT) vector of size num_states*(num_states-1)/2 for the rates + compute empirical substitution counts between state pairs + @param normalize true to normalize row sum to 1, false otherwise + @param[out] pair_freq matrix of size num_states*num_states + @param[out] state_freq vector of size num_states */ - virtual void computeDivergenceMatrix(double *rates); + virtual void computeDivergenceMatrix(double *pair_freq, double *state_freq, bool normalize = true); /** - compute non-reversible empirical rates between state pairs - @param rates (OUT) vector of size num_states*(num_states-1) for the rates + perform matched-pair tests of symmetry of Lars Jermiin et al. + @param[out] sym results of test of symmetry + @param[out] marsym results of test of marginal symmetry + @param[out] intsym results of test of internal symmetry + @param out output stream to print results + @param rstream random stream to shuffle alignment columns + @param out_stat output stream to print pairwise statistics */ - virtual void computeDivergenceMatrixNonRev(double *rates); + virtual void doSymTest(size_t vecid, vector &sym, vector &marsym, + vector &intsym, int *rstream = NULL, vector *stats = NULL); /** count the fraction of constant sites in the alignment, update the variable frac_const_sites @@ -636,9 +763,15 @@ class Alignment : public vector { virtual void countConstSite(); /** - * @return unobserved constant patterns, each entry encoding for one constant character + * generate uninformative patterns */ - string getUnobservedConstPatterns(); + void generateUninfPatterns(StateType repeat, vector &singleton, vector &seq_pos, vector &unobserved_ptns); + + /** + * @param missing_data TRUE for 
missing data aware correction (for Mark Holder) + * @param[out] unobserved_ptns unobserved constant patterns, each entry encoding for one constant character + */ + void getUnobservedConstPatterns(ASCType ASC_type, vector &unobserved_ptns); /** @return the number of ungappy and unambiguous characters from a sequence @@ -661,11 +794,6 @@ class Alignment : public vector { StateType STATE_UNKNOWN; - /** - number of states - */ - int num_states; - /** fraction of constant sites */ @@ -720,8 +848,6 @@ class Alignment : public vector { vector pomo_sampled_states; IntIntMap pomo_sampled_states_index; // indexing, to quickly find if a PoMo-2-state is already present - vector > seq_states; // state set for each sequence in the alignment - /* for site-specific state frequency model with Huaichun, Edward, Andrew */ /* site to model ID map */ @@ -745,7 +871,7 @@ class Alignment : public vector { /* build seq_states containing set of states per sequence * @param add_unobs_const TRUE to add all unobserved constant states (for +ASC model) */ - void buildSeqStates(bool add_unobs_const = false); + //virtual void buildSeqStates(vector > &seq_states, bool add_unobs_const = false); /** Added by MA Compute the probability of this alignment according to the multinomial distribution with parameters determined by the reference alignment @@ -799,6 +925,38 @@ class Alignment : public vector { */ bool readSiteStateFreq(const char* site_freq_file); + // added by TD + /** + * Compute pairwise summary statistics between two sequences, resulting in 26 values: + * - 4 nucleotide frequencies for sequence 1 + * - 4 nucleotide frequencies for sequence 2 + * - 1 count for total number transitions between sequence 1 and sequence 2 + * - 1 count for total number of transversions between sequence 1 and sequence 2 + * - 16 transition/transversion counts between sequence 1 and sequence 2 + * @param seq1_idx + * @param seq2_idx + * @return + */ + vector computeSummaryStats(int seq1_idx, int seq2_idx); + + 
// added by TD + /** + * Replaces ambiguous characters (W, S, M, K, R, Y, B, D, H, V; N is treated like a gap). For each + * ambiguous character, we randomly choose one of A, C, G, T while respecting the constraints of + * the characters (i.e. for R we choose either A or G. + * @return modified (new) alignment + */ + Alignment* replaceAmbiguousChars(); + + // added by TD + /** + * Removes sites of alignments where >70% are gaps. With >0 but <=70% gaps, gaps are replaced by + * the most frequent base. This strategy is used for the model selection and alpha inference via + * the neural network. + * @return modified (new) alignment + */ + Alignment* removeAndFillUpGappySites(); + protected: @@ -830,4 +988,13 @@ class Alignment : public vector { void extractSiteID(Alignment *aln, const char* spec, IntVector &site_id); +/** + create a new Alignment object with possibility of comma-separated file names + @param aln_file alignment file name, can be a comma-separated list of file names + @param sequence_type sequence data type + @param input input file format + @param model_name model name + */ +Alignment *createAlignment(string aln_file, const char *sequence_type, InputType intype, string model_name); + #endif diff --git a/alignment/alignmentpairwise.cpp b/alignment/alignmentpairwise.cpp index fb9cb073e..8b39bfeac 100644 --- a/alignment/alignmentpairwise.cpp +++ b/alignment/alignmentpairwise.cpp @@ -23,76 +23,166 @@ AlignmentPairwise::AlignmentPairwise() : Alignment(), Optimization() { - pair_freq = NULL; + total_size = 0; + pair_freq = nullptr; + tree = nullptr; + num_states = 0; + num_states_squared = 0; + STATE_UNKNOWN = 0; + trans_size = 0; + trans_mat = nullptr; + sum_trans_mat = nullptr; + trans_derv1 = nullptr; + trans_derv2 = nullptr; + sum_derv1 = nullptr; + sum_derv2 = nullptr; + sum_trans = nullptr; + pairCount = 0; + derivativeCalculationCount = 0; + costCalculationCount = 0; } -AlignmentPairwise::AlignmentPairwise(PhyloTree *atree, int seq1, int seq2) : 
Alignment(), Optimization() { - tree = atree; - seq_id1 = seq1; - seq_id2 = seq2; - num_states = tree->aln->num_states; - STATE_UNKNOWN = tree->aln->STATE_UNKNOWN; - pair_freq = NULL; +void AlignmentPairwise::setTree(PhyloTree* atree) { + // + //Note: Should only be called from constructors; + // If it is called multiple times on the same instance + // it will leak memory. + // + tree = atree; + num_states = tree->aln->num_states; + num_states_squared = num_states * num_states; + STATE_UNKNOWN = tree->aln->STATE_UNKNOWN; + trans_size = 0; + auto rate = tree->getRate(); + bool isRateSiteSpecific = (rate==nullptr) ? false : rate->isSiteSpecificRate(); + auto model = tree->getModel(); + bool isModelSiteSpecific = (model==nullptr) ? false: model->isSiteSpecificModel(); + if (model!=nullptr) { + trans_size = model->getTransMatrixSize(); + } + if (!isModelSiteSpecific && !isRateSiteSpecific + && rate!=nullptr && rate->getPtnCat(0) >= 0) { + total_size *= rate->getNDiscreteRate(); + } + trans_mat = new double[trans_size]; + sum_trans_mat = new double[trans_size]; + sum_trans = new double[trans_size]; + sum_derv1 = new double[trans_size]; + sum_derv2 = new double[trans_size]; + trans_derv1 = new double[trans_size]; + trans_derv2 = new double[trans_size]; + total_size = num_states_squared; + pair_freq = new double[total_size]; + + pairCount = 0; + derivativeCalculationCount = 0; + costCalculationCount = 0; +} - if (tree->getRate()->isSiteSpecificRate() || tree->getModel()->isSiteSpecificModel()) return; +AlignmentPairwise::AlignmentPairwise(PhyloTree* tree) { + setTree(tree); +} - // categorized rates - if (tree->getRate()->getPtnCat(0) >= 0) { - int size_sqr = num_states * num_states; - int total_size = size_sqr * tree->getRate()->getNDiscreteRate(); - pair_freq = new double[total_size]; - memset(pair_freq, 0, sizeof(double)*total_size); +void AlignmentPairwise::setSequenceNumbers(int seq1, int seq2) { + ++pairCount; + seq_id1 = seq1; + seq_id2 = seq2; + auto rate = 
tree->getRate(); + bool isRateSiteSpecific = (rate==nullptr) ? false : rate->isSiteSpecificRate(); + auto model = tree->getModel(); + bool isModelSiteSpecific = (model==nullptr) ? false: model->isSiteSpecificModel(); + if (isRateSiteSpecific || isModelSiteSpecific) { + return; + } + memset(pair_freq, 0, sizeof(double)*total_size); + if (tree->hasMatrixOfConvertedSequences() + && rate->getPtnCat(0) < 0 ) { + auto sequence1 = tree->getConvertedSequenceByNumber(seq1); + auto sequence2 = tree->getConvertedSequenceByNumber(seq2); + auto frequencies = tree->getConvertedSequenceFrequencies(); + size_t sequenceLength = tree->getConvertedSequenceLength(); + for (size_t i=0; igetSumOfFrequenciesForSitesWithConstantState(state); + } + //Todo: Handle the multiple category case here + return; + } else if (tree->getRate()->getPtnCat(0) >= 0) { int i = 0; - for (Alignment::iterator it = tree->aln->begin(); it != tree->aln->end(); it++, i++) { + for (auto it = tree->aln->begin(); it != tree->aln->end(); it++, i++) { int state1 = tree->aln->convertPomoState((*it)[seq_id1]); int state2 = tree->aln->convertPomoState((*it)[seq_id2]); - addPattern(state1, state2, it->frequency, tree->getRate()->getPtnCat(i)); - /* - if (state1 < num_states && state2 < num_states) - pair_freq[tree->getRate()->getPtnCat(i)*size_sqr + state1*num_states + state2] += it->frequency;*/ + addPattern(state1, state2, it->frequency, rate->getPtnCat(i)); + } + return; + } else { + for (auto it = tree->aln->begin(); it != tree->aln->end(); it++) { + int state1 = tree->aln->convertPomoState((*it)[seq_id1]); + int state2 = tree->aln->convertPomoState((*it)[seq_id2]); + addPattern(state1, state2, it->frequency); } return; - } - - pair_freq = new double[num_states * num_states]; - memset(pair_freq, 0, sizeof(double) * num_states * num_states); - for (Alignment::iterator it = tree->aln->begin(); it != tree->aln->end(); it++) { - int state1 = tree->aln->convertPomoState((*it)[seq_id1]); - int state2 = 
tree->aln->convertPomoState((*it)[seq_id2]); - addPattern(state1, state2, it->frequency); - /* if (state1 < num_states && state2 < num_states) - pair_freq[state1 * num_states + state2] += it->frequency;*/ } } +AlignmentPairwise::AlignmentPairwise(PhyloTree *atree, int seq1, int seq2) + : Alignment(), Optimization() { + setTree(atree); + setSequenceNumbers(seq1,seq2); +} + bool AlignmentPairwise::addPattern(int state1, int state2, int freq, int cat) { int i; - if (state1 == STATE_UNKNOWN || state2 == STATE_UNKNOWN) return true; - - double *pair_pos = pair_freq + (cat*num_states*num_states); - // unambiguous case + if (state1 == STATE_UNKNOWN || state2 == STATE_UNKNOWN) { + return true; + } + double *pair_pos = pair_freq; + if (0getRate(); int ncat = site_rate->getNDiscreteRate(); ModelSubst *model = tree->getModel(); - int trans_size = tree->getModel()->getTransMatrixSize(); - int cat, i; int nptn = tree->aln->getNPattern(); double lh = 0.0; + if (tree->hasMatrixOfConvertedSequences()) { + auto sequence1 = tree->getConvertedSequenceByNumber(seq_id1); + auto sequence2 = tree->getConvertedSequenceByNumber(seq_id2); + auto frequencies = tree->getConvertedSequenceFrequencies(); + size_t sequenceLength = tree->getConvertedSequenceLength(); + + if (site_rate->isSiteSpecificRate()) { + for (int i = 0; i < sequenceLength; i++) { + int state1 = sequence1[i]; + int state2 = sequence2[i]; + if (state1 >= num_states || state2 >= num_states) { + continue; + } + double trans = tree->getModelFactory()->computeTrans(value * site_rate->getPtnRate(i), state1, state2); + lh -= log(trans) * frequencies[i]; + } + return lh; + } else if (tree->getModel()->isSiteSpecificModel()) { + for (int i = 0; i < nptn; i++) { + int state1 = sequence1[i]; + int state2 = sequence2[i]; + if (state1 >= num_states || state2 >= num_states) { + continue; + } + double trans = tree->getModelFactory()->computeTrans(value * site_rate->getPtnRate(i), state1, state2); + lh -= log(trans) * frequencies[i]; + } + 
return lh; + } + } // site-specific rates if (site_rate->isSiteSpecificRate()) { - for (i = 0; i < nptn; i++) { + for (int i = 0; i < nptn; i++) { int state1 = tree->aln->at(i)[seq_id1]; int state2 = tree->aln->at(i)[seq_id2]; if (state1 >= num_states || state2 >= num_states) continue; double trans = tree->getModelFactory()->computeTrans(value * site_rate->getPtnRate(i), state1, state2); lh -= log(trans) * tree->aln->at(i).frequency; - } return lh; } - if (tree->getModel()->isSiteSpecificModel()) { - for (i = 0; i < nptn; i++) { + for (int i = 0; i < nptn; i++) { int state1 = tree->aln->at(i)[seq_id1]; int state2 = tree->aln->at(i)[seq_id2]; if (state1 >= num_states || state2 >= num_states) continue; double trans = tree->getModel()->computeTrans(value, model->getPtnModelID(i), state1, state2); lh -= log(trans) * tree->aln->at(i).frequency; - } return lh; } - double *trans_mat = new double[trans_size]; - // categorized rates if (site_rate->getPtnCat(0) >= 0) { - for (cat = 0; cat < ncat; cat++) { + for (int cat = 0; cat < ncat; cat++) { tree->getModelFactory()->computeTransMatrix(value*site_rate->getRate(cat), trans_mat); double *pair_pos = pair_freq + cat*trans_size; - for (i = 0; i < trans_size; i++) if (pair_pos[i] > Params::getInstance().min_branch_length) { - if (trans_mat[i] <= 0) throw "Negative transition probability"; + for (int i = 0; i < trans_size; i++) + if (pair_pos[i] > Params::getInstance().min_branch_length) { + if (trans_mat[i] <= 0) { + throw "Negative transition probability"; + } lh -= pair_pos[i] * log(trans_mat[i]); } } - delete [] trans_mat; return lh; } - double *sum_trans_mat = new double[trans_size]; - if (tree->getModelFactory()->site_rate->getGammaShape() == 0.0) tree->getModelFactory()->computeTransMatrix(value, sum_trans_mat); else { tree->getModelFactory()->computeTransMatrix(value * site_rate->getRate(0), sum_trans_mat); - for (cat = 1; cat < ncat; cat++) { + for (int cat = 1; cat < ncat; cat++) { 
tree->getModelFactory()->computeTransMatrix(value * site_rate->getRate(cat), trans_mat); - for (i = 0; i < trans_size; i++) + for (int i = 0; i < trans_size; i++) sum_trans_mat[i] += trans_mat[i]; } } - for (i = 0; i < trans_size; i++) { + for (int i = 0; i < trans_size; i++) { lh -= pair_freq[i] * log(sum_trans_mat[i]); } - delete [] sum_trans_mat; - delete [] trans_mat; // negative log-likelihood (for minimization) return lh; } void AlignmentPairwise::computeFuncDerv(double value, double &df, double &ddf) { + ++derivativeCalculationCount; RateHeterogeneity *site_rate = tree->getRate(); int ncat = site_rate->getNDiscreteRate(); ModelSubst *model = tree->getModel(); int trans_size = tree->getModel()->getTransMatrixSize(); - int cat, i; int nptn = tree->aln->getNPattern(); -// double lh = 0.0; df = 0.0; ddf = 0.0; + + auto sequence1 = tree->getConvertedSequenceByNumber(seq_id1); + auto sequence2 = tree->getConvertedSequenceByNumber(seq_id2); + auto frequencies = tree->getConvertedSequenceFrequencies(); + size_t sequenceLength = tree->getConvertedSequenceLength(); + if (sequenceLength!=nptn) { + sequence1 = sequence2 = nullptr; + frequencies = nullptr; + } if (site_rate->isSiteSpecificRate()) { - for (i = 0; i < nptn; i++) { - int state1 = tree->aln->at(i)[seq_id1]; - int state2 = tree->aln->at(i)[seq_id2]; - if (state1 >= num_states || state2 >= num_states) continue; - double rate_val = site_rate->getPtnRate(i); - double rate_sqr = rate_val * rate_val; - double derv1, derv2; - double trans = tree->getModelFactory()->computeTrans(value * rate_val, state1, state2, derv1, derv2); -// lh -= log(trans) * tree->aln->at(i).frequency; - double d1 = derv1 / trans; - df -= rate_val * d1 * tree->aln->at(i).frequency; - ddf -= rate_sqr * (derv2/trans - d1*d1) * tree->aln->at(i).frequency; - + if (sequence1!=nullptr && sequence2!=nullptr && frequencies!=nullptr) { + #pragma omp parallel for reduction(-:df,ddf) schedule(dynamic,100) + for (int i = 0; i < nptn; ++i) { + int state1 
= sequence1[i]; + if (num_states<=state1) { + continue; + } + int state2 = sequence2[i]; + if (num_states<=state2) { + continue; + } + double freq = frequencies[i]; + double rate_val = site_rate->getPtnRate(i); + double rate_sqr = rate_val * rate_val; + double derv1, derv2; + double trans = tree->getModelFactory()->computeTrans(value * rate_val, state1, state2, derv1, derv2); + double d1 = derv1 / trans; + df -= rate_val * d1 * freq; + ddf -= rate_sqr * (derv2/trans - d1*d1) * freq; + } + } else { + for (int i = 0; i < nptn; i++) { + int state1 = tree->aln->at(i)[seq_id1]; + if (num_states<=state1) { + continue; + } + int state2 = tree->aln->at(i)[seq_id2]; + if (num_states<=state2) { + continue; + } + double rate_val = site_rate->getPtnRate(i); + double rate_sqr = rate_val * rate_val; + double derv1, derv2; + double trans = tree->getModelFactory()->computeTrans(value * rate_val, state1, state2, derv1, derv2); + double d1 = derv1 / trans; + double freq = tree->aln->at(i).frequency; + df -= rate_val * d1 * freq; + ddf -= rate_sqr * (derv2/trans - d1*d1) * freq; + } } -// return lh; return; } - if (tree->getModel()->isSiteSpecificModel()) { - for (i = 0; i < nptn; i++) { - int state1 = tree->aln->at(i)[seq_id1]; - int state2 = tree->aln->at(i)[seq_id2]; - if (state1 >= num_states || state2 >= num_states) continue; - double rate_val = site_rate->getPtnRate(i); - double rate_sqr = rate_val * rate_val; - double derv1, derv2; - double trans = tree->getModel()->computeTrans(value * rate_val,model->getPtnModelID(i), state1, state2, derv1, derv2); -// lh -= log(trans) * tree->aln->at(i).frequency; - double d1 = derv1 / trans; - df -= rate_val * d1 * tree->aln->at(i).frequency; - ddf -= rate_sqr * (derv2/trans - d1*d1) * tree->aln->at(i).frequency; - + if (sequence1!=nullptr && sequence2!=nullptr && frequencies!=nullptr) { + #pragma omp parallel for reduction(-:df,ddf) schedule(dynamic,100) + for (int i = 0; i < nptn; i++) { + int state1 = sequence1[i]; + if 
(num_states<=state1) { + continue; + } + int state2 = sequence2[i]; + if (num_states<=state2) { + continue; + } + double freq = frequencies[i]; + double rate_val = site_rate->getPtnRate(i); + double rate_sqr = rate_val * rate_val; + double derv1, derv2; + double trans = tree->getModel()->computeTrans(value * rate_val,model->getPtnModelID(i), state1, state2, derv1, derv2); + double d1 = derv1 / trans; + df -= rate_val * d1 * freq; + ddf -= rate_sqr * (derv2/trans - d1*d1) * freq; + } + } else { + for (int i = 0; i < nptn; i++) { + int state1 = tree->aln->at(i)[seq_id1]; + if (num_states<=state1) { + continue; + } + int state2 = tree->aln->at(i)[seq_id2]; + if (num_states<=state2) { + continue; + } + double rate_val = site_rate->getPtnRate(i); + double rate_sqr = rate_val * rate_val; + double derv1, derv2; + double trans = tree->getModel()->computeTrans(value * rate_val,model->getPtnModelID(i), state1, state2, derv1, derv2); + double d1 = derv1 / trans; + double freq = tree->aln->at(i).frequency; + df -= rate_val * d1 * freq; + ddf -= rate_sqr * (derv2/trans - d1*d1) * freq; + } } -// return lh; return; } - - double *trans_mat = new double[trans_size]; - double *trans_derv1 = new double[trans_size]; - double *trans_derv2 = new double[trans_size]; - + // categorized rates if (site_rate->getPtnCat(0) >= 0) { - for (cat = 0; cat < ncat; cat++) { + for (int cat = 0; cat < ncat; cat++) { double rate_val = site_rate->getRate(cat); double derv1 = 0.0, derv2 = 0.0; tree->getModelFactory()->computeTransDerv(value*rate_val, trans_mat, trans_derv1, trans_derv2); double *pair_pos = pair_freq + cat*trans_size; - for (i = 0; i < trans_size; i++) if (pair_pos[i] > 0) { - if (trans_mat[i] <= 0) throw "Negative transition probability"; - double d1 = trans_derv1[i] / trans_mat[i]; - derv1 += pair_pos[i] * d1; - derv2 += pair_pos[i] * (trans_derv2[i]/trans_mat[i] - d1 * d1); -// lh -= pair_pos[i] * log(trans_mat[i]); + for (int i = 0; i < trans_size; i++) if (pair_pos[i] > 0) { + if 
(trans_mat[i] <= 0) { + throw "Negative transition probability"; } + double d1 = trans_derv1[i] / trans_mat[i]; + derv1 += pair_pos[i] * d1; + derv2 += pair_pos[i] * (trans_derv2[i]/trans_mat[i] - d1 * d1); + } df -= derv1 * rate_val; ddf -= derv2 * rate_val * rate_val; } - delete [] trans_derv2; - delete [] trans_derv1; - delete [] trans_mat; -// return lh; return; } - - double *sum_trans = new double[trans_size]; - double *sum_derv1 = new double[trans_size]; - double *sum_derv2 = new double[trans_size]; memset(sum_trans, 0, sizeof(double) * trans_size); memset(sum_derv1, 0, sizeof(double) * trans_size); memset(sum_derv2, 0, sizeof(double) * trans_size); - for (cat = 0; cat < ncat; cat++) { + for (int cat = 0; cat < ncat; cat++) { double rate_val = site_rate->getRate(cat); + double prop_val = site_rate->getProp(cat); if (tree->getModelFactory()->site_rate->getGammaShape() == 0.0) + { rate_val = 1.0; - - double rate_sqr = rate_val * rate_val; + } + double coeff1 = rate_val * prop_val; + double coeff2 = rate_val * coeff1; + //cout << "cat " << cat << "," << (intptr_t)trans_mat << ", " << (intptr_t)trans_derv1 << ", " << (intptr_t)trans_derv2 << endl; tree->getModelFactory()->computeTransDerv(value * rate_val, trans_mat, trans_derv1, trans_derv2); - for (i = 0; i < trans_size; i++) { - sum_trans[i] += trans_mat[i]; - sum_derv1[i] += trans_derv1[i] * rate_val; - sum_derv2[i] += trans_derv2[i] * rate_sqr; + for (int i = 0; i < trans_size; i++) { + sum_trans[i] += trans_mat[i] * prop_val; + sum_derv1[i] += trans_derv1[i] * coeff1; + sum_derv2[i] += trans_derv2[i] * coeff2; } } - for (i = 0; i < trans_size; i++) + + // 2019-07-03: incorporate p_invar + double p_invar = site_rate->getPInvar(); + if (p_invar > 0.0) { + for (int i = 0; i < num_states; i++) { + sum_trans[i*num_states+i] += p_invar; + } + } + + for (int i = 0; i < trans_size; i++) { if (pair_freq[i] > Params::getInstance().min_branch_length && sum_trans[i] > 0.0) { -// lh -= pair_freq[i] * log(sum_trans[i]); 
double d1 = sum_derv1[i] / sum_trans[i]; - df -= pair_freq[i] * d1; + df -= pair_freq[i] * d1; ddf -= pair_freq[i] * (sum_derv2[i]/sum_trans[i] - d1 * d1); } - delete [] sum_derv2; - delete [] sum_derv1; - delete [] sum_trans; - delete [] trans_derv2; - delete [] trans_derv1; - delete [] trans_mat; - // negative log-likelihood (for minimization) -// return lh; + } return; } double AlignmentPairwise::optimizeDist(double initial_dist, double &d2l) { // initial guess of the distance using Juke-Cantor correction double dist = initial_dist; - d2l = -1.0; - + // if no model or rate is specified, return the JC distance and set variance to const - if (!tree->getModelFactory() || !tree->getRate()) return dist; - + if (!tree->getModelFactory() || !tree->getRate()) { + return dist; + } double negative_lh, ferror; double max_genetic_dist = MAX_GENETIC_DIST; if (tree->aln->seq_type == SEQ_POMO) { int N = tree->aln->virtual_pop_size; max_genetic_dist *= N*N; } - if (tree->optimize_by_newton) // Newton-Raphson method - dist = minimizeNewton(Params::getInstance().min_branch_length, dist, max_genetic_dist, Params::getInstance().min_branch_length, d2l); - else // Brent method - dist = minimizeOneDimen(Params::getInstance().min_branch_length, dist, max_genetic_dist, Params::getInstance().min_branch_length, &negative_lh, &ferror); - + ++costCalculationCount; + double min_branch = Params::getInstance().min_branch_length; + if (tree->optimize_by_newton) { // Newton-Raphson method + dist = minimizeNewton(min_branch, dist, max_genetic_dist, min_branch, d2l); + } else { // Brent method + dist = minimizeOneDimen(min_branch, dist, max_genetic_dist, min_branch, &negative_lh, &ferror); + } return dist; } @@ -316,10 +480,56 @@ double AlignmentPairwise::optimizeDist(double initial_dist) { return optimizeDist(initial_dist, d2l); } +double AlignmentPairwise::recomputeDist + ( int seq1, int seq2, double initial_dist, double &d2l ) { + //Only called when -experimental has been passed + if 
(initial_dist == 0.0) { + if (tree->hasMatrixOfConvertedSequences()) { + int distance = 0; + int denominator = 0; + auto sequence1 = tree->getConvertedSequenceByNumber(seq1); + auto sequence2 = tree->getConvertedSequenceByNumber(seq2); + auto nonConstSiteFreq = tree->getConvertedSequenceNonConstFrequencies(); + size_t sequenceLength = tree->getConvertedSequenceLength(); + for (size_t i=0; iparams->compute_obs_dist) { + return initial_dist; + } + initial_dist = tree->aln->computeJCDistanceFromObservedDistance(initial_dist); + } + else if (tree->params->compute_obs_dist) + return (initial_dist = tree->aln->computeObsDist(seq1, seq2)); + else + initial_dist = tree->aln->computeDist(seq1, seq2); + } + if (!tree->hasModelFactory() || !tree->hasRateHeterogeneity()) + { + return initial_dist; + } + setSequenceNumbers(seq1, seq2); + return optimizeDist(initial_dist, d2l); +} AlignmentPairwise::~AlignmentPairwise() { - if (pair_freq) delete [] pair_freq; + delete [] sum_derv2; + delete [] sum_derv1; + delete [] sum_trans; + delete [] trans_derv2; + delete [] trans_derv1; + delete [] sum_trans_mat; + delete [] trans_mat; + delete [] pair_freq; } - - diff --git a/alignment/alignmentpairwise.h b/alignment/alignmentpairwise.h index 8458ba271..cae1c75b0 100644 --- a/alignment/alignmentpairwise.h +++ b/alignment/alignmentpairwise.h @@ -33,14 +33,30 @@ class AlignmentPairwise : public Alignment, public Optimization public: AlignmentPairwise(); - /** + /** + pairwise alignment with sequence numbers not yet set + @param atree input multiple alignment + */ + AlignmentPairwise(PhyloTree *atree); + + /** construct the pairwise alignment from two sequences of a multiple alignment - @param aln input multiple alignment - @param seq_id1 ID of the first sequence - @param seq_id2 ID of the second sequence + @param atree input multiple alignment + @param seq1 ID of the first sequence + @param seq2 ID of the second sequence */ AlignmentPairwise(PhyloTree *atree, int seq1, int seq2); + + /** + 
recalculate the pairwise alignment for a different pair of sequences of + the same multiple alignment it was constructed for + @param seq1 ID of the first sequence + @param seq2 ID of the second sequence + */ + void setSequenceNumbers(int seq1, int seq2); + + /** compute the likelihood for a distance between two sequences. Used for the ML optimization of the distance. @param value x-value of the function @@ -69,7 +85,6 @@ class AlignmentPairwise : public Alignment, public Optimization double optimizeDist(double initial_dist, double &d2l); - /** add a pattern into the alignment @param state1 @@ -81,20 +96,49 @@ class AlignmentPairwise : public Alignment, public Optimization */ bool addPattern(int state1, int state2, int freq, int cat = 0); - + /** + calculate the distance (or branch length) between two sequences + @param seq1 + @param seq2 states of the pattern + @param initial_dist previous estimate of distance + @param d2l + @return a new estimate of branch length + */ + + virtual double recomputeDist( int seq1, int seq2, double initial_dist, double &d2l ); + /** destructor */ virtual ~AlignmentPairwise(); - /** - pairwise state frequencies - */ - double *pair_freq; - - PhyloTree *tree; - - int seq_id1, seq_id2; + size_t pairCount; + size_t derivativeCalculationCount; + size_t costCalculationCount; + +protected: + PhyloTree* tree; //multi-species alignment tree from which sequences + //to be aligned are to be drawn + int num_states_squared; //the square of num_states + int total_size; //number of elements in pair_freq + double* pair_freq; //array of frequency counts (owned by this instance) + //size is num_states_squared times 1 (or by the number + //of categories). 
+ int trans_size; //number of elements (rows x columns) in transition matrices + double* trans_mat; //used in computeFunction(), + double* sum_trans_mat; //used in computeFunction() + double* trans_derv1; //used in computeFuncDerv() + double* trans_derv2; //used in computeFuncDerv() + double* sum_derv1; //used in computeFuncDerv() + double* sum_derv2; //used in computeFuncDerv() + double* sum_trans; //used in computeFuncDerv() + + int seq_id1; + int seq_id2; +protected: + void setTree(PhyloTree* atree); + + }; #endif diff --git a/alignment/alignmentsummary.cpp b/alignment/alignmentsummary.cpp new file mode 100644 index 000000000..505d335f1 --- /dev/null +++ b/alignment/alignmentsummary.cpp @@ -0,0 +1,161 @@ +// +// alignmentsummary.cpp +// alignment +// +// Created by James Barbetti on 1/7/20. +// + +#include "alignment.h" +#include "alignmentsummary.h" + +AlignmentSummary::AlignmentSummary(const Alignment* a + , bool keepConstSites + , bool keepBoringSites) { + alignment = a; + sequenceMatrix = nullptr; + sequenceCount = a->getNSeq(); + totalFrequency = 0; + totalFrequencyOfNonConstSites = 0; + if (sequenceCount==0) { + minState = a->STATE_UNKNOWN; + maxState = a->STATE_UNKNOWN; + return; + } + + struct SiteSummary + { + public: + bool isConst; + int frequency; + StateType minState; + StateType maxState; + SiteSummary(): isConst(false), frequency(0), minState(0), maxState(0) {} + }; + + size_t siteCount = alignment->size(); + std::vector sites; + sites.resize(siteCount); + #ifdef _OPENMP + #pragma omp parallel for + #endif + for (size_t site=0; sitebegin() + site; + SiteSummary &s = sites[site]; + StateType minStateForSite = (*itSite)[0]; + StateType maxStateForSite = minStateForSite; + s.isConst = itSite->isConst(); + s.frequency = itSite->frequency; + for (size_t seq=1; seq & map = stateToSumOfConstantSiteFrequencies; + for (size_t site=0; sitesecond; +} + +bool AlignmentSummary::constructSequenceMatrixNoisily(bool treatAllAmbiguousStatesAsUnknown + , const 
char* taskName, const char* verb) { + progress_display progress(sequenceCount, taskName, verb, "sequence"); + return constructSequenceMatrix(treatAllAmbiguousStatesAsUnknown, &progress); +} + +bool AlignmentSummary::constructSequenceMatrix ( bool treatAllAmbiguousStatesAsUnknown + , progress_display* progress) { + delete [] sequenceMatrix; + sequenceMatrix = nullptr; + if ( minState<0 || 127 < maxState ) { + return false; + } + sequenceMatrix = new char[ sequenceCount * sequenceLength ]; + const int* posToSite = siteNumbers.data(); + if (treatAllAmbiguousStatesAsUnknown) + { + #ifdef _OPENMP + #pragma omp parallel for + #endif + for (size_t seq=0; seqat(posToSite[seqPos])[seq] ; + if ( this->alignment->num_states <= state ) { + state = this->alignment->STATE_UNKNOWN; + } + sequence[seqPos] = static_cast ( state ); + //the state at the (seqPos)th non-constant site, in the (seq)th sequence + } + if (progress!=nullptr && (seq % 100) == 0) { + (*progress) += 100; + } + } + } + else + { + #ifdef _OPENMP + #pragma omp parallel for + #endif + for (size_t seq=0; seq ( alignment->at(posToSite[seqPos])[seq] ); + //the state at the (seqPos)th non-constant site, in the (seq)th sequence + } + if (progress != nullptr && (seq % 100) == 0) { + (*progress) += 100; + } + } + } + return true; +} diff --git a/alignment/alignmentsummary.h b/alignment/alignmentsummary.h new file mode 100644 index 000000000..015d00075 --- /dev/null +++ b/alignment/alignmentsummary.h @@ -0,0 +1,47 @@ +// +// alignmentsummary.hpp +// alignment +// +// Created by James Barbetti on 1/7/20. 
+// + +#ifndef alignmentsummary_hpp +#define alignmentsummary_hpp + +#include +#include +#include //for progress_display + +/** +Summary (for an Alignment) of sites where there are variations + @author James Barbetti + */ + +class Alignment; + +struct AlignmentSummary +{ +public: + AlignmentSummary(const Alignment* a, bool keepConstSites, bool keepBoringSites); + ~AlignmentSummary(); + const Alignment* alignment; + std::vector siteNumbers; //of sites with variation + std::vector siteFrequencies; //ditto + std::vector nonConstSiteFrequencies; //ditto, but zeroed if site + //isConst according to alignment + std::map stateToSumOfConstantSiteFrequencies; + size_t totalFrequency; //sum of frequencies (*including* constant sites!) + size_t totalFrequencyOfNonConstSites; //ditto (*excluding* constant sites!) + StateType minState; //found on any site where there is variation + StateType maxState; //ditto + char* sequenceMatrix; + size_t sequenceLength; //Sequence length + size_t sequenceCount; //The number of sequences + size_t getSumOfConstantSiteFrequenciesForState(int state); + bool constructSequenceMatrix ( bool treatAllAmbiguousStatesAsUnknown + , progress_display *progress = nullptr); + bool constructSequenceMatrixNoisily ( bool treatAllAmbiguousStatesAsUnknown, + const char* taskName, const char* verb); +}; + +#endif /* alignmentsummary_hpp */ diff --git a/alignment/maalignment.cpp b/alignment/maalignment.cpp index 353f87474..53c962f8c 100644 --- a/alignment/maalignment.cpp +++ b/alignment/maalignment.cpp @@ -88,8 +88,8 @@ IntVector MaAlignment::computeExpectedNorFre() if ( logLL.empty()) outError("Error: log likelihood of patterns are not given!"); - int patNum = getNPattern(); - int alignLen = getNSite(); + size_t patNum = getNPattern(); + size_t alignLen = getNSite(); //resize the expectedNorFre vector expectedNorFre.resize(patNum,-1); @@ -144,13 +144,12 @@ void MaAlignment::printPatObsExpFre(const char *fileName, const IntVector expect out.open(fileName); out << 
"Pattern\tLogLL\tObservedFre\tExpectedFre" << endl; - int patNum = getNPattern(); - int seqNum = getNSeq(); - int seqID; + size_t patNum = getNPattern(); + size_t seqNum = getNSeq(); - for ( int i = 0; i < patNum; i++ ) + for ( size_t i = 0; i < patNum; ++i ) { - for ( seqID = 0; seqID < seqNum; seqID++ ){ + for ( size_t seqID = 0; seqID < seqNum; ++seqID ){ out << convertStateBackStr(at(i)[seqID]); } out << "\t" << logLL[i] << "\t" << (*this)[i].frequency << "\t" << expectedNorFre[i] << endl; @@ -175,7 +174,6 @@ void MaAlignment::generateExpectedAlignment(MaAlignment *aln, double &prob) VerboseMode save_mode = verbose_mode; verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern - int patID; int site = 0; int npat = aln->getNPattern(); @@ -186,7 +184,7 @@ void MaAlignment::generateExpectedAlignment(MaAlignment *aln, double &prob) double sumFacMax = 0; double sumProbMax = 0; - for (patID = 0; patID < npat; patID++) { + for (int patID = 0; patID < npat; ++patID) { int patFre = expectedNorFre[patID]; for ( int patSite = 0; patSite < patFre; patSite++) { diff --git a/alignment/maalignment.h b/alignment/maalignment.h index d70072c88..64dd2866d 100644 --- a/alignment/maalignment.h +++ b/alignment/maalignment.h @@ -34,7 +34,7 @@ class MaAlignment : public Alignment public: MaAlignment() : Alignment() {}; - MaAlignment(char *filename, char *sequence_type, InputType &intype) : Alignment(filename, sequence_type, intype){}; + MaAlignment(char *filename, char *sequence_type, InputType &intype, string model) : Alignment(filename, sequence_type, intype, model){}; MaAlignment(Alignment &align) : Alignment(align){}; diff --git a/alignment/pattern.cpp b/alignment/pattern.cpp index 43448b78d..b00cb0509 100644 --- a/alignment/pattern.cpp +++ b/alignment/pattern.cpp @@ -11,6 +11,7 @@ // #include "pattern.h" #include "alignment/alignment.h" +#include Pattern::Pattern() : vector() @@ -19,8 +20,21 @@ Pattern::Pattern() // is_const = false; // 
is_informative = false; flag = 0; - const_char = 255; + const_char = -1; num_chars = 0; + freqs = {}; // added by TD +} + +Pattern::Pattern(int nseq, int freq) +: vector(nseq) +{ + frequency = freq; + // is_const = false; + // is_informative = false; + flag = 0; + const_char = -1; + num_chars = 0; + freqs = {}; // added by TD } Pattern::Pattern(const Pattern &pat) @@ -32,6 +46,7 @@ Pattern::Pattern(const Pattern &pat) flag = pat.flag; const_char = pat.const_char; num_chars = pat.num_chars; + freqs = pat.freqs; // added by TD } Pattern::~Pattern() @@ -45,10 +60,32 @@ int Pattern::computeAmbiguousChar(int num_states) { return num; } -int Pattern::computeGapChar(int num_states, int STATE_UNKNOWN) { +#define VECTORIZE_GAPCHAR_COUNT 1 +int Pattern::computeGapChar(int num_states, int STATE_UNKNOWN) const { int num = 0; +#if VECTORIZE_GAPCHAR_COUNT + //This won't compile unless value_type is based on uint32_t + //(nor should it! You'd need to use different vector types!) + const uint32_t* dataStart = data(); + size_type count = size(); + size_type vecSize = Vec8ui::size(); + Vec8ui unknown = STATE_UNKNOWN; + const uint32_t* dataStop = dataStart + count; + const uint32_t* blockStop = dataStop - (count & (vecSize-1)); + for (const uint32_t* block=dataStart; block -#include -#include -#include +#include "phylo-yaml/statespace.h" using namespace std; +using namespace PML; const int PAT_CONST = 1; // const site pattern, e.g. 
AAAAAA, CC-C-CCCC const int PAT_INVARIANT = 2; // invariant site pattern, including const patterns and e.g., GS--G-GGG (S = G/C) const int PAT_INFORMATIVE = 4; // parsimony informative sites const int PAT_VARIANT = 8; // variant site pattern -typedef uint32_t StateType; - /** Site-patterns in a multiple sequence alignment @author BUI Quang Minh, Steffen Klaere, Arndt von Haeseler @@ -38,6 +34,11 @@ class Pattern : public vector */ Pattern(); + /** + constructor + */ + Pattern(int nseq, int freq = 1); + Pattern(const Pattern &pat); /** @@ -50,7 +51,7 @@ class Pattern : public vector @param num_states number of states of the model @return the number of gaps */ - int computeGapChar(int num_states, int STATE_UNKNOWN); + int computeGapChar(int num_states, int STATE_UNKNOWN) const; // Pattern &operator= (Pattern pat); @@ -59,15 +60,15 @@ class Pattern : public vector */ virtual ~Pattern(); - inline bool isConst() { + inline bool isConst() const { return (flag & PAT_CONST) != 0; } - inline bool isInvariant() { + inline bool isInvariant() const { return (flag & PAT_INVARIANT) != 0; } - inline bool isInformative() { + inline bool isInformative() const { return (flag & PAT_INFORMATIVE) != 0; } @@ -92,6 +93,11 @@ class Pattern : public vector /** number of different character states */ int num_chars; + + // added by TD + /** character frequencies */ + vector freqs; + }; #endif diff --git a/alignment/superalignment.cpp b/alignment/superalignment.cpp index 56809a0dc..016c81544 100644 --- a/alignment/superalignment.cpp +++ b/alignment/superalignment.cpp @@ -19,38 +19,156 @@ ***************************************************************************/ #include -#include "tree/phylotree.h" #include "superalignment.h" -#include "tree/phylosupertree.h" +#include "nclextra/msetsblock.h" +#include "nclextra/myreader.h" +#include "main/phylotesting.h" +#include "utils/timeutil.h" //for getRealTime() + +Alignment *createAlignment(string aln_file, const char *sequence_type, InputType 
intype, string model_name) {
+    bool is_dir = isDirectory(aln_file.c_str());
+
+    // a plain file name (neither a directory nor a comma-separated list) is a single alignment
+    if (!is_dir && aln_file.find(',') == string::npos)
+        return new Alignment((char*)aln_file.c_str(), (char*)sequence_type, intype, model_name);
+
+    // otherwise load every file as a partition and concatenate them into one alignment
+    SuperAlignment *super_aln = new SuperAlignment;
+    if (is_dir)
+        super_aln->readPartitionDir(aln_file, (char*)sequence_type, intype, model_name, true);
+    else
+        super_aln->readPartitionList(aln_file, (char*)sequence_type, intype, model_name, true);
+    super_aln->init();
+    Alignment *aln = super_aln->concatenateAlignments();
+    if (aln->isSuperAlignment())
+        outError("Cannot concatenate alignments of different data type ", aln_file);
+    delete super_aln;
+    return aln;
+}
 SuperAlignment::SuperAlignment() : Alignment() {
     max_num_states = 0;
 }
-SuperAlignment::SuperAlignment(PhyloSuperTree *super_tree) : Alignment()
+// Loads the partitions named by params, indexes their taxa and reports the missing-data degree.
+SuperAlignment::SuperAlignment(Params &params) : Alignment()
 {
+    readFromParams(params);
+
+    init();
+
+    cout << "Degree of missing data: " << computeMissingData() << endl;
+
+#ifdef _OPENMP
+    if (params.num_threads > partitions.size()) {
+        cout << "Info: multi-threading strategy over alignment sites" << endl;
+    } else {
+        cout << "Info: multi-threading strategy over partitions" << endl;
+    }
+#endif
+    cout << endl;
+
+}
+
+// Fills `partitions` from params.partition_file, which may be a directory, a
+// comma-separated list of alignment files, or a NEXUS / RAxML-style partition file.
+void SuperAlignment::readFromParams(Params &params) {
+    if (isDirectory(params.partition_file)) {
+        // reading all files in the directory
+        readPartitionDir(params.partition_file, params.sequence_type, params.intype, params.model_name, params.remove_empty_seq);
+    } else if (strstr(params.partition_file, ",") != nullptr) {
+        // reading all files in a comma-separated list
+        readPartitionList(params.partition_file, params.sequence_type, params.intype, params.model_name, params.remove_empty_seq);
+    } else {
+        cout << "Reading partition model file " << params.partition_file << " ..."
<< endl;
+        if (detectInputFile(params.partition_file) == IN_NEXUS) {
+            readPartitionNexus(params);
+            if (partitions.empty()) {
+                outError("No partition found in SETS block. An example syntax looks like: \n#nexus\nbegin sets;\n charset part1=1-100;\n charset part2=101-300;\nend;");
+            }
+        } else
+            readPartitionRaxml(params);
+    }
+    if (partitions.empty())
+        outError("No partition found");
+
+    // check for duplicated partition names
+    unordered_set<string> part_names;
+    for (auto pit = partitions.begin(); pit != partitions.end(); pit++) {
+        if (part_names.find((*pit)->name) != part_names.end())
+            outError("Duplicated partition name ", (*pit)->name);
+        part_names.insert((*pit)->name);
+    }
+
+    if (params.subsampling != 0) {
+        // subsample a number of partitions; a negative value selects the complement
+        // (the sampled set is inverted further below)
+        int subsample = params.subsampling;
+        if (abs(subsample) >= partitions.size())
+            outError("--subsample must be between -" + convertIntToString(partitions.size()-1) + " and " + convertIntToString(partitions.size()-1));
+        cout << "Random subsampling " << ((subsample > 0) ? subsample : partitions.size() + subsample)
+            << " partitions (seed: " << params.subsampling_seed << ")..."
<< endl; + int *rstream; + init_random(params.subsampling_seed, false, &rstream); + // make sure to sub-sample exact number + vector sample; + int i; + sample.resize(partitions.size(), false); + for (int num = 0; num < abs(subsample); ) { + i = random_int(sample.size(), rstream); + if (!sample[i]) { + sample[i] = true; + num++; + } + } + finish_random(rstream); + if (subsample < 0) { + // reverse sampling + for (i = 0; i < sample.size(); i++) + sample[i] = !sample[i]; + } + vector keep_partitions; + for (i = 0; i < sample.size(); i++) + if (sample[i]) + keep_partitions.push_back(partitions[i]); + // now replace partitions + partitions = keep_partitions; + } + + // Initialize the counter for evaluated NNIs on subtrees + cout << "Subset\tType\tSeqs\tSites\tInfor\tInvar\tModel\tName" << endl; + int part = 0; + for (auto it = partitions.begin(); it != partitions.end(); it++, part++) { + cout << part+1 << "\t" << (*it)->sequence_type << "\t" << (*it)->getNSeq() + << "\t" << (*it)->getNSite() << "\t" << (*it)->num_informative_sites + << "\t" << (*it)->getNSite()-(*it)->num_variant_sites << "\t" + << (*it)->model_name << "\t" << (*it)->name << endl; + if ((*it)->num_variant_sites == 0) { + outWarning("No variant sites in partition " + (*it)->name); + } else if ((*it)->num_informative_sites == 0) { + outWarning("No parsimony-informative sites in partition " + (*it)->name); + } + } +} + +void SuperAlignment::init(StrVector *sequence_names) { + // start original code + max_num_states = 0; // first build taxa_index and partitions - int site, seq, nsite = super_tree->size(); - PhyloSuperTree::iterator it; + size_t nsite = partitions.size(); // BUG FIX 2016-11-29: when merging partitions with -m TESTMERGE, sequence order is changed // get the taxa names from existing tree - if (super_tree->root) { - super_tree->getTaxaName(seq_names); + if (sequence_names && !sequence_names->empty()) { + seq_names = *sequence_names; taxa_index.resize(seq_names.size()); for (auto i = 
taxa_index.begin(); i != taxa_index.end(); i++) i->resize(nsite, -1); } - - for (site = 0, it = super_tree->begin(); it != super_tree->end(); it++, site++) { - partitions.push_back((*it)->aln); - int nseq = (*it)->aln->getNSeq(); + + size_t site = 0; + for (auto it = partitions.begin(); it != partitions.end(); ++it, ++site) { + size_t nseq = (*it)->getNSeq(); //cout << "nseq = " << nseq << endl; - for (seq = 0; seq < nseq; seq++) { - int id = getSeqID((*it)->aln->getSeqName(seq)); + for (size_t seq = 0; seq < nseq; ++seq) { + int id = getSeqID((*it)->getSeqName(seq)); if (id < 0) { - seq_names.push_back((*it)->aln->getSeqName(seq)); + seq_names.push_back((*it)->getSeqName(seq)); id = seq_names.size()-1; IntVector vec(nsite, -1); vec[site] = seq; @@ -64,8 +182,7 @@ SuperAlignment::SuperAlignment(PhyloSuperTree *super_tree) : Alignment() } void SuperAlignment::buildPattern() { - int site, seq, nsite = partitions.size(); - + size_t nsite = partitions.size(); seq_type = SEQ_BINARY; num_states = 2; // binary type because the super alignment presents the presence/absence of taxa in the partitions STATE_UNKNOWN = 2; @@ -74,27 +191,595 @@ void SuperAlignment::buildPattern() { pattern_index.clear(); VerboseMode save_mode = verbose_mode; verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern - int nseq = getNSeq(); - for (site = 0; site < nsite; site++) { + size_t nseq = getNSeq(); + for (size_t site = 0; site < nsite; site++) { Pattern pat; pat.resize(nseq, 0); - for (seq = 0; seq < nseq; seq++) + for (size_t seq = 0; seq < nseq; seq++) pat[seq] = (taxa_index[seq][site] >= 0)? 
1 : 0;
         addPattern(pat, site);
     }
     verbose_mode = save_mode;
     countConstSite();
-    buildSeqStates();
+//    buildSeqStates();
 }
+// Reads partitions from the comma-separated file params.partition_file.
+// Each record is: name, model, alignment file, sequence type, position spec (rest of line).
+// Empty model / alignment-file / sequence-type fields fall back to the values in params.
+void SuperAlignment::readPartition(Params &params) {
+    try {
+        ifstream in;
+        in.exceptions(ios::failbit | ios::badbit);
+        in.open(params.partition_file);
+        in.exceptions(ios::badbit);
+
+        while (!in.eof()) {
+            CharSet info;
+            getline(in, info.name, ',');
+            if (in.eof()) break;
+            getline(in, info.model_name, ',');
+            // test the field just read (not the SuperAlignment's own model_name member)
+            if (info.model_name == "") info.model_name = params.model_name;
+            getline(in, info.aln_file, ',');
+            if (info.aln_file == "" && params.aln_file) info.aln_file = params.aln_file;
+            getline(in, info.sequence_type, ',');
+            if (info.sequence_type=="" && params.sequence_type)
+                info.sequence_type = params.sequence_type;
+            safeGetline(in, info.position_spec);
+            trimString(info.sequence_type);
+            // cout << endl << "Reading partition " << info.name << " (model=" << info.model_name << ", aln=" <<
+            // info.aln_file << ", seq=" << info.sequence_type << ", pos=" << ((info.position_spec.length() >= 20) ? info.position_spec.substr(0,20)+"..." : info.position_spec) << ") ..."
<< endl;
+
+            // TODO move this to supertree
+//            info.nniMoves[0].ptnlh = NULL;
+//            info.nniMoves[1].ptnlh = NULL;
+//            info.cur_ptnlh = NULL;
+//            part_info.push_back(info);
+            Alignment *part_aln = createAlignment(info.aln_file, info.sequence_type.c_str(), params.intype, info.model_name);
+            if (!info.position_spec.empty()) {
+                // keep only the requested sites; the full alignment is released right away
+                Alignment *new_aln = new Alignment();
+                new_aln->extractSites(part_aln, info.position_spec.c_str());
+                delete part_aln;
+                part_aln = new_aln;
+            }
+            part_aln->name = info.name;
+            part_aln->model_name = info.model_name;
+            part_aln->position_spec = info.position_spec;
+            part_aln->aln_file = info.aln_file;
+            part_aln->sequence_type = info.sequence_type;
+            partitions.push_back(part_aln);
+            // TODO move this to supertree
+//            PhyloTree *tree = new PhyloTree(part_aln);
+//            push_back(tree);
+        }
+
+        in.clear();
+        // set the failbit again
+        in.exceptions(ios::failbit | ios::badbit);
+        in.close();
+    } catch (ios::failure &) {
+        // caught by reference, like the printPartition* functions in this file
+        outError(ERR_READ_INPUT);
+    } catch (string str) {
+        outError(str);
+    }
+
+
+}
+
+// Reads a RAxML-style partition file (params.partition_file) and cuts the
+// alignment given by params.aln_file into one Alignment per partition.
+void SuperAlignment::readPartitionRaxml(Params &params) {
+    try {
+        ifstream in;
+        in.exceptions(ios::failbit | ios::badbit);
+        in.open(params.partition_file);
+        in.exceptions(ios::badbit);
+//        PartitionInfo info;
+        Alignment *input_aln = NULL;
+        if (!params.aln_file)
+            outError("Please supply an alignment with -s option");
+
+        input_aln = createAlignment(params.aln_file, params.sequence_type, params.intype, params.model_name);
+
+        cout << endl << "Partition file is not in NEXUS format, assuming RAxML-style partition file..."
<< endl;
+
+        // rate heterogeneity suffix of the global model (everything from the first '+' or '*'),
+        // appended to every partition model below
+        size_t pos = params.model_name.find_first_of("+*");
+        string rate_type = "";
+        if (pos != string::npos) rate_type = params.model_name.substr(pos);
+
+        while (!in.eof()) {
+            CharSet info;
+            getline(in, info.model_name, ',');
+            if (in.eof()) break;
+            trimString(info.model_name);
+            // std::transform(info.model_name.begin(), info.model_name.end(), info.model_name.begin(), ::toupper);
+
+            bool is_ASC = info.model_name.substr(0,4) == "ASC_";
+            if (is_ASC) info.model_name.erase(0, 4);
+            StateFreqType freq = FREQ_UNKNOWN;
+            // Skip the suffix test for an empty name: dereferencing rbegin() of an empty
+            // string is undefined; the empty name is reported by the check just below.
+            if (!info.model_name.empty() && info.model_name.find_first_of("*+{") == string::npos ) {
+                if (*info.model_name.rbegin() == 'F' && info.model_name != "DAYHOFF") {
+                    freq = FREQ_EMPIRICAL;
+                    info.model_name.erase(info.model_name.length()-1);
+                } else if (*info.model_name.rbegin() == 'X' && info.model_name != "LG4X") {
+                    freq = FREQ_ESTIMATE;
+                    info.model_name.erase(info.model_name.length()-1);
+                }
+            }
+
+            if (info.model_name.empty())
+                outError("Please give model names in partition file!");
+            if (info.model_name == "BIN") {
+                info.sequence_type = "BIN";
+                info.model_name = "GTR2";
+            } else if (info.model_name == "DNA") {
+                info.sequence_type = "DNA";
+                info.model_name = "GTR";
+            } else if (info.model_name == "MULTI") {
+                info.sequence_type = "MORPH";
+                info.model_name = "MK";
+            } else if (info.model_name.substr(0,5) == "CODON") {
+                info.sequence_type = info.model_name;
+                info.model_name = "GY";
+            } else {
+                info.sequence_type = "AA";
+                if (*info.model_name.begin() == '[') {
+                    if (*info.model_name.rbegin() != ']')
+                        outError("User-defined protein model should be [myProtenSubstitutionModelFileName]");
+                    info.model_name = info.model_name.substr(1, info.model_name.length()-2);
+                }
+            }
+
+            if (freq == FREQ_EMPIRICAL)
+                info.model_name += "+F";
+            else if (freq == FREQ_ESTIMATE)
+                info.model_name += "+FO";
+            if (is_ASC)
+                info.model_name += "+ASC";
+            info.model_name += rate_type;
+
+            getline(in, info.name, '=');
+            trimString(info.name);
+            if (info.name.empty())
+
outError("Please give partition names in partition file!");
+
+            safeGetline(in, info.position_spec);
+            trimString(info.position_spec);
+            if (info.position_spec.empty())
+                outError("Please specify alignment positions for partition " + info.name);
+            std::replace(info.position_spec.begin(), info.position_spec.end(), ',', ' ');
+
+            // cout << "Reading partition " << info.name << " (model=" << info.model_name << ", seq=" << info.sequence_type << ", pos=" << ((info.position_spec.length() >= 20) ? info.position_spec.substr(0,20)+"..." : info.position_spec) << ") ..." << endl;
+
+            // TODO to supertree
+//            info.nniMoves[0].ptnlh = NULL;
+//            info.nniMoves[1].ptnlh = NULL;
+//            info.cur_ptnlh = NULL;
+//            part_info.push_back(info);
+            Alignment *part_aln = new Alignment();
+            part_aln->extractSites(input_aln, info.position_spec.c_str());
+
+            Alignment *new_aln;
+            if (params.remove_empty_seq)
+                new_aln = part_aln->removeGappySeq();
+            else
+                new_aln = part_aln;
+            // also rebuild states set of each sequence for likelihood computation
+//            new_aln->buildSeqStates();
+
+            if (part_aln != new_aln) delete part_aln;
+
+            new_aln->name = info.name;
+            new_aln->model_name = info.model_name;
+            new_aln->position_spec = info.position_spec;
+            new_aln->aln_file = info.aln_file;
+            new_aln->sequence_type = info.sequence_type;
+            partitions.push_back(new_aln);
+            // TODO move to supertree
+//            PhyloTree *tree = new PhyloTree(new_aln);
+//            push_back(tree);
+            // cout << new_aln->getNSeq() << " sequences and " << new_aln->getNSite() << " sites extracted" << endl;
+            // params = origin_params;
+        }
+
+        in.clear();
+        // set the failbit again
+        in.exceptions(ios::failbit | ios::badbit);
+        in.close();
+        // Each partition was extracted into its own Alignment object, so the full input
+        // alignment is no longer needed; readPartitionNexus releases its input the same way.
+        delete input_aln;
+    } catch (ios::failure &) {
+        // caught by reference, like the printPartition* functions in this file
+        outError(ERR_READ_INPUT);
+    } catch (string str) {
+        outError(str);
+    }
+
+
+}
+
+// Reads partitions from a NEXUS file (params.partition_file) through its SETS block.
+void SuperAlignment::readPartitionNexus(Params &params) {
+//    Params origin_params = params;
+    MSetsBlock *sets_block = new MSetsBlock();
+    NxsTaxaBlock *taxa_block = NULL;
+    NxsAssumptionsBlock
*assumptions_block = NULL; + NxsDataBlock *data_block = NULL; + MyReader nexus(params.partition_file); + nexus.Add(sets_block); + + if (!params.aln_file) { + taxa_block = new NxsTaxaBlock(); + assumptions_block = new NxsAssumptionsBlock(taxa_block); + data_block = new NxsDataBlock(taxa_block, assumptions_block); + nexus.Add(taxa_block); + nexus.Add(assumptions_block); + nexus.Add(data_block); + } + + MyToken token(nexus.inf); + nexus.Execute(token); + + Alignment *input_aln = NULL; + if (params.aln_file) { + input_aln = createAlignment(params.aln_file, params.sequence_type, params.intype, params.model_name); + } else { + if (data_block->GetNTax() > 0) { + input_aln = new Alignment(data_block, params.sequence_type, params.model_name); + } + delete data_block; + delete assumptions_block; + delete taxa_block; + } + + bool empty_partition = true; + vector::iterator it; + for (it = sets_block->charsets.begin(); it != sets_block->charsets.end(); it++) + if ((*it)->model_name != "") { + empty_partition = false; + break; + } + if (empty_partition) { + cout << "NOTE: No CharPartition defined, use all CharSets" << endl; + } + + cout << endl << "Loading " << sets_block->charsets.size() << " partitions..." << endl; + + for (it = sets_block->charsets.begin(); it != sets_block->charsets.end(); it++) + if (empty_partition || (*it)->char_partition != "") { + if ((*it)->model_name == "") + (*it)->model_name = params.model_name; + if ((*it)->aln_file == "" && !input_aln) { + if (!(*it)->position_spec.empty()) { + (*it)->aln_file = (*it)->position_spec; + (*it)->position_spec = ""; + } else + outError("No input data for partition ", (*it)->name); + } + if ((*it)->sequence_type=="" && params.sequence_type) + (*it)->sequence_type = params.sequence_type; + + if ((*it)->sequence_type == "" && !(*it)->model_name.empty()) { + // try to get sequence type from model + //TODO: why compile error? 
+ (*it)->sequence_type = detectSeqTypeName((*it)->model_name.substr(0, (*it)->model_name.find_first_of("+*"))); + } + if ((*it)->aln_file == "" && ((*it)->position_spec == "" || (*it)->position_spec == "*")) + outError("Empty position range for partition ", (*it)->name); + trimString((*it)->sequence_type); + // cout << endl << "Reading partition " << info.name << " (model=" << info.model_name << ", aln=" << + // info.aln_file << ", seq=" << info.sequence_type << ", pos=" << ((info.position_spec.length() >= 20) ? info.position_spec.substr(0,20)+"..." : info.position_spec) << ") ..." << endl; + if ((*it)->sequence_type != "" && Alignment::getSeqType((*it)->sequence_type.c_str()) == SEQ_UNKNOWN) + outError("Unknown sequence type " + (*it)->sequence_type); + + // TODO move to supertree +// info.nniMoves[0].ptnlh = NULL; +// info.nniMoves[1].ptnlh = NULL; +// info.cur_ptnlh = NULL; +// part_info.push_back(info); + Alignment *part_aln; + if ((*it)->aln_file != "") { + part_aln = createAlignment((*it)->aln_file, (*it)->sequence_type.c_str(), params.intype, (*it)->model_name); + } else { + part_aln = input_aln; + } + if (!(*it)->position_spec.empty() && (*it)->position_spec != "*") { + Alignment *new_aln = new Alignment(); + new_aln->extractSites(part_aln, (*it)->position_spec.c_str()); + if (part_aln != input_aln) delete part_aln; + part_aln = new_aln; + } + if (part_aln->seq_type == SEQ_DNA && ((*it)->sequence_type.substr(0, 5) == "CODON" || (*it)->sequence_type.substr(0, 5) == "NT2AA")) { + Alignment *new_aln = new Alignment(); + new_aln->convertToCodonOrAA(part_aln, &(*it)->sequence_type[5], (*it)->sequence_type.substr(0, 5) == "NT2AA"); + if (part_aln != input_aln) delete part_aln; + part_aln = new_aln; + } + Alignment *new_aln; + if (params.remove_empty_seq) + new_aln = part_aln->removeGappySeq(); + else + new_aln = part_aln; + // also rebuild states set of each sequence for likelihood computation +// new_aln->buildSeqStates(); + + if (part_aln != new_aln && part_aln 
!= input_aln) delete part_aln;
+            new_aln->name = (*it)->name;
+            new_aln->model_name = (*it)->model_name;
+            new_aln->aln_file = (*it)->aln_file;
+            new_aln->position_spec = (*it)->position_spec;
+            new_aln->sequence_type = (*it)->sequence_type;
+            new_aln->tree_len = (*it)->tree_len;
+            partitions.push_back(new_aln);
+//            PhyloTree *tree = new PhyloTree(new_aln);
+//            push_back(tree);
+//            params = origin_params;
+            // cout << new_aln->getNSeq() << " sequences and " << new_aln->getNSite() << " sites extracted" << endl;
+        }
+
+    if (input_aln)
+        delete input_aln;
+    delete sets_block;
+}
+
+// Loads every file found in partition_dir (in sorted name order) as one partition.
+// @param sequence_type  sequence type applied to each file; may be NULL
+// @param model          model name passed to createAlignment and stored on each partition
+// @param remove_empty_seq  if true, each partition is passed through removeGappySeq()
+void SuperAlignment::readPartitionDir(string partition_dir, char *sequence_type,
+    InputType &intype, string model, bool remove_empty_seq) {
+    // Params origin_params = params;
+
+    StrVector filenames;
+    string dir = partition_dir;
+    if (dir.back() != '/')
+        dir.append("/");
+    getFilesInDir(partition_dir.c_str(), filenames);
+    if (filenames.empty())
+        outError("No file found in ", partition_dir);
+    std::sort(filenames.begin(), filenames.end());
+    cout << "Reading " << filenames.size() << " alignment files in directory " << partition_dir << endl;
+
+    for (auto it = filenames.begin(); it != filenames.end(); it++)
+    {
+        Alignment *part_aln;
+        // use the caller-supplied model; the member model_name is not the argument
+        part_aln = createAlignment(dir+*it, sequence_type, intype, model);
+//        if (part_aln->seq_type == SEQ_DNA && (strncmp(params.sequence_type, "CODON", 5) == 0 || strncmp(params.sequence_type, "NT2AA", 5) == 0)) {
+//            Alignment *new_aln = new Alignment();
+//            new_aln->convertToCodonOrAA(part_aln, params.sequence_type+5, strncmp(params.sequence_type, "NT2AA", 5) == 0);
+//            delete part_aln;
+//            part_aln = new_aln;
+//        }
+        Alignment *new_aln;
+        if (remove_empty_seq)
+            new_aln = part_aln->removeGappySeq();
+        else
+            new_aln = part_aln;
+        // also rebuild states set of each sequence for likelihood computation
+//        new_aln->buildSeqStates();
+
+        if (part_aln != new_aln) delete part_aln;
+        new_aln->name = *it;
+        new_aln->model_name = model;
+
new_aln->aln_file = dir + *it;
+        new_aln->position_spec = "";
+        if (sequence_type)
+            new_aln->sequence_type = sequence_type;
+        partitions.push_back(new_aln);
+    }
+}
+
+// Loads every file named in the comma-separated file_list as one partition.
+// @param sequence_type  sequence type applied to each file; may be NULL
+// @param model          model name passed to createAlignment and stored on each partition
+// @param remove_empty_seq  if true, each partition is passed through removeGappySeq()
+void SuperAlignment::readPartitionList(string file_list, char *sequence_type,
+    InputType &intype, string model, bool remove_empty_seq)
+{
+    // Params origin_params = params;
+
+    StrVector filenames;
+    stringstream ss(file_list);
+    string token;
+    while (getline(ss, token, ','))
+        filenames.push_back(token);
+    if (filenames.empty())
+        outError("No file found in ", file_list);
+    cout << "Reading " << filenames.size() << " alignment files..." << endl;
+
+    for (auto it = filenames.begin(); it != filenames.end(); it++)
+    {
+        Alignment *part_aln;
+        // use the caller-supplied model; the member model_name is not the argument
+        part_aln = createAlignment(*it, sequence_type, intype, model);
+        // if (part_aln->seq_type == SEQ_DNA && (strncmp(params.sequence_type, "CODON", 5) == 0 || strncmp(params.sequence_type, "NT2AA", 5) == 0)) {
+        // Alignment *new_aln = new Alignment();
+        // new_aln->convertToCodonOrAA(part_aln, params.sequence_type+5, strncmp(params.sequence_type, "NT2AA", 5) == 0);
+        // delete part_aln;
+        // part_aln = new_aln;
+        // }
+        Alignment *new_aln;
+        if (remove_empty_seq)
+            new_aln = part_aln->removeGappySeq();
+        else
+            new_aln = part_aln;
+        // also rebuild states set of each sequence for likelihood computation
+//        new_aln->buildSeqStates();
+
+        if (part_aln != new_aln) delete part_aln;
+        new_aln->name = *it;
+        new_aln->model_name = model;
+        new_aln->aln_file = *it;
+        new_aln->position_spec = "";
+        if (sequence_type)
+            new_aln->sequence_type = sequence_type;
+        partitions.push_back(new_aln);
+    }
+}
+
+// Writes the partition scheme to `filename` via printPartition(ostream&, ...);
+// any stream failure is reported through outError(ERR_WRITE_OUTPUT, filename).
+void SuperAlignment::printPartition(const char *filename, const char *aln_file) {
+    try {
+        ofstream out;
+        out.exceptions(ios::failbit | ios::badbit);
+        out.open(filename);
+        printPartition(out, aln_file);
+        out.close();
+        cout << "Partition information was printed to " << filename << endl;
+    } catch (ios::failure &) {
+        outError(ERR_WRITE_OUTPUT, filename);
+    }
+
+} + +void SuperAlignment::printPartition(ostream &out, const char *aln_file, bool append) { + if (append) + out << endl; + else + out << "#nexus" << endl; + if (aln_file) + out << "[ partition information for alignment written in " << aln_file <<" file ]" << endl; + out << "begin sets;" << endl; + int part; + int start_site = 1; + for (size_t part = 0; part < partitions.size(); ++part) { + string name = partitions[part]->name; + replace(name.begin(), name.end(), '+', '_'); + int end_site = start_site + partitions[part]->getNSite(); + out << " charset " << name << " = " << start_site << "-" << end_site-1 << ";" << endl; + start_site = end_site; + } + bool ok_model = true; + for (size_t part = 0; part < partitions.size(); ++part) + if (partitions[part]->model_name.empty()) { + ok_model = false; + break; + } + if (ok_model) { + out << " charpartition mymodels =" << endl; + for (part = 0; part < partitions.size(); part++) { + string name = partitions[part]->name; + replace(name.begin(), name.end(), '+', '_'); + if (part > 0) out << "," << endl; +// out << " " << at(part)->getModelNameParams() << ":" << name; + out << " " << partitions[part]->model_name << ":" << name; + } + out << ";" << endl; + } + out << "end;" << endl; +} + +void SuperAlignment::printBestPartition(const char *filename) { + try { + ofstream out; + out.exceptions(ios::failbit | ios::badbit); + out.open(filename); + out << "#nexus" << endl + << "begin sets;" << endl; + int part; + for (part = 0; part < partitions.size(); part++) { + string name = partitions[part]->name; + replace(name.begin(), name.end(), '+', '_'); + out << " charset " << name << " = "; + if (!partitions[part]->aln_file.empty()) out << partitions[part]->aln_file << ": "; + if (partitions[part]->seq_type == SEQ_CODON) + out << "CODON, "; + string pos = partitions[part]->position_spec; + replace(pos.begin(), pos.end(), ',' , ' '); + out << pos << ";" << endl; + } + bool ok_model = true; + for (part = 0; part < partitions.size(); 
part++) + if (partitions[part]->model_name.empty()) { + ok_model = false; + break; + } + if (ok_model) { + out << " charpartition mymodels =" << endl; + for (part = 0; part < partitions.size(); part++) { + string name = partitions[part]->name; + replace(name.begin(), name.end(), '+', '_'); + if (part > 0) out << "," << endl; + out << " " << partitions[part]->model_name << ": " << name; + } + out << ";" << endl; + } + out << "end;" << endl; + out.close(); + cout << "Partition information was printed to " << filename << endl; + } catch (ios::failure &) { + outError(ERR_WRITE_OUTPUT, filename); + } + +} + + +void SuperAlignment::printPartitionRaxml(const char *filename) { + int part; +// for (part = 0; part < partitions.size(); part++) { +// if (partitions[part]->aln_file != "") { +// cout << "INFO: Printing partition in RAxML format is not possible" << endl; +// return; +// } +// } + try { + ofstream out; + out.exceptions(ios::failbit | ios::badbit); + out.open(filename); + int start_site; + for (part = 0, start_site = 1; part < partitions.size(); part++) { + string name = partitions[part]->name; + replace(name.begin(), name.end(), '+', '_'); + int end_site = start_site + partitions[part]->getNSite(); + switch (partitions[part]->seq_type) { + case SEQ_DNA: out << "DNA, "; break; + case SEQ_BINARY: out << "BIN, "; break; + case SEQ_MORPH: out << "MULTI, "; break; + default: out << partitions[part]->model_name << ","; break; + } + out << name << " = " << start_site << "-" << end_site-1 << endl; + start_site = end_site; + } + out.close(); + cout << "Partition information in Raxml format was printed to " << filename << endl; + } catch (ios::failure &) { + outError(ERR_WRITE_OUTPUT, filename); + } + +} + +void SuperAlignment::printBestPartitionRaxml(const char *filename) { + int part; +// for (part = 0; part < partitions.size(); part++) { +// if (partitions[part]->aln_file != "") { +// cout << "INFO: Printing partition in RAxML format is not possible" << endl; +// return; 
+// } +// } + try { + ofstream out; + out.exceptions(ios::failbit | ios::badbit); + out.open(filename); + for (part = 0; part < partitions.size(); part++) { + string name = partitions[part]->name; + replace(name.begin(), name.end(), '+', '_'); + if (partitions[part]->model_name.find("+ASC") != string::npos) + out << "ASC_"; + switch (partitions[part]->seq_type) { + case SEQ_DNA: out << "DNA"; break; + case SEQ_BINARY: out << "BIN"; break; + case SEQ_MORPH: out << "MULTI"; break; + case SEQ_PROTEIN: + out << partitions[part]->model_name.substr(0, partitions[part]->model_name.find_first_of("*{+")); + break; + case SEQ_CODON: + out << "CODON_" << partitions[part]->model_name.substr(0, partitions[part]->model_name.find_first_of("*{+")); + break; + default: out << partitions[part]->model_name; break; + } + if (partitions[part]->model_name.find("+FO") != string::npos) + out << "X"; + else if (partitions[part]->model_name.find("+F") != string::npos) + out << "F"; + + out << ", " << name << " = " << partitions[part]->position_spec << endl; + } + out.close(); + cout << "Partition information in Raxml format was printed to " << filename << endl; + } catch (ios::failure &) { + outError(ERR_WRITE_OUTPUT, filename); + } + +} void SuperAlignment::linkSubAlignment(int part) { ASSERT(taxa_index.size() == getNSeq()); - int nseq = getNSeq(), seq; + size_t nseq = getNSeq(); vector checked; checked.resize(partitions[part]->getNSeq(), false); - for (seq = 0; seq < nseq; seq++) { + for (size_t seq = 0; seq < nseq; seq++) { int id = partitions[part]->getSeqID(getSeqName(seq)); if (id < 0) taxa_index[seq][part] = -1; @@ -102,12 +787,9 @@ void SuperAlignment::linkSubAlignment(int part) { taxa_index[seq][part] = id; checked[id] = true; } - } - if (verbose_mode >= VB_MED) { - } // sanity check that all seqnames in partition must be present in superalignment - for (seq = 0; seq < checked.size(); seq++) { + for (size_t seq = 0; seq < checked.size(); seq++) { ASSERT(checked[seq]); } } @@ 
-115,10 +797,13 @@ void SuperAlignment::linkSubAlignment(int part) { void SuperAlignment::extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_true_char, int min_taxa, IntVector *kept_partitions) { ASSERT(aln->isSuperAlignment()); SuperAlignment *saln = (SuperAlignment*)aln; + name = aln->name; + model_name = aln->model_name; + sequence_type = aln->sequence_type; + position_spec = aln->position_spec; + aln_file = aln->aln_file; - int i; - IntVector::iterator it; - for (it = seq_id.begin(); it != seq_id.end(); it++) { + for (auto it = seq_id.begin(); it != seq_id.end(); it++) { ASSERT(*it >= 0 && *it < aln->getNSeq()); seq_names.push_back(aln->getSeqName(*it)); } @@ -127,8 +812,9 @@ void SuperAlignment::extractSubAlignment(Alignment *aln, IntVector &seq_id, int //Alignment::extractSubAlignment(aln, seq_id, 0); taxa_index.resize(getNSeq()); - for (i = 0; i < getNSeq(); i++) + for (size_t i = 0; i < getNSeq(); ++i) { taxa_index[i].resize(saln->partitions.size(), -1); + } int part = 0; // partitions.resize(saln->partitions.size()); @@ -150,26 +836,136 @@ void SuperAlignment::extractSubAlignment(Alignment *aln, IntVector &seq_id, int } if (partitions.size() < saln->partitions.size()) { - for (i = 0; i < getNSeq(); i++) + for (size_t i = 0; i < getNSeq(); ++i) { taxa_index[i].resize(partitions.size()); + } } // now build the patterns based on taxa_index buildPattern(); } +SuperAlignment *SuperAlignment::extractPartitions(IntVector &part_id) { + SuperAlignment *newaln = new SuperAlignment; + newaln->name = name; + newaln->model_name = model_name; + newaln->sequence_type = sequence_type; + newaln->position_spec = position_spec; + newaln->aln_file = aln_file; + + unordered_set seq_names_set; + IntVector::iterator it; + for (it = part_id.begin(); it != part_id.end(); it++) { + for (auto seq = partitions[*it]->seq_names.begin(); seq != partitions[*it]->seq_names.end(); seq++) + if (seq_names_set.find(*seq) == seq_names_set.end()) { + 
newaln->seq_names.push_back(*seq); + seq_names_set.insert(*seq); + } + } + + newaln->taxa_index.resize(newaln->getNSeq()); + for (size_t i = 0; i < newaln->getNSeq(); ++i) { + newaln->taxa_index[i].resize(part_id.size(), -1); + } + + size_t part = 0; + for (auto ait = part_id.begin(); ait != part_id.end(); ++ait, ++part) { + newaln->partitions.push_back(partitions[*ait]); + newaln->linkSubAlignment(newaln->partitions.size()-1); + } + + // now build the patterns based on taxa_index + newaln->buildPattern(); + return newaln; +} + +void SuperAlignment::removePartitions(set &removed_id) { + // remove part_id from partitions + vector new_partitions; + for (size_t i = 0; i < partitions.size(); ++i) + if (removed_id.find(i) == removed_id.end()) { + // not found in the removed set + new_partitions.push_back(partitions[i]); + } else { + delete partitions[i]; + partitions[i] = NULL; + } + + ASSERT(new_partitions.size() + removed_id.size() == partitions.size()); + partitions = new_partitions; + + // get the union seq_names of remaining partitions + unordered_set seq_names_set; + seq_names.clear(); + for (auto it = partitions.begin(); it != partitions.end(); it++) { + for (auto seq = (*it)->seq_names.begin(); seq != (*it)->seq_names.end(); seq++) + if (seq_names_set.find(*seq) == seq_names_set.end()) { + seq_names.push_back(*seq); + seq_names_set.insert(*seq); + } + } + + + // build the taxa_index + taxa_index.resize(getNSeq()); + for (size_t i = 0; i < getNSeq(); ++i) + taxa_index[i].resize(partitions.size(), -1); + for (size_t i = 0; i < partitions.size(); ++i) + linkSubAlignment(i); + + // now build the patterns based on taxa_index + buildPattern(); +} + Alignment *SuperAlignment::removeIdenticalSeq(string not_remove, bool keep_two, StrVector &removed_seqs, StrVector &target_seqs) { + auto n = getNSeq(); IntVector checked; vector removed; - checked.resize(getNSeq(), 0); - removed.resize(getNSeq(), false); - int seq1; + checked.resize(n, 0); + removed.resize(n, false); + + 
//JB2020-06-23 Begin : Determine hashes for all the sequences + auto startHash = getRealTime(); + vector hashes; + hashes.resize(n, 0); + #ifdef USE_BOOST + #ifdef _OPENMP + #pragma omp parallel for + #endif + for (int seq1=0; seq1begin(); it != (*ait)->end(); it++) { + adjustHash((*it)[subseq1],hash); + } + } + } + hashes[seq1] = hash; + } + if (verbose_mode >= VB_MED) { + auto hashTime = getRealTime() - startHash; + cout << "Hashing sequences took " << hashTime << " wall-clock seconds" << endl; + } + #endif + //JB2020-06-23 Finish + + bool listIdentical = !Params::getInstance().suppress_duplicate_sequence_warnings; - for (seq1 = 0; seq1 < getNSeq(); seq1++) { + auto startCheck = getRealTime(); + for (size_t seq1 = 0; seq1 < getNSeq(); ++seq1) { if (checked[seq1]) continue; bool first_ident_seq = true; - for (int seq2 = seq1+1; seq2 < getNSeq(); seq2++) { - if (getSeqName(seq2) == not_remove) continue; + for (size_t seq2 = seq1+1; seq2 < getNSeq(); ++seq2) { + if (getSeqName(seq2) == not_remove || removed[seq2]) { continue; + } + if (hashes[seq1]!=hashes[seq2]) { + continue; + } bool equal_seq = true; int part = 0; // check if seq1 and seq2 are identical over all partitions @@ -189,36 +985,47 @@ Alignment *SuperAlignment::removeIdenticalSeq(string not_remove, bool keep_two, break; } // now if both seqs are present, check sequence content - for (iterator it = (*ait)->begin(); it != (*ait)->end(); it++) + for (iterator it = (*ait)->begin(); it != (*ait)->end(); it++) { if ((*it)[subseq1] != (*it)[subseq2]) { equal_seq = false; break; } - } - if (equal_seq) { - if (removed_seqs.size() < getNSeq()-3 && (!keep_two || !first_ident_seq)) { - removed_seqs.push_back(getSeqName(seq2)); - target_seqs.push_back(getSeqName(seq1)); - removed[seq2] = true; - } else { - cout << "NOTE: " << getSeqName(seq2) << " is identical to " << getSeqName(seq1) << " but kept for subsequent analysis" << endl; } - checked[seq2] = 1; - first_ident_seq = false; } + if (!equal_seq) { + continue; + 
} + if (removed_seqs.size() + 3 < getNSeq() && (!keep_two || !first_ident_seq)) { + removed_seqs.push_back(getSeqName(seq2)); + target_seqs.push_back(getSeqName(seq1)); + removed[seq2] = true; + } else { + if (listIdentical) { + cout << "NOTE: " << getSeqName(seq2) << " is identical to " << getSeqName(seq1) + << " but kept for subsequent analysis" << endl; + } + } + checked[seq2] = 1; + first_ident_seq = false; } checked[seq1] = 1; } + if (verbose_mode >= VB_MED) { + auto checkTime = getRealTime() - startCheck; + cout << "Checking for identical sequences took " << checkTime << " wall-clock seconds" << endl; + } if (removed_seqs.empty()) return this; // do nothing if the list is empty - if (removed_seqs.size() >= getNSeq()-3) + if (removed_seqs.size() + 3 >= getNSeq()) { outWarning("Your alignment contains too many identical sequences!"); - + } // now remove identical sequences IntVector keep_seqs; - for (seq1 = 0; seq1 < getNSeq(); seq1++) + for (size_t seq1 = 0; seq1 < getNSeq(); ++seq1) + { if (!removed[seq1]) keep_seqs.push_back(seq1); + } SuperAlignment *aln; aln = new SuperAlignment; aln->extractSubAlignment(this, keep_seqs, 0); @@ -227,7 +1034,7 @@ Alignment *SuperAlignment::removeIdenticalSeq(string not_remove, bool keep_two, int SuperAlignment::checkAbsentStates(string msg) { int count = 0; - for (auto it = partitions.begin(); it != partitions.end(); it++) + for (auto it = partitions.begin(); it != partitions.end(); ++it) count += (*it)->checkAbsentStates("partition " + convertIntToString((it-partitions.begin())+1)); return count; } @@ -281,7 +1088,7 @@ void SuperAlignment::getSitePatternIndex(IntVector &pattern_index) { void SuperAlignment::getPatternFreq(IntVector &pattern_freq) { ASSERT(isSuperAlignment()); - int offset = 0; + size_t offset = 0; if (!pattern_freq.empty()) pattern_freq.resize(0); for (vector::iterator it = partitions.begin(); it != partitions.end(); it++) { IntVector freq; @@ -291,6 +1098,15 @@ void 
SuperAlignment::getPatternFreq(IntVector &pattern_freq) { } } +void SuperAlignment::getPatternFreq(int *pattern_freq) { + ASSERT(isSuperAlignment()); + size_t offset = 0; + for (vector::iterator it = partitions.begin(); it != partitions.end(); it++) { + (*it)->getPatternFreq(pattern_freq + offset); + offset += (*it)->getNPattern(); + } +} + void SuperAlignment::printSiteInfo(const char* filename) { try { ofstream out(filename); @@ -304,11 +1120,80 @@ void SuperAlignment::printSiteInfo(const char* filename) { } } +void SuperAlignment::computeDivergenceMatrix(double *pair_freq, double *state_freq, bool normalize) { + int nstates = partitions[0]->num_states; + int nstates2 = nstates*nstates; + memset(pair_freq, 0, sizeof(double)*nstates2); + memset(state_freq, 0, sizeof(double)*nstates); + + double *part_pair_freq = new double[nstates2]; + double *part_state_freq = new double[nstates]; + int i, j; + + for (auto it = partitions.begin(); it != partitions.end(); it++) { + (*it)->computeDivergenceMatrix(part_pair_freq, part_state_freq, false); + for (i = 0; i < nstates2; i++) + pair_freq[i] += part_pair_freq[i]; + for (i = 0; i < nstates; i++) + state_freq[i] += part_state_freq[i]; + } + if (normalize) { + double sum = 0.0; + for (i = 0; i < nstates; i++) + sum += state_freq[i]; + sum = 1.0/sum; + for (i = 0; i < nstates; i++) + state_freq[i] *= sum; + for (i = 0; i < nstates; i++) { + sum = 0.0; + double *pair_freq_ptr = pair_freq + (i*nstates); + for (j = 0; j < nstates; j++) + sum += pair_freq_ptr[j]; + sum = 1.0/sum; + for (j = 0; j < nstates; j++) + pair_freq_ptr[j] *= sum; + } + } + delete [] part_state_freq; + delete [] part_pair_freq; +} + +void SuperAlignment::doSymTest(size_t vecid, vector &vec_sym, vector &vec_marsym, + vector &vec_intsym, int *rstream, vector *stats) { + + vector >all_stats; + if (stats) + all_stats.resize(partitions.size()); + + int nparts = partitions.size(); + #ifdef _OPENMP + #pragma omp parallel for + #endif + for (int i = 0; i < nparts; 
i++) { + if (stats) { + partitions[i]->doSymTest(vecid + i, vec_sym, vec_marsym, vec_intsym, rstream, &all_stats[i]); + for (auto it = all_stats[i].begin(); it != all_stats[i].end(); it++) + it->part = i; + } else + partitions[i]->doSymTest(vecid + i, vec_sym, vec_marsym, vec_intsym, rstream); + } + if (stats) { + for (int i = 0; i < nparts; i++) + stats->insert(stats->end(), all_stats[i].begin(), all_stats[i].end()); + } +} + +/* void SuperAlignment::createBootstrapAlignment(Alignment *aln, IntVector* pattern_freq, const char *spec) { ASSERT(aln->isSuperAlignment()); Alignment::copyAlignment(aln); SuperAlignment *super_aln = (SuperAlignment*) aln; ASSERT(partitions.empty()); + name = aln->name; + model_name = aln->model_name; + sequence_type = aln->sequence_type; + position_spec = aln->position_spec; + aln_file = aln->aln_file; if (spec && strncmp(spec, "GENE", 4) == 0) { // resampling whole genes @@ -381,6 +1266,72 @@ void SuperAlignment::createBootstrapAlignment(Alignment *aln, IntVector* pattern taxa_index = super_aln->taxa_index; countConstSite(); } +*/ + +void SuperAlignment::createBootstrapAlignment(Alignment *aln, IntVector* pattern_freq, const char *spec) { + ASSERT(aln->isSuperAlignment()); + SuperAlignment *super_aln = (SuperAlignment*) aln; + ASSERT(partitions.empty()); + name = aln->name; + model_name = aln->model_name; + sequence_type = aln->sequence_type; + position_spec = aln->position_spec; + aln_file = aln->aln_file; + + if (!spec) { + // resampling sites within genes + Alignment::copyAlignment(aln); + partitions.reserve(super_aln->partitions.size()); + for (vector::iterator it = super_aln->partitions.begin(); it != super_aln->partitions.end(); it++) { + Alignment *boot_aln = new Alignment; + if (pattern_freq) { + IntVector part_pattern_freq; + boot_aln->createBootstrapAlignment(*it, &part_pattern_freq); + pattern_freq->insert(pattern_freq->end(), part_pattern_freq.begin(), part_pattern_freq.end()); + } else { + 
boot_aln->createBootstrapAlignment(*it); + } + partitions.push_back(boot_aln); + } + taxa_index = super_aln->taxa_index; + countConstSite(); + } else if (strcmp(spec, "GENE") == 0) { + ASSERT(!pattern_freq); + // resampling whole genes + IntVector gene_freq; + random_resampling(super_aln->partitions.size(), gene_freq); + for (int i = 0; i < gene_freq.size(); i++) + if (gene_freq[i] > 0) { + Alignment *boot_aln = new Alignment; + boot_aln->copyAlignment(super_aln->partitions[i]); + if (gene_freq[i] > 1) { + for (auto it = boot_aln->begin(); it != boot_aln->end(); it++) + it->frequency *= gene_freq[i]; + auto site_pattern = boot_aln->site_pattern; + for (int j = 1; j < gene_freq[i]; j++) + boot_aln->site_pattern.insert(boot_aln->site_pattern.end(), site_pattern.begin(), site_pattern.end()); + boot_aln->countConstSite(); + } + partitions.push_back(boot_aln); + } + init(); + } else if (strcmp(spec, "GENESITE") == 0) { + ASSERT(!pattern_freq); + // resampling whole genes then sites within resampled genes + IntVector gene_freq; + random_resampling(super_aln->partitions.size(), gene_freq); + for (int i = 0; i < gene_freq.size(); i++) + for (int rep = 0; rep < gene_freq[i]; rep++) { + Alignment *boot_aln = new Alignment; + boot_aln->createBootstrapAlignment(super_aln->partitions[i]); + boot_aln->name = boot_aln->name + "." 
+ convertIntToString(rep); + partitions.push_back(boot_aln); + } + init(); + } else { + outError("Wrong -bsam, either -bsam GENE or -bsam GENESITE"); + } +} void SuperAlignment::createBootstrapAlignment(IntVector &pattern_freq, const char *spec) { ASSERT(isSuperAlignment()); @@ -410,19 +1361,23 @@ void SuperAlignment::createBootstrapAlignment(int *pattern_freq, const char *spe nptn += (*it)->getNPattern(); } memset(pattern_freq, 0, nptn * sizeof(int)); - for (int i = 0; i < partitions.size(); i++) { - int part = random_int(partitions.size(), rstream); + IntVector gene_freq; + random_resampling(partitions.size(), gene_freq, rstream); + for (int part = 0; part < partitions.size(); part++) + for (int rep = 0; rep < gene_freq[part]; rep++){ Alignment *aln = partitions[part]; if (strncmp(spec,"GENESITE",8) == 0) { // then resampling sites in resampled gene - for (int j = 0; j < aln->getNSite(); j++) { - int ptn_id = aln->getPatternID(random_int(aln->getNPattern(), rstream)); + IntVector sample; + random_resampling(aln->getNSite(), sample, rstream); + for (int site = 0; site < sample.size(); site++) + for (int rep2 = 0; rep2 < sample[site]; rep2++) { + int ptn_id = aln->getPatternID(site); pattern_freq[ptn_id + part_pos[part]]++; } - } else { - for (int j = 0; j < aln->getNPattern(); j++) - pattern_freq[j + part_pos[part]] += aln->at(j).frequency; + for (int ptn = 0; ptn < aln->getNPattern(); ptn++) + pattern_freq[ptn + part_pos[part]] += aln->at(ptn).frequency; } } } else { @@ -450,9 +1405,8 @@ void SuperAlignment::shuffleAlignment() { double SuperAlignment::computeObsDist(int seq1, int seq2) { - int site; int diff_pos = 0, total_pos = 0; - for (site = 0; site < getNSite(); site++) { + for (size_t site = 0; site < getNSite(); ++site) { int id1 = taxa_index[seq1][site]; int id2 = taxa_index[seq2][site]; if (id1 < 0 || id2 < 0) continue; @@ -510,74 +1464,29 @@ SuperAlignment::~SuperAlignment() partitions.clear(); } -void SuperAlignment::printCombinedAlignment(ostream 
&out, bool print_taxid) { - vector::iterator pit; - int final_length = 0; - for (pit = partitions.begin(); pit != partitions.end(); pit++) - if ((*pit)->seq_type == SEQ_CODON) - final_length += 3*(*pit)->getNSite(); - else - final_length += (*pit)->getNSite(); - - out << getNSeq() << " " << final_length << endl; - int max_len = getMaxSeqNameLength(); - if (print_taxid) max_len = 10; - if (max_len < 10) max_len = 10; - int seq_id; - for (seq_id = 0; seq_id < seq_names.size(); seq_id++) { - out.width(max_len); - if (print_taxid) - out << left << seq_id << " "; - else - out << left << seq_names[seq_id] << " "; - int part = 0; - for (pit = partitions.begin(); pit != partitions.end(); pit++, part++) { - int part_seq_id = taxa_index[seq_id][part]; - int nsite = (*pit)->getNSite(); - if (part_seq_id >= 0) { - for (int i = 0; i < nsite; i++) - out << (*pit)->convertStateBackStr((*pit)->getPattern(i) [part_seq_id]); - } else { - string str(nsite, '?'); - out << str; - } - } - out << endl; - } -} - -void SuperAlignment::printCombinedAlignment(const char *file_name, bool append) { - try { - ofstream out; - out.exceptions(ios::failbit | ios::badbit); - - if (append) - out.open(file_name, ios_base::out | ios_base::app); - else - out.open(file_name); - printCombinedAlignment(out); - out.close(); - cout << "Concatenated alignment was printed to " << file_name << endl; - } catch (ios::failure) { - outError(ERR_WRITE_OUTPUT, file_name); - } +void SuperAlignment::printAlignment(InputType format, ostream &out, const char* file_name + , bool append, const char *aln_site_list + , int exclude_sites, const char *ref_seq_name) +{ + Alignment *concat = concatenateAlignments(); + concat->printAlignment(format, out, file_name, append, aln_site_list, exclude_sites, ref_seq_name); + delete concat; + if (format == IN_NEXUS) + printPartition(out, NULL, true); } -void SuperAlignment::printSubAlignments(Params ¶ms, vector &part_info) { +void SuperAlignment::printSubAlignments(Params ¶ms) { 
vector::iterator pit; string filename; int part; - ASSERT(part_info.size() == partitions.size()); for (pit = partitions.begin(), part = 0; pit != partitions.end(); pit++, part++) { if (params.aln_output) filename = params.aln_output; else filename = params.out_prefix; - filename += "." + part_info[part].name; - if (params.aln_output_format == ALN_PHYLIP) - (*pit)->printPhylip(filename.c_str(), false, NULL, params.aln_nogaps, false, NULL); - else if (params.aln_output_format == ALN_FASTA) - (*pit)->printFasta(filename.c_str(), false, NULL, params.aln_nogaps, false, NULL); + filename += "." + (*pit)->name; + int exclude_sites = (params.aln_nogaps) ? EXCLUDE_GAP : 0; + (*pit)->printAlignment(params.aln_output_format, filename.c_str(), false, NULL, exclude_sites, NULL); } } @@ -591,7 +1500,7 @@ double SuperAlignment::computeUnconstrainedLogL() { double SuperAlignment::computeMissingData() { double ret = 0.0; - int len = 0; + size_t len = 0; vector::iterator pit; for (pit = partitions.begin(); pit != partitions.end(); pit++) { ret += (*pit)->getNSeq() * (*pit)->getNSite(); @@ -649,21 +1558,25 @@ Alignment *SuperAlignment::concatenateAlignments(set &ids) { int site = 0; for (it = ids.begin(); it != ids.end(); it++) { int id = *it; - string taxa_set; - Pattern taxa_pat = getPattern(id); - taxa_set.insert(taxa_set.begin(), taxa_pat.begin(), taxa_pat.end()); + // 2018-08-23: important bugfix in v1.6: taxa_set has wrong correspondance + //string taxa_set; + //Pattern taxa_pat = getPattern(id); + //taxa_set.insert(taxa_set.begin(), taxa_pat.begin(), taxa_pat.end()); for (Alignment::iterator it = partitions[id]->begin(); it != partitions[id]->end(); it++) { Pattern pat; - int part_seq = 0; + //int part_seq = 0; for (int seq = 0; seq < union_taxa.size(); seq++) if (union_taxa[seq] == 1) { char ch = aln->STATE_UNKNOWN; - if (taxa_set[seq] == 1) { - ch = (*it)[part_seq++]; - } + int seq_part = taxa_index[seq][id]; + if (seq_part >= 0) + ch = (*it)[seq_part]; + //if (taxa_set[seq] 
== 1) { + // ch = (*it)[part_seq++]; + //} pat.push_back(ch); } - ASSERT(part_seq == partitions[id]->getNSeq()); + //ASSERT(part_seq == partitions[id]->getNSeq()); aln->addPattern(pat, site, (*it).frequency); // IMPORTANT BUG FIX FOLLOW int ptnindex = aln->pattern_index[pat]; @@ -673,16 +1586,62 @@ Alignment *SuperAlignment::concatenateAlignments(set &ids) { } } aln->countConstSite(); - aln->buildSeqStates(); +// aln->buildSeqStates(); return aln; } Alignment *SuperAlignment::concatenateAlignments() { - set ids; - for (int i = 0; i < partitions.size(); i++) - ids.insert(i); - return concatenateAlignments(ids); + vector seq_types; + vector genetic_codes; + vector > ids; + for (int i = 0; i < partitions.size(); i++) { + bool found = false; + for (int j = 0; j < seq_types.size(); j++) + if (partitions[i]->seq_type == seq_types[j] && partitions[i]->genetic_code == genetic_codes[j]) { + ids[j].insert(i); + found = true; + break; + } + if (found) + continue; + // create a new partition + seq_types.push_back(partitions[i]->seq_type); + genetic_codes.push_back(partitions[i]->genetic_code); + ids.push_back(set()); + ids.back().insert(i); + } + if (seq_types.size() == 1) + return concatenateAlignments(ids[0]); + + // mixed data with >= 2 partitions + SuperAlignment *saln = new SuperAlignment(); + saln->max_num_states = 0; + // first build taxa_index and partitions + size_t nsite = ids.size(); + + // BUG FIX 2016-11-29: when merging partitions with -m TESTMERGE, sequence order is changed + // get the taxa names from existing tree + + saln->seq_names = seq_names; + saln->taxa_index.resize(saln->seq_names.size()); + for (auto it = saln->taxa_index.begin(); it != saln->taxa_index.end(); it++) + it->resize(nsite, -1); + + for (size_t site = 0; site != nsite; ++site) { + Alignment *part_aln = concatenateAlignments(ids[site]); + saln->partitions.push_back(part_aln); + size_t nseq = part_aln->getNSeq(); + //cout << "nseq = " << nseq << endl; + for (size_t seq = 0; seq < nseq; ++seq) 
{ + int id = saln->getSeqID(part_aln->getSeqName(seq)); + ASSERT(id >= 0); + saln->taxa_index[id][site] = seq; + } + } + // now the patterns of sequence-genes presence/absence + saln->buildPattern(); + return saln; } void SuperAlignment::countConstSite() { @@ -717,15 +1676,17 @@ void SuperAlignment::orderPatternByNumChars(int pat_type) { int maxi = (num_parsimony_sites+UINT_BITS-1)/UINT_BITS; pars_lower_bound = new UINT[maxi+1]; memset(pars_lower_bound, 0, (maxi+1)*sizeof(UINT)); - int part, nseq = getNSeq(); + size_t nseq = getNSeq(); // compute ordered_pattern ordered_pattern.clear(); // UINT sum_scores[npart]; - for (part = 0; part != partitions.size(); part++) { + for (size_t part = 0; part != partitions.size(); ++part) { partitions[part]->orderPatternByNumChars(pat_type); // partial_partition - for (vector::iterator pit = partitions[part]->ordered_pattern.begin(); pit != partitions[part]->ordered_pattern.end(); pit++) { + if (Params::getInstance().partition_type == TOPO_UNLINKED) + continue; + for (auto pit = partitions[part]->ordered_pattern.begin(); pit != partitions[part]->ordered_pattern.end(); ++pit) { Pattern pattern(*pit); pattern.resize(nseq); // maximal unknown states for (int j = 0; j < nseq; j++) diff --git a/alignment/superalignment.h b/alignment/superalignment.h index bb61ff9e6..d2b26a812 100644 --- a/alignment/superalignment.h +++ b/alignment/superalignment.h @@ -22,37 +22,6 @@ #include "alignment.h" - -struct PartitionInfo { - string name; // partition name - string model_name; // model name - string aln_file; // alignment file associated - string sequence_type; // sequence type (DNA/AA/BIN) - string position_spec; // position specification, e.g., "1-100\1 1-100\2" - - double cur_score; // current log-likelihood - double part_rate; // partition heterogeneity rate - int evalNNIs; // number of evaluated NNIs on subtree - - //DoubleVector null_score; // log-likelihood of each branch collapsed to zero - //DoubleVector opt_score; // optimized 
log-likelihood for every branch - //DoubleVector nni1_score; // log-likelihood for 1st NNI for every branch - //DoubleVector nni2_score; // log-likelihood for 2nd NNI for every branch - - vector cur_brlen; // current branch lengths - //DoubleVector opt_brlen; // optimized branch lengths for every branch - vector nni1_brlen; // branch length for 1st NNI for every branch - vector nni2_brlen; // branch length for 2nd NNI for every branch - - //double *mem_ptnlh; // total memory allocated for all pattern likelihood vectors - double *cur_ptnlh; // current pattern likelihoods of the tree - //double *nni1_ptnlh; // pattern likelihoods of 1st NNI tree - //double *nni2_ptnlh; // pattern likelihoods of 2nd NNI tree - NNIMove nniMoves[2]; -}; - -class PhyloSuperTree; - /** Super alignment representing presence/absence of sequences in k partitions for a total of n sequences. It has the form: @@ -73,7 +42,7 @@ class SuperAlignment : public Alignment { public: /** constructor initialize from a supertree */ - SuperAlignment(PhyloSuperTree *super_tree); + SuperAlignment(Params ¶ms); /** constructor initialize empty alignment */ SuperAlignment(); @@ -81,9 +50,43 @@ class SuperAlignment : public Alignment /** destructor */ ~SuperAlignment(); + /** + load partitions from program Params + @param params program Params + */ + void readFromParams(Params ¶ms); + + /** + initialize seq_names, taxon_index, buildPattern + */ + virtual void init(StrVector *sequence_names = NULL); + /** return that this is a super-alignment structure */ virtual bool isSuperAlignment() { return true; } + /** read partition model file */ + void readPartition(Params ¶ms); + + /** read RAxML-style partition file */ + void readPartitionRaxml(Params ¶ms); + + /** read partition model file in NEXUS format into variable info */ + void readPartitionNexus(Params ¶ms); + + /** read partition as files in a directory */ + void readPartitionDir(string partition_dir, char *sequence_type, InputType &intype, string model, bool 
remove_empty_seq); + + /** read partition as a comma-separated list of files */ + void readPartitionList(string file_list, char *sequence_type, InputType &intype, string model, bool remove_empty_seq); + + void printPartition(const char *filename, const char *aln_file); + void printPartition(ostream &out, const char *aln_file = NULL, bool append = false); + + void printPartitionRaxml(const char *filename); + + void printBestPartition(const char *filename); + void printBestPartitionRaxml(const char *filename); + /** * create taxa_index from super-alignment to sub-alignment * @param part index of sub-alignment @@ -102,12 +105,36 @@ class SuperAlignment : public Alignment */ virtual void getPatternFreq(IntVector &pattern_freq); + /** + * @param[out] freq vector of site-pattern frequencies + */ + virtual void getPatternFreq(int *freq); + /** Print all site information to a file @param filename output file name */ virtual void printSiteInfo(const char* filename); + /** + compute empirical substitution counts between state pairs + @param normalize true to normalize row sum to 1, false otherwise + @param[out] pair_freq matrix of size num_states*num_states + @param[out] state_freq vector of size num_states + */ + virtual void computeDivergenceMatrix(double *pair_freq, double *state_freq, bool normalize = true); + + /** + perform matched-pair tests of symmetry of Lars Jermiin et al. 
+ @param[out] sym results of test of symmetry + @param[out] marsym results of test of marginal symmetry + @param[out] intsym results of test of internal symmetry + @param out output stream to print results + @param rstream random stream to shuffle alignment columns + @param out_stat output stream to print pairwise statistics + */ + virtual void doSymTest(size_t vecid, vector &sym, vector &marsym, + vector &intsym, int *rstream = NULL, vector *stats = NULL); /** extract sub-alignment of a sub-set of sequences @@ -119,6 +146,19 @@ class SuperAlignment : public Alignment */ virtual void extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_true_char, int min_taxa = 0, IntVector *kept_partitions = NULL); + /** + extract a subset of partitions to form a new SuperAlignment object + @param part_id vector of partition IDs + @return new alignment containing only part_id partitions + */ + SuperAlignment *extractPartitions(IntVector &part_id); + + /** + remove a subset of partitions + @param part_id vector of partition IDs + */ + void removePartitions(set &part_id); + /** * remove identical sequences from alignment * @param not_remove name of sequence where removal is avoided @@ -195,22 +235,22 @@ class SuperAlignment : public Alignment * @param filename * @param append TRUE to append to this file, false to write new file */ - void printCombinedAlignment(const char *filename, bool append = false); - - /** - * print the super-alignment to a stream - * @param out output stream - * @param print_taxid true to print taxa IDs instead of names, default: false - */ - - void printCombinedAlignment(ostream &out, bool print_taxid = false); + virtual void printAlignment(InputType format, ostream &out, const char* file_name + , bool append = false, const char *aln_site_list = NULL + , int exclude_sites = 0, const char *ref_seq_name = NULL); /** * print all sub alignments into files with prefix, suffix is the charset name * @param prefix prefix of output files */ - void 
printSubAlignments(Params ¶ms, vector &part_info); + void printSubAlignments(Params ¶ms); + /** + @param quartet ID of four taxa + @param[out] support number of sites supporting 12|34, 13|24 and 14|23 + */ + virtual void computeQuartetSupports(IntVector &quartet, vector &support); + /** @return unconstrained log-likelihood (without a tree) */ @@ -226,7 +266,7 @@ class SuperAlignment : public Alignment * it is in form of a binary alignment, where 0 means absence and 1 means presence * of a gene in a sequence */ - void buildPattern(); + virtual void buildPattern(); /** count the fraction of constant sites in the alignment, update the variable frac_const_sites diff --git a/alignment/superalignmentpairwise.cpp b/alignment/superalignmentpairwise.cpp index 49544b3be..4a7ca19dc 100644 --- a/alignment/superalignmentpairwise.cpp +++ b/alignment/superalignmentpairwise.cpp @@ -32,43 +32,34 @@ SuperAlignmentPairwise::SuperAlignmentPairwise(PhyloSuperTree *atree, int seq1, seq_id2 = seq2; SuperAlignment *aln = (SuperAlignment*) atree->aln; int part = 0; - for (PhyloSuperTree::iterator it = atree->begin(); it != atree->end(); it++, part++) { + partitions.reserve(atree->size()); + for ( auto it = atree->begin(); it != atree->end(); it++, part++ ) { int id1 = aln->taxa_index[seq1][part]; int id2 = aln->taxa_index[seq2][part]; if (id1 >= 0 && id2 >= 0) - partitions.push_back(new AlignmentPairwise((*it), id1, id2)); + partitions.emplace_back(AlignmentPairwise((*it), id1, id2)); } } double SuperAlignmentPairwise::computeFunction(double value) { double lh = 0.0; - for (vector::iterator it = partitions.begin(); it != partitions.end(); it++) { - lh += (*it)->computeFunction(value); + for (auto it = partitions.begin(); it != partitions.end(); it++) { + lh += it->computeFunction(value); } return lh; } - void SuperAlignmentPairwise::computeFuncDerv(double value, double &df, double &ddf) { -// double lh = 0.0; df = 0.0; ddf = 0.0; - for (vector::iterator it = partitions.begin(); it != 
partitions.end(); it++) { + for (auto it = partitions.begin(); it != partitions.end(); it++) { double d1, d2; -// lh += (*it)->computeFuncDerv(value, d1, d2); - (*it)->computeFuncDerv(value, d1, d2); + it->computeFuncDerv(value, d1, d2); df += d1; ddf += d2; } -// return lh; } - SuperAlignmentPairwise::~SuperAlignmentPairwise() { - for (vector::reverse_iterator it = partitions.rbegin(); it != partitions.rend(); it++) - delete (*it); - partitions.clear(); } - - diff --git a/alignment/superalignmentpairwise.h b/alignment/superalignmentpairwise.h index dade42dae..9507787c5 100644 --- a/alignment/superalignmentpairwise.h +++ b/alignment/superalignmentpairwise.h @@ -59,7 +59,7 @@ class SuperAlignmentPairwise : public AlignmentPairwise */ virtual void computeFuncDerv(double value, double &df, double &ddf); - vector partitions; + vector partitions; }; diff --git a/alignment/superalignmentpairwiseplen.cpp b/alignment/superalignmentpairwiseplen.cpp index 4e9427273..b7e343ce4 100644 --- a/alignment/superalignmentpairwiseplen.cpp +++ b/alignment/superalignmentpairwiseplen.cpp @@ -13,7 +13,6 @@ * class SuperAlignmentPairwisePlen **********************************************************/ - SuperAlignmentPairwisePlen::SuperAlignmentPairwisePlen(PhyloSuperTreePlen *atree, int seq1, int seq2) : SuperAlignmentPairwise((PhyloSuperTree*) atree, seq1, seq2) { @@ -23,24 +22,22 @@ SuperAlignmentPairwisePlen::SuperAlignmentPairwisePlen(PhyloSuperTreePlen *atree double SuperAlignmentPairwisePlen::computeFunction(double value) { int part = 0; double lh = 0.0; - for (vector::iterator it = partitions.begin(); it != partitions.end(); it++, part++) { - lh += (*it)->computeFunction(part_info->at(part).part_rate*value); + for (auto it = partitions.begin(); it != partitions.end(); it++, part++) { + lh += it->computeFunction(part_info->at(part).part_rate*value); } return lh; } void SuperAlignmentPairwisePlen::computeFuncDerv(double value, double &df, double &ddf) { int part = 0; - // double lh = 
0.0; df = 0.0; ddf = 0.0; - for (vector::iterator it = partitions.begin(); it != partitions.end(); it++, part++) { + for (auto it = partitions.begin(); it != partitions.end(); it++, part++) { double d1, d2; - (*it)->computeFuncDerv(part_info->at(part).part_rate*value, d1, d2); + it->computeFuncDerv(part_info->at(part).part_rate*value, d1, d2); df += part_info->at(part).part_rate*d1; ddf += part_info->at(part).part_rate*part_info->at(part).part_rate*d2; } - // return lh; } SuperAlignmentPairwisePlen::~SuperAlignmentPairwisePlen() diff --git a/alignment/superalignmentunlinked.cpp b/alignment/superalignmentunlinked.cpp new file mode 100644 index 000000000..c4a2d215f --- /dev/null +++ b/alignment/superalignmentunlinked.cpp @@ -0,0 +1,161 @@ +/*************************************************************************** + * Copyright (C) 2018 by BUI Quang Minh * + * m.bui@anu.edu.au * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* + ***************************************************************************/ + +#include "superalignmentunlinked.h" +#include "utils/timeutil.h" + +/** constructor initialize from a supertree */ +SuperAlignmentUnlinked::SuperAlignmentUnlinked(Params ¶ms) +: SuperAlignment() +{ + readFromParams(params); + init(); +} + +/** constructor initialize empty alignment */ +SuperAlignmentUnlinked::SuperAlignmentUnlinked() +: SuperAlignment() +{ + unlinked_taxa = true; +} + +void SuperAlignmentUnlinked::init(StrVector *sequence_names) { + // start original code + + max_num_states = 0; + // first build taxa_index and partitions + map name2part; + unlinked_taxa = true; + for (auto it = partitions.begin(); it != partitions.end(); it++) { + // Make sure that all partitions have different seq names + for (auto sit = (*it)->seq_names.begin(); sit != (*it)->seq_names.end(); sit++) { + if (name2part.find(*sit) != name2part.end()) { + unlinked_taxa = false; + break; + } + name2part[*sit] = (it) - partitions.begin(); + } + } + + if (!unlinked_taxa) { + // if some taxon sets are overlapping + SuperAlignment::init(sequence_names); + cout << "Linked " << seq_names.size() << " total sequences" << endl; + return; + } + + for (auto it = partitions.begin(); it != partitions.end(); it++) { + seq_names.insert(seq_names.end(), (*it)->seq_names.begin(), (*it)->seq_names.end()); + } + + cout << "Unlinked " << seq_names.size() << " total sequences" << endl; + + /* + taxa_index.resize(total_seqs, IntVector(npart, -1)); + for (auto it = partitions.begin(), part = 0, seq = 0; it != partitions.end(); it++, part++) { + int part_nseq = (*it)->getNSeq(); + for (int part_seq = 0; part_seq < part_nseq; part_seq++, seq++) { + taxa_index[seq][part] = part_seq; + } + } + ASSERT(seq == total_seqs); + */ + // now the patterns of sequence-genes presence/absence + buildPattern(); +} + +void SuperAlignmentUnlinked::buildPattern() { + if (!unlinked_taxa) { + SuperAlignment::buildPattern(); + return; + } + int 
part, npart = partitions.size(); + seq_type = SEQ_BINARY; + num_states = 2; // binary type because the super alignment presents the presence/absence of taxa in the partitions + STATE_UNKNOWN = 2; + site_pattern.resize(npart, -1); + clear(); + pattern_index.clear(); + /* + VerboseMode save_mode = verbose_mode; + verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern + size_t nseq = getNSeq(); + int start_seq = 0; + resize(npart, Pattern(nseq)); + for (part = 0; part < npart; part++) { + Pattern *pat = &at(part); + for (int seq = 0; seq < partitions[part]->getNSeq(); seq++) + pat->at(start_seq + seq) = 1; + //addPattern(pat, part); + computeConst(*pat); + + // NOT USED FOR TOPO_UNLINKED + //pattern_index[*pat] = part; + site_pattern[part] = part; + start_seq += partitions[part]->getNSeq(); + } + ASSERT(start_seq == nseq); + verbose_mode = save_mode; + */ + resize(1, Pattern(getNSeq(), npart)); + computeConst(at(0)); + for (part = 0; part < npart; part++) { + site_pattern[part] = 0; + } + + countConstSite(); +// buildSeqStates(); +} + +void SuperAlignmentUnlinked::computeConst(Pattern &pat) { + if (!unlinked_taxa) { + SuperAlignment::computeConst(pat); + return; + } + bool is_const = (partitions.size() == 1); + bool is_invariant = (partitions.size() == 1); + bool is_informative = (partitions.size() > 1); + pat.const_char = (is_const) ? 1 : (STATE_UNKNOWN+1); + + pat.num_chars = (is_const) ? 
1 : 2; // number of states with >= 1 appearance + + pat.flag = 0; + if (is_const) pat.flag |= PAT_CONST; + if (is_invariant) pat.flag |= PAT_INVARIANT; + if (is_informative) pat.flag |= PAT_INFORMATIVE; +} + +/* +void SuperAlignmentUnlinked::buildSeqStates(bool add_unobs_const) { + if (!unlinked_taxa) { + SuperAlignment::buildSeqStates(add_unobs_const); + return; + } + seq_states.clear(); + if (add_unobs_const) { + seq_states.resize(getNSeq(), IntVector({0,1})); + } else { + if (partitions.size() == 1) + seq_states.resize(getNSeq(), IntVector({1})); + else + seq_states.resize(getNSeq(), IntVector({0,1})); + } +} +*/ diff --git a/alignment/superalignmentunlinked.h b/alignment/superalignmentunlinked.h new file mode 100644 index 000000000..3c4862dc1 --- /dev/null +++ b/alignment/superalignmentunlinked.h @@ -0,0 +1,62 @@ +/*************************************************************************** + * Copyright (C) 2018 by BUI Quang Minh * + * m.bui@anu.edu.au * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
# BOOSTER (transfer bootstrap) helper library, built from the C files in
# this directory. Headers are listed as well so that IDE generators show
# them; they do not add anything to the compilation.
add_library(booster
  # sources
  bitset_index.c
  booster.c
  hashmap.c
  hashtables_bfields.c
  io.c
  prng.c
  sort.c
  stats.c
  tree.c
  tree_utils.c
  # headers
  bitset_index.h
  booster.h
  externs.h
  hashmap.h
  hashtables_bfields.h
  io.h
  prng.h
  sort.h
  stats.h
  tree.h
  tree_utils.h
)
CC = i686-w64-mingw32-gcc +else + ifeq ($(cross),win64) + CC = x86_64-w64-mingw32-gcc + else + ifeq ($(cross),linux32) + CFLAGS_OMP += -m32 + CFLAGS += -m32 + else + CC = gcc + endif + endif +endif + +ifeq ($(UNAME),Darwin) + CFLAGS_OMP += -static-libgcc +#else +# CFLAGS_OMP += -static +endif + +LIBS = -lm +OBJS = hashtables_bfields.o tree.o stats.o prng.o hashmap.o version.o sort.o io.o tree_utils.o bitset_index.o + +# default target +ALL = booster + +INSTALL_PATH=$$HOME/bin/ + +all : $(ALL) + +%.o: %.c %.h + $(CC) $(CFLAGS) -c $< + +# **** +# the "booster" supports. Needs ref tree and bt trees. +# **** +booster: $(OBJS) booster.c + $(CC) $(CFLAGS_OMP) -o $@ $^ $(LIBS) + + +# **** +# TESTS +# **** +tests: $(OBJS) test.c + $(CC) $(CFLAGS) -o $@ $^ $(LIBS) + +test : tests + ./tests + +.PHONY: clean + +clean: + rm -f *~ *.o $(ALL) tests + rm -rf *.dSYM + +install: all + mkdir -p $(INSTALL_PATH) + cp $(ALL) $(INSTALL_PATH) + +uninstall: + rm $(addprefix $(INSTALL_PATH),$(ALL)) diff --git a/booster/bitset_index.c b/booster/bitset_index.c new file mode 100644 index 000000000..f91cbffe7 --- /dev/null +++ b/booster/bitset_index.c @@ -0,0 +1,196 @@ +/* + +BOOSTER: BOOtstrap Support by TransfER: +BOOSTER is an alternative method to compute bootstrap branch supports +in large trees. It uses transfer distance between bipartitions, instead +of perfect match. + +Copyright (C) 2017 Frederic Lemoine, Jean-Baka Domelevo Entfellner, Olivier Gascuel + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +*/ + +#include "bitset_index.h" + +bitset_hashmap* new_bitset_hashmap(int size, float loadfactor) { + int i; + bitset_hashmap* bh = malloc(sizeof(bitset_hashmap)); + bh->capacity = size; + bh->loadfactor = loadfactor; + bh->total = 0; + bh->map_array = malloc(size*sizeof(bitset_bucket)); + for(i=0;imap_array[i]=NULL; + } + return bh; +} + +void free_bitset_hashmap(bitset_hashmap *hm){ + bitset_hash_map_free_map_array(hm->map_array, hm->capacity); + free(hm); +} + +void bitset_hash_map_free_map_array(bitset_bucket **map_array, int total){ + int i; + for(i=0;ivalues, map_array[i]->size); + free(map_array[i]); + } + } + free(map_array); +} + +void bitset_hash_map_free_buckets(bitset_keyvalue ** values, int total){ + int i; + for(i=0;icapacity); + int k; + if(hm->map_array[index] != NULL){ + for (k=0;kmap_array[index]->size;k++){ + if(bitset_hashEquals(hm->map_array[index]->values[k]->key,bitset,nb_taxa)) { + return hm->map_array[index]->values[k]->value; + } + } + } + return -1; +} + +void bitset_hashmap_putvalue(bitset_hashmap *hm, id_hash_table_t *bitset, int nb_taxa, int value) { + int index = bitset_hashmap_indexfor(bitset_hashcode(bitset,nb_taxa), hm->capacity); + int k; + if(hm->map_array[index] == NULL) { + hm->map_array[index] = malloc(sizeof(bitset_bucket)); + hm->map_array[index]->size=1; + hm->map_array[index]->capacity=3; + hm->map_array[index]->values=malloc(3*sizeof(bitset_keyvalue*)); + hm->map_array[index]->values[0] = malloc(sizeof(bitset_keyvalue)); + hm->map_array[index]->values[0]->key = bitset; + hm->map_array[index]->values[0]->value = value; + hm->total++; + } else { + for (k=0;kmap_array[index]->size;k++){ + if(bitset_hashEquals(hm->map_array[index]->values[k]->key,bitset,nb_taxa)) { + hm->map_array[index]->values[k]->value = value; + 
return; + } + } + if(hm->map_array[index]->size>=hm->map_array[index]->capacity){ + hm->map_array[index]->values = realloc(hm->map_array[index]->values,hm->map_array[index]->capacity*2*sizeof(bitset_keyvalue*)); + hm->map_array[index]->capacity *= 2; + } + hm->map_array[index]->values[hm->map_array[index]->size] = malloc(sizeof(bitset_keyvalue)); + hm->map_array[index]->values[hm->map_array[index]->size]->key = bitset; + hm->map_array[index]->values[hm->map_array[index]->size]->value = value; + hm->map_array[index]->size++; + hm->total++; + } +} + +// Computes a hash code for the bitset associated to an edge +int bitset_hashcode(id_hash_table_t *hashtable, int nb_taxa){ + int hashCodeSet = 1; + int hashCodeUnset = 1; + int hashCodeAll = 1; + int nbset = 0; + int nbunset = 0; + int bit; + for (bit = 0; bit < nb_taxa; bit++) { + if (lookup_id(hashtable, bit)){ + hashCodeSet = 31*hashCodeSet + bit; + nbset++; + } else { + hashCodeUnset = 31*hashCodeUnset + bit; + nbunset++; + } + hashCodeAll = 31*hashCodeAll + bit; + } + // If the number of species on the left is the same + // than the number of species on the right + // We return the hashcode of the all species + // Otherwise, we return the hashcode for the minimum + // between left and right + // Allows an edge to be kind of "unique" + if(nbset == nbunset){ + return hashCodeAll; + } else if(nbset < nbunset){ + return hashCodeSet; + } + return hashCodeUnset; +} + +// HashCode for an edge bitset. 
+// Used for insertion in an EdgeMap +int bitset_hashEquals(id_hash_table_t *tbl1, id_hash_table_t *tbl2, int nb_taxa) { + return equal_or_complement_id_hashtables(tbl1, tbl2, nb_taxa); +} + + +// Reconstructs the HashMap if the capacity is almost attained (loadfactor) +void bitset_hashmap_rehash(bitset_hashmap *hm, int nb_taxa) { + // We rehash everything with a new capacity + if (((float)hm->total) >= ((float)hm->capacity) * hm->loadfactor) { + int newcapacity = hm->capacity * 2; + int i,l,k; + bitset_bucket **new_map_array = malloc(newcapacity*sizeof(bitset_bucket*)); + for(i=0;icapacity;k++){ + if (hm->map_array[k] != NULL) { + for(l=0;lmap_array[k]->size;l++){ + int index = bitset_hashmap_indexfor(bitset_hashcode(hm->map_array[k]->values[l]->key,nb_taxa), newcapacity); + if (new_map_array[index] == NULL) { + new_map_array[index] = malloc(sizeof(bitset_bucket)); + new_map_array[index]->size=1; + new_map_array[index]->capacity=3; + new_map_array[index]->values=malloc(3*sizeof(bitset_keyvalue*)); + new_map_array[index]->values[0] = malloc(sizeof(bitset_keyvalue)); + new_map_array[index]->values[0]->key = hm->map_array[k]->values[l]->key; + new_map_array[index]->values[0]->value = hm->map_array[k]->values[l]->value; + } else { + if(new_map_array[index]->size>=new_map_array[index]->capacity){ + new_map_array[index]->values = realloc(new_map_array[index]->values,new_map_array[index]->capacity*2*sizeof(bitset_keyvalue*)); + new_map_array[index]->capacity *= 2; + } + new_map_array[index]->values[new_map_array[index]->size] = malloc(sizeof(bitset_keyvalue)); + new_map_array[index]->values[new_map_array[index]->size]->key = hm->map_array[k]->values[l]->key; + new_map_array[index]->values[new_map_array[index]->size]->value = hm->map_array[k]->values[l]->value; + new_map_array[index]->size++; + } + } + } + } + hm->capacity = newcapacity; + bitset_hash_map_free_map_array(hm->map_array,hm->total); + hm->map_array = new_map_array; + } +} diff --git a/booster/bitset_index.h 
b/booster/bitset_index.h new file mode 100644 index 000000000..e0448be20 --- /dev/null +++ b/booster/bitset_index.h @@ -0,0 +1,75 @@ +/* + +BOOSTER: BOOtstrap Support by TransfER: +BOOSTER is an alternative method to compute bootstrap branch supports +in large trees. It uses transfer distance between bipartitions, instead +of perfect match. + +Copyright (C) 2017 Frederic Lemoine, Jean-Baka Domelevo Entfellner, Olivier Gascuel + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +*/ + +#ifndef _BITSET_INDEX_H_ +#define _BITSET_INDEX_H_ + +#include "hashtables_bfields.h" + +typedef struct bitset_keyvalue{ +id_hash_table_t* key; +int value; +} bitset_keyvalue; + +typedef struct bitset_bucket{ +int size; +int capacity; +struct bitset_keyvalue **values; +} bitset_bucket; + + +typedef struct bitset_hashmap{ +struct bitset_bucket **map_array; +int capacity; +float loadfactor; +int total; +} bitset_hashmap; + + +// Allocates a new bitset hasmap +bitset_hashmap* new_bitset_hashmap(int size, float loadfactor); +// Free the whole bitset hashmap +void free_bitset_hashmap(bitset_hashmap *hm); +// Free a map_array +void bitset_hash_map_free_map_array(bitset_bucket **map_array, int total); +// Free a set of bitset_keyvalue +void bitset_hash_map_free_buckets(bitset_keyvalue ** values, int total); +// returns the index in the hash map, given a hashcode +int bitset_hashmap_indexfor(int hashcode, int capacity); +// Returns the count for the given Edge +// If the edge is not present, returns -1 +// If the edge is present, returns the value +int bitset_hashmap_value(bitset_hashmap *hm, id_hash_table_t *bitset, int nb_taxa); +// Inserts a value in the hashmap +void bitset_hashmap_putvalue(bitset_hashmap *hm, id_hash_table_t *bitset, int nb_taxa, int value); +// Computes a hash code for the bitset associated with an edge +int bitset_hashcode(id_hash_table_t *hashtable, int nb_taxa); +// HashCode for an edge bitset. 
+// Used for insertion in an EdgeMap +int bitset_hashEquals(id_hash_table_t *tbl1, id_hash_table_t *tbl2, int nb_taxa); +// Reconstructs the HashMap if the capacity is almost attained (loadfactor) +void bitset_hashmap_rehash(bitset_hashmap *hm, int nb_taxa); + +#endif diff --git a/booster/booster.c b/booster/booster.c new file mode 100644 index 000000000..a6dde78c4 --- /dev/null +++ b/booster/booster.c @@ -0,0 +1,627 @@ +/* + +BOOSTER: BOOtstrap Support by TransfER: +BOOSTER is an alternative method to compute bootstrap branch supports +in large trees. It uses transfer distance between bipartitions, instead +of perfect match. + +Copyright (C) 2017 Frederic Lemoine, Jean-Baka Domelevo Entfellner, Olivier Gascuel + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +*/ + +#include "io.h" +#include "tree.h" +#include "bitset_index.h" + +#include /* for strcpy, strdup, etc */ +#ifndef CLANG_UNDER_VS +#include +#endif +#ifdef _OPENMP +#include /* OpenMP */ +#endif +#include + +#include "version.h" + +/** + A large part of the code was initally implemented by Jean-Baka Domelevo-Entfellner + (tree structures, tbe algorithm) +*/ + +void tbe(Tree *ref_tree, Tree *ref_raw_tree, char **alt_tree_strings,char** taxname_lookup_table, FILE *stat_file, int num_trees, int quiet, double dist_cutoff,int count_per_branch); +void fbp(Tree *ref_tree, char **alt_tree_strings,char** taxname_lookup_table, int num_trees, int quiet); +int* species_to_move(Edge* re, Edge* be, int dist, int nb_taxa); +/* +void usage(FILE * out,char *name){ + fprintf(out,"Usage: "); + fprintf(out,"%s -i -b [-@ -d -r -S -o -v]\n",name); + fprintf(out,"Options:\n"); + fprintf(out," -i, --input : Input tree file\n"); + fprintf(out," -b, --boot : Bootstrap tree file (1 file containing all bootstrap trees)\n"); + fprintf(out," -o, --out : Output file (optional) with normalized support values, default : stdout\n"); + fprintf(out," -r, --out-raw : Output file (optional) with raw support values in the form of id|avgdist|depth, default : none\n"); + fprintf(out," -@, --num-threads : Number of threads (default 1)\n"); + fprintf(out," -S, --stat-file : Prints output statistics for each branch in the given output file (optional)\n"); + fprintf(out," -c, --count-per-branch : Prints individual taxa moves for each branches in the log file (only with -S & -a tbe)\n"); + fprintf(out," -d, --dist-cutoff : Distance cutoff to consider a branch for taxa transfer index computation (-a tbe only, default 0.3)\n"); + fprintf(out," -a, --algo : tbe or fbp (default tbe)\n"); + fprintf(out," -q, --quiet : Does not print progress messages during analysis\n"); + fprintf(out," -v, --version : Prints version (optional)\n"); + fprintf(out," -h, --help : Prints this help\n"); + fprintf(out,"\n"); + 
fprintf(out,"If you use BOOSTER, please cite:\n"); + fprintf(out,"Renewing Felsenstein's Phylogenetic Bootstrap in the Era of Big Data\n"); + fprintf(out,"F. Lemoine, J.-B. Domelevo-Entfellner, E. Wilkinson, D. Correia, M. Davila Felipe, T. De Oliveira, O. Gascuel.\n"); + fprintf(out,"Nature 556, 452-456 (2018)\n"); +} + +void printOptions(FILE * out,char* input_tree,char * boot_trees, char * output_tree, char * output_raw_tree, char *output_stat, char *algo, int nb_threads, int quiet, double dist_cutoff, int count_per_branch){ + fprintf(out,"**************************\n"); + fprintf(out,"* Options *\n"); + fprintf(out,"**************************\n"); + short_version(out); + fprintf(out,"Input Tree : %s\n", input_tree); + fprintf(out,"Bootstrap Trees : %s\n", boot_trees); + if(output_tree==NULL) + fprintf(out,"Output tree : stdout\n"); + else + fprintf(out,"Output tree : %s\n",output_tree); + if(output_raw_tree!=NULL) + fprintf(out,"Output raw tree : %s\n",output_raw_tree); + if(output_stat==NULL) + fprintf(out,"Stat file : None\n"); + else + fprintf(out,"Stat file : %s\n",output_stat); + fprintf(out,"Algo : %s\n", algo); + if(count_per_branch){ + fprintf(out,"Count tax move/branch: true\n"); + }else{ + fprintf(out,"Count tax move/branch: false\n"); + } + fprintf(out,"Threads : %d\n", nb_threads); + fprintf(out,"Dist cutoff : %f\n", dist_cutoff); + if(quiet) + fprintf(out,"Quiet : true\n"); + else + fprintf(out,"Quiet : false\n"); + fprintf(out,"**************************\n"); +} +*/ +void reset_matrices(int nb_taxa, int nb_edges_ref, int nb_edges_boot, short unsigned*** c_matrix, short unsigned*** i_matrix, short unsigned*** hamming, short unsigned** min_dist, short unsigned** min_dist_edges){ + int i; + (*min_dist) = (short unsigned*) malloc(nb_edges_ref*sizeof(short unsigned)); /* array of min Hamming distances */ + (*min_dist_edges) = (short unsigned*) malloc(nb_edges_ref*sizeof(short unsigned)); /* array of edge ids corresponding to min Hamming distances */ + 
(*c_matrix) = (short unsigned**) malloc(nb_edges_ref*sizeof(short unsigned*)); /* matrix of cardinals of complements */ + (*i_matrix) = (short unsigned**) malloc(nb_edges_ref*sizeof(short unsigned*)); /* matrix of cardinals of intersections */ + (*hamming) = (short unsigned**) malloc(nb_edges_ref*sizeof(short unsigned*)); /* matrix of Hamming distances */ + for (i=0; i0){ + if(num_threads > omp_get_max_threads()) + num_threads = omp_get_max_threads(); + }else{ + num_threads = 1; + } + omp_set_num_threads(num_threads); +*/ + + if(stat_out !=NULL){ + stat_file = fopen(stat_out,"w"); + if(stat_file == NULL){ + fprintf(stderr,"File %s not found or not writable. Aborting.\n", stat_out); + Generic_Exit(__FILE__,__LINE__,__FUNCTION__,EXIT_FAILURE); + } + } else stat_file = NULL; + + /* writing the output tree to the file given on the commandline */ + if(out_tree == NULL){ + output_file = stdout; + }else{ + output_file = fopen(out_tree,"w"); + if(output_file == NULL){ + fprintf(stderr,"File %s not found or not writable. Aborting.\n", out_tree); + Generic_Exit(__FILE__,__LINE__,__FUNCTION__,EXIT_FAILURE); + } + } + + /* writing the output tree to the file given on the commandline */ + if(out_raw_tree != NULL){ + output_raw_file = fopen(out_raw_tree,"w"); + if(output_raw_file == NULL){ + fprintf(stderr,"File %s not found or not writable. Aborting.\n", out_raw_tree); + Generic_Exit(__FILE__,__LINE__,__FUNCTION__,EXIT_FAILURE); + } + } + + /* + if(!quiet) printOptions(stderr, input_tree, boot_trees, out_tree, out_raw_tree, stat_out, algo, num_threads, quiet, dist_cutoff, count_per_branch); +*/ + intree_file = fopen(input_tree,"r"); + if (intree_file == NULL) { + fprintf(stderr,"File %s not found or impossible to access media. 
Aborting.\n", input_tree); + Generic_Exit(__FILE__,__LINE__,__FUNCTION__,EXIT_FAILURE); + } + + /* we copy the tree into a large string */ + unsigned int treefilesize = 3 * tell_size_of_one_tree(input_tree); + if (treefilesize > MAX_TREELENGTH) { + fprintf(stderr,"Tree filesize for %s bigger than %d bytes: are you sure it's a valid NH tree? Aborting.\n", input_tree, MAX_TREELENGTH/3); + Generic_Exit(__FILE__,__LINE__,__FUNCTION__,EXIT_FAILURE); + } + + char *big_string = (char*) calloc(treefilesize+1, sizeof(char)); + retcode = copy_nh_stream_into_str(intree_file, big_string); + if (retcode != 1) { + fprintf(stderr,"Unexpected EOF while parsing the reference tree! Aborting.\n"); + Generic_Exit(__FILE__,__LINE__,__FUNCTION__,EXIT_FAILURE); + } + fclose(intree_file); + + /* and then feed this string to the parser */ + char** taxname_lookup_table = NULL; + ref_tree = complete_parse_nh(big_string, &taxname_lookup_table); /* sets taxname_lookup_table en passant */ + if(out_raw_tree !=NULL){ + ref_raw_tree = complete_parse_nh(big_string, &taxname_lookup_table); /* sets taxname_lookup_table en passant */ + } + + + /***********************************************************************/ + /* Establishing the list of bootstrapped trees we are going to analyze */ + /***********************************************************************/ + int init_boot_trees = 10; + int i_tree; + int num_trees = 0; /* this is the number of trees really analyzed */ + + alt_tree_strings = (char**)malloc(init_boot_trees * sizeof(char*)); + boottree_file = fopen(boot_trees,"r"); + if (boottree_file == NULL) { + fprintf(stderr,"File %s not found or impossible to access media. Aborting.\n", boot_trees); + Generic_Exit(__FILE__,__LINE__,__FUNCTION__,EXIT_FAILURE); + } + + if (tell_size_of_one_tree(boot_trees) > treefilesize /* this value is still reachable */) { + fprintf(stderr,"error: size of one alternate tree bigger than three times the size of the ref tree! 
Aborting.\n"); + Generic_Exit(__FILE__,__LINE__,__FUNCTION__,EXIT_FAILURE); + } + + /* we copy the tree into a large string */ + while(copy_nh_stream_into_str(boottree_file, big_string)) /* reads from the current point in the stream, retcode 1 iff no error */ + { + if(num_trees >= init_boot_trees){ + alt_tree_strings = (char**)realloc(alt_tree_strings,init_boot_trees*2*sizeof(char*)); + init_boot_trees *= 2; + } + alt_tree_strings[num_trees] = strdup(big_string); + num_trees++; + } + fclose(boottree_file); + + if(!quiet) fprintf(stderr,"Num trees: %d\n",num_trees); + + if(!strcmp(algo,"tbe")){ + tbe(ref_tree, ref_raw_tree, alt_tree_strings, taxname_lookup_table, stat_file, num_trees, quiet, dist_cutoff, count_per_branch); + }else{ + fbp(ref_tree, alt_tree_strings, taxname_lookup_table, num_trees, quiet); + } + write_nh_tree(ref_tree, output_file); + if(output_raw_file!=NULL && ref_raw_tree!=NULL){ + write_nh_tree(ref_raw_tree, output_raw_file); + } + + fclose(output_file); + if(stat_file != NULL) fclose(stat_file); + // FREEING STUFF + free(big_string); + + /* free the stuff for the calculation of the mast-like distances */ + for(i_tree=0; i_tree < num_trees;i_tree++){ + free(alt_tree_strings[i_tree]); + } + free(alt_tree_strings); + + /* we also have to free the taxname lookup table */ + for(i=0; i < ref_tree->nb_taxa; i++) free(taxname_lookup_table[i]); /* freeing (char*)'s */ + free(taxname_lookup_table); /* which is a (char**) */ + free_tree(ref_tree); + return 0; +} + + +void fbp(Tree *ref_tree, char **alt_tree_strings,char** taxname_lookup_table, int num_trees, int quiet){ + int j; + Tree *alt_tree; + int i_tree,i; + short unsigned* nb_found = (short unsigned*)malloc(ref_tree->nb_edges * sizeof(short unsigned)); + double support; + // We initialize the reference edge hashmap + bitset_hashmap *hm = new_bitset_hashmap(ref_tree->nb_edges*2, 0.75); + + for(i=0; i< ref_tree->nb_edges; i++){ + nb_found[i] = 0; + 
bitset_hashmap_putvalue(hm,ref_tree->a_edges[i]->hashtbl[1],ref_tree->nb_taxa,i); + } + + +#pragma omp parallel for private( j, alt_tree, support) shared(nb_found, hm, ref_tree, alt_tree_strings, taxname_lookup_table, quiet, num_trees) schedule(dynamic) + for(i_tree=0; i_tree< num_trees; i_tree++){ + if(!quiet) fprintf(stderr,"New bootstrap tree : %d\n",i_tree); + alt_tree = complete_parse_nh(alt_tree_strings[i_tree], &taxname_lookup_table); + + if (alt_tree == NULL) { + fprintf(stderr,"Not a correct NH tree (%d). Skipping.\n%s\n",i_tree,alt_tree_strings[i_tree]); + continue; /* some files maybe not containing trees */ + } + if (alt_tree->nb_taxa != ref_tree->nb_taxa) { + fprintf(stderr,"This tree doesn't have the same number of taxa as the reference tree. Skipping.\n"); + continue; /* some files maybe not containing trees */ + } + + /****************************************************/ + /* comparison of the bipartitions, FBP method */ + /****************************************************/ + for (j = 0; j < alt_tree->nb_edges; j++) { + // We query the hashmap to see if the edge is present, and then get its reference index + int refindex = bitset_hashmap_value(hm, alt_tree->a_edges[j]->hashtbl[1], alt_tree->nb_taxa); + if (refindex>-1){ + #pragma omp atomic update + nb_found[refindex]++; + } + } + free_tree(alt_tree); + } + + #pragma omp barrier + + if(num_trees != 0) { + for (i = 0; i < ref_tree->nb_edges; i++) { + if(ref_tree->a_edges[i]->right->nneigh == 1) { continue; } + /* the bootstrap value for a branch is inscribed as the name of its descendant (always right side of the edge, by convention) */ + if(ref_tree->a_edges[i]->right->name) free(ref_tree->a_edges[i]->right->name); /* clear name if existing */ + ref_tree->a_edges[i]->right->name = (char*) malloc(16 * sizeof(char)); + support = (double) nb_found[i] * 1.0 / num_trees; + sprintf(ref_tree->a_edges[i]->right->name, "%.6f", support); + ref_tree->a_edges[i]->branch_support = support; + } + } + 
free(nb_found); + free_bitset_hashmap(hm); +} + +void tbe(Tree *ref_tree, Tree *ref_raw_tree, char **alt_tree_strings,char** taxname_lookup_table, FILE *stat_file, int num_trees, int quiet, double dist_cutoff, int count_per_branch){ + short unsigned** c_matrix; + short unsigned** i_matrix; + short unsigned** hamming; + short unsigned* min_dist_edge; /* array of edge ids corresponding to min Hamming distances */ + short unsigned* min_dist; + int i,j; + int m = ref_tree->nb_edges; + int n = ref_tree->nb_taxa; + Tree *alt_tree; + int i_tree; + int *dist_accu = (int*) calloc(m,sizeof(int)); /* array of distance sums, one per branch. Initialized to 0. */ + int **dist_accu_tmp; + double *moved_species_counts; /* array of average branch rate in which each taxon moves */ + int *moved_species; /* array of number of branches in which each taxon moves, in one bootstrap tree: initialized at each bootstrap tree */ + /** Max number of branches we can see in the bootstrap tree: If it has no multifurcation : binary tree--> ntax*2-2 (if rooted...) */ + int max_branches_boot = ref_tree->nb_taxa*2-2; + + /* array a[i][j] of number of bootstrap tree from which each taxon j moves around the branch i and that are closer than given distance */ + int **moved_species_counts_per_branch; + + if(stat_file != NULL && count_per_branch){ + moved_species_counts_per_branch = (int**) calloc(m,sizeof(int*)); + for(i=0;inb_taxa != n) { + fprintf(stderr,"This tree doesn't have the same number of taxa as the reference tree. Skipping.\n"); + continue; /* some files maybe not containing trees */ + } + + /* resetting the arrays that need be reset. By construction of the post-order traversal, + the other arrays (i_matrix, c_matrix and hamming) need not be reset. 
*/ + reset_matrices(n, m, max_branches_boot, &c_matrix, &i_matrix, &hamming, &min_dist,&min_dist_edge); + + /****************************************************/ + /* comparison of the bipartitions, Transfer method */ + /****************************************************/ + /* calculation of the C and I matrices (see Brehelin/Gascuel/Martin) */ + update_all_i_c_post_order_ref_tree(ref_tree, alt_tree, i_matrix, c_matrix); + update_all_i_c_post_order_boot_tree(ref_tree, alt_tree, i_matrix, c_matrix, hamming, min_dist, min_dist_edge); + + /* Looking at number of times each taxon moves around low distance branches */ + moved_species = (int*) calloc(n,sizeof(int)); + int nb_branches_close=0; + int j; + for(i=0;ia_edges[i]; + if (re->right->nneigh == 1) continue; + Edge* be = alt_tree->a_edges[min_dist_edge[i]]; + + double norm = ((double)min_dist[i]) * 1.0 / (((double)re->topo_depth) - 1.0); + int mindepth = (int)(ceil(1.0/dist_cutoff + 1.0)); + int* sm = species_to_move(re, be, min_dist[i], n); + for(j=0;jtopo_depth >= mindepth ){ + moved_species[sm[j]]++; + } + if(stat_file != NULL && count_per_branch){ + #pragma omp atomic update + moved_species_counts_per_branch[i][sm[j]]++; + } + } + if (norm <= dist_cutoff && re->topo_depth >= mindepth ){ + nb_branches_close++; + } + free(sm); + } + + /* output, just to see */ + for (i = 0; i < m; i++) { + /* Just backup for pvalue computation */ + dist_accu_tmp[i_tree][i] = min_dist[i]; + } + for (i=0; i < n; i++){ + #pragma omp atomic update + moved_species_counts[i] += ((double)moved_species[i])*1.0/((double)nb_branches_close); + } + + free_matrices(m, &c_matrix, &i_matrix, &hamming, &min_dist,&min_dist_edge); + free_tree(alt_tree); + free(moved_species); + } + + #pragma omp barrier + + for (i = 0; i < m; i++){ + for(i_tree=0; i_tree < num_trees; i_tree++){ + dist_accu[i] += dist_accu_tmp[i_tree][i]; + } + } + + double bootstrap_val, avg_dist; + + if(num_trees != 0) { + if(stat_file != NULL) + 
fprintf(stat_file,"EdgeId\tDepth\tMeanMinDist\n"); + + /* OUTPUT FINAL STATISTICS and UPDATE REF TREE WITH BOOTSTRAP VALUES */ + for (i = 0; i < ref_tree->nb_edges; i++) { + if(ref_tree->a_edges[i]->right->nneigh == 1) { continue; } + + /* the bootstrap value for a branch is inscribed as the name of its descendant (always right side of the edge, by convention) */ + if(ref_tree->a_edges[i]->right->name) free(ref_tree->a_edges[i]->right->name); /* clear name if existing */ + ref_tree->a_edges[i]->right->name = (char*) malloc(16 * sizeof(char)); + avg_dist = (double) dist_accu[i] * 1.0 / num_trees; + bootstrap_val = (double) 1.0 - avg_dist * 1.0 / (1.0 * ref_tree->a_edges[i]->topo_depth-1.0); + + if(stat_file != NULL) + fprintf(stat_file,"%d\t%d\t%f\n", i, (ref_tree->a_edges[i]->topo_depth), avg_dist); + + sprintf(ref_tree->a_edges[i]->right->name, "%.6f", bootstrap_val); + + ref_tree->a_edges[i]->branch_support = bootstrap_val; + + if(ref_raw_tree!=NULL){ + /* the bootstrap value for a branch is inscribed as the name of its descendant as id|avgdist|depth */ + if(ref_raw_tree->a_edges[i]->right->name) free(ref_raw_tree->a_edges[i]->right->name); /* clear name if existing */ + ref_raw_tree->a_edges[i]->right->name = (char*) malloc(16 * sizeof(char)); + avg_dist = (double) dist_accu[i] * 1.0 / num_trees; + sprintf(ref_raw_tree->a_edges[i]->right->name, "%d|%.6f|%d", ref_raw_tree->a_edges[i]->id, avg_dist,ref_tree->a_edges[i]->topo_depth); + } + } + + if(stat_file != NULL){ + fprintf(stat_file,"Taxon\ttIndex\n"); + for(i=0; ia_edges[i]->right->nneigh == 1) { continue; } + fprintf(stat_file,"%d\t%s", i,ref_tree->a_edges[i]->right->name); + for(j=0;j= dist) maxnb=nb_taxa-dist; + int *diff = (int*)calloc(maxnb,sizeof(int)); + int *equ = (int*)calloc(maxnb,sizeof(int)); + int nbdiff=0, nbequ=0; + + for(i = 0; i < nb_taxa; i++) { + if(lookup_id(re->hashtbl[1],i) != lookup_id(be->hashtbl[1],i)){ + diff[nbdiff]=i; + nbdiff++; + } else { + equ[nbequ] = i; + nbequ++; + } + } + 
if(nbdiff < nbequ){ + if(nbdiff != dist){ + fprintf(stderr,"Length of moved species array (%d) is not equal to the minimum distance found (%d)\n", nbdiff, dist); + Generic_Exit(__FILE__,__LINE__,__FUNCTION__,EXIT_FAILURE); + } + free(equ); + return diff; + } + if(nbequ != dist){ + fprintf(stderr,"Length of moved species array (%d) is not equal to the minimum distance found (%d)\n", nbequ, dist); + Generic_Exit(__FILE__,__LINE__,__FUNCTION__,EXIT_FAILURE); + } + free(diff); + return equ; +} diff --git a/booster/booster.h b/booster/booster.h new file mode 100644 index 000000000..c23f6292d --- /dev/null +++ b/booster/booster.h @@ -0,0 +1,14 @@ + +/** + interface to call booster for transfer bootstrap expectation (TBE) + @param input_tree reference tree file + @param boot_trees bootstrap trees file + @param out_tree output tree + @param out_raw_tree output raw tree + @param stat_out statistic output file + @param num_threads number of threads + @param quiet 1 to stay quiet, 0 otherwise + */ +int main_booster (const char* input_tree, const char *boot_trees, + const char* out_tree, const char* out_raw_tree, const char* stat_out, + int quiet); diff --git a/booster/externs.h b/booster/externs.h new file mode 100644 index 000000000..d6f0aa743 --- /dev/null +++ b/booster/externs.h @@ -0,0 +1,25 @@ +/* + +BOOSTER: BOOtstrap Support by TransfER: +BOOSTER is an alternative method to compute bootstrap branch supports +in large trees. It uses transfer distance between bipartitions, instead +of perfect match. + +Copyright (C) 2017 Frederic Lemoine, Jean-Baka Domelevo Entfellner, Olivier Gascuel + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +*/ +extern int ntax; /* this is set in parse_nh, in tree.c */ diff --git a/booster/hashmap.c b/booster/hashmap.c new file mode 100644 index 000000000..3e6072efa --- /dev/null +++ b/booster/hashmap.c @@ -0,0 +1,423 @@ +/* + +BOOSTER: BOOtstrap Support by TransfER: +BOOSTER is an alternative method to compute bootstrap branch supports +in large trees. It uses transfer distance between bipartitions, instead +of perfect match. + +Copyright (C) 2017 Frederic Lemoine, Jean-Baka Domelevo Entfellner, Olivier Gascuel + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +*/ + +/* + * Generic map implementation. 
+ */ +#include "hashmap.h" + +#include +#include +#include + +#define INITIAL_SIZE (256) +#define MAX_CHAIN_LENGTH (8) + +/* We need to keep keys and values */ +typedef struct _hashmap_element{ + char* key; + int in_use; + any_t data; +} hashmap_element; + +/* A hashmap has some maximum size and current size, + * as well as the data to hold. */ +typedef struct _hashmap_map{ + int table_size; + int size; + hashmap_element *data; +} hashmap_map; + +/* + * Return an empty hashmap, or NULL on failure. + */ +map_t hashmap_new() { + hashmap_map* m = (hashmap_map*) malloc(sizeof(hashmap_map)); + if(!m) goto err; + + m->data = (hashmap_element*) calloc(INITIAL_SIZE, sizeof(hashmap_element)); + if(!m->data) goto err; + + m->table_size = INITIAL_SIZE; + m->size = 0; + + return m; + err: + if (m) + hashmap_free(m); + return NULL; +} + +/* The implementation here was originally done by Gary S. Brown. I have + borrowed the tables directly, and made some minor changes to the + crc32-function (including changing the interface). //ylo */ + + /* ============================================================= */ + /* COPYRIGHT (C) 1986 Gary S. Brown. You may use this program, or */ + /* code or tables extracted from it, as desired without restriction. */ + /* */ + /* First, the polynomial itself and its table of feedback terms. The */ + /* polynomial is */ + /* X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0 */ + /* */ + /* Note that we take it "backwards" and put the highest-order term in */ + /* the lowest-order bit. The X^32 term is "implied"; the LSB is the */ + /* X^31 term, etc. The X^0 term (usually shown as "+1") results in */ + /* the MSB being 1. */ + /* */ + /* Note that the usual hardware shift register implementation, which */ + /* is what we're using (we're merely optimizing it by doing eight-bit */ + /* chunks at a time) shifts bits into the lowest-order term. In our */ + /* implementation, that means shifting towards the right. 
Why do we */ + /* do it this way? Because the calculated CRC must be transmitted in */ + /* order from highest-order term to lowest-order term. UARTs transmit */ + /* characters in order from LSB to MSB. By storing the CRC this way, */ + /* we hand it to the UART in the order low-byte to high-byte; the UART */ + /* sends each low-bit to high-bit; and the result is transmission bit */ + /* by bit from highest- to lowest-order term without requiring any bit */ + /* shuffling on our part. Reception works similarly. */ + /* */ + /* The feedback terms table consists of 256, 32-bit entries. Notes: */ + /* */ + /* The table can be generated at runtime if desired; code to do so */ + /* is shown later. It might not be obvious, but the feedback */ + /* terms simply represent the results of eight shift/xor opera- */ + /* tions for all combinations of data and CRC register values. */ + /* */ + /* The values must be right-shifted by eight bits by the "updcrc" */ + /* logic; the shift must be unsigned (bring in zeroes). On some */ + /* hardware you could probably optimize the shift in assembler by */ + /* using byte-swap instructions. 
*/ + /* polynomial $edb88320 */ + /* */ + /* -------------------------------------------------------------------- */ + +static unsigned long crc32_tab[] = { + 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, + 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, + 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, + 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, + 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, + 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, + 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, + 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, + 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, + 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, + 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, + 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, + 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, + 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, + 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, + 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, + 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, + 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, + 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, + 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, + 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, + 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, + 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, + 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, + 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, + 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, + 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, + 0x73dc1683L, 0xe3630b12L, 
0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, + 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, + 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, + 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, + 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, + 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, + 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, + 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, + 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, + 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, + 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, + 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, + 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, + 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, + 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, + 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, + 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, + 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, + 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, + 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, + 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, + 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, + 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, + 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, + 0x2d02ef8dL + }; + +/* Return a 32-bit CRC of the contents of the buffer. 
*/ + +unsigned long crc32_booster(const unsigned char *s, unsigned int len) +{ + unsigned int i; + unsigned long crc32val; + + crc32val = 0; + for (i = 0; i < len; i ++) + { + crc32val = + crc32_tab[(crc32val ^ s[i]) & 0xff] ^ + (crc32val >> 8); + } + return crc32val; +} + +/* + * Hashing function for a string + */ +unsigned int hashmap_hash_int(hashmap_map * m, char* keystring){ + + unsigned long key = crc32_booster((unsigned char*)(keystring), strlen(keystring)); + + /* Robert Jenkins' 32 bit Mix Function */ + key += (key << 12); + key ^= (key >> 22); + key += (key << 4); + key ^= (key >> 9); + key += (key << 10); + key ^= (key >> 2); + key += (key << 7); + key ^= (key >> 12); + + /* Knuth's Multiplicative Method */ + key = (key >> 3) * 2654435761; + + return key % m->table_size; +} + +/* + * Return the integer of the location in data + * to store the point to the item, or MAP_FULL. + */ +int hashmap_hash(map_t in, char* key){ + int curr; + int i; + + /* Cast the hashmap */ + hashmap_map* m = (hashmap_map *) in; + + /* If full, return immediately */ + if(m->size >= (m->table_size/2)) return MAP_FULL; + + /* Find the best index */ + curr = hashmap_hash_int(m, key); + + /* Linear probing */ + for(i = 0; i< MAX_CHAIN_LENGTH; i++){ + if(m->data[curr].in_use == 0) + return curr; + + if(m->data[curr].in_use == 1 && (strcmp(m->data[curr].key,key)==0)) + return curr; + + curr = (curr + 1) % m->table_size; + } + + return MAP_FULL; +} + +/* + * Doubles the size of the hashmap, and rehashes all the elements + */ +int hashmap_rehash(map_t in){ + int i; + int old_size; + hashmap_element* curr; + + /* Setup the new elements */ + hashmap_map *m = (hashmap_map *) in; + hashmap_element* temp = (hashmap_element *) + calloc(2 * m->table_size, sizeof(hashmap_element)); + if(!temp) return MAP_OMEM; + + /* Update the array */ + curr = m->data; + m->data = temp; + + /* Update the size */ + old_size = m->table_size; + m->table_size = 2 * m->table_size; + m->size = 0; + + /* Rehash the 
elements */ + for(i = 0; i < old_size; i++){ + int status; + + if (curr[i].in_use == 0) + continue; + + status = hashmap_put(m, curr[i].key, curr[i].data); + if (status != MAP_OK) + return status; + } + + free(curr); + + return MAP_OK; +} + +/* + * Add a pointer to the hashmap with some key + */ +int hashmap_put(map_t in, char* key, any_t value){ + int index; + hashmap_map* m; + + /* Cast the hashmap */ + m = (hashmap_map *) in; + + /* Find a place to put our value */ + index = hashmap_hash(in, key); + while(index == MAP_FULL){ + if (hashmap_rehash(in) == MAP_OMEM) { + return MAP_OMEM; + } + index = hashmap_hash(in, key); + } + + /* Set the data */ + m->data[index].data = value; + m->data[index].key = key; + m->data[index].in_use = 1; + m->size++; + + return MAP_OK; +} + +/* + * Get your pointer out of the hashmap with a key + */ +int hashmap_get(map_t in, char* key, any_t *arg){ + int curr; + int i; + hashmap_map* m; + + /* Cast the hashmap */ + m = (hashmap_map *) in; + + /* Find data location */ + curr = hashmap_hash_int(m, key); + + /* Linear probing, if necessary */ + for(i = 0; idata[curr].in_use; + if (in_use == 1){ + if (strcmp(m->data[curr].key,key)==0){ + *arg = (m->data[curr].data); + return MAP_OK; + } + } + + curr = (curr + 1) % m->table_size; + } + + *arg = NULL; + + /* Not found */ + return MAP_MISSING; +} + +/* + * Iterate the function parameter over each element in the hashmap. The + * additional any_t argument is passed to the function as its first + * argument and the hashmap element is the second. 
+ */ +int hashmap_iterate(map_t in, PFany f, any_t item) { + int i; + + /* Cast the hashmap */ + hashmap_map* m = (hashmap_map*) in; + + /* On empty hashmap, return immediately */ + if (hashmap_length(m) <= 0) + return MAP_MISSING; + + /* Linear probing */ + for(i = 0; i< m->table_size; i++) + if(m->data[i].in_use != 0) { + any_t data = (any_t) (m->data[i].data); + any_t key = (any_t) (m->data[i].key); + int status = f(item, key, data); + if (status != MAP_OK) { + return status; + } + } + + return MAP_OK; +} + +/* + * Remove an element with that key from the map + */ +int hashmap_remove(map_t in, char* key){ + int i; + int curr; + hashmap_map* m; + + /* Cast the hashmap */ + m = (hashmap_map *) in; + + /* Find key */ + curr = hashmap_hash_int(m, key); + + /* Linear probing, if necessary */ + for(i = 0; idata[curr].in_use; + if (in_use == 1){ + if (strcmp(m->data[curr].key,key)==0){ + /* Blank out the fields */ + m->data[curr].in_use = 0; + m->data[curr].data = NULL; + m->data[curr].key = NULL; + + /* Reduce the size */ + m->size--; + return MAP_OK; + } + } + curr = (curr + 1) % m->table_size; + } + + /* Data not found */ + return MAP_MISSING; +} + +/* Deallocate the hashmap */ +void hashmap_free(map_t in){ + hashmap_map* m = (hashmap_map*) in; + free(m->data); + free(m); +} + +/* Return the length of the hashmap */ +int hashmap_length(map_t in){ + hashmap_map* m = (hashmap_map *) in; + if(m != NULL) return m->size; + else return 0; +} diff --git a/booster/hashmap.h b/booster/hashmap.h new file mode 100644 index 000000000..05e1c25a5 --- /dev/null +++ b/booster/hashmap.h @@ -0,0 +1,106 @@ +/* + +BOOSTER: BOOtstrap Support by TransfER: +BOOSTER is an alternative method to compute bootstrap branch supports +in large trees. It uses transfer distance between bipartitions, instead +of perfect match. 
+ +Copyright (C) 2017 Frederic Lemoine, Jean-Baka Domelevo Entfellner, Olivier Gascuel + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +*/ + +/* + * Generic hashmap manipulation functions + * + * Originally by Elliot C Back - http://elliottback.com/wp/hashmap-implementation-in-c/ + * + * Modified by Pete Warden to fix a serious performance problem, support strings as keys + * and removed thread synchronization - http://petewarden.typepad.com + */ +#ifndef __HASHMAP_H__ +#define __HASHMAP_H__ + +#define MAP_MISSING -3 /* No such element */ +#define MAP_FULL -2 /* Hashmap is full */ +#define MAP_OMEM -1 /* Out of Memory */ +#define MAP_OK 0 /* OK */ + +/* + * any_t is a pointer. This allows you to put arbitrary structures in + * the hashmap. + */ +typedef void *any_t; + +/* + * PFany is a pointer to a function that can take three any_t arguments + * and return an integer. Returns a status code. + */ +typedef int (*PFany)(any_t, any_t, any_t); + +/* + * map_t is a pointer to an internally maintained data structure. + * Clients of this package do not need to know how hashmaps are + * represented. They see and manipulate only map_t's. + */ +typedef any_t map_t; + +/* + * Return an empty hashmap. Returns NULL on failure. 
+*/ +extern map_t hashmap_new(); + +/* + * Iteratively call f with argument (item, key, data) for + * each element data in the hashmap. The function must + * return a map status code. If it returns anything other + * than MAP_OK the traversal is terminated. f must + * not reenter any hashmap functions, or deadlock may arise. + */ +extern int hashmap_iterate(map_t in, PFany f, any_t item); + +/* + * Add an element to the hashmap. Return MAP_OK or MAP_OMEM. + */ +extern int hashmap_put(map_t in, char* key, any_t value); + +/* + * Get an element from the hashmap. Return MAP_OK or MAP_MISSING. + */ +extern int hashmap_get(map_t in, char* key, any_t *arg); + +/* + * Remove an element from the hashmap. Return MAP_OK or MAP_MISSING. + */ +extern int hashmap_remove(map_t in, char* key); + +/* + * Get any element. Return MAP_OK or MAP_MISSING. + * remove - should the element be removed from the hashmap + */ +extern int hashmap_get_one(map_t in, any_t *arg, int remove); + +/* + * Free the hashmap + */ +extern void hashmap_free(map_t in); + +/* + * Get the current size of a hashmap + */ +extern int hashmap_length(map_t in); + +#endif diff --git a/booster/hashtables_bfields.c b/booster/hashtables_bfields.c new file mode 100644 index 000000000..7a13071c4 --- /dev/null +++ b/booster/hashtables_bfields.c @@ -0,0 +1,269 @@ +/* + +BOOSTER: BOOtstrap Support by TransfER: +BOOSTER is an alternative method to compute bootstrap branch supports +in large trees. It uses transfer distance between bipartitions, instead +of perfect match. + +Copyright (C) 2017 Frederic Lemoine, Jean-Baka Domelevo Entfellner, Olivier Gascuel + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +*/ + +/* This file implements bit arrays to store Taxon_ids, for use in the Edges of the Tree objects. */ +#include "hashtables_bfields.h" +/* ntax is defined as an extern int in this header file. + chunksize is also defined there. */ + + +id_hash_table_t* create_id_hash_table(int size) +{ + /* here we leave the size parameter for compatibility with the old hashtable implementation, + but this parameter IS NOT USED in this one. We use the static variable nbchunks_bitarray insead. */ + id_hash_table_t *new_table = (id_hash_table_t*) malloc(sizeof(id_hash_table_t)); + new_table->num_items = 0; + + /* Attempt to allocate and initialize to 0 the memory for the bitfield */ + if ((new_table->bitarray = (bfield_t) calloc(nbchunks_bitarray, sizeof(unsigned long))) == NULL) + return NULL; + else + return new_table; +} + +id_hash_table_t* complement_id_hashtbl(id_hash_table_t* h, int nbtaxa) { + /* this creates a new hashtable and populates it with the complement of h */ + id_hash_table_t* c = create_id_hash_table(0); + int retval; + Taxon_id my_id; + for (my_id = 0; my_id < nbtaxa; my_id++) { + if (!lookup_id(h,my_id)) { retval = add_id(c, my_id); assert(retval == 0); } + } + return c; +} + + +int lookup_id(id_hash_table_t *hashtable, Taxon_id my_id) +{ + /* Returns whether the taxon is in the hashtable */ + if(my_id >= ntax) { + fprintf(stderr,"Error in %s: taxon ID %d is out of range. 
Aborting.\n", __FUNCTION__, my_id); + Generic_Exit(__FILE__,__LINE__,__FUNCTION__,EXIT_FAILURE); + } + int chunk = my_id / chunksize; + unsigned long *pointer = hashtable->bitarray + chunk; /* pointer to the long we want to access */ + int bit_index = my_id % chunksize; + unsigned long mask = 1UL << bit_index; /* within a long, the lsb corresponds to the taxon with lowest TaxonID */ + return ((*pointer & mask) != 0); +} + + +int add_id(id_hash_table_t *hashtable, Taxon_id my_id) +{ + /* retcodes: + 0 -> no error, insertion has been performed successfully + 1 -> memory allocation failed: no space in memory (impossible in this implementation, though) + 2 -> the id we want to add already exists in the id_hashtable + */ + int chunk = my_id / chunksize; + unsigned long *pointer = hashtable->bitarray + chunk; /* pointer to the long we want to access */ + int bit_index = my_id % chunksize; + unsigned long mask = (1UL << bit_index); /* within a long, the lsb corresponds to the taxon with lowest TaxonID */ + if (*pointer & mask) return 2; + else { + *pointer |= mask; /* sets to 1 the bit corresponding to the taxon. */ + /* and update the total number of items in the hashtable */ + hashtable->num_items++; + return 0; + } +} + +int delete_id(id_hash_table_t *hashtable, Taxon_id my_id) +{ + /* retcodes: + 0 -> no error, deletion has been performed successfully + 2 -> the id we are asked to delete was already set at 0 in the id_hashtable + */ + int chunk = my_id / chunksize; + unsigned long *pointer = hashtable->bitarray + chunk; /* pointer to the long we want to access */ + int bit_index = my_id % chunksize; + unsigned long mask = (1UL << bit_index); /* within a long, the lsb corresponds to the taxon with lowest TaxonID */ + if (!(*pointer & mask)) return 2; + else { + *pointer &= ~mask; /* sets to 0 the bit corresponding to the taxon. 
*/ + /* and update the total number of items in the hashtable */ + hashtable->num_items--; + return 0; + } +} + + +void clear_id_hashtable(id_hash_table_t *hashtable) { /* clears completely the hashtable (no taxa) */ + int chunk; + for (chunk = 0; chunk < nbchunks_bitarray; chunk++) hashtable->bitarray[chunk] = 0UL; + hashtable->num_items = 0; +} + + +void fill_id_hashtable(id_hash_table_t *hashtable, int nb_taxa) { /* sets all bits to 1 in the whole hashtable (all taxa) */ + int chunk; + unsigned long full_one = ~(0UL); + for (chunk = 0; chunk < nbchunks_bitarray; chunk++) hashtable->bitarray[chunk] = full_one; + /* the last bits of the last chunk are MEANINGLESS when chunksize is not a divisor of nb_taxa. */ + hashtable->num_items = nb_taxa; +} + +void complement_id_hashtable(id_hash_table_t *destination, const id_hash_table_t *source, int nb_taxa) { + /* transforms destination into the complement of source */ + int chunk; + for (chunk = 0; chunk < nbchunks_bitarray; chunk++) destination->bitarray[chunk] = ~(source->bitarray[chunk]); + destination->num_items = nb_taxa - source->num_items; +} + +unsigned int bitCount (unsigned long value) { + unsigned int count = 0; + while (value) { // until all bits are zero + if (value & 0x1) // check LSB + count++; + value >>= 1; // shift bits, deleting LSB + } + return count; +} + +void update_id_hashtable(id_hash_table_t *source, id_hash_table_t *destination) { + /* copies all the items from source into destination. Doesn't erase anything anywhere. + Doesn't produce duplicate entries in the destination. 
*/ + int chunk; + unsigned int added; + + for (chunk = 0; chunk < nbchunks_bitarray; chunk++) { + /* we first need to know how many new taxa we are going to add in destination */ + added = bitCount(source->bitarray[chunk] & ~destination->bitarray[chunk]); /* 1 in source AND O in dest */ + if (added) { + /* copy all items from source->bitarray[chunk] into destination */ + destination->bitarray[chunk] = (destination->bitarray[chunk] | source->bitarray[chunk]); + destination->num_items += added; + } /* end if added */ + } /* end of the for loop */ +} /* end update_id_hashtable */ + + +int equal_id_hashtables(id_hash_table_t *tbl1, id_hash_table_t *tbl2) { + /* this function compares the contents of the id_hashtables and returns a non-zero when tables are identical, + 0 otherwise */ + if(tbl1 == NULL) return (tbl2 == NULL); + if(tbl2 == NULL) return 0; /* because tbl1 not null */ + if(tbl1->num_items != tbl2->num_items) return 0; /* tables cannot be identical if they don't have the + same number of stored elements */ + int chunk; + /* we simply test the equality of the successive longs */ + for (chunk = 0; chunk < nbchunks_bitarray; chunk++) { + if (tbl1->bitarray[chunk] != tbl2->bitarray[chunk]) return 0; + } + /* here all the ids in tbl1 have been found also in tbl2, and the two tables have same size: */ + return 1; + +} /* end equal_id_hashtables */ + + +int complement_id_hashtables(id_hash_table_t *tbl1, id_hash_table_t *tbl2,int nb_taxa){ + /* this function compares the contents of the id_hashtables and returns a non-zero when tables are complement, + 0 otherwise */ + if(tbl1 == NULL) return (tbl2 == NULL); + if(tbl2 == NULL) return 0; /* because tbl1 not null */ + + int chunk; + /* we simply test the equality of the successive longs ==> Does not work for the last chunk */ + /* If the last long is < nbtaxa : the direct complement does not work! 
+ Example: + n taxa = 5 + chunk1 = 00000000 00000000 00000000 00011010 + chunk2 = 00000000 00000000 00000000 00000101 + ==> ~chunk2 = 11111111 11111111 11111111 11111010 + It does not work directly, we must put a mask depending on (nb_taxa%chunksize) + for the last chunk + chunk1 & mask = 00000000 00000000 00000000 00011010 + ~chunk2 & mask = 00000000 00000000 00000000 00011010 + ==> OK + The mask is (((unsigned long)1 << (nb_taxa%chunksize)) - 1); + */ + for (chunk = 0; chunk < nbchunks_bitarray; chunk++) { + /* Initialize Mask with 1111....11*/ + unsigned long mask = -1; + if(nb_taxa<(chunk+1)*chunksize){ + mask = (((unsigned long)1 << (nb_taxa%chunksize)) - 1); + } + if ((tbl1->bitarray[chunk]&mask) != ((~(tbl2->bitarray[chunk]))&mask)) return 0; + } + /* here all the ids in tbl1 have been found also in tbl2, and the two tables have same size: */ + return 1; +} /* end equal_id_hashtables */ + + +int equal_or_complement_id_hashtables(id_hash_table_t *tbl1, id_hash_table_t *tbl2, int total) { + return(complement_id_hashtables(tbl1,tbl2,total) || + equal_id_hashtables(tbl1,tbl2)); +} /* end equal_or_complement_id_hashtables */ + + +id_hash_table_t* suffle_hash_table(id_hash_table_t *hashtable, int total){ + id_hash_table_t * output = create_id_hash_table(total); + Taxon_id* taxid_array = malloc(total*sizeof(Taxon_id)); + Taxon_id i = 0; + for(i=0;ibitarray); + free(hashtable); +} + + + +void print_id_hashtable(FILE* stream, id_hash_table_t *hashtable, int nbtaxa) { + int i, chunk; + unsigned long mylong, base = 0, mask = 1, true_index; + char c; + for (chunk = 0; chunk < nbchunks_bitarray; chunk++) { + mylong = hashtable->bitarray[chunk]; + for (i = 0; i < chunksize; i++) { /* for all the bits in the unsigned long, starting with the LSB */ + true_index = base + i; + if (true_index == nbtaxa) break; /* end of the last loop */ + if (true_index % 8 == 0 && !(chunk==0 && i == 0)) fputc(' ', stream); /* write blocks of 8 chars for legibility */ + if ((mylong & mask) == 
1) c= '1' ; else c = '0'; + fputc(c, stream); + mylong >>= 1; + } /* end for on all the bits of the long */ + base += chunksize; /* so that in every loop, base is equal to chunk * chunksize */ + } /* end for on all the chunks (unsigned longs) */ + fputc('\n', stream); +} /* end print_id_hashtable */ + diff --git a/booster/hashtables_bfields.h b/booster/hashtables_bfields.h new file mode 100644 index 000000000..c0dbd80de --- /dev/null +++ b/booster/hashtables_bfields.h @@ -0,0 +1,88 @@ +/* + +BOOSTER: BOOtstrap Support by TransfER: +BOOSTER is an alternative method to compute bootstrap branch supports +in large trees. It uses transfer distance between bipartitions, instead +of perfect match. + +Copyright (C) 2017 Frederic Lemoine, Jean-Baka Domelevo Entfellner, Olivier Gascuel + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +*/ + +#ifndef _HASHTABLES_BFIELDS_H_ +#define _HASHTABLES_BFIELDS_H_ + +#include +#include +#include +#include +#include +#include "stats.h" +#include "externs.h" /* gives the extern declaration of ntax, actual number of taxa in the tree(s) dealt with */ + +/* here we implement bit arrays to store taxon IDs. A taxon ID is an integer, and thus an index in a large bit array. + A bipartition (== a subset of all the taxa) is a bit array in which the taxa that are present are all the bits set to 1. 
+ To be efficient in terms of storing the bipartitions, it is essential to have a variable length for our large bitfields. + The bitfields are allocated at runtime, when we know the value of ntax, the number of taxa in the tree. +*/ + +/* TYPE DEFINITIONS */ + +#define MAX_TAXON_ID USHRT_MAX +typedef unsigned short Taxon_id; /* this gives us room for at least 65,536 taxa in the tree, maybe more + (depending on implementation). Taxon id 0 IS VALID. We can tweak it further here. */ + + +typedef unsigned long* bfield_t; /* the bitfield type: a series of consecutive unsigned longs. */ +#define chunksize (8 * sizeof(unsigned long)) /* number of bits in a bitfield chunk, e.g. sizeof(unsigned long) = 4 means that chunksize = 32 */ +#define nbchunks_bitarray (ntax/chunksize + (ntax%chunksize != 0 ? 1 : 0)) /* euclidean division */ +/* and then this value never changes, it is the size of a bitarray in longs for this number of taxa. */ + + + +typedef struct _id_hash_table_t_ { + int num_items; /* the true number of items (ids) stored in this bit field */ + bfield_t bitarray; /* the bit field */ +} id_hash_table_t; + + +/* FUNCTIONS */ + + +/* on id hash tables */ +id_hash_table_t* create_id_hash_table(int size); +id_hash_table_t* complement_id_hashtbl(id_hash_table_t* h, int nbtaxa); + +int lookup_id(id_hash_table_t *hashtable, Taxon_id my_id); +int add_id(id_hash_table_t *hashtable, Taxon_id my_id); +int delete_id(id_hash_table_t *hashtable, Taxon_id my_id); +void clear_id_hashtable(id_hash_table_t *hashtable); +void fill_id_hashtable(id_hash_table_t *hashtable, int nb_taxa); +void complement_id_hashtable(id_hash_table_t *destination, const id_hash_table_t *source, int nb_taxa); +unsigned int bitCount (unsigned long value); +void update_id_hashtable(id_hash_table_t *source, id_hash_table_t *destination); +int equal_id_hashtables(id_hash_table_t *tbl1, id_hash_table_t *tbl2); +int complement_id_hashtables(id_hash_table_t *tbl1, id_hash_table_t *tbl2,int nb_taxa); +int 
equal_or_complement_id_hashtables(id_hash_table_t *tbl1, id_hash_table_t *tbl2, int total); +void free_id_hashtable(id_hash_table_t *hashtable); + +id_hash_table_t* suffle_hash_table(id_hash_table_t *hashtable, int total); + +void print_id_hashtable(FILE* stream, id_hash_table_t *hashtable, int nbtaxa); + + +#endif /* _HASHTABLES_BFIELDS_H_ */ diff --git a/booster/io.c b/booster/io.c new file mode 100644 index 000000000..8920f8b51 --- /dev/null +++ b/booster/io.c @@ -0,0 +1,31 @@ +/* + +BOOSTER: BOOtstrap Support by TransfER: +BOOSTER is an alternative method to compute bootstrap branch supports +in large trees. It uses transfer distance between bipartitions, instead +of perfect match. + +Copyright (C) 2017 Frederic Lemoine, Jean-Baka Domelevo Entfellner, Olivier Gascuel + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +*/ + +#include "io.h" + +void Generic_Exit(const char *file, int line, const char *function, int code){ + fprintf(stderr,"\n== Err. in file '%s' (line %d), function '%s'\n",file,line,function); + exit(code); +} diff --git a/booster/io.h b/booster/io.h new file mode 100644 index 000000000..274cfd309 --- /dev/null +++ b/booster/io.h @@ -0,0 +1,34 @@ +/* + +BOOSTER: BOOtstrap Support by TransfER: +BOOSTER is an alternative method to compute bootstrap branch supports +in large trees. 
It uses transfer distance between bipartitions, instead +of perfect match. + +Copyright (C) 2017 Frederic Lemoine, Jean-Baka Domelevo Entfellner, Olivier Gascuel + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +*/ + +#ifndef _IO_H +#define _IO_H +#include +#include + +/* Taken from PhyML*/ +void Generic_Exit(const char *file, int line, const char *function, int ret_code); + +#endif diff --git a/booster/prng.c b/booster/prng.c new file mode 100644 index 000000000..ee023e655 --- /dev/null +++ b/booster/prng.c @@ -0,0 +1,270 @@ +/* + * prng.c - Portable, ISO C90 and C99 compliant high-quality + * pseudo-random number generator based on the alleged RC4 + * cipher. This PRNG should be suitable for most general-purpose + * uses. Not recommended for cryptographic or financial + * purposes. Not thread-safe. + */ + +/* + * Copyright (c) 2004 Ben Pfaff . + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the + * following conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS + * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY + * OF SUCH DAMAGE. + * + */ + +#include "prng.h" +#include +#include +#include +#include +#include +#include + +/* RC4-based pseudo-random state. */ +static unsigned char s[256]; +static int s_i, s_j; + +/* Nonzero if PRNG has been seeded. */ +static int seeded; + +/* Swap bytes that A and B point to. */ +#define SWAP_BYTE(A, B) \ + do { \ + unsigned char swap_temp = *(A); \ + *(A) = *(B); \ + *(B) = swap_temp; \ + } while (0) + +/* Seeds the pseudo-random number generator based on the current + time. + + If the user calls neither this function nor prng_seed_bytes() + before any prng_get*() function, this function is called + automatically to obtain a time-based seed. */ +long +prng_seed_time (void) +{ + static time_t t; + if (t == 0) + t = time (NULL); + else + t++; + + prng_seed_bytes (&t, sizeof t); + return((long)t); +} + +/* Retrieves one octet from the array BYTES, which is N_BYTES in + size, starting at an offset of OCTET_IDX octets. 
BYTES is + treated as a circular array, so that accesses past the first + N_BYTES bytes wrap around to the beginning. */ +static unsigned char +get_octet (const void *bytes_, size_t n_bytes, size_t octet_idx) +{ + const unsigned char *bytes = bytes_; + if (CHAR_BIT == 8) + return bytes[octet_idx % n_bytes]; + else + { + size_t first_byte = octet_idx * 8 / CHAR_BIT % n_bytes; + size_t start_bit = octet_idx * 8 % CHAR_BIT; + unsigned char c = (bytes[first_byte] >> start_bit) & 255; + + size_t bits_filled = CHAR_BIT - start_bit; + if (CHAR_BIT % 8 != 0 && bits_filled < 8) + { + size_t bits_left = 8 - bits_filled; + unsigned char bits_left_mask = (1u << bits_left) - 1; + size_t second_byte = first_byte + 1 < n_bytes ? first_byte + 1 : 0; + + c |= (bytes[second_byte] & bits_left_mask) << bits_filled; + } + + return c; + } +} + +/* Seeds the pseudo-random number based on the SIZE bytes in + KEY. At most the first 2048 bits in KEY are used. */ +void +prng_seed_bytes (const void *key, size_t size) +{ + int i, j; + + assert (key != NULL && size > 0); + + for (i = 0; i < 256; i++) + s[i] = i; + for (i = j = 0; i < 256; i++) + { + j = (j + s[i] + get_octet (key, size, i)) & 255; + SWAP_BYTE (s + i, s + j); + } + + s_i = s_j = 0; + seeded = 1; +} + +/* Returns a pseudo-random integer in the range [0, 255]. */ +unsigned char +prng_get_octet (void) +{ + if (!seeded) + prng_seed_time (); + + s_i = (s_i + 1) & 255; + s_j = (s_j + s[s_i]) & 255; + SWAP_BYTE (s + s_i, s + s_j); + + return s[(s[s_i] + s[s_j]) & 255]; +} + +/* Returns a pseudo-random integer in the range [0, UCHAR_MAX]. */ +unsigned char +prng_get_byte (void) +{ + unsigned byte; + int bits; + + byte = prng_get_octet (); + for (bits = 8; bits < CHAR_BIT; bits += 8) + byte = (byte << 8) | prng_get_octet (); + return byte; +} + +/* Fills BUF with SIZE pseudo-random bytes. 
*/ +void +prng_get_bytes (void *buf_, size_t size) +{ + unsigned char *buf; + + for (buf = buf_; size-- > 0; buf++) + *buf = prng_get_byte (); +} + +/* Returns a pseudo-random unsigned long in the range [0, + ULONG_MAX]. */ +unsigned long +prng_get_ulong (void) +{ + unsigned long ulng; + size_t bits; + + ulng = prng_get_octet (); + for (bits = 8; bits < CHAR_BIT * sizeof ulng; bits += 8) + ulng = (ulng << 8) | prng_get_octet (); + return ulng; +} + +/* Returns a pseudo-random long in the range [0, LONG_MAX]. */ +long +prng_get_long (void) +{ + return prng_get_ulong () & LONG_MAX; +} + +/* Returns a pseudo-random unsigned int in the range [0, + UINT_MAX]. */ +unsigned +prng_get_uint (void) +{ + unsigned uint; + size_t bits; + + uint = prng_get_octet (); + for (bits = 8; bits < CHAR_BIT * sizeof uint; bits += 8) + uint = (uint << 8) | prng_get_octet (); + return uint; +} + +/* Returns a pseudo-random int in the range [0, INT_MAX]. */ +int +prng_get_int (void) +{ + return prng_get_uint () & INT_MAX; +} + +/* Returns a pseudo-random floating-point number from the uniform + distribution with range [0,1). */ +double +prng_get_double (void) +{ + for (;;) + { + double dbl = prng_get_ulong () / (ULONG_MAX + 1.0); + if (dbl >= 0.0 && dbl < 1.0) + return dbl; + } +} + +/* Returns a pseudo-random floating-point number from the + distribution with mean 0 and standard deviation 1. (Multiply + the result by the desired standard deviation, then add the + desired mean.) */ +double +prng_get_double_normal (void) +{ + /* Knuth, _The Art of Computer Programming_, Vol. 2, 3.4.1C, + Algorithm P. 
*/ + static int has_next = 0; + static double next_normal; + double this_normal; + + if (has_next) + { + this_normal = next_normal; + has_next = 0; + } + else + { + static double limit; + double v1, v2, s; + + if (limit == 0.0) + limit = log (DBL_MAX / 2) / (DBL_MAX / 2); + + for (;;) + { + double u1 = prng_get_double (); + double u2 = prng_get_double (); + v1 = 2.0 * u1 - 1.0; + v2 = 2.0 * u2 - 1.0; + s = v1 * v1 + v2 * v2; + if (s > limit && s < 1) + break; + } + + this_normal = v1 * sqrt (-2. * log (s) / s); + next_normal = v2 * sqrt (-2. * log (s) / s); + has_next = 1; + } + + return this_normal; +} diff --git a/booster/prng.h b/booster/prng.h new file mode 100644 index 000000000..8dd96643a --- /dev/null +++ b/booster/prng.h @@ -0,0 +1,43 @@ +/* + +BOOSTER: BOOtstrap Support by TransfER: +BOOSTER is an alternative method to compute bootstrap branch supports +in large trees. It uses transfer distance between bipartitions, instead +of perfect match. + +Copyright (C) 2017 Frederic Lemoine, Jean-Baka Domelevo Entfellner, Olivier Gascuel + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +*/ + +#ifndef PRNG_H_INCLUDED +#define PRNG_H_INCLUDED + +#include + +long prng_seed_time (void); +void prng_seed_bytes (const void *, size_t); +unsigned char prng_get_octet (void); +unsigned char prng_get_byte (void); +void prng_get_bytes (void *, size_t); +unsigned long prng_get_ulong (void); +long prng_get_long (void); +unsigned prng_get_uint (void); +int prng_get_int (void); +double prng_get_double (void); +double prng_get_double_normal (void); + +#endif /* prng.h */ diff --git a/booster/sort.c b/booster/sort.c new file mode 100644 index 000000000..045bea2b9 --- /dev/null +++ b/booster/sort.c @@ -0,0 +1,64 @@ +/* + +BOOSTER: BOOtstrap Support by TransfER: +BOOSTER is an alternative method to compute bootstrap branch supports +in large trees. It uses transfer distance between bipartitions, instead +of perfect match. + +Copyright (C) 2017 Frederic Lemoine, Jean-Baka Domelevo Entfellner, Olivier Gascuel + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +*/ + +#include "sort.h" + +void sort_double(double*tab, int size){ + qsort(tab, size, sizeof(double), comp_double); +} + +/* void sort_indexes_double(int * indexes, int size, double * values){ */ +/* #ifdef __APPLE__ */ +/* qsort_r(indexes, size, sizeof(int), values, comp_indexes_apple); */ +/* #else */ +/* qsort_r(indexes, size, sizeof(int), comp_indexes, values); */ +/* #endif */ +/* } */ + +int comp_double(const void * elem1, const void * elem2){ + double f = *((double*)elem1); + double s = *((double*)elem2); + if (f > s) return 1; + if (f < s) return -1; + return 0; +} + +int comp_indexes(const void * elem1, const void * elem2, void * other_array){ + int i1 = *((int*)elem1); + int i2 = *((int*)elem2); + + double * other = (double*)other_array; + + double val1 = other[i1]; + double val2 = other[i2]; + + if (val1 > val2) return 1; + if (val1 < val2) return -1; + return 0; +} + +int comp_indexes_apple(void * other_array, const void * elem1, const void * elem2){ + return(comp_indexes(elem1,elem2,other_array)); +} diff --git a/booster/sort.h b/booster/sort.h new file mode 100644 index 000000000..7d93f371c --- /dev/null +++ b/booster/sort.h @@ -0,0 +1,39 @@ +/* + +BOOSTER: BOOtstrap Support by TransfER: +BOOSTER is an alternative method to compute bootstrap branch supports +in large trees. It uses transfer distance between bipartitions, instead +of perfect match. + +Copyright (C) 2017 Frederic Lemoine, Jean-Baka Domelevo Entfellner, Olivier Gascuel + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +*/ + +#define _GNU_SOURCE +#include + +#ifndef _SORT_H +#define _SORT_H + +int comp_double(const void * elem1, const void * elem2); +int comp_indexes(const void * elem1, const void * elem2, void * other_array); +int comp_indexes_apple(void * other_array, const void * elem1, const void * elem2); + +void sort_double(double * tab, int size); +void sort_indexes_double(int * indexes, int size, double * values); + +#endif diff --git a/booster/stats.c b/booster/stats.c new file mode 100644 index 000000000..e64ce8952 --- /dev/null +++ b/booster/stats.c @@ -0,0 +1,585 @@ +/* + +BOOSTER: BOOtstrap Support by TransfER: +BOOSTER is an alternative method to compute bootstrap branch supports +in large trees. It uses transfer distance between bipartitions, instead +of perfect match. + +Copyright (C) 2017 Frederic Lemoine, Jean-Baka Domelevo Entfellner, Olivier Gascuel + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +*/ + +#include "stats.h" + +/************************************************/ +/* BASIC FUNCTIONS */ +/************************************************/ + +/* this file contains basic operations on numerical arrays (min, max, mean, sorting, median, debug printing, etc) */ +int min_int(int a, int b) { + return (ab ? a : b); +} + +int max_int_vec(int* myvec, int length) { + if (length==0) return -1; + int i, maximum = myvec[0]; + for(i=1;ib ? a : b); +} + + +void print_int_vec(FILE* out, int* myvec, int length) { + int i; + for(i=0;i vec[1]) swap_ints(vec,vec+1); /* swapping with pointer arithmetic */ + return; /* we're done */ + } /* end if length == 2 */ + + /* implicit else: here length > 2 */ + int breakpoint = (int) floor(length / 2); + /* breakpoint is the number of values in the first half */ + int length1 = breakpoint, length2 = length - breakpoint; + + divide_and_conquer_int_vec(vec, length1); + divide_and_conquer_int_vec(vec+breakpoint, length2); + merge_sorted_int_vecs(vec, length1, length2); + return ; + +} /* end divide_and_conquer_int_vec */ + + +void merge_sorted_double_vecs(double* myvec, int length1, int length2) { + /* this function assumes that we have myvec[0..(length1-1)] + and myvec[length1..(length1+length2-1)] that are two sorted vectors. + It merges the two in place, reusing the initial space. 
*/ + int i, index1=0, index2=0, index_res=0, total_length = length1 + length2; + double temp[total_length]; + double* vec1 = myvec, *vec2 = myvec+length1; /* pointer arithmetic */ + /* index1 and index2 indicate the next elements of the two subvectors to be processed */ + while(index1 < length1 && index2 < length2) { + /* there are still elements to treat in both vectors */ + if(vec1[index1] <= vec2[index2]) temp[index_res++] = vec1[index1++]; + else temp[index_res++] = vec2[index2++]; + } + /* now at least one of the input subvecs is fully processed, remains the other: */ + if (index1 < length1) for (i = index1; i < length1; i++) temp[index_res++] = vec1[i]; + else for (i = index2; i < length2; i++) temp[index_res++] = vec2[i]; + /* sanity check */ + if (index_res != total_length) { + fprintf(stderr,"fatal error : input lengths do not sum up to output length. Aborting.\n"); + Generic_Exit(__FILE__,__LINE__,__FUNCTION__,EXIT_FAILURE); + } + /* now we copy the result back into the original vector, to do the thing in place */ + for(i=0;i vec[1]) swap_doubles(vec,vec+1); /* swapping with pointer arithmetic */ + return; /* we're done */ + } /* end if length == 2 */ + + /* implicit else: here length > 2 */ + int breakpoint = (int) floor(length / 2); + /* breakpoint is the number of values in the first half */ + int length1 = breakpoint, length2 = length - breakpoint; + + divide_and_conquer_double_vec(vec, length1); + divide_and_conquer_double_vec(vec+breakpoint, length2); + merge_sorted_double_vecs(vec, length1, length2); + return ; + +} /* end divide_and_conquer_double_vec */ + + +/************************************************/ +/* STAT FUNCTIONS */ +/************************************************/ + + +double unif(){ + double unif = 0.5; + unif = (unif + prng_get_int())/ INT_MAX; + return(unif); +} + +double exponentiel(double lambda){ + double exponentiel = unif(); + exponentiel = -log(1 - exponentiel) / lambda; + return(exponentiel); +} + +double gauss(){ + 
double unif1 = unif(); + double unif2 = unif(); + double gauss = sqrt(-2*log(unif1))*sin(2 * S_PI * (unif2)); + return(gauss); +} + +double normal(double mu, double sig){ + return(mu + (sig*gauss())); +} + +int proba(double p){ + return(unif() 1 ) { + size_t k = rand_to(n--); + memcpy(temp, BYTE(obj) + n*size, size); + memcpy(BYTE(obj) + n*size, BYTE(obj) + k*size, size); + memcpy(BYTE(obj) + k*size, temp, size); + } + free(temp); +} + +/* take a random int from [0,max[ */ +int rand_to(int max){ + return(prng_get_int()%max); +} + +double sigma(double * values, int nb_values){ + double mean = 0.0; + double var = 0.0; + int i; + for(i = 0; i < nb_values; i++){ + mean += values[i]; + } + + for(i = 0; i < nb_values; i++){ + var += pow((values[i] - mean),2); + } + return(sqrt(var)); +} + +double sum(double * array, int size){ + int i; + double sum = 0; + for(i = 0; i < size; i++){ + sum += array[i]; + } + return(sum); +} + +/* Original C++ implementation found at http://www.wilmott.com/messageview.cfm?catid=10&threadid=38771 */ +/* C# implementation found at http://weblogs.asp.net/esanchez/archive/2010/07/29/a-quick-and-dirty-implementation-of-excel-norminv-function-in-c.aspx*/ +/* + * Compute the quantile function for the normal distribution. + * + * For small to moderate probabilities, algorithm referenced + * below is used to obtain an initial approximation which is + * polished with a final Newton step. + * + * For very large arguments, an algorithm of Wichura is used. + * + * REFERENCE + * + * Beasley, J. D. and S. G. Springer (1977). + * Algorithm AS 111: The percentage points of the normal distribution, + * Applied Statistics, 26, 118-121. + * + * Wichura, M.J. (1988). + * Algorithm AS 241: The Percentage Points of the Normal Distribution. + * Applied Statistics, 37, 477-484. 
+ */ +/* Taken from https://gist.github.com/kmpm/1211922/ */ +double qnorm(double p, double mu, double sigma){ + double q, r, val; + + if (p < 0 || p > 1){ + fprintf(stderr,"Warning: p is < 0 or > 1 : returning DBL_MIN\n"); + return NAN; + } + if (sigma < 0){ + fprintf(stderr,"Warning: sigma is < 0 : returning NaN\n"); + return NAN; + } + if (p == 0){ + return -INFINITY; + } + if (p == 1){ + return INFINITY; + } + + if (sigma == 0){ + return mu; + } + q = p - 0.5; + /*-- use AS 241 --- */ + /* double ppnd16_(double *p, long *ifault)*/ + /* ALGORITHM AS241 APPL. STATIST. (1988) VOL. 37, NO. 3 + Produces the normal deviate Z corresponding to a given lower + tail area of P; Z is accurate to about 1 part in 10**16. + */ + if (fabs(q) <= .425){/* 0.075 <= p <= 0.925 */ + r = .180625 - q * q; + val = + q * (((((((r * 2509.0809287301226727 + + 33430.575583588128105) * r + 67265.770927008700853) * r + + 45921.953931549871457) * r + 13731.693765509461125) * r + + 1971.5909503065514427) * r + 133.14166789178437745) * r + + 3.387132872796366608) + / (((((((r * 5226.495278852854561 + + 28729.085735721942674) * r + 39307.89580009271061) * r + + 21213.794301586595867) * r + 5394.1960214247511077) * r + + 687.1870074920579083) * r + 42.313330701600911252) * r + 1); + } else { /* closer than 0.075 from {0,1} boundary */ + /* r = min(p, 1-p) < 0.075 */ + if (q > 0) + r = 1 - p; + else + r = p; + r = sqrt(-log(r)); + /* r = sqrt(-log(r)) <==> min(p, 1-p) = exp( - r^2 ) */ + if (r <= 5){ /* <==> min(p,1-p) >= exp(-25) ~= 1.3888e-11 */ + r += -1.6; + val = (((((((r * 7.7454501427834140764e-4 + + .0227238449892691845833) * r + .24178072517745061177) * + r + 1.27045825245236838258) * r + + 3.64784832476320460504) * r + 5.7694972214606914055) * + r + 4.6303378461565452959) * r + + 1.42343711074968357734) + / (((((((r * + 1.05075007164441684324e-9 + 5.475938084995344946e-4) * + r + .0151986665636164571966) * r + + .14810397642748007459) * r + .68976733498510000455) * + r + 
1.6763848301838038494) * r + + 2.05319162663775882187) * r + 1); + } else { /* very close to 0 or 1 */ + r += -5; + val = (((((((r * 2.01033439929228813265e-7 + + 2.71155556874348757815e-5) * r + + .0012426609473880784386) * r + .026532189526576123093) * + r + .29656057182850489123) * r + + 1.7848265399172913358) * r + 5.4637849111641143699) * + r + 6.6579046435011037772) + / (((((((r * + 2.04426310338993978564e-15 + 1.4215117583164458887e-7) * + r + 1.8463183175100546818e-5) * r + + 7.868691311456132591e-4) * r + .0148753612908506148525) + * r + .13692988092273580531) * r + + .59983220655588793769) * r + 1); + } + if (q < 0.0){ + val = -val; + } + } + return mu + sigma * val; +} + + +/* From https://en.wikipedia.org/wiki/Normal_distribution */ +double pnorm(double x){ + double value,sum,result; + int i; + sum = x; + value=x; + for(i=1;i<=100;i++){ + value=(value*x*x/(2*i+1)); + sum=sum+value; + } + result=0.5+(sum/sqrt(2*S_PI))*exp(-(x*x)/2); + return(result); +} + +double log_fact(int n){ + int i; + double lf = (double) 0.0; + for (i = 2; i <= n; i++){ + lf = lf + (double) log((double)i); + } + return lf; +} + +double factorial_log_rmnj(int n){ + if (n==0) { + return(0.0); + } else if (n<=100) { + return(log_fact(n)); + } else { + double accu = 0.0; + accu += (double) log((double)n*(1.0+4.0*n*(1.0+2.0*n)) + 1.0/30.0 - 11.0/(240.0*n))/6.0; + accu += (double) log(S_PI)/ 2.0; + accu -= (double) n; + accu += (double) n * log(n); + return( accu ); + } +} diff --git a/booster/stats.h b/booster/stats.h new file mode 100644 index 000000000..5abd38ed0 --- /dev/null +++ b/booster/stats.h @@ -0,0 +1,126 @@ +/* + +BOOSTER: BOOtstrap Support by TransfER: +BOOSTER is an alternative method to compute bootstrap branch supports +in large trees. It uses transfer distance between bipartitions, instead +of perfect match. 
+ +Copyright (C) 2017 Frederic Lemoine, Jean-Baka Domelevo Entfellner, Olivier Gascuel + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +*/ + +#ifndef _STAT_H +#define _STAT_H + +#include +#include +#include +#include +#include +#include + +#include "prng.h" +#include "io.h" + +#define S_PI 3.14159265358979323846264338327950288 + +#define MAX(x, y) (((x) > (y)) ? (x) : (y)) +#define MIN(x, y) (((x) < (y)) ? 
(x) : (y)) + +/************************************************/ +/* BASIC FUNCTIONS */ +/************************************************/ + +int min_int(int a, int b); +int max_int(int a, int b); + +int max_int_vec(int* myvec, int length); +short unsigned max_short_unsigned_vec(short unsigned* myvec, int length); + +double min_double(double a, double b); +double max_double(double a, double b); + +void print_int_vec(FILE* out, int* myvec, int length); +void print_double_vec(FILE* out, double* myvec, int length); + +double mean_int_vec(int* myvec, int length); +double mean_double_vec(double* myvec, int length); + +int median_int_vec(int* myvec, int length); +double median_double_vec(double* myvec, int length); + +void summary_double_vec(double* myvec, int length, double* result); +void summary_double_vec_nocopy(double* myvec, int length, double* result); + +int sum_vec_of_ints(int* table, int size); +int sum_vec_of_ints_but_one(int* table, int size, int index_to_ignore); + +int swap_ints(int* a, int* b); +int swap_doubles(double* a, double* b); + +void merge_sorted_int_vecs(int* myvec, int length1, int length2); +void divide_and_conquer_int_vec(int* vec, int length); + +void merge_sorted_double_vecs(double* myvec, int length1, int length2); +void divide_and_conquer_double_vec(double* vec, int length); + +/************************************************/ +/* STAT FUNCTIONS */ +/************************************************/ +double unif(); +double exponentiel(double lambda); +double gauss(); +double normal(double mu, double sig); +int proba(double p); +int binomial(double p, int nb); + +/* Sample num ints from the data (of length size) + if !replace then without replacement +*/ +int* sample(int* data, int size, int num, int replace); +/* Shuffles the array */ +#define BYTE(X) ((unsigned char *)(X)) +void shuffle(void *obj, size_t nmemb, size_t size); +/* Samples num values from the ungrouped version of the data array: + Example: data array: + data[0]=3; 
data[1]=0; data[2]=4 + It will return a sample (of size num ) from : + 0,0,0,2,2,2,2 + num must be <= sum(data) : otherwize returns 0 filled array + The output is grouped by indice , i.e: + output[0]=2; output[1]=0; output[2]=3 + AND NOT: + 0,0,2,2,2 + So the output has the same size than data , i.e : length +*/ +int* sample_from_counts(int* data, int length, int num, int replace); + +/* rand in [0,max[ */ +int rand_to(int max); + +/* ecart type */ +double sigma(double * values, int nb_values); +double sum(double * array, int size); +double qnorm(double x, double mean, double sd); +double pnorm(double x); + +/* Computes the factorial of n */ +double log_fact(int n); +/* Computes the log of factorial of n using rmnj approximation */ +double factorial_log_rmnj(int n); + +#endif diff --git a/booster/test.c b/booster/test.c new file mode 100644 index 000000000..b2028828a --- /dev/null +++ b/booster/test.c @@ -0,0 +1,1472 @@ +/* + +BOOSTER: BOOtstrap Support by TransfER: +BOOSTER is an alternative method to compute bootstrap branch supports +in large trees. It uses transfer distance between bipartitions, instead +of perfect match. + +Copyright (C) 2017 Frederic Lemoine, Jean-Baka Domelevo Entfellner, Olivier Gascuel + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +*/ + +#include "hashtables_bfields.h" +#include "stats.h" +#include "hashmap.h" +#include "tree.h" +#include "tree_utils.h" + +/* Returns a table of all node ids of the tree, with 1 if they are taxon on the side of the edge, 0 if not (or internal) */ +int fill_all_taxa_ids(Node *node, Node *prev, int *output){ + int i; + int nbt = 0; + if(node->nneigh == 1){ + output[node->id] = 1; + return 1; + } else{ + for(i=0; i < node->nneigh; i++){ + if(node->neigh[i] != prev) + nbt+=fill_all_taxa_ids(node->neigh[i], node, output); + } + return(nbt); + } +} + + +/* Returns a table of a sample of the node ids that are taxon and on the orientation given */ +int * sample_taxa(Tree *t, int n_to_sample, Node *node, Node *prev){ + int * allnodes = (int*) calloc(t->nb_nodes, sizeof(int)); + int nbtax = fill_all_taxa_ids(node,prev,allnodes); + int * alltax = (int*) calloc(nbtax, sizeof(int)); + int *output; + int cur = 0; + int i; + for(i=0; i < t->nb_nodes; i++){ + if(allnodes[i]){ + alltax[cur] = i; + cur++; + } + } + output = sample(alltax, nbtax, n_to_sample, 0); + free(allnodes); + free(alltax); + + return(output); +} + +/** + This method swaps 2 edges connected to the given edge. + If e is terminal, does nothing + Before + a d + \ e / + left.---.right + / \ + b c + + After + c d a b + \ / \ / + .---. or .---. 
+ / \ / \ + b a d c + + Randomly one of the two options + returns the min topo_depth of the 2 swaped branches + */ +int swap_branches(Tree *t, Edge *e){ + Node *left = e->left; + Node *right = e->right; + if(left->nneigh == 1 || right->nneigh==1){ + return(0); + } + + int dir_left_to_right = dir_a_to_b(left, right); + int dir_right_to_left = dir_a_to_b(right, left); + + /* The two edges are chosen randomly in the left and in the right of e */ + int picked_left_index = rand_to(left->nneigh-1)+1; + int picked_right_index = rand_to(right->nneigh-1)+1; + + picked_left_index = (dir_left_to_right+picked_left_index)%left->nneigh; + picked_right_index= (dir_right_to_left+picked_right_index)%right->nneigh; + + Edge *picked_left_branch = left->br[picked_left_index]; + Edge *picked_right_branch = right->br[picked_right_index]; + + int i; + /* fprintf(stderr,"left branch %d | Topo= %d\n",picked_left_branch->id,picked_left_branch->topo_depth); */ + /* fprintf(stderr,"right branch %d | Topo= %d\n",picked_right_branch->id,picked_right_branch->topo_depth); */ + /* for(i=0;inb_taxa;i++){ */ + /* if(lookup_id(t->a_edges[picked_left_branch->id]->hashtbl[1],i)) */ + /* fprintf(stderr," %s",t->taxa_names[i]); */ + /* } */ + /* fprintf(stderr,"\n"); */ + /* for(i=0;inb_taxa;i++){ */ + /* if(lookup_id(t->a_edges[picked_right_branch->id]->hashtbl[1],i)) */ + /* fprintf(stderr," %s",t->taxa_names[i]); */ + /* } */ + /* fprintf(stderr,"\n"); */ + + + int sum_depth = picked_left_branch->topo_depth + picked_right_branch->topo_depth; + + /** + All the other participants to this swap (see figure) + */ + Node *a,*c; + int a_to_left_dir, + left_to_a_dir; + int c_to_right_dir, + right_to_c_dir; + + if(picked_left_branch->right==left){ + a=picked_left_branch->left; + }else{ + a=picked_left_branch->right; + } + if(picked_right_branch->right==right){ + c=picked_right_branch->left; + }else{ + c=picked_right_branch->right; + } + a_to_left_dir = dir_a_to_b(a, left); + left_to_a_dir = dir_a_to_b(left, 
a); + c_to_right_dir = dir_a_to_b(c, right); + right_to_c_dir = dir_a_to_b(right, c); + + /** + We swap the two edges + */ + + /* First swap the edge pointers of the edges */ + if(picked_left_branch->right==left){ + picked_left_branch->right = right; + }else{ + picked_left_branch->left = right; + } + if(picked_right_branch->right==right){ + picked_right_branch->right = left; + }else{ + picked_right_branch->left = left; + } + + /*We then swap the node pointers of the nodes*/ + a->neigh[a_to_left_dir] = right; + c->neigh[c_to_right_dir]= left; + left->neigh[left_to_a_dir] = c; + right->neigh[right_to_c_dir] = a; + + /* And the final edges pointers of the nodes */ + left->br[picked_left_index] = picked_right_branch; + right->br[picked_right_index] = picked_left_branch; + + /** + We recompute hashtables and node depths + */ + for (i = 0; i < t->nb_edges; i++) { + if(t->a_edges[i]->hashtbl[0] != NULL) + free_id_hashtable(t->a_edges[i]->hashtbl[0]); + if(t->a_edges[i]->hashtbl[1] != NULL) + free_id_hashtable(t->a_edges[i]->hashtbl[1]); + t->a_edges[i]->hashtbl[0] = create_id_hash_table(t->length_hashtables); + t->a_edges[i]->hashtbl[1] = create_id_hash_table(t->length_hashtables); + } + + update_hashtables_post_alltree(t); + update_hashtables_pre_alltree(t); + update_node_depths_post_alltree(t); + update_node_depths_pre_alltree(t); + + for (i = 0; i < t->nb_edges; i++) { + free_id_hashtable(t->a_edges[i]->hashtbl[0]); + t->a_edges[i]->hashtbl[0] = NULL; + } + + /* topological depths of branches */ + update_all_topo_depths_from_hashtables(t); + return(sum_depth); +} + +/** + Here we will test the classical bootstrap with a very simple case (to test hashtables) + + */ +int test_classical_bootstrap(){ + /* + Tree 1 Bootstrap tree: + a d a d + \ e / \ e / + .---(.) (.)---. + / \ / \ + b c b c + The node (.) 
is the top node of the newick file: + It changes the orientation of the hashtables for the edge e + 2 newick representations of the SAME tree + */ + char *ref_tree_string = "((a:1,b:1):1,c:1,d:1);"; + char *boot_tree_string = "(b:1,a:1,(c:1,d:1):1);"; + char** taxname_lookup_table = NULL; + + Tree* ref_tree = complete_parse_nh(ref_tree_string, &taxname_lookup_table); /* sets taxname_lookup_table en passant */ + Tree* boot_tree = complete_parse_nh(boot_tree_string, &taxname_lookup_table); /* sets taxname_lookup_table en passant */ + int i,j; + int common_splits = 0; + int splits_not_found = 0; + for (i = 0; i < ref_tree->nb_edges; i++) { + if(ref_tree->a_edges[i]->right->nneigh == 1) continue; + /* we skip the branches leading to leaves */ + + if(ref_tree->a_edges[i]->had_zero_length) continue; + /* a branch with length == 0 is not to be considered as a valid bipartition */ + + for (j = 0; j < boot_tree->nb_edges; j++) { + if(boot_tree->a_edges[j]->had_zero_length) continue; + /* a branch with length == 0 is not to be considered as a valid bipartition */ + if (equal_or_complement_id_hashtables(ref_tree->a_edges[i]->hashtbl[1], + boot_tree->a_edges[j]->hashtbl[1], + ref_tree->nb_taxa)) { + //printf("result: splits ARE equal!\n"); + common_splits++; + break; + } + } /* end for on j */ + if (j == boot_tree->nb_edges) splits_not_found++; + } /* end for on i */ + free_tree(boot_tree); + free_tree(ref_tree); + free(taxname_lookup_table); /* which is a (char**) */ + + if(common_splits != 1){ + fprintf(stderr,"Classical Bootstrap test error: Number of common splits is: %d, and should be: %d\n",common_splits,1); + return EXIT_FAILURE; + } + if(splits_not_found != 0){ + fprintf(stderr,"Classical Bootstrap test error: Number of splits not found is: %d, and should be: %d\n",splits_not_found,0); + return EXIT_FAILURE; + } + fprintf(stderr,"Classical Bootstrap test : OK\n"); + return(EXIT_SUCCESS); +} + +void test_fill_hashtable_post_order(Node* current, Node* orig, Tree* t, 
id_hash_table_t *h) { + /* we are going to update one of the two hashtables sitting on the branch between current and orig. */ + int i, n = current->nneigh; + if(orig == NULL) return; + int curr_to_orig = dir_a_to_b(current, orig); + + Edge* br = current->br[curr_to_orig]; /* br: current to orig; br2: any _other_ branch from current */ + + for(i=1 ; i < n ; i++) { + test_fill_hashtable_post_order(current->neigh[(curr_to_orig+i)%n], current,t,h); + } + + /* but if n = 1 we haven't done anything (leaf): we must put the info corresponding to the taxon into the branch */ + if (n == 1) { + assert(br->right == current); + /* add the id of the taxon to the right hashtable of the branch */ + add_id(h,get_tax_id_from_tax_name(current->name, t->taxname_lookup_table, t->nb_taxa)); + } +} /* end update_hashtables_post_doer */ + + + + +int test_swap_branches(){ + /** + a e d + \ | / + .---.---. + / * * \ + b c + + We will swap the edges from one of the edges * + */ + char** taxname_lookup_table = NULL; + char *ref_tree_string = "((a:1,b:1):1,e:1,(c:1,d:1):1);"; + Tree* ref_tree = complete_parse_nh(ref_tree_string, &taxname_lookup_table); /* sets taxname_lookup_table en passant */ + + int e_index; + int swaped = 0; + for(e_index=0;e_indexnb_edges;e_index++){ + Edge *e = ref_tree->a_edges[e_index]; + if(e->right->nneigh>1 && + e->left->nneigh>1 + && !swaped){ + /*On swap la première qui vient*/ + swap_branches(ref_tree,e); + swaped = 1; + } + } + fprintf(stderr,"Swap branch Test: OK\n"); + return(EXIT_SUCCESS); +} + +/** + We test the TRANSFER Support for branches of the initial tree compared to another tree + + */ +int test_transfer_1(){ + srand(time(NULL)); + char *ref_tree_string = "((a:1,b:1,c:1):1,(d:1,e:1,f:1):1,((g:1,h:1,i:1):1,(j:1,k:1,l:1):1,(m:1,n:1,o:1):1):1);"; + char *swap_tree_string = 
"((g:1.000000,h:1.000000,i:1.000000):1.000000,(m:1.000000,n:1.000000,o:1.000000):1.000000,((a:1.000000,b:1.000000,c:1.000000):1.000000,(j:1.000000,k:1.000000,l:1.000000):1.000000,(d:1.000000,e:1.000000,f:1.000000):1.000000):1.000000);"; + + /* and then feed this string to the parser */ + char** taxname_lookup_table = NULL; + Tree* ref_tree = complete_parse_nh(ref_tree_string, &taxname_lookup_table); /* sets taxname_lookup_table en passant */ + Tree* swap_tree = NULL; + + int e_index; + int min_num_moved=0; + + int max_branches_boot = ref_tree->nb_taxa*2-2; + int n = ref_tree->nb_taxa; + int m = ref_tree->nb_edges; + int i; + short unsigned** c_matrix = (short unsigned**) malloc(m*sizeof(short unsigned*)); /* matrix of cardinals of complements */ + for (i=0; i min_num_moved){ + fprintf(stderr,"TRANSFER Test 1 : Error : The min_dist of the swaped branch is > the number of swaped taxa %d>%d\n",min_dist[e_index],min_num_moved); + exit(EXIT_FAILURE); + } + + free_tree(swap_tree); + + for (i=0; inb_taxa*2-2; + int n = ref_tree->nb_taxa; + int m = ref_tree->nb_edges; + int i; + short unsigned** c_matrix = (short unsigned**) malloc(m*sizeof(short unsigned*)); /* matrix of cardinals of complements */ + for (i=0; iright->nneigh == 1 || + e->left->nneigh == 1 ){ + e_index = rand_to(swap_tree->nb_edges); + e = swap_tree->a_edges[e_index]; + } + min_num_moved = swap_branches(swap_tree,e); + + /* calculation of the C and I matrices (see Brehelin/Gascuel/Martin) */ + update_all_i_c_post_order_ref_tree(ref_tree, swap_tree, i_matrix, c_matrix); + update_all_i_c_post_order_boot_tree(ref_tree, swap_tree, i_matrix, c_matrix, hamming, min_dist); + + if(min_dist[e_index] > min_num_moved){ + fprintf(stderr,"TRANSFER Test 2 after branch swap : Error : The min_dist of the swaped branch is > the number of swaped taxa\n"); + exit(EXIT_FAILURE); + } + free_tree(swap_tree); + swap_tree = NULL; + } + + for (i=0; inb_taxa*2-2; + int n = ref_tree->nb_taxa; + int m = ref_tree->nb_edges; + int i; + 
short unsigned** c_matrix = (short unsigned**) malloc(m*sizeof(short unsigned*)); /* matrix of cardinals of complements */ + for (i=0; inb_edges; i++) { + if (ref_tree->a_edges[i]->brlen < thresh) { + if (ref_tree->a_edges[i]->right->nneigh == 1) { /* don't collapse terminal edges */ + uncollapsed_terminal++; + }else{ + collapse_branch(ref_tree->a_edges[i], ref_tree); + collapse_branch(swap_tree->a_edges[i], swap_tree); + collapsed_one = 1; + collapsed_internal++; + break; /* breaking the for so that we start again from the beginning because tree->a_edges has changed */ + } + } + } /* end for */ + } while (collapsed_one); + /* fprintf(stderr,"Collapsed %d branches\n",collapsed_internal); */ + + int m = ref_tree->nb_edges; + int max_branches_boot = ref_tree->nb_taxa*2-2; + + short unsigned** c_matrix = (short unsigned**) malloc(m*sizeof(short unsigned*)); /* matrix of cardinals of complements */ + short unsigned** i_matrix = (short unsigned**) malloc(m*sizeof(short unsigned*)); /* matrix of cardinals of intersections */ + short unsigned** hamming = (short unsigned**) malloc(m*sizeof(short unsigned*)); /* matrix of Hamming distances */ + short unsigned* min_dist = (short unsigned*) malloc(m*sizeof(short unsigned)); /* array of min Hamming distances */ + + for (i=0; inb_edges;i_edge++){ + if(min_dist[i_edge] != 0){ + /* fprintf(stderr,"TRANSFER Test 4 : Error : The min_dist of the internal branch is != 0 (%d)\n",min_dist[i_edge]); */ + return(EXIT_FAILURE); + } + } + /* fprintf(stderr,"TRANSFER Test 4/1 : OK : The min_dist of the internal branch is == 0 (%d)\n",min_dist[i_edge]); */ + + /* Then we exchange n_move taxa names from left to right of the edge */ + int edge = rand_to(ref_tree->nb_edges); + int d = swap_tree->a_edges[edge]->topo_depth; + int n_move = rand_to(d); + /* fprintf(stderr,"\tWill swap %d taxa from left to right of branch %d (depth=%d)\n",n_move,edge,d); */ + int* left_taxa = sample_taxa(swap_tree, n_move, swap_tree->a_edges[edge]->right, 
swap_tree->a_edges[edge]->left); + int* right_taxa = sample_taxa(swap_tree, n_move, swap_tree->a_edges[edge]->left , swap_tree->a_edges[edge]->right); + int i_move; + for(i_move=0; i_move < n_move; i_move++){ + /* fprintf(stderr,"\tMoving %s <-> %s\n",swap_tree->a_nodes[left_taxa[i_move]]->name, swap_tree->a_nodes[right_taxa[i_move]]->name); */ + char *tmp; + tmp = swap_tree->a_nodes[left_taxa[i_move]]->name; + swap_tree->a_nodes[left_taxa[i_move]]->name = swap_tree->a_nodes[right_taxa[i_move]]->name; + swap_tree->a_nodes[right_taxa[i_move]]->name = tmp; + } + + for (i = 0; i < m; i++) { + min_dist[i] = n; /* initialization to the nb of taxa */ + } + + update_all_i_c_post_order_ref_tree(ref_tree, swap_tree, i_matrix, c_matrix); + update_all_i_c_post_order_boot_tree(ref_tree, swap_tree, i_matrix, c_matrix, hamming, min_dist); + + /* fprintf(stderr,"\tTRANSFER Test 4 : The min_dist of the internal branch is %d\n",min_dist[edge]); */ + + if(min_dist[edge] > n_move*2 ){ + fprintf(stderr,"TRANSFER Test 4 : Error : The min_dist of the internal branch is > 2*%d (%d)\n",n_move,min_dist[edge]); + return(EXIT_FAILURE); + } + + /* We leave the tree as it was at the beginning */ + for(i_move=0; i_move < n_move; i_move++){ + char *tmp; + tmp = swap_tree->a_nodes[left_taxa[i_move]]->name; + swap_tree->a_nodes[left_taxa[i_move]]->name = swap_tree->a_nodes[right_taxa[i_move]]->name; + swap_tree->a_nodes[right_taxa[i_move]]->name = tmp; + } + + free(left_taxa); + free(right_taxa); + } + + for (i=0; inb_nodes;t++){ + Node *n = rand_tree->a_nodes[t]; + if(n->nneigh==1){ + int ref_lookid = get_tax_id_from_tax_name(n->name,ref_tree->taxname_lookup_table, ref_tree->nb_taxa); + int rand_lookid= get_tax_id_from_tax_name(n->name,rand_tree->taxname_lookup_table, rand_tree->nb_taxa); + if(ref_lookid != rand_lookid){ + fprintf(stderr,"Random tree test error: tax id in lookup table is : %d and should be : %d\n",rand_lookid,ref_lookid); + free_tree(rand_tree); + free_tree(ref_tree); + return 
EXIT_FAILURE; + } + } + /* For each Edge we will test the hashtables */ + int e; + for(e=0;enb_edges;e++){ + id_hash_table_t * h = rand_tree->a_edges[e]->hashtbl[1]; + id_hash_table_t * h2 = create_id_hash_table(rand_tree->nb_taxa); + id_hash_table_t * h3 = create_id_hash_table(rand_tree->nb_taxa); + test_fill_hashtable_post_order(rand_tree->a_edges[e]->left,rand_tree->a_edges[e]->right, rand_tree, h2); + test_fill_hashtable_post_order(rand_tree->a_edges[e]->right,rand_tree->a_edges[e]->left, rand_tree, h3); + + if(!equal_id_hashtables(h,h2) && !equal_id_hashtables(h,h3)){ + /* if(!equal_or_complement_id_hashtables(h,h2,rand_tree->nb_taxa)){ */ + fprintf(stderr,"Random tree test error: hashtables are not consistent with the lookup table\n"); + print_id_hashtable(stderr, h, rand_tree->nb_taxa); + print_id_hashtable(stderr, h2, rand_tree->nb_taxa); + print_id_hashtable(stderr, h3, rand_tree->nb_taxa); + + free_tree(rand_tree); + free_tree(ref_tree); + free_id_hashtable(h2); + return EXIT_FAILURE; + } + free_id_hashtable(h2); + } + } + free_tree(rand_tree); + } + free_tree(ref_tree); + + fprintf(stderr,"Random tree Test: OK\n"); + + return(EXIT_SUCCESS); +} + +int test_id_hash_table_shuffle(){ + ntax = 1000; + + id_hash_table_t * h; + id_hash_table_t * h2; + int i=0; + int total = 0; + int total_expect = 0; + h = create_id_hash_table(ntax); + + /* We will set the tax to 1 randomly */ + for(i = 0;i250){ + printf("Test proba : error - %d != %d\n",result,expected); + return(EXIT_FAILURE); + } + printf("Test stat_proba : OK\n"); + return(EXIT_SUCCESS); +} + +int test_qnorm(){ + int i; + double alphas[14] = {0.01,0.02,0.05,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,0.95,0.99}; + double expect[14] = {-2.32634787404084075746, + -2.05374891063182252182, + -1.64485362695147263601, + -1.28155156554460081253, + -0.84162123357291418468, + -0.52440051270804066696, + -0.25334710313579977825, + 0.00000000000000000000, + 0.25334710313579977825, + 0.52440051270804066696, + 
0.84162123357291440673, + 1.28155156554460081253, + 1.64485362695147152579, + 2.32634787404084075746}; + double res; + + for(i=0; i <14; i++){ + res = qnorm(alphas[i], 0, 1); + if(expect[i] != res){ + printf("Test qnorm : error - %f != %f\n",expect[i],res); + return(EXIT_FAILURE); + } + /*printf("%f = %1.30f | %1.30f (%s)\n",alphas[i],expected[i],res,(expected[i]==res)?"true":"false");*/ + } + printf("Test qnorm : OK\n"); + return(EXIT_SUCCESS); +} + +int test_pnorm(){ + int i; + double q[5] = {-2,-1,0,1,2}; + double expect[5] = { + 0.022750131948179212055, + 0.15865525393145704647, + 0.5, + 0.84134474606854292578, + 0.97724986805182079141 + }; + + double res; + double relative_error; + double accepted_error = 0.000000000000001; + for(i=0; i <2; i++){ + res = pnorm(q[i]); + relative_error = fabs((res - expect[i])); + if(expect[i] != res && relative_error > accepted_error){ + printf("Test qnorm : error - %1.20f != %1.20f\n",expect[i],res); + return(EXIT_FAILURE); + } + } + printf("Test pnorm : OK\n"); + return(EXIT_SUCCESS); +} + +int test_unif(){ + int seed = 25684; + double expected[7] = {0.614942,0.295840,0.981761,0.359667,0.436287,0.827348,0.813658}; + double result; + int i; + + prng_seed_bytes(&seed, sizeof(seed)); + + for(i = 0; i < 7; i++){ + result = unif(); + if((int)round(expected[i]*1000000) != (int)round(result*1000000)){ + printf("Test unif : error - %d != %d\n",(int)(expected[i]*1000000),(int)(result*1000000)); + return(EXIT_FAILURE); + } + } + printf("Test unif : OK\n"); + return(EXIT_SUCCESS); +} + +#define TEST_MAX_INT 124 +int test_rand_to(){ + int nb_simu = 1000000; + double expected_nb = nb_simu/TEST_MAX_INT; + double threshold = 0.1; + double res_nb [TEST_MAX_INT]; + int i; + int result; + + prng_seed_time(); + + for(i = 0; i < TEST_MAX_INT; i++){ + res_nb[i] = 0; + } + + for(i = 0; i < nb_simu; i++){ + result = rand_to(TEST_MAX_INT); + if(result >= TEST_MAX_INT){ + printf("Test rand_to : integer %d > %d\n",result,TEST_MAX_INT-1); + } + 
res_nb[result]++; + } + /* Test for frequency of each int */ + for(i=0;i threshold*expected_nb){ + printf("Test rand_to : frequency error - freq %d = %f != %f \n",i,res_nb[i],expected_nb); + return(EXIT_FAILURE); + } + } + + printf("Test rand_to: OK (~1/TEST_MAX_INT of each nt)\n"); + return(EXIT_SUCCESS); +} + +int test_sum(){ + double array[10] = {1,2,3,4,5,6,7,8,9,10}; + double result = sum(array,10); + double exp = 55; + if(result!=exp){ + printf("Test sum : error - Sum %f != %f\n",result,exp); + } + printf("Test sum: OK\n"); + return(EXIT_SUCCESS); +} + +int comp_int(const void * elem1, const void * elem2){ + int f = *((int*)elem1); + int s = *((int*)elem2); + if (f > s) return 1; + if (f < s) return -1; + return 0; +} + +/* Test sampling function */ +int test_sample(){ + int length = 10000; + int nbsamp = 500; + int * array = malloc(length * sizeof(int)); + int * sampled; + int * sampled2; + int found, found2; + int i = 0, j = 0; + int duplicate = 0; + + for(i=0;inb_taxa); + sum_brlen=0.0; + write_nh_tree(ref_tree,stdout); + remove_taxon(tax_id,ref_tree); + + for(i=0;inb_edges;i++){ + sum_brlen += (ref_tree->a_edges[i]->brlen); + } + if(sum_brlen != 5.7){ + fprintf(stderr,"Test remove taxon: error - The sum of br len is %f, and should be 5.7\n",sum_brlen); + free_tree(ref_tree); + free(taxname_lookup_table); + return(EXIT_FAILURE); + } + + if(ref_tree->nb_nodes != 6){ + fprintf(stderr,"Test remove taxon: error - The number of nodes is %d, and should be 6\n",ref_tree->nb_nodes); + free_tree(ref_tree); + free(taxname_lookup_table); + return(EXIT_FAILURE); + } + if(ref_tree->nb_taxa != 4){ + fprintf(stderr,"Test remove taxon: error - The number of taxa is %d, and should be 4\n",ref_tree->nb_taxa); + free_tree(ref_tree); + free(taxname_lookup_table); + return(EXIT_FAILURE); + } + + for(i=0;inb_nodes;i++){ + if(ref_tree->a_nodes[i]->nneigh==1 && strcmp(ref_tree->a_nodes[i]->name,"a") == 0){ + fprintf(stderr,"Test remove taxon: error - The original taxon \"a\" is 
still present after its removal\n"); + free_tree(ref_tree); + free(taxname_lookup_table); + return(EXIT_FAILURE); + } + } + write_nh_tree(ref_tree,stdout); + free(taxname_lookup_table); + taxname_lookup_table=NULL; + free_tree(ref_tree); + + /* On essaie avec un arbre multifurcation */ + ref_tree = complete_parse_nh(boot4_tree_string, &taxname_lookup_table); /* sets taxname_lookup_table en passant */ + tax_id = get_tax_id_from_tax_name("a", taxname_lookup_table, ref_tree->nb_taxa); + remove_taxon(tax_id,ref_tree); + if(ref_tree->nb_nodes != 5){ + fprintf(stderr,"Test remove taxon: error - The number of nodes is %d, and should be 5\n",ref_tree->nb_nodes); + free_tree(ref_tree); + free(taxname_lookup_table); + return(EXIT_FAILURE); + } + if(ref_tree->nb_taxa != 4){ + fprintf(stderr,"Test remove taxon: error - The number of taxa is %d, and should be 4\n",ref_tree->nb_taxa); + free_tree(ref_tree); + free(taxname_lookup_table); + return(EXIT_FAILURE); + } + + for(i=0;inb_nodes;i++){ + if(ref_tree->a_nodes[i]->nneigh==1 && strcmp(ref_tree->a_nodes[i]->name,"a") == 0){ + fprintf(stderr,"Test remove taxon: error - The original taxon \"a\" is still present after its removal\n"); + free_tree(ref_tree); + free(taxname_lookup_table); + return(EXIT_FAILURE); + } + } + write_nh_tree(ref_tree,stdout); + free(taxname_lookup_table); + taxname_lookup_table=NULL; + free_tree(ref_tree); + + /* On essaie avec un autre arbre multifurcation */ + ref_tree = complete_parse_nh(boot3_tree_string, &taxname_lookup_table); /* sets taxname_lookup_table en passant */ + tax_id = get_tax_id_from_tax_name("a", taxname_lookup_table, ref_tree->nb_taxa); + remove_taxon(tax_id,ref_tree); + if(ref_tree->nb_nodes != 6){ + fprintf(stderr,"Test remove taxon: error - The number of nodes is %d, and should be 6\n",ref_tree->nb_nodes); + free_tree(ref_tree); + free(taxname_lookup_table); + return(EXIT_FAILURE); + } + if(ref_tree->nb_taxa != 4){ + fprintf(stderr,"Test remove taxon: error - The number of taxa is 
%d, and should be 4\n",ref_tree->nb_taxa); + free_tree(ref_tree); + free(taxname_lookup_table); + return(EXIT_FAILURE); + } + + for(i=0;inb_nodes;i++){ + if(ref_tree->a_nodes[i]->nneigh==1 && strcmp(ref_tree->a_nodes[i]->name,"a") == 0){ + fprintf(stderr,"Test remove taxon: error - The original taxon \"a\" is still present after its removal\n"); + free_tree(ref_tree); + free(taxname_lookup_table); + return(EXIT_FAILURE); + } + } + + write_nh_tree(ref_tree,stdout); + free(taxname_lookup_table); + taxname_lookup_table=NULL; + free_tree(ref_tree); + + + ref_tree = complete_parse_nh(ref_tree_string, &taxname_lookup_table); /* sets taxname_lookup_table en passant */ + tax_id = get_tax_id_from_tax_name("e", taxname_lookup_table, ref_tree->nb_taxa); + remove_taxon(tax_id,ref_tree); + write_nh_tree(ref_tree,stdout); + + free_tree(ref_tree); + free(taxname_lookup_table); /* which is a (char**) */ + fprintf(stderr,"Test remove taxon: OK\n"); + return(EXIT_SUCCESS); + +} + +int test_hashmap(){ + + int j,k,div,mod; + int total=20000; + char* array[total]; + int min_char=65; + int max_char=90; + int base=max_char-min_char+1; + /* Wi fill the array of string with strings AAAAAAAAA then BAAAAAAAA, etc.*/ + for(j=0;jnb_edges; + for (i = 0; i < n; i++) if(tree->a_edges[i]->had_zero_length) count++; + return count; +} + +int count_leaves(Tree* tree) { + int count = 0; + int i, n = tree->nb_nodes; + for (i = 0; i < n; i++) if(tree->a_nodes[i]->nneigh == 1) count++; + return count; +} + +int count_roots(Tree* tree) { /* to ensure there is exactly zero or one root */ + int count = 0; + int i, n = tree->nb_nodes; + for (i = 0; i < n; i++) if(tree->a_nodes[i]->nneigh == 2) count++; + return count; +} + +int count_multifurcations(Tree* tree) { /* to ensure there is exactly zero or one root */ + int count = 0; + int i, n = tree->nb_nodes; + for (i = 0; i < n; i++) if(tree->a_nodes[i]->nneigh > 3) count++; + return count; +} + +int dir_a_to_b(Node* a, Node* b) { + /* this returns the 
direction from a to b when a and b are two neighbours, otherwise yields an error */ + int i, n = a->nneigh; + for(i=0; ineigh[i] == b) break; + if (i < n) return i; else { + fprintf(stderr,"Fatal error : nodes are not neighbours.\n"); + Generic_Exit(__FILE__,__LINE__,__FUNCTION__,EXIT_FAILURE); + } + return -1; +} /* end dir_a_to_b */ + + +/* various statistics on tree branch support */ + +double mean_bootstrap_support(Tree* tree) { + /* this function returns the mean bootstrap support calculated on those branches that have a bootstrap support value */ + int i, total_num = 0; + double accu = 0.0; + int n_br = tree->nb_edges; + for(i = 0; i < n_br; i++) { + if (tree->a_edges[i]->has_branch_support) { + accu += tree->a_edges[i]->branch_support; + total_num++; + } + } /* end for */ + + return accu / total_num; +} /* end mean_bootstrap_support */ + + + +double median_bootstrap_support(Tree* tree) { + /* this function returns the median bootstrap support calculated on those branches that have a bootstrap support value */ + /* we first create an array with all bootstrap supports */ + int i, j, total_num = 0, n_br = tree->nb_edges; + for(i = 0; i < n_br; i++) if(tree->a_edges[i]->has_branch_support) total_num++; + double* branch_supports = (double*) malloc (total_num * sizeof(double)); + + j=0; + for(i = 0; i < n_br; i++) if(tree->a_edges[i]->has_branch_support) branch_supports[j++] = tree->a_edges[i]->branch_support; + double result = median_double_vec(branch_supports, total_num); + free(branch_supports); + return result; +} /* end median_bootstrap_support */ + + +int summary_bootstrap_support(Tree* tree, double* result) { + /* this function stores all the bootstrap values in a vector and outputs the statistical summary + of that vector into the result array. Same order as in R. 
*/ + /* RESULT MUST HAVE ALLOCATED SIZE >= 6 */ + /* retcode is -1 in case no support values found */ + int i, j, num_bootstrap_values = 0, n_br = tree->nb_edges; + for(i = 0; i < n_br; i++) if(tree->a_edges[i]->has_branch_support) num_bootstrap_values++; + + if (num_bootstrap_values == 0) return -1; + + /* allocating vector */ + double* bootstrap_vals = (double*) malloc(num_bootstrap_values * sizeof(double)); + /* filling in vector */ + for(i = j = 0; i < n_br; i++) if(tree->a_edges[i]->has_branch_support) bootstrap_vals[j++] = tree->a_edges[i]->branch_support; + /* summary */ + summary_double_vec_nocopy(bootstrap_vals, num_bootstrap_values, result); + free(bootstrap_vals); + return 0; +} /* end summary_bootstrap_support */ + + + + +/* parsing utils: discovering tokens */ + +int index_next_toplevel_comma(char* in_str, int begin, int end) { + /* returns the index of the next toplevel comma, from position begin included, up to position end. + the result is -1 if none is found. */ + int level = 0, i; + for (i = begin; i <= end; i++) { + switch(in_str[i]) { + case '(': + level++; + break; + case ')': + level--; + break; + case ',': + if (level == 0) return i; + } /* endswitch */ + } /* endfor */ + return -1; /* reached if no outer comma found */ +} /* end index_next_toplevel_comma */ + + + +int count_outer_commas(char* in_str, int begin, int end) { + /* returns the number of toplevel commas found, from position begin included, up to position end. */ + int count = 0, level = 0, i; + for (i = begin; i <= end; i++) { + switch(in_str[i]) { + case '(': + level++; + break; + case ')': + level--; + break; + case ',': + if (level == 0) count++; + } /* endswitch */ + } /* endfor */ + return count; +} /* end count_outer_commas */ + + + +void strip_toplevel_parentheses(char* in_str, int begin, int end, int* pair) { + /* returns the new (begin,end) pair comprising all chars found strictly inside the toplevel parentheses. 
+ The input "pair" is an array of two integers, we are passing the output values through it. + It is intended that here, in_str[pair[0]-1] == '(' and in_str[pair[1]+1] == ')'. + In case no matching parentheses are simply return begin and end in pair[0] and pair[1]. It is NOT an error. */ + /* This function also tests the correctness of the NH syntax: if no balanced pars, then return an error and abort. */ + int i, found_par = 0; + + pair[0] = end+1; pair[1] = -1; /* to ensure termination if no parentheses are found */ + + /* first seach opening par from the beginning of the string */ + for (i = begin; i <= end; i++) if (in_str[i] == '(') { pair[0] = i+1; found_par += 1; break; } + + /* and then search the closing par from the end of the string */ + for (i = end; i >= begin; i--) if (in_str[i] == ')') { pair[1] = i-1; found_par += 1; break; } + + switch (found_par) { + case 0: + pair[0] = begin; + pair[1] = end; + break; + case 1: + fprintf(stderr,"Syntax error in NH tree: unbalanced parentheses between string indices %d and %d. Aborting.\n", begin, end); + Generic_Exit(__FILE__,__LINE__,__FUNCTION__,EXIT_FAILURE); + } /* end of switch: nothing to do in case 2 (as pair[0] and pair[1] correctly set), and found_par can never be > 2 */ +} + + +int index_toplevel_colon(char* in_str, int begin, int end) { + /* returns the index of the (first) toplevel colon only, -1 if not found */ + int level = 0, i; + for (i = end; i >= begin; i--) {/* more efficient to proceed from the end in this case */ + switch(in_str[i]) { + case ')': + level++; + break; + case '(': + level--; + break; + case ':': + if (level == 0) return i; + } /* endswitch */ + } /* endfor */ + return -1; +} /* end index_toplevel_colon */ + + +void parse_double(char* in_str, int begin, int end, double* location) { + /* this function parses a numerical value and puts it into location. Meant to be used for branch lengths. 
*/ + if (end < begin) { + fprintf(stderr,"Missing branch length at offset %d in the New Hampshire string. Branch length set to 0.\n", begin); + sscanf("0.0", "%lg", location); + return; + } + char numerical_string[52] = { '\0' }; + strncpy(numerical_string, in_str+begin, end-begin+1); + int n_matches = sscanf(numerical_string, "%lg", location); + if (n_matches != 1) { + fprintf(stderr,"Fatal error in parse_double: unable to parse a number out of \"%s\". Aborting.\n", numerical_string); + Generic_Exit(__FILE__,__LINE__,__FUNCTION__,EXIT_FAILURE); + } +} /* end parse_double */ + + + + +/* CREATION OF A NEW TREE FROM SCRATCH, ADDING TAXA ONE AT A TIME */ + +Node* new_node(const char* name, Tree* t, int degree) { + int i; + Node* nn = (Node*) malloc(sizeof(Node)); + nn->nneigh = degree; + nn->neigh = malloc(degree * sizeof(Node*)); + nn->br = malloc(degree * sizeof(Edge*)); + nn->id = t->next_avail_node_id++; + if(degree==1 && !name) { fprintf(stderr,"Fatal error : won't create a leaf with no name. Aborting.\n"); Generic_Exit(__FILE__,__LINE__,__FUNCTION__,EXIT_FAILURE);} + if(name) { nn->name = strdup(name); } else nn->name = NULL; + if(degree==1) { t->taxa_names[t->next_avail_taxon_id++] = strdup(name); } + nn->comment = NULL; + for(i=0; i < nn->nneigh; i++) { nn->neigh[i] = NULL; nn->br[i] = NULL; } + nn->depth = MAX_NODE_DEPTH; + t->a_nodes[nn->id] = nn; /* warning: not checking anything here! This array haas to be big enough from start */ + t->nb_nodes++; + return nn; +} + +Edge* new_edge(Tree* t) { + Edge* ne = (Edge*) malloc(sizeof(Edge)); + ne->id = t->next_avail_edge_id++; + ne->has_branch_support = 0; + ne->hashtbl[0] = ne->hashtbl[1] = NULL; + ne->subtype_counts[0] = ne->subtype_counts[1] = NULL; + t->a_edges[ne->id] = ne; + t->nb_edges++; + return ne; +} + + +Tree* new_tree(int nb_taxa, const char* name) { + /* allocates the space for a new tree and gives it as an output (pointer to the new tree) */ + /* optional is the name of the first taxa. 
If we don't provide it, there exists a risk that we will build a + tree with finally one leaf with no name */ + if (nb_taxa <= 0) return NULL; /* at least one node, that is node0 */ + Tree* t = (Tree*) malloc(sizeof(Tree)); + t->taxa_names = (char**) calloc(nb_taxa, sizeof(char*)); /* store only once the taxa names */ + t->next_avail_node_id = t->next_avail_edge_id = t->next_avail_taxon_id = t->nb_nodes = t->nb_edges = 0; + t->nb_taxa = nb_taxa; /* here we don't put the actual number of taxa, but the value to be reached by growing the tree */ + + t->a_nodes = (Node**) calloc(2*nb_taxa-1, sizeof(Node*)); /* array of node pointers, enough for a rooted tree */ + t->a_edges = (Edge**) calloc(2*nb_taxa-2, sizeof(Edge*)); /* array of edge pointers, enough for a rooted tree */ + + t->node0 = new_node(name, t, 1); /* this first node _is_ a leaf */ + + t->taxname_lookup_table = NULL; + return t; +} + + +/* for the moment this function is used to create binary trees (where all internal nodes have three neighbours) */ +Node* graft_new_node_on_branch(Edge* target_edge, Tree* tree, double ratio_from_left, double new_edge_length, char* node_name) { + /* this grafts a new node on an existing branch. the ratio has to be between 0 and 1, and is relative to the "left" tip of the branch */ + int orig_dir_from_node_l, orig_dir_from_node_r; + + if(tree == NULL) { + fprintf(stderr,"Error : got a NULL tree pointer. Aborting.\n"); + Generic_Exit(__FILE__,__LINE__,__FUNCTION__,EXIT_FAILURE); + } + + if(ratio_from_left <= 0 && ratio_from_left >= 1) { + fprintf(stderr,"Error : invalid ratio %.2f for branch grafting. Aborting.\n", ratio_from_left); + Generic_Exit(__FILE__,__LINE__,__FUNCTION__,EXIT_FAILURE); + } + + if(new_edge_length <= 0) { + fprintf(stderr,"Error : nonpositive new branch length %.2f. Aborting.\n", new_edge_length); + Generic_Exit(__FILE__,__LINE__,__FUNCTION__,EXIT_FAILURE); + } + + + if(node_name == NULL) { + fprintf(stderr,"Error : won't create a leaf with no name. 
Aborting.\n"); + Generic_Exit(__FILE__,__LINE__,__FUNCTION__,EXIT_FAILURE); + } + + if(target_edge == NULL) { + /* here we treat the special case of the insertion of the second node (creation of the very first branch) */ + if (tree->nb_edges!= 0 || tree->next_avail_node_id != 1 || tree->next_avail_edge_id != 0) { + fprintf(stderr,"Error : I get a NULL branch pointer while there is at least one existing branch in the tree. Aborting.\n"); + Generic_Exit(__FILE__,__LINE__,__FUNCTION__,EXIT_FAILURE); + } + Node* second_node = new_node(node_name, tree, 1); /* will be the right node, also a leaf */ + Edge* only_edge = new_edge(tree); + only_edge->left = tree->node0; + only_edge->right = second_node; + only_edge->brlen = new_edge_length; + only_edge->had_zero_length = 0; + + second_node->neigh[0] = tree->node0; tree->node0->neigh[0] = second_node; + second_node->br[0] = tree->node0->br[0] = only_edge; + + return second_node; + + } /* end of the treatment of the insertion in the case of the second node */ + + if(tree->a_edges[target_edge->id] != target_edge) { + fprintf(stderr,"Error : wrong edge id rel. to the tree. Aborting.\n"); + Generic_Exit(__FILE__,__LINE__,__FUNCTION__,EXIT_FAILURE); + } + + /* create two new nodes in the tree: the father and the son. The father breaks the existing edge into two. */ + /* Steps: + (1) create a new node, the breaking point + (2) create a new edge (aka. 
right edge, because the target edge remains left of the breakpoint) + (3) shorten the initial edge and give length value to the new edge + (4) rearrange the tips and update the node + (4bis) VERY IMPORTANT: FOR EACH END OF THE INITIAL EDGE THAT IS A LEAF, MAKE SURE THAT THE BREAKPOINT IS IN DIR 0 FROM IT + (5) create the son node + (6) create the edge leading to this son and update it and the node + */ + + /* record the original situation */ + Node* node_l = target_edge->left; + Node* node_r = target_edge->right; + orig_dir_from_node_l = dir_a_to_b(node_l,node_r); + orig_dir_from_node_r = dir_a_to_b(node_r,node_l); + + /* (1) */ + Node* breakpoint = new_node(NULL, tree, 3); /* not a leaf, so has three neighbours */ + + /* (2) */ + Edge* split_edge = new_edge(tree); /* the breakpoint sits between the target_edge and the split_edge */ + + /* (3) */ + split_edge->brlen = 2.0 * (1.0 - ratio_from_left) * target_edge->brlen; /* double the length so that we never get tiny edges after multiple insertions on the same branch */ + split_edge->had_zero_length = 0; + target_edge->brlen *= 2.0 * ratio_from_left; + + /* (4) */ + /* edge tips */ + + split_edge->left = breakpoint; + split_edge->right = node_r; + target_edge->right = breakpoint; + + if(node_l->nneigh ==1){ + /* Case of the first edge that connects TWO leaves + We need to connect the left leaf to the right side of the branch + to be consistent with the tree definition + */ + target_edge->right = target_edge->left; + target_edge->left = breakpoint; + } + + /* update the 3 nodes */ + breakpoint->neigh[0] = node_l; + breakpoint->br[0] = target_edge; + + breakpoint->neigh[1] = node_r; + breakpoint->br[1] = split_edge; + + /* (4bis) */ + if (node_l->nneigh == 1 && orig_dir_from_node_l != 0) { /* change direction to 0 */ + node_l->neigh[0] = breakpoint; + node_l->br[0] = target_edge; + node_l->neigh[orig_dir_from_node_l] = NULL; + node_l->br[orig_dir_from_node_l] = NULL; + } else { + node_l->neigh[orig_dir_from_node_l] = 
breakpoint; + /* target_edge was already registered as the branch in this direction */ + } + + if (node_r->nneigh == 1 && orig_dir_from_node_r != 0) { /* change direction to 0 */ + node_r->neigh[0] = breakpoint; + node_r->br[0] = split_edge; + node_r->neigh[orig_dir_from_node_r] = NULL; + node_r->br[orig_dir_from_node_r] = NULL; + } else { + node_r->neigh[orig_dir_from_node_r] = breakpoint; + node_r->br[orig_dir_from_node_r] = split_edge; + } + + /* (5) */ + Node* son = new_node(node_name, tree, 1); /* a leaf */ + + /* (6) */ + Edge* outer_edge = new_edge(tree); + outer_edge->left = breakpoint; + outer_edge->right = son; /* the leaf is right of the branch */ + outer_edge->brlen = new_edge_length; + outer_edge->had_zero_length = (new_edge_length == 0); /* but was already ruled out, see beginning of func. */ + + son->neigh[0] = breakpoint; breakpoint->neigh[2] = son; /* necessarily the father is in direction 0 from the leaf */ + son->br[0] = breakpoint->br[2] = outer_edge; + + return son; +} + +/* collapsing a branch */ +void collapse_branch(Edge* branch, Tree* tree) { + /* this function collapses the said branch and creates a higher-order multifurcation (n1 + n2 - 2 neighbours for the resulting node). + We also have to remove the extra node from tree->a_nodes and the extra edge from t->a_edges. + to be done: + (1) create a new node with n1+n2-2 neighbours. Ultimately we will destroy the original node. + (2) populate its list of neighbours from the lists of neighbours corresponding to the two original nodes + (3) populate its list of neighbouring edges form the br lists of the two original nodes + (4) for each of the neighbours, set the info regarding their new neighbour (that is, our new node) + (5) for each of the neighbouring branches, set the info regarding their new side (that is, our new node) + (6) destroy the two original nodes and commit this info to a_nodes. Modify tree->nb_nodes + (7) destroy the original edge and commit this info to a_edges. 
Modify tree->nb_edges */ + + /* WARNING: this function won't accept to collapse terminal edges */ + Node *node1 = branch->left, *node2 = branch->right; + int i, j, n1 = node1->nneigh, n2 = node2->nneigh; + if (n1 == 1 || n2 == 1) { fprintf(stderr,"Warning: %s() won't collapse terminal edges.\n",__FUNCTION__); return; } + int degree = n1+n2-2; + /* (1) */ + /* Node* new = new_node("collapsed", tree, n1 + n2 - 2); */ /* we cannot use that because we want to reuse n1's spot in tree->a_nodes */ + Node* new = (Node*) malloc(sizeof(Node)); + new->nneigh = degree; + new->neigh = malloc(degree * sizeof(Node*)); + new->br = malloc(degree * sizeof(Edge*)); + new->id = node1->id; /* because we are going to store the node at this index in tree->a_nodes */ + new->name = strdup("collapsed"); + new->comment = NULL; + new->depth = min_int(node1->depth, node2->depth); + + /* very important: set tree->node0 to new in case it was either node1 or node2 */ + if (tree->node0 == node1 || tree->node0 == node2) tree->node0 = new; + + + int ind = 0; /* index in the data structures in new */ + /* (2) and (3) and (4) and (5) */ + for (i=0; i < n1; i++) { + if (node1->neigh[i] == node2) continue; + new->neigh[ind] = node1->neigh[i]; + /* then change one of the neighbours of that neighbour to be the new node... */ + for (j=0; j < new->neigh[ind]->nneigh; j++) { + if(new->neigh[ind]->neigh[j] == node1) { + new->neigh[ind]->neigh[j] = new; + break; + } + } /* end for j */ + + new->br[ind] = node1->br[i]; + /* then change one of the two ends of that branch to be the new node... */ + if (new->neigh[ind] == new->br[ind]->right) new->br[ind]->left = new; else new->br[ind]->right = new; + ind++; + } + + for (i=0; i < n2; i++) { + if (node2->neigh[i] == node1) continue; + new->neigh[ind] = node2->neigh[i]; + /* then change one of the neighbours of that neighbour to be the new node... 
*/ + for (j=0; j < new->neigh[ind]->nneigh; j++) { + if(new->neigh[ind]->neigh[j] == node2) { + new->neigh[ind]->neigh[j] = new; + break; + } + } /* end for j */ + + new->br[ind] = node2->br[i]; + /* then change one of the two ends of that branch to be the new node... */ + if (new->neigh[ind] == new->br[ind]->right) new->br[ind]->left = new; else new->br[ind]->right = new; + ind++; + } + + /* (6) tidy up tree->a_nodes and destroy old nodes */ + assert(tree->a_nodes[new->id] == node1); + tree->a_nodes[new->id] = new; + /* current last node in tree->a_edges changes id and is now placed at the position were node2 was */ + int id2 = node2->id; + assert(tree->a_nodes[id2] == node2); + tree->a_nodes[id2] = tree->a_nodes[-- tree->next_avail_node_id]; /* moving the last node into the spot occupied by node2... */ + tree->a_nodes[id2]->id = id2; /* and changing its id accordingly */ + tree->a_nodes[tree->next_avail_node_id] = NULL; /* not strictly necessary, but... */ + tree->nb_nodes--; + free_node(node1); + free_node(node2); + + /* (7) tidy up tree->a_edges and destroy the old branch */ + assert(tree->a_edges[branch->id] == branch); + tree->a_edges[branch->id] = tree->a_edges[-- tree->next_avail_edge_id]; /* moving the last branch into the spot occupied by 'branch' */ + tree->a_edges[branch->id]->id = branch->id; /* ... and changing its id accordingly */ + tree->a_edges[tree->next_avail_edge_id] = NULL; /* not strictly necessary, but... */ + tree->nb_edges--; + free_edge(branch); + +} /* end collapse_branch */ + + +/** + This function removes a taxon from the tree (identified by its taxon_id) + And recomputed the branch length of the branch it was branched on. + + Be careful: The taxnames_lookup_table is modified after this function! + Do not use this function if you share the same taxnames_lookup_table in + several trees. 
+ + connect_node + l_edge r_edge + l_node *-------*--------* r_node + |e_to_remove_index + | e_to_remove + | + * + n_to_remove +*/ +void remove_taxon(int taxon_id, Tree* tree){ + Node *n_to_remove = NULL; + Edge *e_to_remove, *r_edge; + Node *connect_node, *r_node; + + int i,j; + int e_to_remove_local_index = 0; + int e_to_remove_global_index = 0; + int n_to_remove_global_index = 0; + int connect_node_global_index = -1; + int r_edge_global_index = -1; + + char **new_taxa_names; + + /** + initialization of nodes and edge to delete + */ + if(taxon_id>tree->nb_taxa){ + fprintf(stderr,"Warning: %s - the given taxon_id is > the number of taxa: %d\n",__FUNCTION__,taxon_id); + return; + } + + for(i=0;inb_nodes;i++){ + if(tree->a_nodes[i]->nneigh==1 && strcmp(tree->a_nodes[i]->name,tree->taxname_lookup_table[taxon_id])==0){ + n_to_remove = tree->a_nodes[i]; + } + } + + if(n_to_remove==NULL || n_to_remove->nneigh != 1){ + fprintf(stderr,"Warning: %s() won't remove non terminal node.\n",__FUNCTION__); + return; + } + + e_to_remove = n_to_remove->br[0]; + connect_node = n_to_remove->neigh[0]; + + e_to_remove_global_index = e_to_remove->id; + n_to_remove_global_index = n_to_remove->id; + connect_node_global_index = connect_node->id; + + /* We get the index of the node/edge to remove*/ + for(i=0;inneigh;i++){ + if(connect_node->neigh[i] == n_to_remove){ + e_to_remove_local_index = i; + } + } + + /** + We remove the branch e_to_remove from the connect_node + And the node n_to_remove from its neighbors + */ + for(i=e_to_remove_local_index; i < connect_node->nneigh-1;i++){ + connect_node->br[i] = connect_node->br[i+1]; + connect_node->neigh[i] = connect_node->neigh[i+1]; + } + connect_node->nneigh--; + + new_taxa_names = malloc((tree->nb_taxa-1)*sizeof(char*)); + + /** + We remove the name of the taxon from the taxa_names array + */ + j=0; + for(i=0;inb_taxa;i++){ + if(strcmp(n_to_remove->name,tree->taxa_names[i]) != 0){ + new_taxa_names[j] = strdup(tree->taxa_names[i]); + j++; + 
} + free(tree->taxa_names[i]); + } + free(tree->taxa_names); + tree->taxa_names=new_taxa_names; + free_node(n_to_remove); + free_edge(e_to_remove); + + tree->a_nodes[n_to_remove_global_index] = NULL; + tree->a_edges[e_to_remove_global_index] = NULL; + + /** + If there remains 1 neighbor, it means that connect node is the root of + a rooted tree + -----*r_node + |r_edge + *connect_node + |e_to_remove + -----*n_to_remove + */ + if(connect_node->nneigh == 1){ + r_edge = connect_node->br[0]; + r_node = connect_node->neigh[0]; + r_edge_global_index = r_edge->id; + int index = -1; + /** + We remove the branch r_edge from the r_node + And the node connect_node from its neighbors + */ + for(i=0;inneigh-1;i++){ + if(r_node->neigh[i] == connect_node){ + index = i; + } + if(index != -1){ + r_node->br[i] = r_node->br[i+1]; + r_node->neigh[i] = r_node->neigh[i+1]; + } + } + r_node->nneigh--; + + /* The new root is r_node*/ + if(tree->node0 == connect_node){ + tree->node0 = r_node; + } + free_edge(r_edge); + free_node(connect_node); + + tree->a_nodes[connect_node_global_index] = NULL; + tree->a_edges[r_edge_global_index] = NULL; + + } else if(connect_node->nneigh == 2){ + /** + If there remains 2 neighbors to connect_node + We connect them directly and delete connect_node + We keep l_edge and delete r_edge + */ + remove_single_node(tree, connect_node); + } + recompute_identifiers(tree); + + /** + We update the taxname_lookup_table + */ + for(i=0; i < tree->nb_taxa; i++){ + free(tree->taxname_lookup_table[i]); + if(i<(tree->nb_taxa-1)) + tree->taxname_lookup_table[i] = strdup(tree->taxa_names[i]); + } + + /** + We update the hashtables + */ + for(i=0;inb_edges;i++){ + free_id_hashtable(tree->a_edges[i]->hashtbl[1]); + } + tree->length_hashtables = (int)((tree->nb_taxa-1) / ceil(log10((double)(tree->nb_taxa-1)))); + for(i=0;inb_edges;i++){ + tree->a_edges[i]->hashtbl[0] = create_id_hash_table(tree->length_hashtables); + tree->a_edges[i]->hashtbl[1] = 
create_id_hash_table(tree->length_hashtables); + } + tree->nb_taxa--; + ntax--; + update_hashtables_post_alltree(tree); + update_hashtables_pre_alltree(tree); + update_node_depths_post_alltree(tree); + update_node_depths_pre_alltree(tree); + + /** + now for all the branches we can delete the **left** hashtables, because the information is redundant and + we have the equal_or_complement function to compare hashtables + */ + for (i = 0; i < tree->nb_edges; i++) { + free_id_hashtable(tree->a_edges[i]->hashtbl[0]); + tree->a_edges[i]->hashtbl[0] = NULL; + } + + /** + topological depths of branches + */ + update_all_topo_depths_from_hashtables(tree); +} + +/** + This method recomputes all the identifiers + of the nodes and of the edges + for which the tree->a_nodes is not null + or tree->a_edges is not null + It also recomputes the total number of edges + and nodes in the tree + */ +void recompute_identifiers(Tree *tree){ + int new_nb_edges = 0; + int new_nb_nodes = 0; + + Node **new_nodes; + Edge **new_edges; + + int i, j; + + for(i=0;inb_edges;i++){ + if(tree->a_edges[i]!=NULL){ + new_nb_edges++; + } + } + + for(i=0;inb_nodes;i++){ + if(tree->a_nodes[i]!=NULL){ + new_nb_nodes++; + } + } + + /** + We recompute all node identifiers + */ + new_nodes = malloc(new_nb_nodes*sizeof(Node*)); + new_edges = malloc(new_nb_edges*sizeof(Edge*)); + + j=0; + for(i=0;inb_nodes;i++){ + if(tree->a_nodes[i]!=NULL){ + tree->a_nodes[i]->id=j; + new_nodes[j] = tree->a_nodes[i]; + j++; + } + } + + /** + We recompute all edge identifiers + */ + j=0; + for(i=0;inb_edges;i++){ + if(tree->a_edges[i] != NULL){ + tree->a_edges[i]->id=j; + new_edges[j] = tree->a_edges[i]; + j++; + } + } + free(tree->a_nodes); + tree->a_nodes = new_nodes; + tree->nb_nodes=new_nb_nodes; + free(tree->a_edges); + tree->a_edges = new_edges; + tree->nb_edges=new_nb_edges; +} + +/** + If there remains 2 neighbors to connect_node + We connect them directly and delete connect_node + We keep l_edge and delete r_edge + -> If 
nneigh de connect node != 2 : Do nothing + connect_node + l_edge r_edge + l_node *-------*--------* r_node + => Careful: After this function, you may want to call + => recompute_identifiers() +*/ +void remove_single_node(Tree *tree, Node *connect_node){ + + Edge *l_edge = connect_node->br[0]; + Edge *r_edge = connect_node->br[1]; + int r_edge_global_index = r_edge->id; + int connect_node_global_index = connect_node->id; + + Node *l_node = (l_edge->left == connect_node) ? l_edge->right : l_edge->left; + Node *r_node = (r_edge->left == connect_node) ? r_edge->right : r_edge->left; + + Node *tmp; + double sum_brlengths = 0; + char * new_right_name = NULL; + double new_branch_support = -1000; + int i; + + if(connect_node->nneigh!=2){ + return; + } + + new_right_name = NULL; + for(i=0;inneigh;i++){ + sum_brlengths+=connect_node->br[i]->brlen; + if(connect_node->br[i]->has_branch_support + && connect_node->br[i]->branch_support > new_branch_support){ + new_branch_support = connect_node->br[i]->branch_support; + new_right_name = connect_node->br[i]->right->name; + } + } + + /** + We replace connect_node by r_node from l_node neighbors + */ + for(i=0;inneigh;i++){ + if(l_node->neigh[i] == connect_node){ + l_node->neigh[i] = r_node; + } + } + + /** + We replace connect_node by l_node from r_node neighbors + */ + for(i=0;inneigh;i++){ + if(r_node->neigh[i] == connect_node){ + r_node->neigh[i] = l_node; + r_node->br[i] = l_edge; + } + } + + /** + We replace the left or right of l_edge by r_edge + */ + if(l_edge->left == connect_node){ + l_edge->left = r_node; + }else{ + l_edge->right = r_node; + } + + /** + We check that the left is not a tax node, otherwise, we swap them + */ + if(l_edge->left->nneigh==1){ + tmp = l_edge->left; + l_edge->left = l_edge->right; + l_edge->right = tmp; + } + + l_edge->brlen = sum_brlengths; + + /** + If right is a tax node, then no branch support anymore + */ + if(l_edge->right->nneigh==1){ + l_edge->has_branch_support = 0; + 
l_edge->branch_support = 0; + }else{ + /** + Otherwise we take the max branch_support computed earlier + */ + l_edge->branch_support = new_branch_support; + if(l_edge->right->name != new_right_name) + strcpy(l_edge->right->name,new_right_name); + } + + /** + if the root was the deleted node, we take a new root + */ + if(tree->node0 == connect_node){ + tree->node0 = l_edge->left; + free(tree->node0->name); + tree->node0->name = NULL; + } + + tree->a_edges[r_edge_global_index] = NULL; + tree->a_nodes[connect_node_global_index] = NULL; + + free_edge(r_edge); + free_node(connect_node); +} + +/** + This function shuffles the taxa of an input tree + It takes also in argument an array of indices that + will be shuffled, and will be used to shuffle taxa + names. + - If the array is NULL: then it will init it with [0..nb_taxa] + and then shuffle it. It is freed at the end + - If the array is not NULL: it must contain all indices from 0 + to nb_taxa (in any order), and it will be shuffled. It will not be freed + + if duplicate taxnames : then it will copy string from tax_name array to nodes->name + else : it will just assign pointer from tax_name array to nodes->name + + --> it the last case : be careful of assign NULL to node->name after the function + otherwise the memory from tax_name and node->name will be freed twice when free_tree will + be applied +*/ +void shuffle_taxa(Tree *tree){ + int * shuffled_indices = NULL; + int i = 0; + int node = 0; + + shuffled_indices = (int*) malloc(tree->nb_taxa * sizeof(int)); + for(i=0; i < tree->nb_taxa ; i++){ + shuffled_indices[i]=i; + } + + for (i=0; i < tree->nb_nodes; i++) { + if (tree->a_nodes[i]->nneigh > 1) continue; + if(tree->a_nodes[i]->name) { + free(tree->a_nodes[i]->name); + tree->a_nodes[i]->name = NULL; + } + } /* end freeing all leaf names */ + + shuffle(shuffled_indices,tree->nb_taxa, sizeof(int)); + /* and then we change accordingly all the pointers node->name for the leaves of the tree */ + node = 0; + for (i=0; i 
< tree->nb_nodes; i++){ + if (tree->a_nodes[i]->nneigh == 1){ + /* if(input_tree->a_nodes[i]->name) { free(input_tree->a_nodes[i]->name); input_tree->a_nodes[i]->name = NULL; } */ + tree->a_nodes[i]->name = strdup(tree->taxa_names[shuffled_indices[node]]); + node++; + } + } + + /** + We update the hashtables + */ + for(i=0;inb_edges;i++){ + free_id_hashtable(tree->a_edges[i]->hashtbl[1]); + } + for(i=0;inb_edges;i++){ + tree->a_edges[i]->hashtbl[0] = create_id_hash_table(tree->length_hashtables); + tree->a_edges[i]->hashtbl[1] = create_id_hash_table(tree->length_hashtables); + } + + update_hashtables_post_alltree(tree); + update_hashtables_pre_alltree(tree); + update_node_depths_post_alltree(tree); + update_node_depths_pre_alltree(tree); + + /** + now for all the branches we can delete the **left** hashtables, because the information is redundant and + we have the equal_or_complement function to compare hashtables + */ + for (i = 0; i < tree->nb_edges; i++) { + free_id_hashtable(tree->a_edges[i]->hashtbl[0]); + tree->a_edges[i]->hashtbl[0] = NULL; + } + /** + topological depths of branches + */ + update_all_topo_depths_from_hashtables(tree); + + free(shuffled_indices); +} + + +void reroot_acceptable(Tree* t) { + /* this function replaces t->node0 on a trifurcated node (or bigger polytomy) selected at random */ + int i, myrandom, chosen_index_in_a_nodes, nb_trifurcated = 0; + Node *candidate, *chosen; + /* we first create a table of all indices of the trifurcated nodes */ + int* mytable = calloc(t->nb_nodes, sizeof(int)); + for (i = 0; i < t->nb_nodes; i++) { + candidate = t->a_nodes[i]; + if(candidate->nneigh >= 3) mytable[nb_trifurcated++] = i; + } + if(nb_trifurcated == 0) { + fprintf(stderr,"Warning: %s was not able to find a trifurcated node! 
No rerooting.\n", __FUNCTION__); + return; } + else { + myrandom = rand_to(nb_trifurcated); /* between 0 and nb_trifurcated excluded */ + chosen_index_in_a_nodes = mytable[myrandom]; + chosen = t->a_nodes[chosen_index_in_a_nodes]; + t->node0 = chosen; + } + + reorient_edges(t); + + free(mytable); +} /* end reroot_acceptable */ + + +void reorient_edges(Tree *t){ + int i=0; + for(i=0; i < t->node0->nneigh; i++) + reorient_edges_recur(t->node0->neigh[i], t->node0, t->node0->br[i]); +} + +void reorient_edges_recur(Node *n, Node *prev, Edge *e){ + int i; + /* We reorient the edge */ + if(e->left == n && e->right == prev){ + e->left = prev; + e->right= n; + }else{ + assert(e->left == prev && e->right == n); /* descendant */ + } + + for(i = 0; i < n->nneigh ; i++){ + if(n->neigh[i] != prev){ + reorient_edges_recur(n->neigh[i], n, n->br[i]); + } + } +} + + +void unrooted_to_rooted(Tree* t) { + /* this function takes an unrooted tree and simply roots it on node0: + at the end of the process, t->node0 has exactly two neighbours */ + /* it assumes there is enough space in the tree's node pointer and edge pointer arrays. */ + if (t->node0->nneigh == 2) { + fprintf(stderr,"Warning: %s was called on a tree that was already rooted! Nothing to do.\n", __FUNCTION__); + return; + } + Node* old_root = t->node0; + Node* son0 = old_root->neigh[0]; + Edge* br0 = old_root->br[0]; + /* we create a new root node whose left son will be what was in dir0 from the old root, and right son will be the old root. 
*/ + Node* new_root = new_node("root", t, 2); /* will have only two neighbours */ + t->node0 = new_root; + + + Edge* new_br = new_edge(t); /* this branch will have length MIN_BRLEN and links the new root to the old root as its right son */ + new_br->left = new_root; + new_br->right = old_root; + new_br->brlen = MIN_BRLEN; + new_br->had_zero_length = 1; + new_br->has_branch_support = 0; + /* copying hashtables */ + assert(br0->right == son0); /* descendant */ + /* the hashtable for br0 is not modified: subtree rooted on son0 remains same */ + new_br->hashtbl[1] = complement_id_hashtbl(br0->hashtbl[1], t->nb_taxa); + /* WARNING: not dealing with subtype counts nor topological depth */ + + new_root->neigh[0] = son0; + new_root->br[0] = br0; + + new_root->neigh[1] = old_root; + new_root->br[1] = new_br; + + assert(son0->br[0] == br0 && br0->right == son0); /* must be the case because son0 was the neighbour of the old root in direction 0 */ + son0->neigh[0] = new_root; + + br0->left = new_root; + + old_root->neigh[0] = new_root; + old_root->br[0] = new_br; + /* done rerooting */ +} + + + +/* THE FOLLOWING FUNCTIONS ARE USED TO BUILD A TREE FROM A STRING (PARSING) */ + +/* utility functions to deal with NH files */ + +unsigned int tell_size_of_one_tree(const char* filename) { + /* the only purpose of this is to know about the size of a treefile (NH format) in order to save memspace in allocating the string later on */ + /* wew open and close this file independently of any other fopen */ + unsigned int mysize = 0; + char u; + FILE* myfile = fopen(filename, "r"); + if (myfile) { + while ( (u = fgetc(myfile))!= ';' ) { /* termination character of the tree */ + if (u == EOF) break; /* shouldn't happen anyway */ + if (isspace(u)) continue; else mysize++; + } + fclose(myfile); + } /* end if(myfile) */ + return (mysize+1); +} + + +int copy_nh_stream_into_str(FILE* nh_stream, char* big_string) { + int index_in_string = 0; + char u; + /* rewind(nh_stream); DO NOT go to the 
beginning of the stream if we want to make this flexible enough to read several trees per file */ + while ( (u = fgetc(nh_stream))!= ';' ) { /* termination character of the tree */ + if (u == EOF) { big_string[index_in_string] = '\0'; return 0; } /* error code telling that no tree has been read properly */ + if (index_in_string == MAX_TREELENGTH - 1) { + fprintf(stderr,"Fatal error: tree file seems too big, are you sure it is an NH tree file? Aborting.\n"); + Generic_Exit(__FILE__,__LINE__,__FUNCTION__,EXIT_FAILURE); + } + if (isspace(u)) continue; + big_string[index_in_string++] = u; + } + big_string[index_in_string++] = ';'; + big_string[index_in_string] = '\0'; + return 1; /* leaves the stream right after the terminal ';' */ +} /*end copy_nh_stream_into_str */ + + + + +/* actually parsing a tree */ + + +void process_name_and_brlen(Node* son_node, Edge* edge, Tree* current_tree, char* in_str, int begin, int end) { + /* looks into in_str[begin..end] for the branch length of the "father" edge + and updates the edge and node structures accordingly */ + int colon = index_toplevel_colon(in_str,begin,end); + int closing_par = -1, opening_bracket = -1; + int i, ignore_mode, name_begin, name_end, name_length, effective_length; + double brlen = .0; + + /* processing the optional BRANCH LENGTH... */ + if (colon == -1) { + edge->had_zero_length = TRUE; + edge->brlen = MIN_BRLEN; + } else { + parse_double(in_str,colon+1,end,&brlen); + edge->had_zero_length = (brlen == 0.0); + edge->brlen = (brlen < MIN_BRLEN ? MIN_BRLEN : brlen); + } + + + /* then scan backwards from the colon (or from the end if no branch length) to get the NODE NAME, + not going further than the first closing par */ + /* we ignore the NHX-style comments for the moment, hence the detection of the brackets, which can contain anything but nested brackets */ + ignore_mode = 0; + for (i = (colon == -1 ? 
end : colon - 1); i >= begin; i--) { + if (in_str[i] == ']' && ignore_mode == 0) { ignore_mode = 1; } + else if (in_str[i] == ')' && ignore_mode == 0) { closing_par = i; break; } + else if (in_str[i] == '[' && ignore_mode) { ignore_mode = 0; opening_bracket = i; } + } /* endfor */ + + name_begin = (closing_par == -1 ? begin : closing_par + 1); + if (opening_bracket != -1) name_end = opening_bracket - 1; else name_end = (colon == -1 ? end : colon - 1); + /* but now if the name starts and ends with single or double quotes, remove them */ + if (in_str[name_begin] == in_str[name_end] && ( in_str[name_begin] == '"' || in_str[name_begin] == '\'' )) { name_begin++; name_end--; } + name_length = name_end - name_begin + 1; + effective_length = (name_length > MAX_NAMELENGTH ? MAX_NAMELENGTH : name_length); + if (name_length >= 1) { + son_node->name = (char*) malloc((effective_length+1) * sizeof(char)); + strncpy(son_node->name, in_str+name_begin, effective_length); + son_node->name[effective_length] = '\0'; /* terminating the string */ + } + + +} /* end of process_name_and_brlen */ + + + + +Node* create_son_and_connect_to_father(Node* current_node, Tree* current_tree, int direction, char* in_str, int begin, int end) { + /* This function creates (allocates) the son node in the given direction from the current node. + It also creates a new branch to connect the son to the father. + The array structures in the tree (a_nodes and a_edges) are updated accordingly. + Branch length and node name are processed. + The input string given between the begin and end indices (included) is of the type: + (...)node_name:length + OR + leaf_name:length + OR + a:1,b:0.31,c:1.03 + In both cases the length is optional, and replaced by MIN_BR_LENGTH if absent. */ + + if (direction < 0) { + fprintf(stderr,"Error in the direction given to create a son! 
Aborting.\n"); + Generic_Exit(__FILE__,__LINE__,__FUNCTION__,EXIT_FAILURE); + } + + int i; + Node* son = (Node*) malloc(sizeof(Node)); + son->id = current_tree->next_avail_node_id++; + current_tree->a_nodes[son->id] = son; + current_tree->nb_nodes++; + + son->name = son->comment = NULL; + son->depth = MAX_NODE_DEPTH; + + Edge* edge = (Edge*) malloc(sizeof(Edge)); + edge->id = current_tree->next_avail_edge_id++; + current_tree->a_edges[edge->id] = edge; + current_tree->nb_edges++; + + edge->hashtbl[0] = create_id_hash_table(current_tree->length_hashtables); + edge->hashtbl[1] = create_id_hash_table(current_tree->length_hashtables); + + // for (i=0; i<2; i++) edge->subtype_counts[i] = (int*) calloc(NUM_SUBTYPES, sizeof(int)); + for (i=0; i<2; i++) edge->subtype_counts[i] = NULL; /* subtypes.c will have to create that space */ + + edge->right = son; + edge->left = current_node; + + edge->has_branch_support = 0; + + current_node->neigh[direction] = son; + current_node->br[direction] = edge; + + /* process node name (of the son) and branch length (of the edge we just created)... */ + process_name_and_brlen(son, edge, current_tree, in_str, begin, end); + + return son; +} /* end of create_son_and_connect_to_father */ + + + +void parse_substring_into_node(char* in_str, int begin, int end, Node* current_node, int has_father, Tree* current_tree) { + /* this function supposes that current_node is already allocated, but not the data structures in there. + It reads starting from character of in_str at index begin and stops at character at index end. + It is supposed that the input to this function is what has been seen immediately within a set of parentheses. + The outer parentheses themselves are not included in the range [begin, end]. 
+ So we expect in_str[begin, end] to contain something like: + MyTaxa:1.2e-3 + OR + (A:4,B:6)Archae:0.45,Ctax:0.004 + OR + MyTaxa + OR + (A:4,B:6),Ctax + OR + A,B,C,D,E,etc (we allow large multifurcations, with no limit on the number of sons) + */ + + /* When called, the current node has just been created but doesn't know yet its number of neighbours. We are going to discover + this when counting the number of outer commas in the substring. This function: + (1) checks how many outer commas are here: this is the number of "sons" of this node. Add one to it if the node has a father. + (2) creates the stuctures (array of node pointers and array of edge pointers) accordingly (+1 for the father) + (3) fills them. index 0 corresponds to the "father", the other to the "sons". */ + + if (begin>end) { + fprintf(stderr,"Error in parse_substring_into_node: begin > end. Aborting.\n"); + Generic_Exit(__FILE__,__LINE__,__FUNCTION__,EXIT_FAILURE); + } + + int i; + int pair[2]; /* to be the beginning and end points of the substrings describing the various nodes */ + int inner_pair[2]; /* to be the beginning and end points of the substrings after removing the name and branch length */ + int nb_commas = count_outer_commas(in_str, begin, end); + int comma_index = begin - 1; + int direction; + Node* son; + + /* allocating the data structures for the current node */ + current_node->nneigh = (nb_commas==0 ? 1 : nb_commas + 1 + has_father); + current_node->neigh = malloc(current_node->nneigh * sizeof(Node*)); + current_node->br = malloc(current_node->nneigh * sizeof(Edge*)); + + if (nb_commas == 0) { /* leaf: no recursive call */ + /* this means there is no split here, terminal node: we know that the current node is a leaf. + Its name is already there in node->name, we just have to update the taxname table and all info related + to the fact that we have a taxon here. 
*/ + /* that's also the moment when we check that there are no two identical taxa on different leaves of the tree */ + for(i=0;i < current_tree->next_avail_taxon_id; i++) { + if (!strcmp(current_node->name, current_tree->taxa_names[i])) { + fprintf(stderr,"Fatal error: duplicate taxon %s.\n", current_node->name); + Generic_Exit(__FILE__,__LINE__,__FUNCTION__,EXIT_FAILURE); + } /* end if */ + } /* end for */ + + current_tree->taxa_names[current_tree->next_avail_taxon_id++] = strdup(current_node->name); + + } else { /* at least one comma, so at least two sons: */ + for (i=0; i <= nb_commas; i++) { /* e.g. three iterations for two commas */ + direction = i + has_father; + pair[0] = comma_index + 1; /* == begin at first iteration */ + comma_index = (i == nb_commas ? end + 1 : index_next_toplevel_comma(in_str, pair[0], end)); + pair[1] = comma_index - 1; + + son = create_son_and_connect_to_father(current_node, current_tree, direction /* dir from current */, + in_str, pair[0], pair[1]); + /* RECURSIVE TREATMENT OF THE SON */ + strip_toplevel_parentheses(in_str,pair[0],pair[1],inner_pair); /* because name and brlen already processed by create_son */ + parse_substring_into_node(in_str,inner_pair[0],inner_pair[1], son, 1, current_tree); /* recursive treatment */ + /* after the recursive treatment of the son, the data structures of the son have been created, so now we can write + in it the data corresponding to its direction0 (father) */ + son->neigh[0] = current_node; + son->br[0] = current_node->br[direction]; + } /* end for i (treatment of the various sons) */ + + + } /* end if/else on the number of commas */ + + +} /* end parse_substring_into_node */ + + + +Tree* parse_nh_string(char* in_str) { + /* this function allocates, populates and returns a new tree. 
*/ + /* returns NULL if the file doesn't correspond to NH format */ + int in_length = (int) strlen(in_str); + int i; /* loop counter */ + int begin, end; /* to delimitate the string to further process */ + int n_otu = 0; + + /* SYNTACTIC CHECKS on the input string */ + i = 0; while (isspace(in_str[i])) i++; + if (in_str[i] != '(') { fprintf(stderr,"Error: tree doesn't start with an opening parenthesis.\n"); return NULL; } + else begin = i+1; + /* begin: AFTER the very first parenthesis */ + + i = in_length-1; + while (isspace(in_str[i])) i--; + if (in_str[i] != ';') { fprintf(stderr,"Error: tree doesn't end with a semicolon.\n"); return NULL; } + while (in_str[--i] != ')') ; + end = i-1; + /* end: BEFORE the very last parenthesis, discarding optional name for the root and uncanny branch length for its "father" branch */ + + /* we make a first pass on the string to discover the number of taxa. */ + /* there are as many OTUs as commas plus 1 in the nh string */ + for (i = 0; i < in_length; i++) if (in_str[i] == ',') n_otu++; + n_otu++; + + /* immediately, we set the global variable ntax. TODO: see if we can simply get rid of this global var. */ + ntax = n_otu; + + + + /************************************ + initialisation of the tree structure + *************************************/ + Tree *t = (Tree *) malloc(sizeof(Tree)); + /* in a rooted binary tree with n taxa, (2n-2) branches and (2n-1) nodes in total. + this is the maximum we can have. multifurcations will reduce the number of nodes and branches, so set the data structures to the max size */ + t->nb_taxa = n_otu; + + t->a_nodes = (Node**) calloc(2*n_otu-1, sizeof(Node*)); + t->nb_nodes = 1; /* for the moment we only have the node0 node. 
*/ + + t->a_edges = (Edge**) calloc(2*n_otu-2, sizeof(Edge*)); + t->nb_edges = 0; /* none at the moment */ + + t->node0 = (Node*) malloc(sizeof(Node)); + t->a_nodes[0] = t->node0; + + t->node0->id = 0; + t->node0->name = NULL; + t->node0->comment = NULL; + + t->node0->depth = MAX_NODE_DEPTH; + t->taxa_names = (char**) malloc(n_otu * sizeof(char*)); + t->length_hashtables = (int) (n_otu / ceil(log10((double)n_otu))); + + t->taxname_lookup_table = NULL; + + t->next_avail_node_id = 1; /* root node has id 0 */ + t->next_avail_edge_id = 0; /* no branch added so far */ + t->next_avail_taxon_id = 0; /* no taxon added so far */ + + /* ACTUALLY READING THE TREE... */ + + parse_substring_into_node(in_str, begin, end, t->node0, 0 /* no father node */, t); + + /* SANITY CHECKS AFTER READING THE TREE */ + + //printf("\n*** BASIC STATISTICS ***\n\n", in_str); + //printf("Number of taxa in the tree read: %d\n", t->nb_taxa); + //printf("Number of nodes in the tree read: %d\n", t->nb_nodes); + //printf("Next available node id in the new tree: %d\n", t->next_avail_node_id); + //printf("Number of edges in the tree read: %d\n", t->nb_edges); + //printf("Next available edge id in the new tree: %d\n\n", t->next_avail_edge_id); + //printf("Number of leaves according to the tree structure: %d\n", count_leaves(t)); + //printf("Number of roots in the whole tree (must be 1): %d\n", count_roots(t)); + //printf("Number of edges with zero length: %d\n", count_zero_length_branches(t)); + + /* DEBUG printf("Array of node pointers:\n"); + for(i=0; inb_nodes; i++) printf("%p\t",t->a_nodes[i]); printf("\n"); + printf("Node names:\n"); + for(i=0; inb_nodes; i++) printf("%s\n",t->a_nodes[i]->name); + */ + + return t; + +} /* end parse_nh_string */ + + +Tree *complete_parse_nh(char* big_string, char*** taxname_lookup_table) { + /* trick: iff taxname_lookup_table is NULL, we set it according to the tree read, otherwise we use it as the reference taxname lookup table */ + int i; + Tree* mytree = 
parse_nh_string(big_string); + if(mytree == NULL) { fprintf(stderr,"Not a syntactically correct NH tree.\n"); return NULL; } + + if(*taxname_lookup_table == NULL) *taxname_lookup_table = build_taxname_lookup_table(mytree); + mytree->taxname_lookup_table = *taxname_lookup_table; + + update_bootstrap_supports_from_node_names(mytree); + /* update_subtype_counts_post_alltree(mytree); + update_subtype_counts_pre_alltree(mytree); + update_branch_subtype_counts_from_nodes(mytree); */ + + update_hashtables_post_alltree(mytree); + update_hashtables_pre_alltree(mytree); + + update_node_depths_post_alltree(mytree); + update_node_depths_pre_alltree(mytree); + + /* for all branches in the tree, we should assert that the sum of the number of taxa on the left + and on the right of the branch is equal to tree->nb_taxa */ + for (i = 0; i < mytree->nb_edges; i++) + if(!mytree->a_edges[i]->had_zero_length) + assert(mytree->a_edges[i]->hashtbl[0]->num_items + + mytree->a_edges[i]->hashtbl[1]->num_items + == mytree->nb_taxa); + + + /* now for all the branches we can delete the **left** hashtables, because the information is redundant and + we have the equal_or_complement function to compare hashtables */ + + for (i = 0; i < mytree->nb_edges; i++) { + free_id_hashtable(mytree->a_edges[i]->hashtbl[0]); + mytree->a_edges[i]->hashtbl[0] = NULL; + } + + /* topological depths of branches */ + update_all_topo_depths_from_hashtables(mytree); + + return mytree; +} + + + + +/* taxname lookup table functions */ + +char** build_taxname_lookup_table(Tree* tree) { + /* this function ALLOCATES a lookup table, a mere array of strings */ + /* lookup tables are shared between trees, be able to compare hashtables (one taxon == one index in the lookup table) */ + int i; + char** output = (char**) malloc(tree->nb_taxa * sizeof(char*)); + for(i=0; i < tree->nb_taxa; i++) output[i] = strdup(tree->taxa_names[i]); + return output; +} + +/** + The tax_id_lookup table is useful to make the correspondance between 
a + node id and a taxon id: Is avoids to look for taxon name in the lookup_table + which is very time consuming: traverse the whole array and compare strings + This structure is not stored in the tree, but may be computed when needed +*/ +map_t build_taxid_hashmap(char** taxname_lookup_table, int nb_taxa){ + map_t h = hashmap_new(); + + int i; + for(i=0;itaxname_lookup_table; +} + + +Taxon_id get_tax_id_from_tax_name(char* str, char** lookup_table, int length) { + /* just exits on an error if the taxon is not to be found by this linear search */ + int i; + for(i=0; i < length; i++) if (!strcmp(str,lookup_table[i])) return i; + fprintf(stderr,"Fatal error : taxon %s not found! Aborting.\n", str); + Generic_Exit(__FILE__,__LINE__,__FUNCTION__,EXIT_FAILURE); + return MAX_TAXON_ID; /* just in case the compiler would complain */ +} /* end get_tax_id_from_tax_name */ + + +/* (unnecessary/deprecated) multifurcation treatment */ +void regraft_branch_on_node(Edge* edge, Node* target_node, int dir) { + /* this function modifies the given edge and target node, but nothing concerning the hashtables, subtype_counts, etc. + This function is meant to be called during the tree construction or right afterwards, at a moment when the complex + recursive structures are not yet populated. 
*/ + + /* modifying the info into the branch */ + edge->left = target_node; /* modify the ancestor, not the descendant */ + + /* modifying the info into the node on which we graft */ + target_node->br[dir] = edge; + target_node->neigh[dir] = edge->right; + + /* modifying the info on the node at the right end of the branch we just grafted */ + Node* son = edge->right; + son->neigh[0] = target_node; /* the father is always in direction 0 */ + +} /* end regraft_branch_on_node */ + + + +/*************************************************************** + ******************* neatly implementing tree traversals ****** +***************************************************************/ + +/* in all cases below we accept that origin can be NULL: + this describes the situation where we are on the pseudoroot node. */ + +void post_order_traversal_recur(Node* current, Node* origin, Tree* tree, void (*func)(Node*, Node*, Tree*)) { + /* does the post order traversal on current Node and its "descendants" (i.e. not including origin, who is a neighbour of current */ + int i, n = current->nneigh; + int cur_to_orig = (origin ? 
dir_a_to_b(current, origin) : -1); /* direction from the current node to the origin of the traversal */ + + /* process children first */ + if (cur_to_orig == -1) { /* current is the pseudoroot node */ + for(i=0; i < n; i++) post_order_traversal_recur(current->neigh[i], current, tree, func); + } else { + for(i=1; i < n; i++) post_order_traversal_recur(current->neigh[(cur_to_orig+i)%n], current, tree, func); /* no iter when n==1 (leaf) */ + } + + /* and then in any case, call the function on the current node */ + func(current, origin /* may be NULL, it's up to func to deal with that properly */, tree); +} + +void post_order_traversal(Tree* t, void (*func)(Node*, Node*, Tree*)) { + post_order_traversal_recur(t->node0, NULL, t, func); +} + +/* Post order traversal with any data that can be passed to the recur function */ +void post_order_traversal_data_recur(Node* current, Node* origin, Tree* tree, void* data, void (*func)(Node*, Node*, Tree*, void*)) { + /* does the post order traversal on current Node and its "descendants" (i.e. not including origin, who is a neighbour of current */ + int i, n = current->nneigh; + int cur_to_orig = (origin ? 
dir_a_to_b(current, origin) : -1); /* direction from the current node to the origin of the traversal */ + + /* process children first */ + if (cur_to_orig == -1) { /* current is the pseudoroot node */ + for(i=0; i < n; i++) post_order_traversal_data_recur(current->neigh[i], current, tree, data, func); + } else { + for(i=1; i < n; i++) post_order_traversal_data_recur(current->neigh[(cur_to_orig+i)%n], current, tree, data, func); /* no iter when n==1 (leaf) */ + } + + /* and then in any case, call the function on the current node */ + func(current, origin /* may be NULL, it's up to func to deal with that properly */, tree, data); +} + +void post_order_traversal_data(Tree* t, void* data, void (*func)(Node*, Node*, Tree*,void*)) { + post_order_traversal_data_recur(t->node0, NULL, t, data, func); +} + +void pre_order_traversal_recur(Node* current, Node* origin, Tree* tree, void (*func)(Node*, Node*, Tree*)) { + /* does the pre order traversal on current Node and its "descendants" (i.e. not including origin, who is a neighbour of current */ + int i, n = current->nneigh; + int cur_to_orig = (origin ? 
dir_a_to_b(current, origin) : -1); /* direction from the current node to the origin of the traversal */ + + /* in any case, call the function on the current node first */ + func(current, origin /* may be NULL, it's up to func to deal with that properly */, tree); + + /* if current is not a leaf, process its children */ + if (cur_to_orig == -1) { /* current is the pseudoroot node */ + for(i=0; i < n; i++) pre_order_traversal_recur(current->neigh[i], current, tree, func); + } else { + for(i=1; i < n; i++) pre_order_traversal_recur(current->neigh[(cur_to_orig+i)%n], current, tree, func); /* no iter when n==1 (leaf) */ + } +} + + +void pre_order_traversal(Tree* t, void (*func)(Node*, Node*, Tree*)) { + pre_order_traversal_recur(t->node0, NULL, t, func); +} + +/* Pre order traversal with any data that can be passed to the recur function */ +void pre_order_traversal_data_recur(Node* current, Node* origin, Tree* tree, void* data, void (*func)(Node*, Node*, Tree*, void*)) { + /* does the pre order traversal on current Node and its "descendants" (i.e. not including origin, who is a neighbour of current */ + int i, n = current->nneigh; + int cur_to_orig = (origin ? 
dir_a_to_b(current, origin) : -1); /* direction from the current node to the origin of the traversal */ + + /* in any case, call the function on the current node first */ + func(current, origin /* may be NULL, it's up to func to deal with that properly */, tree, data); + + /* if current is not a leaf, process its children */ + if (cur_to_orig == -1) { /* current is the pseudoroot node */ + for(i=0; i < n; i++) pre_order_traversal_data_recur(current->neigh[i], current, tree, data, func); + } else { + for(i=1; i < n; i++) pre_order_traversal_data_recur(current->neigh[(cur_to_orig+i)%n], current, tree, data, func); /* no iter when n==1 (leaf) */ + } +} + + +void pre_order_traversal_data(Tree* t, void* data, void (*func)(Node*, Node*, Tree*, void*)) { + pre_order_traversal_data_recur(t->node0, NULL, t, data, func); +} + + +/* BOOTSTRAP SUPPORT UTILITIES */ + +void update_bootstrap_supports_from_node_names(Tree* tree) { + /* this calls the recursive function to update all branch bootstrap supports, originally imported as internal node names from the NH file */ + pre_order_traversal(tree,&update_bootstrap_supports_doer); +} + +void update_bootstrap_supports_doer(Node* current, Node* origin, Tree* tree) { + /* a branch takes its support value from its descendant node (son). + The current node under examination will give its value (node name) to its father branch, if that one exists. + We modify here the bootstrap support on the edge between current and origin. 
It is assumed that the node "origin" is on + the path from "current" to the (pseudo-)root */ + if(!origin || current->nneigh == 1) return; /* nothing to do for a leaf or for the root */ + + double value; + Edge* edge = current->br[dir_a_to_b(current, origin)]; + + if (current->name && strlen(current->name) > 0 && sscanf(current->name,"%lf", &value) == 1) { /* if succesfully parsing a number */ + edge->has_branch_support = 1; + edge->branch_support = value; + } else { + edge->has_branch_support = 0; + } +} /* end of update_bootstrap_supports_doer */ + + + + +/* CALCULATING NODE DEPTHS */ + +void update_node_depths_post_doer(Node* target, Node* orig, Tree* t) { + /* here we update the depth of the target node */ + int i; + double depth = MAX_NODE_DEPTH; + if (target->nneigh == 1) + target->depth = 0.0; + else { + /* the following loop also takes care of the case where origin == NULL (target is root) */ + for (i=0; i < target->nneigh; i++) { + if (target->neigh[i] == orig) continue; + depth = min_double(depth, target->neigh[i]->depth + (target->br[i]->had_zero_length ? 0.0 : target->br[i]->brlen)); + } + target->depth = depth; + } +} /* end of update_node_depths_post_doer */ + + +void update_node_depths_pre_doer(Node* target, Node* orig, Tree* t) { + /* when we enter this function, orig already has its depth set to its final value. Update the target if its current depth is larger + than the one we get taking into account the min path to a leave from target via origin */ + if (!orig) return; /* nothing to do on the root for this preorder: value is already correctly set by the postorder */ + int dir_target_to_orig = dir_a_to_b(target, orig); + double alt_depth = orig->depth + (target->br[dir_target_to_orig]->had_zero_length ? 
0.0 : target->br[dir_target_to_orig]->brlen); + if (alt_depth < target->depth) target->depth = alt_depth; +} /* end of update_node_depths_pre_doer */ + + +void update_node_depths_post_alltree(Tree* tree) { + post_order_traversal(tree, &update_node_depths_post_doer); +} /* end of update_node_depths_post_alltree */ + + +void update_node_depths_pre_alltree(Tree* tree) { + pre_order_traversal(tree, &update_node_depths_pre_doer); +} /* end of update_node_depths_pre_alltree */ + + + +/* working with topological depths: number of taxa on the lightest side of the branch */ + +void update_all_topo_depths_from_hashtables(Tree* tree) { + int i, m, n = tree->nb_taxa; + for (i = 0; i < tree->nb_edges; i++) { + m = tree->a_edges[i]->hashtbl[1]->num_items; + tree->a_edges[i]->topo_depth = min_int(m, n-m); + } + +} /* end update_all_topo_depths_from_hashtables */ + + + + +int greatest_topo_depth(Tree* tree) { + /* returns the greatest branch depth in the tree */ + int i, greatest = 0; + for (i = 0; i < tree->nb_edges; i++) { + if (tree->a_edges[i]->topo_depth > greatest) greatest = tree->a_edges[i]->topo_depth; + } + return greatest; +} /* end greatest_topo_depth */ + + + +/* WORKING WITH HASHTABLES */ + +void update_hashtables_post_doer(Node* current, Node* orig, Tree* t) { + /* we are going to update one of the two hashtables sitting on the branch between current and orig. 
*/ + if (orig==NULL) return; + int i, n = current->nneigh; + int curr_to_orig = dir_a_to_b(current, orig); + Edge* br = current->br[curr_to_orig], *br2; /* br: current to orig; br2: any _other_ branch from current */ + + for(i=1 ; i < n ; i++) { + br2 = current->br[(curr_to_orig + i)%n]; + /* we are going to update the info on br with the info from br2 */ + update_id_hashtable(br2->hashtbl[current==br2->left], /* source */ + br->hashtbl[current==br->right]); /* dest */ + } + + /* but if n = 1 we haven't done anything (leaf): we must put the info corresponding to the taxon into the branch */ + if (n == 1) { + assert(br->right == current); + /* add the id of the taxon to the right hashtable of the branch */ + add_id(br->hashtbl[1],get_tax_id_from_tax_name(current->name, t->taxname_lookup_table, t->nb_taxa)); + } +} /* end update_hashtables_post_doer */ + + +void update_hashtables_pre_doer(Node* current, Node* orig, Tree* t) { + /* we are going to update one of the two hashtables sitting on the branch between current and orig. */ + if (orig==NULL) return; + int i, n = orig->nneigh; + int orig_to_curr = dir_a_to_b(orig, current); + Edge* br = orig->br[orig_to_curr], *br2; /* br: current to orig; br2: any _other_ branch from orig */ + id_hash_table_t* hash_to_update = br->hashtbl[current==br->left]; + + /* if current is a leaf we just put in the left hashtable the full hashtable minus the taxon on the leaf */ + if (current->nneigh == 1) { + assert(current == br->right); /* leaf should be on the right of the branch */ + //fill_id_hashtable(hash_to_update, t->nb_taxa); + //delete_id(hash_to_update, get_tax_id_from_tax_name(current->name, t->taxname_lookup_table, t->nb_taxa)); + complement_id_hashtable(hash_to_update /*dest*/, br->hashtbl[1] /*source*/, t->nb_taxa); + return; + } + + /* else we are going to update that hashtable with the info from the _other_ neighbours of the origin node. Origin can never be a leaf. 
*/ + for(i=1 ; i < n ; i++) { + br2 = orig->br[(orig_to_curr + i)%n]; + /* we are going to update the info on br with the info from br2 */ + update_id_hashtable(br2->hashtbl[orig==br2->left], /* source */ + hash_to_update); /* dest */ + } +} /* end update_hashtables_pre_doer */ + + +void update_hashtables_post_alltree(Tree* tree) { + post_order_traversal(tree, &update_hashtables_post_doer); +} /* end of update_hashtables_post_alltree */ + +void update_hashtables_pre_alltree(Tree* tree) { + pre_order_traversal(tree, &update_hashtables_pre_doer); +} /* end of update_hashtables_pre_alltree */ + + + +/* UNION AND INTERSECT CALCULATIONS (FOR THE TRANSFER METHOD) */ + +void update_i_c_post_order_ref_tree(Tree* ref_tree, Node* orig, Node* target, Tree* boot_tree, short unsigned** i_matrix, short unsigned** c_matrix) { + /* this function does the post-order traversal (recursive from the pseudoroot to the leaves, updating knowledge for the subtrees) + of the reference tree, examining only leaves (terminal edges) of the bootstrap tree. + It sends a probe from the orig node to the target node (nodes in ref_tree), calculating I_ij and C_ij + (see Brehelin, Gascuel, Martin 2008). 
*/ + int j, k, dir, orig_to_target, target_to_orig; + Edge* my_br; /* branch of the ref tree connecting orig to target */ + int edge_id; /* its id */ + int edge_id2; + + /* we first have to determine which is the direction of the edge (orig -> target and target -> orig) */ + orig_to_target = dir_a_to_b(orig,target); + target_to_orig = dir_a_to_b(target,orig); + my_br = orig->br[orig_to_target]; + edge_id = my_br->id; /* all this is in ref_tree */ + assert(target==my_br->right); /* the descendant should always be the right side of the edge */ + + if(target->nneigh == 1) { + for (j=0; j < boot_tree->nb_edges; j++) { /* for all the terminal edges of boot_tree */ + if(boot_tree->a_edges[j]->right->nneigh != 1) continue; + /* we only want to scan terminal edges of boot_tree, where the right son is a leaf */ + /* else we update all the I_ij and C_ij with i = edge_id */ + if (strcmp(target->name,boot_tree->a_edges[j]->right->name)) { + /* here the taxa are different */ + i_matrix[edge_id][j] = 0; + c_matrix[edge_id][j] = 1; + } else { + /* same taxa here in T_ref and T_boot */ + i_matrix[edge_id][j] = 1; + c_matrix[edge_id][j] = 0; + } + } /* end for on all edges of T_boot, for my_br being terminal */ + } else { + /* now the case where my_br is not a terminal edge */ + /* first initialise (zero) the cells we are going to update */ + for (j=0; j < boot_tree->nb_edges; j++) + /** + We initialize the i and c matrices for the edge edge_id with : + * 0 for i : because afterwards we do i[edge_id] = i[edge_id] || i[edge_id2] + * 1 for c : because afterwards we do c[edge_id] = c[edge_id] && c[edge_id2] + */ + if(boot_tree->a_edges[j]->right->nneigh == 1){ + i_matrix[edge_id][j] = 0; + c_matrix[edge_id][j] = 1; + } + + for (k = 1; k < target->nneigh; k++) { + dir = (target_to_orig + k) % target->nneigh; /* direction from target to one of its "sons" (== not orig) */ + update_i_c_post_order_ref_tree(ref_tree, target, target->neigh[dir], boot_tree, i_matrix, c_matrix); + edge_id2 = 
target->br[dir]->id; + for (j=0; j < boot_tree->nb_edges; j++) { /* for all the terminal edges of boot_tree */ + if(boot_tree->a_edges[j]->right->nneigh != 1) continue; + + i_matrix[edge_id][j] = i_matrix[edge_id][j] || i_matrix[edge_id2][j]; + /* above is an OR between two integers, result is 0 or 1 */ + + c_matrix[edge_id][j] = c_matrix[edge_id][j] && c_matrix[edge_id2][j]; + /* above is an AND between two integers, result is 0 or 1 */ + + } /* end for j */ + } /* end for on all edges of T_boot, for my_br being internal */ + + } /* ending the case where my_br is an internal edge */ + +} /* end update_i_c_post_order_ref_tree */ + + +void update_all_i_c_post_order_ref_tree(Tree* ref_tree, Tree* boot_tree, short unsigned** i_matrix, short unsigned** c_matrix) { + /* this function is the first step of the union and intersection calculations */ + Node* root = ref_tree->node0; + int i, n = root->nneigh; + for(i=0; ineigh[i], boot_tree, i_matrix, c_matrix); +} /* end update_all_i_c_post_order_ref_tree */ + + + + + +void update_i_c_post_order_boot_tree(Tree* ref_tree, Tree* boot_tree, Node* orig, Node* target, short unsigned** i_matrix, short unsigned** c_matrix, + short unsigned** hamming, short unsigned* min_dist, short unsigned* min_dist_edge) { + /* here we implement the second part of the Brehelin/Gascuel/Martin algorithm: + post-order traversal of the bootstrap tree, and numerical recurrence. */ + /* in this function, orig and target are nodes of boot_tree (aka T_boot). */ + /* min_dist is an array whose size is equal to the number of edges in T_ref. + It gives for each edge of T_ref its min distance to a split in T_boot. */ + + int i, j, dir, orig_to_target, target_to_orig; + Edge* my_br; /* branch of the boot tree connecting orig to target */ + int edge_id /* its id */, edge_id2 /* id of descending branches. 
*/; + int N = ref_tree->nb_taxa; + + /* we first have to determine which is the direction of the edge (orig -> target and target -> orig) */ + orig_to_target = dir_a_to_b(orig,target); + target_to_orig = dir_a_to_b(target,orig); + my_br = orig->br[orig_to_target]; + edge_id = my_br->id; /* here this is an edge_id corresponding to T_boot */ + + if(target->nneigh != 1) { + /* because nothing to do in the case where target is a leaf: intersection and union already ok. */ + /* otherwise, keep on posttraversing in all other directions */ + + /* first initialise (zero) the cells we are going to update */ + for (i=0; i < ref_tree->nb_edges; i++) i_matrix[i][edge_id] = c_matrix[i][edge_id] = 0; + + for(j=1;jnneigh;j++) { + dir = (target_to_orig + j) % target->nneigh; + edge_id2 = target->br[dir]->id; + update_i_c_post_order_boot_tree(ref_tree, boot_tree, target, target->neigh[dir], + i_matrix, c_matrix, hamming, min_dist, min_dist_edge); + for (i=0; i < ref_tree->nb_edges; i++) { /* for all the edges of ref_tree */ + i_matrix[i][edge_id] += i_matrix[i][edge_id2]; + c_matrix[i][edge_id] += c_matrix[i][edge_id2]; + } /* end for i */ + } + + } /* end if target is not a leaf: the following loop is performed in all cases */ + + for (i=0; inb_edges; i++) { /* for all the edges of ref_tree */ + /* at this point we can calculate in all cases (internal branch or not) the Hamming distance at [i][edge_id], */ + hamming[i][edge_id] = /* card of union minus card of intersection */ + ref_tree->a_edges[i]->hashtbl[1]->num_items /* #taxa in the cluster i of T_ref */ + + c_matrix[i][edge_id] /* #taxa in cluster edge_id of T_boot BUT NOT in cluster i of T_ref */ + - i_matrix[i][edge_id]; /* #taxa in the intersection of the two clusters */ + + /* NEW!! 
Let's immediately calculate the right ditance, taking into account the fact that the true disance is min (dist, N-dist) */ + if (hamming[i][edge_id] > N/2 /* floor value */) hamming[i][edge_id] = N - hamming[i][edge_id]; + + + /* and update the min of all Hamming (TRANSFER) distances hamming[i][j] over all j */ + if (hamming[i][edge_id] < min_dist[i]){ + min_dist[i] = hamming[i][edge_id]; + min_dist_edge[i] = edge_id; + } + + } /* end for on all edges of T_ref */ + +} /* end update_i_c_post_order_boot_tree */ + + +void update_all_i_c_post_order_boot_tree(Tree* ref_tree, Tree* boot_tree, short unsigned** i_matrix, short unsigned** c_matrix, + short unsigned** hamming, short unsigned* min_dist, short unsigned* min_dist_edge) { + /* this function is the second step of the union and intersection calculations */ + Node* root = boot_tree->node0; + int i, n = root->nneigh; + for(i=0 ; ineigh[i], i_matrix, c_matrix, hamming, min_dist, min_dist_edge); + + /* and then some checks to make sure everything went ok */ + for(i=0; inb_edges; i++) { + assert(min_dist[i] >= 0); + if(ref_tree->a_edges[i]->right->nneigh == 1) + assert(min_dist[i] == 0); /* any terminal edge should have an exact match in any bootstrap tree */ + } +} /* end update_all_i_c_post_order_boot_tree */ + + + + +/* writing a tree to some output (stream or string) */ + +void write_nh_tree(Tree* tree, FILE* stream) { + /* writing the tree from the current position in the stream */ + if (!tree) return; + Node* node = tree->node0; /* root or pseudoroot node */ + int i, n = node->nneigh; + putc('(', stream); + for(i=0; i < n-1; i++) { + write_subtree_to_stream(node->neigh[i], node, stream); /* a son */ + putc(',', stream); + } + write_subtree_to_stream(node->neigh[i], node, stream); /* last son */ + putc(')', stream); + + if (node->name) fprintf(stream, "%s", node->name); + /* terminate with a semicol AND and end of line */ + putc(';', stream); putc('\n', stream); +} + +/* the following function writes the subtree 
having root "node" and not including "node_from". */ +void write_subtree_to_stream(Node* node, Node* node_from, FILE* stream) { + int i, direction_to_exclude, n = node->nneigh; + if (node == NULL || node_from == NULL) return; + + if(n == 1) { + /* terminal node */ + fprintf(stream, "%s:%f", (node->name ? node->name : ""), node->br[0]->brlen); /* distance to father */ + } else { + direction_to_exclude = dir_a_to_b(node, node_from); + + putc('(', stream); + /* we have to write (n-1) subtrees in total. The last print is not followed by a comma */ + for(i=1; i < n-1; i++) { + write_subtree_to_stream(node->neigh[(direction_to_exclude+i) % n], node, stream); /* a son */ + putc(',', stream); + } + write_subtree_to_stream(node->neigh[(direction_to_exclude+i) % n], node, stream); /* last son */ + putc(')', stream); + fprintf(stream, "%s:%f", (node->name ? node->name : ""), node->br[0]->brlen); /* distance to father */ + } + +} /* end write_subtree_to_stream */ + + + +/* freeing */ + +void free_edge(Edge* edge) { + int i; + if (edge == NULL) return; + if(edge->hashtbl[0]) free_id_hashtable(edge->hashtbl[0]); + if(edge->hashtbl[1]) free_id_hashtable(edge->hashtbl[1]); + for (i=0; i<2; i++) if(edge->subtype_counts[i]) free(edge->subtype_counts[i]); + + free(edge); +} + +void free_node(Node* node) { + if (node == NULL) return; + + if (node->name) free(node->name); + if (node->comment) free(node->comment); + + free(node->neigh); + free(node->br); + free(node); +} + +void free_tree(Tree* tree) { + if (tree == NULL) return; + int i; + for (i=0; i < tree->nb_nodes; i++) free_node(tree->a_nodes[i]); + for (i=0; i < tree->nb_edges; i++) free_edge(tree->a_edges[i]); + for (i=0; i < tree->nb_taxa; i++) free(tree->taxa_names[i]); + + free(tree->taxa_names); + free(tree->a_nodes); + free(tree->a_edges); + free(tree); + +} + +Tree * gen_rand_tree(int nbr_taxa, char **taxa_names){ + int taxon; + Tree *my_tree; + int* indices = (int*) calloc(nbr_taxa, sizeof(int)); /* the array that we are 
going to shuffle around to get random order in the taxa names */ + /* zero the number of taxa inserted so far in this tree */ + int nb_inserted_taxa = 0; + + int i_edge, edge_ind; + + for(taxon = 0; taxon < nbr_taxa; taxon++) + indices[taxon] = taxon; + + shuffle(indices, nbr_taxa, sizeof(int)); + + if(taxa_names == NULL){ + taxa_names = (char**) calloc(nbr_taxa, sizeof(char*)); + for(taxon = 0; taxon < nbr_taxa; taxon++) { + taxa_names[taxon] = (char*) calloc((int)(log10(nbr_taxa)+2), sizeof(char)); + sprintf(taxa_names[taxon],"%d",taxon+1); /* names taxa by a mere integer, starting with "1" */ + } + } + + /* create a new tree */ + my_tree = new_tree(nbr_taxa, taxa_names[indices[nb_inserted_taxa++]]); + + /* graft the second taxon */ + graft_new_node_on_branch(NULL, my_tree, 0.5, 1.0, taxa_names[indices[nb_inserted_taxa++]]); + + while(nb_inserted_taxa < nbr_taxa) { + /* select a branch at random */ + edge_ind = rand_to(my_tree->nb_edges); /* outputs something between 0 and (nb_edges) exclusive */ + graft_new_node_on_branch(my_tree->a_edges[edge_ind], my_tree, 0.5, 1.0, taxa_names[indices[nb_inserted_taxa++]]); + } /* end looping on the taxa, tree is full */ + + /* here we need to re-root the tree on a trifurcated node, not on a leaf, before we write it in NH format */ + reroot_acceptable(my_tree); + + for(i_edge = 0; i_edge < my_tree->nb_edges; i_edge++){ + my_tree->a_edges[i_edge]->brlen = normal(0.1, 0.05); + if(my_tree->a_edges[i_edge]->brlen < 0) + my_tree->a_edges[i_edge]->brlen = 0; + } + + my_tree->length_hashtables = (int) (my_tree->nb_taxa / ceil(log10((double)my_tree->nb_taxa))); + ntax = nbr_taxa; + my_tree->taxname_lookup_table = build_taxname_lookup_table(my_tree); + + for(i_edge=0;i_edgenb_edges;i_edge++){ + my_tree->a_edges[i_edge]->hashtbl[0] = create_id_hash_table(my_tree->length_hashtables); + my_tree->a_edges[i_edge]->hashtbl[1] = create_id_hash_table(my_tree->length_hashtables); + } + + update_hashtables_post_alltree(my_tree); + 
update_hashtables_pre_alltree(my_tree); + update_node_depths_post_alltree(my_tree); + update_node_depths_pre_alltree(my_tree); + + + /* for all branches in the tree, we should assert that the sum of the number of taxa on the left + and on the right of the branch is equal to tree->nb_taxa */ + for (i_edge = 0; i_edge < my_tree->nb_edges; i_edge++) + if(!my_tree->a_edges[i_edge]->had_zero_length) + assert(my_tree->a_edges[i_edge]->hashtbl[0]->num_items + + my_tree->a_edges[i_edge]->hashtbl[1]->num_items + == my_tree->nb_taxa); + + /* now for all the branches we can delete the **left** hashtables, because the information is redundant and + we have the equal_or_complement function to compare hashtables */ + + for (i_edge = 0; i_edge < my_tree->nb_edges; i_edge++) { + free_id_hashtable(my_tree->a_edges[i_edge]->hashtbl[0]); + my_tree->a_edges[i_edge]->hashtbl[0] = NULL; + } + + /* topological depths of branches */ + update_all_topo_depths_from_hashtables(my_tree); + + return(my_tree); +} diff --git a/booster/tree.h b/booster/tree.h new file mode 100644 index 000000000..8ffedde51 --- /dev/null +++ b/booster/tree.h @@ -0,0 +1,288 @@ +/* + +BOOSTER: BOOtstrap Support by TransfER: +BOOSTER is an alternative method to compute bootstrap branch supports +in large trees. It uses transfer distance between bipartitions, instead +of perfect match. + +Copyright (C) 2017 Frederic Lemoine, Jean-Baka Domelevo Entfellner, Olivier Gascuel + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +*/ + +#ifndef _TREE_H_ +#define _TREE_H_ + +#include "hashtables_bfields.h" /* for the hashtables to store taxa names on the branches */ +#include "hashmap.h" +#include "io.h" +#include + +#define TRUE 1 +#define FALSE 0 + +#define MIN_BRLEN 1e-8 +#define MAX_TREELENGTH 10000000 /* more or less 10MB for a tree file in NH format */ +#define MAX_NODE_DEPTH 100000 /* max depth for nodes in the tree */ + +#define MAX_NAMELENGTH 255 /* max length of a taxon name */ +#define MAX_COMMENTLENGTH 255 /* max length of a comment string in NHX format */ + +/* TYPES */ +/* Every node in our binary trees has several neighbours with indices 0, 1, 2.... We allow polytomies of any degree. + An internal node with no multifurcation has 3 outgoing directions/neighbours. + + In rooted trees, the interpretation is the following: + - for internal nodes, direction 0 is to the father, other directions are to the sons + - for tips (leaves), direction 0 is to the father, no other neighbours + - the root has only two neighbours. + So it's easy to check whether a node is a leaf (one neighbour) or the root of a rooted tree (two neighbours). + + For unrooted trees, the interpretation is the same, except that no node has two neighbours. + The pseudo-root (from the NH import) is three-furcated, so behaves exactly like any other internal node. + + It is not advisable to have several nodes of degree two in the same tree. + + The root or pseudo-root is ALWAYS assigned id 0 at beginning. May change later, upon rerooting. + In any case, it is always pointed to by tree->node0. */ + +typedef struct __Node { + char* name; + char* comment; /* for further use: store any comment (e.g. 
from NHX format) */ + int id; /* unique id attributed to the node */ + short int nneigh; /* number of neighbours */ + struct __Node** neigh; /* neighbour nodes */ + struct __Edge** br; /* corresponding branches going from this node */ + double depth; /* the depth of a node is its min distance to a leaf */ +} Node; + + +/* Every edge connects two nodes. + By convention, all terminal branches will have the tip on their RIGHT end */ + + +typedef struct __Edge { + int id; + Node *left, *right; /* in rooted trees the right end will always be the descendant. + In any case, a leaf is always on the right side of its branch. */ + double brlen; + double branch_support; + int* subtype_counts[2]; /* first index is 0 for the left of the branch, 1 for its right side */ + id_hash_table_t *hashtbl[2]; /* hashtables containing the ids of the taxa in each subtree */ + /* index 0 corresponds to the left of the branch, index 1 to its right. + following our implementation, we only keep hashtbl[1] populated. 
*/ + short int had_zero_length; /* set at the moment when we read the tree, even though + we then immediately set the branch length to MIN_BRLEN */ + short int has_branch_support; + int topo_depth; /* the topological depth is the number of taxa on the lightest side of the bipar */ +} Edge; + + +typedef struct __Tree { + Node** a_nodes; /* array of node pointers */ + Edge** a_edges; /* array of edge pointers */ + Node* node0; /* the root or pseudo-root node */ + int nb_nodes; + int nb_edges; + int nb_taxa; + char** taxa_names; /* store only once the taxa names */ + int length_hashtables; /* the number of chained lists in the hashtables on the edges */ + int next_avail_node_id; + int next_avail_edge_id; + int next_avail_taxon_id; + char** taxname_lookup_table; +} Tree; + + +/* FUNCTIONS */ + +/* UTILS/DEBUG: counting specific branches or nodes in the tree */ +int count_zero_length_branches(Tree* tree); +int count_leaves(Tree* tree); +int count_roots(Tree* tree); +int count_multifurcations(Tree* tree); +int dir_a_to_b(Node* a, Node* b); + +/* various statistics on branch support */ +double mean_bootstrap_support(Tree* tree); +double median_bootstrap_support(Tree* tree); +int summary_bootstrap_support(Tree* tree, double* result); + +/* parsing utils: discovering and dealing with tokens */ +int index_next_toplevel_comma(char* in_str, int begin, int end); +int count_outer_commas(char* in_str, int begin, int end); +void strip_toplevel_parentheses(char* in_str, int begin, int end, int* pair); +int index_toplevel_colon(char* in_str, int begin, int end); +void parse_double(char* in_str, int begin, int end, double* location); + +/* creating a node, a branch, a tree: to create a tree from scratch, not from parsing */ +Node* new_node(const char* name, Tree* t, int degree); +Edge* new_edge(Tree* t); +Tree* new_tree(int nb_taxa, const char* name); +Node* graft_new_node_on_branch(Edge* target_edge, Tree* tree, double ratio_from_left, double new_edge_length, char* node_name); + + 
+/* collapsing a branch */ +void collapse_branch(Edge* branch, Tree* tree); + +/** + This function removes a taxon from the tree (identified by its taxon_id) + And recomputed the branch length of the branch it was branched on. + + Also, the bootstrap support (if any) is recomputed, taking the maximum of the + supports of the joined branches + + Be careful: The taxnames_lookup_table is modified after this function! + Do not use this function if you share the same taxnames_lookup_table in + several trees. +*/ +void remove_taxon(int taxon_id, Tree* tree); +/** + If there remains 2 neighbors to connect_node + We connect them directly and delete connect_node + We keep l_edge and delete r_edge + -> If nneigh de connect node != 2 : Do nothing + This function deletes a node like that: + connect_node + l_edge r_edge + l_node *-------*--------* r_node + => Careful: After this function, you may want to call + => recompute_identifiers() + +*/ +void remove_single_node(Tree *tree, Node *connect_node); +/** + This method recomputes all the identifiers + of the nodes and of the edges + for which the tree->a_nodes is not null + or tree->a_edges is not null + It also recomputes the total number of edges + and nodes in the tree + */ +void recompute_identifiers(Tree *tree); + +/** + This function shuffles the taxa of an input tree +*/ +void shuffle_taxa(Tree *tree); + +/* (re)rooting a tree */ +void reroot_acceptable(Tree* t); +void unrooted_to_rooted(Tree* t); + +/* To be called after a reroot*/ +void reorient_edges(Tree *t); +void reorient_edges_recur(Node *n, Node *prev, Edge *e); + +/* utility functions to deal with NH files */ +unsigned int tell_size_of_one_tree(const char* filename); +int copy_nh_stream_into_str(FILE* nh_stream, char* big_string); + +/* actually parsing a tree */ +void process_name_and_brlen(Node* son_node, Edge* edge, Tree* current_tree, char* in_str, int begin, int end); +Node* create_son_and_connect_to_father(Node* current_node, Tree* current_tree, int 
direction, char* in_str, int begin, int end); +void parse_substring_into_node(char* in_str, int begin, int end, Node* current_node, int has_father, Tree* current_tree); +Tree* parse_nh_string(char* in_str); + +/* complete parse tree: parse NH string, update hashtables and subtype counts */ +Tree *complete_parse_nh(char* big_string, char*** taxname_lookup_table); + + +/* taxname lookup table functions */ +char** build_taxname_lookup_table(Tree* tree); +map_t build_taxid_hashmap(char** taxname_lookup_table, int nb_taxa); +void free_taxid_hashmap(map_t taxmap); +int free_hashmap_data(any_t arg,any_t key, any_t elemt); + +char** get_taxname_lookup_table(Tree* tree); +Taxon_id get_tax_id_from_tax_name(char* str, char** lookup_table, int length); + +/* (unnecessary/deprecated) multifurcation treatment */ +void regraft_branch_on_node(Edge* branch, Node* target_node, int dir); + +/*************************************************************** + ******************* neatly implementing tree traversals ****** +***************************************************************/ + +void post_order_traversal_recur(Node* current, Node* origin, Tree* tree, void (*func)(Node*, Node*, Tree*)); +void post_order_traversal(Tree* t, void (*func)(Node*, Node*, Tree*)); + +/* post order traversal with any data passed to the function call */ +void post_order_traversal_data_recur(Node* current, Node* origin, Tree* tree, void*, void (*func)(Node*, Node*, Tree*, void*)); +void post_order_traversal_data(Tree* t, void*, void (*func)(Node*, Node*, Tree*, void*)); + + +void pre_order_traversal_recur(Node* current, Node* origin, Tree* tree, void (*func)(Node*, Node*, Tree*)); +void pre_order_traversal(Tree* t, void (*func)(Node*, Node*, Tree*)); + +/* pre order traversal with any data passed to the function call */ +void pre_order_traversal_data_recur(Node* current, Node* origin, Tree* tree, void* data, void (*func)(Node*, Node*, Tree*, void*)); +void pre_order_traversal_data(Tree* t, void* data, 
void (*func)(Node*, Node*, Tree*, void*)); + +/* bootstrap values */ +void update_bootstrap_supports_from_node_names(Tree* tree); +void update_bootstrap_supports_doer(Node* current, Node* origin, Tree* tree); + + +/* node depths */ + +void update_node_depths_post_doer(Node* target, Node* orig, Tree* t); +void update_node_depths_pre_doer(Node* target, Node* orig, Tree* t); +void update_node_depths_post_alltree(Tree* tree); +void update_node_depths_pre_alltree(Tree* tree); + +/* topological depths */ +void update_all_topo_depths_from_hashtables(Tree* tree); +int greatest_topo_depth(Tree* tree); + +/* WORKING WITH HASHTABLES */ + +void update_hashtables_post_doer(Node* current, Node* orig, Tree* t); +void update_hashtables_pre_doer(Node* current, Node* orig, Tree* t); + +void update_hashtables_post_alltree(Tree* tree); +void update_hashtables_pre_alltree(Tree* tree); + + +/* UNION AND INTERSECT CALCULATIONS FOR THE TRANSFER METHOD (from Bréhélin/Gascuel/Martin 2008) */ +void update_i_c_post_order_ref_tree(Tree* ref_tree, Node* orig, Node* target, Tree* boot_tree, short unsigned** i_matrix, short unsigned** c_matrix); +void update_all_i_c_post_order_ref_tree(Tree* ref_tree, Tree* boot_tree, short unsigned** i_matrix, short unsigned** c_matrix); + +void update_i_c_post_order_boot_tree(Tree* ref_tree, Tree* boot_tree, Node* orig, Node* target, short unsigned** i_matrix, short unsigned** c_matrix, short unsigned** hamming, short unsigned* min_dist, short unsigned* min_dist_edge); +void update_all_i_c_post_order_boot_tree(Tree* ref_tree, Tree* boot_tree, short unsigned** i_matrix, short unsigned** c_matrix, short unsigned** hamming, short unsigned* min_dist, short unsigned* min_dist_edge); + + +/*Generate Random Tree*/ +/** + - nbr_taxa: Number of leaves in the output tree + - taxa_names: array of leaf names: must be NULL if you do not want to use it + (names will be numbered in this case) + + - The output tree has branch lengths attributed using a normal distribution 
N(0.1,0.05), and any br len < 0 is set to 0 +*/ +Tree * gen_rand_tree(int nbr_taxa, char **taxa_names); + +/* writing a tree */ + +void write_nh_tree(Tree* tree, FILE* stream); +void write_subtree_to_stream(Node* node, Node* node_from, FILE* stream); + +/* freeing stuff */ + +void free_edge(Edge* edge); +void free_node(Node* node); +void free_tree(Tree* tree); +#endif /* _TREE_H_ */ diff --git a/booster/tree_utils.c b/booster/tree_utils.c new file mode 100644 index 000000000..5bb80b4f9 --- /dev/null +++ b/booster/tree_utils.c @@ -0,0 +1,102 @@ +/* + +BOOSTER: BOOtstrap Support by TransfER: +BOOSTER is an alternative method to compute bootstrap branch supports +in large trees. It uses transfer distance between bipartitions, instead +of perfect match. + +Copyright (C) 2017 Frederic Lemoine, Jean-Baka Domelevo Entfellner, Olivier Gascuel + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +*/ + +#include "tree_utils.h" + +/** + Generates a random tree based on the taxa of the tree and its lookuptable in argument + - Uses the same taxnames + - Uses the taxname_lookup_id + Advice: do a srand(time(NULL)) before calling this function a large number of times +*/ +Tree* gen_random_tree(Tree *tree){ + int* indices = (int*) calloc(tree->nb_taxa, sizeof(int)); /* the array that we are going to shuffle around to get random order in the taxa names */ + int taxon; + for(taxon = 0; taxon < tree->nb_taxa; taxon++) indices[taxon] = taxon; /* initialization */ + + /* zero the number of taxa inserted so far in this tree */ + int nb_inserted_taxa = 0,edge_ind; + Tree* my_tree = NULL; + int i; + /* shuffle the indices we are going to use to determine the names of leaves */ + shuffle(indices, tree->nb_taxa, sizeof(int)); + + /* free the previous tree if existing */ + if(my_tree) free_tree(my_tree); + + /* create a new tree */ + my_tree = new_tree(tree->nb_taxa, tree->taxa_names[indices[nb_inserted_taxa++]]); + + /* graft the second taxon */ + graft_new_node_on_branch(NULL, my_tree, 0.5, 1.0, tree->taxa_names[indices[nb_inserted_taxa++]]); + + while(nb_inserted_taxa < tree->nb_taxa) { + /* select a branch at random */ + edge_ind = rand_to(my_tree->nb_edges); /* outputs something between 0 and (nb_edges-1) exclusive */ + graft_new_node_on_branch(my_tree->a_edges[edge_ind], my_tree, 0.5, 1.0, tree->taxa_names[indices[nb_inserted_taxa++]]); + } /* end looping on the taxa, tree is full */ + + /* here we need to re-root the tree on a trifurcated node, not on a leaf, before we write it in NH format */ + reroot_acceptable(my_tree); + + my_tree->taxname_lookup_table = tree->taxname_lookup_table; + my_tree->nb_taxa = tree->nb_taxa; + my_tree->length_hashtables = (int) (my_tree->nb_taxa / ceil(log10((double)my_tree->nb_taxa))); + + int e; + for(e=0;e<my_tree->nb_edges;e++){ + my_tree->a_edges[e]->hashtbl[0] = create_id_hash_table(my_tree->length_hashtables); +
my_tree->a_edges[e]->hashtbl[1] = create_id_hash_table(my_tree->length_hashtables); + } + + /* write_nh_tree(my_tree,stdout); */ + + update_hashtables_post_alltree(my_tree); + update_hashtables_pre_alltree(my_tree); + update_node_depths_post_alltree(my_tree); + update_node_depths_pre_alltree(my_tree); + + /* for all branches in the tree, we should assert that the sum of the number of taxa on the left + and on the right of the branch is equal to tree->nb_taxa */ + for (i = 0; i < my_tree->nb_edges; i++) + if(!my_tree->a_edges[i]->had_zero_length) + assert(my_tree->a_edges[i]->hashtbl[0]->num_items + + my_tree->a_edges[i]->hashtbl[1]->num_items + == my_tree->nb_taxa); + + /* now for all the branches we can delete the **left** hashtables, because the information is redundant and + we have the equal_or_complement function to compare hashtables */ + + for (i = 0; i < my_tree->nb_edges; i++) { + free_id_hashtable(my_tree->a_edges[i]->hashtbl[0]); + my_tree->a_edges[i]->hashtbl[0] = NULL; + } + + /* topological depths of branches */ + update_all_topo_depths_from_hashtables(my_tree); + + free(indices); + return(my_tree); +} diff --git a/booster/tree_utils.h b/booster/tree_utils.h new file mode 100644 index 000000000..c9136d143 --- /dev/null +++ b/booster/tree_utils.h @@ -0,0 +1,50 @@ +/* + +BOOSTER: BOOtstrap Support by TransfER: +BOOSTER is an alternative method to compute bootstrap branch supports +in large trees. It uses transfer distance between bipartitions, instead +of perfect match. + +Copyright (C) 2017 Frederic Lemoine, Jean-Baka Domelevo Entfellner, Olivier Gascuel + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +*/ + +#ifndef _PARSIMONY_UTILS +#define _PARSIMONY_UTILS + +#include "tree.h" +#include "hashmap.h" +#include "sort.h" +#include "stats.h" + +Tree* gen_random_tree(Tree *tree); +/** + This function precomputes the expected number of parsimony steps + implied by a bipartition under the hypothesis that the tree is random. + In Input: + - The max depth + - The number of taxa + - A pointer to a 2D array (given by precompute_steps_probability(int max_depth, int nb_tax)): + * First dimension : depth + * Second dimension : steps + * value : probability of the step at a given depth + In output : An array with : + - the depth in index + - the expected Number of random parsimony steps +*/ + +#endif diff --git a/booster/version.c b/booster/version.c new file mode 100644 index 000000000..579689c8e --- /dev/null +++ b/booster/version.c @@ -0,0 +1,34 @@ +/* + +BOOSTER: BOOtstrap Support by TransfER: +BOOSTER is an alternative method to compute bootstrap branch supports +in large trees. It uses transfer distance between bipartitions, instead +of perfect match. + +Copyright (C) 2017 Frederic Lemoine, Jean-Baka Domelevo Entfellner, Olivier Gascuel + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version.
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +*/ + +#include "version.h" + +void version(FILE *out, char *executable){ + fprintf(out,"%s version %s\n",NAME, VERSION); +} + +void short_version(FILE *out){ + fprintf(out,"%s : version %s\n",NAME,VERSION); +} diff --git a/booster/version.h b/booster/version.h new file mode 100644 index 000000000..e036c0c2f --- /dev/null +++ b/booster/version.h @@ -0,0 +1,52 @@ +/* + +BOOSTER: BOOtstrap Support by TransfER: +BOOSTER is an alternative method to compute bootstrap branch supports +in large trees. It uses transfer distance between bipartitions, instead +of perfect match. + +Copyright (C) 2017 Frederic Lemoine, Jean-Baka Domelevo Entfellner, Olivier Gascuel + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +*/ + +#ifndef _VERSION_H_ +#define _VERSION_H_ + +#include +#include +#ifndef CLANG_UNDER_VS +#include +#endif + +#define NAME "booster" + +/** + Prints the version of the tools + In the output file. + (may be stdout or stderr) + */ +void version(FILE *out, char *executable); + +/** + Prints the version of the tools + In the output file. + Without the executable name + (may be stdout or stderr) + */ +void short_version(FILE *out); + +#endif diff --git a/gsl/CMakeLists.txt b/gsl/CMakeLists.txt index 06a9cef84..18c9e00b8 100644 --- a/gsl/CMakeLists.txt +++ b/gsl/CMakeLists.txt @@ -5,4 +5,5 @@ pow_int.cpp gauss.cpp gaussinv.cpp gausspdf.cpp +mygsl.h ) diff --git a/gsl/mygsl.h b/gsl/mygsl.h index 2a662470c..75c4ef616 100644 --- a/gsl/mygsl.h +++ b/gsl/mygsl.h @@ -46,6 +46,13 @@ double gsl_ran_ugaussian_pdf (const double x); */ double gsl_cdf_ugaussian_P (const double x); +/* + 1.0 - cumulative distribution function for standard normal distribution + @param x x-value + @return 1.0-CDF at x + */ +double gsl_cdf_ugaussian_Q (const double x); + /* quantile function for standard normal distribution (or CDF-inverse function) @param P probability value diff --git a/iqtree_config.h.in b/iqtree_config.h.in index bf2233e8c..15bb09070 100644 --- a/iqtree_config.h.in +++ b/iqtree_config.h.in @@ -12,6 +12,8 @@ /*#cmakedefine HAVE_PCLOSE*/ /* does the platform provide GlobalMemoryStatusEx functions? */ #cmakedefine HAVE_GLOBALMEMORYSTATUSEX +#cmakedefine HAVE_STRNDUP +#cmakedefine HAVE_STRTOK_R /* does the platform provide backtrace functions? 
*/ #cmakedefine Backtrace_FOUND diff --git a/lbfgsb/lbfgsb_new.cpp b/lbfgsb/lbfgsb_new.cpp index 3e829dc6b..d3c30e78b 100644 --- a/lbfgsb/lbfgsb_new.cpp +++ b/lbfgsb/lbfgsb_new.cpp @@ -1031,7 +1031,7 @@ void mainlb(int n, int m, double *x, --ifun; --iback; } - strcpy(task, "ERROR: ABNORMAL_TERMINATION_IN_LNSRCH"); + strcpy(task, "WARNING: ABNORMAL_TERMINATION_IN_LNSRCH"); ++iter; goto L999; } else { diff --git a/lib/libomp.a b/lib/libomp.a new file mode 100644 index 000000000..8e60e4ff8 Binary files /dev/null and b/lib/libomp.a differ diff --git a/lib32/libomp.a b/lib32/libomp.a new file mode 100644 index 000000000..dead810de Binary files /dev/null and b/lib32/libomp.a differ diff --git a/libmac/.DS_Store b/libmac/.DS_Store deleted file mode 100644 index 5008ddfcf..000000000 Binary files a/libmac/.DS_Store and /dev/null differ diff --git a/libmac/libomp.a b/libmac/libomp.a index 4f3602cb3..8ad477e8d 100644 Binary files a/libmac/libomp.a and b/libmac/libomp.a differ diff --git a/lsd2 b/lsd2 new file mode 160000 index 000000000..b0b4c6a2f --- /dev/null +++ b/lsd2 @@ -0,0 +1 @@ +Subproject commit b0b4c6a2f8bb894bf5692960d7c91a456bf8585f diff --git a/main/CMakeLists.txt b/main/CMakeLists.txt new file mode 100644 index 000000000..eca7d4a69 --- /dev/null +++ b/main/CMakeLists.txt @@ -0,0 +1,22 @@ +add_library(main +main.cpp +phyloanalysis.cpp +phyloanalysis.h +phylotesting.cpp +phylotesting.h +treetesting.cpp +treetesting.h +timetree.cpp +timetree.h +) + +if (USE_BOOSTER) + target_link_libraries(main booster) +endif() + +if (USE_LSD2) + target_link_libraries(main lsd2) +endif() + +target_link_libraries(main pda whtest vectorclass) + diff --git a/main/main.cpp b/main/main.cpp index af5857613..b3fb9abef 100644 --- a/main/main.cpp +++ b/main/main.cpp @@ -24,7 +24,7 @@ #include -#if defined WIN32 || defined _WIN32 || defined __WIN32__ +#if defined WIN32 || defined _WIN32 || defined __WIN32__ || defined WIN64 //#include //#include //extern __declspec(dllexport) int 
gethostname(char *name, int namelen); @@ -32,7 +32,6 @@ #include #endif -//#include "Eigen/Core" #include #include "tree/phylotree.h" #include @@ -43,7 +42,6 @@ #include #include "pda/greedy.h" #include "pda/pruning.h" -//#include "naivegreedy.h" #include "pda/splitgraph.h" #include "pda/circularnetwork.h" #include "tree/mtreeset.h" @@ -53,9 +51,7 @@ #include "nclextra/myreader.h" #include "phyloanalysis.h" #include "tree/matree.h" -//#include "ngs.h" -//#include "parsmultistate.h" -//#include "gss.h" +#include "obsolete/parsmultistate.h" #include "alignment/maalignment.h" //added by MA #include "tree/ncbitree.h" #include "pda/ecopd.h" @@ -63,1155 +59,1139 @@ #include "pda/ecopdmtreeset.h" #include "pda/gurobiwrapper.h" #include "utils/timeutil.h" -//#include +#include "utils/operatingsystem.h" //for getOSName() #include #include "vectorclass/instrset.h" #include "utils/MPIHelper.h" -#ifdef _IQTREE_MPI -#include -#endif #ifdef _OPENMP - #include + #include #endif using namespace std; - - void generateRandomTree(Params ¶ms) { - if (params.sub_size < 3 && !params.aln_file) { - outError(ERR_FEW_TAXA); - } - - if (!params.user_file) { - outError("Please specify an output tree file name"); - } - ////cout << "Random number seed: " << params.ran_seed << endl << endl; - - SplitGraph sg; - - try { - - if (params.tree_gen == YULE_HARDING || params.tree_gen == CATERPILLAR || - params.tree_gen == BALANCED || params.tree_gen == UNIFORM || params.tree_gen == STAR_TREE) { - if (!overwriteFile(params.user_file)) return; - ofstream out; - out.open(params.user_file); - MTree itree; - - if (params.second_tree) { - cout << "Generating random branch lengths on tree " << params.second_tree << " ..." << endl; - itree.readTree(params.second_tree, params.is_rooted); - } else - switch (params.tree_gen) { - case YULE_HARDING: - cout << "Generating random Yule-Harding tree..." << endl; - break; - case UNIFORM: - cout << "Generating random uniform tree..." 
<< endl; - break; - case CATERPILLAR: - cout << "Generating random caterpillar tree..." << endl; - break; - case BALANCED: - cout << "Generating random balanced tree..." << endl; - break; - case STAR_TREE: - cout << "Generating star tree with random external branch lengths..." << endl; - break; - default: break; - } - ofstream out2; - if (params.num_zero_len) { - cout << "Setting " << params.num_zero_len << " internal branches to zero length..." << endl; - string str = params.user_file; - str += ".collapsed"; - out2.open(str.c_str()); - } - for (int i = 0; i < params.repeated_time; i++) { - MExtTree mtree; - if (itree.root) { - mtree.copyTree(&itree); - mtree.generateRandomBranchLengths(params); - } else { - mtree.generateRandomTree(params.tree_gen, params); - } - if (params.num_zero_len) { - mtree.setZeroInternalBranches(params.num_zero_len); - MExtTree collapsed_tree; - collapsed_tree.copyTree(&mtree); - collapsed_tree.collapseZeroBranches(); - collapsed_tree.printTree(out2); - out2 << endl; - } - mtree.printTree(out); - out << endl; - } - out.close(); - cout << params.repeated_time << " tree(s) printed to " << params.user_file << endl; - if (params.num_zero_len) { - out2.close(); - cout << params.repeated_time << " collapsed tree(s) printed to " << params.user_file << ".collapsed" << endl; - } - } - // Generate random trees if optioned - else if (params.tree_gen == CIRCULAR_SPLIT_GRAPH) { - cout << "Generating random circular split network..." << endl; - if (!overwriteFile(params.user_file)) return; - sg.generateCircular(params); - } else if (params.tree_gen == TAXA_SET) { - sg.init(params); - cout << "Generating random taxa set of size " << params.sub_size << - " overlap " << params.overlap << " with " << params.repeated_time << " times..." 
<< endl; - if (!overwriteFile(params.pdtaxa_file)) return; - sg.generateTaxaSet(params.pdtaxa_file, params.sub_size, params.overlap, params.repeated_time); - } - } catch (bad_alloc) { - outError(ERR_NO_MEMORY); - } catch (ios::failure) { - outError(ERR_WRITE_OUTPUT, params.user_file); - } - - // calculate the distance - if (params.run_mode == CALC_DIST) { - if (params.tree_gen == CIRCULAR_SPLIT_GRAPH) { - cout << "Calculating distance matrix..." << endl; - sg.calcDistance(params.dist_file); - cout << "Distances printed to " << params.dist_file << endl; - }// else { - //mtree.calcDist(params.dist_file); - //} - } + if (params.sub_size < 3 && !params.aln_file) { + outError(ERR_FEW_TAXA); + } + + if (!params.user_file) { + outError("Please specify an output tree file name"); + } + ////cout << "Random number seed: " << params.ran_seed << endl << endl; + + SplitGraph sg; + + try { + + if (params.tree_gen == YULE_HARDING || params.tree_gen == CATERPILLAR || + params.tree_gen == BALANCED || params.tree_gen == UNIFORM || params.tree_gen == STAR_TREE) { + if (!overwriteFile(params.user_file)) return; + ofstream out; + out.open(params.user_file); + MTree itree; + + if (params.second_tree) { + cout << "Generating random branch lengths on tree " << params.second_tree << " ..." << endl; + itree.readTree(params.second_tree, params.is_rooted); + } else + switch (params.tree_gen) { + case YULE_HARDING: + cout << "Generating random Yule-Harding tree..." << endl; + break; + case UNIFORM: + cout << "Generating random uniform tree..." << endl; + break; + case CATERPILLAR: + cout << "Generating random caterpillar tree..." << endl; + break; + case BALANCED: + cout << "Generating random balanced tree..." << endl; + break; + case STAR_TREE: + cout << "Generating star tree with random external branch lengths..." << endl; + break; + default: break; + } + ofstream out2; + if (params.num_zero_len) { + cout << "Setting " << params.num_zero_len << " internal branches to zero length..." 
<< endl; + string str = params.user_file; + str += ".collapsed"; + out2.open(str.c_str()); + } + for (int i = 0; i < params.repeated_time; i++) { + MExtTree mtree; + if (itree.root) { + mtree.copyTree(&itree); + mtree.generateRandomBranchLengths(params); + } else { + mtree.generateRandomTree(params.tree_gen, params); + } + if (params.num_zero_len) { + mtree.setZeroInternalBranches(params.num_zero_len); + MExtTree collapsed_tree; + collapsed_tree.copyTree(&mtree); + collapsed_tree.collapseZeroBranches(); + collapsed_tree.printTree(out2); + out2 << endl; + } + mtree.printTree(out); + out << endl; + } + out.close(); + cout << params.repeated_time << " tree(s) printed to " << params.user_file << endl; + if (params.num_zero_len) { + out2.close(); + cout << params.repeated_time << " collapsed tree(s) printed to " << params.user_file << ".collapsed" << endl; + } + } + // Generate random trees if optioned + else if (params.tree_gen == CIRCULAR_SPLIT_GRAPH) { + cout << "Generating random circular split network..." << endl; + if (!overwriteFile(params.user_file)) return; + sg.generateCircular(params); + } else if (params.tree_gen == TAXA_SET) { + sg.init(params); + cout << "Generating random taxa set of size " << params.sub_size << + " overlap " << params.overlap << " with " << params.repeated_time << " times..." << endl; + if (!overwriteFile(params.pdtaxa_file)) return; + sg.generateTaxaSet(params.pdtaxa_file, params.sub_size, params.overlap, params.repeated_time); + } + } catch (bad_alloc) { + outError(ERR_NO_MEMORY); + } catch (ios::failure) { + outError(ERR_WRITE_OUTPUT, params.user_file); + } + + // calculate the distance + if (params.run_mode == RunMode::CALC_DIST) { + if (params.tree_gen == CIRCULAR_SPLIT_GRAPH) { + cout << "Calculating distance matrix..." 
<< endl; + sg.calcDistance(params.dist_file); + cout << "Distances printed to " << params.dist_file << endl; + }// else { + //mtree.calcDist(params.dist_file); + //} + } } inline void separator(ostream &out, int type = 0) { - switch (type) { - case 0: - out << endl << "==============================================================================" << endl; - break; - case 1: - out << endl << "-----------------------------------------------------------" << endl; - break; - default: - break; - } + switch (type) { + case 0: + out << endl << "==============================================================================" << endl; + break; + case 1: + out << endl << "-----------------------------------------------------------" << endl; + break; + default: + break; + } } void printCopyright(ostream &out) { #ifdef IQ_TREE - out << "IQ-TREE"; + out << "IQ-TREE"; #ifdef _IQTREE_MPI out << " MPI"; #endif - #ifdef _OPENMP - out << " multicore"; - #endif + #ifdef _OPENMP + out << " multicore"; + #endif #ifdef __AVX512KNL out << " Xeon Phi KNL"; #endif - out << " version "; + out << " version "; #else - out << "PDA - Phylogenetic Diversity Analyzer version "; + out << "PDA - Phylogenetic Diversity Analyzer version "; #endif - out << iqtree_VERSION_MAJOR << "." << iqtree_VERSION_MINOR << "." << iqtree_VERSION_PATCH; - -#if defined _WIN32 || defined WIN32 - out << " for Windows"; -#elif defined __APPLE__ || defined __MACH__ - out << " for Mac OS X"; -#elif defined __linux__ - out << " for Linux"; -#elif defined __unix__ || defined __unix - out << " for Unix"; -#else - out << " for unknown platform" -#endif - - out << " " << 8*sizeof(void*) << "-bit" << " built " << __DATE__; + out << iqtree_VERSION_MAJOR << "." 
<< iqtree_VERSION_MINOR << iqtree_VERSION_PATCH << " COVID-edition"; + out << " for " << getOSName(); + out << " built " << __DATE__; + out << " built " << __DATE__; #if defined DEBUG - out << " - debug mode"; + out << " - debug mode"; #endif #ifdef IQ_TREE - out << endl << "Developed by Bui Quang Minh, Nguyen Lam Tung, Olga Chernomor," - << endl << "Heiko Schmidt, Dominik Schrempf, Michael Woodhams." << endl << endl; + out << endl << "Developed by Bui Quang Minh, James Barbetti, Nguyen Lam Tung," + << endl << "Olga Chernomor, Heiko Schmidt, Dominik Schrempf, Michael Woodhams." << endl << endl; #else - out << endl << "Copyright (c) 2006-2014 Olga Chernomor, Arndt von Haeseler and Bui Quang Minh." << endl << endl; + out << endl << "Copyright (c) 2006-2014 Olga Chernomor, Arndt von Haeseler and Bui Quang Minh." << endl << endl; #endif } void printRunMode(ostream &out, RunMode run_mode) { - switch (run_mode) { - case DETECTED: out << "Detected"; break; - case GREEDY: out << "Greedy"; break; - case PRUNING: out << "Pruning"; break; - case BOTH_ALG: out << "Greedy and Pruning"; break; - case EXHAUSTIVE: out << "Exhaustive"; break; - case DYNAMIC_PROGRAMMING: out << "Dynamic Programming"; break; - case LINEAR_PROGRAMMING: out << "Integer Linear Programming"; break; - default: outError(ERR_INTERNAL); - } + switch (run_mode) { + case RunMode::DETECTED: out << "Detected"; break; + case RunMode::GREEDY: out << "Greedy"; break; + case RunMode::PRUNING: out << "Pruning"; break; + case RunMode::BOTH_ALG: out << "Greedy and Pruning"; break; + case RunMode::EXHAUSTIVE: out << "Exhaustive"; break; + case RunMode::DYNAMIC_PROGRAMMING: out << "Dynamic Programming"; break; + case RunMode::LINEAR_PROGRAMMING: out << "Integer Linear Programming"; break; + default: outError(ERR_INTERNAL); + } } /** - summarize the running with header + summarize the running with header */ void summarizeHeader(ostream &out, Params ¶ms, bool budget_constraint, InputType analysis_type) { - 
printCopyright(out); - out << "Input tree/split network file name: " << params.user_file << endl; - if(params.eco_dag_file) - out << "Input food web file name: "< "; - printRunMode(out, params.detected_mode); - } - out << endl; - out << "Search option: " << ((params.find_all) ? "Multiple optimal sets" : "Single optimal set") << endl; - } - out << endl; - out << "Type of analysis: "; - switch (params.run_mode) { - case PD_USER_SET: out << "PD/SD of user sets"; - if (params.pdtaxa_file) out << " (" << params.pdtaxa_file << ")"; break; - case CALC_DIST: out << "Distance matrix computation"; break; - default: - out << ((budget_constraint) ? "Budget constraint " : "Subset size k "); - if (params.intype == IN_NEWICK) - out << ((analysis_type == IN_NEWICK) ? "on tree" : "on tree -> split network"); - else - out << "on split network"; - } - out << endl; - //out << "Random number seed: " << params.ran_seed << endl; + printCopyright(out); + out << "Input tree/split network file name: " << params.user_file << endl; + if(params.eco_dag_file) + out << "Input food web file name: "< "; + printRunMode(out, params.detected_mode); + } + out << endl; + out << "Search option: " << ((params.find_all) ? "Multiple optimal sets" : "Single optimal set") << endl; + } + out << endl; + out << "Type of analysis: "; + switch (params.run_mode) { + case RunMode::PD_USER_SET: out << "PD/SD of user sets"; + if (params.pdtaxa_file) out << " (" << params.pdtaxa_file << ")"; break; + case RunMode::CALC_DIST: out << "Distance matrix computation"; break; + default: + out << ((budget_constraint) ? "Budget constraint " : "Subset size k "); + if (params.intype == IN_NEWICK) + out << ((analysis_type == IN_NEWICK) ? 
"on tree" : "on tree -> split network"); + else + out << "on split network"; + } + out << endl; + //out << "Random number seed: " << params.ran_seed << endl; } void summarizeFooter(ostream &out, Params ¶ms) { - separator(out); - time_t beginTime; - time (&beginTime); - char *date; - date = ctime(&beginTime); - - out << "Time used: " << params.run_time << " seconds." << endl; - out << "Finished time: " << date << endl; + separator(out); + time_t beginTime; + time (&beginTime); + char *date; + date = ctime(&beginTime); + + out << "Time used: " << params.run_time << " seconds." << endl; + out << "Finished time: " << date << endl; } int getMaxNameLen(vector &setName) { - int len = 0; - for (vector::iterator it = setName.begin(); it != setName.end(); it++) - if (len < (*it).length()) - len = (*it).length(); - return len; + int len = 0; + for (vector::iterator it = setName.begin(); it != setName.end(); it++) + if (len < (*it).length()) + len = (*it).length(); + return len; } void printPDUser(ostream &out, Params ¶ms, PDRelatedMeasures &pd_more) { - out << "List of user-defined sets of taxa with PD score computed" << endl << endl; - int maxlen = getMaxNameLen(pd_more.setName)+2; - out.width(maxlen); - out << "Name" << " PD"; - if (params.exclusive_pd) out << " excl.-PD"; - if (params.endemic_pd) out << " PD-Endem."; - if (params.complement_area) out << " PD-Compl. 
given area " << params.complement_area; - out << endl; - int cnt; - for (cnt = 0; cnt < pd_more.setName.size(); cnt++) { - out.width(maxlen); - out << pd_more.setName[cnt] << " "; - out.width(7); - out << pd_more.PDScore[cnt] << " "; - if (params.exclusive_pd) { - out.width(7); - out << pd_more.exclusivePD[cnt] << " "; - } - if (params.endemic_pd) { - out.width(7); - out << pd_more.PDEndemism[cnt] << " "; - } - if (params.complement_area) { - out.width(8); - out << pd_more.PDComplementarity[cnt]; - } - out << endl; - } - separator(out, 1); + out << "List of user-defined sets of taxa with PD score computed" << endl << endl; + int maxlen = getMaxNameLen(pd_more.setName)+2; + out.width(maxlen); + out << "Name" << " PD"; + if (params.exclusive_pd) out << " excl.-PD"; + if (params.endemic_pd) out << " PD-Endem."; + if (params.complement_area) out << " PD-Compl. given area " << params.complement_area; + out << endl; + int cnt; + for (cnt = 0; cnt < pd_more.setName.size(); cnt++) { + out.width(maxlen); + out << pd_more.setName[cnt] << " "; + out.width(7); + out << pd_more.PDScore[cnt] << " "; + if (params.exclusive_pd) { + out.width(7); + out << pd_more.exclusivePD[cnt] << " "; + } + if (params.endemic_pd) { + out.width(7); + out << pd_more.PDEndemism[cnt] << " "; + } + if (params.complement_area) { + out.width(8); + out << pd_more.PDComplementarity[cnt]; + } + out << endl; + } + separator(out, 1); } void summarizeTree(Params ¶ms, PDTree &tree, vector &taxa_set, - PDRelatedMeasures &pd_more) { - string filename; - if (params.out_file == NULL) { - filename = params.out_prefix; - filename += ".pda"; - } else - filename = params.out_file; - - try { - ofstream out; - out.exceptions(ios::failbit | ios::badbit); - out.open(filename.c_str()); - - summarizeHeader(out, params, false, IN_NEWICK); - out << "Tree size: " << tree.leafNum-params.is_rooted << " taxa, " << - tree.nodeNum-1-params.is_rooted << " branches" << endl; - separator(out); - - vector::iterator tid; - - if 
(params.run_mode == PD_USER_SET) { - printPDUser(out, params, pd_more); - } - else if (taxa_set.size() > 1) - out << "Optimal PD-sets with k = " << params.min_size-params.is_rooted << - " to " << params.sub_size-params.is_rooted << endl << endl; - - - int subsize = params.min_size-params.is_rooted; - if (params.run_mode == PD_USER_SET) subsize = 1; - for (tid = taxa_set.begin(); tid != taxa_set.end(); tid++, subsize++) { - if (tid != taxa_set.begin()) - separator(out, 1); - if (params.run_mode == PD_USER_SET) { - out << "Set " << subsize << " has PD score of " << tid->score << endl; - } - else { - out << "For k = " << subsize << " the optimal PD score is " << (*tid).score << endl; - out << "The optimal PD set has " << subsize << " taxa:" << endl; - } - for (NodeVector::iterator it = (*tid).begin(); it != (*tid).end(); it++) - if ((*it)->name != ROOT_NAME){ - out << (*it)->name << endl; - } - if (!tid->tree_str.empty()) { - out << endl << "Corresponding sub-tree: " << endl; - out << tid->tree_str << endl; - } - tid->clear(); - } - taxa_set.clear(); - - summarizeFooter(out, params); - out.close(); - cout << endl << "Results are summarized in " << filename << endl << endl; - - } catch (ios::failure) { - outError(ERR_WRITE_OUTPUT, filename); - } + PDRelatedMeasures &pd_more) { + string filename; + if (params.out_file == NULL) { + filename = params.out_prefix; + filename += ".pda"; + } else + filename = params.out_file; + + try { + ofstream out; + out.exceptions(ios::failbit | ios::badbit); + out.open(filename.c_str()); + + summarizeHeader(out, params, false, IN_NEWICK); + out << "Tree size: " << tree.leafNum-params.is_rooted << " taxa, " << + tree.nodeNum-1-params.is_rooted << " branches" << endl; + separator(out); + + vector::iterator tid; + + if (params.run_mode == RunMode::PD_USER_SET) { + printPDUser(out, params, pd_more); + } + else if (taxa_set.size() > 1) + out << "Optimal PD-sets with k = " << params.min_size-params.is_rooted << + " to " << 
params.sub_size-params.is_rooted << endl << endl; + + + int subsize = params.min_size-params.is_rooted; + if (params.run_mode == RunMode::PD_USER_SET) subsize = 1; + for (tid = taxa_set.begin(); tid != taxa_set.end(); tid++, subsize++) { + if (tid != taxa_set.begin()) + separator(out, 1); + if (params.run_mode == RunMode::PD_USER_SET) { + out << "Set " << subsize << " has PD score of " << tid->score << endl; + } + else { + out << "For k = " << subsize << " the optimal PD score is " << (*tid).score << endl; + out << "The optimal PD set has " << subsize << " taxa:" << endl; + } + for (NodeVector::iterator it = (*tid).begin(); it != (*tid).end(); it++) + if ((*it)->name != ROOT_NAME){ + out << (*it)->name << endl; + } + if (!tid->tree_str.empty()) { + out << endl << "Corresponding sub-tree: " << endl; + out << tid->tree_str << endl; + } + tid->clear(); + } + taxa_set.clear(); + + summarizeFooter(out, params); + out.close(); + cout << endl << "Results are summarized in " << filename << endl << endl; + + } catch (ios::failure) { + outError(ERR_WRITE_OUTPUT, filename); + } } void printTaxaSet(Params ¶ms, vector &taxa_set, RunMode cur_mode) { - int subsize = params.min_size-params.is_rooted; - ofstream out; - ofstream scoreout; - string filename; - filename = params.out_prefix; - filename += ".score"; - scoreout.open(filename.c_str()); - if (!scoreout.is_open()) - outError(ERR_WRITE_OUTPUT, filename); - cout << "PD scores printed to " << filename << endl; - - if (params.nr_output == 1) { - filename = params.out_prefix; - filename += ".pdtaxa"; - out.open(filename.c_str()); - if (!out.is_open()) - outError(ERR_WRITE_OUTPUT, filename); - } - for (vector::iterator tid = taxa_set.begin(); tid != taxa_set.end(); tid++, subsize++) { - if (params.nr_output > 10) { - filename = params.out_prefix; - filename += "."; - filename += subsize; - if (params.run_mode == BOTH_ALG) { - if (cur_mode == GREEDY) - filename += ".greedy"; - else - filename += ".pruning"; - } else { - filename 
+= ".pdtree"; - } - (*tid).printTree((char*)filename.c_str()); - - filename = params.out_prefix; - filename += "."; - filename += subsize; - filename += ".pdtaxa"; - (*tid).printTaxa((char*)filename.c_str()); - } else { - out << subsize << " " << (*tid).score << endl; - scoreout << subsize << " " << (*tid).score << endl; - (*tid).printTaxa(out); - } - } - - if (params.nr_output == 1) { - out.close(); - cout << "All taxa list(s) printed to " << filename << endl; - } - - scoreout.close(); + int subsize = params.min_size-params.is_rooted; + ofstream out; + ofstream scoreout; + string filename; + filename = params.out_prefix; + filename += ".score"; + scoreout.open(filename.c_str()); + if (!scoreout.is_open()) + outError(ERR_WRITE_OUTPUT, filename); + cout << "PD scores printed to " << filename << endl; + + if (params.nr_output == 1) { + filename = params.out_prefix; + filename += ".pdtaxa"; + out.open(filename.c_str()); + if (!out.is_open()) + outError(ERR_WRITE_OUTPUT, filename); + } + for (vector::iterator tid = taxa_set.begin(); tid != taxa_set.end(); tid++, subsize++) { + if (params.nr_output > 10) { + filename = params.out_prefix; + filename += "."; + filename += subsize; + if (params.run_mode == RunMode::BOTH_ALG) { + if (cur_mode == RunMode::GREEDY) + filename += ".greedy"; + else + filename += ".pruning"; + } else { + filename += ".pdtree"; + } + (*tid).printTree((char*)filename.c_str()); + + filename = params.out_prefix; + filename += "."; + filename += subsize; + filename += ".pdtaxa"; + (*tid).printTaxa((char*)filename.c_str()); + } else { + out << subsize << " " << (*tid).score << endl; + scoreout << subsize << " " << (*tid).score << endl; + (*tid).printTaxa(out); + } + } + + if (params.nr_output == 1) { + out.close(); + cout << "All taxa list(s) printed to " << filename << endl; + } + + scoreout.close(); } /** - run PD algorithm on trees + run PD algorithm on trees */ void runPDTree(Params ¶ms) { - if (params.run_mode == CALC_DIST) { - bool is_rooted = 
false; - MExtTree tree(params.user_file, is_rooted); - cout << "Tree contains " << tree.leafNum << " taxa." << endl; - cout << "Calculating distance matrix..." << endl; - tree.calcDist(params.dist_file); - cout << "Distances printed to " << params.dist_file << endl; - return; - } - - double t_begin, t_end; - //char filename[300]; - //int idx; - - vector taxa_set; - - if (params.run_mode == PD_USER_SET) { - // compute score of user-defined sets - t_begin = getCPUTime(); - cout << "Computing PD score for user-defined set of taxa..." << endl; - PDTree tree(params); - PDRelatedMeasures pd_more; - tree.computePD(params, taxa_set, pd_more); - - if (params.endemic_pd) - tree.calcPDEndemism(taxa_set, pd_more.PDEndemism); - if (params.complement_area != NULL) - tree.calcPDComplementarity(taxa_set, params.complement_area, pd_more.PDComplementarity); + if (params.run_mode == RunMode::CALC_DIST) { + bool is_rooted = false; + MExtTree tree(params.user_file, is_rooted); + cout << "Tree contains " << tree.leafNum << " taxa." << endl; + cout << "Calculating distance matrix..." << endl; + tree.calcDist(params.dist_file); + cout << "Distances printed to " << params.dist_file << endl; + return; + } - t_end = getCPUTime(); - params.run_time = (t_end-t_begin); - summarizeTree(params, tree, taxa_set, pd_more); - return; - } + double t_begin, t_end; + //char filename[300]; + //int idx; + + vector taxa_set; + + if (params.run_mode == RunMode::PD_USER_SET) { + // compute score of user-defined sets + t_begin = getCPUTime(); + cout << "Computing PD score for user-defined set of taxa..." 
<< endl; + PDTree tree(params); + PDRelatedMeasures pd_more; + tree.computePD(params, taxa_set, pd_more); + + if (params.endemic_pd) + tree.calcPDEndemism(taxa_set, pd_more.PDEndemism); + if (params.complement_area != NULL) + tree.calcPDComplementarity(taxa_set, params.complement_area, pd_more.PDComplementarity); + + t_end = getCPUTime(); + params.run_time = (t_end-t_begin); + summarizeTree(params, tree, taxa_set, pd_more); + return; + } - /********************************************* - run greedy algorithm - *********************************************/ + /********************************************* + run greedy algorithm + *********************************************/ - if (params.sub_size < 2) { - outError(ERR_NO_K); - } + if (params.sub_size < 2) { + outError(ERR_NO_K); + } - bool detected_greedy = (params.run_mode != PRUNING); + bool detected_greedy = (params.run_mode != RunMode::PRUNING); - Greedy test_greedy; + Greedy test_greedy; - test_greedy.init(params); + test_greedy.init(params); - if (params.root == NULL && !params.is_rooted) - cout << endl << "Running PD algorithm on UNROOTED tree..." << endl; - else - cout << endl << "Running PD algorithm on ROOTED tree..." << endl; + if (params.root == NULL && !params.is_rooted) + cout << endl << "Running PD algorithm on UNROOTED tree..." << endl; + else + cout << endl << "Running PD algorithm on ROOTED tree..." 
<< endl; - if (verbose_mode >= VB_DEBUG) - test_greedy.drawTree(cout, WT_INT_NODE + WT_BR_SCALE + WT_BR_LEN); + if (verbose_mode >= VB_DEBUG) + test_greedy.drawTree(cout, WT_INT_NODE + WT_BR_SCALE + WT_BR_LEN); - if (params.run_mode == GREEDY || params.run_mode == BOTH_ALG || - (params.run_mode == DETECTED)) { + if (params.run_mode == RunMode::GREEDY || params.run_mode == RunMode::BOTH_ALG || + (params.run_mode == RunMode::DETECTED)) { - if (params.run_mode == DETECTED && params.sub_size >= test_greedy.leafNum * 7 / 10 - && params.min_size < 2) - detected_greedy = false; + if (params.run_mode == RunMode::DETECTED && params.sub_size >= test_greedy.leafNum * 7 / 10 + && params.min_size < 2) + detected_greedy = false; - if (detected_greedy) { - params.detected_mode = GREEDY; - t_begin=getCPUTime(); - cout << endl << "Greedy Algorithm..." << endl; + if (detected_greedy) { + params.detected_mode = RunMode::GREEDY; + t_begin=getCPUTime(); + cout << endl << "Greedy Algorithm..." << endl; - taxa_set.clear(); - test_greedy.run(params, taxa_set); + taxa_set.clear(); + test_greedy.run(params, taxa_set); - t_end=getCPUTime(); - params.run_time = (t_end-t_begin); - cout << "Time used: " << params.run_time << " seconds." << endl; - if (params.min_size == params.sub_size) - cout << "Resulting tree length = " << taxa_set[0].score << endl; + t_end=getCPUTime(); + params.run_time = (t_end-t_begin); + cout << "Time used: " << params.run_time << " seconds." 
<< endl; + if (params.min_size == params.sub_size) + cout << "Resulting tree length = " << taxa_set[0].score << endl; - if (params.nr_output > 0) - printTaxaSet(params, taxa_set, GREEDY); + if (params.nr_output > 0) + printTaxaSet(params, taxa_set, RunMode::GREEDY); - PDRelatedMeasures pd_more; + PDRelatedMeasures pd_more; - summarizeTree(params, test_greedy, taxa_set, pd_more); - } - } + summarizeTree(params, test_greedy, taxa_set, pd_more); + } + } - /********************************************* - run pruning algorithm - *********************************************/ - if (params.run_mode == PRUNING || params.run_mode == BOTH_ALG || - (params.run_mode == DETECTED)) { + /********************************************* + run pruning algorithm + *********************************************/ + if (params.run_mode == RunMode::PRUNING || params.run_mode == RunMode::BOTH_ALG || + (params.run_mode == RunMode::DETECTED)) { - Pruning test_pruning; + Pruning test_pruning; - if (params.run_mode == PRUNING || params.run_mode == BOTH_ALG) { - //Pruning test_pruning(params); - test_pruning.init(params); - } else if (!detected_greedy) { - test_pruning.init(test_greedy); - } else { - return; - } - params.detected_mode = PRUNING; - t_begin=getCPUTime(); - cout << endl << "Pruning Algorithm..." << endl; - taxa_set.clear(); - test_pruning.run(params, taxa_set); + if (params.run_mode == RunMode::PRUNING || params.run_mode == RunMode::BOTH_ALG) { + //Pruning test_pruning(params); + test_pruning.init(params); + } else if (!detected_greedy) { + test_pruning.init(test_greedy); + } else { + return; + } + params.detected_mode = RunMode::PRUNING; + t_begin=getCPUTime(); + cout << endl << "Pruning Algorithm..." 
<< endl; + taxa_set.clear(); + test_pruning.run(params, taxa_set); - t_end=getCPUTime(); - params.run_time = (t_end-t_begin) ; - cout << "Time used: " << params.run_time << " seconds.\n"; - if (params.min_size == params.sub_size) - cout << "Resulting tree length = " << taxa_set[0].score << endl; + t_end=getCPUTime(); + params.run_time = (t_end-t_begin) ; + cout << "Time used: " << params.run_time << " seconds.\n"; + if (params.min_size == params.sub_size) + cout << "Resulting tree length = " << taxa_set[0].score << endl; - if (params.nr_output > 0) - printTaxaSet(params, taxa_set, PRUNING); + if (params.nr_output > 0) + printTaxaSet(params, taxa_set, RunMode::PRUNING); - PDRelatedMeasures pd_more; + PDRelatedMeasures pd_more; - summarizeTree(params, test_pruning, taxa_set, pd_more); + summarizeTree(params, test_pruning, taxa_set, pd_more); - } + } } void checkSplitDistance(ostream &out, PDNetwork &sg) { - mmatrix(double) dist; - sg.calcDistance(dist); - int ntaxa = sg.getNTaxa(); - int i, j; - bool found = false; - for (i = 0; i < ntaxa-1; i++) { - bool first = true; - for (j = i+1; j < ntaxa; j++) - if (abs(dist[i][j]) <= 1e-5) { - if (!found) { - out << "The following sets of taxa (each set in a line) have very small split-distance" << endl; - out << "( <= 1e-5) as computed from the split system. To avoid a lot of multiple" << endl; - out << "optimal PD sets to be reported, one should only keep one taxon from each set" << endl; - out << "and exclude the rest from the analysis." 
<< endl << endl; - } - if (first) - out << sg.getTaxa()->GetTaxonLabel(i); - found = true; - first = false; - out << ", " << sg.getTaxa()->GetTaxonLabel(j); - } - if (!first) out << endl; - } - if (found) - separator(out); + mmatrix(double) dist; + sg.calcDistance(dist); + int ntaxa = sg.getNTaxa(); + int i, j; + bool found = false; + for (i = 0; i < ntaxa-1; i++) { + bool first = true; + for (j = i+1; j < ntaxa; j++) + if (abs(dist[i][j]) <= 1e-5) { + if (!found) { + out << "The following sets of taxa (each set in a line) have very small split-distance" << endl; + out << "( <= 1e-5) as computed from the split system. To avoid a lot of multiple" << endl; + out << "optimal PD sets to be reported, one should only keep one taxon from each set" << endl; + out << "and exclude the rest from the analysis." << endl << endl; + } + if (first) + out << sg.getTaxa()->GetTaxonLabel(i); + found = true; + first = false; + out << ", " << sg.getTaxa()->GetTaxonLabel(j); + } + if (!first) out << endl; + } + if (found) + separator(out); } /** - check if the set are nested and there are no multiple optimal sets. - If yes, return the ranking as could be produced by a greedy algorithm + check if the set are nested and there are no multiple optimal sets. 
+ If yes, return the ranking as could be produced by a greedy algorithm */ bool makeRanking(vector &pd_set, IntVector &indices, IntVector &ranking) { - vector::iterator it; - IntVector::iterator inti; - ranking.clear(); - bool nested = true; - Split *cur_sp = NULL; - int id = 1; - for (it = pd_set.begin(); it != pd_set.end(); it++) { - if ((*it).empty()) continue; - if ((*it).size() > 1) { - nested = false; - ranking.push_back(-10); - indices.push_back(0); - } - Split *sp = (*it)[0]; - - if (!cur_sp) { - IntVector sp_tax; - sp->getTaxaList(sp_tax); - ranking.insert(ranking.end(), sp_tax.begin(), sp_tax.end()); - for (inti = sp_tax.begin(); inti != sp_tax.end(); inti++) - indices.push_back(id++); - } else { - if ( !cur_sp->subsetOf(*sp)) { - ranking.push_back(-1); - indices.push_back(0); - nested = false; - } - Split sp_diff(*sp); - sp_diff -= *cur_sp; - Split sp_diff2(*cur_sp); - sp_diff2 -= *sp; - IntVector sp_tax; - sp_diff2.getTaxaList(sp_tax); - ranking.insert(ranking.end(), sp_tax.begin(), sp_tax.end()); - for (inti = sp_tax.begin(); inti != sp_tax.end(); inti++) - indices.push_back(-id); - sp_diff.getTaxaList(sp_tax); - ranking.insert(ranking.end(), sp_tax.begin(), sp_tax.end()); - for (inti = sp_tax.begin(); inti != sp_tax.end(); inti++) - indices.push_back(id); - if ( !cur_sp->subsetOf(*sp)) { - ranking.push_back(-2); - indices.push_back(0); - } - id++; - } - cur_sp = sp; - } - return nested; + vector::iterator it; + IntVector::iterator inti; + ranking.clear(); + bool nested = true; + Split *cur_sp = NULL; + int id = 1; + for (it = pd_set.begin(); it != pd_set.end(); it++) { + if ((*it).empty()) continue; + if ((*it).size() > 1) { + nested = false; + ranking.push_back(-10); + indices.push_back(0); + } + Split *sp = (*it)[0]; + + if (!cur_sp) { + IntVector sp_tax; + sp->getTaxaList(sp_tax); + ranking.insert(ranking.end(), sp_tax.begin(), sp_tax.end()); + for (inti = sp_tax.begin(); inti != sp_tax.end(); inti++) + indices.push_back(id++); + } else { + if ( 
!cur_sp->subsetOf(*sp)) { + ranking.push_back(-1); + indices.push_back(0); + nested = false; + } + Split sp_diff(*sp); + sp_diff -= *cur_sp; + Split sp_diff2(*cur_sp); + sp_diff2 -= *sp; + IntVector sp_tax; + sp_diff2.getTaxaList(sp_tax); + ranking.insert(ranking.end(), sp_tax.begin(), sp_tax.end()); + for (inti = sp_tax.begin(); inti != sp_tax.end(); inti++) + indices.push_back(-id); + sp_diff.getTaxaList(sp_tax); + ranking.insert(ranking.end(), sp_tax.begin(), sp_tax.end()); + for (inti = sp_tax.begin(); inti != sp_tax.end(); inti++) + indices.push_back(id); + if ( !cur_sp->subsetOf(*sp)) { + ranking.push_back(-2); + indices.push_back(0); + } + id++; + } + cur_sp = sp; + } + return nested; } void printNexusSets(const char *filename, PDNetwork &sg, vector &pd_set) { - try { - ofstream out; - out.open(filename); - out << "#NEXUS" << endl << "BEGIN Sets;" << endl; - vector::iterator it; - for (it = pd_set.begin(); it != pd_set.end(); it++) { - int id = 1; - for (SplitSet::iterator sit = (*it).begin(); sit != (*it).end(); sit++, id++) { - IntVector taxa; - (*sit)->getTaxaList(taxa); - out << " TAXSET Opt_" << taxa.size() << "_" << id << " ="; - for (IntVector::iterator iit = taxa.begin(); iit != taxa.end(); iit++) { - if (sg.isPDArea()) - out << " '" << sg.getSetsBlock()->getSet(*iit)->name << "'"; - else - out << " '" << sg.getTaxa()->GetTaxonLabel(*iit) << "'"; - } - out << ";" << endl; - } - } - out << "END; [Sets]" << endl; - out.close(); - cout << endl << "Optimal sets are written to nexus file " << filename << endl; - } catch (ios::failure) { - outError(ERR_WRITE_OUTPUT, filename); - } + try { + ofstream out; + out.open(filename); + out << "#NEXUS" << endl << "BEGIN Sets;" << endl; + vector::iterator it; + for (it = pd_set.begin(); it != pd_set.end(); it++) { + int id = 1; + for (SplitSet::iterator sit = (*it).begin(); sit != (*it).end(); sit++, id++) { + IntVector taxa; + (*sit)->getTaxaList(taxa); + out << " TAXSET Opt_" << taxa.size() << "_" << id << " ="; + 
for (IntVector::iterator iit = taxa.begin(); iit != taxa.end(); iit++) { + if (sg.isPDArea()) + out << " '" << sg.getSetsBlock()->getSet(*iit)->name << "'"; + else + out << " '" << sg.getTaxa()->GetTaxonLabel(*iit) << "'"; + } + out << ";" << endl; + } + } + out << "END; [Sets]" << endl; + out.close(); + cout << endl << "Optimal sets are written to nexus file " << filename << endl; + } catch (ios::failure) { + outError(ERR_WRITE_OUTPUT, filename); + } } void computeTaxaFrequency(SplitSet &taxa_set, DoubleVector &freq) { - ASSERT(taxa_set.size()); - int ntaxa = taxa_set[0]->getNTaxa(); - int i; + ASSERT(taxa_set.size()); + int ntaxa = taxa_set[0]->getNTaxa(); + int i; - freq.resize(ntaxa, 0); - for (SplitSet::iterator it2 = taxa_set.begin(); it2 != taxa_set.end(); it2++) { - for ( i = 0; i < ntaxa; i++) - if ((*it2)->containTaxon(i)) freq[i] += 1.0; - } + freq.resize(ntaxa, 0); + for (SplitSet::iterator it2 = taxa_set.begin(); it2 != taxa_set.end(); it2++) { + for ( i = 0; i < ntaxa; i++) + if ((*it2)->containTaxon(i)) freq[i] += 1.0; + } - for ( i = 0; i < ntaxa; i++) - freq[i] /= taxa_set.size(); + for ( i = 0; i < ntaxa; i++) + freq[i] /= taxa_set.size(); } /** - summarize the running results + summarize the running results */ void summarizeSplit(Params ¶ms, PDNetwork &sg, vector &pd_set, PDRelatedMeasures &pd_more, bool full_report) { - int i; - - - if (params.nexus_output) { - string nex_file = params.out_prefix; - nex_file += ".pdsets.nex"; - printNexusSets(nex_file.c_str(), sg, pd_set); - } - string filename; - if (params.out_file == NULL) { - filename = params.out_prefix; - filename += ".pda"; - } else - filename = params.out_file; - - try { - ofstream out; - out.open(filename.c_str()); - /****************************/ - /********** HEADER **********/ - /****************************/ - summarizeHeader(out, params, sg.isBudgetConstraint(), IN_NEXUS); - - out << "Network size: " << sg.getNTaxa()-params.is_rooted << " taxa, " << - 
sg.getNSplits()-params.is_rooted << " splits (of which " << - sg.getNTrivialSplits() << " are trivial splits)" << endl; - out << "Network type: " << ((sg.isCircular()) ? "Circular" : "General") << endl; - - separator(out); - - checkSplitDistance(out, sg); - - int c_num = 0; - //int subsize = (sg.isBudgetConstraint()) ? params.budget : (params.sub_size-params.is_rooted); - //subsize -= pd_set.size()-1; - int subsize = (sg.isBudgetConstraint()) ? params.min_budget : params.min_size-params.is_rooted; - int stepsize = (sg.isBudgetConstraint()) ? params.step_budget : params.step_size; - if (params.detected_mode != LINEAR_PROGRAMMING) stepsize = 1; - vector::iterator it; - SplitSet::iterator it2; - - - if (params.run_mode == PD_USER_SET) { - printPDUser(out, params, pd_more); - } - - /****************************/ - /********** SUMMARY *********/ - /****************************/ - - if (params.run_mode != PD_USER_SET && !params.num_bootstrap_samples) { - out << "Summary of the PD-score and the number of optimal PD-sets with the same " << endl << "optimal PD-score found." << endl; - - if (sg.isBudgetConstraint()) - out << endl << "Budget PD-score %PD-score #PD-sets" << endl; - else - out << endl << "Size-k PD-score %PD-score #PD-sets" << endl; - - int sizex = subsize; - double total = sg.calcWeight(); - - for (it = pd_set.begin(); it != pd_set.end(); it++, sizex+=stepsize) { - out.width(6); - out << right << sizex << " "; - out.width(10); - out << right << (*it).getWeight() << " "; - out.width(10); - out << right << ((*it).getWeight()/total)*100.0 << " "; - out.width(6); - out << right << (*it).size(); - out << endl; - } - - out << endl; - if (!params.find_all) - out << "Note: You did not choose the option to find multiple optimal PD sets." << endl << - "That's why we only reported one PD-set per size-k or budget. 
If you want" << endl << - "to determine all multiple PD-sets, use the '-a' option."; - else { - out << "Note: The number of multiple optimal PD sets to be reported is limited to " << params.pd_limit << "." << endl << - "There might be cases where the actual #PD-sets exceeds that upper-limit but" << endl << - "won't be listed here. Please refer to the above list to identify such cases." << endl << - "To increase the upper-limit, use the '-lim ' option."; - } - out << endl; - separator(out); - } - - if (!full_report) { - out.close(); - return; - } - - - /****************************/ - /********* BOOTSTRAP ********/ - /****************************/ - if (params.run_mode != PD_USER_SET && params.num_bootstrap_samples) { - out << "Summary of the bootstrap analysis " << endl; - for (it = pd_set.begin(); it != pd_set.end(); it++) { - DoubleVector freq; - computeTaxaFrequency((*it), freq); - out << "For k/budget = " << subsize << " the " << ((sg.isPDArea()) ? "areas" : "taxa") - << " supports are: " << endl; - for (i = 0; i < freq.size(); i++) - out << ((sg.isPDArea()) ? 
sg.getSetsBlock()->getSet(i)->name : sg.getTaxa()->GetTaxonLabel(i)) - << "\t" << freq[i] << endl; - if ((it+1) != pd_set.end()) separator(out, 1); - } - out << endl; - separator(out); - } - - /****************************/ - /********** RANKING *********/ - /****************************/ - - if (params.run_mode != PD_USER_SET && !params.num_bootstrap_samples) { - - - IntVector ranking; - IntVector index; - - out << "Ranking based on the optimal sets" << endl; - - - if (!makeRanking(pd_set, index, ranking)) { - out << "WARNING: Optimal sets are not nested, so ranking should not be considered stable" << endl; - } - if (subsize > 1) { - out << "WARNING: The first " << subsize << " ranks should be treated equal" << endl; - } - out << endl << "Rank* "; - if (!sg.isPDArea()) - out << "Taxon names" << endl; - else - out << "Area names" << endl; - - - for (IntVector::iterator intv = ranking.begin(), intid = index.begin(); intv != ranking.end(); intv ++, intid++) { - if (*intv == -10) - out << "<--- multiple optimal set here --->" << endl; - else if (*intv == -1) - out << "<--- BEGIN: greedy does not work --->" << endl; - else if (*intv == -2) - out << "<--- END --->" << endl; - else { - out.width(5); - out << right << *intid << " "; - if (sg.isPDArea()) - out << sg.getSetsBlock()->getSet(*intv)->name << endl; - else - out << sg.getTaxa()->GetTaxonLabel(*intv) << endl; - } - } - out << endl; - out << "(*) Negative ranks indicate the point at which the greedy algorithm" << endl << - " does not work. 
In that case, the corresponding taxon/area names" << endl << - " should be deleted from the optimal set of the same size" << endl; - separator(out); - } - - int max_len = sg.getTaxa()->GetMaxTaxonLabelLength(); - - /****************************/ - /***** DETAILED SETS ********/ - /****************************/ - - if (params.run_mode != PD_USER_SET) - out << "Detailed information of all taxa found in the optimal PD-sets" << endl; - - if (pd_set.size() > 1) { - if (sg.isBudgetConstraint()) - out << "with budget = " << params.min_budget << - " to " << params.budget << endl << endl; - else - out << "with k = " << params.min_size-params.is_rooted << - " to " << params.sub_size-params.is_rooted << endl << endl; - } - - if (params.run_mode != PD_USER_SET) - separator(out,1); - - for (it = pd_set.begin(); it != pd_set.end(); it++, subsize+=stepsize) { - - // check if the pd-sets are the same as previous one - if (sg.isBudgetConstraint() && it != pd_set.begin()) { - vector::iterator prev, next; - for (next=it, prev=it-1; next != pd_set.end() && next->getWeight() == (*prev).getWeight() && - next->size() == (*prev).size(); next++ ) ; - if (next != it) { - // found something in between! - out << endl; - //out << endl << "**************************************************************" << endl; - out << "For budget = " << subsize << " -> " << subsize+(next-it-1)*stepsize << - " the optimal PD score and PD sets" << endl; - out << "are identical to the case when budget = " << subsize-stepsize << endl; - //out << "**************************************************************" << endl; - subsize += (next-it)*stepsize; - it = next; - if (it == pd_set.end()) break; - } - } - - if (it != pd_set.begin()) separator(out, 1); - - int num_sets = (*it).size(); - double weight = (*it).getWeight(); - - if (params.run_mode != PD_USER_SET) { - out << "For " << ((sg.isBudgetConstraint()) ? 
"budget" : "k") << " = " << subsize; - out << " the optimal PD score is " << weight << endl; - - if (num_sets == 1) { - if (!sg.isBudgetConstraint()) - out << "The optimal PD set has " << (*it)[0]->countTaxa()-params.is_rooted << - ((sg.isPDArea()) ? " areas" : " taxa"); - else - out << "The optimal PD set has " << (*it)[0]->countTaxa()-params.is_rooted << - ((sg.isPDArea()) ? " areas" : " taxa") << " and requires " << sg.calcCost(*(*it)[0]) << " budget"; - if (!sg.isPDArea()) out << " and covers " << sg.countSplits(*(*it)[0]) << - " splits (of which " << sg.countInternalSplits(*(*it)[0]) << " are internal splits)"; - out << endl; - } - else - out << "Found " << num_sets << " PD sets with the same optimal score." << endl; - } - for (it2 = (*it).begin(), c_num=1; it2 != (*it).end(); it2++, c_num++){ - Split *this_set = *it2; - - if (params.run_mode == PD_USER_SET && it2 != (*it).begin()) - separator(out, 1); - - if (params.run_mode == PD_USER_SET) { - if (!sg.isBudgetConstraint()) - out << "Set " << c_num << " has PD score of " << this_set->getWeight(); - else - out << "Set " << c_num << " has PD score of " << this_set->getWeight() << - " and requires " << sg.calcCost(*this_set) << " budget"; - } else if (num_sets > 1) { - if (!sg.isBudgetConstraint()) - out << endl << "PD set " << c_num; - else - out << endl << "PD set " << c_num << " has " << this_set->countTaxa()-params.is_rooted << - " taxa and requires " << sg.calcCost(*this_set) << " budget"; - } - - if (!sg.isPDArea() && (num_sets > 1 || params.run_mode == PD_USER_SET )) - out << " and covers " << sg.countSplits(*(*it)[0]) << " splits (of which " - << sg.countInternalSplits(*(*it)[0]) << " are internal splits)"; - out << endl; - - if (params.run_mode != PD_USER_SET && sg.isPDArea()) { - for (i = 0; i < sg.getSetsBlock()->getNSets(); i++) - if (this_set->containTaxon(i)) { - if (sg.isBudgetConstraint()) { - out.width(max_len); - out << left << sg.getSetsBlock()->getSet(i)->name << "\t"; - out.width(10); - out 
<< right << sg.getPdaBlock()->getCost(i); - out << endl; - - } else { - out << sg.getSetsBlock()->getSet(i)->name << endl; - } - } - - Split sp(sg.getNTaxa()); - for (i = 0; i < sg.getSetsBlock()->getNSets(); i++) - if (this_set->containTaxon(i)) - sp += *(sg.area_taxa[i]); - out << endl << "which contains " << sp.countTaxa() - params.is_rooted << " taxa: " << endl; - for (i = 0; i < sg.getNTaxa(); i++) - if (sg.getTaxa()->GetTaxonLabel(i) != ROOT_NAME && sp.containTaxon(i)) - out << sg.getTaxa()->GetTaxonLabel(i) << endl; - - } else - for ( i = 0; i < sg.getNTaxa(); i++) - if (sg.getTaxa()->GetTaxonLabel(i) != ROOT_NAME && this_set->containTaxon(i)) { - if (sg.isBudgetConstraint()) { - out.width(max_len); - out << left << sg.getTaxa()->GetTaxonLabel(i) << "\t"; - out.width(10); - out << right << sg.getPdaBlock()->getCost(i); - out << endl; - - } else { - out << sg.getTaxa()->GetTaxonLabel(i) << endl; - } - } - } - } - - /****************************/ - /********** FOOTER **********/ - /****************************/ - - summarizeFooter(out, params); - - out.close(); - cout << endl << "Results are summarized in " << filename << endl << endl; - } catch (ios::failure) { - outError(ERR_WRITE_OUTPUT, filename); - } + int i; + + + if (params.nexus_output) { + string nex_file = params.out_prefix; + nex_file += ".pdsets.nex"; + printNexusSets(nex_file.c_str(), sg, pd_set); + } + string filename; + if (params.out_file == NULL) { + filename = params.out_prefix; + filename += ".pda"; + } else + filename = params.out_file; + + try { + ofstream out; + out.open(filename.c_str()); + /****************************/ + /********** HEADER **********/ + /****************************/ + summarizeHeader(out, params, sg.isBudgetConstraint(), IN_NEXUS); + + out << "Network size: " << sg.getNTaxa()-params.is_rooted << " taxa, " << + sg.getNSplits()-params.is_rooted << " splits (of which " << + sg.getNTrivialSplits() << " are trivial splits)" << endl; + out << "Network type: " << 
((sg.isCircular()) ? "Circular" : "General") << endl; + + separator(out); + + checkSplitDistance(out, sg); + + int c_num = 0; + //int subsize = (sg.isBudgetConstraint()) ? params.budget : (params.sub_size-params.is_rooted); + //subsize -= pd_set.size()-1; + int subsize = (sg.isBudgetConstraint()) ? params.min_budget : params.min_size-params.is_rooted; + int stepsize = (sg.isBudgetConstraint()) ? params.step_budget : params.step_size; + if (params.detected_mode != RunMode::LINEAR_PROGRAMMING) stepsize = 1; + vector::iterator it; + SplitSet::iterator it2; + + + if (params.run_mode == RunMode::PD_USER_SET) { + printPDUser(out, params, pd_more); + } + + /****************************/ + /********** SUMMARY *********/ + /****************************/ + + if (params.run_mode != RunMode::PD_USER_SET && !params.num_bootstrap_samples) { + out << "Summary of the PD-score and the number of optimal PD-sets with the same " << endl << "optimal PD-score found." << endl; + + if (sg.isBudgetConstraint()) + out << endl << "Budget PD-score %PD-score #PD-sets" << endl; + else + out << endl << "Size-k PD-score %PD-score #PD-sets" << endl; + + int sizex = subsize; + double total = sg.calcWeight(); + + for (it = pd_set.begin(); it != pd_set.end(); it++, sizex+=stepsize) { + out.width(6); + out << right << sizex << " "; + out.width(10); + out << right << (*it).getWeight() << " "; + out.width(10); + out << right << ((*it).getWeight()/total)*100.0 << " "; + out.width(6); + out << right << (*it).size(); + out << endl; + } + + out << endl; + if (!params.find_all) + out << "Note: You did not choose the option to find multiple optimal PD sets." << endl << + "That's why we only reported one PD-set per size-k or budget. If you want" << endl << + "to determine all multiple PD-sets, use the '-a' option."; + else { + out << "Note: The number of multiple optimal PD sets to be reported is limited to " << params.pd_limit << "." 
<< endl << + "There might be cases where the actual #PD-sets exceeds that upper-limit but" << endl << + "won't be listed here. Please refer to the above list to identify such cases." << endl << + "To increase the upper-limit, use the '-lim ' option."; + } + out << endl; + separator(out); + } + + if (!full_report) { + out.close(); + return; + } + + + /****************************/ + /********* BOOTSTRAP ********/ + /****************************/ + if (params.run_mode != RunMode::PD_USER_SET && params.num_bootstrap_samples) { + out << "Summary of the bootstrap analysis " << endl; + for (it = pd_set.begin(); it != pd_set.end(); it++) { + DoubleVector freq; + computeTaxaFrequency((*it), freq); + out << "For k/budget = " << subsize << " the " << ((sg.isPDArea()) ? "areas" : "taxa") + << " supports are: " << endl; + for (i = 0; i < freq.size(); i++) + out << ((sg.isPDArea()) ? sg.getSetsBlock()->getSet(i)->name : sg.getTaxa()->GetTaxonLabel(i)) + << "\t" << freq[i] << endl; + if ((it+1) != pd_set.end()) separator(out, 1); + } + out << endl; + separator(out); + } + + /****************************/ + /********** RANKING *********/ + /****************************/ + + if (params.run_mode != RunMode::PD_USER_SET && !params.num_bootstrap_samples) { + + + IntVector ranking; + IntVector index; + + out << "Ranking based on the optimal sets" << endl; + + + if (!makeRanking(pd_set, index, ranking)) { + out << "WARNING: Optimal sets are not nested, so ranking should not be considered stable" << endl; + } + if (subsize > 1) { + out << "WARNING: The first " << subsize << " ranks should be treated equal" << endl; + } + out << endl << "Rank* "; + if (!sg.isPDArea()) + out << "Taxon names" << endl; + else + out << "Area names" << endl; + + + for (IntVector::iterator intv = ranking.begin(), intid = index.begin(); intv != ranking.end(); intv ++, intid++) { + if (*intv == -10) + out << "<--- multiple optimal set here --->" << endl; + else if (*intv == -1) + out << "<--- BEGIN: greedy does 
not work --->" << endl; + else if (*intv == -2) + out << "<--- END --->" << endl; + else { + out.width(5); + out << right << *intid << " "; + if (sg.isPDArea()) + out << sg.getSetsBlock()->getSet(*intv)->name << endl; + else + out << sg.getTaxa()->GetTaxonLabel(*intv) << endl; + } + } + out << endl; + out << "(*) Negative ranks indicate the point at which the greedy algorithm" << endl << + " does not work. In that case, the corresponding taxon/area names" << endl << + " should be deleted from the optimal set of the same size" << endl; + separator(out); + } + + int max_len = sg.getTaxa()->GetMaxTaxonLabelLength(); + + /****************************/ + /***** DETAILED SETS ********/ + /****************************/ + + if (params.run_mode != RunMode::PD_USER_SET) + out << "Detailed information of all taxa found in the optimal PD-sets" << endl; + + if (pd_set.size() > 1) { + if (sg.isBudgetConstraint()) + out << "with budget = " << params.min_budget << + " to " << params.budget << endl << endl; + else + out << "with k = " << params.min_size-params.is_rooted << + " to " << params.sub_size-params.is_rooted << endl << endl; + } + + if (params.run_mode != RunMode::PD_USER_SET) + separator(out,1); + + for (it = pd_set.begin(); it != pd_set.end(); it++, subsize+=stepsize) { + + // check if the pd-sets are the same as previous one + if (sg.isBudgetConstraint() && it != pd_set.begin()) { + vector::iterator prev, next; + for (next=it, prev=it-1; next != pd_set.end() && next->getWeight() == (*prev).getWeight() && + next->size() == (*prev).size(); next++ ) ; + if (next != it) { + // found something in between! 
+ out << endl; + //out << endl << "**************************************************************" << endl; + out << "For budget = " << subsize << " -> " << subsize+(next-it-1)*stepsize << + " the optimal PD score and PD sets" << endl; + out << "are identical to the case when budget = " << subsize-stepsize << endl; + //out << "**************************************************************" << endl; + subsize += (next-it)*stepsize; + it = next; + if (it == pd_set.end()) break; + } + } + + if (it != pd_set.begin()) separator(out, 1); + + int num_sets = (*it).size(); + double weight = (*it).getWeight(); + + if (params.run_mode != RunMode::PD_USER_SET) { + out << "For " << ((sg.isBudgetConstraint()) ? "budget" : "k") << " = " << subsize; + out << " the optimal PD score is " << weight << endl; + + if (num_sets == 1) { + if (!sg.isBudgetConstraint()) + out << "The optimal PD set has " << (*it)[0]->countTaxa()-params.is_rooted << + ((sg.isPDArea()) ? " areas" : " taxa"); + else + out << "The optimal PD set has " << (*it)[0]->countTaxa()-params.is_rooted << + ((sg.isPDArea()) ? " areas" : " taxa") << " and requires " << sg.calcCost(*(*it)[0]) << " budget"; + if (!sg.isPDArea()) out << " and covers " << sg.countSplits(*(*it)[0]) << + " splits (of which " << sg.countInternalSplits(*(*it)[0]) << " are internal splits)"; + out << endl; + } + else + out << "Found " << num_sets << " PD sets with the same optimal score." 
<< endl; + } + for (it2 = (*it).begin(), c_num=1; it2 != (*it).end(); it2++, c_num++){ + Split *this_set = *it2; + + if (params.run_mode == RunMode::PD_USER_SET && it2 != (*it).begin()) + separator(out, 1); + + if (params.run_mode == RunMode::PD_USER_SET) { + if (!sg.isBudgetConstraint()) + out << "Set " << c_num << " has PD score of " << this_set->getWeight(); + else + out << "Set " << c_num << " has PD score of " << this_set->getWeight() << + " and requires " << sg.calcCost(*this_set) << " budget"; + } else if (num_sets > 1) { + if (!sg.isBudgetConstraint()) + out << endl << "PD set " << c_num; + else + out << endl << "PD set " << c_num << " has " << this_set->countTaxa()-params.is_rooted << + " taxa and requires " << sg.calcCost(*this_set) << " budget"; + } + + if (!sg.isPDArea() && (num_sets > 1 || params.run_mode == RunMode::PD_USER_SET )) + out << " and covers " << sg.countSplits(*(*it)[0]) << " splits (of which " + << sg.countInternalSplits(*(*it)[0]) << " are internal splits)"; + out << endl; + + if (params.run_mode != RunMode::PD_USER_SET && sg.isPDArea()) { + for (i = 0; i < sg.getSetsBlock()->getNSets(); i++) + if (this_set->containTaxon(i)) { + if (sg.isBudgetConstraint()) { + out.width(max_len); + out << left << sg.getSetsBlock()->getSet(i)->name << "\t"; + out.width(10); + out << right << sg.getPdaBlock()->getCost(i); + out << endl; + + } else { + out << sg.getSetsBlock()->getSet(i)->name << endl; + } + } + + Split sp(sg.getNTaxa()); + for (i = 0; i < sg.getSetsBlock()->getNSets(); i++) + if (this_set->containTaxon(i)) + sp += *(sg.area_taxa[i]); + out << endl << "which contains " << sp.countTaxa() - params.is_rooted << " taxa: " << endl; + for (i = 0; i < sg.getNTaxa(); i++) + if (sg.getTaxa()->GetTaxonLabel(i) != ROOT_NAME && sp.containTaxon(i)) + out << sg.getTaxa()->GetTaxonLabel(i) << endl; + + } else + for ( i = 0; i < sg.getNTaxa(); i++) + if (sg.getTaxa()->GetTaxonLabel(i) != ROOT_NAME && this_set->containTaxon(i)) { + if 
(sg.isBudgetConstraint()) { + out.width(max_len); + out << left << sg.getTaxa()->GetTaxonLabel(i) << "\t"; + out.width(10); + out << right << sg.getPdaBlock()->getCost(i); + out << endl; + + } else { + out << sg.getTaxa()->GetTaxonLabel(i) << endl; + } + } + } + } + + /****************************/ + /********** FOOTER **********/ + /****************************/ + + summarizeFooter(out, params); + + out.close(); + cout << endl << "Results are summarized in " << filename << endl << endl; + } catch (ios::failure) { + outError(ERR_WRITE_OUTPUT, filename); + } } void printGainMatrix(char *filename, mmatrix(double) &delta_gain, int start_k) { - try { - ofstream out; - out.exceptions(ios::failbit | ios::badbit); - out.open(filename); - int k = start_k; - for (mmatrix(double)::iterator it = delta_gain.begin(); it != delta_gain.end(); it++, k++) { - out << k; - for (int i = 0; i < (*it).size(); i++) - out << " " << (*it)[i]; - out << endl; - } - out.close(); - cout << "PD gain matrix printed to " << filename << endl; - } catch (ios::failure) { - outError(ERR_WRITE_OUTPUT, filename); - } + try { + ofstream out; + out.exceptions(ios::failbit | ios::badbit); + out.open(filename); + int k = start_k; + for (mmatrix(double)::iterator it = delta_gain.begin(); it != delta_gain.end(); it++, k++) { + out << k; + for (int i = 0; i < (*it).size(); i++) + out << " " << (*it)[i]; + out << endl; + } + out.close(); + cout << "PD gain matrix printed to " << filename << endl; + } catch (ios::failure) { + outError(ERR_WRITE_OUTPUT, filename); + } } /** - run PD algorithm on split networks + run PD algorithm on split networks */ void runPDSplit(Params ¶ms) { - cout << "Using NCL - Nexus Class Library" << endl << endl; - - // init a split graph class from the parameters - CircularNetwork sg(params); - int i; - - // this vector of SplitSet store all the optimal PD sets - vector pd_set; - // this define an order of taxa (circular order in case of circular networks) - vector taxa_order; - // 
this store a particular taxa set - Split taxa_set; - - - if (sg.isCircular()) { - // is a circular network, get circular order - for (i = 0; i < sg.getNTaxa(); i++) - taxa_order.push_back(sg.getCircleId(i)); - } else - // otherwise, get the incremental order - for (i = 0; i < sg.getNTaxa(); i++) - taxa_order.push_back(i); - - PDRelatedMeasures pd_more; - - // begining time of the algorithm run - double time_begin = getCPUTime(); - //time(&time_begin); - // check parameters - if (sg.isPDArea()) { - if (sg.isBudgetConstraint()) { - int budget = (params.budget >= 0) ? params.budget : sg.getPdaBlock()->getBudget(); - if (budget < 0 && params.pd_proportion == 0.0) params.run_mode = PD_USER_SET; - } else { - int sub_size = (params.sub_size >= 1) ? params.sub_size : sg.getPdaBlock()->getSubSize(); - if (sub_size < 1 && params.pd_proportion == 0.0) params.run_mode = PD_USER_SET; - - } - } - - if (params.run_mode == PD_USER_SET) { - // compute score of user-defined sets - cout << "Computing PD score for user-defined set of taxa..." 
<< endl; - pd_set.resize(1); - sg.computePD(params, pd_set[0], pd_more); - if (params.endemic_pd) - sg.calcPDEndemism(pd_set[0], pd_more.PDEndemism); - - if (params.complement_area != NULL) - sg.calcPDComplementarity(pd_set[0], params.complement_area, pd_more.setName, pd_more.PDComplementarity); - - } else { - // otherwise, call the main function - if (params.num_bootstrap_samples) { - cout << endl << "======= START BOOTSTRAP ANALYSIS =======" << endl; - MTreeSet *mtrees = sg.getMTrees(); - if (mtrees->size() < 100) - cout << "WARNING: bootstrap may be unstable with less than 100 trees" << endl; - vector taxname; - sg.getTaxaName(taxname); - i = 1; - for (MTreeSet::iterator it = mtrees->begin(); it != mtrees->end(); it++, i++) { - cout << "---------- TREE " << i << " ----------" << endl; - // convert tree into split sytem - SplitGraph sg2; - (*it)->convertSplits(taxname, sg2); - // change the current split system - for (SplitGraph::reverse_iterator it = sg.rbegin(); it != sg.rend(); it++) { - delete *it; - } - sg.clear(); - sg.insert(sg.begin(), sg2.begin(), sg2.end()); - sg2.clear(); - - // now findPD on the converted tree-split system - sg.findPD(params, pd_set, taxa_order); - } - cout << "======= DONE BOOTSTRAP ANALYSIS =======" << endl << endl; - } else { - sg.findPD(params, pd_set, taxa_order); - } - } - - // ending time - double time_end = getCPUTime(); - //time(&time_end); - params.run_time = time_end - time_begin; - - cout << "Time used: " << (double) (params.run_time) << " seconds." 
<< endl; - - if (verbose_mode >= VB_DEBUG && !sg.isPDArea()) { - cout << "PD set(s) with score(s): " << endl; - for (vector::iterator it = pd_set.begin(); it != pd_set.end(); it++) - for (SplitSet::iterator it2 = (*it).begin(); it2 != (*it).end(); it2++ ){ - //(*it)->report(cout); - cout << " " << (*it2)->getWeight() << " "; - for (i = 0; i < sg.getNTaxa(); i++) - if ((*it2)->containTaxon(i)) - cout << sg.getTaxa()->GetTaxonLabel(i) << " "; - if (sg.isBudgetConstraint()) - cout << " (budget = " << sg.calcCost(*(*it2)) << ")"; - cout << endl; - } - } - - sg.printOutputSetScore(params, pd_set); - - - summarizeSplit(params, sg, pd_set, pd_more, true); - - if (params.calc_pdgain) { - mmatrix(double) delta_gain; - sg.calcPDGain(pd_set, delta_gain); - string filename = params.out_prefix; - filename += ".pdgain"; - printGainMatrix((char*)filename.c_str(), delta_gain, pd_set.front().front()->countTaxa()); - //cout << delta_gain; - } - - - //for (i = pd_set.size()-1; i >= 0; i--) - // delete pd_set[i]; + cout << "Using NCL - Nexus Class Library" << endl << endl; + + // init a split graph class from the parameters + CircularNetwork sg(params); + int i; + + // this vector of SplitSet store all the optimal PD sets + vector pd_set; + // this define an order of taxa (circular order in case of circular networks) + vector taxa_order; + // this store a particular taxa set + Split taxa_set; + + + if (sg.isCircular()) { + // is a circular network, get circular order + for (i = 0; i < sg.getNTaxa(); i++) + taxa_order.push_back(sg.getCircleId(i)); + } else + // otherwise, get the incremental order + for (i = 0; i < sg.getNTaxa(); i++) + taxa_order.push_back(i); + + PDRelatedMeasures pd_more; + + // begining time of the algorithm run + double time_begin = getCPUTime(); + //time(&time_begin); + // check parameters + if (sg.isPDArea()) { + if (sg.isBudgetConstraint()) { + int budget = (params.budget >= 0) ? 
params.budget : sg.getPdaBlock()->getBudget(); + if (budget < 0 && params.pd_proportion == 0.0) params.run_mode = RunMode::PD_USER_SET; + } else { + int sub_size = (params.sub_size >= 1) ? params.sub_size : sg.getPdaBlock()->getSubSize(); + if (sub_size < 1 && params.pd_proportion == 0.0) params.run_mode = RunMode::PD_USER_SET; + + } + } + + if (params.run_mode == RunMode::PD_USER_SET) { + // compute score of user-defined sets + cout << "Computing PD score for user-defined set of taxa..." << endl; + pd_set.resize(1); + sg.computePD(params, pd_set[0], pd_more); + if (params.endemic_pd) + sg.calcPDEndemism(pd_set[0], pd_more.PDEndemism); + + if (params.complement_area != NULL) + sg.calcPDComplementarity(pd_set[0], params.complement_area, pd_more.setName, pd_more.PDComplementarity); + + } else { + // otherwise, call the main function + if (params.num_bootstrap_samples) { + cout << endl << "======= START BOOTSTRAP ANALYSIS =======" << endl; + MTreeSet *mtrees = sg.getMTrees(); + if (mtrees->size() < 100) + cout << "WARNING: bootstrap may be unstable with less than 100 trees" << endl; + vector taxname; + sg.getTaxaName(taxname); + i = 1; + for (MTreeSet::iterator it = mtrees->begin(); it != mtrees->end(); it++, i++) { + cout << "---------- TREE " << i << " ----------" << endl; + // convert tree into split sytem + SplitGraph sg2; + (*it)->convertSplits(taxname, sg2); + // change the current split system + for (SplitGraph::reverse_iterator it = sg.rbegin(); it != sg.rend(); it++) { + delete *it; + } + sg.clear(); + sg.insert(sg.begin(), sg2.begin(), sg2.end()); + sg2.clear(); + + // now findPD on the converted tree-split system + sg.findPD(params, pd_set, taxa_order); + } + cout << "======= DONE BOOTSTRAP ANALYSIS =======" << endl << endl; + } else { + sg.findPD(params, pd_set, taxa_order); + } + } + + // ending time + double time_end = getCPUTime(); + //time(&time_end); + params.run_time = time_end - time_begin; + + cout << "Time used: " << (double) (params.run_time) << 
" seconds." << endl; + + if (verbose_mode >= VB_DEBUG && !sg.isPDArea()) { + cout << "PD set(s) with score(s): " << endl; + for (vector::iterator it = pd_set.begin(); it != pd_set.end(); it++) + for (SplitSet::iterator it2 = (*it).begin(); it2 != (*it).end(); it2++ ){ + //(*it)->report(cout); + cout << " " << (*it2)->getWeight() << " "; + for (i = 0; i < sg.getNTaxa(); i++) + if ((*it2)->containTaxon(i)) + cout << sg.getTaxa()->GetTaxonLabel(i) << " "; + if (sg.isBudgetConstraint()) + cout << " (budget = " << sg.calcCost(*(*it2)) << ")"; + cout << endl; + } + } + + sg.printOutputSetScore(params, pd_set); + + + summarizeSplit(params, sg, pd_set, pd_more, true); + + if (params.calc_pdgain) { + mmatrix(double) delta_gain; + sg.calcPDGain(pd_set, delta_gain); + string filename = params.out_prefix; + filename += ".pdgain"; + printGainMatrix((char*)filename.c_str(), delta_gain, pd_set.front().front()->countTaxa()); + //cout << delta_gain; + } + + + //for (i = pd_set.size()-1; i >= 0; i--) + // delete pd_set[i]; } void printSplitSet(SplitGraph &sg, SplitIntMap &hash_ss) { /* - for (SplitIntMap::iterator it = hash_ss.begin(); it != hash_ss.end(); it++) { - if ((*it)->getWeight() > 50 && (*it)->countTaxa() > 1) - (*it)->report(cout); - }*/ - sg.getTaxa()->Report(cout); - for (SplitGraph::iterator it = sg.begin(); it != sg.end(); it++) { - if ((*it)->getWeight() > 50 && (*it)->countTaxa() > 1) - (*it)->report(cout); - } + for (SplitIntMap::iterator it = hash_ss.begin(); it != hash_ss.end(); it++) { + if ((*it)->getWeight() > 50 && (*it)->countTaxa() > 1) + (*it)->report(cout); + }*/ + sg.getTaxa()->Report(cout); + for (SplitGraph::iterator it = sg.begin(); it != sg.end(); it++) { + if ((*it)->getWeight() > 50 && (*it)->countTaxa() > 1) + (*it)->report(cout); + } } void readTaxaOrder(char *taxa_order_file, StrVector &taxa_order) { @@ -1219,569 +1199,648 @@ void readTaxaOrder(char *taxa_order_file, StrVector &taxa_order) { } void calcTreeCluster(Params ¶ms) { - 
ASSERT(params.taxa_order_file); - MExtTree tree(params.user_file, params.is_rooted); -// StrVector taxa_order; - //readTaxaOrder(params.taxa_order_file, taxa_order); - NodeVector taxa; - mmatrix(int) clusters; - clusters.reserve(tree.leafNum - 3); - tree.getTaxa(taxa); - sort(taxa.begin(), taxa.end(), nodenamecmp); - tree.createCluster(taxa, clusters); - int cnt = 1; - - string treename = params.out_prefix; - treename += ".clu-id"; - tree.printTree(treename.c_str()); - - for (mmatrix(int)::iterator it = clusters.begin(); it != clusters.end(); it++, cnt++) { - ostringstream filename; - filename << params.out_prefix << "." << cnt << ".clu"; - ofstream out(filename.str().c_str()); - - ostringstream filename2; - filename2 << params.out_prefix << "." << cnt << ".name-clu"; - ofstream out2(filename2.str().c_str()); - - out << "w" << endl << "c" << endl << "4" << endl << "b" << endl << "g" << endl << 4-params.is_rooted << endl; - IntVector::iterator it2; - NodeVector::iterator it3; - for (it2 = (*it).begin(), it3 = taxa.begin(); it2 != (*it).end(); it2++, it3++) - if ((*it3)->name != ROOT_NAME) { - out << char((*it2)+'a') << endl; - out2 << (*it3)->name << " " << char((*it2)+'a') << endl; - } - out << "y" << endl; - out.close(); - out2.close(); - cout << "Cluster " << cnt << " printed to " << filename.rdbuf() << " and " << filename2.rdbuf() << endl; - } + ASSERT(params.taxa_order_file); + MExtTree tree(params.user_file, params.is_rooted); +// StrVector taxa_order; + //readTaxaOrder(params.taxa_order_file, taxa_order); + NodeVector taxa; + mmatrix(int) clusters; + clusters.reserve(tree.leafNum - 3); + tree.getTaxa(taxa); + sort(taxa.begin(), taxa.end(), nodenamecmp); + tree.createCluster(taxa, clusters); + int cnt = 1; + + string treename = params.out_prefix; + treename += ".clu-id"; + tree.printTree(treename.c_str()); + + for (mmatrix(int)::iterator it = clusters.begin(); it != clusters.end(); it++, cnt++) { + ostringstream filename; + filename << params.out_prefix << "." 
<< cnt << ".clu"; + ofstream out(filename.str().c_str()); + + ostringstream filename2; + filename2 << params.out_prefix << "." << cnt << ".name-clu"; + ofstream out2(filename2.str().c_str()); + + out << "w" << endl << "c" << endl << "4" << endl << "b" << endl << "g" << endl << 4-params.is_rooted << endl; + IntVector::iterator it2; + NodeVector::iterator it3; + for (it2 = (*it).begin(), it3 = taxa.begin(); it2 != (*it).end(); it2++, it3++) + if ((*it3)->name != ROOT_NAME) { + out << char((*it2)+'a') << endl; + out2 << (*it3)->name << " " << char((*it2)+'a') << endl; + } + out << "y" << endl; + out.close(); + out2.close(); + cout << "Cluster " << cnt << " printed to " << filename.rdbuf() << " and " << filename2.rdbuf() << endl; + } } void printTaxa(Params ¶ms) { - MTree mytree(params.user_file, params.is_rooted); - vector taxname; - taxname.resize(mytree.leafNum); - mytree.getTaxaName(taxname); - sort(taxname.begin(), taxname.end()); - - string filename = params.out_prefix; - filename += ".taxa"; - - try { - ofstream out; - out.exceptions(ios::failbit | ios::badbit); - out.open(filename.c_str()); - for (vector::iterator it = taxname.begin(); it != taxname.end(); it++) { - if ((*it) != ROOT_NAME) out << (*it); - out << endl; - } - out.close(); - cout << "All taxa names printed to " << filename << endl; - } catch (ios::failure) { - outError(ERR_WRITE_OUTPUT, filename); - } + MTree mytree(params.user_file, params.is_rooted); + vector taxname; + taxname.resize(mytree.leafNum); + mytree.getTaxaName(taxname); + sort(taxname.begin(), taxname.end()); + + string filename = params.out_prefix; + filename += ".taxa"; + + try { + ofstream out; + out.exceptions(ios::failbit | ios::badbit); + out.open(filename.c_str()); + for (vector::iterator it = taxname.begin(); it != taxname.end(); it++) { + if ((*it) != ROOT_NAME) out << (*it); + out << endl; + } + out.close(); + cout << "All taxa names printed to " << filename << endl; + } catch (ios::failure) { + outError(ERR_WRITE_OUTPUT, 
filename); + } } void printAreaList(Params ¶ms) { - MSetsBlock *sets; - sets = new MSetsBlock(); - cout << "Reading input file " << params.user_file << "..." << endl; + MSetsBlock *sets; + sets = new MSetsBlock(); + cout << "Reading input file " << params.user_file << "..." << endl; - MyReader nexus(params.user_file); + MyReader nexus(params.user_file); - nexus.Add(sets); + nexus.Add(sets); - MyToken token(nexus.inf); - nexus.Execute(token); + MyToken token(nexus.inf); + nexus.Execute(token); - //sets->Report(cout); + //sets->Report(cout); - TaxaSetNameVector *allsets = sets->getSets(); + TaxaSetNameVector *allsets = sets->getSets(); - string filename = params.out_prefix; - filename += ".names"; + string filename = params.out_prefix; + filename += ".names"; - try { - ofstream out; - out.exceptions(ios::failbit | ios::badbit); - out.open(filename.c_str()); - for (TaxaSetNameVector::iterator it = allsets->begin(); it != allsets->end(); it++) { - out << (*it)->name; - out << endl; - } - out.close(); - cout << "All area names printed to " << filename << endl; - } catch (ios::failure) { - outError(ERR_WRITE_OUTPUT, filename); - } + try { + ofstream out; + out.exceptions(ios::failbit | ios::badbit); + out.open(filename.c_str()); + for (TaxaSetNameVector::iterator it = allsets->begin(); it != allsets->end(); it++) { + out << (*it)->name; + out << endl; + } + out.close(); + cout << "All area names printed to " << filename << endl; + } catch (ios::failure) { + outError(ERR_WRITE_OUTPUT, filename); + } - delete sets; + delete sets; } void scaleBranchLength(Params ¶ms) { - params.is_rooted = true; - PDTree tree(params); - if (params.run_mode == SCALE_BRANCH_LEN) { - cout << "Scaling branch length with a factor of " << params.scaling_factor << " ..." << endl; - tree.scaleLength(params.scaling_factor, false); - } else { - cout << "Scaling clade support with a factor of " << params.scaling_factor << " ..." 
<< endl; - tree.scaleCladeSupport(params.scaling_factor, false); - } - if (params.out_file != NULL) - tree.printTree(params.out_file); - else { - tree.printTree(cout); - cout << endl; - } + params.is_rooted = true; + PDTree tree(params); + if (params.run_mode == RunMode::SCALE_BRANCH_LEN) { + cout << "Scaling branch length with a factor of " << params.scaling_factor << " ..." << endl; + tree.scaleLength(params.scaling_factor, false); + } else { + cout << "Scaling clade support with a factor of " << params.scaling_factor << " ..." << endl; + tree.scaleCladeSupport(params.scaling_factor, false); + } + if (params.out_file != NULL) + tree.printTree(params.out_file); + else { + tree.printTree(cout); + cout << endl; + } } void calcDistribution(Params ¶ms) { - PDTree mytree(params); - - string filename = params.out_prefix; - filename += ".randompd"; - - try { - ofstream out; - out.exceptions(ios::failbit | ios::badbit); - out.open(filename.c_str()); - for (int size = params.min_size; size <= params.sub_size; size += params.step_size) { - out << size; - for (int sample = 0; sample < params.sample_size; sample++) { - Split taxset(mytree.leafNum); - taxset.randomize(size); - mytree.calcPD(taxset); - out << " " << taxset.getWeight(); - } - out << endl; - } - out.close(); - cout << "PD distribution is printed to " << filename << endl; - } catch (ios::failure) { - outError(ERR_WRITE_OUTPUT, filename); - } + PDTree mytree(params); + + string filename = params.out_prefix; + filename += ".randompd"; + + try { + ofstream out; + out.exceptions(ios::failbit | ios::badbit); + out.open(filename.c_str()); + for (int size = params.min_size; size <= params.sub_size; size += params.step_size) { + out << size; + for (int sample = 0; sample < params.sample_size; sample++) { + Split taxset(mytree.leafNum); + taxset.randomize(size); + mytree.calcPD(taxset); + out << " " << taxset.getWeight(); + } + out << endl; + } + out.close(); + cout << "PD distribution is printed to " << filename << endl; 
+ } catch (ios::failure) { + outError(ERR_WRITE_OUTPUT, filename); + } } -void printRFDist(ostream &out, int *rfdist, int n, int m, int rf_dist_mode) { - int i, j; - if (rf_dist_mode == RF_ADJACENT_PAIR) { - out << "XXX "; - out << 1 << " " << n << endl; - for (i = 0; i < n; i++) - out << " " << rfdist[i]; - out << endl; - } else { - // all pairs - out << n << " " << m << endl; - for (i = 0; i < n; i++) { - out << "Tree" << i << " "; - for (j = 0; j < m; j++) - out << " " << rfdist[i*m+j]; - out << endl; - } - } +void printRFDist(string filename, double *rfdist, int n, int m, int rf_dist_mode, bool print_msg = true) { + int i, j; + + try { + ofstream out; + out.exceptions(ios::failbit | ios::badbit); + out.open(filename); + if (Params::getInstance().output_format == FORMAT_CSV) { + out << "# Robinson-Foulds distances" << endl + << "# This file can be read in MS Excel or in R with command:" << endl + << "# dat=read.csv('" << filename << "',comment.char='#')" << endl + << "# Columns are comma-separated with following meanings:" << endl + << "# ID1: Tree 1 ID" << endl + << "# ID2: Tree 2 ID" << endl + << "# Dist: Robinson-Foulds distance" << endl + << "ID1,ID2,Dist" << endl; + if (rf_dist_mode == RF_ADJACENT_PAIR) { + for (i = 0; i < n; i++) + out << i+1 << ',' << i+2 << ',' << rfdist[i] << endl; + } else if (Params::getInstance().rf_same_pair) { + for (i = 0; i < n; i++) + out << i+1 << ',' << i+1 << ',' << rfdist[i] << endl; + } else { + for (i = 0; i < n; i++) { + for (j = 0; j < m; j++) + out << i+1 << ',' << j+1 << ',' << rfdist[i*m+j] << endl; + } + } + } else if (rf_dist_mode == RF_ADJACENT_PAIR || Params::getInstance().rf_same_pair) { + out << "XXX "; + out << 1 << " " << n << endl; + for (i = 0; i < n; i++) + out << " " << rfdist[i]; + out << endl; + } else { + // all pairs + out << n << " " << m << endl; + for (i = 0; i < n; i++) { + out << "Tree" << i << " "; + for (j = 0; j < m; j++) + out << " " << rfdist[i*m+j]; + out << endl; + } + } + out.close(); + if 
(print_msg) + cout << "Robinson-Foulds distances printed to " << filename << endl; + } catch (ios::failure) { + outError(ERR_WRITE_OUTPUT, filename); + } } void computeRFDistExtended(const char *trees1, const char *trees2, const char *filename) { - cout << "Reading input trees 1 file " << trees1 << endl; - int ntrees = 0, ntrees2 = 0; - int *rfdist_raw = NULL; - try { - ifstream in; + cout << "Reading input trees 1 file " << trees1 << endl; + int ntrees = 0, ntrees2 = 0; + double *rfdist_raw = NULL; + try { + ifstream in; in.exceptions(ios::failbit | ios::badbit); in.open(trees1); - IntVector rfdist; - for (ntrees = 1; !in.eof(); ntrees++) { - MTree tree; - bool is_rooted = false; - - // read in the tree and convert into split system for indexing - tree.readTree(in, is_rooted); - if (verbose_mode >= VB_DEBUG) - cout << ntrees << " " << endl; - IntVector dist; - tree.computeRFDist(trees2, dist); - ntrees2 = dist.size(); - rfdist.insert(rfdist.end(), dist.begin(), dist.end()); - char ch; - in.exceptions(ios::goodbit); - (in) >> ch; - if (in.eof()) break; - in.unget(); - in.exceptions(ios::failbit | ios::badbit); - - } - - in.close(); - ASSERT(ntrees * ntrees2 == rfdist.size()); - rfdist_raw = new int[rfdist.size()]; - copy(rfdist.begin(), rfdist.end(), rfdist_raw); - - } catch (ios::failure) { - outError(ERR_READ_INPUT, trees1); - } - - try { - ofstream out; - out.exceptions(ios::failbit | ios::badbit); - out.open(filename); - printRFDist(out, rfdist_raw, ntrees, ntrees2, RF_TWO_TREE_SETS_EXTENDED); - out.close(); - cout << "Robinson-Foulds distances printed to " << filename << endl; - } catch (ios::failure) { - outError(ERR_WRITE_OUTPUT, filename); - } + IntVector rfdist; + for (ntrees = 1; !in.eof(); ntrees++) { + MTree tree; + bool is_rooted = false; + + // read in the tree and convert into split system for indexing + tree.readTree(in, is_rooted); + if (verbose_mode >= VB_DEBUG) + cout << ntrees << " " << endl; + DoubleVector dist; + tree.computeRFDist(trees2, 
dist); + ntrees2 = dist.size(); + rfdist.insert(rfdist.end(), dist.begin(), dist.end()); + char ch; + in.exceptions(ios::goodbit); + (in) >> ch; + if (in.eof()) break; + in.unget(); + in.exceptions(ios::failbit | ios::badbit); + + } + + in.close(); + ASSERT(ntrees * ntrees2 == rfdist.size()); + rfdist_raw = new double[rfdist.size()]; + copy(rfdist.begin(), rfdist.end(), rfdist_raw); + + } catch (ios::failure) { + outError(ERR_READ_INPUT, trees1); + } + printRFDist(filename, rfdist_raw, ntrees, ntrees2, RF_TWO_TREE_SETS_EXTENDED); + delete [] rfdist_raw; +} + +void computeRFDistSamePair(const char *trees1, const char *trees2, const char *filename) { + cout << "Reading input trees 1 file " << trees1 << endl; + int ntrees = 0, ntrees2 = 0; + double *rfdist_raw = NULL; + try { + ifstream in; + in.exceptions(ios::failbit | ios::badbit); + in.open(trees1); + + ifstream in2; + in2.exceptions(ios::failbit | ios::badbit); + in2.open(trees2); + + DoubleVector rfdist; + for (ntrees = 1; !in.eof() && !in2.eof(); ntrees++) { + MTree tree; + bool is_rooted = false; + // read in the tree and convert into split system for indexing + tree.readTree(in, is_rooted); + + if (verbose_mode >= VB_DEBUG) + cout << ntrees << " " << endl; + DoubleVector dist; + tree.computeRFDist(in2, dist, 0, true); + ntrees2 = dist.size(); + rfdist.insert(rfdist.end(), dist.begin(), dist.end()); + char ch; + in.exceptions(ios::goodbit); + (in) >> ch; + if (in.eof()) break; + in.unget(); + in.exceptions(ios::failbit | ios::badbit); + + } + + in.close(); + in2.close(); + ASSERT(ntrees * ntrees2 == rfdist.size()); + rfdist_raw = new double[rfdist.size()]; + copy(rfdist.begin(), rfdist.end(), rfdist_raw); + + } catch (ios::failure) { + outError(ERR_READ_INPUT, trees1); + } + + printRFDist(filename, rfdist_raw, ntrees, ntrees2, RF_TWO_TREE_SETS_EXTENDED); + + delete [] rfdist_raw; } void computeRFDist(Params ¶ms) { - if (!params.user_file) outError("User tree file not provided"); - - string filename = 
params.out_prefix; - filename += ".rfdist"; - - if (params.rf_dist_mode == RF_TWO_TREE_SETS_EXTENDED) { - computeRFDistExtended(params.user_file, params.second_tree, filename.c_str()); - return; - } - - MTreeSet trees(params.user_file, params.is_rooted, params.tree_burnin, params.tree_max_count); - int n = trees.size(), m = trees.size(); - int *rfdist; - int *incomp_splits = NULL; - string infoname = params.out_prefix; - infoname += ".rfinfo"; - string treename = params.out_prefix; - treename += ".rftree"; - if (params.rf_dist_mode == RF_TWO_TREE_SETS) { - MTreeSet treeset2(params.second_tree, params.is_rooted, params.tree_burnin, params.tree_max_count); - cout << "Computing Robinson-Foulds distances between two sets of trees" << endl; - m = treeset2.size(); - rfdist = new int [n*m]; - memset(rfdist, 0, n*m* sizeof(int)); - if (verbose_mode >= VB_MAX) { - incomp_splits = new int [n*m]; - memset(incomp_splits, 0, n*m* sizeof(int)); - } - if (verbose_mode >= VB_MED) - trees.computeRFDist(rfdist, &treeset2, infoname.c_str(),treename.c_str(), incomp_splits); - else - trees.computeRFDist(rfdist, &treeset2); - } else { - rfdist = new int [n*n]; - memset(rfdist, 0, n*n* sizeof(int)); - trees.computeRFDist(rfdist, params.rf_dist_mode, params.split_weight_threshold); - } - - if (verbose_mode >= VB_MED) printRFDist(cout, rfdist, n, m, params.rf_dist_mode); - - try { - ofstream out; - out.exceptions(ios::failbit | ios::badbit); - out.open(filename.c_str()); - printRFDist(out, rfdist, n, m, params.rf_dist_mode); - out.close(); - cout << "Robinson-Foulds distances printed to " << filename << endl; - } catch (ios::failure) { - outError(ERR_WRITE_OUTPUT, filename); - } - - if (incomp_splits) - try { - filename = params.out_prefix; - filename += ".incomp"; - ofstream out; - out.exceptions(ios::failbit | ios::badbit); - out.open(filename.c_str()); - printRFDist(out, incomp_splits, n, m, params.rf_dist_mode); - out.close(); - cout << "Number of incompatible splits in printed to " << 
filename << endl; - } catch (ios::failure) { - outError(ERR_WRITE_OUTPUT, filename); - } - - if (incomp_splits) delete [] incomp_splits; - delete [] rfdist; + if (!params.user_file) outError("User tree file not provided"); + + string filename = params.out_prefix; + filename += ".rfdist"; + + if (params.rf_dist_mode == RF_TWO_TREE_SETS_EXTENDED) { + computeRFDistExtended(params.user_file, params.second_tree, filename.c_str()); + return; + } + + if (params.rf_same_pair) { + computeRFDistSamePair(params.user_file, params.second_tree, filename.c_str()); + return; + } + + MTreeSet trees(params.user_file, params.is_rooted, params.tree_burnin, params.tree_max_count); + int n = trees.size(), m = trees.size(); + double *rfdist; + double *incomp_splits = NULL; + string infoname = params.out_prefix; + infoname += ".rfinfo"; + string treename = params.out_prefix; + treename += ".rftree"; + if (params.rf_dist_mode == RF_TWO_TREE_SETS) { + MTreeSet treeset2(params.second_tree, params.is_rooted, params.tree_burnin, params.tree_max_count); + cout << "Computing Robinson-Foulds distances between two sets of trees" << endl; + m = treeset2.size(); + size_t size = n*m; + if (params.rf_same_pair) { + if (m != n) + outError("Tree sets has different number of trees"); + size = n; + } + rfdist = new double [size]; + memset(rfdist, 0, size*sizeof(double)); + if (verbose_mode >= VB_MAX) { + incomp_splits = new double [size]; + memset(incomp_splits, 0, size*sizeof(double)); + } + if (verbose_mode >= VB_MED) + trees.computeRFDist(rfdist, &treeset2, params.rf_same_pair, + infoname.c_str(),treename.c_str(), incomp_splits); + else + trees.computeRFDist(rfdist, &treeset2, params.rf_same_pair); + } else { + rfdist = new double [n*n]; + memset(rfdist, 0, n*n* sizeof(double)); + trees.computeRFDist(rfdist, params.rf_dist_mode, params.split_weight_threshold); + } + + //if (verbose_mode >= VB_MED) printRFDist(cout, rfdist, n, m, params.rf_dist_mode); + + printRFDist(filename, rfdist, n, m, 
params.rf_dist_mode); + + if (incomp_splits) { + filename = params.out_prefix; + filename += ".incomp"; + printRFDist(filename, incomp_splits, n, m, params.rf_dist_mode, false); + cout << "Number of incompatible splits in printed to " << filename << endl; + } + + if (incomp_splits) delete [] incomp_splits; + delete [] rfdist; } void testInputFile(Params ¶ms) { - SplitGraph sg(params); - if (sg.isWeaklyCompatible()) - cout << "The split system is weakly compatible." << endl; - else - cout << "The split system is NOT weakly compatible." << endl; + SplitGraph sg(params); + if (sg.isWeaklyCompatible()) + cout << "The split system is weakly compatible." << endl; + else + cout << "The split system is NOT weakly compatible." << endl; } /**MINH ANH: for some statistics about the branches on the input tree*/ void branchStats(Params ¶ms){ - MaTree mytree(params.user_file, params.is_rooted); - mytree.drawTree(cout,WT_TAXON_ID + WT_INT_NODE); - //report to output file - string output; - if (params.out_file) - output = params.out_file; - else { - if (params.out_prefix) - output = params.out_prefix; - else - output = params.user_file; - output += ".stats"; - } - - try { - ofstream out; - out.exceptions(ios::failbit | ios::badbit); - out.open(output.c_str()); - mytree.printBrInfo(out); - } catch (ios::failure) { - outError(ERR_WRITE_OUTPUT, output); - } - cout << "Information about branch lengths of the tree is printed to: " << output << endl; - - /***** Following added by BQM to print internal branch lengths */ - NodeVector nodes1, nodes2; - mytree.generateNNIBraches(nodes1, nodes2); - output = params.out_prefix; - output += ".inlen"; - try { - ofstream out; - out.exceptions(ios::failbit | ios::badbit); - out.open(output.c_str()); - for (int i = 0; i < nodes1.size(); i++) - out << nodes1[i]->findNeighbor(nodes2[i])->length << " "; - out << endl; - } catch (ios::failure) { - outError(ERR_WRITE_OUTPUT, output); - } - cout << "Internal branch lengths printed to: " << output << 
endl; + MaTree mytree(params.user_file, params.is_rooted); + mytree.drawTree(cout,WT_TAXON_ID + WT_INT_NODE); + //report to output file + string output; + if (params.out_file) + output = params.out_file; + else { + if (params.out_prefix) + output = params.out_prefix; + else + output = params.user_file; + output += ".stats"; + } + + try { + ofstream out; + out.exceptions(ios::failbit | ios::badbit); + out.open(output.c_str()); + mytree.printBrInfo(out); + } catch (ios::failure) { + outError(ERR_WRITE_OUTPUT, output); + } + cout << "Information about branch lengths of the tree is printed to: " << output << endl; + + /***** Following added by BQM to print internal branch lengths */ + NodeVector nodes1, nodes2; + mytree.generateNNIBraches(nodes1, nodes2); + output = params.out_prefix; + output += ".inlen"; + try { + ofstream out; + out.exceptions(ios::failbit | ios::badbit); + out.open(output.c_str()); + for (int i = 0; i < nodes1.size(); i++) + out << nodes1[i]->findNeighbor(nodes2[i])->length << " "; + out << endl; + } catch (ios::failure) { + outError(ERR_WRITE_OUTPUT, output); + } + cout << "Internal branch lengths printed to: " << output << endl; } /**MINH ANH: for comparison between the input tree and each tree in a given set of trees*/ void compare(Params ¶ms){ - MaTree mytree(params.second_tree, params.is_rooted); - //sort taxon names and update nodeID, to be consistent with MTreeSet - NodeVector taxa; - mytree.getTaxa(taxa); - sort(taxa.begin(), taxa.end(), nodenamecmp); - int i; - NodeVector::iterator it; - for (it = taxa.begin(), i = 0; it != taxa.end(); it++, i++) - (*it)->id = i; - - string drawFile = params.second_tree; - drawFile += ".draw"; - try { - ofstream out1; - out1.exceptions(ios::failbit | ios::badbit); - out1.open(drawFile.c_str()); - mytree.drawTree(out1,WT_TAXON_ID + WT_INT_NODE); - } catch (ios::failure) { - outError(ERR_WRITE_OUTPUT, drawFile); - } - cout << "Tree with branchID (nodeID) was printed to: " << drawFile << endl; - - - MTreeSet 
trees(params.user_file,params.is_rooted, params.tree_burnin, params.tree_max_count); - DoubleMatrix brMatrix; - DoubleVector BSDs; - IntVector RFs; - mytree.comparedTo(trees, brMatrix, RFs, BSDs); - int numTree = trees.size(); - int numNode = mytree.nodeNum; - - string output; - if (params.out_file) - output = params.out_file; - else { - if (params.out_prefix) - output = params.out_prefix; - else - output = params.user_file; - output += ".compare"; - } - - try { - ofstream out; - out.exceptions(ios::failbit | ios::badbit); - out.open(output.c_str()); - //print the header - out << "tree " ; - for (int nodeID = 0; nodeID < numNode; nodeID++ ) - if ( brMatrix[0][nodeID] != -2 ) - out << "br_" << nodeID << " "; - out << "RF BSD" << endl; - for ( int treeID = 0; treeID < numTree; treeID++ ) - { - out << treeID << " "; - for (int nodeID = 0; nodeID < numNode; nodeID++ ) - if ( brMatrix[treeID][nodeID] != -2 ) - out << brMatrix[treeID][nodeID] << " "; - out << RFs[treeID] << " " << BSDs[treeID] << endl; - } - } catch (ios::failure) { - outError(ERR_WRITE_OUTPUT, output); - } - cout << "Comparison with the given set of trees is printed to: " << output << endl; + MaTree mytree(params.second_tree, params.is_rooted); + //sort taxon names and update nodeID, to be consistent with MTreeSet + NodeVector taxa; + mytree.getTaxa(taxa); + sort(taxa.begin(), taxa.end(), nodenamecmp); + int i; + NodeVector::iterator it; + for (it = taxa.begin(), i = 0; it != taxa.end(); it++, i++) + (*it)->id = i; + + string drawFile = params.second_tree; + drawFile += ".draw"; + try { + ofstream out1; + out1.exceptions(ios::failbit | ios::badbit); + out1.open(drawFile.c_str()); + mytree.drawTree(out1,WT_TAXON_ID + WT_INT_NODE); + } catch (ios::failure) { + outError(ERR_WRITE_OUTPUT, drawFile); + } + cout << "Tree with branchID (nodeID) was printed to: " << drawFile << endl; + + + MTreeSet trees(params.user_file,params.is_rooted, params.tree_burnin, params.tree_max_count); + DoubleMatrix brMatrix; + 
DoubleVector BSDs; + IntVector RFs; + mytree.comparedTo(trees, brMatrix, RFs, BSDs); + int numTree = trees.size(); + int numNode = mytree.nodeNum; + + string output; + if (params.out_file) + output = params.out_file; + else { + if (params.out_prefix) + output = params.out_prefix; + else + output = params.user_file; + output += ".compare"; + } + + try { + ofstream out; + out.exceptions(ios::failbit | ios::badbit); + out.open(output.c_str()); + //print the header + out << "tree " ; + for (int nodeID = 0; nodeID < numNode; nodeID++ ) + if ( brMatrix[0][nodeID] != -2 ) + out << "br_" << nodeID << " "; + out << "RF BSD" << endl; + for ( int treeID = 0; treeID < numTree; treeID++ ) + { + out << treeID << " "; + for (int nodeID = 0; nodeID < numNode; nodeID++ ) + if ( brMatrix[treeID][nodeID] != -2 ) + out << brMatrix[treeID][nodeID] << " "; + out << RFs[treeID] << " " << BSDs[treeID] << endl; + } + } catch (ios::failure) { + outError(ERR_WRITE_OUTPUT, output); + } + cout << "Comparison with the given set of trees is printed to: " << output << endl; } /**MINH ANH: to compute 'guided bootstrap' alignment*/ void guidedBootstrap(Params ¶ms) { - MaAlignment inputAlign(params.aln_file,params.sequence_type, params.intype); - inputAlign.readLogLL(params.siteLL_file); + MaAlignment inputAlign(params.aln_file,params.sequence_type, params.intype, params.model_name); + inputAlign.readLogLL(params.siteLL_file); - string outFre_name = params.out_prefix; + string outFre_name = params.out_prefix; outFre_name += ".patInfo"; - inputAlign.printPatObsExpFre(outFre_name.c_str()); + inputAlign.printPatObsExpFre(outFre_name.c_str()); - string gboAln_name = params.out_prefix; + string gboAln_name = params.out_prefix; gboAln_name += ".gbo"; - MaAlignment gboAlign; - double prob; - gboAlign.generateExpectedAlignment(&inputAlign, prob); - gboAlign.printPhylip(gboAln_name.c_str()); - - - string outProb_name = params.out_prefix; - outProb_name += ".gbo.logP"; - try { - ofstream outProb; - 
outProb.exceptions(ios::failbit | ios::badbit); - outProb.open(outProb_name.c_str()); - outProb.precision(10); - outProb << prob << endl; - outProb.close(); - } catch (ios::failure) { - outError(ERR_WRITE_OUTPUT, outProb_name); - } - - cout << "Information about patterns in the input alignment is printed to: " << outFre_name << endl; - cout << "A 'guided bootstrap' alignment is printed to: " << gboAln_name << endl; - cout << "Log of the probability of the new alignment is printed to: " << outProb_name << endl; + MaAlignment gboAlign; + double prob; + gboAlign.generateExpectedAlignment(&inputAlign, prob); + gboAlign.printAlignment(IN_PHYLIP, gboAln_name.c_str()); + + + string outProb_name = params.out_prefix; + outProb_name += ".gbo.logP"; + try { + ofstream outProb; + outProb.exceptions(ios::failbit | ios::badbit); + outProb.open(outProb_name.c_str()); + outProb.precision(10); + outProb << prob << endl; + outProb.close(); + } catch (ios::failure) { + outError(ERR_WRITE_OUTPUT, outProb_name); + } + + cout << "Information about patterns in the input alignment is printed to: " << outFre_name << endl; + cout << "A 'guided bootstrap' alignment is printed to: " << gboAln_name << endl; + cout << "Log of the probability of the new alignment is printed to: " << outProb_name << endl; } /**MINH ANH: to compute the probability of an alignment given the multinomial distribution of patterns frequencies derived from a reference alignment*/ void computeMulProb(Params ¶ms) { - Alignment refAlign(params.second_align, params.sequence_type, params.intype); - Alignment inputAlign(params.aln_file, params.sequence_type, params.intype); - double prob; - inputAlign.multinomialProb(refAlign,prob); - //Printing - string outProb_name = params.out_prefix; - outProb_name += ".mprob"; - try { - ofstream outProb; - outProb.exceptions(ios::failbit | ios::badbit); - outProb.open(outProb_name.c_str()); - outProb.precision(10); - outProb << prob << endl; - outProb.close(); - } catch (ios::failure) { 
- outError(ERR_WRITE_OUTPUT, outProb_name); - } - cout << "Probability of alignment " << params.aln_file << " given alignment " << params.second_align << " is: " << prob << endl; - cout << "The probability is printed to: " << outProb_name << endl; + Alignment refAlign(params.second_align, params.sequence_type, params.intype, params.model_name); + Alignment inputAlign(params.aln_file, params.sequence_type, params.intype, params.model_name); + double prob; + inputAlign.multinomialProb(refAlign,prob); + //Printing + string outProb_name = params.out_prefix; + outProb_name += ".mprob"; + try { + ofstream outProb; + outProb.exceptions(ios::failbit | ios::badbit); + outProb.open(outProb_name.c_str()); + outProb.precision(10); + outProb << prob << endl; + outProb.close(); + } catch (ios::failure) { + outError(ERR_WRITE_OUTPUT, outProb_name); + } + cout << "Probability of alignment " << params.aln_file << " given alignment " << params.second_align << " is: " << prob << endl; + cout << "The probability is printed to: " << outProb_name << endl; } void processNCBITree(Params ¶ms) { - NCBITree tree; - Node *dad = tree.readNCBITree(params.user_file, params.ncbi_taxid, params.ncbi_taxon_level, params.ncbi_ignore_level); - if (params.ncbi_names_file) tree.readNCBINames(params.ncbi_names_file); - - cout << "Dad ID: " << dad->name << " Root ID: " << tree.root->name << endl; - string str = params.user_file; - str += ".tree"; - if (params.out_file) str = params.out_file; - //tree.printTree(str.c_str(), WT_SORT_TAXA | WT_BR_LEN); - cout << "NCBI tree printed to " << str << endl; - try { - ofstream out; - out.exceptions(ios::failbit | ios::badbit); - out.open(str.c_str()); - tree.printTree(out, WT_SORT_TAXA | WT_BR_LEN | WT_TAXON_ID, tree.root, dad); - out << ";" << endl; - out.close(); - } catch (ios::failure) { - outError(ERR_WRITE_OUTPUT, str); - } + NCBITree tree; + Node *dad = tree.readNCBITree(params.user_file, params.ncbi_taxid, params.ncbi_taxon_level, params.ncbi_ignore_level); 
+ if (params.ncbi_names_file) tree.readNCBINames(params.ncbi_names_file); + + cout << "Dad ID: " << dad->name << " Root ID: " << tree.root->name << endl; + string str = params.user_file; + str += ".tree"; + if (params.out_file) str = params.out_file; + //tree.printTree(str.c_str(), WT_SORT_TAXA | WT_BR_LEN); + cout << "NCBI tree printed to " << str << endl; + try { + ofstream out; + out.exceptions(ios::failbit | ios::badbit); + out.open(str.c_str()); + tree.printTree(out, WT_SORT_TAXA | WT_BR_LEN | WT_TAXON_ID, tree.root, dad); + out << ";" << endl; + out.close(); + } catch (ios::failure) { + outError(ERR_WRITE_OUTPUT, str); + } } /* write simultaneously to cout/cerr and a file */ class outstreambuf : public streambuf { public: outstreambuf* open( const char* name, ios::openmode mode = ios::out); + bool is_open(); outstreambuf* close(); ~outstreambuf() { close(); } streambuf *get_fout_buf() { return fout_buf; } + streambuf *get_cout_buf() { + return cout_buf; + } ofstream *get_fout() { return &fout; } protected: - ofstream fout; - streambuf *cout_buf; - streambuf *fout_buf; + ofstream fout; + streambuf *cout_buf; + streambuf *fout_buf; virtual int overflow( int c = EOF); virtual int sync(); }; outstreambuf* outstreambuf::open( const char* name, ios::openmode mode) { - if (!(Params::getInstance().suppress_output_flags & OUT_LOG) && MPIHelper::getInstance().isMaster()) { - fout.open(name, mode); - if (!fout.is_open()) { - cerr << "ERROR: Could not open " << name << " for logging" << endl; - exit(EXIT_FAILURE); - return NULL; + if (!(Params::getInstance().suppress_output_flags & OUT_LOG)) { + if (MPIHelper::getInstance().isMaster()) { + fout.open(name, mode); + if (!fout.is_open()) { + cerr << "ERROR: Could not open " << name << " for logging" << endl; + exit(EXIT_FAILURE); + return NULL; + } + fout_buf = fout.rdbuf(); } - fout_buf = fout.rdbuf(); } - cout_buf = cout.rdbuf(); - cout.rdbuf(this); + cout_buf = cout.rdbuf(); + cout.rdbuf(this); return this; } +bool 
outstreambuf::is_open() { + return fout.is_open(); +} + outstreambuf* outstreambuf::close() { cout.rdbuf(cout_buf); if ( fout.is_open()) { sync(); - fout.close(); + fout.close(); return this; } return NULL; } int outstreambuf::overflow( int c) { // used for output buffer only - if ((verbose_mode >= VB_MIN && MPIHelper::getInstance().isMaster()) || verbose_mode >= VB_MED) - if (cout_buf->sputc(c) == EOF) return EOF; + if ((verbose_mode >= VB_MIN && MPIHelper::getInstance().isMaster()) || verbose_mode >= VB_MED) + if (cout_buf->sputc(c) == EOF) return EOF; if (Params::getInstance().suppress_output_flags & OUT_LOG) return c; if (!MPIHelper::getInstance().isMaster()) return c; - if (fout_buf->sputc(c) == EOF) return EOF; - return c; + if (fout_buf->sputc(c) == EOF) return EOF; + return c; } int outstreambuf::sync() { // used for output buffer only - if ((verbose_mode >= VB_MIN && MPIHelper::getInstance().isMaster()) || verbose_mode >= VB_MED) - cout_buf->pubsync(); + if ((verbose_mode >= VB_MIN && MPIHelper::getInstance().isMaster()) || verbose_mode >= VB_MED) + cout_buf->pubsync(); if ((Params::getInstance().suppress_output_flags & OUT_LOG) || !MPIHelper::getInstance().isMaster()) return 0; - return fout_buf->pubsync(); + return fout_buf->pubsync(); } class errstreambuf : public streambuf { @@ -1798,8 +1857,8 @@ class errstreambuf : public streambuf { } protected: - streambuf *cerr_buf; - streambuf *fout_buf; + streambuf *cerr_buf; + streambuf *fout_buf; bool new_line; virtual int overflow( int c = EOF) { @@ -1831,6 +1890,30 @@ class errstreambuf : public streambuf { } }; +class muststreambuf : public streambuf { +public: + void init(streambuf *cout_buf, streambuf *fout_buf) { + this->fout_buf = fout_buf; + this->cout_buf = cout_buf; + } + +protected: + streambuf *cout_buf; + streambuf *fout_buf; + + virtual int overflow( int c = EOF) { + if (cout_buf->sputc(c) == EOF) { + return EOF; + } + if (fout_buf->sputc(c) == EOF) return EOF; + return c; + } + + virtual int 
sync() { + cout_buf->pubsync(); + return fout_buf->pubsync(); + } +}; /********************************************************************************* @@ -1838,6 +1921,9 @@ class errstreambuf : public streambuf { *********************************************************************************/ outstreambuf _out_buf; errstreambuf _err_buf; +muststreambuf _must_buf; +ostream cmust(&_must_buf); + string _log_file; int _exit_wait_optn = FALSE; @@ -1847,21 +1933,23 @@ extern "C" void startLogFile(bool append_log) { else _out_buf.open(_log_file.c_str()); _err_buf.init(_out_buf.get_fout_buf()); + _must_buf.init(_out_buf.get_cout_buf(), _out_buf.get_fout_buf()); } extern "C" void endLogFile() { - _out_buf.close(); - + if (_out_buf.is_open()) + _out_buf.close(); } void funcExit(void) { - if(_exit_wait_optn) { - printf("\npress [return] to finish: "); - fflush(stdout); - while (getchar() != '\n'); - } - + if(_exit_wait_optn) { + printf("\npress [return] to finish: "); + fflush(stdout); + while (getchar() != '\n'); + } + endLogFile(); + MPIHelper::getInstance().finalize(); } extern "C" void funcAbort(int signal_number) @@ -1871,299 +1959,299 @@ extern "C" void funcAbort(int signal_number) because abort() was called, your program will exit or crash anyway (with a dialog box on Windows). 
*/ -#if (defined(__GNUC__) || defined(__clang__)) && !defined(WIN32) && !defined(__CYGWIN__) - print_stacktrace(cerr); +#if (defined(__GNUC__) || defined(__clang__)) && !defined(WIN32) && !defined(WIN64) && !defined(__CYGWIN__) + print_stacktrace(cerr); #endif - cerr << endl << "*** IQ-TREE CRASHES WITH SIGNAL "; - switch (signal_number) { - case SIGABRT: cerr << "ABORTED"; break; - case SIGFPE: cerr << "ERRONEOUS NUMERIC"; break; - case SIGILL: cerr << "ILLEGAL INSTRUCTION"; break; - case SIGSEGV: cerr << "SEGMENTATION FAULT"; break; -#if !defined WIN32 && !defined _WIN32 && !defined __WIN32__ - case SIGBUS: cerr << "BUS ERROR"; break; + cerr << endl << "*** IQ-TREE CRASHES WITH SIGNAL "; + switch (signal_number) { + case SIGABRT: cerr << "ABORTED"; break; + case SIGFPE: cerr << "ERRONEOUS NUMERIC"; break; + case SIGILL: cerr << "ILLEGAL INSTRUCTION"; break; + case SIGSEGV: cerr << "SEGMENTATION FAULT"; break; +#if !defined WIN32 && !defined _WIN32 && !defined __WIN32__ && !defined WIN64 + case SIGBUS: cerr << "BUS ERROR"; break; #endif - } + } cerr << endl; - cerr << "*** For bug report please send to developers:" << endl << "*** Log file: " << _log_file; - cerr << endl << "*** Alignment files (if possible)" << endl; - funcExit(); - signal(signal_number, SIG_DFL); + cerr << "*** For bug report please send to developers:" << endl << "*** Log file: " << _log_file; + cerr << endl << "*** Alignment files (if possible)" << endl; + funcExit(); + signal(signal_number, SIG_DFL); } extern "C" void getintargv(int *argc, char **argv[]) { - int done; - int count; - int n; - int l; - char ch; - char *argtmp; - char **argstr; - - argtmp = (char *)calloc(10100, sizeof(char)); - argstr = (char **)calloc(100, sizeof(char*)); - for(n=0; n<100; n++) { - argstr[n] = &(argtmp[n * 100]); - } - n=1; - - fprintf(stdout, "\nYou seem to have click-started this program,"); - fprintf(stdout, "\ndo you want to enter commandline parameters: [y]es, [n]o: "); - fflush(stdout); - - /* read one 
char */ - ch = getc(stdin); - if (ch != '\n') { - do ; - while (getc(stdin) != '\n'); - } - ch = (char) tolower((int) ch); - - if (ch == 'y') { - done=FALSE; - - fprintf(stdout, "\nEnter single parameter [! for none]: "); - fflush(stdout); - count = fscanf(stdin, "%s", argstr[n]); - do ; - while (getc(stdin) != '\n'); - - if(argstr[0][0] == '!') { - count = 0; - } else { - if (strlen(argstr[n]) > 100) { - fprintf(stdout, "\nParameter too long!!!\n"); - } else { - n++; - } - } - - while(!done) { - fprintf(stdout, "\nCurrent commandline: "); - for(l=1; l1 ? ", delete last [l]" : ""), - (n>1 ? ", delete all [a]" : "")); - fflush(stdout); - - /* read one char */ - ch = getc(stdin); - /* ch = getchar(); */ - if (ch != '\n') { - do ; - while (getc(stdin) != '\n'); - /* while (getchar() != '\n'); */ - } - ch = (char) tolower((int) ch); - - switch (ch) { - case 'y': - done=TRUE; - break; - case 'e': - fprintf(stdout, "\nEnter single parameter [! for none]: "); - fflush(stdout); - count = fscanf(stdin, "%s", argstr[n]); - do ; - while (getc(stdin) != '\n'); - - if(argstr[0][0] == '!') { - count = 0; - } else { - if (strlen(argstr[n]) > 100) { - fprintf(stdout, "\nParameter too long!!!\n"); - } else { - n++; - } - } - break; - case 'l': - if (n>1) n--; - break; - case 'a': - n=1; - break; - case 'q': - // tp_exit(0, NULL, FALSE, __FILE__, __LINE__, _exit_wait_optn); - if(_exit_wait_optn) { - printf("\npress [return] to finish: "); - fflush(stdout); - while (getchar() != '\n'); - } - exit(0); - break; - } - } - } - - *argc = n; - *argv = argstr; + int done; + int count; + int n; + int l; + char ch; + char *argtmp; + char **argstr; + + argtmp = (char *)calloc(10100, sizeof(char)); + argstr = (char **)calloc(100, sizeof(char*)); + for(n=0; n<100; n++) { + argstr[n] = &(argtmp[n * 100]); + } + n=1; + + fprintf(stdout, "\nYou seem to have click-started this program,"); + fprintf(stdout, "\ndo you want to enter commandline parameters: [y]es, [n]o: "); + fflush(stdout); + + /* read 
one char */ + ch = getc(stdin); + if (ch != '\n') { + do ; + while (getc(stdin) != '\n'); + } + ch = (char) tolower((int) ch); + + if (ch == 'y') { + done=FALSE; + + fprintf(stdout, "\nEnter single parameter [! for none]: "); + fflush(stdout); + count = fscanf(stdin, "%s", argstr[n]); + do ; + while (getc(stdin) != '\n'); + + if(argstr[0][0] == '!') { + count = 0; + } else { + if (strlen(argstr[n]) > 100) { + fprintf(stdout, "\nParameter too long!!!\n"); + } else { + n++; + } + } + + while(!done) { + fprintf(stdout, "\nCurrent commandline: "); + for(l=1; l1 ? ", delete last [l]" : ""), + (n>1 ? ", delete all [a]" : "")); + fflush(stdout); + + /* read one char */ + ch = getc(stdin); + /* ch = getchar(); */ + if (ch != '\n') { + do ; + while (getc(stdin) != '\n'); + /* while (getchar() != '\n'); */ + } + ch = (char) tolower((int) ch); + + switch (ch) { + case 'y': + done=TRUE; + break; + case 'e': + fprintf(stdout, "\nEnter single parameter [! for none]: "); + fflush(stdout); + count = fscanf(stdin, "%s", argstr[n]); + do ; + while (getc(stdin) != '\n'); + + if(argstr[0][0] == '!') { + count = 0; + } else { + if (strlen(argstr[n]) > 100) { + fprintf(stdout, "\nParameter too long!!!\n"); + } else { + n++; + } + } + break; + case 'l': + if (n>1) n--; + break; + case 'a': + n=1; + break; + case 'q': + // tp_exit(0, NULL, FALSE, __FILE__, __LINE__, _exit_wait_optn); + if(_exit_wait_optn) { + printf("\npress [return] to finish: "); + fflush(stdout); + while (getchar() != '\n'); + } + exit(0); + break; + } + } + } + + *argc = n; + *argv = argstr; } /* getintargv */ /********************************************************************************************************************************* - Olga: ECOpd - phylogenetic diversity with ecological constraint: choosing a viable subset of species which maximizes PD/SD + Olga: ECOpd - phylogenetic diversity with ecological constraint: choosing a viable subset of species which maximizes PD/SD 
*********************************************************************************************************************************/ void processECOpd(Params ¶ms) { - double startTime = getCPUTime(); - params.detected_mode = LINEAR_PROGRAMMING; - cout<<"----------------------------------------------------------------------------------------"< 100 || params.diet_max < 0){ - cout<<"The minimum percentage of the diet to be conserved for each predator"<name <name); - //for(i=0;i 100 || params.diet_max < 0){ + cout<<"The minimum percentage of the diet to be conserved for each predator"<name <name); + //for(i=0;i= 0) && !found && (argv[0][n] != '/') + && (argv[0][n] != '\\'); n--) { + + tmpstr = &(argv[0][n]); + dummyint = 0; + (void) sscanf(tmpstr, "click%n", &dummyint); + if (dummyint == 5) found = TRUE; + else { + dummyint = 0; + (void) sscanf(tmpstr, "CLICK%n", &dummyint); + if (dummyint == 5) found = TRUE; + else { + dummyint = 0; + (void) sscanf(tmpstr, "Click%n", &dummyint); + if (dummyint == 5) found = TRUE; + } + } + } + if (found) _exit_wait_optn = TRUE; + + if (_exit_wait_optn) { // get commandline parameters from keyboard + getintargv(&intargc, &intargv); + fprintf(stdout, "\n\n"); + if (intargc > 1) { // if there were option entered, use them as argc/argv + argc = intargc; + argv = intargv; + } + } + } /* local scope */ + /*************************/ - /*************************/ - { /* local scope */ - int found = FALSE; /* "click" found in cmd name? 
*/ - int n, dummyint; - char *tmpstr; - int intargc; - char **intargv; - intargc = 0; - intargv = NULL; - - for (n = strlen(argv[0]) - 5; - (n >= 0) && !found && (argv[0][n] != '/') - && (argv[0][n] != '\\'); n--) { - - tmpstr = &(argv[0][n]); - dummyint = 0; - (void) sscanf(tmpstr, "click%n", &dummyint); - if (dummyint == 5) found = TRUE; - else { - dummyint = 0; - (void) sscanf(tmpstr, "CLICK%n", &dummyint); - if (dummyint == 5) found = TRUE; - else { - dummyint = 0; - (void) sscanf(tmpstr, "Click%n", &dummyint); - if (dummyint == 5) found = TRUE; - } - } - } - if (found) _exit_wait_optn = TRUE; - - if (_exit_wait_optn) { // get commandline parameters from keyboard - getintargv(&intargc, &intargv); - fprintf(stdout, "\n\n"); - if (intargc > 1) { // if there were option entered, use them as argc/argv - argc = intargc; - argv = intargv; - } - } - } /* local scope */ - /*************************/ - - parseArg(argc, argv, Params::getInstance()); + parseArg(argc, argv, Params::getInstance()); // 2015-12-05 Checkpoint *checkpoint = new Checkpoint; @@ -2289,18 +2366,26 @@ int main(int argc, char *argv[]) { if (checkpoint->hasKey("finished")) { if (checkpoint->getBool("finished")) { if (Params::getInstance().force_unfinished) { - cout << "NOTE: Continue analysis although a previous run already finished" << endl; + if (MPIHelper::getInstance().isMaster()) + cout << "NOTE: Continue analysis although a previous run already finished" << endl; } else { - outError("Checkpoint (" + filename + ") indicates that a previous run successfully finished\n" + - "Use `-redo` option if you really want to redo the analysis and overwrite all output files."); delete checkpoint; - return EXIT_FAILURE; + if (MPIHelper::getInstance().isMaster()) + outError("Checkpoint (" + filename + ") indicates that a previous run successfully finished\n" + + "Use `-redo` option if you really want to redo the analysis and overwrite all output files.\n" + + "Use `--redo-tree` option if you want to restore 
ModelFinder and only redo tree search.\n" + + "Use `--undo` option if you want to continue previous run when changing/adding options." + ); + else + exit(EXIT_SUCCESS); + exit(EXIT_FAILURE); } } else { append_log = true; } } else { - outWarning("Ignore invalid checkpoint file " + filename); + if (MPIHelper::getInstance().isMaster()) + outWarning("Ignore invalid checkpoint file " + filename); checkpoint->clear(); } } @@ -2309,109 +2394,93 @@ int main(int argc, char *argv[]) { if (MPIHelper::getInstance().isWorker()) checkpoint->setFileName(""); - _log_file = Params::getInstance().out_prefix; - _log_file += ".log"; - startLogFile(append_log); - time_t start_time; + _log_file = Params::getInstance().out_prefix; + _log_file += ".log"; + startLogFile(append_log); + time_t start_time; if (append_log) { cout << endl << "******************************************************" << endl << "CHECKPOINT: Resuming analysis from " << filename << endl << endl; } -#ifdef _IQTREE_MPI - cout << "************************************************" << endl; - cout << "* START TREE SEARCH USING MPI WITH " << MPIHelper::getInstance().getNumProcesses() << " PROCESSES *" << endl; - cout << "************************************************" << endl; - unsigned int rndSeed; - if (MPIHelper::getInstance().isMaster()) { - rndSeed = Params::getInstance().ran_seed; - cout << "Random seed of master = " << rndSeed << endl; - } - // Broadcast random seed - MPI_Bcast(&rndSeed, 1, MPI_INT, PROC_MASTER, MPI_COMM_WORLD); - if (MPIHelper::getInstance().isWorker()) { -// Params::getInstance().ran_seed = rndSeed + task_id * 100000; - Params::getInstance().ran_seed = rndSeed; -// printf("Process %d: random_seed = %d\n", task_id, Params::getInstance().ran_seed); - } -#endif - atexit(funcExit); - signal(SIGABRT, &funcAbort); - signal(SIGFPE, &funcAbort); - signal(SIGILL, &funcAbort); - signal(SIGSEGV, &funcAbort); -#if !defined WIN32 && !defined _WIN32 && !defined __WIN32__ - signal(SIGBUS, &funcAbort); + 
MPIHelper::getInstance().syncRandomSeed(); + + signal(SIGABRT, &funcAbort); + signal(SIGFPE, &funcAbort); + signal(SIGILL, &funcAbort); + signal(SIGSEGV, &funcAbort); +#if !defined WIN32 && !defined _WIN32 && !defined __WIN32__ && !defined WIN64 + signal(SIGBUS, &funcAbort); #endif - printCopyright(cout); + printCopyright(cout); - /* + /* double x=1e-100; double y=1e-101; if (x > y) cout << "ok!" << endl; else cout << "shit!" << endl; */ - //FILE *pfile = popen("hostname","r"); - char hostname[100]; -#if defined WIN32 || defined _WIN32 || defined __WIN32__ + //FILE *pfile = popen("hostname","r"); + char hostname[100]; +#if defined WIN32 || defined _WIN32 || defined __WIN32__ || defined WIN64 WSADATA wsaData; WSAStartup(MAKEWORD(2, 2), &wsaData); gethostname(hostname, sizeof(hostname)); WSACleanup(); #else - gethostname(hostname, sizeof(hostname)); + gethostname(hostname, sizeof(hostname)); #endif - //fgets(hostname, sizeof(hostname), pfile); - //pclose(pfile); + //fgets(hostname, sizeof(hostname), pfile); + //pclose(pfile); - instruction_set = instrset_detect(); + instruction_set = instrset_detect(); #if defined(BINARY32) || defined(__NOAVX__) instruction_set = min(instruction_set, (int)LK_SSE42); #endif - if (instruction_set < LK_SSE2) outError("Your CPU does not support SSE2!"); - bool has_fma3 = (instruction_set >= LK_AVX) && hasFMA3(); + if (instruction_set < LK_SSE2) outError("Your CPU does not support SSE2!"); + bool has_fma3 = (instruction_set >= LK_AVX) && hasFMA3(); #ifdef __FMA__ - bool has_fma = has_fma3; - if (!has_fma) { - outError("Your CPU does not support FMA instruction, quiting now..."); - } + bool has_fma = has_fma3; + if (!has_fma) { + outError("Your CPU does not support FMA instruction, quiting now..."); + } #endif - cout << "Host: " << hostname << " ("; - switch (instruction_set) { - case 0: cout << "x86, "; break; - case 1: cout << "SSE, "; break; - case 2: cout << "SSE2, "; break; - case 3: cout << "SSE3, "; break; - case 4: cout << "SSSE3, 
"; break; - case 5: cout << "SSE4.1, "; break; - case 6: cout << "SSE4.2, "; break; - case 7: cout << "AVX, "; break; - case 8: cout << "AVX2, "; break; - default: cout << "AVX512, "; break; - } - if (has_fma3) cout << "FMA3, "; -// if (has_fma4) cout << "FMA4, "; + cout << "Host: " << hostname << " ("; + switch (instruction_set) { + case 0: cout << "x86, "; break; + case 1: cout << "SSE, "; break; + case 2: cout << "SSE2, "; break; + case 3: cout << "SSE3, "; break; + case 4: cout << "SSSE3, "; break; + case 5: cout << "SSE4.1, "; break; + case 6: cout << "SSE4.2, "; break; + case 7: cout << "AVX, "; break; + case 8: cout << "AVX2, "; break; + default: cout << "AVX512, "; break; + } + if (has_fma3) cout << "FMA3, "; +// if (has_fma4) cout << "FMA4, "; //#if defined __APPLE__ || defined __MACH__ - cout << (int)(((getMemorySize()/1024.0)/1024)/1024) << " GB RAM)" << endl; + cout << (int)(((getMemorySize()/1024.0)/1024)/1024) << " GB RAM)" << endl; //#else -// cout << (int)(((getMemorySize()/1000.0)/1000)/1000) << " GB RAM)" << endl; +// cout << (int)(((getMemorySize()/1000.0)/1000)/1000) << " GB RAM)" << endl; //#endif - cout << "Command:"; + cout << "Command:"; int i; - for (i = 0; i < argc; i++) - cout << " " << argv[i]; - cout << endl; + for (i = 0; i < argc; i++) + cout << " " << argv[i]; + cout << endl; checkpoint->get("iqtree.seed", Params::getInstance().ran_seed); - cout << "Seed: " << Params::getInstance().ran_seed << " "; - init_random(Params::getInstance().ran_seed + MPIHelper::getInstance().getProcessID(), true); + cout << "Seed: " << Params::getInstance().ran_seed << " "; + init_random(Params::getInstance().ran_seed + MPIHelper::getInstance().getProcessID(), true); - time(&start_time); - cout << "Time: " << ctime(&start_time); + time(&start_time); + cout << "Time: " << ctime(&start_time); // increase instruction set level with FMA if (has_fma3 && instruction_set < LK_AVX_FMA) @@ -2419,19 +2488,19 @@ int main(int argc, char *argv[]) { 
Params::getInstance().SSE = min(Params::getInstance().SSE, (LikelihoodKernel)instruction_set); - cout << "Kernel: "; + cout << "Kernel: "; if (Params::getInstance().lk_safe_scaling) { cout << "Safe "; } - if (Params::getInstance().pll) { + if (Params::getInstance().pll) { #ifdef __AVX__ - cout << "PLL-AVX"; + cout << "PLL-AVX"; #else - cout << "PLL-SSE3"; + cout << "PLL-SSE3"; #endif - } else { + } else { if (Params::getInstance().SSE >= LK_AVX512) cout << "AVX-512"; else if (Params::getInstance().SSE >= LK_AVX_FMA) { @@ -2442,35 +2511,37 @@ int main(int argc, char *argv[]) { cout << "SSE2"; } else cout << "x86"; - } + } #ifdef _OPENMP - if (Params::getInstance().num_threads >= 1) { + if (Params::getInstance().num_threads >= 1) { omp_set_num_threads(Params::getInstance().num_threads); Params::getInstance().num_threads = omp_get_max_threads(); } -// int max_threads = omp_get_max_threads(); - int max_procs = countPhysicalCPUCores(); - cout << " - "; +// int max_threads = omp_get_max_threads(); + int max_procs = countPhysicalCPUCores(); + cout << " - "; if (Params::getInstance().num_threads > 0) cout << Params::getInstance().num_threads << " threads"; else cout << "auto-detect threads"; cout << " (" << max_procs << " CPU cores detected)"; - if (Params::getInstance().num_threads > max_procs) { - cout << endl; - outError("You have specified more threads than CPU cores available"); - } - omp_set_nested(false); // don't allow nested OpenMP parallelism + if (Params::getInstance().num_threads > max_procs) { + cout << endl; + outError("You have specified more threads than CPU cores available"); + } + omp_set_nested(false); // don't allow nested OpenMP parallelism #else - if (Params::getInstance().num_threads != 1) { - cout << endl << endl; - outError("Number of threads must be 1 for sequential version."); - } + if (Params::getInstance().num_threads != 1) { + cout << endl << endl; + outError("Number of threads must be 1 for sequential version."); + } #endif - -#ifndef 
_IQTREE_MPI +#ifdef _IQTREE_MPI + cout << endl << "MPI: " << MPIHelper::getInstance().getNumProcesses() << " processes"; +#endif + int num_procs = countPhysicalCPUCores(); #ifdef _OPENMP if (num_procs > 1 && Params::getInstance().num_threads == 1) { @@ -2480,14 +2551,13 @@ int main(int argc, char *argv[]) { #else if (num_procs > 1) cout << endl << endl << "NOTE: Consider using the multicore version because your CPU has " << num_procs << " cores!"; -#endif #endif - //cout << "sizeof(int)=" << sizeof(int) << endl; - cout << endl << endl; + //cout << "sizeof(int)=" << sizeof(int) << endl; + cout << endl << endl; - cout.precision(3); - cout.setf(ios::fixed); + cout.precision(3); + cout.setf(ios::fixed); // checkpoint general run information checkpoint->startStruct("iqtree"); @@ -2515,7 +2585,7 @@ int main(int argc, char *argv[]) { command = ""; } - for (i = 1; i < argc; i++) + for (i = 1; i < argc; i++) command += string(" ") + argv[i]; CKP_SAVE(command); int seed = Params::getInstance().ran_seed; @@ -2525,7 +2595,7 @@ int main(int argc, char *argv[]) { // check for incompatible version string version; stringstream sversion; - sversion << iqtree_VERSION_MAJOR << "." << iqtree_VERSION_MINOR << "." << iqtree_VERSION_PATCH; + sversion << iqtree_VERSION_MAJOR << "." << iqtree_VERSION_MINOR << iqtree_VERSION_PATCH; version = sversion.str(); CKP_SAVE(version); checkpoint->endStruct(); @@ -2533,113 +2603,113 @@ int main(int argc, char *argv[]) { if (MPIHelper::getInstance().getNumProcesses() > 1) { if (Params::getInstance().aln_file || Params::getInstance().partition_file) { runPhyloAnalysis(Params::getInstance(), checkpoint); + cout << "finish runPhyloAnalysis" << endl << flush; } else { outError("Please use one MPI process! 
The feature you wanted does not need parallelization."); } } else - // call the main function - if (Params::getInstance().tree_gen != NONE) { - generateRandomTree(Params::getInstance()); -// } else if (Params::getInstance().do_pars_multistate) { -// doParsMultiState(Params::getInstance()); - } else if (Params::getInstance().rf_dist_mode != 0) { - computeRFDist(Params::getInstance()); - } else if (Params::getInstance().test_input != TEST_NONE) { - Params::getInstance().intype = detectInputFile(Params::getInstance().user_file); - testInputFile(Params::getInstance()); - } else if (Params::getInstance().run_mode == PRINT_TAXA) { - printTaxa(Params::getInstance()); - } else if (Params::getInstance().run_mode == PRINT_AREA) { - printAreaList(Params::getInstance()); - } else if (Params::getInstance().run_mode == SCALE_BRANCH_LEN || Params::getInstance().run_mode == SCALE_NODE_NAME) { - scaleBranchLength(Params::getInstance()); - } else if (Params::getInstance().run_mode == PD_DISTRIBUTION) { - calcDistribution(Params::getInstance()); - } else if (Params::getInstance().run_mode == STATS){ /**MINH ANH: for some statistics on the input tree*/ - branchStats(Params::getInstance()); // MA - } else if (Params::getInstance().branch_cluster > 0) { - calcTreeCluster(Params::getInstance()); - } else if (Params::getInstance().ncbi_taxid) { - processNCBITree(Params::getInstance()); - } else if (Params::getInstance().user_file && Params::getInstance().eco_dag_file) { /**ECOpd analysis*/ - processECOpd(Params::getInstance()); - } else if (Params::getInstance().aln_file || Params::getInstance().partition_file) { - if ((Params::getInstance().siteLL_file || Params::getInstance().second_align) && !Params::getInstance().gbo_replicates) - { - if (Params::getInstance().siteLL_file) - guidedBootstrap(Params::getInstance()); - if (Params::getInstance().second_align) - computeMulProb(Params::getInstance()); - } else { - runPhyloAnalysis(Params::getInstance(), checkpoint); - } -// } else if 
(Params::getInstance().ngs_file || Params::getInstance().ngs_mapped_reads) { -// runNGSAnalysis(Params::getInstance()); -// } else if (Params::getInstance().pdtaxa_file && Params::getInstance().gene_scale_factor >=0.0 && Params::getInstance().gene_pvalue_file) { -// runGSSAnalysis(Params::getInstance()); - } else if (Params::getInstance().consensus_type != CT_NONE) { - MExtTree tree; - switch (Params::getInstance().consensus_type) { - case CT_CONSENSUS_TREE: - computeConsensusTree(Params::getInstance().user_file, Params::getInstance().tree_burnin, Params::getInstance().tree_max_count, Params::getInstance().split_threshold, - Params::getInstance().split_weight_threshold, Params::getInstance().out_file, Params::getInstance().out_prefix, Params::getInstance().tree_weight_file, &Params::getInstance()); - break; - case CT_CONSENSUS_NETWORK: - computeConsensusNetwork(Params::getInstance().user_file, Params::getInstance().tree_burnin, Params::getInstance().tree_max_count, Params::getInstance().split_threshold, - Params::getInstance().split_weight_summary, Params::getInstance().split_weight_threshold, Params::getInstance().out_file, Params::getInstance().out_prefix, Params::getInstance().tree_weight_file); - break; - case CT_ASSIGN_SUPPORT: - assignBootstrapSupport(Params::getInstance().user_file, Params::getInstance().tree_burnin, Params::getInstance().tree_max_count, - Params::getInstance().second_tree, Params::getInstance().is_rooted, Params::getInstance().out_file, - Params::getInstance().out_prefix, tree, Params::getInstance().tree_weight_file, &Params::getInstance()); - break; - case CT_ASSIGN_SUPPORT_EXTENDED: - assignBranchSupportNew(Params::getInstance()); - break; - case CT_NONE: break; - /**MINH ANH: for some comparison*/ - case COMPARE: compare(Params::getInstance()); break; //MA - } + // call the main function + if (Params::getInstance().tree_gen != NONE) { + generateRandomTree(Params::getInstance()); + } else if (Params::getInstance().do_pars_multistate) { + 
doParsMultiState(Params::getInstance()); + } else if (Params::getInstance().rf_dist_mode != 0) { + computeRFDist(Params::getInstance()); + } else if (Params::getInstance().test_input != TEST_NONE) { + Params::getInstance().intype = detectInputFile(Params::getInstance().user_file); + testInputFile(Params::getInstance()); + } else if (Params::getInstance().run_mode == RunMode::PRINT_TAXA) { + printTaxa(Params::getInstance()); + } else if (Params::getInstance().run_mode == RunMode::PRINT_AREA) { + printAreaList(Params::getInstance()); + } else if (Params::getInstance().run_mode == RunMode::SCALE_BRANCH_LEN || Params::getInstance().run_mode == RunMode::SCALE_NODE_NAME) { + scaleBranchLength(Params::getInstance()); + } else if (Params::getInstance().run_mode == RunMode::PD_DISTRIBUTION) { + calcDistribution(Params::getInstance()); + } else if (Params::getInstance().run_mode == RunMode::STATS){ /**MINH ANH: for some statistics on the input tree*/ + branchStats(Params::getInstance()); // MA + } else if (Params::getInstance().branch_cluster > 0) { + calcTreeCluster(Params::getInstance()); + } else if (Params::getInstance().ncbi_taxid) { + processNCBITree(Params::getInstance()); + } else if (Params::getInstance().user_file && Params::getInstance().eco_dag_file) { /**ECOpd analysis*/ + processECOpd(Params::getInstance()); + } else if ((Params::getInstance().aln_file || Params::getInstance().partition_file) && + Params::getInstance().consensus_type != CT_ASSIGN_SUPPORT_EXTENDED) + { + if ((Params::getInstance().siteLL_file || Params::getInstance().second_align) && !Params::getInstance().gbo_replicates) + { + if (Params::getInstance().siteLL_file) + guidedBootstrap(Params::getInstance()); + if (Params::getInstance().second_align) + computeMulProb(Params::getInstance()); + } else { + runPhyloAnalysis(Params::getInstance(), checkpoint); + } +// } else if (Params::getInstance().ngs_file || Params::getInstance().ngs_mapped_reads) { +// runNGSAnalysis(Params::getInstance()); +// } 
else if (Params::getInstance().pdtaxa_file && Params::getInstance().gene_scale_factor >=0.0 && Params::getInstance().gene_pvalue_file) { +// runGSSAnalysis(Params::getInstance()); + } else if (Params::getInstance().consensus_type != CT_NONE) { + MExtTree tree; + switch (Params::getInstance().consensus_type) { + case CT_CONSENSUS_TREE: + computeConsensusTree(Params::getInstance().user_file, Params::getInstance().tree_burnin, Params::getInstance().tree_max_count, Params::getInstance().split_threshold, + Params::getInstance().split_weight_threshold, Params::getInstance().out_file, Params::getInstance().out_prefix, Params::getInstance().tree_weight_file, &Params::getInstance()); + break; + case CT_CONSENSUS_NETWORK: + computeConsensusNetwork(Params::getInstance().user_file, Params::getInstance().tree_burnin, Params::getInstance().tree_max_count, Params::getInstance().split_threshold, + Params::getInstance().split_weight_summary, Params::getInstance().split_weight_threshold, Params::getInstance().out_file, Params::getInstance().out_prefix, Params::getInstance().tree_weight_file); + break; + case CT_ASSIGN_SUPPORT: + assignBootstrapSupport(Params::getInstance().user_file, Params::getInstance().tree_burnin, Params::getInstance().tree_max_count, + Params::getInstance().second_tree, Params::getInstance().is_rooted, Params::getInstance().out_file, + Params::getInstance().out_prefix, tree, Params::getInstance().tree_weight_file, &Params::getInstance()); + break; + case CT_ASSIGN_SUPPORT_EXTENDED: + assignBranchSupportNew(Params::getInstance()); + break; + case CT_NONE: break; + /**MINH ANH: for some comparison*/ + case COMPARE: compare(Params::getInstance()); break; //MA + } } else if (Params::getInstance().split_threshold_str) { // for Ricardo: keep those splits from input tree above given support threshold collapseLowBranchSupport(Params::getInstance().user_file, Params::getInstance().split_threshold_str); - } else { - Params::getInstance().intype = 
detectInputFile(Params::getInstance().user_file); - if (Params::getInstance().intype == IN_NEWICK && Params::getInstance().pdtaxa_file && Params::getInstance().tree_gen == NONE) { - if (Params::getInstance().budget_file) { - //if (Params::getInstance().budget < 0) Params::getInstance().run_mode = PD_USER_SET; - } else { - if (Params::getInstance().sub_size < 1 && Params::getInstance().pd_proportion == 0.0) - Params::getInstance().run_mode = PD_USER_SET; - } - // input is a tree, check if it is a reserve selection -> convert to splits - if (Params::getInstance().run_mode != PD_USER_SET) Params::getInstance().multi_tree = true; - } - - - if (Params::getInstance().intype == IN_NEWICK && !Params::getInstance().find_all && Params::getInstance().budget_file == NULL && - Params::getInstance().find_pd_min == false && Params::getInstance().calc_pdgain == false && - Params::getInstance().run_mode != LINEAR_PROGRAMMING && Params::getInstance().multi_tree == false) - runPDTree(Params::getInstance()); - else if (Params::getInstance().intype == IN_NEXUS || Params::getInstance().intype == IN_NEWICK) { - if (Params::getInstance().run_mode == LINEAR_PROGRAMMING && Params::getInstance().find_pd_min) - outError("Current linear programming does not support finding minimal PD sets!"); - if (Params::getInstance().find_all && Params::getInstance().run_mode == LINEAR_PROGRAMMING) - Params::getInstance().binary_programming = true; - runPDSplit(Params::getInstance()); - } else { - outError("Unknown file input format"); - } - } - - time(&start_time); - cout << "Date and Time: " << ctime(&start_time); - delete checkpoint; - - finish_random(); + } else { + Params::getInstance().intype = detectInputFile(Params::getInstance().user_file); + if (Params::getInstance().intype == IN_NEWICK && Params::getInstance().pdtaxa_file && Params::getInstance().tree_gen == NONE) { + if (Params::getInstance().budget_file) { + //if (Params::getInstance().budget < 0) Params::getInstance().run_mode = PD_USER_SET; + 
} else { + if (Params::getInstance().sub_size < 1 && Params::getInstance().pd_proportion == 0.0) + Params::getInstance().run_mode = RunMode::PD_USER_SET; + } + // input is a tree, check if it is a reserve selection -> convert to splits + if (Params::getInstance().run_mode != RunMode::PD_USER_SET) Params::getInstance().multi_tree = true; + } + + + if (Params::getInstance().intype == IN_NEWICK && !Params::getInstance().find_all && Params::getInstance().budget_file == NULL && + Params::getInstance().find_pd_min == false && Params::getInstance().calc_pdgain == false && + Params::getInstance().run_mode != RunMode::LINEAR_PROGRAMMING && Params::getInstance().multi_tree == false) + runPDTree(Params::getInstance()); + else if (Params::getInstance().intype == IN_NEXUS || Params::getInstance().intype == IN_NEWICK) { + if (Params::getInstance().run_mode == RunMode::LINEAR_PROGRAMMING && Params::getInstance().find_pd_min) + outError("Current linear programming does not support finding minimal PD sets!"); + if (Params::getInstance().find_all && Params::getInstance().run_mode == RunMode::LINEAR_PROGRAMMING) + Params::getInstance().binary_programming = true; + runPDSplit(Params::getInstance()); + } else { + outError("Unknown file input format"); + } + } + + time(&start_time); + cout << "Date and Time: " << ctime(&start_time); + delete checkpoint; + + finish_random(); -#ifdef _IQTREE_MPI - MPI_Finalize(); -#endif - return EXIT_SUCCESS; + return EXIT_SUCCESS; } diff --git a/main/phyloanalysis.cpp b/main/phyloanalysis.cpp index 749e9bfb9..625b0235e 100644 --- a/main/phyloanalysis.cpp +++ b/main/phyloanalysis.cpp @@ -27,9 +27,11 @@ #include "tree/phylotree.h" #include "tree/phylosupertree.h" #include "tree/phylosupertreeplen.h" +#include "tree/phylosupertreeunlinked.h" #include "phyloanalysis.h" #include "alignment/alignment.h" #include "alignment/superalignment.h" +#include "alignment/superalignmentunlinked.h" #include "tree/iqtree.h" #include "tree/phylotreemixlen.h" #include 
"model/modelmarkov.h" @@ -58,20 +60,38 @@ #include "utils/timeutil.h" #include "tree/upperbounds.h" #include "utils/MPIHelper.h" +#include "timetree.h" +#ifdef USE_BOOSTER +extern "C" { +#include "booster/booster.h" +} +#endif + +#ifdef IQTREE_TERRAPHAST + #include "terrace/terrace.h" +#endif -void reportReferences(Params ¶ms, ofstream &out, string &original_model) { +void reportReferences(Params ¶ms, ofstream &out) { + + out << "To cite IQ-TREE please use:" << endl << endl + << "Bui Quang Minh, Heiko A. Schmidt, Olga Chernomor, Dominik Schrempf," << endl + << "Michael D. Woodhams, Arndt von Haeseler, and Robert Lanfear (2020)" << endl + << "IQ-TREE 2: New models and efficient methods for phylogenetic inference" << endl + << "in the genomic era. Mol. Biol. Evol., in press." << endl + << "https://doi.org/10.1093/molbev/msaa015" << endl << endl; + bool modelfinder_only = false; - if (original_model.substr(0,4) == "TEST" || original_model.substr(0, 2) == "MF" || original_model.empty()) { + if (params.model_name.substr(0,4) == "TEST" || params.model_name.substr(0, 2) == "MF" || params.model_name.empty()) { out << "To cite ModelFinder please use: " << endl << endl << "Subha Kalyaanamoorthy, Bui Quang Minh, Thomas KF Wong, Arndt von Haeseler," << endl << "and Lars S Jermiin (2017) ModelFinder: Fast model selection for" << endl << "accurate phylogenetic estimates. Nature Methods, 14:587–589." 
<< endl << "https://doi.org/10.1038/nmeth.4285" << endl << endl; - if (original_model.find("ONLY") != string::npos || (original_model.substr(0,2)=="MF" && original_model.substr(0,3)!="MFP")) + if (params.model_name.find("ONLY") != string::npos || (params.model_name.substr(0,2)=="MF" && params.model_name.substr(0,3)!="MFP")) modelfinder_only = true; } - if (posPOMO(original_model) != string::npos) { + if (posPOMO(params.model_name) != string::npos) { out << "For polymorphism-aware models please cite:" << endl << endl << "Dominik Schrempf, Bui Quang Minh, Nicola De Maio, Arndt von Haeseler, and Carolin Kosiol" << endl << "(2016) Reversible polymorphism-aware phylogenetic models and their application to" << endl @@ -79,52 +99,58 @@ void reportReferences(Params ¶ms, ofstream &out, string &original_model) { << "https://doi.org/10.1016/j.jtbi.2016.07.042" << endl << endl; } - if (!modelfinder_only) - out << "To cite IQ-TREE please use:" << endl << endl - << "Lam-Tung Nguyen, Heiko A. Schmidt, Arndt von Haeseler, and Bui Quang Minh" << endl - << "(2015) IQ-TREE: A fast and effective stochastic algorithm for estimating" << endl - << "maximum likelihood phylogenies. Mol Biol Evol, 32:268-274." << endl - << "https://doi.org/10.1093/molbev/msu300" << endl << endl; - if (params.site_freq_file || params.tree_freq_file) out << "Since you used site-specific frequency model please also cite: " << endl << endl - << "Huai-Chun Wang, Edward Susko, Bui Quang Minh, and Andrew J. Roger (2017)" << endl + << "Huai-Chun Wang, Edward Susko, Bui Quang Minh, and Andrew J. Roger (2018)" << endl << "Modeling site heterogeneity with posterior mean site frequency profiles" << endl - << "accelerates accurate phylogenomic estimation. Syst Biol, in press." << endl + << "accelerates accurate phylogenomic estimation. Syst. Biol., 67:216–235." 
<< endl << "https://doi.org/10.1093/sysbio/syx068" << endl << endl; - if (params.gbo_replicates) - out << "Since you used ultrafast bootstrap (UFBoot) please also cite: " << endl << endl - << "Diep Thi Hoang, Olga Chernomor, Arndt von Haeseler, Bui Quang Minh," << endl - << "and Le Sy Vinh (2017) UFBoot2: Improving the ultrafast bootstrap" << endl - << "approximation. Mol Biol Evol, in press." << endl + if (params.gbo_replicates) + out << "Since you used ultrafast bootstrap (UFBoot) please also cite: " << endl << endl + << "Diep Thi Hoang, Olga Chernomor, Arndt von Haeseler, Bui Quang Minh," << endl + << "and Le Sy Vinh (2018) UFBoot2: Improving the ultrafast bootstrap" << endl + << "approximation. Mol. Biol. Evol., 35:518–522." << endl << "https://doi.org/10.1093/molbev/msx281" << endl << endl; if (params.partition_file) out << "Since you used partition models please also cite:" << endl << endl << "Olga Chernomor, Arndt von Haeseler, and Bui Quang Minh (2016)" << endl << "Terrace aware data structure for phylogenomic inference from" << endl - << "supermatrices. Syst Biol, 65:997-1008." << endl + << "supermatrices. Syst. Biol., 65:997-1008." << endl << "https://doi.org/10.1093/sysbio/syw037" << endl << endl; + if (params.terrace_analysis) + out << "Since you used terrace analysis please also cite:" << endl << endl + << "Biczok R, Bozsoky P, Eisenmann P, Ernst J, Ribizel T, Scholz F," << endl + << "Trefzer A, Weber F, Hamann M, Stamatakis A. (2018)" << endl + << "Two C++ libraries for counting trees on a phylogenetic" << endl + << "terrace. Bioinformatics 34:3399–3401." << endl + << "https://doi.org/10.1093/bioinformatics/bty384" << endl << endl; + + if (params.dating_method == "LSD") + out << "Since you used least square dating (LSD) please also cite: " << endl << endl + << "Thu-Hien To, Matthieu Jung, Samantha Lycett, Olivier Gascuel (2016)" << endl + << "Fast dating using least-squares criteria and algorithms. Syst. Biol. 65:82-97." 
<< endl + << "https://doi.org/10.1093/sysbio/syv068" << endl << endl; } void reportAlignment(ofstream &out, Alignment &alignment, int nremoved_seqs) { - out << "Input data: " << alignment.getNSeq()+nremoved_seqs << " sequences with " - << alignment.getNSite() << " "; - switch (alignment.seq_type) { - case SEQ_BINARY: out << "binary"; break; - case SEQ_DNA: out << "nucleotide"; break; - case SEQ_PROTEIN: out << "amino-acid"; break; - case SEQ_CODON: out << "codon"; break; - case SEQ_MORPH: out << "morphological"; break; - case SEQ_POMO: out << "PoMo"; break; - default: out << "unknown"; break; - } - out << " sites" << endl << "Number of constant sites: " - << round(alignment.frac_const_sites * alignment.getNSite()) - << " (= " << alignment.frac_const_sites * 100 << "% of all sites)" << endl + out << "Input data: " << alignment.getNSeq()+nremoved_seqs << " sequences with " + << alignment.getNSite() << " "; + switch (alignment.seq_type) { + case SEQ_BINARY: out << "binary"; break; + case SEQ_DNA: out << "nucleotide"; break; + case SEQ_PROTEIN: out << "amino-acid"; break; + case SEQ_CODON: out << "codon"; break; + case SEQ_MORPH: out << "morphological"; break; + case SEQ_POMO: out << "PoMo"; break; + default: out << "unknown"; break; + } + out << " sites" << endl << "Number of constant sites: " + << round(alignment.frac_const_sites * alignment.getNSite()) + << " (= " << alignment.frac_const_sites * 100 << "% of all sites)" << endl << "Number of invariant (constant or ambiguous constant) sites: " << round(alignment.frac_invariant_sites * alignment.getNSite()) @@ -133,183 +159,213 @@ void reportAlignment(ofstream &out, Alignment &alignment, int nremoved_seqs) { << "Number of parsimony informative sites: " << alignment.num_informative_sites << endl << "Number of distinct site patterns: " << alignment.size() << endl - << endl; + << endl; } /* void pruneModelInfo(ModelCheckpoint &model_info, PhyloSuperTree *tree) { - ModelCheckpoint res_info; - for (vector::iterator it = 
tree->part_info.begin(); it != tree->part_info.end(); it++) { - for (ModelCheckpoint::iterator mit = model_info.begin(); mit != model_info.end(); mit++) - if (mit->set_name == it->name) - res_info.push_back(*mit); - } - model_info = res_info; + ModelCheckpoint res_info; + for (vector::iterator it = tree->part_info.begin(); it != tree->part_info.end(); it++) { + for (ModelCheckpoint::iterator mit = model_info.begin(); mit != model_info.end(); mit++) + if (mit->set_name == it->name) + res_info.push_back(*mit); + } + model_info = res_info; } */ void reportModelSelection(ofstream &out, Params ¶ms, ModelCheckpoint *model_info, PhyloTree *tree) { - out << "Best-fit model according to " << criterionName(params.model_test_criterion) << ": "; -// ModelCheckpoint::iterator it; + out << "Best-fit model according to " << criterionName(params.model_test_criterion) << ": "; +// ModelCheckpoint::iterator it; string best_model; PhyloSuperTree *stree = (tree->isSuperTree()) ? ((PhyloSuperTree*)tree) : NULL; - if (tree->isSuperTree()) { + if (tree->isSuperTree()) { + SuperAlignment *saln = (SuperAlignment*)stree->aln; for (int part = 0; part != stree->size(); part++) { if (part != 0) out << ","; - out << stree->part_info[part].model_name << ":" << stree->part_info[part].name; - } -// string set_name = ""; -// for (it = model_info.begin(); it != model_info.end(); it++) { -// if (it->set_name != set_name) { -// if (set_name != "") -// out << ","; -// out << it->name << ":" << it->set_name; -// set_name = it->set_name; -// } -// } - } else { -// out << model_info[0].name; + out << saln->partitions[part]->model_name << ":" << saln->partitions[part]->name; + } +// string set_name = ""; +// for (it = model_info.begin(); it != model_info.end(); it++) { +// if (it->set_name != set_name) { +// if (set_name != "") +// out << ","; +// out << it->name << ":" << it->set_name; +// set_name = it->set_name; +// } +// } + } else { +// out << model_info[0].name; model_info->getBestModel(best_model); 
out << best_model; - } - - if (tree->isSuperTree()) { - out << endl << endl << "List of best-fit models per partition:" << endl << endl; - } else { - out << endl << endl << "List of models sorted by " - << ((params.model_test_criterion == MTC_BIC) ? "BIC" : - ((params.model_test_criterion == MTC_AIC) ? "AIC" : "AICc")) - << " scores: " << endl << endl; - } - if (tree->isSuperTree()) - out << " ID "; - out << "Model LogL AIC w-AIC AICc w-AICc BIC w-BIC" << endl; - /* - if (is_partitioned) - out << "----------"; - - out << "----------------------------------------------------------------------------------------" << endl; - */ - int setid = 1; - - vector models; + } + + if (tree->isSuperTree()) { + out << endl << endl << "List of best-fit models per partition:" << endl << endl; + } else { + out << endl << endl << "List of models sorted by " + << ((params.model_test_criterion == MTC_BIC) ? "BIC" : + ((params.model_test_criterion == MTC_AIC) ? "AIC" : "AICc")) + << " scores: " << endl << endl; + } + if (tree->isSuperTree()) + out << " ID "; + out << "Model LogL AIC w-AIC AICc w-AICc BIC w-BIC" << endl; + /* + if (is_partitioned) + out << "----------"; + + out << "----------------------------------------------------------------------------------------" << endl; + */ + int setid = 1; + out.precision(3); + + CandidateModelSet models; model_info->getOrderedModels(tree, models); for (auto it = models.begin(); it != models.end(); it++) { - if (tree->isSuperTree()) { - out.width(4); - out << right << setid << " "; + if (tree->isSuperTree()) { + out.width(4); + out << right << setid << " "; setid++; - } - out.width(15); - out << left << it->name << " "; - out.width(11); - out << right << it->logl << " "; - out.width(11); - out << it->AIC_score << ((it->AIC_conf) ? " + " : " - ") << it->AIC_weight << " "; - out.width(11); - out << it->AICc_score << ((it->AICc_conf) ? " + " : " - ") << it->AICc_weight << " "; - out.width(11); - out << it->BIC_score << ((it->BIC_conf) ? 
" + " : " - ") << it->BIC_weight; - out << endl; + } + out.width(15); + out << left << it->getName() << " "; + out.width(11); + out << right << it->logl << " "; + out.width(11); + out << it->AIC_score << ((it->AIC_conf) ? " + " : " - "); + out.unsetf(ios::fixed); + out.width(8); + out << it->AIC_weight << " "; + out.setf(ios::fixed); + out.width(11); + out << it->AICc_score << ((it->AICc_conf) ? " + " : " - "); + out.unsetf(ios::fixed); + out.width(8); + out << it->AICc_weight << " "; + out.setf(ios::fixed); + out.width(11); + out << it->BIC_score << ((it->BIC_conf) ? " + " : " - "); + out.unsetf(ios::fixed); + out.width(8); + out << it->BIC_weight; + out.setf(ios::fixed); + out << endl; } + out.precision(4); /* TODO - for (it = model_info.begin(); it != model_info.end(); it++) { - if (it->AIC_score == DBL_MAX) continue; - if (it != model_info.begin() && it->set_name != (it-1)->set_name) - setid++; - if (is_partitioned && it != model_info.begin() && it->set_name == (it-1)->set_name) - continue; - if (is_partitioned) { - out.width(4); - out << right << setid << " "; - } - out.width(15); - out << left << it->name << " "; - out.width(11); - out << right << it->logl << " "; - out.width(11); - out << it->AIC_score << ((it->AIC_conf) ? " + " : " - ") << it->AIC_weight << " "; - out.width(11); - out << it->AICc_score << ((it->AICc_conf) ? " + " : " - ") << it->AICc_weight << " "; - out.width(11); - out << it->BIC_score << ((it->BIC_conf) ? 
" + " : " - ") << it->BIC_weight; - out << endl; - } + for (it = model_info.begin(); it != model_info.end(); it++) { + if (it->AIC_score == DBL_MAX) continue; + if (it != model_info.begin() && it->set_name != (it-1)->set_name) + setid++; + if (is_partitioned && it != model_info.begin() && it->set_name == (it-1)->set_name) + continue; + if (is_partitioned) { + out.width(4); + out << right << setid << " "; + } + out.width(15); + out << left << it->name << " "; + out.width(11); + out << right << it->logl << " "; + out.width(11); + out << it->AIC_score << ((it->AIC_conf) ? " + " : " - ") << it->AIC_weight << " "; + out.width(11); + out << it->AICc_score << ((it->AICc_conf) ? " + " : " - ") << it->AICc_weight << " "; + out.width(11); + out << it->BIC_score << ((it->BIC_conf) ? " + " : " - ") << it->BIC_weight; + out << endl; + } */ - out << endl; - out << "AIC, w-AIC : Akaike information criterion scores and weights." << endl - << "AICc, w-AICc : Corrected AIC scores and weights." << endl - << "BIC, w-BIC : Bayesian information criterion scores and weights." << endl << endl - - << "Plus signs denote the 95% confidence sets." << endl - << "Minus signs denote significant exclusion." 
<num_states == m->num_states); + int i, j, k; + ASSERT(aln->num_states == m->num_states); double *rate_mat = new double[m->num_states * m->num_states]; if (!m->isSiteSpecificModel()) m->getRateMatrix(rate_mat); else ((ModelSet*)m)->front()->getRateMatrix(rate_mat); - if (m->num_states <= 4) { - out << "Rate parameter R:" << endl << endl; - - if (m->num_states > 4) - out << fixed; - if (m->isReversible()) { - for (i = 0, k = 0; i < m->num_states - 1; i++) - for (j = i + 1; j < m->num_states; j++, k++) { - out << " " << aln->convertStateBackStr(i) << "-" << aln->convertStateBackStr(j) << ": " - << rate_mat[k]; - if (m->num_states <= 4) - out << endl; - else if (k % 5 == 4) - out << endl; - } + if (m->num_states <= 4) { + out << "Rate parameter R:" << endl << endl; + + if (m->num_states > 4) + out << fixed; + if (m->isReversible()) { + for (i = 0, k = 0; i < m->num_states - 1; i++) + for (j = i + 1; j < m->num_states; j++, k++) { + out << " " << aln->convertStateBackStr(i) << "-" << aln->convertStateBackStr(j) << ": " + << rate_mat[k]; + if (m->num_states <= 4) + out << endl; + else if (k % 5 == 4) + out << endl; + } + + } else { // non-reversible model + for (i = 0, k = 0; i < m->num_states; i++) + for (j = 0; j < m->num_states; j++) + if (i != j) { + out << " " << aln->convertStateBackStr(i) << "-" << aln->convertStateBackStr(j) + << ": " << rate_mat[k]; + if (m->num_states <= 4) + out << endl; + else if (k % 5 == 4) + out << endl; + k++; + } - } else { // non-reversible model - for (i = 0, k = 0; i < m->num_states; i++) - for (j = 0; j < m->num_states; j++) - if (i != j) { - out << " " << aln->convertStateBackStr(i) << "-" << aln->convertStateBackStr(j) - << ": " << rate_mat[k]; - if (m->num_states <= 4) - out << endl; - else if (k % 5 == 4) - out << endl; - k++; - } - - } - - //if (tree.aln->num_states > 4) - out << endl; - out.unsetf(ios_base::fixed); - } else if (aln->seq_type == SEQ_PROTEIN && m->getNDim() > 20) { + } + + //if (tree.aln->num_states > 4) + out 
<< endl; + out.unsetf(ios_base::fixed); + } else if (aln->seq_type == SEQ_PROTEIN && m->getNDim() > 20) { ASSERT(m->num_states == 20); out << "WARNING: This model has " << m->getNDim() + m->getNDimFreq() << " parameters that may be overfitting. Please use with caution!" << endl << endl; double full_mat[400]; - for (i = 0, k = 0; i < m->num_states - 1; i++) - for (j = i + 1; j < m->num_states; j++, k++) { - full_mat[i*m->num_states+j] = rate_mat[k]; + + out.precision(6); + + if (m->isReversible()) { + for (i = 0, k = 0; i < m->num_states - 1; i++) + for (j = i + 1; j < m->num_states; j++, k++) { + full_mat[i*m->num_states+j] = rate_mat[k]; + } + out << "Substitution parameters (lower-diagonal) and state frequencies in PAML format (can be used as input for IQ-TREE): " << endl << endl; + for (i = 1; i < m->num_states; i++) { + for (j = 0; j < i; j++) + out << " " << full_mat[j*m->num_states+i]; + out << endl; + } + } else { + // non-reversible model + m->getQMatrix(full_mat); + out << "Full Q matrix and state frequencies (can be used as input for IQ-TREE): " << endl << endl; + for (i = 0; i < m->num_states; i++) { + for (j = 0; j < m->num_states; j++) + out << " " << full_mat[i*m->num_states+j]; + out << endl; } - out << "Substitution parameters (lower-diagonal) and state frequencies in PAML format (can be used as input for IQ-TREE): " << endl << endl; - for (i = 1; i < m->num_states; i++) { - for (j = 0; j < i; j++) - out << "\t" << full_mat[j*m->num_states+i]; - out << endl; } double state_freq[20]; m->getStateFrequency(state_freq); for (i = 0; i < m->num_states; i++) - out << "\t" << state_freq[i]; + out << " " << state_freq[i]; out << endl << endl; + out.precision(4); } delete[] rate_mat; @@ -319,173 +375,185 @@ void reportModel(ofstream &out, Alignment *aln, ModelSubst *m) { return; } - out << "State frequencies: "; - if (m->isSiteSpecificModel()) - out << "(site specific frequencies)" << endl << endl; - else { + out << "State frequencies: "; + if 
(m->isSiteSpecificModel()) + out << "(site specific frequencies)" << endl << endl; + else { // 2016-11-03: commented out as this is not correct anymore -// if (!m->isReversible()) -// out << "(inferred from Q matrix)" << endl; -// else - switch (m->getFreqType()) { - case FREQ_EMPIRICAL: - out << "(empirical counts from alignment)" << endl; - break; - case FREQ_ESTIMATE: - out << "(estimated with maximum likelihood)" << endl; - break; - case FREQ_USER_DEFINED: - out << ((aln->seq_type == SEQ_PROTEIN) ? "(model)" : "(user-defined)") << endl; - break; - case FREQ_EQUAL: - out << "(equal frequencies)" << endl; - break; - default: - break; - } - out << endl; +// if (!m->isReversible()) +// out << "(inferred from Q matrix)" << endl; +// else + switch (m->getFreqType()) { + case FREQ_EMPIRICAL: + out << "(empirical counts from alignment)" << endl; + break; + case FREQ_ESTIMATE: + out << "(estimated with maximum likelihood)" << endl; + break; + case FREQ_USER_DEFINED: + out << ((aln->seq_type == SEQ_PROTEIN) ? "(model)" : "(user-defined)") << endl; + break; + case FREQ_EQUAL: + out << "(equal frequencies)" << endl; + break; + default: + break; + } + out << endl; - if ((m->getFreqType() != FREQ_USER_DEFINED || aln->seq_type == SEQ_DNA) && m->getFreqType() != FREQ_EQUAL) { - double *state_freqs = new double[m->num_states]; - m->getStateFrequency(state_freqs); + if ((m->getFreqType() != FREQ_USER_DEFINED || aln->seq_type == SEQ_DNA) && m->getFreqType() != FREQ_EQUAL) { + double *state_freqs = new double[m->num_states]; + m->getStateFrequency(state_freqs); int ncols=(aln->seq_type == SEQ_CODON) ? 
4 : 1; - for (i = 0; i < m->num_states; i++) { - out << " pi(" << aln->convertStateBackStr(i) << ") = " << state_freqs[i]; + for (i = 0; i < m->num_states; i++) { + out << " pi(" << aln->convertStateBackStr(i) << ") = " << state_freqs[i]; if (i % ncols == ncols-1) out << endl; } - delete[] state_freqs; - out << endl; - } - if (m->num_states <= 4) { - // report Q matrix - double *q_mat = new double[m->num_states * m->num_states]; - m->getQMatrix(q_mat); - - out << "Rate matrix Q:" << endl << endl; - for (i = 0, k = 0; i < m->num_states; i++) { - out << " " << aln->convertStateBackStr(i); - for (j = 0; j < m->num_states; j++, k++) { - out << " "; - out.width(8); - out << q_mat[k]; - } - out << endl; - } - out << endl; - delete[] q_mat; - } - } + delete[] state_freqs; + out << endl; + } + if (m->num_states <= 4 || verbose_mode >= VB_MED) { + // report Q matrix + if (verbose_mode >= VB_MED) + out.precision(6); + double *q_mat = new double[m->num_states * m->num_states]; + m->getQMatrix(q_mat); + + out << "Rate matrix Q:" << endl << endl; + for (i = 0, k = 0; i < m->num_states; i++) { + out << " " << aln->convertStateBackStr(i); + for (j = 0; j < m->num_states; j++, k++) { + out << " "; + out.width(8); + out << q_mat[k]; + } + out << endl; + } + out << endl; + delete[] q_mat; + } + } } void reportModel(ofstream &out, PhyloTree &tree) { -// int i, j, k; - int i; - - if (tree.getModel()->isMixture() && !tree.getModel()->isPolymorphismAware()) { - out << "Mixture model of substitution: " << tree.getModelName() << endl; -// out << "Full name: " << tree.getModelName() << endl; - ModelSubst *mmodel = tree.getModel(); - out << endl << " No Component Rate Weight Parameters" << endl; - i = 0; +// int i, j, k; + int i; + + if (tree.getModel()->isMixture() && !tree.getModel()->isPolymorphismAware()) { + out << "Mixture model of substitution: " << tree.getModelName() << endl; +// out << "Full name: " << tree.getModelName() << endl; + ModelSubst *mmodel = tree.getModel(); + out << 
endl << " No Component Rate Weight Parameters" << endl; + i = 0; int nmix = mmodel->getNMixtures(); - for (i = 0; i < nmix; i++) { + for (i = 0; i < nmix; i++) { ModelMarkov *m = (ModelMarkov*)mmodel->getMixtureClass(i); - out.width(4); - out << right << i+1 << " "; - out.width(12); - out << left << (m)->name << " "; - out.width(7); - out << (m)->total_num_subst << " "; - out.width(7); - out << mmodel->getMixtureWeight(i) << " " << (m)->getNameParams() << endl; + out.width(4); + out << right << i+1 << " "; + out.width(12); + out << left << (m)->name << " "; + out.width(7); + out << (m)->total_num_subst << " "; + out.width(7); + out << mmodel->getMixtureWeight(i) << " " << (m)->getNameParams() << endl; if (tree.aln->seq_type == SEQ_POMO) { out << endl << "Model for mixture component " << i+1 << ": " << (m)->name << endl; reportModel(out, tree.aln, m); } - } + } if (tree.aln->seq_type != SEQ_POMO && tree.aln->seq_type != SEQ_DNA) - for (i = 0; i < nmix; i++) { + for (i = 0; i < nmix; i++) { ModelMarkov *m = (ModelMarkov*)mmodel->getMixtureClass(i); if (m->getFreqType() == FREQ_EQUAL || m->getFreqType() == FREQ_USER_DEFINED) continue; out << endl << "Model for mixture component " << i+1 << ": " << (m)->name << endl; reportModel(out, tree.aln, m); } - out << endl; - } else { - out << "Model of substitution: " << tree.getModelName() << endl << endl; - reportModel(out, tree.aln, tree.getModel()); - } + out << endl; + } else { + out << "Model of substitution: " << tree.getModelName() << endl << endl; + reportModel(out, tree.aln, tree.getModel()); + } } void reportRate(ostream &out, PhyloTree &tree) { - int i; - RateHeterogeneity *rate_model = tree.getRate(); - out << "Model of rate heterogeneity: " << rate_model->full_name << endl; - rate_model->writeInfo(out); - - if (rate_model->getNDiscreteRate() > 1 || rate_model->getPInvar() > 0.0) { - out << endl << " Category Relative_rate Proportion" << endl; - if (rate_model->getPInvar() > 0.0) - out << " 0 0 " << 
rate_model->getPInvar() - << endl; - int cats = rate_model->getNDiscreteRate(); - DoubleVector prop; - if (rate_model->getGammaShape() > 0 || rate_model->getPtnCat(0) < 0) { -// prop.resize(cats, (1.0 - rate_model->getPInvar()) / rate_model->getNRate()); - prop.resize(cats); - for (i = 0; i < cats; i++) - prop[i] = rate_model->getProp(i); - } else { - prop.resize(cats, 0.0); - for (i = 0; i < tree.aln->getNPattern(); i++) - prop[rate_model->getPtnCat(i)] += tree.aln->at(i).frequency; - for (i = 0; i < cats; i++) - prop[i] /= tree.aln->getNSite(); - } - for (i = 0; i < cats; i++) { - out << " " << i + 1 << " "; - out.width(14); - out << left << rate_model->getRate(i) << " " << prop[i]; - out << endl; - } - if (rate_model->isGammaRate()) { - out << "Relative rates are computed as " << ((rate_model->isGammaRate() == GAMMA_CUT_MEDIAN) ? "MEDIAN" : "MEAN") << - " of the portion of the Gamma distribution falling in the category." << endl; - } - } - /* - if (rate_model->getNDiscreteRate() > 1 || rate_model->isSiteSpecificRate()) - out << endl << "See file " << rate_file << " for site-specific rates and categories" << endl;*/ - out << endl; + RateHeterogeneity *rate_model = tree.getRate(); + out << "Model of rate heterogeneity: " << rate_model->full_name << endl; + rate_model->writeInfo(out); + + if (rate_model->getNDiscreteRate() > 1 || rate_model->getPInvar() > 0.0) { + out << endl << " Category Relative_rate Proportion" << endl; + if (rate_model->getPInvar() > 0.0) + out << " 0 0 " << rate_model->getPInvar() + << endl; + int cats = rate_model->getNDiscreteRate(); + DoubleVector prop; + if (rate_model->getGammaShape() > 0 || rate_model->getPtnCat(0) < 0) { + // prop.resize(cats, (1.0 - rate_model->getPInvar()) / rate_model->getNRate()); + prop.resize(cats); + for (size_t i = 0; i < cats; i++) { + prop[i] = rate_model->getProp(i); + } + } else { + prop.resize(cats, 0.0); + auto frequencies = tree.getConvertedSequenceFrequencies(); + size_t num_patterns = 
tree.aln->getNPattern(); + if (frequencies!=nullptr) { + for (size_t i = 0; i < num_patterns; i++) { + prop[rate_model->getPtnCat(i)] += frequencies[i]; + } + } else { + for (size_t i = 0; i < num_patterns; i++) { + prop[rate_model->getPtnCat(i)] += tree.aln->at(i).frequency; + } + } + for (size_t i = 0; i < cats; i++) { + prop[i] /= tree.aln->getNSite(); + } + } + for (size_t i = 0; i < cats; i++) { + out << " " << i + 1 << " "; + out.width(14); + out << left << rate_model->getRate(i) << " " << prop[i]; + out << endl; + } + if (rate_model->isGammaRate()) { + out << "Relative rates are computed as " << ((rate_model->isGammaRate() == GAMMA_CUT_MEDIAN) ? "MEDIAN" : "MEAN") << + " of the portion of the Gamma distribution falling in the category." << endl; + } + } + /* + if (rate_model->getNDiscreteRate() > 1 || rate_model->isSiteSpecificRate()) + out << endl << "See file " << rate_file << " for site-specific rates and categories" << endl;*/ + out << endl; } void reportTree(ofstream &out, Params ¶ms, PhyloTree &tree, double tree_lh, double lh_variance, double main_tree) { - double epsilon = 1.0 / tree.getAlnNSite(); - double totalLen = tree.treeLength(); - int df = tree.getModelFactory()->getNParameters(BRLEN_OPTIMIZE); - int ssize = tree.getAlnNSite(); - double AIC_score, AICc_score, BIC_score; - computeInformationScores(tree_lh, df, ssize, AIC_score, AICc_score, BIC_score); - - out << "Log-likelihood of the tree: " << fixed << tree_lh; + size_t ssize = tree.getAlnNSite(); + double epsilon = 1.0 / ssize; + double totalLen = tree.treeLength(); + int df = tree.getModelFactory()->getNParameters(BRLEN_OPTIMIZE); + double AIC_score, AICc_score, BIC_score; + computeInformationScores(tree_lh, df, ssize, AIC_score, AICc_score, BIC_score); + + out << "Log-likelihood of the tree: " << fixed << tree_lh; if (lh_variance > 0.0) out << " (s.e. 
" << sqrt(lh_variance) << ")"; out << endl; - out << "Unconstrained log-likelihood (without tree): " << tree.aln->computeUnconstrainedLogL() << endl; + out << "Unconstrained log-likelihood (without tree): " << tree.aln->computeUnconstrainedLogL() << endl; out << "Number of free parameters (#branches + #model parameters): " << df << endl; // if (ssize > df) { // if (ssize > 40*df) -// out << "Akaike information criterion (AIC) score: " << AIC_score << endl; +// out << "Akaike information criterion (AIC) score: " << AIC_score << endl; // else -// out << "Corrected Akaike information criterion (AICc) score: " << AICc_score << endl; +// out << "Corrected Akaike information criterion (AICc) score: " << AICc_score << endl; // -// out << "Bayesian information criterion (BIC) score: " << BIC_score << endl; +// out << "Bayesian information criterion (BIC) score: " << BIC_score << endl; // } else - out << "Akaike information criterion (AIC) score: " << AIC_score << endl; + out << "Akaike information criterion (AIC) score: " << AIC_score << endl; out << "Corrected Akaike information criterion (AICc) score: " << AICc_score << endl; out << "Bayesian information criterion (BIC) score: " << BIC_score << endl; @@ -528,24 +596,27 @@ void reportTree(ofstream &out, Params ¶ms, PhyloTree &tree, double tree_lh, out << "NOTE: The branch lengths of PoMo measure mutations and frequency shifts." << endl; out << "To compare PoMo branch lengths to DNA substitution models use the tree length" << endl; out << "measured in substitutions per site." << endl << endl; + out << "PoMo branch length = Substitution model branch length * N * N." 
<< endl << endl; out << "Total tree length (sum of branch lengths)" << endl; out << " - measured in number of mutations and frequency shifts per site: " << totalLen << endl; out << " - measured in number of substitutions per site (divided by N^2): " << totalLen / (N * N) << endl; } else out << "Total tree length (sum of branch lengths): " << totalLen << endl; - double totalLenInternal = tree.treeLengthInternal(epsilon); + double totalLenInternal = tree.treeLengthInternal(epsilon); double totalLenInternalP = totalLenInternal*100.0 / totalLen; if (tree.aln->seq_type == SEQ_POMO) { + int N = tree.aln->virtual_pop_size; + double totLenIntSub = totalLenInternal/(N * N); out << "Sum of internal branch lengths" << endl; out << "- measured in mutations and frequency shifts per site: " << totalLenInternal << " (" << totalLenInternalP << "% of tree length)" << endl; - out << "- measured in substitutions per site: " << totalLenInternal << " (" << totalLenInternalP << "% of tree length)" << endl; + out << "- measured in substitutions per site: " << totLenIntSub << " (" << totalLenInternalP << "% of tree length)" << endl; out << endl; } else { out << "Sum of internal branch lengths: " << totalLenInternal << " (" << totalLenInternalP << "% of tree length)" << endl; - // out << "Sum of internal branch lengths divided by total tree length: " - // << totalLenInternal / totalLen << endl; + // out << "Sum of internal branch lengths divided by total tree length: " + // << totalLenInternal / totalLen << endl; out << endl; } @@ -558,44 +629,50 @@ void reportTree(ofstream &out, Params ¶ms, PhyloTree &tree, double tree_lh, out << endl; } - //out << "ZERO BRANCH EPSILON = " << epsilon << endl; - int zero_internal_branches = tree.countZeroInternalBranches(NULL, NULL, epsilon); - if (zero_internal_branches > 0) { - //int zero_internal_branches = tree.countZeroInternalBranches(NULL, NULL, epsilon); - /* - out << "WARNING: " << zero_branches - << " branches of near-zero lengths (<" << epsilon 
<< ") and should be treated with caution!" - << endl; - */ - out << "WARNING: " << zero_internal_branches - << " near-zero internal branches (<" << epsilon << ") should be treated with caution" - << endl; - /* - cout << endl << "WARNING: " << zero_branches - << " branches of near-zero lengths (<" << epsilon << ") and should be treated with caution!" - << endl; - */ - out << " Such branches are denoted by '**' in the figure below" - << endl << endl; - } - int long_branches = tree.countLongBranches(NULL, NULL, params.max_branch_length-0.2); - if (long_branches > 0) { - //stringstream sstr; - out << "WARNING: " << long_branches << " too long branches (>" + if (params.partition_type == TOPO_UNLINKED) { + out << "Tree topologies are unlinked across partitions, thus no drawing will be displayed here" << endl; + out << endl; + return; + } + + //out << "ZERO BRANCH EPSILON = " << epsilon << endl; + int zero_internal_branches = tree.countZeroInternalBranches(NULL, NULL, epsilon); + if (zero_internal_branches > 0) { + //int zero_internal_branches = tree.countZeroInternalBranches(NULL, NULL, epsilon); + /* + out << "WARNING: " << zero_branches + << " branches of near-zero lengths (<" << epsilon << ") and should be treated with caution!" + << endl; + */ + out << "WARNING: " << zero_internal_branches + << " near-zero internal branches (<" << epsilon << ") should be treated with caution" + << endl; + /* + cout << endl << "WARNING: " << zero_branches + << " branches of near-zero lengths (<" << epsilon << ") and should be treated with caution!" + << endl; + */ + out << " Such branches are denoted by '**' in the figure below" + << endl << endl; + } + int long_branches = tree.countLongBranches(NULL, NULL, params.max_branch_length-0.2); + if (long_branches > 0) { + //stringstream sstr; + out << "WARNING: " << long_branches << " too long branches (>" << params.max_branch_length-0.2 << ") should be treated with caution!" 
<< endl; - //out << sstr.str(); - //cout << sstr.str(); - } + //out << sstr.str(); + //cout << sstr.str(); + } - //<< "Total tree length: " << tree.treeLength() << endl << endl - tree.sortTaxa(); + //<< "Total tree length: " << tree.treeLength() << endl << endl + tree.sortTaxa(); if (tree.rooted) out << "NOTE: Tree is ROOTED at virtual root '" << tree.root->name << "'" << endl; else out << "NOTE: Tree is UNROOTED although outgroup taxon '" << tree.root->name << "' is drawn at root" << endl; if (tree.isSuperTree() && params.partition_type == BRLEN_OPTIMIZE) - out << "NOTE: Branch lengths are weighted average over all partitions" << endl + out << "NOTE: Branch lengths are weighted average over all partitions" << endl << " (weighted by the number of sites in the partitions)" << endl; if (tree.isMixlen()) out << "NOTE: Branch lengths are weighted average over heterotachy classes" << endl; @@ -611,8 +688,8 @@ void reportTree(ofstream &out, Params ¶ms, PhyloTree &tree, double tree_lh, } } if (is_codon) - out << endl << "NOTE: Branch lengths are interpreted as number of nucleotide substitutions per codon site!" - << endl << " Rescale them by 1/3 if you want to have #nt substitutions per nt site" << endl; + out << endl << "NOTE: Branch lengths are interpreted as number of nucleotide substitutions per codon site!" 
+ << endl << " Rescale them by 1/3 if you want to have #nt substitutions per nt site" << endl; if (main_tree) if (params.aLRT_replicates > 0 || params.gbo_replicates || (params.num_bootstrap_samples && params.compute_ml_tree)) { out << "Numbers in parentheses are "; @@ -628,60 +705,75 @@ void reportTree(ofstream &out, Params ¶ms, PhyloTree &tree, double tree_lh, if (params.num_bootstrap_samples && params.compute_ml_tree) { if (params.aLRT_replicates > 0 || params.aLRT_test || params.aBayes_test) out << " /"; - out << " standard bootstrap support (%)"; + out << " standard " << RESAMPLE_NAME << " support (%)"; } if (params.gbo_replicates) { if (params.aLRT_replicates > 0 || params.aLRT_test || params.aBayes_test) out << " /"; - out << " ultrafast bootstrap support (%)"; + out << " ultrafast " << RESAMPLE_NAME << " support (%)"; } out << endl; } out << endl; - //tree.setExtendedFigChar(); - tree.drawTree(out, WT_BR_SCALE, epsilon); - + //tree.setExtendedFigChar(); + tree.setRootNode(params.root, true); + tree.drawTree(out, WT_BR_SCALE, epsilon); + out << "Tree in newick format:"; if (tree.isMixlen()) out << " (class branch lengths are given in [...] and separated by '/' )"; if (tree.aln->seq_type == SEQ_POMO) - out << " (measured in mutations and frequency shifts):"; + out << " (measured in mutations and frequency shifts)"; + out << endl << endl; + + tree.printTree(out, WT_BR_LEN | WT_BR_LEN_FIXED_WIDTH | WT_SORT_TAXA); + out << endl << endl; + + if (tree.aln->seq_type == SEQ_POMO) { + out << "Tree in newick format (measured in substitutions, see above):" << endl; + out << "WARNING: Only for comparison with substitution models." << endl; + out << " These are NOT the branch lengths inferred by PoMo." 
<< endl << endl; + double len_scale_old = tree.len_scale; + int N = tree.aln->virtual_pop_size; + tree.len_scale = 1.0/(N*N); + tree.printTree(out, WT_BR_SCALE | WT_BR_LEN | WT_BR_LEN_FIXED_WIDTH | WT_SORT_TAXA); + tree.len_scale = len_scale_old; out << endl << endl; + } - tree.printTree(out, WT_BR_LEN | WT_BR_LEN_FIXED_WIDTH | WT_SORT_TAXA); + tree.setRootNode(params.root, false); - out << endl << endl; } void reportCredits(ofstream &out) { - out << "CREDITS" << endl << "-------" << endl << endl - << "Some parts of the code were taken from the following packages/libraries:" - << endl << endl - << "Schmidt HA, Strimmer K, Vingron M, and von Haeseler A (2002)" << endl - << "TREE-PUZZLE: maximum likelihood phylogenetic analysis using quartets" << endl - << "and parallel computing. Bioinformatics, 18(3):502-504." << endl << endl - - //<< "The source code to construct the BIONJ tree were taken from BIONJ software:" - //<< endl << endl - << "Gascuel O (1997) BIONJ: an improved version of the NJ algorithm" << endl - << "based on a simple model of sequence data. Mol. Bio. Evol., 14:685-695." << endl << endl - - //<< "The Nexus file parser was taken from the Nexus Class Library:" - //<< endl << endl - << "Paul O. Lewis (2003) NCL: a C++ class library for interpreting data files in" << endl - << "NEXUS format. Bioinformatics, 19(17):2330-2331." << endl << endl - - << "Mascagni M and Srinivasan A (2000) Algorithm 806: SPRNG: A Scalable Library" << endl - << "for Pseudorandom Number Generation. ACM Transactions on Mathematical Software," << endl - << "26: 436-461." << endl << endl - - << "Guennebaud G, Jacob B, et al. (2010) Eigen v3. http://eigen.tuxfamily.org" << endl << endl; - /* - << "The Modeltest 3.7 source codes were taken from:" << endl << endl - << "David Posada and Keith A. Crandall (1998) MODELTEST: testing the model of" - << endl << "DNA substitution. Bioinformatics, 14(9):817-8." 
<< endl - */ + out << "CREDITS" << endl << "-------" << endl << endl + << "Some parts of the code were taken from the following packages/libraries:" + << endl << endl + << "Schmidt HA, Strimmer K, Vingron M, and von Haeseler A (2002)" << endl + << "TREE-PUZZLE: maximum likelihood phylogenetic analysis using quartets" << endl + << "and parallel computing. Bioinformatics, 18(3):502-504." << endl << endl + + //<< "The source code to construct the BIONJ tree were taken from BIONJ software:" + //<< endl << endl + << "Gascuel O (1997) BIONJ: an improved version of the NJ algorithm" << endl + << "based on a simple model of sequence data. Mol. Bio. Evol., 14:685-695." << endl << endl + + //<< "The Nexus file parser was taken from the Nexus Class Library:" + //<< endl << endl + << "Paul O. Lewis (2003) NCL: a C++ class library for interpreting data files in" << endl + << "NEXUS format. Bioinformatics, 19(17):2330-2331." << endl << endl + + << "Mascagni M and Srinivasan A (2000) Algorithm 806: SPRNG: A Scalable Library" << endl + << "for Pseudorandom Number Generation. ACM Transactions on Mathematical Software," << endl + << "26: 436-461." << endl << endl + + << "Guennebaud G, Jacob B, et al. (2010) Eigen v3. http://eigen.tuxfamily.org" << endl << endl; + /* + << "The Modeltest 3.7 source codes were taken from:" << endl << endl + << "David Posada and Keith A. Crandall (1998) MODELTEST: testing the model of" + << endl << "DNA substitution. Bioinformatics, 14(9):817-8." 
<< endl + */ } /*********************************************************** @@ -695,77 +787,83 @@ void searchGAMMAInvarByRestarting(IQTree &iqtree); void computeLoglFromUserInputGAMMAInvar(Params ¶ms, IQTree &iqtree); -void printOutfilesInfo(Params ¶ms, string &original_model, IQTree &tree) { +void printOutfilesInfo(Params ¶ms, IQTree &tree) { - cout << endl << "Analysis results written to: " << endl; + cout << endl << "Analysis results written to: " << endl; if (!(params.suppress_output_flags & OUT_IQTREE)) - cout<< " IQ-TREE report: " << params.out_prefix << ".iqtree" - << endl; - if (params.compute_ml_tree) { + cout<< " IQ-TREE report: " << params.out_prefix << ".iqtree" + << endl; + if (params.compute_ml_tree) { if (!(params.suppress_output_flags & OUT_TREEFILE)) { - if (original_model.find("ONLY") != string::npos || (original_model.substr(0,2)=="MF" && original_model.substr(0,3)!="MFP")) + if (params.model_name.find("ONLY") != string::npos || (params.model_name.substr(0,2)=="MF" && params.model_name.substr(0,3)!="MFP")) cout << " Tree used for ModelFinder: " << params.out_prefix << ".treefile" << endl; - else + else { cout << " Maximum-likelihood tree: " << params.out_prefix << ".treefile" << endl; + if (params.partition_type == BRLEN_OPTIMIZE && tree.isSuperTree()) + cout << " Partition trees: " << params.out_prefix << ".parttrees" << endl; + } } -// if (params.snni && params.write_local_optimal_trees) { -// cout << " Locally optimal trees (" << tree.candidateTrees.getNumLocalOptTrees() << "): " << params.out_prefix << ".suboptimal_trees" << endl; -// } - } - if (!params.user_file && params.start_tree == STT_BIONJ) { - cout << " BIONJ tree: " << params.out_prefix << ".bionj" - << endl; - } - if (!params.dist_file) { - //cout << " Juke-Cantor distances: " << params.out_prefix << ".jcdist" << endl; - if (params.compute_ml_dist) - cout << " Likelihood distances: " << params.out_prefix - << ".mldist" << endl; - if (params.print_conaln) - cout << " Concatenated 
alignment: " << params.out_prefix - << ".conaln" << endl; - } - if ((original_model.find("TEST") != string::npos || original_model.substr(0,2) == "MF") && tree.isSuperTree()) { - cout << " Best partitioning scheme: " << params.out_prefix << ".best_scheme.nex" << endl; - bool raxml_format_printed = true; - - for (vector::iterator it = ((PhyloSuperTree*)&tree)->part_info.begin(); - it != ((PhyloSuperTree*)&tree)->part_info.end(); it++) - if (!it->aln_file.empty()) { - raxml_format_printed = false; - break; - } - if (raxml_format_printed) - cout << " in RAxML format: " << params.out_prefix << ".best_scheme" << endl; - } - if ((tree.getRate()->getGammaShape() > 0 || params.partition_file) && params.print_site_rate) - cout << " Site-specific rates: " << params.out_prefix << ".rate" - << endl; +// if (params.snni && params.write_local_optimal_trees) { +// cout << " Locally optimal trees (" << tree.candidateTrees.getNumLocalOptTrees() << "): " << params.out_prefix << ".suboptimal_trees" << endl; +// } + } + if (params.num_runs > 1) + cout << " Trees from independent runs: " << params.out_prefix << ".runtrees" << endl; + + if (!params.user_file && params.start_tree == STT_BIONJ) { + cout << " BIONJ tree: " << params.out_prefix << ".bionj" + << endl; + } + if (!params.dist_file) { + //cout << " Juke-Cantor distances: " << params.out_prefix << ".jcdist" << endl; + if (params.compute_ml_dist) + cout << " Likelihood distances: " << params.out_prefix + << ".mldist" << endl; + if (params.print_conaln) + cout << " Concatenated alignment: " << params.out_prefix + << ".conaln" << endl; + } + if ((params.model_name.find("TEST") != string::npos || params.model_name.substr(0,2) == "MF") && tree.isSuperTree()) { + cout << " Best partitioning scheme: " << params.out_prefix << ".best_scheme.nex" << endl; + bool raxml_format_printed = true; + + for (auto it = ((SuperAlignment*)tree.aln)->partitions.begin(); + it != ((SuperAlignment*)tree.aln)->partitions.end(); it++) + if 
(!(*it)->aln_file.empty()) { + raxml_format_printed = false; + break; + } + if (raxml_format_printed) + cout << " in RAxML format: " << params.out_prefix << ".best_scheme" << endl; + } + if ((tree.getRate()->getGammaShape() > 0 || params.partition_file) && params.print_site_rate) + cout << " Site-specific rates: " << params.out_prefix << ".rate" + << endl; - if ((tree.getRate()->isSiteSpecificRate() || tree.getRate()->getPtnCat(0) >= 0) && params.print_site_rate) - cout << " Site-rates by MH model: " << params.out_prefix << ".rate" - << endl; + if ((tree.getRate()->isSiteSpecificRate() || tree.getRate()->getPtnCat(0) >= 0) && params.print_site_rate) + cout << " Site-rates by MH model: " << params.out_prefix << ".rate" + << endl; - if (params.print_site_lh) - cout << " Site log-likelihoods: " << params.out_prefix << ".sitelh" - << endl; + if (params.print_site_lh) + cout << " Site log-likelihoods: " << params.out_prefix << ".sitelh" + << endl; - if (params.print_partition_lh) - cout << " Partition log-likelihoods: " << params.out_prefix << ".partlh" - << endl; + if (params.print_partition_lh) + cout << " Partition log-likelihoods: " << params.out_prefix << ".partlh" + << endl; - if (params.print_site_prob) - cout << " Site probability per rate/mix: " << params.out_prefix << ".siteprob" - << endl; + if (params.print_site_prob) + cout << " Site probability per rate/mix: " << params.out_prefix << ".siteprob" + << endl; if (params.print_ancestral_sequence) { cout << " Ancestral state: " << params.out_prefix << ".state" << endl; // cout << " Ancestral sequences: " << params.out_prefix << ".aseq" << endl; } - if (params.write_intermediate_trees) - cout << " All intermediate trees: " << params.out_prefix << ".treels" - << endl; + if (params.write_intermediate_trees) + cout << " All intermediate trees: " << params.out_prefix << ".treels" + << endl; if (params.writeDistImdTrees) { tree.intermediateTrees.printTrees(string("ditrees")); @@ -773,80 +871,80 @@ void 
printOutfilesInfo(Params ¶ms, string &original_model, IQTree &tree) { cout << " Logl of intermediate trees: " << params.out_prefix << ".ditrees_lh" << endl; } - if (params.gbo_replicates) { - cout << endl << "Ultrafast bootstrap approximation results written to:" << endl - << " Split support values: " << params.out_prefix << ".splits.nex" << endl - << " Consensus tree: " << params.out_prefix << ".contree" << endl; - if (params.print_ufboot_trees) - cout << " UFBoot trees: " << params.out_prefix << ".ufboot" << endl; + if (params.gbo_replicates) { + cout << endl << "Ultrafast " << RESAMPLE_NAME << " approximation results written to:" << endl; + if (!tree.isSuperTreeUnlinked()) + cout << " Split support values: " << params.out_prefix << ".splits.nex" << endl + << " Consensus tree: " << params.out_prefix << ".contree" << endl; + if (params.print_ufboot_trees) + cout << " UFBoot trees: " << params.out_prefix << ".ufboot" << endl; - } + } - if (params.treeset_file) { - cout << " Evaluated user trees: " << params.out_prefix << ".trees" << endl; + if (!params.treeset_file.empty()) { + cout << " Evaluated user trees: " << params.out_prefix << ".trees" << endl; - if (params.print_tree_lh) { - cout << " Tree log-likelihoods: " << params.out_prefix << ".treelh" << endl; - } - } - if (params.lmap_num_quartets >= 0) { - cout << " Likelihood mapping plot (SVG): " << params.out_prefix << ".lmap.svg" << endl; - cout << " Likelihood mapping plot (EPS): " << params.out_prefix << ".lmap.eps" << endl; - } + if (params.print_tree_lh) { + cout << " Tree log-likelihoods: " << params.out_prefix << ".treelh" << endl; + } + } + if (params.lmap_num_quartets >= 0) { + cout << " Likelihood mapping plot (SVG): " << params.out_prefix << ".lmap.svg" << endl; + cout << " Likelihood mapping plot (EPS): " << params.out_prefix << ".lmap.eps" << endl; + } if (!(params.suppress_output_flags & OUT_LOG)) - cout << " Screen log file: " << params.out_prefix << ".log" << endl; - /* if (original_model == 
"WHTEST") - cout <<" WH-TEST report: " << params.out_prefix << ".whtest" << endl;*/ - cout << endl; + cout << " Screen log file: " << params.out_prefix << ".log" << endl; + /* if (params.model_name == "WHTEST") + cout <<" WH-TEST report: " << params.out_prefix << ".whtest" << endl;*/ -} + cout << endl; +} -void reportPhyloAnalysis(Params ¶ms, string &original_model, - IQTree &tree, ModelCheckpoint &model_info) { +void reportPhyloAnalysis(Params ¶ms, IQTree &tree, ModelCheckpoint &model_info) { if (!MPIHelper::getInstance().isMaster()) { return; } if (params.suppress_output_flags & OUT_IQTREE) { - printOutfilesInfo(params, original_model, tree); + printOutfilesInfo(params, tree); return; } - if (params.count_trees) { - // addon: print #distinct trees - cout << endl << "NOTE: " << pllTreeCounter.size() << " distinct trees evaluated during whole tree search" << endl; - - IntVector counts; - for (StringIntMap::iterator i = pllTreeCounter.begin(); i != pllTreeCounter.end(); i++) { - if (i->second > counts.size()) - counts.resize(i->second+1, 0); - counts[i->second]++; - } - for (IntVector::iterator i2 = counts.begin(); i2 != counts.end(); i2++) { - if (*i2 != 0) { - cout << "#Trees occurring " << (i2-counts.begin()) << " times: " << *i2 << endl; - } - } - } - string outfile = params.out_prefix; - - outfile += ".iqtree"; - try { - ofstream out; - out.exceptions(ios::failbit | ios::badbit); - out.open(outfile.c_str()); - out << "IQ-TREE " << iqtree_VERSION_MAJOR << "." << iqtree_VERSION_MINOR - << "." 
<< iqtree_VERSION_PATCH << " built " << __DATE__ << endl - << endl; - if (params.partition_file) - out << "Partition file name: " << params.partition_file << endl; - if (params.aln_file) - out << "Input file name: " << params.aln_file << endl; - - if (params.user_file) - out << "User tree file name: " << params.user_file << endl; - out << "Type of analysis: "; - bool modelfinder = original_model.substr(0,4)=="TEST" || original_model.substr(0,2) == "MF" || original_model.empty(); + if (params.count_trees) { + // addon: print #distinct trees + cout << endl << "NOTE: " << pllTreeCounter.size() << " distinct trees evaluated during whole tree search" << endl; + + IntVector counts; + for (StringIntMap::iterator i = pllTreeCounter.begin(); i != pllTreeCounter.end(); i++) { + if (i->second > counts.size()) + counts.resize(i->second+1, 0); + counts[i->second]++; + } + for (IntVector::iterator i2 = counts.begin(); i2 != counts.end(); i2++) { + if (*i2 != 0) { + cout << "#Trees occurring " << (i2-counts.begin()) << " times: " << *i2 << endl; + } + } + } + string outfile = params.out_prefix; + + outfile += ".iqtree"; + try { + ofstream out; + out.exceptions(ios::failbit | ios::badbit); + out.open(outfile.c_str()); + out << "IQ-TREE " << iqtree_VERSION_MAJOR << "." 
<< iqtree_VERSION_MINOR + << iqtree_VERSION_PATCH << " COVID-edition built " << __DATE__ << endl + << endl; + if (params.partition_file) + out << "Partition file name: " << params.partition_file << endl; + if (params.aln_file) + out << "Input file name: " << params.aln_file << endl; + + if (params.user_file) + out << "User tree file name: " << params.user_file << endl; + out << "Type of analysis: "; + bool modelfinder = params.model_name.substr(0,4)=="TEST" || params.model_name.substr(0,2) == "MF" || params.model_name.empty(); if (modelfinder) out << "ModelFinder"; if (params.compute_ml_tree) { @@ -857,177 +955,212 @@ void reportPhyloAnalysis(Params ¶ms, string &original_model, if (params.num_bootstrap_samples > 0) { if (params.compute_ml_tree) out << " + "; - out << "non-parametric bootstrap (" << params.num_bootstrap_samples + out << "non-parametric " << RESAMPLE_NAME << " (" << params.num_bootstrap_samples << " replicates)"; } if (params.gbo_replicates > 0) { - out << " + ultrafast bootstrap (" << params.gbo_replicates << " replicates)"; + out << " + ultrafast " << RESAMPLE_NAME << " (" << params.gbo_replicates << " replicates)"; } - out << endl; - out << "Random seed number: " << params.ran_seed << endl << endl; - out << "REFERENCES" << endl << "----------" << endl << endl; - reportReferences(params, out, original_model); + out << endl; + out << "Random seed number: " << params.ran_seed << endl << endl; + out << "REFERENCES" << endl << "----------" << endl << endl; + reportReferences(params, out); - out << "SEQUENCE ALIGNMENT" << endl << "------------------" << endl - << endl; - if (tree.isSuperTree()) { + out << "SEQUENCE ALIGNMENT" << endl << "------------------" << endl + << endl; + if (tree.isSuperTree()) { // TODO DS: Changes may be needed here for PoMo. 
- out << "Input data: " << tree.aln->getNSeq()+tree.removed_seqs.size() << " taxa with " - << tree.aln->getNSite() << " partitions and " - << tree.getAlnNSite() << " total sites (" - << ((SuperAlignment*)tree.aln)->computeMissingData()*100 << "% missing data)" << endl << endl; - - PhyloSuperTree *stree = (PhyloSuperTree*) &tree; - int namelen = stree->getMaxPartNameLength(); - int part; - out.width(max(namelen+6,10)); - out << left << " ID Name" << " Type\tSeq\tSite\tUnique\tInfor\tInvar\tConst" << endl; - //out << string(namelen+54, '-') << endl; - part = 0; - for (PhyloSuperTree::iterator it = stree->begin(); it != stree->end(); it++, part++) { - //out << "FOR PARTITION " << stree->part_info[part].name << ":" << endl << endl; - //reportAlignment(out, *((*it)->aln)); - out.width(4); - out << right << part+1 << " "; - out.width(max(namelen,4)); - out << left << stree->part_info[part].name << " "; - switch ((*it)->aln->seq_type) { - case SEQ_BINARY: out << "BIN"; break; - case SEQ_CODON: out << "CODON"; break; - case SEQ_DNA: out << "DNA"; break; - case SEQ_MORPH: out << "MORPH"; break; - case SEQ_MULTISTATE: out << "TINA"; break; - case SEQ_PROTEIN: out << "AA"; break; - case SEQ_POMO: out << "POMO"; break; - case SEQ_UNKNOWN: out << "???"; break; - } - out << "\t" << (*it)->aln->getNSeq() << "\t" << (*it)->aln->getNSite() + out << "Input data: " << tree.aln->getNSeq()+tree.removed_seqs.size() << " taxa with " + << tree.aln->getNSite() << " partitions and " + << tree.getAlnNSite() << " total sites (" + << ((SuperAlignment*)tree.aln)->computeMissingData()*100 << "% missing data)" << endl << endl; + + PhyloSuperTree *stree = (PhyloSuperTree*) &tree; + int namelen = stree->getMaxPartNameLength(); + int part; + out.width(max(namelen+6,10)); + out << left << " ID Name" << " Type\tSeq\tSite\tUnique\tInfor\tInvar\tConst" << endl; + //out << string(namelen+54, '-') << endl; + part = 0; + for (PhyloSuperTree::iterator it = stree->begin(); it != stree->end(); it++, part++) { 
+ //out << "FOR PARTITION " << stree->part_info[part].name << ":" << endl << endl; + //reportAlignment(out, *((*it)->aln)); + out.width(4); + out << right << part+1 << " "; + out.width(max(namelen,4)); + out << left << (*it)->aln->name << " "; + switch ((*it)->aln->seq_type) { + case SEQ_BINARY: out << "BIN"; break; + case SEQ_CODON: out << "CODON"; break; + case SEQ_DNA: out << "DNA"; break; + case SEQ_MORPH: out << "MORPH"; break; + case SEQ_MULTISTATE: out << "MULTI"; break; + case SEQ_PROTEIN: out << "AA"; break; + case SEQ_POMO: out << "POMO"; break; + case SEQ_UNKNOWN: out << "???"; break; + } + out << "\t" << (*it)->aln->getNSeq() << "\t" << (*it)->aln->getNSite() << "\t" << (*it)->aln->getNPattern() << "\t" << (*it)->aln->num_informative_sites << "\t" << (*it)->getAlnNSite() - (*it)->aln->num_variant_sites << "\t" << int((*it)->aln->frac_const_sites*(*it)->getAlnNSite()) << endl; - } - out << endl << "Column meanings:" << endl + } + out << endl << "Column meanings:" << endl << " Unique: Number of unique site patterns" << endl << " Infor: Number of parsimony-informative sites" << endl << " Invar: Number of invariant sites" << endl << " Const: Number of constant sites (can be subset of invariant sites)" << endl << endl; - } else - reportAlignment(out, *(tree.aln), tree.removed_seqs.size()); - - out.precision(4); - out << fixed; - - if (!model_info.empty()) { - out << "ModelFinder" << endl << "-----------" << endl << endl; -// if (tree.isSuperTree()) -// pruneModelInfo(model_info, (PhyloSuperTree*)&tree); - reportModelSelection(out, params, &model_info, &tree); - } - - out << "SUBSTITUTION PROCESS" << endl << "--------------------" << endl - << endl; - if (tree.isSuperTree()) { - if(params.partition_type == BRLEN_SCALE) - out << "Edge-linked-proportional partition model but separate models between partitions" << endl << endl; - else if(params.partition_type == BRLEN_FIX) - out << "Edge-linked-equal partition model but separate models between partitions" << 
endl << endl; - else - out << "Edge-unlinked partition model and separate models between partitions" << endl << endl; - PhyloSuperTree *stree = (PhyloSuperTree*) &tree; - PhyloSuperTree::iterator it; - int part; - if(params.partition_type != BRLEN_OPTIMIZE) - out << " ID Model Speed Parameters" << endl; - else - out << " ID Model TreeLen Parameters" << endl; - //out << "-------------------------------------" << endl; - for (it = stree->begin(), part = 0; it != stree->end(); it++, part++) { - out.width(4); - out << right << (part+1) << " "; - out.width(14); - if(params.partition_type != BRLEN_OPTIMIZE) - out << left << (*it)->getModelName() << " " << stree->part_info[part].part_rate << " " << (*it)->getModelNameParams() << endl; - else - out << left << (*it)->getModelName() << " " << (*it)->treeLength() << " " << (*it)->getModelNameParams() << endl; - } - out << endl; - /* - for (it = stree->begin(), part = 0; it != stree->end(); it++, part++) { - reportModel(out, *(*it)); - reportRate(out, *(*it)); - }*/ - } else { - reportModel(out, tree); - reportRate(out, tree); - } - - if (params.lmap_num_quartets >= 0) { - tree.reportLikelihoodMapping(out); - } - - - /* - out << "RATE HETEROGENEITY" << endl << "------------------" << endl - << endl; - if (tree.isSuperTree()) { - PhyloSuperTree *stree = (PhyloSuperTree*) &tree; - int part = 0; - for (PhyloSuperTree::iterator it = stree->begin(); - it != stree->end(); it++, part++) { - out << "FOR PARTITION " << stree->part_info[part].name << ":" - << endl << endl; - reportRate(out, *(*it)); - } - } else - reportRate(out, tree); - */ - // Bootstrap analysis: - //Display as outgroup: a - - if (original_model == "WHTEST") { - out << "TEST OF MODEL HOMOGENEITY" << endl - << "-------------------------" << endl << endl; - out << "Delta of input data: " - << params.whtest_delta << endl; - out << ".95 quantile of Delta distribution: " - << params.whtest_delta_quantile << endl; - out << "Number of simulations performed: " - << 
params.whtest_simulations << endl; - out << "P-value: " - << params.whtest_p_value << endl; - if (params.whtest_p_value < 0.05) { - out - << "RESULT: Homogeneity assumption is rejected (p-value cutoff 0.05)" - << endl; - } else { - out - << "RESULT: Homogeneity assumption is NOT rejected (p-value cutoff 0.05)" - << endl; - } - out << endl << "*** For this result please cite:" << endl << endl; - out - << "G. Weiss and A. von Haeseler (2003) Testing substitution models" - << endl - << "within a phylogenetic tree. Mol. Biol. Evol, 20(4):572-578" - << endl << endl; - } + } else + reportAlignment(out, *(tree.aln), tree.removed_seqs.size()); + + out.precision(4); + out << fixed; + + if (!model_info.empty()) { + out << "ModelFinder" << endl << "-----------" << endl << endl; +// if (tree.isSuperTree()) +// pruneModelInfo(model_info, (PhyloSuperTree*)&tree); + reportModelSelection(out, params, &model_info, &tree); + } + + out << "SUBSTITUTION PROCESS" << endl << "--------------------" << endl + << endl; + if (tree.isSuperTree()) { + if(params.partition_type == BRLEN_SCALE) + out << "Edge-linked-proportional partition model with "; + else if(params.partition_type == BRLEN_FIX) + out << "Edge-linked-equal partition model with "; + else if (params.partition_type == BRLEN_OPTIMIZE) + out << "Edge-unlinked partition model with "; + else + out << "Topology-unlinked partition model with "; + + if (params.model_joint) + out << "joint substitution model "; + else + out << "separate substitution models "; + if (params.link_alpha) + out << "and joint gamma shape"; + else + out << "and separate rates across sites"; + out << endl << endl; + + PhyloSuperTree *stree = (PhyloSuperTree*) &tree; + PhyloSuperTree::iterator it; + int part; + if(params.partition_type == BRLEN_OPTIMIZE || params.partition_type == TOPO_UNLINKED) + out << " ID Model TreeLen Parameters" << endl; + else + out << " ID Model Speed Parameters" << endl; + //out << "-------------------------------------" << endl; + for 
(it = stree->begin(), part = 0; it != stree->end(); it++, part++) { + out.width(4); + out << right << (part+1) << " "; + out.width(14); + if(params.partition_type == BRLEN_OPTIMIZE || params.partition_type == TOPO_UNLINKED) + out << left << (*it)->getModelName() << " " << (*it)->treeLength() << " " << (*it)->getModelNameParams() << endl; + else + out << left << (*it)->getModelName() << " " << stree->part_info[part].part_rate << " " << (*it)->getModelNameParams() << endl; + } + out << endl; + /* + for (it = stree->begin(), part = 0; it != stree->end(); it++, part++) { + reportModel(out, *(*it)); + reportRate(out, *(*it)); + }*/ + PartitionModel *part_model = (PartitionModel*)tree.getModelFactory(); + for (auto itm = part_model->linked_models.begin(); itm != part_model->linked_models.end(); itm++) { + for (it = stree->begin(); it != stree->end(); it++) + if ((*it)->getModel() == itm->second) { + out << "Linked model of substitution: " << itm->second->getName() << endl << endl; + bool fixed = (*it)->getModel()->fixParameters(false); + reportModel(out, (*it)->aln, (*it)->getModel()); + (*it)->getModel()->fixParameters(fixed); + break; + } + } + } else { + reportModel(out, tree); + reportRate(out, tree); + } + + if (params.lmap_num_quartets >= 0) { + tree.reportLikelihoodMapping(out); + } + + + /* + out << "RATE HETEROGENEITY" << endl << "------------------" << endl + << endl; + if (tree.isSuperTree()) { + PhyloSuperTree *stree = (PhyloSuperTree*) &tree; + int part = 0; + for (PhyloSuperTree::iterator it = stree->begin(); + it != stree->end(); it++, part++) { + out << "FOR PARTITION " << stree->part_info[part].name << ":" + << endl << endl; + reportRate(out, *(*it)); + } + } else + reportRate(out, tree); + */ + // Bootstrap analysis: + //Display as outgroup: a + + if (params.model_name == "WHTEST") { + out << "TEST OF MODEL HOMOGENEITY" << endl + << "-------------------------" << endl << endl; + out << "Delta of input data: " + << params.whtest_delta << endl; + out << 
".95 quantile of Delta distribution: " + << params.whtest_delta_quantile << endl; + out << "Number of simulations performed: " + << params.whtest_simulations << endl; + out << "P-value: " + << params.whtest_p_value << endl; + if (params.whtest_p_value < 0.05) { + out + << "RESULT: Homogeneity assumption is rejected (p-value cutoff 0.05)" + << endl; + } else { + out + << "RESULT: Homogeneity assumption is NOT rejected (p-value cutoff 0.05)" + << endl; + } + out << endl << "*** For this result please cite:" << endl << endl; + out + << "G. Weiss and A. von Haeseler (2003) Testing substitution models" + << endl + << "within a phylogenetic tree. Mol. Biol. Evol, 20(4):572-578" + << endl << endl; + } + + if (params.num_runs > 1) { + out << "MULTIPLE RUNS" << endl + << "-------------" << endl << endl; + out << "Run logL" << endl; + DoubleVector runLnL; + tree.getCheckpoint()->getVector("runLnL", runLnL); + for (int run = 0; run < runLnL.size(); run++) + out << run+1 << "\t" << fixed << runLnL[run] << endl; + out << endl; + } /* - out << "TREE SEARCH" << endl << "-----------" << endl << endl - << "Stopping rule: " - << ((params.stop_condition == SC_STOP_PREDICT) ? "Yes" : "No") - << endl << "Number of iterations: " - << tree.stop_rule.getNumIterations() << endl - << "Probability of deleting sequences: " << params.p_delete - << endl << "Number of representative leaves: " - << params.k_representative << endl - << "NNI log-likelihood cutoff: " << tree.getNNICutoff() << endl - << endl; + out << "TREE SEARCH" << endl << "-----------" << endl << endl + << "Stopping rule: " + << ((params.stop_condition == SC_STOP_PREDICT) ? 
"Yes" : "No") + << endl << "Number of iterations: " + << tree.stop_rule.getNumIterations() << endl + << "Probability of deleting sequences: " << params.p_delete + << endl << "Number of representative leaves: " + << params.k_representative << endl + << "NNI log-likelihood cutoff: " << tree.getNNICutoff() << endl + << endl; */ - if (params.compute_ml_tree) { - if (original_model.find("ONLY") != string::npos || (original_model.substr(0,2) == "MF" && original_model.substr(0,3) != "MFP")) { - out << "TREE USED FOR ModelFinder" << endl - << "-------------------------" << endl << endl; + if (params.compute_ml_tree) { + if (params.model_name.find("ONLY") != string::npos || (params.model_name.substr(0,2) == "MF" && params.model_name.substr(0,3) != "MFP")) { + out << "TREE USED FOR ModelFinder" << endl + << "-------------------------" << endl << endl; } else if (params.min_iterations == 0) { if (params.user_file) out << "USER TREE" << endl @@ -1036,136 +1169,187 @@ void reportPhyloAnalysis(Params ¶ms, string &original_model, out << "STARTING TREE" << endl << "-------------" << endl << endl; } else { - out << "MAXIMUM LIKELIHOOD TREE" << endl - << "-----------------------" << endl << endl; + out << "MAXIMUM LIKELIHOOD TREE" << endl + << "-----------------------" << endl << endl; } - tree.setRootNode(params.root); + tree.setRootNode(params.root); if (params.gbo_replicates) { - if (tree.boot_consense_logl > tree.getBestScore() + 0.1) { + if (tree.boot_consense_logl > tree.getBestScore() + 0.1 && !tree.isSuperTreeUnlinked()) { out << endl << "**NOTE**: Consensus tree has higher likelihood than ML tree found! Please use consensus tree below." 
<< endl; } } - reportTree(out, params, tree, tree.getBestScore(), tree.logl_variance, true); - - if (tree.isSuperTree() && verbose_mode >= VB_MED) { - PhyloSuperTree *stree = (PhyloSuperTree*) &tree; -// stree->mapTrees(); -// int empty_branches = stree->countEmptyBranches(); -// if (empty_branches) { -// stringstream ss; -// ss << empty_branches << " branches in the overall tree with no phylogenetic information due to missing data!"; -// outWarning(ss.str()); -// } - - int part = 0; - for (PhyloSuperTree::iterator it = stree->begin(); - it != stree->end(); it++, part++) { - out << "FOR PARTITION " << stree->part_info[part].name - << ":" << endl << endl; + reportTree(out, params, tree, tree.getBestScore(), tree.logl_variance, true); + + if (tree.isSuperTree() && verbose_mode >= VB_MED) { + PhyloSuperTree *stree = (PhyloSuperTree*) &tree; +// stree->mapTrees(); +// int empty_branches = stree->countEmptyBranches(); +// if (empty_branches) { +// stringstream ss; +// ss << empty_branches << " branches in the overall tree with no phylogenetic information due to missing data!"; +// outWarning(ss.str()); +// } + + int part = 0; + for (PhyloSuperTree::iterator it = stree->begin(); + it != stree->end(); it++, part++) { + out << "FOR PARTITION " << (*it)->aln->name + << ":" << endl << endl; (*it)->setRootNode(params.root); -// reportTree(out, params, *(*it), (*it)->computeLikelihood(), (*it)->computeLogLVariance(), false); - reportTree(out, params, *(*it), stree->part_info[part].cur_score, 0.0, false); - } - } +// reportTree(out, params, *(*it), (*it)->computeLikelihood(), (*it)->computeLogLVariance(), false); + reportTree(out, params, *(*it), stree->part_info[part].cur_score, 0.0, false); + } + } - } - /* - if (params.write_intermediate_trees) { - out << endl << "CONSENSUS OF INTERMEDIATE TREES" << endl << "-----------------------" << endl << endl - << "Number of intermediate trees: " << tree.stop_rule.getNumIterations() << endl - << "Split threshold: " << 
params.split_threshold << endl - << "Burn-in: " << params.tree_burnin << endl << endl; - }*/ - - if (params.consensus_type == CT_CONSENSUS_TREE) { - out << "CONSENSUS TREE" << endl << "--------------" << endl << endl; - out << "Consensus tree is constructed from " - << (params.num_bootstrap_samples ? params.num_bootstrap_samples : params.gbo_replicates) - << " bootstrap trees"; + } + /* + if (params.write_intermediate_trees) { + out << endl << "CONSENSUS OF INTERMEDIATE TREES" << endl << "-----------------------" << endl << endl + << "Number of intermediate trees: " << tree.stop_rule.getNumIterations() << endl + << "Split threshold: " << params.split_threshold << endl + << "Burn-in: " << params.tree_burnin << endl << endl; + }*/ + + if (params.consensus_type == CT_CONSENSUS_TREE && !tree.isSuperTreeUnlinked()) { + out << "CONSENSUS TREE" << endl << "--------------" << endl << endl; + out << "Consensus tree is constructed from " + << (params.num_bootstrap_samples ? params.num_bootstrap_samples : params.gbo_replicates) + << " " << RESAMPLE_NAME << " trees"; if (params.gbo_replicates || params.num_bootstrap_samples) { out << endl << "Log-likelihood of consensus tree: " << fixed << tree.boot_consense_logl; } - string con_file = params.out_prefix; - con_file += ".contree"; + string con_file = params.out_prefix; + con_file += ".contree"; // -- Mon Apr 17 21:14:53 BST 2017 // DONE Minh: merged correctly if (params.compute_ml_tree) out << endl << "Robinson-Foulds distance between ML tree and consensus tree: " - << params.contree_rfdist << endl; + << tree.contree_rfdist << endl; // -- - out << endl << "Branches with bootstrap support >" - << floor(params.split_threshold * 1000) / 10 << "% are kept"; - if (params.split_threshold == 0.0) - out << " (extended consensus)"; - if (params.split_threshold == 0.5) - out << " (majority-rule consensus)"; - if (params.split_threshold >= 0.99) - out << " (strict consensus)"; - - out << endl << "Branch lengths are optimized by maximum 
likelihood on original alignment" << endl; - out << "Numbers in parentheses are bootstrap supports (%)" << endl << endl; - - bool rooted = false; - MTree contree; - contree.readTree(con_file.c_str(), rooted); - contree.drawTree(out, WT_BR_SCALE); - out << endl << "Consensus tree in newick format: " << endl << endl; - contree.printTree(out); - out << endl << endl; -// tree.freeNode(); -// tree.root = NULL; -// tree.readTree(con_file.c_str(), rooted); -// if (removed_seqs.size() > 0) { -// tree.reinsertIdenticalSeqs(tree.aln, removed_seqs, twin_seqs); -// } -// tree.setAlignment(tree.aln); - - // bug fix -// if ((tree.sse == LK_EIGEN || tree.sse == LK_EIGEN_SSE) && !tree.isBifurcating()) { -// cout << "NOTE: Changing to old kernel as consensus tree is multifurcating" << endl; -// tree.changeLikelihoodKernel(LK_SSE); -// } - -// tree.initializeAllPartialLh(); -// tree.fixNegativeBranch(false); -// if (tree.isSuperTree()) -// ((PhyloSuperTree*) &tree)->mapTrees(); -// tree.optimizeAllBranches(); -// tree.printTree(con_file.c_str(), WT_BR_LEN | WT_BR_LEN_FIXED_WIDTH | WT_SORT_TAXA); -// tree.sortTaxa(); -// tree.drawTree(out, WT_BR_SCALE); -// out << endl << "Consensus tree in newick format: " << endl << endl; -// tree.printResultTree(out); -// out << endl << endl; - } - - - /* evaluate user trees */ - vector info; - IntVector distinct_trees; - if (params.treeset_file) { - evaluateTrees(params, &tree, info, distinct_trees); - out.precision(4); + out << endl << "Branches with support >" + << floor(params.split_threshold * 1000) / 10 << "% are kept"; + if (params.split_threshold == 0.0) + out << " (extended consensus)"; + if (params.split_threshold == 0.5) + out << " (majority-rule consensus)"; + if (params.split_threshold >= 0.99) + out << " (strict consensus)"; + + out << endl << "Branch lengths are optimized by maximum likelihood on original alignment" << endl; + out << "Numbers in parentheses are " << RESAMPLE_NAME << " supports (%)" << endl << endl; + + bool rooted = 
false; + MTree contree; + contree.readTree(con_file.c_str(), rooted); + contree.drawTree(out, WT_BR_SCALE); + out << endl << "Consensus tree in newick format: " << endl << endl; + contree.printTree(out); + out << endl << endl; +// tree.freeNode(); +// tree.root = NULL; +// tree.readTree(con_file.c_str(), rooted); +// if (removed_seqs.size() > 0) { +// tree.reinsertIdenticalSeqs(tree.aln, removed_seqs, twin_seqs); +// } +// tree.setAlignment(tree.aln); + + // bug fix +// if ((tree.sse == LK_EIGEN || tree.sse == LK_EIGEN_SSE) && !tree.isBifurcating()) { +// cout << "NOTE: Changing to old kernel as consensus tree is multifurcating" << endl; +// tree.changeLikelihoodKernel(LK_SSE); +// } + +// tree.initializeAllPartialLh(); +// tree.fixNegativeBranch(false); +// if (tree.isSuperTree()) +// ((PhyloSuperTree*) &tree)->mapTrees(); +// tree.optimizeAllBranches(); +// tree.printTree(con_file.c_str(), WT_BR_LEN | WT_BR_LEN_FIXED_WIDTH | WT_SORT_TAXA); +// tree.sortTaxa(); +// tree.drawTree(out, WT_BR_SCALE); +// out << endl << "Consensus tree in newick format: " << endl << endl; +// tree.printResultTree(out); +// out << endl << endl; + } +#ifdef IQTREE_TERRAPHAST + if (params.terrace_analysis && params.compute_ml_tree) { + + out << "TERRACE ANALYSIS" << endl << "----------------" << endl << endl; + cout << "Running additional analysis: Phylogenetic Terraces ..."<< endl; + + string filename = params.out_prefix; + filename += ".terrace"; + + try + { + Terrace terrace(tree, (SuperAlignment*)(tree.aln)); + + uint64_t terrace_size = terrace.getSize(); + + if (terrace_size == 1) { + out << "The tree does not lie on a terrace." 
<< endl; + } else { + out << "The tree lies on a terrace of size "; + + if (terrace_size == UINT64_MAX) { + out << "at least " << terrace_size << " (integer overflow)"; + } else { + out << terrace_size; + } + + out << endl; + + ofstream terraceout; + terraceout.open(filename.c_str()); + + terrace.printTreesCompressed(terraceout); + + terraceout.close(); + + out << "Terrace trees written (in compressed Newick format) to " << filename << endl; + } + } + catch (std::exception& e) + { + out << "ERROR: Terrace analysis using Terraphast failed: " << e.what() << endl << endl; + } + + out << endl; + out << "For documentation, see the technical supplement to Biczok et al. (2018)" << endl; + out << "https://doi.org/10.1093/bioinformatics/bty384"; + + out << endl << endl; + cout<< "Done. Results are written in "< info; + IntVector distinct_trees; + if (!params.treeset_file.empty()) { + evaluateTrees(params.treeset_file, params, &tree, info, distinct_trees); + out.precision(4); out.setf(ios_base::fixed); out << endl << "USER TREES" << endl << "----------" << endl << endl; out << "See " << params.out_prefix << ".trees for trees with branch lengths." << endl << endl; if (params.topotest_replicates && info.size() > 1) { + if (params.do_au_test && params.topotest_replicates < 10000) + out << "WARNING: Too few replicates for AU test. At least -zb 10000 for reliable results!" 
<< endl << endl; out << "Tree logL deltaL bp-RELL p-KH p-SH "; if (params.do_weighted_test) out << "p-WKH p-WSH "; - out << "c-ELW"; + out << " c-ELW"; if (params.do_au_test) - out << " p-AU"; + out << " p-AU"; out << endl << "------------------------------------------------------------------"; if (params.do_weighted_test) - out << "------------------"; + out << "------------------"; if (params.do_au_test) out << "-------"; out << endl; @@ -1185,17 +1369,19 @@ void reportPhyloAnalysis(Params ¶ms, string &original_model, out << " = tree " << distinct_trees[orig_id]+1 << endl; continue; } - out.precision(3); + out.unsetf(ios::fixed); + out.precision(10); out.width(12); out << info[tid].logl << " "; out.width(7); + out.precision(5); out << maxL - info[tid].logl; if (!params.topotest_replicates || info.size() <= 1) { out << endl; tid++; continue; } - out.precision(4); + out.precision(3); out << " "; out.width(6); out << info[tid].rell_bp; @@ -1230,454 +1416,381 @@ void reportPhyloAnalysis(Params ¶ms, string &original_model, else out << " + "; } - out.width(6); - out << info[tid].elw_value; + out.width(9); + out << right << info[tid].elw_value; if (info[tid].elw_confident) out << " + "; else out << " - "; if (params.do_au_test) { - out.width(6); + out.width(8); out << right << info[tid].au_pvalue; if (info[tid].au_pvalue < 0.05) out << " - "; else out << " + "; } + out.setf(ios::fixed); - out << endl; - tid++; - } - out << endl; + out << endl; + tid++; + } + out << endl; - if (params.topotest_replicates) { - out << "deltaL : logL difference from the maximal logl in the set." << endl - << "bp-RELL : bootstrap proportion using RELL method (Kishino et al. 1990)." << endl - << "p-KH : p-value of one sided Kishino-Hasegawa test (1989)." << endl - << "p-SH : p-value of Shimodaira-Hasegawa test (2000)." << endl; - if (params.do_weighted_test) { - out << "p-WKH : p-value of weighted KH test." << endl - << "p-WSH : p-value of weighted SH test." 
<< endl; - } - out << "c-ELW : Expected Likelihood Weight (Strimmer & Rambaut 2002)." << endl; + if (params.topotest_replicates) { + out << "deltaL : logL difference from the maximal logl in the set." << endl + << "bp-RELL : bootstrap proportion using RELL method (Kishino et al. 1990)." << endl + << "p-KH : p-value of one sided Kishino-Hasegawa test (1989)." << endl + << "p-SH : p-value of Shimodaira-Hasegawa test (2000)." << endl; + if (params.do_weighted_test) { + out << "p-WKH : p-value of weighted KH test." << endl + << "p-WSH : p-value of weighted SH test." << endl; + } + out << "c-ELW : Expected Likelihood Weight (Strimmer & Rambaut 2002)." << endl; if (params.do_au_test) { - out << "p-AU : p-value of approximately unbiased (AU) test (Shimodaira, 2002)." << endl; + out << "p-AU : p-value of approximately unbiased (AU) test (Shimodaira, 2002)." << endl; } out << endl - << "Plus signs denote the 95% confidence sets." << endl - << "Minus signs denote significant exclusion." << endl - << "All tests performed " - << params.topotest_replicates << " resamplings using the RELL method."<getNSeq(); - IntVector checked; - checked.resize(ntaxa, 0); - int i, j; - for (i = 0; i < ntaxa - 1; i++) { - if (checked[i]) - continue; - string str = ""; - bool first = true; - for (j = i + 1; j < ntaxa; j++) - if (dist[i * ntaxa + j] <= Params::getInstance().min_branch_length) { - if (first) - str = "ZERO distance between sequences " - + aln->getSeqName(i); - str += ", " + aln->getSeqName(j); - checked[j] = 1; - first = false; - } - checked[i] = 1; - if (str != "") - outWarning(str); - } + size_t ntaxa = aln->getNSeq(); + IntVector checked; + checked.resize(ntaxa, 0); + auto minLen = Params::getInstance().min_branch_length; + for (size_t i = 0; i < ntaxa - 1; ++i) { + if (checked[i]) + continue; + string str = ""; + bool first = true; + auto distRow = dist + i*ntaxa; + for (size_t j = i + 1; j < ntaxa; ++j) + if (distRow[j] <= minLen ) { + if (first) + str = "ZERO distance between 
sequences " + + aln->getSeqName(i); + str += ", " + aln->getSeqName(j); + checked[j] = 1; + first = false; + } + checked[i] = 1; + if (str != "") { + outWarning(str); + } + } } void printAnalysisInfo(int model_df, IQTree& iqtree, Params& params) { -// if (!params.raxmllib) { - cout << "Model of evolution: "; - if (iqtree.isSuperTree()) { - cout << iqtree.getModelName() << " (" << model_df << " free parameters)" << endl; - } else { - cout << iqtree.getModelName() << " with "; - switch (iqtree.getModel()->getFreqType()) { - case FREQ_EQUAL: - cout << "equal"; - break; - case FREQ_EMPIRICAL: - cout << "counted"; - break; - case FREQ_USER_DEFINED: - cout << "user-defined"; - break; - case FREQ_ESTIMATE: - cout << "optimized"; - break; - case FREQ_CODON_1x4: - cout << "counted 1x4"; - break; - case FREQ_CODON_3x4: - cout << "counted 3x4"; - break; - case FREQ_CODON_3x4C: - cout << "counted 3x4-corrected"; - break; - case FREQ_DNA_RY: - cout << "constrained A+G=C+T"; - break; - case FREQ_DNA_WS: - cout << "constrained A+T=C+G"; - break; - case FREQ_DNA_MK: - cout << "constrained A+C=G+T"; - break; - case FREQ_DNA_1112: - cout << "constrained A=C=G"; - break; - case FREQ_DNA_1121: - cout << "constrained A=C=T"; - break; - case FREQ_DNA_1211: - cout << "constrained A=G=T"; - break; - case FREQ_DNA_2111: - cout << "constrained C=G=T"; - break; - case FREQ_DNA_1122: - cout << "constrained A=C,G=T"; - break; - case FREQ_DNA_1212: - cout << "constrained A=G,C=T"; - break; - case FREQ_DNA_1221: - cout << "constrained A=T,C=G"; - break; - case FREQ_DNA_1123: - cout << "constrained A=C"; - break; - case FREQ_DNA_1213: - cout << "constrained A=G"; - break; - case FREQ_DNA_1231: - cout << "constrained A=T"; - break; - case FREQ_DNA_2113: - cout << "constrained C=G"; - break; - case FREQ_DNA_2131: - cout << "constrained C=T"; - break; - case FREQ_DNA_2311: - cout << "constrained G=T"; - break; - default: - outError("Wrong specified state frequencies"); - } - cout << " frequencies (" 
<< model_df << " free parameters)" << endl; - } - cout << "Fixed branch lengths: " - << ((params.fixed_branch_length) ? "Yes" : "No") << endl; - - if (params.min_iterations > 0) { - cout << "Tree search algorithm: " << (params.snni ? "Stochastic nearest neighbor interchange" : "IQPNNI") << endl; - cout << "Termination condition: "; - if (params.stop_condition == SC_REAL_TIME) { - cout << "after " << params.maxtime << " minutes" << endl; - } else if (params.stop_condition == SC_UNSUCCESS_ITERATION) { - cout << "after " << params.unsuccess_iteration << " unsuccessful iterations" << endl; - } else if (params.stop_condition == SC_FIXED_ITERATION) { - cout << params.min_iterations << " iterations" << endl; - } else if(params.stop_condition == SC_WEIBULL) { - cout << "predicted in [" << params.min_iterations << "," - << params.max_iterations << "] (confidence " - << params.stop_confidence << ")" << endl; - } else if (params.stop_condition == SC_BOOTSTRAP_CORRELATION) { - cout << "min " << params.min_correlation << " correlation coefficient" << endl; - } - - if (!params.snni) { - cout << "Number of representative leaves : " << params.k_representative << endl; - cout << "Probability of deleting sequences: " << iqtree.getProbDelete() << endl; - cout << "Number of leaves to be deleted : " << iqtree.getDelete() << endl; - cout << "Important quartets assessed on: " - << ((params.iqp_assess_quartet == IQP_DISTANCE) ? - "Distance" : ((params.iqp_assess_quartet == IQP_PARSIMONY) ? "Parsimony" : "Bootstrap")) - << endl; - } - cout << "NNI assessed on: " << ((params.nni5) ? "5 branches" : "1 branch") << endl; - } - cout << "Phylogenetic likelihood library: " << (params.pll ? 
"Yes" : "No") << endl; - if (!params.fixed_branch_length) +// if (!params.raxmllib) { + cout << "Model of evolution: "; + if (iqtree.isSuperTree()) { + cout << iqtree.getModelName() << " (" << model_df << " free parameters)" << endl; + } else { + cout << iqtree.getModelName() << " with "; + switch (iqtree.getModel()->getFreqType()) { + case FREQ_EQUAL: + cout << "equal"; + break; + case FREQ_EMPIRICAL: + cout << "counted"; + break; + case FREQ_USER_DEFINED: + cout << "user-defined"; + break; + case FREQ_ESTIMATE: + cout << "optimized"; + break; + case FREQ_CODON_1x4: + cout << "counted 1x4"; + break; + case FREQ_CODON_3x4: + cout << "counted 3x4"; + break; + case FREQ_CODON_3x4C: + cout << "counted 3x4-corrected"; + break; + case FREQ_DNA_RY: + cout << "constrained A+G=C+T"; + break; + case FREQ_DNA_WS: + cout << "constrained A+T=C+G"; + break; + case FREQ_DNA_MK: + cout << "constrained A+C=G+T"; + break; + case FREQ_DNA_1112: + cout << "constrained A=C=G"; + break; + case FREQ_DNA_1121: + cout << "constrained A=C=T"; + break; + case FREQ_DNA_1211: + cout << "constrained A=G=T"; + break; + case FREQ_DNA_2111: + cout << "constrained C=G=T"; + break; + case FREQ_DNA_1122: + cout << "constrained A=C,G=T"; + break; + case FREQ_DNA_1212: + cout << "constrained A=G,C=T"; + break; + case FREQ_DNA_1221: + cout << "constrained A=T,C=G"; + break; + case FREQ_DNA_1123: + cout << "constrained A=C"; + break; + case FREQ_DNA_1213: + cout << "constrained A=G"; + break; + case FREQ_DNA_1231: + cout << "constrained A=T"; + break; + case FREQ_DNA_2113: + cout << "constrained C=G"; + break; + case FREQ_DNA_2131: + cout << "constrained C=T"; + break; + case FREQ_DNA_2311: + cout << "constrained G=T"; + break; + default: + outError("Wrong specified state frequencies"); + } + cout << " frequencies (" << model_df << " free parameters)" << endl; + } + cout << "Fixed branch lengths: " + << ((params.fixed_branch_length) ? 
"Yes" : "No") << endl; + + if (params.min_iterations > 0) { + cout << "Tree search algorithm: " << (params.snni ? "Stochastic nearest neighbor interchange" : "IQPNNI") << endl; + cout << "Termination condition: "; + if (params.stop_condition == SC_REAL_TIME) { + cout << "after " << params.maxtime << " minutes" << endl; + } else if (params.stop_condition == SC_UNSUCCESS_ITERATION) { + cout << "after " << params.unsuccess_iteration << " unsuccessful iterations" << endl; + } else if (params.stop_condition == SC_FIXED_ITERATION) { + cout << params.min_iterations << " iterations" << endl; + } else if(params.stop_condition == SC_WEIBULL) { + cout << "predicted in [" << params.min_iterations << "," + << params.max_iterations << "] (confidence " + << params.stop_confidence << ")" << endl; + } else if (params.stop_condition == SC_BOOTSTRAP_CORRELATION) { + cout << "min " << params.min_correlation << " correlation coefficient" << endl; + } + + if (!params.snni) { + cout << "Number of representative leaves : " << params.k_representative << endl; + cout << "Probability of deleting sequences: " << iqtree.getProbDelete() << endl; + cout << "Number of leaves to be deleted : " << iqtree.getDelete() << endl; + cout << "Important quartets assessed on: " + << ((params.iqp_assess_quartet == IQP_DISTANCE) ? + "Distance" : ((params.iqp_assess_quartet == IQP_PARSIMONY) ? "Parsimony" : "Bootstrap")) + << endl; + } + cout << "NNI assessed on: " << ((params.nni5) ? "5 branches" : "1 branch") << endl; + } + cout << "Phylogenetic likelihood library: " << (params.pll ? "Yes" : "No") << endl; + if (params.fixed_branch_length != BRLEN_FIX) cout << "Branch length optimization method: " << ((iqtree.optimize_by_newton) ? "Newton" : "Brent") << endl; cout << "Number of Newton-Raphson steps in NNI evaluation and branch length optimization: " << NNI_MAX_NR_STEP << " / " << PLL_NEWZPERCYCLE << endl; cout << "SSE instructions: " << ((iqtree.sse) ? 
"Yes" : "No") << endl; - cout << endl; + cout << endl; } -void computeMLDist(Params& params, IQTree& iqtree, string &dist_file, double begin_time) { - double longest_dist; -// stringstream best_tree_string; -// iqtree.printTree(best_tree_string, WT_BR_LEN + WT_TAXON_ID); - cout << "Computing ML distances based on estimated model parameters..."; - double *ml_dist = NULL; - double *ml_var = NULL; - longest_dist = iqtree.computeDist(params, iqtree.aln, ml_dist, ml_var, dist_file); - cout << " " << (getCPUTime() - begin_time) << " sec" << endl; - +void computeMLDist ( Params& params, IQTree& iqtree + , double begin_wallclock_time, double begin_cpu_time) { + double longest_dist; + cout << "Computing ML distances based on estimated model parameters..." << endl; + double *ml_dist = nullptr; + double *ml_var = nullptr; + iqtree.decideDistanceFilePath(params); + longest_dist = iqtree.computeDist(params, iqtree.aln, ml_dist, ml_var); + cout << "Computing ML distances took " + << (getRealTime() - begin_wallclock_time) << " sec (of wall-clock time) " + << (getCPUTime() - begin_cpu_time) << " sec (of CPU time)" << endl; + size_t n = iqtree.aln->getNSeq(); + size_t nSquared = n*n; + if ( iqtree.dist_matrix == nullptr ) { + iqtree.dist_matrix = ml_dist; + ml_dist = nullptr; + } else { + memmove(iqtree.dist_matrix, ml_dist, + sizeof(double) * nSquared); + delete[] ml_dist; + } + if ( iqtree.var_matrix == nullptr ) { + iqtree.var_matrix = ml_var; + ml_var = nullptr; + } else { + memmove(iqtree.var_matrix, ml_var, + sizeof(double) * nSquared); + delete[] ml_var; + } + if (!params.dist_file) + { + iqtree.printDistanceFile(); + } double max_genetic_dist = MAX_GENETIC_DIST; if (iqtree.aln->seq_type == SEQ_POMO) { int N = iqtree.aln->virtual_pop_size; max_genetic_dist *= N * N; } - if (longest_dist > max_genetic_dist * 0.99) { - outWarning("Some pairwise ML distances are too long (saturated)"); - //cout << "Some ML distances are too long, using old distances..." 
<< endl; - } //else - { - if ( !iqtree.dist_matrix ) { - iqtree.dist_matrix = new double[iqtree.aln->getNSeq() * iqtree.aln->getNSeq()]; - } - if ( !iqtree.var_matrix ) { - iqtree.var_matrix = new double[iqtree.aln->getNSeq() * iqtree.aln->getNSeq()]; - } - memmove(iqtree.dist_matrix, ml_dist, - sizeof (double) * iqtree.aln->getNSeq() * iqtree.aln->getNSeq()); - memmove(iqtree.var_matrix, ml_var, - sizeof(double) * iqtree.aln->getNSeq() * iqtree.aln->getNSeq()); - } - delete[] ml_dist; - delete[] ml_var; + if (longest_dist > max_genetic_dist * 0.99) { + outWarning("Some pairwise ML distances are too long (saturated)"); + } } -void computeInitialDist(Params ¶ms, IQTree &iqtree, string &dist_file) { +void computeInitialDist(Params ¶ms, IQTree &iqtree) { double longest_dist; - if (params.dist_file) { - cout << "Reading distance matrix file " << params.dist_file << " ..." << endl; - } else if (params.compute_jc_dist) { - cout << "Computing Juke-Cantor distances..." << endl; - } else if (params.compute_obs_dist) { - cout << "Computing observed distances..." << endl; - } - - if (params.compute_jc_dist || params.compute_obs_dist || params.partition_file) { - longest_dist = iqtree.computeDist(params, iqtree.aln, iqtree.dist_matrix, iqtree.var_matrix, dist_file); - checkZeroDist(iqtree.aln, iqtree.dist_matrix); + if (params.dist_file) { + cout << "Reading distance matrix file " << params.dist_file << " ..." << endl; + } else if (params.compute_jc_dist) { + cout << "Computing Jukes-Cantor distances..." << endl; + } else if (params.compute_obs_dist) { + cout << "Computing observed distances..." 
<< endl; + } + if (params.compute_jc_dist || params.compute_obs_dist || params.partition_file) { + longest_dist = iqtree.computeDist(params, iqtree.aln, iqtree.dist_matrix, iqtree.var_matrix); + //if (!params.suppress_zero_distance_warnings) { + // checkZeroDist(iqtree.aln, iqtree.dist_matrix); + //} double max_genetic_dist = MAX_GENETIC_DIST; if (iqtree.aln->seq_type == SEQ_POMO) { int N = iqtree.aln->virtual_pop_size; max_genetic_dist *= N * N; } - if (longest_dist > max_genetic_dist * 0.99) { - outWarning("Some pairwise distances are too long (saturated)"); - } + if (longest_dist > max_genetic_dist * 0.99) { + outWarning("Some pairwise distances are too long (saturated)"); + } } - } -void initializeParams(Params ¶ms, IQTree &iqtree, ModelCheckpoint &model_info, - ModelsBlock *models_block, string &dist_file) +void initializeParams(Params ¶ms, IQTree &iqtree) { // iqtree.setCurScore(-DBL_MAX); - bool test_only = (params.model_name.find("ONLY") != string::npos) || (params.model_name.substr(0,2) == "MF" && params.model_name.substr(0,3) != "MFP"); - - bool empty_model_found = params.model_name.empty() && !iqtree.isSuperTree(); - - if (params.model_name.empty() && iqtree.isSuperTree()) { - // check whether any partition has empty model_name - PhyloSuperTree *stree = (PhyloSuperTree*)&iqtree; - for (auto i = stree->part_info.begin(); i != stree->part_info.end(); i++) - if (i->model_name.empty()) { - empty_model_found = true; - break; - } + bool ok_tree = iqtree.root; + if (iqtree.isSuperTreeUnlinked()) + ok_tree = ((PhyloSuperTree*)&iqtree)->front()->root; + if (!ok_tree) + { + // compute initial tree + if (!params.compute_ml_tree_only) { + iqtree.computeInitialTree(params.SSE); + } } + ASSERT(iqtree.aln); - /* initialize substitution model */ - if (empty_model_found || params.model_name.substr(0, 4) == "TEST" || params.model_name.substr(0, 2) == "MF") { - if (MPIHelper::getInstance().getNumProcesses() > 1) - outError("Please use only 1 MPI process! 
We are currently working on the MPI parallelization of model selection."); - // TODO: check if necessary -// if (iqtree.isSuperTree()) -// ((PhyloSuperTree*) &iqtree)->mapTrees(); - double cpu_time = getCPUTime(); - double real_time = getRealTime(); - model_info.setFileName((string)params.out_prefix + ".model.gz"); + if (iqtree.aln->model_name == "WHTEST") { + if (iqtree.aln->seq_type != SEQ_DNA) + outError("Weiss & von Haeseler test of model homogeneity only works for DNA"); + iqtree.aln->model_name = "GTR+G"; + } + if (params.gbo_replicates) + { + params.speed_conf = 1.0; + } - bool ok_model_file = false; - if (!params.print_site_lh && !params.model_test_again) { - ok_model_file = model_info.load(); - } + // TODO: check if necessary +// if (iqtree.isSuperTree()) +// ((PhyloSuperTree*) &iqtree)->mapTrees(); - cout << endl; + // set parameter for the current tree +// iqtree.setParams(params); +} - ok_model_file &= model_info.size() > 0; - if (ok_model_file) - cout << "NOTE: Restoring information from model checkpoint file " << model_info.getFileName() << endl; +void pruneTaxa(Params ¶ms, IQTree &iqtree, double *pattern_lh, NodeVector &pruned_taxa, StrVector &linked_name) { + int num_low_support; + double mytime; + + if (params.aLRT_threshold <= 100 && (params.aLRT_replicates > 0 || params.localbp_replicates > 0)) { + mytime = getCPUTime(); + cout << "Testing tree branches by SH-like aLRT with " << params.aLRT_replicates << " replicates..." << endl; + iqtree.setRootNode(params.root); + double curScore = iqtree.getCurScore(); + iqtree.computePatternLikelihood(pattern_lh, &curScore); + num_low_support = iqtree.testAllBranches(params.aLRT_threshold, curScore, + pattern_lh, params.aLRT_replicates, params.localbp_replicates, params.aLRT_test, params.aBayes_test); + iqtree.printResultTree(); + cout << " " << getCPUTime() - mytime << " sec." 
<< endl; + cout << num_low_support << " branches show low support values (<= " << params.aLRT_threshold << "%)" << endl; + + //tree.drawTree(cout); + cout << "Collapsing stable clades..." << endl; + iqtree.collapseStableClade(params.aLRT_threshold, pruned_taxa, linked_name, iqtree.dist_matrix); + cout << pruned_taxa.size() << " taxa were pruned from stable clades" << endl; + } - Checkpoint *orig_checkpoint = iqtree.getCheckpoint(); - iqtree.setCheckpoint(&model_info); - iqtree.restoreCheckpoint(); + if (!pruned_taxa.empty()) { + cout << "Pruned alignment contains " << iqtree.aln->getNSeq() + << " sequences and " << iqtree.aln->getNSite() << " sites and " + << iqtree.aln->getNPattern() << " patterns" << endl; + iqtree.initializeAllPartialLh(); + iqtree.clearAllPartialLH(); + iqtree.setCurScore(iqtree.optimizeAllBranches()); + //cout << "Log-likelihood after reoptimizing model parameters: " << tree.curScore << endl; + iqtree.optimizeNNI(); + cout << "Log-likelihood after optimizing partial tree: " + << iqtree.getCurScore() << endl; + } - int partition_type; - if (CKP_RESTORE2((&model_info), partition_type)) { - if (partition_type != params.partition_type) - outError("Mismatch partition type between checkpoint and partition file command option"); - } else { - partition_type = params.partition_type; - CKP_SAVE2((&model_info), partition_type); - } +} - // compute initial tree - iqtree.computeInitialTree(dist_file, params.SSE); +void restoreTaxa(IQTree &iqtree, double *saved_dist_mat, NodeVector &pruned_taxa, StrVector &linked_name) { + if (!pruned_taxa.empty()) { + cout << "Restoring full tree..." 
<< endl; + iqtree.restoreStableClade(iqtree.aln, pruned_taxa, linked_name); + delete[] iqtree.dist_matrix; + iqtree.dist_matrix = saved_dist_mat; + iqtree.initializeAllPartialLh(); + iqtree.clearAllPartialLH(); + iqtree.setCurScore(iqtree.optimizeAllBranches()); + //cout << "Log-likelihood after reoptimizing model parameters: " << tree.curScore << endl; + pair nniInfo; + nniInfo = iqtree.optimizeNNI(); + cout << "Log-likelihood after reoptimizing full tree: " << iqtree.getCurScore() << endl; + //iqtree.setBestScore(iqtree.getModelFactory()->optimizeParameters(params.fixed_branch_length, true, params.model_eps)); - if (iqtree.isSuperTree()) { - PhyloSuperTree *stree = (PhyloSuperTree*)&iqtree; - int part = 0; - for (auto it = stree->begin(); it != stree->end(); it++, part++) { - model_info.startStruct(stree->part_info[part].name); - (*it)->saveCheckpoint(); - model_info.endStruct(); - } - } else { - iqtree.saveCheckpoint(); - } - - // also save initial tree to the original .ckp.gz checkpoint -// string initTree = iqtree.getTreeString(); -// CKP_SAVE(initTree); -// iqtree.saveCheckpoint(); -// checkpoint->dump(true); - - params.model_name = testModel(params, &iqtree, model_info, models_block, params.num_threads, BRLEN_OPTIMIZE, "", true); - - iqtree.setCheckpoint(orig_checkpoint); - - params.startCPUTime = cpu_time; - params.start_real_time = real_time; - cpu_time = getCPUTime() - cpu_time; - real_time = getRealTime() - real_time; - cout << endl; - cout << "All model information printed to " << model_info.getFileName() << endl; - cout << "CPU time for ModelFinder: " << cpu_time << " seconds (" << convert_time(cpu_time) << ")" << endl; - cout << "Wall-clock time for ModelFinder: " << real_time << " seconds (" << convert_time(real_time) << ")" << endl; - -// alignment = iqtree.aln; - if (test_only) { - params.min_iterations = 0; - } - } else { - // compute initial tree - iqtree.computeInitialTree(dist_file, params.SSE); - } - - if (params.model_name == "WHTEST") { - if 
(iqtree.aln->seq_type != SEQ_DNA) - outError("Weiss & von Haeseler test of model homogeneity only works for DNA"); - params.model_name = "GTR+G"; } - - ASSERT(iqtree.aln); - if (params.gbo_replicates) - params.speed_conf = 1.0; - - // TODO: check if necessary -// if (iqtree.isSuperTree()) -// ((PhyloSuperTree*) &iqtree)->mapTrees(); - - // set parameter for the current tree -// iqtree.setParams(params); -} - - -void pruneTaxa(Params ¶ms, IQTree &iqtree, double *pattern_lh, NodeVector &pruned_taxa, StrVector &linked_name) { - int num_low_support; - double mytime; - - if (params.aLRT_threshold <= 100 && (params.aLRT_replicates > 0 || params.localbp_replicates > 0)) { - mytime = getCPUTime(); - cout << "Testing tree branches by SH-like aLRT with " << params.aLRT_replicates << " replicates..." << endl; - iqtree.setRootNode(params.root); - double curScore = iqtree.getCurScore(); - iqtree.computePatternLikelihood(pattern_lh, &curScore); - num_low_support = iqtree.testAllBranches(params.aLRT_threshold, curScore, - pattern_lh, params.aLRT_replicates, params.localbp_replicates, params.aLRT_test, params.aBayes_test); - iqtree.printResultTree(); - cout << " " << getCPUTime() - mytime << " sec." << endl; - cout << num_low_support << " branches show low support values (<= " << params.aLRT_threshold << "%)" << endl; - - //tree.drawTree(cout); - cout << "Collapsing stable clades..." 
<< endl; - iqtree.collapseStableClade(params.aLRT_threshold, pruned_taxa, linked_name, iqtree.dist_matrix); - cout << pruned_taxa.size() << " taxa were pruned from stable clades" << endl; - } - - if (!pruned_taxa.empty()) { - cout << "Pruned alignment contains " << iqtree.aln->getNSeq() - << " sequences and " << iqtree.aln->getNSite() << " sites and " - << iqtree.aln->getNPattern() << " patterns" << endl; - //tree.clearAllPartialLh(); - iqtree.initializeAllPartialLh(); - iqtree.clearAllPartialLH(); - iqtree.setCurScore(iqtree.optimizeAllBranches()); - //cout << "Log-likelihood after reoptimizing model parameters: " << tree.curScore << endl; -// pair nniInfo = iqtree.optimizeNNI(); - iqtree.optimizeNNI(); - cout << "Log-likelihood after optimizing partial tree: " - << iqtree.getCurScore() << endl; - } - -} - -void restoreTaxa(IQTree &iqtree, double *saved_dist_mat, NodeVector &pruned_taxa, StrVector &linked_name) { - if (!pruned_taxa.empty()) { - cout << "Restoring full tree..." << endl; - iqtree.restoreStableClade(iqtree.aln, pruned_taxa, linked_name); - delete[] iqtree.dist_matrix; - iqtree.dist_matrix = saved_dist_mat; - iqtree.initializeAllPartialLh(); - iqtree.clearAllPartialLH(); - iqtree.setCurScore(iqtree.optimizeAllBranches()); - //cout << "Log-likelihood after reoptimizing model parameters: " << tree.curScore << endl; - pair nniInfo; - nniInfo = iqtree.optimizeNNI(); - cout << "Log-likelihood after reoptimizing full tree: " << iqtree.getCurScore() << endl; - //iqtree.setBestScore(iqtree.getModelFactory()->optimizeParameters(params.fixed_branch_length, true, params.model_eps)); - - } } void runApproximateBranchLengths(Params ¶ms, IQTree &iqtree) { if (!params.fixed_branch_length && params.leastSquareBranch) { @@ -1691,25 +1804,25 @@ void runApproximateBranchLengths(Params ¶ms, IQTree &iqtree) { cout << "Logl of tree with LS branch lengths: " << iqtree.getCurScore() << endl; cout << "Tree with LS branch lengths written to " << filename << endl; if 
(params.print_branch_lengths) { - if (params.manuel_analytic_approx) { - cout << "Applying Manuel's analytic approximation.." << endl; - iqtree.approxAllBranches(); - } - ofstream out; - filename = params.out_prefix; - filename += ".lsbrlen"; - out.open(filename.c_str()); - iqtree.printBranchLengths(out); - out.close(); - cout << "LS Branch lengths written to " << filename << endl; + if (params.manuel_analytic_approx) { + cout << "Applying Manuel's analytic approximation.." << endl; + iqtree.approxAllBranches(); + } + ofstream out; + filename = params.out_prefix; + filename += ".lsbrlen"; + out.open(filename.c_str()); + iqtree.printBranchLengths(out); + out.close(); + cout << "LS Branch lengths written to " << filename << endl; } cout << "Total LS tree length: " << iqtree.treeLength() << endl; } if (params.pars_branch_length) { - cout << endl << "Computing parsimony branch lengths..." << endl; - iqtree.fixNegativeBranch(true); - iqtree.clearAllPartialLH(); + cout << endl << "Computing parsimony branch lengths..." 
<< endl; + iqtree.fixNegativeBranch(true); + iqtree.clearAllPartialLH(); iqtree.setCurScore(iqtree.computeLikelihood()); string filename = params.out_prefix; filename += ".mptree"; @@ -1717,22 +1830,22 @@ void runApproximateBranchLengths(Params ¶ms, IQTree &iqtree) { cout << "Logl of tree with MP branch lengths: " << iqtree.getCurScore() << endl; cout << "Tree with MP branch lengths written to " << filename << endl; if (params.print_branch_lengths) { - ofstream out; - filename = params.out_prefix; - filename += ".mpbrlen"; - out.open(filename.c_str()); - iqtree.printBranchLengths(out); - out.close(); - cout << "MP Branch lengths written to " << filename << endl; + ofstream out; + filename = params.out_prefix; + filename += ".mpbrlen"; + out.open(filename.c_str()); + iqtree.printBranchLengths(out); + out.close(); + cout << "MP Branch lengths written to " << filename << endl; } cout << "Total MP tree length: " << iqtree.treeLength() << endl; } if (params.bayes_branch_length) { - cout << endl << "Computing Bayesian branch lengths..." << endl; - iqtree.computeAllBayesianBranchLengths(); - iqtree.clearAllPartialLH(); + cout << endl << "Computing Bayesian branch lengths..." 
<< endl; + iqtree.computeAllBayesianBranchLengths(); + iqtree.clearAllPartialLH(); iqtree.setCurScore(iqtree.computeLikelihood()); string filename = params.out_prefix; filename += ".batree"; @@ -1740,13 +1853,13 @@ void runApproximateBranchLengths(Params ¶ms, IQTree &iqtree) { cout << "Logl of tree with Bayesian branch lengths: " << iqtree.getCurScore() << endl; cout << "Tree with Bayesian branch lengths written to " << filename << endl; if (params.print_branch_lengths) { - ofstream out; - filename = params.out_prefix; - filename += ".babrlen"; - out.open(filename.c_str()); - iqtree.printBranchLengths(out); - out.close(); - cout << "Bayesian Branch lengths written to " << filename << endl; + ofstream out; + filename = params.out_prefix; + filename += ".babrlen"; + out.open(filename.c_str()); + iqtree.printBranchLengths(out); + out.close(); + cout << "Bayesian Branch lengths written to " << filename << endl; } cout << "Total Bayesian tree length: " << iqtree.treeLength() << endl; @@ -1754,36 +1867,76 @@ void runApproximateBranchLengths(Params ¶ms, IQTree &iqtree) { } +void printSiteRates(IQTree &iqtree, const char *rate_file, bool bayes) { + try { + ofstream out; + out.exceptions(ios::failbit | ios::badbit); + out.open(rate_file); + out << "# Site-specific subtitution rates determined by "; + if (bayes) + out<< "empirical Bayesian method" << endl; + else + out<< "maximum likelihood" << endl; + out << "# This file can be read in MS Excel or in R with command:" << endl + << "# tab=read.table('" << rate_file << "',header=TRUE)" << endl + << "# Columns are tab-separated with following meaning:" << endl; + if (iqtree.isSuperTree()) { + out << "# Part: Partition ID (1=" << ((PhyloSuperTree*)&iqtree)->front()->aln->name << ", etc)" << endl + << "# Site: Site ID within partition (starting from 1 for each partition)" << endl; + } else + out << "# Site: Alignment site ID" << endl; + + if (bayes) + out << "# Rate: Posterior mean site rate weighted by posterior probability" << 
endl + << "# Cat: Category with highest posterior (0=invariable, 1=slow, etc)" << endl + << "# C_Rate: Corresponding rate of highest category" << endl; + else + out << "# Rate: Site rate estimated by maximum likelihood" << endl; + if (iqtree.isSuperTree()) + out << "Part\t"; + out << "Site\tRate"; + if (bayes) + out << "\tCat\tC_Rate" << endl; + else + out << endl; + iqtree.writeSiteRates(out, bayes); + out.close(); + } catch (ios::failure) { + outError(ERR_WRITE_OUTPUT, rate_file); + } + cout << "Site rates printed to " << rate_file << endl; +} + void printMiscInfo(Params ¶ms, IQTree &iqtree, double *pattern_lh) { - if (params.print_site_lh && !params.pll) { - string site_lh_file = params.out_prefix; - site_lh_file += ".sitelh"; - if (params.print_site_lh == WSL_SITE) - printSiteLh(site_lh_file.c_str(), &iqtree, pattern_lh); - else - printSiteLhCategory(site_lh_file.c_str(), &iqtree, params.print_site_lh); - } + if (params.print_site_lh && !params.pll) { + string site_lh_file = params.out_prefix; + site_lh_file += ".sitelh"; + if (params.print_site_lh == WSL_SITE) + printSiteLh(site_lh_file.c_str(), &iqtree, pattern_lh); + else + printSiteLhCategory(site_lh_file.c_str(), &iqtree, params.print_site_lh); + } if (params.print_partition_lh && !iqtree.isSuperTree()) { outWarning("-wpl does not work with non-partition model"); params.print_partition_lh = false; } - if (params.print_partition_lh && !params.pll) { + if (params.print_partition_lh && !params.pll) { string part_lh_file = (string)params.out_prefix + ".partlh"; printPartitionLh(part_lh_file.c_str(), &iqtree, pattern_lh); - } + } - if (params.print_site_prob && !params.pll) { + if (params.print_site_prob && !params.pll) { printSiteProbCategory(((string)params.out_prefix + ".siteprob").c_str(), &iqtree, params.print_site_prob); - } + } if (params.print_ancestral_sequence) { printAncestralSequences(params.out_prefix, &iqtree, params.print_ancestral_sequence); } if (params.print_site_state_freq != WSF_NONE && 
!params.site_freq_file && !params.tree_freq_file) { - string site_freq_file = params.out_prefix; - site_freq_file += ".sitesf"; + string site_freq_file = params.out_prefix; + site_freq_file += ".sitesf"; printSiteStateFreq(site_freq_file.c_str(), &iqtree); } @@ -1824,84 +1977,83 @@ void printMiscInfo(Params ¶ms, IQTree &iqtree, double *pattern_lh) { out.close(); } - if (params.print_branch_lengths) { - if (params.manuel_analytic_approx) { - cout << "Applying Manuel's analytic approximation.." << endl; - iqtree.approxAllBranches(); - } - string brlen_file = params.out_prefix; - brlen_file += ".brlen"; - ofstream out; - out.open(brlen_file.c_str()); - iqtree.printBranchLengths(out); - out.close(); - cout << "Branch lengths written to " << brlen_file << endl; - } - - if (params.print_partition_info && iqtree.isSuperTree()) { - string partition_info = params.out_prefix; - partition_info += ".partinfo.nex"; - ((PhyloSuperTree*)(&iqtree))->printPartition(partition_info.c_str()); - partition_info = (string)params.out_prefix + ".partitions"; - ((PhyloSuperTree*)(&iqtree))->printPartitionRaxml(partition_info.c_str()); - } - - if (params.mvh_site_rate) { - RateMeyerHaeseler *rate_mvh = new RateMeyerHaeseler(params.rate_file, - &iqtree, params.rate_mh_type); - cout << endl << "Computing site-specific rates by " - << rate_mvh->full_name << "..." << endl; - rate_mvh->runIterativeProc(params, iqtree); - cout << endl << "BEST SCORE FOUND : " << iqtree.getBestScore()<< endl; - string mhrate_file = params.out_prefix; - mhrate_file += ".mhrate"; + if (params.print_branch_lengths) { + if (params.manuel_analytic_approx) { + cout << "Applying Manuel's analytic approximation.." 
<< endl; + iqtree.approxAllBranches(); + } + string brlen_file = params.out_prefix; + brlen_file += ".brlen"; + ofstream out; + out.open(brlen_file.c_str()); + iqtree.printBranchLengths(out); + out.close(); + cout << "Branch lengths written to " << brlen_file << endl; + } + + if (params.write_branches) { + string filename = string(params.out_prefix) + ".branches.csv"; + ofstream out; + out.open(filename.c_str()); + iqtree.writeBranches(out); + out.close(); + cout << "Branch lengths written to " << filename << endl; + } + + if (params.print_conaln && iqtree.isSuperTree()) { + string str = params.out_prefix; + str = params.out_prefix; + str += ".conaln"; + iqtree.aln->printAlignment(params.aln_output_format, str.c_str()); + } + + if (params.print_partition_info && iqtree.isSuperTree()) { + ASSERT(params.print_conaln); + string aln_file = (string)params.out_prefix + ".conaln"; + string partition_info = params.out_prefix; + partition_info += ".partinfo.nex"; + ((SuperAlignment*)(iqtree.aln))->printPartition(partition_info.c_str(), aln_file.c_str()); + partition_info = (string)params.out_prefix + ".partitions"; + ((SuperAlignment*)(iqtree.aln))->printPartitionRaxml(partition_info.c_str()); + } + + if (params.mvh_site_rate) { + RateMeyerHaeseler *rate_mvh = new RateMeyerHaeseler(params.rate_file, + &iqtree, params.rate_mh_type); + cout << endl << "Computing site-specific rates by " + << rate_mvh->full_name << "..." 
<< endl; + rate_mvh->runIterativeProc(params, iqtree); + cout << endl << "BEST SCORE FOUND : " << iqtree.getBestScore()<< endl; + string mhrate_file = params.out_prefix; + mhrate_file += ".mhrate"; try { ofstream out; out.exceptions(ios::failbit | ios::badbit); out.open(mhrate_file.c_str()); - iqtree.writeSiteRates(out); + iqtree.writeSiteRates(out, true); out.close(); } catch (ios::failure) { outError(ERR_WRITE_OUTPUT, mhrate_file); } - if (params.print_site_lh) { - string site_lh_file = params.out_prefix; - site_lh_file += ".mhsitelh"; - printSiteLh(site_lh_file.c_str(), &iqtree); - } - } + if (params.print_site_lh) { + string site_lh_file = params.out_prefix; + site_lh_file += ".mhsitelh"; + printSiteLh(site_lh_file.c_str(), &iqtree); + } + } - if (params.print_site_rate) { - string rate_file = params.out_prefix; - rate_file += ".rate"; - try { - ofstream out; - out.exceptions(ios::failbit | ios::badbit); - out.open(rate_file.c_str()); - out << "# Site-specific subtitution rates determined by empirical Bayesian method" << endl - << "# This file can be read in MS Excel or in R with command:" << endl - << "# tab=read.table('" << params.out_prefix << ".rate',header=TRUE)" << endl - << "# Columns are tab-separated with following meaning:" << endl; - if (iqtree.isSuperTree()) { - out << "# Part: Partition ID (1=" << ((PhyloSuperTree*)&iqtree)->part_info[0].name << ", etc)" << endl - << "# Site: Site ID within partition (starting from 1 for each partition)" << endl; - } else - out << "# Site: Alignment site ID" << endl; + if (params.print_site_rate & 1) { + string rate_file = params.out_prefix; + rate_file += ".rate"; + printSiteRates(iqtree, rate_file.c_str(), true); + } - out << "# Rate: Posterior mean site rate weighted by posterior probability" << endl - << "# Cat: Category with highest posterior (0=invariable, 1=slow, etc)" << endl - << "# C_Rate: Corresponding rate of highest category" << endl; - if (iqtree.isSuperTree()) - out << "Part\t"; - out << 
"Site\tRate\tCat\tC_Rate" << endl; - iqtree.writeSiteRates(out); - out.close(); - } catch (ios::failure) { - outError(ERR_WRITE_OUTPUT, rate_file); - } - cout << "Site rates printed to " << rate_file << endl; - } + if (params.print_site_rate & 2) { + string rate_file = params.out_prefix; + rate_file += ".mlrate"; + printSiteRates(iqtree, rate_file.c_str(), false); + } if (params.fixed_branch_length == BRLEN_SCALE) { string filename = (string)params.out_prefix + ".blscale"; @@ -1912,119 +2064,166 @@ void printMiscInfo(Params ¶ms, IQTree &iqtree, double *pattern_lh) { } void printFinalSearchInfo(Params ¶ms, IQTree &iqtree, double search_cpu_time, double search_real_time) { - cout << "Total tree length: " << iqtree.treeLength() << endl; - - if (iqtree.isSuperTree() && verbose_mode >= VB_MAX) { - PhyloSuperTree *stree = (PhyloSuperTree*) &iqtree; - cout << stree->evalNNIs << " NNIs evaluated from " << stree->totalNNIs << " all possible NNIs ( " << - (int)(((stree->evalNNIs+1.0)/(stree->totalNNIs+1.0))*100.0) << " %)" << endl; - cout<<"Details for subtrees:"<size(); part++){ - cout << part+1 <<". "<part_info[part].name<<": "<part_info[part].evalNNIs<<" ( " - << (int)(((stree->part_info[part].evalNNIs+1.0)/((stree->totalNNIs+1.0) / stree->size()))*100.0) - << " %)" << endl; - } - } - - params.run_time = (getCPUTime() - params.startCPUTime); - cout << endl; - cout << "Total number of iterations: " << iqtree.stop_rule.getCurIt() << endl; + cout << "Total tree length: " << iqtree.treeLength() << endl; + + if (iqtree.isSuperTree() && verbose_mode >= VB_MAX) { + PhyloSuperTree *stree = (PhyloSuperTree*) &iqtree; + cout << stree->evalNNIs << " NNIs evaluated from " << stree->totalNNIs << " all possible NNIs ( " << + (int)(((stree->evalNNIs+1.0)/(stree->totalNNIs+1.0))*100.0) << " %)" << endl; + cout<<"Details for subtrees:"<begin(); it != stree->end(); it++,part++){ + cout << part+1 << ". 
" << (*it)->aln->name << ": " << stree->part_info[part].evalNNIs<<" ( " + << (int)(((stree->part_info[part].evalNNIs+1.0)/((stree->totalNNIs+1.0) / stree->size()))*100.0) + << " %)" << endl; + } + } + + params.run_time = (getCPUTime() - params.startCPUTime); + cout << endl; + cout << "Total number of iterations: " << iqtree.stop_rule.getCurIt() << endl; // cout << "Total number of partial likelihood vector computations: " << iqtree.num_partial_lh_computations << endl; - cout << "CPU time used for tree search: " << search_cpu_time - << " sec (" << convert_time(search_cpu_time) << ")" << endl; - cout << "Wall-clock time used for tree search: " << search_real_time - << " sec (" << convert_time(search_real_time) << ")" << endl; - cout << "Total CPU time used: " << (double) params.run_time << " sec (" - << convert_time((double) params.run_time) << ")" << endl; - cout << "Total wall-clock time used: " - << getRealTime() - params.start_real_time << " sec (" - << convert_time(getRealTime() - params.start_real_time) << ")" << endl; + cout << "CPU time used for tree search: " << search_cpu_time + << " sec (" << convert_time(search_cpu_time) << ")" << endl; + cout << "Wall-clock time used for tree search: " << search_real_time + << " sec (" << convert_time(search_real_time) << ")" << endl; + cout << "Total CPU time used: " << (double) params.run_time << " sec (" + << convert_time((double) params.run_time) << ")" << endl; + cout << "Total wall-clock time used: " + << getRealTime() - params.start_real_time << " sec (" + << convert_time(getRealTime() - params.start_real_time) << ")" << endl; } void printTrees(vector trees, Params ¶ms, string suffix) { - ofstream treesOut((string(params.out_prefix) + suffix).c_str(), - ofstream::out); - for (vector::iterator it = trees.begin(); it != trees.end(); it++) { - treesOut << (*it); - treesOut << endl; - } - treesOut.close(); + ofstream treesOut((string(params.out_prefix) + suffix).c_str(), + ofstream::out); + for (vector::iterator it = 
trees.begin(); it != trees.end(); it++) { + treesOut << (*it); + treesOut << endl; + } + treesOut.close(); } /************************************************************ * MAIN TREE RECONSTRUCTION ***********************************************************/ -void runTreeReconstruction(Params ¶ms, string &original_model, IQTree* &iqtree, ModelCheckpoint &model_info) { +void startTreeReconstruction(Params ¶ms, IQTree* &iqtree, ModelCheckpoint &model_info) { if (params.root) { - string root_name = params.root; - if (iqtree->aln->getSeqID(root_name) < 0) - outError("Alignment does not have specified outgroup taxon ", params.root); + StrVector outgroup_names; + convert_string_vec(params.root, outgroup_names); + for (auto it = outgroup_names.begin(); it != outgroup_names.end(); it++) + if (iqtree->aln->getSeqID(*it) < 0) + outError("Alignment does not have specified outgroup taxon ", *it); + } + +// if (params.count_trees && pllTreeCounter == NULL) +// pllTreeCounter = new StringIntMap; + + // Temporary fix since PLL only supports DNA/Protein: switch to IQ-TREE parsimony kernel + if (params.start_tree == STT_PLL_PARSIMONY) { + if (iqtree->isSuperTreeUnlinked()) { + params.start_tree = STT_PARSIMONY; + } else if (iqtree->isSuperTree()) { + PhyloSuperTree *stree = (PhyloSuperTree*)iqtree; + for (PhyloSuperTree::iterator it = stree->begin(); it != stree->end(); it++) + if ((*it)->aln->seq_type != SEQ_DNA && (*it)->aln->seq_type != SEQ_PROTEIN) + params.start_tree = STT_PARSIMONY; + } else if (iqtree->aln->seq_type != SEQ_DNA && iqtree->aln->seq_type != SEQ_PROTEIN) + params.start_tree = STT_PARSIMONY; + } + + /***************** Initialization for PLL and sNNI ******************/ + if (params.start_tree == STT_PLL_PARSIMONY || params.start_tree == STT_RANDOM_TREE || params.pll) { + /* Initialized all data structure for PLL*/ + iqtree->initializePLL(params); + } + + /********************* Compute pairwise distances *******************/ + if (params.start_tree == STT_BIONJ || 
params.iqp || params.leastSquareBranch) { + computeInitialDist(params, *iqtree); } + + /******************** Pass the parameter object params to IQTree *******************/ + iqtree->setParams(¶ms); + + /*************** SET UP PARAMETERS and model testing ****************/ + + // FOR TUNG: swapping the order cause bug for -m TESTLINK +// iqtree.initSettings(params); + + runModelFinder(params, *iqtree, model_info); +} + +/** + optimize branch lengths of consensus tree + */ +void optimizeConTree(Params ¶ms, IQTree *tree) { + string contree_file = string(params.out_prefix) + ".contree"; + + DoubleVector rfdist; + tree->computeRFDist(contree_file.c_str(), rfdist); + tree->contree_rfdist = (int)rfdist[0]; + + tree->readTreeFile(contree_file); + + tree->initializeAllPartialLh(); + tree->fixNegativeBranch(false); + + tree->boot_consense_logl = tree->optimizeAllBranches(); + cout << "Log-likelihood of consensus tree: " << tree->boot_consense_logl << endl; + tree->setRootNode(params.root); + tree->insertTaxa(tree->removed_seqs, tree->twin_seqs); + tree->printTree(contree_file.c_str(), WT_BR_LEN | WT_BR_LEN_FIXED_WIDTH | WT_SORT_TAXA | WT_NEWLINE); + string contree = tree->getTreeString(); + tree->getCheckpoint()->put("contree", contree); +} +// todo: skip likelihood computations if nn does model selection +void runTreeReconstruction(Params ¶ms, IQTree* &iqtree) { - string dist_file; + // string dist_file; params.startCPUTime = getCPUTime(); params.start_real_time = getRealTime(); - + int absent_states = 0; if (iqtree->isSuperTree()) { - SuperAlignment *saln = (SuperAlignment*)iqtree->aln; PhyloSuperTree *stree = (PhyloSuperTree*)iqtree; - for (int i = 0; i < saln->partitions.size(); i++) - absent_states += saln->partitions[i]->checkAbsentStates("partition " + stree->part_info[i].name); + for (auto i = stree->begin(); i != stree->end(); i++) + absent_states += (*i)->aln->checkAbsentStates("partition " + (*i)->aln->name); } else { absent_states = 
iqtree->aln->checkAbsentStates("alignment"); } if (absent_states > 0) { cout << "NOTE: " << absent_states << " states (see above) are not present and thus removed from Markov process to prevent numerical problems" << endl; } - + // Make sure that no partial likelihood of IQ-TREE is initialized when PLL is used to save memory if (params.pll) { iqtree->deleteAllPartialLh(); } - -// if (params.count_trees && pllTreeCounter == NULL) -// pllTreeCounter = new StringIntMap; - - // Temporary fix since PLL only supports DNA/Protein: switch to IQ-TREE parsimony kernel - if (params.start_tree == STT_PLL_PARSIMONY) { - if (iqtree->isSuperTree()) { - PhyloSuperTree *stree = (PhyloSuperTree*)iqtree; - for (PhyloSuperTree::iterator it = stree->begin(); it != stree->end(); it++) - if ((*it)->aln->seq_type != SEQ_DNA && (*it)->aln->seq_type != SEQ_PROTEIN) - params.start_tree = STT_BIONJ; - } else if (iqtree->aln->seq_type != SEQ_DNA && iqtree->aln->seq_type != SEQ_PROTEIN) - params.start_tree = STT_PARSIMONY; - } - + /***************** Initialization for PLL and sNNI ******************/ - if (params.start_tree == STT_PLL_PARSIMONY || params.start_tree == STT_RANDOM_TREE || params.pll) { + if ((params.start_tree == STT_PLL_PARSIMONY || params.start_tree == STT_RANDOM_TREE || params.pll) && !iqtree->isInitializedPLL()) { /* Initialized all data structure for PLL*/ - iqtree->initializePLL(params); + iqtree->initializePLL(params); } - - + + /********************* Compute pairwise distances *******************/ - if (params.start_tree == STT_BIONJ || params.iqp || params.leastSquareBranch) { - computeInitialDist(params, *iqtree, dist_file); + if ((params.start_tree == STT_BIONJ || params.iqp || params.leastSquareBranch) && !iqtree->root) { + computeInitialDist(params, *iqtree); } - + /******************** Pass the parameter object params to IQTree *******************/ iqtree->setParams(¶ms); + + ModelsBlock *models_block = readModelsDefinition(params); - /*************** SET UP 
PARAMETERS and model testing ****************/ - - // FOR TUNG: swapping the order cause bug for -m TESTLINK -// iqtree.initSettings(params); - - ModelsBlock *models_block = readModelsDefinition(params); - - initializeParams(params, *iqtree, model_info, models_block, dist_file); + initializeParams(params, *iqtree); - if (posRateHeterotachy(params.model_name) != string::npos && !iqtree->isMixlen()) { + if (posRateHeterotachy(iqtree->aln->model_name) != string::npos && !iqtree->isMixlen()) { // create a new instance IQTree* iqtree_new = new PhyloTreeMixlen(iqtree->aln, 0); iqtree_new->setCheckpoint(iqtree->getCheckpoint()); @@ -2037,24 +2236,31 @@ void runTreeReconstruction(Params ¶ms, string &original_model, IQTree* &iqtr iqtree_new->initializePLL(params); } iqtree_new->setParams(¶ms); - iqtree_new->copyPhyloTree(iqtree); + iqtree_new->copyPhyloTree(iqtree, false); // replace iqtree object delete iqtree; iqtree = iqtree_new; - } - iqtree->setRootNode(params.root); + if (!params.compute_ml_tree_only) { + iqtree->setRootNode(params.root); + } iqtree->restoreCheckpoint(); + + if (params.online_bootstrap && params.gbo_replicates > 0) { + cout << "Generating " << params.gbo_replicates << " samples for ultrafast " + << RESAMPLE_NAME << " (seed: " << params.ran_seed << ")..." << endl; + } + iqtree->initSettings(params); /*********************** INITIAL MODEL OPTIMIZATION *****************/ if (!iqtree->getModelFactory()) { - iqtree->initializeModel(params, params.model_name, models_block); + iqtree->initializeModel(params, iqtree->aln->model_name, models_block); } if (iqtree->getRate()->isHeterotachy() && !iqtree->isMixlen()) { ASSERT(0 && "Heterotachy tree not properly created"); @@ -2066,19 +2272,19 @@ void runTreeReconstruction(Params ¶ms, string &original_model, IQTree* &iqtr // UpperBounds analysis. 
Here, to analyse the initial tree without any tree search or optimization /* if (params.upper_bound) { - iqtree.setCurScore(iqtree.computeLikelihood()); - cout<= VB_MED) { - cout << "ML-TREE SEARCH START WITH THE FOLLOWING PARAMETERS:" << endl; + cout << "ML-TREE SEARCH START WITH THE FOLLOWING PARAMETERS:" << endl; int model_df = iqtree->getModelFactory()->getNParameters(BRLEN_OPTIMIZE); - printAnalysisInfo(model_df, *iqtree, params); + printAnalysisInfo(model_df, *iqtree, params); } if (!params.pll) { @@ -2114,8 +2320,8 @@ void runTreeReconstruction(Params ¶ms, string &original_model, IQTree* &iqtr //#else // cout << "NOTE: " << ((double) mem_size / 1000.0) / 1000 << " MB RAM is required!" << endl; //#endif - if (params.memCheck) - exit(0); + if (params.memCheck) + exit(0); #ifdef BINARY32 if (mem_required >= 2000000000) { outError("Memory required exceeds 2GB limit of 32-bit executable"); @@ -2129,212 +2335,218 @@ void runTreeReconstruction(Params ¶ms, string &original_model, IQTree* &iqtr } } - -#ifdef _OPENMP - if (iqtree->num_threads <= 0) { - int bestThreads = iqtree->testNumThreads(); - omp_set_num_threads(bestThreads); - params.num_threads = bestThreads; - } else - iqtree->warnNumThreads(); -#endif - - - iqtree->initializeAllPartialLh(); - double initEpsilon = params.min_iterations == 0 ? 
params.modelEps : (params.modelEps*10); - - - if (iqtree->getRate()->name.find("+I+G") != string::npos) { - if (params.alpha_invar_file != NULL) { // COMPUTE TREE LIKELIHOOD BASED ON THE INPUT ALPHA AND P_INVAR VALUE - computeLoglFromUserInputGAMMAInvar(params, *iqtree); - exit(0); - } - - if (params.exh_ai) { - exhaustiveSearchGAMMAInvar(params, *iqtree); - exit(0); - } - - } - - // Optimize model parameters and branch lengths using ML for the initial tree - string initTree; - iqtree->clearAllPartialLH(); - - iqtree->getModelFactory()->restoreCheckpoint(); - if (iqtree->getCheckpoint()->getBool("finishedModelInit")) { - // model optimization already done: ignore this step - if (!iqtree->candidateTrees.empty()) - iqtree->readTreeString(iqtree->getBestTrees()[0]); - iqtree->setCurScore(iqtree->computeLikelihood()); - initTree = iqtree->getTreeString(); - cout << "CHECKPOINT: Model parameters restored, LogL: " << iqtree->getCurScore() << endl; - } else { - initTree = iqtree->optimizeModelParameters(true, initEpsilon); - if (iqtree->isMixlen()) - initTree = ((ModelFactoryMixlen*)iqtree->getModelFactory())->sortClassesByTreeLength(); - - iqtree->saveCheckpoint(); - iqtree->getModelFactory()->saveCheckpoint(); - iqtree->getCheckpoint()->putBool("finishedModelInit", true); - iqtree->getCheckpoint()->dump(); -// cout << "initTree: " << initTree << endl; - } - - if (params.lmap_num_quartets >= 0) { - cout << endl << "Performing likelihood mapping with "; - if (params.lmap_num_quartets > 0) - cout << params.lmap_num_quartets; - else - cout << "all"; - cout << " quartets..." << endl; - double lkmap_time = getRealTime(); - iqtree->doLikelihoodMapping(); - cout << "Likelihood mapping needed " << getRealTime()-lkmap_time << " seconds" << endl << endl; - } + bool finishedInitTree = false; + double initEpsilon = params.min_iterations == 0 ? 
params.modelEps : (params.modelEps*10); + string initTree; + //None of his will work until there is actually a tree + //(we cannot do it until we *have* one). + if (!params.compute_ml_tree_only) { + iqtree->ensureNumberOfThreadsIsSet(¶ms); - // TODO: why is this variable not used? - // ANSWER: moved to doTreeSearch -// bool finishedCandidateSet = iqtree.getCheckpoint()->getBool("finishedCandidateSet"); - bool finishedInitTree = iqtree->getCheckpoint()->getBool("finishedInitTree"); - - // now overwrite with random tree - if (params.start_tree == STT_RANDOM_TREE && !finishedInitTree) { - cout << "Generate random initial Yule-Harding tree..." << endl; - iqtree->generateRandomTree(YULE_HARDING); - iqtree->wrapperFixNegativeBranch(true); iqtree->initializeAllPartialLh(); - initTree = iqtree->optimizeBranches(params.brlen_num_traversal); - cout << "Log-likelihood of random tree: " << iqtree->getCurScore() << endl; - } - - /****************** NOW PERFORM MAXIMUM LIKELIHOOD TREE RECONSTRUCTION ******************/ - - // Update best tree - if (!finishedInitTree) { - iqtree->addTreeToCandidateSet(initTree, iqtree->getCurScore(), false, MPIHelper::getInstance().getProcessID()); - iqtree->printResultTree(); - iqtree->intermediateTrees.update(iqtree->getTreeString(), iqtree->getCurScore()); + + if (iqtree->getRate()->name.find("+I+G") != string::npos) { + if (params.alpha_invar_file != NULL) { // COMPUTE TREE LIKELIHOOD BASED ON THE INPUT ALPHA AND P_INVAR VALUE + computeLoglFromUserInputGAMMAInvar(params, *iqtree); + exit(0); + } + if (params.exh_ai) { + exhaustiveSearchGAMMAInvar(params, *iqtree); + exit(0); + } + } + + // Optimize model parameters and branch lengths using ML for the initial tree + iqtree->clearAllPartialLH(); + initTree = iqtree->ensureModelParametersAreSet(initEpsilon); + + + if (params.lmap_num_quartets >= 0) { + cout << endl << "Performing likelihood mapping with "; + if (params.lmap_num_quartets > 0) + cout << params.lmap_num_quartets; + else + cout << 
"all"; + cout << " quartets..." << endl; + double lkmap_time = getRealTime(); + iqtree->doLikelihoodMapping(); + cout << "Likelihood mapping needed " << getRealTime()-lkmap_time << " seconds" << endl << endl; + } + + finishedInitTree = iqtree->getCheckpoint()->getBool("finishedInitTree"); + + // now overwrite with random tree + if (params.start_tree == STT_RANDOM_TREE && !finishedInitTree) { + cout << "Generate random initial Yule-Harding tree..." << endl; + iqtree->generateRandomTree(YULE_HARDING); + iqtree->wrapperFixNegativeBranch(true); + iqtree->initializeAllPartialLh(); + initTree = iqtree->optimizeBranches(params.brlen_num_traversal); + cout << "Log-likelihood of random tree: " << iqtree->getCurScore() << endl; + } + + /****************** NOW PERFORM MAXIMUM LIKELIHOOD TREE RECONSTRUCTION ******************/ + + // Update best tree + if (!finishedInitTree) { + iqtree->addTreeToCandidateSet(initTree, iqtree->getCurScore(), false, MPIHelper::getInstance().getProcessID()); + iqtree->printResultTree(); + iqtree->intermediateTrees.update(iqtree->getTreeString(), iqtree->getCurScore()); + if (iqtree->isSuperTreeUnlinked()) { + PhyloSuperTree* stree = (PhyloSuperTree*)iqtree; + for (auto it = stree->begin(); it != stree->end(); it++) + ((IQTree*)(*it))->addTreeToCandidateSet((*it)->getTreeString(), + (*it)->getCurScore(), false, MPIHelper::getInstance().getProcessID()); + } + } + + if (params.min_iterations && !iqtree->isBifurcating()) { + outError("Tree search does not work with initial multifurcating tree. 
Please specify `-n 0` to avoid this."); + } + + // Compute maximum likelihood distance + // ML distance is only needed for NNI/IQP + + if ((params.min_iterations <= 1 || params.numInitTrees <= 1) && params.start_tree != STT_BIONJ) { + params.compute_ml_dist = false; + } + if ((params.user_file || params.start_tree == STT_RANDOM_TREE) && params.snni && !params.iqp) { + params.compute_ml_dist = false; + } + if (params.constraint_tree_file) { + params.compute_ml_dist = false; + } + if (iqtree->isSuperTreeUnlinked()) { + params.compute_ml_dist = false; + } + //Todo: Check: is it always true that we've done this, if we reach this line? + cout << "Wrote distance file to... " << iqtree->getDistanceFileWritten() << endl; } - - if (params.min_iterations && !iqtree->isBifurcating()) - outError("Tree search does not work with initial multifurcating tree. Please specify `-n 0` to avoid this."); - - - // Compute maximum likelihood distance - // ML distance is only needed for IQP -// if ( params.start_tree != STT_BIONJ && ((params.snni && !params.iqp) || params.min_iterations == 0)) { -// params.compute_ml_dist = false; -// } - if ((params.min_iterations <= 1 || params.numInitTrees <= 1) && params.start_tree != STT_BIONJ) - params.compute_ml_dist = false; - - if ((params.user_file || params.start_tree == STT_RANDOM_TREE) && params.snni && !params.iqp) { - params.compute_ml_dist = false; + bool wantMLDistances = MPIHelper::getInstance().isMaster() && !iqtree->getCheckpoint()->getBool("finishedCandidateSet"); + if (wantMLDistances) { + wantMLDistances = !finishedInitTree && ((!params.dist_file && params.compute_ml_dist) || params.leastSquareBranch); } - - if (params.constraint_tree_file) - params.compute_ml_dist = false; - - //Generate BIONJ tree - if (MPIHelper::getInstance().isMaster() && !iqtree->getCheckpoint()->getBool("finishedCandidateSet")) { - if (!finishedInitTree && ((!params.dist_file && params.compute_ml_dist) || params.leastSquareBranch)) { - computeMLDist(params, 
*iqtree, dist_file, getCPUTime()); - if (!params.user_file && params.start_tree != STT_RANDOM_TREE) { - // NEW 2015-08-10: always compute BIONJ tree into the candidate set - iqtree->resetCurScore(); + + //Compute ML distances, and generate BIONJ tree from those + if (wantMLDistances || params.compute_ml_tree_only) { + computeMLDist(params, *iqtree, getRealTime(), getCPUTime()); + bool wasMLDistanceWrittenToFile = false; + if (!params.user_file) { + if (params.start_tree != STT_RANDOM_TREE) { + if (!params.compute_ml_tree_only) { + iqtree->resetCurScore(); + } double start_bionj = getRealTime(); - bool orig_rooted = iqtree->rooted; - iqtree->rooted = false; - iqtree->computeBioNJ(params, iqtree->aln, dist_file); - cout << getRealTime() - start_bionj << " seconds" << endl; - if (iqtree->isSuperTree()) - iqtree->wrapperFixNegativeBranch(true); - else - iqtree->wrapperFixNegativeBranch(false); - if (orig_rooted) - iqtree->convertToRooted(); - iqtree->initializeAllPartialLh(); - if (params.start_tree == STT_BIONJ) { - initTree = iqtree->optimizeModelParameters(params.min_iterations==0, initEpsilon); + iqtree->computeBioNJ(params); + if (verbose_mode >= VB_MED) { + cout << "Wall-clock time spent creating initial tree was " + << getRealTime() - start_bionj << " seconds" << endl; + } + wasMLDistanceWrittenToFile = !params.dist_file; + if (params.compute_ml_tree_only) { + iqtree->initializeAllPartialPars(); + iqtree->clearAllPartialLH(); + iqtree->fixNegativeBranch(iqtree->isSuperTree()); + //Because wrapperFixNegativeBranch resets the score, + //and that complains if the number of threads isn't set. + //(but... if you set the number of threads first, it + //complains about the negative path lengths). + + //This fails if there are any lengths <=0 (so it has to + //go after the fix-up for negative branch lengths). 
+ auto threadCount = iqtree->ensureNumberOfThreadsIsSet(¶ms); + cout << "Number of threads is " << threadCount << endl; + initTree = iqtree->ensureModelParametersAreSet(initEpsilon); } else { - initTree = iqtree->optimizeBranches(); + iqtree->wrapperFixNegativeBranch(iqtree->isSuperTree()); + iqtree->initializeAllPartialLh(); + if (params.start_tree == STT_BIONJ) { + initTree = iqtree->optimizeModelParameters(params.min_iterations==0, initEpsilon); + } else { + initTree = iqtree->optimizeBranches(); + } } - cout << "Log-likelihood of BIONJ tree: " << iqtree->getCurScore() << endl; -// cout << "BIONJ tree: " << iqtree->getTreeString() << endl; + cout << "Log-likelihood of " << params.start_tree_subtype_name + << " tree: " << iqtree->getCurScore() << endl; iqtree->candidateTrees.update(initTree, iqtree->getCurScore()); } } + if (!wasMLDistanceWrittenToFile && !params.dist_file) { + double write_begin_time = getRealTime(); + iqtree->printDistanceFile(); + if (verbose_mode >= VB_MED) { + #ifdef _OPENMP + #pragma omp critical (io) + #endif + cout << "Time taken to write distance file ( " << iqtree->dist_file << ") : " + << getRealTime() - write_begin_time << " seconds " << endl; + } + } } - -// iqtree->saveCheckpoint(); + //iqtree->saveCheckpoint(); - double cputime_search_start = getCPUTime(); + double cputime_search_start = getCPUTime(); double realtime_search_start = getRealTime(); if (params.leastSquareNNI) { - iqtree->computeSubtreeDists(); - } - - if (original_model == "WHTEST") { - cout << endl << "Testing model homogeneity by Weiss & von Haeseler (2003)..." 
<< endl; - WHTest(params, *iqtree); - } - - NodeVector pruned_taxa; - StrVector linked_name; - double *saved_dist_mat = iqtree->dist_matrix; - double *pattern_lh; - - pattern_lh = new double[iqtree->getAlnNPattern()]; - - // prune stable taxa - pruneTaxa(params, *iqtree, pattern_lh, pruned_taxa, linked_name); - - /***************************************** DO STOCHASTIC TREE SEARCH *******************************************/ - if (params.min_iterations > 0 && !params.tree_spr) { - iqtree->doTreeSearch(); - iqtree->setAlignment(iqtree->aln); - cout << "TREE SEARCH COMPLETED AFTER " << iqtree->stop_rule.getCurIt() << " ITERATIONS" - << " / Time: " << convert_time(getRealTime() - params.start_real_time) << endl << endl; - } else { - /* do SPR with likelihood function */ - if (params.tree_spr) { - //tree.optimizeSPRBranches(); - cout << "Doing SPR Search" << endl; - cout << "Start tree.optimizeSPR()" << endl; - double spr_score = iqtree->optimizeSPR(); - cout << "Finish tree.optimizeSPR()" << endl; - //double spr_score = tree.optimizeSPR(tree.curScore, (PhyloNode*) tree.root->neighbors[0]->node); - if (spr_score <= iqtree->getCurScore()) { - cout << "SPR search did not found any better tree" << endl; - } - } - } - - // restore pruned taxa - restoreTaxa(*iqtree, saved_dist_mat, pruned_taxa, linked_name); + iqtree->computeSubtreeDists(); + } + if (params.model_name == "WHTEST") { + cout << endl << "Testing model homogeneity by Weiss & von Haeseler (2003)..." 
<< endl; + WHTest(params, *iqtree); + } + NodeVector pruned_taxa; + StrVector linked_name; + double *saved_dist_mat = iqtree->dist_matrix; + double *pattern_lh = new double[iqtree->getAlnNPattern()]; + + // prune stable taxa + pruneTaxa(params, *iqtree, pattern_lh, pruned_taxa, linked_name); + + /***************************************** DO STOCHASTIC TREE SEARCH *******************************************/ + if (params.min_iterations > 0 && !params.tree_spr) { + iqtree->doTreeSearch(); + iqtree->setAlignment(iqtree->aln); + } else { + iqtree->candidateTrees.saveCheckpoint(); + /* do SPR with likelihood function */ + if (params.tree_spr) { + //tree.optimizeSPRBranches(); + cout << "Doing SPR Search" << endl; + cout << "Start tree.optimizeSPR()" << endl; + double spr_score = iqtree->optimizeSPR(); + cout << "Finish tree.optimizeSPR()" << endl; + //double spr_score = tree.optimizeSPR(tree.curScore, (PhyloNode*) tree.root->neighbors[0]->node); + if (spr_score <= iqtree->getCurScore()) { + cout << "SPR search did not found any better tree" << endl; + } + } + } + // restore pruned taxa + restoreTaxa(*iqtree, saved_dist_mat, pruned_taxa, linked_name); - double search_cpu_time = getCPUTime() - cputime_search_start; - double search_real_time = getRealTime() - realtime_search_start; + double search_cpu_time = getCPUTime() - cputime_search_start; + double search_real_time = getRealTime() - realtime_search_start; // COMMENT THIS OUT BECAUSE IT DELETES ALL BRANCH LENGTHS OF SUBTREES! 
-// if (iqtree.isSuperTree()) -// ((PhyloSuperTree*) iqtree)->mapTrees(); +// if (iqtree.isSuperTree()) +// ((PhyloSuperTree*) iqtree)->mapTrees(); if (!MPIHelper::getInstance().isMaster()) { delete[] pattern_lh; return; } - if (params.snni && params.min_iterations && verbose_mode >= VB_MED) { - cout << "Log-likelihoods of " << params.popSize << " best candidate trees: " << endl; - iqtree->printBestScores(); - cout << endl; - } + if (params.snni && params.min_iterations && verbose_mode >= VB_MED) { + cout << "Log-likelihoods of " << params.popSize << " best candidate trees: " << endl; + iqtree->printBestScores(); + cout << endl; + } - if (params.min_iterations) { - iqtree->readTreeString(iqtree->getBestTrees()[0]); + if (!params.final_model_opt) { + iqtree->setCurScore(iqtree->computeLikelihood()); + } else if (params.min_iterations) { + iqtree->readTreeString(iqtree->getBestTrees()[0]); iqtree->initializeAllPartialLh(); iqtree->clearAllPartialLH(); cout << "--------------------------------------------------------------------" << endl; @@ -2349,8 +2561,6 @@ void runTreeReconstruction(Params ¶ms, string &original_model, IQTree* &iqtr string tree; Params::getInstance().fixStableSplits = false; Params::getInstance().tabu = false; - // why doing NNI search here? 
-// iqtree->doNNISearch(); tree = iqtree->optimizeModelParameters(true); iqtree->addTreeToCandidateSet(tree, iqtree->getCurScore(), false, MPIHelper::getInstance().getProcessID()); iqtree->getCheckpoint()->putBool("finishedModelFinal", true); @@ -2359,65 +2569,73 @@ void runTreeReconstruction(Params ¶ms, string &original_model, IQTree* &iqtr } - if (iqtree->isSuperTree()) - ((PhyloSuperTree*) iqtree)->computeBranchLengths(); + if (iqtree->isSuperTree()) { + ((PhyloSuperTree*) iqtree)->computeBranchLengths(); + ((PhyloSuperTree*) iqtree)->printBestPartitionParams((string(params.out_prefix) + ".best_model.nex").c_str()); + } - cout << "BEST SCORE FOUND : " << iqtree->getCurScore() << endl; + cout << "BEST SCORE FOUND : " << iqtree->getCurScore() << endl; - if (params.write_candidate_trees) { - printTrees(iqtree->getBestTrees(), params, ".imd_trees"); - } + if (params.write_candidate_trees) { + printTrees(iqtree->getBestTrees(), params, ".imd_trees"); + } - if (params.pll) - iqtree->inputModelPLL2IQTree(); + if (params.pll) + iqtree->inputModelPLL2IQTree(); - /* root the tree at the first sequence */ + /* root the tree at the first sequence */ // BQM: WHY SETTING THIS ROOT NODE???? 
-// iqtree->root = iqtree->findLeafName(iqtree->aln->getSeqName(0)); -// assert(iqtree->root); +// iqtree->root = iqtree->findLeafName(iqtree->aln->getSeqName(0)); +// assert(iqtree->root); iqtree->setRootNode(params.root); - if (!params.pll) { - iqtree->computeLikelihood(pattern_lh); - // compute logl variance - iqtree->logl_variance = iqtree->computeLogLVariance(); - } + if (!params.pll) { + iqtree->computeLikelihood(pattern_lh); + // compute logl variance + iqtree->logl_variance = iqtree->computeLogLVariance(); + } - printMiscInfo(params, *iqtree, pattern_lh); + printMiscInfo(params, *iqtree, pattern_lh); - /****** perform SH-aLRT test ******************/ - if ((params.aLRT_replicates > 0 || params.localbp_replicates > 0 || params.aLRT_test || params.aBayes_test) && !params.pll) { - double mytime = getCPUTime(); - params.aLRT_replicates = max(params.aLRT_replicates, params.localbp_replicates); + if (params.root_test) { + cout << "Testing root positions..." << endl; + iqtree->testRootPosition(true, params.loglh_epsilon); + } + + /****** perform SH-aLRT test ******************/ + if ((params.aLRT_replicates > 0 || params.localbp_replicates > 0 || params.aLRT_test || params.aBayes_test) && !params.pll) { + double mytime = getRealTime(); + params.aLRT_replicates = max(params.aLRT_replicates, params.localbp_replicates); cout << endl; if (params.aLRT_replicates > 0) cout << "Testing tree branches by SH-like aLRT with " - << params.aLRT_replicates << " replicates..." << endl; + << params.aLRT_replicates << " replicates..." << endl; if (params.localbp_replicates) cout << "Testing tree branches by local-BP test with " << params.localbp_replicates << " replicates..." << endl; if (params.aLRT_test) cout << "Testing tree branches by aLRT parametric test..." << endl; if (params.aBayes_test) cout << "Testing tree branches by aBayes parametric test..." 
<< endl; - iqtree->setRootNode(params.root); + iqtree->setRootNode(params.root); if (iqtree->isBifurcating()) { iqtree->testAllBranches(params.aLRT_threshold, iqtree->getCurScore(), pattern_lh, params.aLRT_replicates, params.localbp_replicates, params.aLRT_test, params.aBayes_test); - cout << "CPU Time used: " << getCPUTime() - mytime << " sec." << endl; + cout << getRealTime() - mytime << " sec." << endl; } else { outWarning("Tree is multifurcating and such test is not applicable"); params.aLRT_replicates = params.localbp_replicates = params.aLRT_test = params.aBayes_test = 0; } - } - - if (params.gbo_replicates > 0) { - if (!params.online_bootstrap) - outError("Obsolete feature"); -// runGuidedBootstrap(params, iqtree->aln, iqtree); - else - iqtree->summarizeBootstrap(params); - } + } + + if (params.gbo_replicates > 0) { + cout << "Creating " << RESAMPLE_NAME << " support values..." << endl; + if (!params.online_bootstrap) + outError("Obsolete feature"); +// runGuidedBootstrap(params, iqtree->aln, iqtree); + else + iqtree->summarizeBootstrap(params); + } if (params.collapse_zero_branch) { cout << "Collapsing near-zero internal branches... "; @@ -2425,10 +2643,35 @@ void runTreeReconstruction(Params ¶ms, string &original_model, IQTree* &iqtr cout << " collapsed" << endl; } - printFinalSearchInfo(params, *iqtree, search_cpu_time, search_real_time); + printFinalSearchInfo(params, *iqtree, search_cpu_time, search_real_time); - // BUG FIX: readTreeString(bestTreeString) not needed before this line - iqtree->printResultTree(); + if (params.gbo_replicates && params.online_bootstrap && params.print_ufboot_trees) + iqtree->writeUFBootTrees(params); + + if (params.gbo_replicates && params.online_bootstrap && !iqtree->isSuperTreeUnlinked()) { + + cout << endl << "Computing " << RESAMPLE_NAME << " consensus tree..." << endl; + string splitsfile = params.out_prefix; + splitsfile += ".splits.nex"; + double weight_threshold = (params.split_threshold<1) ? 
params.split_threshold : (params.gbo_replicates-1.0)/params.gbo_replicates; + weight_threshold *= 100.0; + computeConsensusTree(splitsfile.c_str(), 0, 1e6, -1, + weight_threshold, NULL, params.out_prefix, NULL, ¶ms); + // now optimize branch lengths of the consensus tree + string current_tree = iqtree->getTreeString(); + optimizeConTree(params, iqtree); + // revert the best tree + iqtree->readTreeString(current_tree); + } + if (Params::getInstance().writeDistImdTrees) { + cout << endl; + cout << "Recomputing the log-likelihood of the intermediate trees ... " << endl; + iqtree->intermediateTrees.recomputeLoglOfAllTrees(*iqtree); + } + + // BUG FIX: readTreeString(bestTreeString) not needed before this line + iqtree->printResultTree(); + iqtree->saveCheckpoint(); if (params.upper_bound_NNI) { string out_file_UB = params.out_prefix; @@ -2442,83 +2685,281 @@ void runTreeReconstruction(Params ¶ms, string &original_model, IQTree* &iqtr out_UB.close(); } - if (params.out_file) - iqtree->printTree(params.out_file); + if (params.out_file) + iqtree->printTree(params.out_file); - delete[] pattern_lh; + delete[] pattern_lh; - runApproximateBranchLengths(params, *iqtree); + runApproximateBranchLengths(params, *iqtree); } +/********************************************************** + * MULTIPLE TREE RECONSTRUCTION + ***********************************************************/ +void runMultipleTreeReconstruction(Params ¶ms, Alignment *alignment, IQTree *tree) { + ModelCheckpoint *model_info = new ModelCheckpoint; + + if (params.suppress_output_flags & OUT_TREEFILE) + outError("Suppress .treefile not allowed with -runs option"); + string treefile_name = params.out_prefix; + treefile_name += ".treefile"; + string runtrees_name = params.out_prefix; + runtrees_name += ".runtrees"; + DoubleVector runLnL; + + if (tree->getCheckpoint()->getVector("runLnL", runLnL)) { + cout << endl << "CHECKPOINT: " << runLnL.size() << " independent run(s) restored" << endl; + } else if 
(MPIHelper::getInstance().isMaster()) { + // first empty the runtrees file + try { + ofstream tree_out; + tree_out.exceptions(ios::failbit | ios::badbit); + tree_out.open(runtrees_name.c_str()); + tree_out.close(); + } catch (ios::failure) { + outError(ERR_WRITE_OUTPUT, runtrees_name); + } + } + + double start_time = getCPUTime(); + double start_real_time = getRealTime(); + + int orig_seed = params.ran_seed; + int run; + int best_run = 0; + for (run = 0; run < runLnL.size(); run++) + if (runLnL[run] > runLnL[best_run]) + best_run = run; + + // do multiple tree reconstruction + for (run = runLnL.size(); run < params.num_runs; run++) { + + tree->getCheckpoint()->startStruct("run" + convertIntToString(run+1)); + + params.ran_seed = orig_seed + run*1000 + MPIHelper::getInstance().getProcessID(); + + cout << endl << "---> START RUN NUMBER " << run + 1 << " (seed: " << params.ran_seed << ")" << endl; + + tree->getCheckpoint()->put("seed", params.ran_seed); + + // initialize random stream for replicating the run + + int *saved_randstream = randstream; + init_random(params.ran_seed); + + IQTree *iqtree; + if (alignment->isSuperAlignment()){ + if(params.partition_type != BRLEN_OPTIMIZE){ + iqtree = new PhyloSuperTreePlen((SuperAlignment*) alignment, (PhyloSuperTree*) tree); + } else { + iqtree = new PhyloSuperTree((SuperAlignment*) alignment, (PhyloSuperTree*) tree); + } + } else { + // allocate heterotachy tree if neccessary + int pos = posRateHeterotachy(alignment->model_name); + + if (params.num_mixlen > 1) { + iqtree = new PhyloTreeMixlen(alignment, params.num_mixlen); + } else if (pos != string::npos) { + iqtree = new PhyloTreeMixlen(alignment, 0); + } else + iqtree = new IQTree(alignment); + } + + if (!tree->constraintTree.empty()) { + iqtree->constraintTree.readConstraint(tree->constraintTree); + } + + // set checkpoint + iqtree->setCheckpoint(tree->getCheckpoint()); + iqtree->num_precision = tree->num_precision; + + runTreeReconstruction(params, iqtree); + // read 
in the output tree file + stringstream ss; + iqtree->printTree(ss); + if (MPIHelper::getInstance().isMaster()) + try { + ofstream tree_out; + tree_out.exceptions(ios::failbit | ios::badbit); + tree_out.open(runtrees_name.c_str(), ios_base::out | ios_base::app); + tree_out.precision(10); + tree_out << "[ lh=" << iqtree->getBestScore() << " ]"; + tree_out << ss.str() << endl; + tree_out.close(); + } catch (ios::failure) { + outError(ERR_WRITE_OUTPUT, runtrees_name); + } + // fix bug: set the model for original tree after testing + if ((params.model_name.substr(0,4) == "TEST" || params.model_name.substr(0,2) == "MF") && tree->isSuperTree()) { + PhyloSuperTree *stree = ((PhyloSuperTree*)tree); + stree->part_info = ((PhyloSuperTree*)iqtree)->part_info; + } + runLnL.push_back(iqtree->getBestScore()); + + if (MPIHelper::getInstance().isMaster()) { + if (params.num_bootstrap_samples > 0 && params.consensus_type == CT_CONSENSUS_TREE && + (run == 0 || iqtree->getBestScore() > runLnL[best_run])) { + // 2017-12-08: optimize branch lengths of consensus tree + // now optimize branch lengths of the consensus tree + string current_tree = iqtree->getTreeString(); + optimizeConTree(params, iqtree); + // revert the best tree + iqtree->readTreeString(current_tree); + iqtree->saveCheckpoint(); + } + } + if (iqtree->getBestScore() > runLnL[best_run]) + best_run = run; + + if (params.num_runs == 1) + reportPhyloAnalysis(params, *iqtree, *model_info); + delete iqtree; + + tree->getCheckpoint()->endStruct(); + // clear all checkpointed information +// tree->getCheckpoint()->keepKeyPrefix("iqtree"); + tree->getCheckpoint()->putVector("runLnL", runLnL); +// tree->getCheckpoint()->putBool("finished", false); + tree->getCheckpoint()->dump(true); + // restore randstream + finish_random(); + randstream = saved_randstream; + } + + cout << endl << "---> SUMMARIZE RESULTS FROM " << runLnL.size() << " RUNS" << endl << endl; + + cout << "Run " << best_run+1 << " gave best log-likelihood: " << 
runLnL[best_run] << endl; + + // initialize tree and model strucgture + ModelsBlock *models_block = readModelsDefinition(params); + tree->setParams(¶ms); + tree->setNumThreads(params.num_threads); + if (!tree->getModelFactory()) { + tree->initializeModel(params, tree->aln->model_name, models_block); + } + if (tree->getRate()->isHeterotachy() && !tree->isMixlen()) { + ASSERT(0 && "Heterotachy tree not properly created"); + } + delete models_block; + + // restore the tree and model from the best run + tree->getCheckpoint()->startStruct("run" + convertIntToString(best_run+1)); + tree->restoreCheckpoint(); + tree->getModelFactory()->restoreCheckpoint(); + tree->setCurScore(runLnL[best_run]); + if (params.gbo_replicates && !tree->isSuperTreeUnlinked()) { + + string out_file = (string)params.out_prefix + ".splits"; + if (params.print_splits_file) { + tree->boot_splits.back()->saveFile(out_file.c_str(), IN_OTHER, true); + cout << "Split supports printed to star-dot file " << out_file << endl; + } + + if (params.print_splits_nex_file) { + out_file = (string)params.out_prefix + ".splits.nex"; + tree->boot_splits.back()->saveFile(out_file.c_str(), IN_NEXUS, false); + cout << "Split supports printed to NEXUS file " << out_file << endl; + } + + // overwrite .ufboot trees + if (params.print_ufboot_trees) + tree->writeUFBootTrees(params); + + // overwrite .contree + string contree; + if (!tree->getCheckpoint()->getString("contree", contree)) + ASSERT(0 && "Couldn't restore contree"); + string contree_file = string(params.out_prefix) + ".contree"; + string current_tree = tree->getTreeString(); + tree->readTreeString(contree); + tree->setRootNode(params.root); + tree->insertTaxa(tree->removed_seqs, tree->twin_seqs); + tree->printTree(contree_file.c_str(), WT_BR_LEN | WT_BR_LEN_FIXED_WIDTH | WT_SORT_TAXA | WT_NEWLINE); + tree->readTreeString(current_tree); + cout << "Consensus tree written to " << contree_file << endl; + } + tree->getCheckpoint()->endStruct(); + + // overwrite 
.treefile + tree->printResultTree(); + + if (MPIHelper::getInstance().isMaster()) { + cout << "Total CPU time for " << params.num_runs << " runs: " << (getCPUTime() - start_time) << " seconds." << endl; + cout << "Total wall-clock time for " << params.num_runs << " runs: " << (getRealTime() - start_real_time) << " seconds." << endl << endl; + } + delete model_info; +} + void computeLoglFromUserInputGAMMAInvar(Params ¶ms, IQTree &iqtree) { - RateHeterogeneity *site_rates = iqtree.getRate(); - site_rates->setFixPInvar(true); - site_rates->setFixGammaShape(true); - vector alphas, p_invars, logl; - ifstream aiFile; - aiFile.open(params.alpha_invar_file, ios_base::in); - if (aiFile.good()) { - double alpha, p_invar; - while (aiFile >> alpha >> p_invar) { - alphas.push_back(alpha); - p_invars.push_back(p_invar); - } - aiFile.close(); - cout << "Computing tree logl based on the alpha and p_invar values in " << params.alpha_invar_file << " ..." << - endl; - } else { - stringstream errMsg; - errMsg << "Could not find file: " << params.alpha_invar_file; - outError(errMsg.str().c_str()); - } - string aiResultsFileName = string(params.out_prefix) + "_" + string(params.alpha_invar_file) + ".results"; - ofstream aiFileResults; - aiFileResults.open(aiResultsFileName.c_str()); - aiFileResults << fixed; - aiFileResults.precision(4); - DoubleVector lenvec; - aiFileResults << "Alpha P_Invar Logl TreeLength\n"; - for (int i = 0; i < alphas.size(); i++) { - iqtree.saveBranchLengths(lenvec); - aiFileResults << alphas.at(i) << " " << p_invars.at(i) << " "; - site_rates->setGammaShape(alphas.at(i)); - site_rates->setPInvar(p_invars.at(i)); - iqtree.clearAllPartialLH(); - double lh = iqtree.getModelFactory()->optimizeParameters(params.fixed_branch_length, false, 0.001); - aiFileResults << lh << " " << iqtree.treeLength() << "\n"; - iqtree.restoreBranchLengths(lenvec); - } - aiFileResults.close(); - cout << "Results were written to: " << aiResultsFileName << endl; - cout << "Wall clock time 
used: " << getRealTime() - params.start_real_time << endl; + RateHeterogeneity *site_rates = iqtree.getRate(); + site_rates->setFixPInvar(true); + site_rates->setFixGammaShape(true); + vector alphas, p_invars, logl; + ifstream aiFile; + aiFile.open(params.alpha_invar_file, ios_base::in); + if (aiFile.good()) { + double alpha, p_invar; + while (aiFile >> alpha >> p_invar) { + alphas.push_back(alpha); + p_invars.push_back(p_invar); + } + aiFile.close(); + cout << "Computing tree logl based on the alpha and p_invar values in " << params.alpha_invar_file << " ..." << + endl; + } else { + stringstream errMsg; + errMsg << "Could not find file: " << params.alpha_invar_file; + outError(errMsg.str().c_str()); + } + string aiResultsFileName = string(params.out_prefix) + "_" + string(params.alpha_invar_file) + ".results"; + ofstream aiFileResults; + aiFileResults.open(aiResultsFileName.c_str()); + aiFileResults << fixed; + aiFileResults.precision(4); + DoubleVector lenvec; + aiFileResults << "Alpha P_Invar Logl TreeLength\n"; + for (int i = 0; i < alphas.size(); i++) { + iqtree.saveBranchLengths(lenvec); + aiFileResults << alphas.at(i) << " " << p_invars.at(i) << " "; + site_rates->setGammaShape(alphas.at(i)); + site_rates->setPInvar(p_invars.at(i)); + iqtree.clearAllPartialLH(); + double lh = iqtree.getModelFactory()->optimizeParameters(params.fixed_branch_length, false, 0.001); + aiFileResults << lh << " " << iqtree.treeLength() << "\n"; + iqtree.restoreBranchLengths(lenvec); + } + aiFileResults.close(); + cout << "Results were written to: " << aiResultsFileName << endl; + cout << "Wall clock time used: " << getRealTime() - params.start_real_time << endl; } void searchGAMMAInvarByRestarting(IQTree &iqtree) { if (!Params::getInstance().fixed_branch_length) - iqtree.setCurScore(iqtree.optimizeAllBranches(1)); - else - iqtree.setCurScore(iqtree.computeLikelihood()); - RateHeterogeneity* site_rates = (iqtree.getRate()); - double values[] = { 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 
0.8, 0.9, 1.0 }; - vector initAlphas; - if (Params::getInstance().randomAlpha) { - while (initAlphas.size() < 10) { - double initAlpha = random_double(); - initAlphas.push_back(initAlpha + iqtree.params->min_gamma_shape*2); - } - } else { - initAlphas.assign(values, values+10); - } - double bestLogl = iqtree.getCurScore(); - double bestAlpha = 0.0; - double bestPInvar = 0.0; - double initPInvar = iqtree.getRate()->getPInvar(); + iqtree.setCurScore(iqtree.optimizeAllBranches(1)); + else + iqtree.setCurScore(iqtree.computeLikelihood()); + RateHeterogeneity* site_rates = (iqtree.getRate()); + double values[] = { 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0 }; + vector initAlphas; + if (Params::getInstance().randomAlpha) { + while (initAlphas.size() < 10) { + double initAlpha = random_double(); + initAlphas.push_back(initAlpha + iqtree.params->min_gamma_shape*2); + } + } else { + initAlphas.assign(values, values+10); + } + double bestLogl = iqtree.getCurScore(); + double bestAlpha = 0.0; + double bestPInvar = 0.0; + double initPInvar = iqtree.getRate()->getPInvar(); /* Back up branch lengths and substitutional rates */ - DoubleVector lenvec; - DoubleVector bestLens; - iqtree.saveBranchLengths(lenvec); + DoubleVector lenvec; + DoubleVector bestLens; + iqtree.saveBranchLengths(lenvec); int numRateEntries = iqtree.getModel()->getNumRateEntries(); double *rates = new double[numRateEntries]; double *bestRates = new double[numRateEntries]; @@ -2529,42 +2970,42 @@ void searchGAMMAInvarByRestarting(IQTree &iqtree) { double *bestStateFreqs = new double[numStates]; for (int i = 0; i < 10; i++) { - cout << endl; - cout << "Testing alpha: " << initAlphas[i] << endl; + cout << endl; + cout << "Testing alpha: " << initAlphas[i] << endl; // Initialize model parameters iqtree.restoreBranchLengths(lenvec); ((ModelMarkov*) iqtree.getModel())->setRateMatrix(rates); ((ModelMarkov*) iqtree.getModel())->setStateFrequency(state_freqs); iqtree.getModel()->decomposeRateMatrix(); 
site_rates->setGammaShape(initAlphas[i]); - site_rates->setPInvar(initPInvar); - iqtree.clearAllPartialLH(); - iqtree.optimizeModelParameters(verbose_mode >= VB_MED, Params::getInstance().testAlphaEps); + site_rates->setPInvar(initPInvar); + iqtree.clearAllPartialLH(); + iqtree.optimizeModelParameters(verbose_mode >= VB_MED, Params::getInstance().testAlphaEps); double estAlpha = iqtree.getRate()->getGammaShape(); double estPInv = iqtree.getRate()->getPInvar(); double logl = iqtree.getCurScore(); - cout << "Est. alpha: " << estAlpha << " / Est. pinv: " << estPInv + cout << "Est. alpha: " << estAlpha << " / Est. pinv: " << estPInv << " / Logl: " << logl << endl; - if (iqtree.getCurScore() > bestLogl) { - bestLogl = logl; - bestAlpha = estAlpha; - bestPInvar = estPInv; - bestLens.clear(); - iqtree.saveBranchLengths(bestLens); + if (iqtree.getCurScore() > bestLogl) { + bestLogl = logl; + bestAlpha = estAlpha; + bestPInvar = estPInv; + bestLens.clear(); + iqtree.saveBranchLengths(bestLens); iqtree.getModel()->getRateMatrix(bestRates); iqtree.getModel()->getStateFrequency(bestStateFreqs); } } - site_rates->setGammaShape(bestAlpha); - site_rates->setFixGammaShape(false); - site_rates->setPInvar(bestPInvar); - site_rates->setFixPInvar(false); + site_rates->setGammaShape(bestAlpha); + site_rates->setFixGammaShape(false); + site_rates->setPInvar(bestPInvar); + site_rates->setFixPInvar(false); ((ModelMarkov*) iqtree.getModel())->setRateMatrix(bestRates); ((ModelMarkov*) iqtree.getModel())->setStateFrequency(bestStateFreqs); - iqtree.restoreBranchLengths(bestLens); + iqtree.restoreBranchLengths(bestLens); iqtree.getModel()->decomposeRateMatrix(); - iqtree.clearAllPartialLH(); + iqtree.clearAllPartialLH(); iqtree.setCurScore(iqtree.computeLikelihood()); cout << endl; cout << "Best initial alpha: " << bestAlpha << " / initial pinv: " << bestPInvar << " / "; @@ -2578,34 +3019,34 @@ void searchGAMMAInvarByRestarting(IQTree &iqtree) { // Test alpha fom 0.1 to 15 and p_invar from 
0.1 to 0.99, stepsize = 0.01 void exhaustiveSearchGAMMAInvar(Params ¶ms, IQTree &iqtree) { - double alphaMin = 0.01; - double alphaMax = 10.00; - double p_invarMin = 0.01; - double p_invarMax = 1.00; -// double p_invarMax = iqtree.aln->frac_const_sites; - double stepSize = 0.01; - int numAlpha = (int) floor((alphaMax - alphaMin)/stepSize); - int numInvar = (int) floor((p_invarMax - p_invarMin)/stepSize); - - cout << "EVALUATING " << numAlpha*numInvar << " COMBINATIONS OF " << " alpha=" << alphaMin << ".." << alphaMax + double alphaMin = 0.01; + double alphaMax = 10.00; + double p_invarMin = 0.01; + double p_invarMax = 1.00; +// double p_invarMax = iqtree.aln->frac_const_sites; + double stepSize = 0.01; + int numAlpha = (int) floor((alphaMax - alphaMin)/stepSize); + int numInvar = (int) floor((p_invarMax - p_invarMin)/stepSize); + + cout << "EVALUATING " << numAlpha*numInvar << " COMBINATIONS OF " << " alpha=" << alphaMin << ".." << alphaMax << " AND " << " p-invar=" << p_invarMin << ".." 
<< p_invarMax << " (epsilon: " << params.modelEps << ")" << endl; -// vector results; -// results.reserve((unsigned long) (numAlpha * numInvar)); - DoubleVector lenvec; - iqtree.saveBranchLengths(lenvec); +// vector results; +// results.reserve((unsigned long) (numAlpha * numInvar)); + DoubleVector lenvec; + iqtree.saveBranchLengths(lenvec); - RateHeterogeneity* site_rates = (iqtree.getRate()); - site_rates->setFixPInvar(true); - site_rates->setFixGammaShape(true); + RateHeterogeneity* site_rates = (iqtree.getRate()); + site_rates->setFixPInvar(true); + site_rates->setFixGammaShape(true); - string aiResultsFileName = string(params.out_prefix) + ".ai_results"; - ofstream aiFileResults; - aiFileResults.open(aiResultsFileName.c_str()); - aiFileResults << fixed; - aiFileResults.precision(4); - aiFileResults << "alpha p_invar logl tree_len\n"; + string aiResultsFileName = string(params.out_prefix) + ".ai_results"; + ofstream aiFileResults; + aiFileResults.open(aiResultsFileName.c_str()); + aiFileResults << fixed; + aiFileResults.precision(4); + aiFileResults << "alpha p_invar logl tree_len\n"; for (double alpha = alphaMin; alpha < alphaMax; alpha = alpha + stepSize) { cout << "alpha = " << alpha << endl; @@ -2622,64 +3063,41 @@ void exhaustiveSearchGAMMAInvar(Params ¶ms, IQTree &iqtree) { iqtree.restoreBranchLengths(lenvec); } } -// for (vector::iterator it = results.begin(); it != results.end(); it++) { -// aiFileResults << (*it) << endl; -// } - aiFileResults.close(); - cout << "Results were written to: " << aiResultsFileName << endl; - cout << "Wall clock time used: " << getRealTime() - params.start_real_time << endl; -} - - -/** - optimize branch lengths of consensus tree -*/ -void optimizeConTree(Params ¶ms, IQTree *tree) { - string contree_file = string(params.out_prefix) + ".contree"; - - IntVector rfdist; - tree->computeRFDist(contree_file.c_str(), rfdist); - params.contree_rfdist = rfdist[0]; - - tree->readTreeFile(contree_file); - - 
tree->initializeAllPartialLh(); - tree->fixNegativeBranch(false); - - tree->boot_consense_logl = tree->optimizeAllBranches(); - cout << "Log-likelihood of consensus tree: " << tree->boot_consense_logl << endl; - tree->setRootNode(params.root); - tree->insertTaxa(tree->removed_seqs, tree->twin_seqs); - tree->printTree(contree_file.c_str(), WT_BR_LEN | WT_BR_LEN_FIXED_WIDTH | WT_SORT_TAXA | WT_NEWLINE); +// for (vector::iterator it = results.begin(); it != results.end(); it++) { +// aiFileResults << (*it) << endl; +// } + aiFileResults.close(); + cout << "Results were written to: " << aiResultsFileName << endl; + cout << "Wall clock time used: " << getRealTime() - params.start_real_time << endl; } /********************************************************** * STANDARD NON-PARAMETRIC BOOTSTRAP ***********************************************************/ -void runStandardBootstrap(Params ¶ms, string &original_model, Alignment *alignment, IQTree *tree) { - ModelCheckpoint *model_info = new ModelCheckpoint; - StrVector removed_seqs, twin_seqs; +void runStandardBootstrap(Params ¶ms, Alignment *alignment, IQTree *tree) { + ModelCheckpoint *model_info = new ModelCheckpoint; + StrVector removed_seqs, twin_seqs; - // turn off all branch tests - int saved_aLRT_replicates = params.aLRT_replicates; + // turn off all branch tests + int saved_aLRT_replicates = params.aLRT_replicates; int saved_localbp_replicates = params.localbp_replicates; bool saved_aLRT_test = params.aLRT_test; bool saved_aBayes_test = params.aBayes_test; - params.aLRT_replicates = 0; + params.aLRT_replicates = 0; params.localbp_replicates = 0; params.aLRT_test = false; params.aBayes_test = false; if (params.suppress_output_flags & OUT_TREEFILE) outError("Suppress .treefile not allowed for standard bootstrap"); - string treefile_name = params.out_prefix; - treefile_name += ".treefile"; - string boottrees_name = params.out_prefix; - boottrees_name += ".boottrees"; - string bootaln_name = params.out_prefix; - 
bootaln_name += ".bootaln"; - string bootlh_name = params.out_prefix; - bootlh_name += ".bootlh"; + string treefile_name = params.out_prefix; + treefile_name += ".treefile"; + string boottrees_name = params.out_prefix; + boottrees_name += ".boottrees"; + string bootaln_name = params.out_prefix; + bootaln_name += ".bootaln"; + string bootlh_name = params.out_prefix; + bootlh_name += ".bootlh"; int bootSample = 0; if (tree->getCheckpoint()->get("bootSample", bootSample)) { cout << "CHECKPOINT: " << bootSample << " bootstrap analyses restored" << endl; @@ -2706,66 +3124,73 @@ void runStandardBootstrap(Params ¶ms, string &original_model, Alignment *ali } } - double start_time = getCPUTime(); - double start_real_time = getRealTime(); - - + double start_time = getCPUTime(); + double start_real_time = getRealTime(); - // do bootstrap analysis - for (int sample = bootSample; sample < params.num_bootstrap_samples; sample++) { - cout << endl << "===> START BOOTSTRAP REPLICATE NUMBER " - << sample + 1 << endl << endl; + startTreeReconstruction(params, tree, *model_info); + + // 2018-06-21: bug fix: alignment might be changed by -m ...MERGE + alignment = tree->aln; + + // do bootstrap analysis + for (int sample = bootSample; sample < params.num_bootstrap_samples; sample++) { + cout << endl << "===> START " << RESAMPLE_NAME_UPPER << " REPLICATE NUMBER " + << sample + 1 << endl << endl; // 2015-12-17: initialize random stream for creating bootstrap samples // mainly so that checkpointing does not need to save bootstrap samples int *saved_randstream = randstream; init_random(params.ran_seed + sample); - Alignment* bootstrap_alignment; - cout << "Creating bootstrap alignment (seed: " << params.ran_seed+sample << ")..." 
<< endl; - if (alignment->isSuperAlignment()) - bootstrap_alignment = new SuperAlignment; - else - bootstrap_alignment = new Alignment; - bootstrap_alignment->createBootstrapAlignment(alignment, NULL, params.bootstrap_spec); + Alignment* bootstrap_alignment; + cout << "Creating " << RESAMPLE_NAME << " alignment (seed: " << params.ran_seed+sample << ")..." << endl; + + if (alignment->isSuperAlignment()) + bootstrap_alignment = new SuperAlignment; + else + bootstrap_alignment = new Alignment; + bootstrap_alignment->createBootstrapAlignment(alignment, NULL, params.bootstrap_spec); // restore randstream finish_random(); randstream = saved_randstream; - if (params.print_tree_lh && MPIHelper::getInstance().isMaster()) { - double prob; - bootstrap_alignment->multinomialProb(*alignment, prob); - ofstream boot_lh; - if (sample == 0) - boot_lh.open(bootlh_name.c_str()); - else - boot_lh.open(bootlh_name.c_str(), ios_base::out | ios_base::app); - boot_lh << "0\t" << prob << endl; - boot_lh.close(); - } - IQTree *boot_tree; - if (alignment->isSuperAlignment()){ - if(params.partition_type != BRLEN_OPTIMIZE){ - boot_tree = new PhyloSuperTreePlen((SuperAlignment*) bootstrap_alignment, (PhyloSuperTree*) tree); - } else { - boot_tree = new PhyloSuperTree((SuperAlignment*) bootstrap_alignment, (PhyloSuperTree*) tree); - } - } else - boot_tree = new IQTree(bootstrap_alignment); - if (params.print_bootaln && MPIHelper::getInstance().isMaster()) { - if (bootstrap_alignment->isSuperAlignment()) - ((SuperAlignment*)bootstrap_alignment)->printCombinedAlignment(bootaln_name.c_str(), true); + if (params.print_tree_lh && MPIHelper::getInstance().isMaster()) { + double prob; + bootstrap_alignment->multinomialProb(*alignment, prob); + ofstream boot_lh; + if (sample == 0) + boot_lh.open(bootlh_name.c_str()); else - bootstrap_alignment->printPhylip(bootaln_name.c_str(), true); + boot_lh.open(bootlh_name.c_str(), ios_base::out | ios_base::app); + boot_lh << "0\t" << prob << endl; + 
boot_lh.close(); + } + IQTree *boot_tree; + if (alignment->isSuperAlignment()){ + if(params.partition_type != BRLEN_OPTIMIZE){ + boot_tree = new PhyloSuperTreePlen((SuperAlignment*) bootstrap_alignment, (PhyloSuperTree*) tree); + } else { + boot_tree = new PhyloSuperTree((SuperAlignment*) bootstrap_alignment, (PhyloSuperTree*) tree); + } + } else { + // allocate heterotachy tree if neccessary + int pos = posRateHeterotachy(alignment->model_name); + + if (params.num_mixlen > 1) { + boot_tree = new PhyloTreeMixlen(bootstrap_alignment, params.num_mixlen); + } else if (pos != string::npos) { + boot_tree = new PhyloTreeMixlen(bootstrap_alignment, 0); + } else + boot_tree = new IQTree(bootstrap_alignment); + } + if (params.print_bootaln && MPIHelper::getInstance().isMaster()) { + bootstrap_alignment->printAlignment(params.aln_output_format, bootaln_name.c_str(), true); } - if (params.print_boot_site_freq && MPIHelper::getInstance().isMaster()) { + if (params.print_boot_site_freq && MPIHelper::getInstance().isMaster()) { printSiteStateFreq((((string)params.out_prefix)+"."+convertIntToString(sample)+".bootsitefreq").c_str(), bootstrap_alignment); - if (bootstrap_alignment->isSuperAlignment()) - ((SuperAlignment*)bootstrap_alignment)->printCombinedAlignment((((string)params.out_prefix)+"."+convertIntToString(sample)+".bootaln").c_str()); - else - bootstrap_alignment->printPhylip((((string)params.out_prefix)+"."+convertIntToString(sample)+".bootaln").c_str()); + bootstrap_alignment->printAlignment(params.aln_output_format, (((string)params.out_prefix)+"."+convertIntToString(sample)+".bootaln").c_str()); } if (!tree->constraintTree.empty()) { @@ -2776,157 +3201,185 @@ void runStandardBootstrap(Params ¶ms, string &original_model, Alignment *ali boot_tree->setCheckpoint(tree->getCheckpoint()); boot_tree->num_precision = tree->num_precision; - runTreeReconstruction(params, original_model, boot_tree, *model_info); - // read in the output tree file + runTreeReconstruction(params, 
boot_tree); + // read in the output tree file stringstream ss; boot_tree->printTree(ss); -// try { -// ifstream tree_in; -// tree_in.exceptions(ios::failbit | ios::badbit); -// tree_in.open(treefile_name.c_str()); -// tree_in >> tree_str; -// tree_in.close(); -// } catch (ios::failure) { -// outError(ERR_READ_INPUT, treefile_name); -// } - // write the tree into .boottrees file +// try { +// ifstream tree_in; +// tree_in.exceptions(ios::failbit | ios::badbit); +// tree_in.open(treefile_name.c_str()); +// tree_in >> tree_str; +// tree_in.close(); +// } catch (ios::failure) { +// outError(ERR_READ_INPUT, treefile_name); +// } + // write the tree into .boottrees file if (MPIHelper::getInstance().isMaster()) - try { - ofstream tree_out; - tree_out.exceptions(ios::failbit | ios::badbit); - tree_out.open(boottrees_name.c_str(), ios_base::out | ios_base::app); - tree_out << ss.str() << endl; - tree_out.close(); - } catch (ios::failure) { - outError(ERR_WRITE_OUTPUT, boottrees_name); - } - // fix bug: set the model for original tree after testing - if ((original_model.substr(0,4) == "TEST" || original_model.substr(0,2) == "MF") && tree->isSuperTree()) { - PhyloSuperTree *stree = ((PhyloSuperTree*)tree); - stree->part_info = ((PhyloSuperTree*)boot_tree)->part_info; -// for (int i = 0; i < ((PhyloSuperTree*)tree)->part_info.size(); i++) -// ((PhyloSuperTree*)tree)->part_info[i].model_name = ((PhyloSuperTree*)boot_tree)->part_info[i].model_name; - } - if (params.num_bootstrap_samples == 1) - reportPhyloAnalysis(params, original_model, *boot_tree, *model_info); - // WHY was the following line missing, which caused memory leak? 
- bootstrap_alignment = boot_tree->aln; - delete boot_tree; - // fix bug: bootstrap_alignment might be changed - delete bootstrap_alignment; + try { + ofstream tree_out; + tree_out.exceptions(ios::failbit | ios::badbit); + tree_out.open(boottrees_name.c_str(), ios_base::out | ios_base::app); + tree_out << ss.str() << endl; + tree_out.close(); + } catch (ios::failure) { + outError(ERR_WRITE_OUTPUT, boottrees_name); + } + // OBSOLETE fix bug: set the model for original tree after testing +// if ((params.model_name.substr(0,4) == "TEST" || params.model_name.substr(0,2) == "MF") && tree->isSuperTree()) { +// PhyloSuperTree *stree = ((PhyloSuperTree*)tree); +// stree->part_info = ((PhyloSuperTree*)boot_tree)->part_info; +// } + if (params.num_bootstrap_samples == 1) + reportPhyloAnalysis(params, *boot_tree, *model_info); + // WHY was the following line missing, which caused memory leak? + bootstrap_alignment = boot_tree->aln; + delete boot_tree; + // fix bug: bootstrap_alignment might be changed + delete bootstrap_alignment; // clear all checkpointed information - Checkpoint *newCheckpoint = new Checkpoint; - tree->getCheckpoint()->getSubCheckpoint(newCheckpoint, "iqtree"); - tree->getCheckpoint()->clear(); - tree->getCheckpoint()->insert(newCheckpoint->begin(), newCheckpoint->end()); + tree->getCheckpoint()->keepKeyPrefix("iqtree"); tree->getCheckpoint()->put("bootSample", sample+1); tree->getCheckpoint()->putBool("finished", false); tree->getCheckpoint()->dump(true); - delete newCheckpoint; - - } + } - if (params.consensus_type == CT_CONSENSUS_TREE && MPIHelper::getInstance().isMaster()) { + if (params.consensus_type == CT_CONSENSUS_TREE && MPIHelper::getInstance().isMaster()) { - cout << endl << "===> COMPUTE CONSENSUS TREE FROM " - << params.num_bootstrap_samples << " BOOTSTRAP TREES" << endl << endl; + cout << endl << "===> COMPUTE CONSENSUS TREE FROM " << params.num_bootstrap_samples + << RESAMPLE_NAME_UPPER << " TREES" << endl << endl; string root_name = 
(params.root) ? params.root : alignment->getSeqName(0); const char* saved_root = params.root; params.root = root_name.c_str(); - computeConsensusTree(boottrees_name.c_str(), 0, 1e6, -1, - params.split_threshold, NULL, params.out_prefix, NULL, ¶ms); + computeConsensusTree(boottrees_name.c_str(), 0, 1e6, -1, + params.split_threshold, NULL, params.out_prefix, NULL, ¶ms); params.root = saved_root; - } + } - if (params.compute_ml_tree) { - cout << endl << "===> START ANALYSIS ON THE ORIGINAL ALIGNMENT" << endl << endl; + if (params.compute_ml_tree) { + cout << endl << "===> START ANALYSIS ON THE ORIGINAL ALIGNMENT" << endl << endl; // restore branch tests - params.aLRT_replicates = saved_aLRT_replicates; + params.aLRT_replicates = saved_aLRT_replicates; params.localbp_replicates = saved_localbp_replicates; params.aLRT_test = saved_aLRT_test; params.aBayes_test = saved_aBayes_test; - runTreeReconstruction(params, original_model, tree, *model_info); + if (params.num_runs == 1) + runTreeReconstruction(params, tree); + else + runMultipleTreeReconstruction(params, tree->aln, tree); if (MPIHelper::getInstance().isMaster()) { - if (params.consensus_type == CT_CONSENSUS_TREE) { + if (params.consensus_type == CT_CONSENSUS_TREE && params.num_runs == 1) { // 2017-12-08: optimize branch lengths of consensus tree optimizeConTree(params, tree); } - cout << endl << "===> ASSIGN BOOTSTRAP SUPPORTS TO THE TREE FROM ORIGINAL ALIGNMENT" << endl << endl; + cout << endl << "===> ASSIGN " << RESAMPLE_NAME_UPPER + << " SUPPORTS TO THE TREE FROM ORIGINAL ALIGNMENT" << endl << endl; MExtTree ext_tree; assignBootstrapSupport(boottrees_name.c_str(), 0, 1e6, treefile_name.c_str(), false, treefile_name.c_str(), params.out_prefix, ext_tree, NULL, ¶ms); tree->copyTree(&ext_tree); - reportPhyloAnalysis(params, original_model, *tree, *model_info); - } - } else if (params.consensus_type == CT_CONSENSUS_TREE && MPIHelper::getInstance().isMaster()) { - int mi = params.min_iterations; - STOP_CONDITION sc = 
params.stop_condition; - params.min_iterations = 0; - params.stop_condition = SC_FIXED_ITERATION; - runTreeReconstruction(params, original_model, tree, *model_info); - params.min_iterations = mi; - params.stop_condition = sc; - tree->stop_rule.initialize(params); + reportPhyloAnalysis(params, *tree, *model_info); + } + } else if (params.consensus_type == CT_CONSENSUS_TREE && MPIHelper::getInstance().isMaster()) { + int mi = params.min_iterations; + STOP_CONDITION sc = params.stop_condition; + params.min_iterations = 0; + params.stop_condition = SC_FIXED_ITERATION; + runTreeReconstruction(params, tree); + params.min_iterations = mi; + params.stop_condition = sc; + tree->stop_rule.initialize(params); optimizeConTree(params, tree); - reportPhyloAnalysis(params, original_model, *tree, *model_info); - } else - cout << endl; + reportPhyloAnalysis(params, *tree, *model_info); + } else + cout << endl; +#ifdef USE_BOOSTER + if (params.transfer_bootstrap) { + // transfer bootstrap expectation (TBE) + cout << "Performing transfer bootstrap expectation..." << endl; + string input_tree = (string)params.out_prefix + ".treefile"; + string boot_trees = (string)params.out_prefix + ".boottrees"; + string out_tree = (string)params.out_prefix + ".tbe.tree"; + string out_raw_tree = (string)params.out_prefix + ".tbe.rawtree"; + string stat_out = (string)params.out_prefix + ".tbe.stat"; + main_booster(input_tree.c_str(), boot_trees.c_str(), out_tree.c_str(), + (params.transfer_bootstrap==2) ? out_raw_tree.c_str() : NULL, + stat_out.c_str(), (verbose_mode >= VB_MED) ? 0 : 1); + cout << "TBE tree written to " << out_tree << endl; + if (params.transfer_bootstrap == 2) + cout << "TBE raw tree written to " << out_raw_tree << endl; + cout << "TBE statistic written to " << stat_out << endl; + cout << endl; + } +#endif + if (MPIHelper::getInstance().isMaster()) { - cout << "Total CPU time for bootstrap: " << (getCPUTime() - start_time) << " seconds." 
<< endl; - cout << "Total wall-clock time for bootstrap: " << (getRealTime() - start_real_time) << " seconds." << endl << endl; - cout << "Non-parametric bootstrap results written to:" << endl; - if (params.print_bootaln) - cout << " Bootstrap alignments: " << params.out_prefix << ".bootaln" << endl; - cout << " Bootstrap trees: " << params.out_prefix << ".boottrees" << endl; - if (params.consensus_type == CT_CONSENSUS_TREE) - cout << " Consensus tree: " << params.out_prefix << ".contree" << endl; - cout << endl; + cout << "Total CPU time for " << RESAMPLE_NAME << ": " << (getCPUTime() - start_time) << " seconds." << endl; + cout << "Total wall-clock time for " << RESAMPLE_NAME << ": " << (getRealTime() - start_real_time) << " seconds." << endl << endl; + cout << "Non-parametric " << RESAMPLE_NAME << " results written to:" << endl; + if (params.print_bootaln) + cout << RESAMPLE_NAME_I << " alignments: " << params.out_prefix << ".bootaln" << endl; + cout << RESAMPLE_NAME_I << " trees: " << params.out_prefix << ".boottrees" << endl; + if (params.consensus_type == CT_CONSENSUS_TREE) + cout << " Consensus tree: " << params.out_prefix << ".contree" << endl; + cout << endl; } delete model_info; } void convertAlignment(Params ¶ms, IQTree *iqtree) { - Alignment *alignment = iqtree->aln; - if (params.num_bootstrap_samples || params.print_bootaln) { - // create bootstrap alignment - Alignment* bootstrap_alignment; - cout << "Creating bootstrap alignment..." << endl; - if (alignment->isSuperAlignment()) - bootstrap_alignment = new SuperAlignment; - else - bootstrap_alignment = new Alignment; - bootstrap_alignment->createBootstrapAlignment(alignment, NULL, params.bootstrap_spec); - delete alignment; - alignment = bootstrap_alignment; + Alignment *alignment = iqtree->aln; + if (params.num_bootstrap_samples || params.print_bootaln) { + // create bootstrap alignment + Alignment* bootstrap_alignment; + cout << "Creating " << RESAMPLE_NAME << " alignment..." 
<< endl; + if (alignment->isSuperAlignment()) + bootstrap_alignment = new SuperAlignment; + else + bootstrap_alignment = new Alignment; + bootstrap_alignment->createBootstrapAlignment(alignment, NULL, params.bootstrap_spec); + delete alignment; + alignment = bootstrap_alignment; iqtree->aln = alignment; - } - if (alignment->isSuperAlignment()) { - ((SuperAlignment*)alignment)->printCombinedAlignment(params.aln_output); - if (params.print_subaln) - ((SuperAlignment*)alignment)->printSubAlignments(params, ((PhyloSuperTree*)iqtree)->part_info); - - } else if (params.gap_masked_aln) { - Alignment out_aln; - Alignment masked_aln(params.gap_masked_aln, params.sequence_type, params.intype); - out_aln.createGapMaskedAlignment(&masked_aln, alignment); - out_aln.printPhylip(params.aln_output, false, params.aln_site_list, - params.aln_nogaps, params.aln_no_const_sites, params.ref_seq_name); - string str = params.gap_masked_aln; - str += ".sitegaps"; - out_aln.printSiteGaps(str.c_str()); - } else if (params.aln_output_format == ALN_PHYLIP) - alignment->printPhylip(params.aln_output, false, params.aln_site_list, - params.aln_nogaps, params.aln_no_const_sites, params.ref_seq_name); - else if (params.aln_output_format == ALN_FASTA) - alignment->printFasta(params.aln_output, false, params.aln_site_list, - params.aln_nogaps, params.aln_no_const_sites, params.ref_seq_name); + } + + int exclude_sites = 0; + if (params.aln_nogaps) + exclude_sites += EXCLUDE_GAP; + if (params.aln_no_const_sites) + exclude_sites += EXCLUDE_INVAR; + + if (alignment->isSuperAlignment()) { + alignment->printAlignment(params.aln_output_format, params.aln_output, false, params.aln_site_list, + exclude_sites, params.ref_seq_name); + if (params.print_subaln) + ((SuperAlignment*)alignment)->printSubAlignments(params); + if (params.aln_output_format != IN_NEXUS) { + string partition_info = string(params.aln_output) + ".nex"; + ((SuperAlignment*)alignment)->printPartition(partition_info.c_str(), 
params.aln_output); + partition_info = (string)params.aln_output + ".partitions"; + ((SuperAlignment*)alignment)->printPartitionRaxml(partition_info.c_str()); + } + } else if (params.gap_masked_aln) { + Alignment out_aln; + Alignment masked_aln(params.gap_masked_aln, params.sequence_type, params.intype, params.model_name); + out_aln.createGapMaskedAlignment(&masked_aln, alignment); + out_aln.printAlignment(params.aln_output_format, params.aln_output, false, params.aln_site_list, + exclude_sites, params.ref_seq_name); + string str = params.gap_masked_aln; + str += ".sitegaps"; + out_aln.printSiteGaps(str.c_str()); + } else { + alignment->printAlignment(params.aln_output_format, params.aln_output, false, params.aln_site_list, + exclude_sites, params.ref_seq_name); + } } /** @@ -2943,13 +3396,14 @@ void computeSiteFrequencyModel(Params ¶ms, Alignment *alignment) { tree->setAlignment(alignment); tree->setRootNode(params.root); - ModelsBlock *models_block = readModelsDefinition(params); - tree->setModelFactory(new ModelFactory(params, params.model_name, tree, models_block)); + ModelsBlock *models_block = readModelsDefinition(params); + tree->setModelFactory(new ModelFactory(params, alignment->model_name, tree, models_block)); delete models_block; tree->setModel(tree->getModelFactory()->model); tree->setRate(tree->getModelFactory()->site_rate); - tree->setLikelihoodKernel(params.SSE, params.num_threads); - + tree->setLikelihoodKernel(params.SSE); + tree->setNumThreads(params.num_threads); + if (!tree->getModel()->isMixture()) outError("No mixture model was specified!"); uint64_t mem_size = tree->getMemoryRequired(); @@ -2964,13 +3418,7 @@ void computeSiteFrequencyModel(Params ¶ms, Alignment *alignment) { } #endif -#ifdef _OPENMP - if (tree->num_threads <= 0) { - int bestThreads = tree->testNumThreads(); - omp_set_num_threads(bestThreads); - } else - tree->warnNumThreads(); -#endif + tree->ensureNumberOfThreadsIsSet(nullptr); tree->initializeAllPartialLh(); // 2017-12-07: 
Increase espilon ten times (0.01 -> 0.1) to speedup PMSF computation @@ -2999,38 +3447,290 @@ void computeSiteFrequencyModel(Params ¶ms, Alignment *alignment) { /********************************************************** * TOP-LEVEL FUNCTION ***********************************************************/ + +IQTree *newIQTree(Params ¶ms, Alignment *alignment) { + IQTree *tree; + if (alignment->isSuperAlignment()) { + if (params.partition_type == TOPO_UNLINKED) { + tree = new PhyloSuperTreeUnlinked((SuperAlignment*)alignment); + } else if(params.partition_type != BRLEN_OPTIMIZE){ + // initialize supertree - Proportional Edges case + tree = new PhyloSuperTreePlen((SuperAlignment*)alignment, params.partition_type); + } else { + // initialize supertree stuff if user specifies partition file with -sp option + tree = new PhyloSuperTree((SuperAlignment*)alignment); + } + // this alignment will actually be of type SuperAlignment + // alignment = tree->aln; + if (((PhyloSuperTree*)tree)->rescale_codon_brlen) + cout << "NOTE: Mixed codon and other data, branch lengths of codon partitions are rescaled by 3!" 
<< endl; + + } else { + // allocate heterotachy tree if neccessary + int pos = posRateHeterotachy(alignment->model_name); + + if (params.num_mixlen > 1) { + tree = new PhyloTreeMixlen(alignment, params.num_mixlen); + } else if (pos != string::npos) { + tree = new PhyloTreeMixlen(alignment, 0); + } else + tree = new IQTree(alignment); + } + + return tree; +} + +/** get ID of bad or good symtest results */ +void getSymTestID(vector &res, set &id, bool bad_res) { + if (bad_res) { + // get significant test ID + switch (Params::getInstance().symtest) { + case SYMTEST_BINOM: + for (auto i = res.begin(); i != res.end(); i++) + if (i->pvalue_binom < Params::getInstance().symtest_pcutoff) + id.insert(i - res.begin()); + break; + case SYMTEST_MAXDIV: + for (auto i = res.begin(); i != res.end(); i++) + if (i->pvalue_maxdiv < Params::getInstance().symtest_pcutoff) + id.insert(i - res.begin()); + break; + default: + break; + } + } else { + // get non-significant test ID + switch (Params::getInstance().symtest) { + case SYMTEST_BINOM: + for (auto i = res.begin(); i != res.end(); i++) + if (i->pvalue_binom >= Params::getInstance().symtest_pcutoff) + id.insert(i - res.begin()); + break; + case SYMTEST_MAXDIV: + for (auto i = res.begin(); i != res.end(); i++) + if (i->pvalue_maxdiv >= Params::getInstance().symtest_pcutoff) + id.insert(i - res.begin()); + break; + default: + break; + } + } +} + +double computePValueSMax(vector &sym, int start, int step) { + double orig_max = sym[start].max_stat; + int count = 0, num = 0; + for (size_t i = start; i < sym.size(); i += step, num++) + if (sym[i].max_stat >= orig_max) + count++; + return double(count)/num; + +} + +void doSymTest(Alignment *alignment, Params ¶ms) { + double start_time = getRealTime(); + cout << "Performing matched-pair tests of symmetry..."; + vector sym, marsym, intsym; + + size_t num_parts = 1; + if (alignment->isSuperAlignment()) + num_parts = ((SuperAlignment*)alignment)->partitions.size(); + + string filename_stat = 
string(params.out_prefix) + ".symstat.csv"; + ofstream *out_stat = NULL; + if (params.symtest_stat) { + out_stat = new ofstream; + out_stat->open(filename_stat); + *out_stat + << "# Statistic values for matched-pair tests of symmetry" << endl + << "# This file can be read in MS Excel or in R with command:" << endl + << "# dat=read.csv('" << filename_stat << "',comment.char='#')" << endl + << "# Columns are comma-separated with following meanings:" << endl + << "# ID: Partition ID" << endl + << "# Seq1: ID of sequence 1 within partition" << endl + << "# Seq1: ID of sequence 2 within partition" << endl + << "# Sym: Statistic for test of symmetry" << endl + << "# SymChi: Chi-square p-value for test of symmetry" << endl + << "# Mar: Statistic for test of marginal symmetry" << endl + << "# MarChi: Chi-square p-value for marginal test of symmetry" << endl + << "# Int: Statistic for test of internal symmetry" << endl + << "# MarChi: Chi-square p-value for internal test of symmetry" << endl + << "ID,Seq1,Seq2,Sym,SymChi,Mar,MarChi,Int,IntChi" << endl; + + } + + sym.resize(num_parts*params.symtest_shuffle); + marsym.resize(num_parts*params.symtest_shuffle); + intsym.resize(num_parts*params.symtest_shuffle); + + for (int i = 0; i < params.symtest_shuffle; i++) { + vector *stats = NULL; + if (params.symtest_stat) + stats = new vector; + if (i == 0) // original alignment + alignment->doSymTest(i*num_parts, sym, marsym, intsym, NULL, stats); + else { + int *rstream; + init_random(params.ran_seed+i+1, false, &rstream); + alignment->doSymTest(i*num_parts, sym, marsym, intsym, rstream, stats); + finish_random(rstream); + } + if ((i+1)*10 % params.symtest_shuffle == 0) { + cout << " " << (i+1)*100 / params.symtest_shuffle << "%"; + cout.flush(); + } + if (!stats) + continue; + for (auto it = stats->begin(); it != stats->end(); it++) { + *out_stat << it->part << ',' << it->seq1 << ',' << it->seq2 << ',' + << it->chi2_sym << ',' << it->pval_sym << ',' + << it->chi2_marsym << ',' << 
it->pval_marsym << ',' + << it->chi2_intsym << ',' << it->pval_intsym << endl; + } + delete stats; + } + + if (out_stat) { + out_stat->close(); + delete out_stat; + } + + if (params.symtest_shuffle > 1) { + // compute p-value for s-max approach + for (int part = 0; part < num_parts; part++) { + sym[part].pvalue_perm = computePValueSMax(sym, part, num_parts); + marsym[part].pvalue_perm = computePValueSMax(marsym, part, num_parts); + intsym[part].pvalue_perm = computePValueSMax(intsym, part, num_parts); + } + } + + string filename = string(params.out_prefix) + ".symtest.csv"; + ofstream out; + out.open(filename); + out << "# Matched-pair tests of symmetry" << endl + << "# This file can be read in MS Excel or in R with command:" << endl + << "# dat=read.csv('" << filename << "',comment.char='#')" << endl + << "# Columns are comma-separated with following meanings:" << endl + << "# Name: Partition name" << endl + << "# SymSig: Number of significant sequence pairs by test of symmetry" << endl + << "# SymNon: Number of non-significant sequence pairs by test of symmetry" << endl + << ((Params::getInstance().symtest == SYMTEST_BINOM) ? "# SymBi: P-value for binomial test of symmetry" : "# SymPval: P-value for maximum test of symmetry") << endl; + if (params.symtest_shuffle > 1) + out << "# SymMax: Maximum of pair statistics by test of symmetry" << endl + << "# SymPerm: P-value for permutation test of symmetry" << endl; + + out << "# MarSig: Number of significant sequence pairs by test of marginal symmetry" << endl + << "# MarNon: Number of non-significant sequence pairs by test of marginal symmetry" << endl + << ((Params::getInstance().symtest == SYMTEST_BINOM) ? 
"# MarBi: P-value for binomial test of marginal symmetry" : "# MarPval: P-value for maximum test of marginal symmetry") << endl; + if (params.symtest_shuffle > 1) + out << "# MarMax: Maximum of pair statistics by test of marginal symmetry" << endl + << "# MarPerm: P-value for permutation test of marginal symmetry" << endl; + out << "# IntSig: Number of significant sequence pairs by test of internal symmetry" << endl + << "# IntNon: Number of non-significant sequence pairs by test of internal symmetry" << endl + << ((Params::getInstance().symtest == SYMTEST_BINOM) ? "# IntBi: P-value for binomial test of symmetry" : "# IntPval: P-value for maximum test of internal symmetry") << endl; + if (params.symtest_shuffle > 1) + out << "# IntMax: Maximum of pair statistics by test of internal symmetry" << endl + << "# IntPerm: P-value for permutation test of internal symmetry" << endl; + + out << "Name,SymSig,SymNon," << ((Params::getInstance().symtest == SYMTEST_BINOM) ? "SymBi" : "SymPval") + << ((params.symtest_shuffle > 1) ? ",SymMax,SymPerm" : "") + << ",MarSig,MarNon," << ((Params::getInstance().symtest == SYMTEST_BINOM) ? "MarBi" : "MarPval") + << ((params.symtest_shuffle > 1) ? ",MarMax,MarPerm" : "") + << ",IntSig,IntNon," << ((Params::getInstance().symtest == SYMTEST_BINOM) ? "IntBi" : "IntPval") + << ((params.symtest_shuffle > 1) ? 
",IntMax,IntPerm" : "") << endl; + + if (alignment->isSuperAlignment()) { + SuperAlignment *saln = (SuperAlignment*)alignment; + for (int part = 0; part < saln->partitions.size(); part++) + out << saln->partitions[part]->name << ',' + << sym[part] << ',' << marsym[part] << ',' << intsym[part] << endl; + } else { + out << alignment->name << ',' << sym[0] << ',' << marsym[0] << ',' << intsym[0] << endl; + } + + if (params.symtest_shuffle > 1) { + for (int part = num_parts; part < sym.size(); part++) { + sym[part].pvalue_perm = marsym[part].pvalue_perm = intsym[part].pvalue_perm = -1.0; + out << part % num_parts << ',' + << sym[part] << ',' << marsym[part] << ',' << intsym[part] << endl; + } + // erase the rest + sym.erase(sym.begin()+num_parts, sym.end()); + marsym.erase(marsym.begin()+num_parts, marsym.end()); + intsym.erase(intsym.begin()+num_parts, intsym.end()); + } + + out.close(); + cout << " " << getRealTime() - start_time << " seconds" << endl; + if (params.symtest_stat) + cout << "SymTest statistics written to " << filename_stat << endl; + cout << "SymTest results written to " << filename << endl; + + // now filter out partitions + if (alignment->isSuperAlignment()) { + set part_id; + if (params.symtest_remove == 1) { + // remove bad loci + if (params.symtest_type == 0) + getSymTestID(sym, part_id, true); + else if (params.symtest_type == 1) + getSymTestID(marsym, part_id, true); + else + getSymTestID(intsym, part_id, true); + } else if (params.symtest_remove == 2) { + // remove good loci + if (params.symtest_type == 0) + getSymTestID(sym, part_id, false); + else if (params.symtest_type == 1) + getSymTestID(marsym, part_id, false); + else + getSymTestID(intsym, part_id, false); + } + if (!part_id.empty()) { + SuperAlignment *saln = (SuperAlignment*)alignment; + cout << "Removing " << part_id.size() + << ((params.symtest_remove == 1)? " bad" : " good") << " partitions (pvalue cutoff = " + << params.symtest_pcutoff << ")..." 
<< endl; + if (part_id.size() < alignment->getNSite()) + saln->removePartitions(part_id); + else + outError("Can't remove all partitions"); + if (params.aln_output_format == IN_NEXUS) { + string aln_file = (string)params.out_prefix + ((params.symtest_remove == 1)? ".good.nex" : ".bad.nex"); + alignment->printAlignment(params.aln_output_format, aln_file.c_str()); + } else { + string aln_file = (string)params.out_prefix + ((params.symtest_remove == 1)? ".good.phy" : ".bad.phy"); + alignment->printAlignment(params.aln_output_format, aln_file.c_str()); + string filename = (string)params.out_prefix + ((params.symtest_remove == 2)? ".good.nex" : ".bad.nex"); + saln->printPartition(filename.c_str(), aln_file.c_str()); + } + } + } + if (params.symtest_only) + exit(EXIT_SUCCESS); +} + void runPhyloAnalysis(Params ¶ms, Checkpoint *checkpoint) { - Alignment *alignment; - IQTree *tree; + Alignment *alignment; checkpoint->putBool("finished", false); checkpoint->setDumpInterval(params.checkpoint_dump_interval); - /****************** read in alignment **********************/ - if (params.partition_file) { - // Partition model analysis - if(params.partition_type != BRLEN_OPTIMIZE){ - // since nni5 does not work yet, stop the programm -/* if(params.nni5) - outError("-nni5 option is unsupported yet for proportitional partition model. 
please use -nni1 option");*/ -// if(params.aLRT_replicates || params.localbp_replicates) -// outError("-alrt or -lbp option is unsupported yet for joint/proportional partition model"); - // initialize supertree - Proportional Edges case, "-spt p" option - tree = new PhyloSuperTreePlen(params); - } else { - // initialize supertree stuff if user specifies partition file with -sp option - tree = new PhyloSuperTree(params); - } - // this alignment will actually be of type SuperAlignment - alignment = tree->aln; - } else { - alignment = new Alignment(params.aln_file, params.sequence_type, params.intype); - - if (params.freq_const_patterns) { - int orig_nsite = alignment->getNSite(); - alignment->addConstPatterns(params.freq_const_patterns); - cout << "INFO: " << alignment->getNSite() - orig_nsite << " const sites added into alignment" << endl; - } + /****************** read in alignment **********************/ + if (params.partition_file) { + // Partition model analysis + if (params.partition_type == TOPO_UNLINKED) + alignment = new SuperAlignmentUnlinked(params); + else + alignment = new SuperAlignment(params); + } else { + alignment = createAlignment(params.aln_file, params.sequence_type, params.intype, params.model_name); + + if (params.freq_const_patterns) { + int orig_nsite = alignment->getNSite(); + alignment->addConstPatterns(params.freq_const_patterns); + cout << "INFO: " << alignment->getNSite() - orig_nsite << " const sites added into alignment" << endl; + } // Initialize site-frequency model if (params.tree_freq_file) { @@ -3047,25 +3747,22 @@ void runPhyloAnalysis(Params ¶ms, Checkpoint *checkpoint) { if (params.site_freq_file) { alignment->readSiteStateFreq(params.site_freq_file); } + } + if (params.symtest) { + doSymTest(alignment, params); + } - // allocate heterotachy tree if neccessary - int pos = posRateHeterotachy(params.model_name); - - if (params.num_mixlen > 1) { - tree = new PhyloTreeMixlen(alignment, params.num_mixlen); - } else if (pos != 
string::npos) { - tree = new PhyloTreeMixlen(alignment, 0); - } else - tree = new IQTree(alignment); - } - + cout << "params.print_aln_info = " << params.print_aln_info << endl << flush; if (params.print_aln_info) { string site_info_file = string(params.out_prefix) + ".alninfo"; alignment->printSiteInfo(site_info_file.c_str()); cout << "Alignment sites statistics printed to " << site_info_file << endl; } + /*************** initialize tree ********************/ + IQTree *tree = newIQTree(params, alignment); + tree->setCheckpoint(checkpoint); if (params.min_branch_length <= 0.0) { params.min_branch_length = 1e-6; @@ -3093,13 +3790,11 @@ void runPhyloAnalysis(Params ¶ms, Checkpoint *checkpoint) { } - string original_model = params.model_name; - - if (params.concatenate_aln) { - Alignment aln(params.concatenate_aln, params.sequence_type, params.intype); - cout << "Concatenating " << params.aln_file << " with " << params.concatenate_aln << " ..." << endl; - alignment->concatenateAlignment(&aln); - } + if (params.concatenate_aln) { + Alignment aln(params.concatenate_aln, params.sequence_type, params.intype, params.model_name); + cout << "Concatenating " << params.aln_file << " with " << params.concatenate_aln << " ..." << endl; + alignment->concatenateAlignment(&aln); + } if (params.constraint_tree_file) { cout << "Reading constraint tree " << params.constraint_tree_file << "..." 
<< endl; @@ -3128,124 +3823,399 @@ void runPhyloAnalysis(Params ¶ms, Checkpoint *checkpoint) { string out_tree = (string)params.out_prefix + ".seqident_tree"; tree->printTree(out_tree.c_str()); cout << "Tree with sequence identity printed to " << out_tree << endl; - } else if (params.aln_output) { - /************ convert alignment to other format and write to output file *************/ - convertAlignment(params, tree); - } else if (params.gbo_replicates > 0 && params.user_file && params.second_tree) { - // run one of the UFBoot analysis -// runGuidedBootstrap(params, alignment, *tree); - outError("Obsolete feature"); - } else if (params.avh_test) { - // run one of the wondering test for Arndt -// runAvHTest(params, alignment, *tree); - outError("Obsolete feature"); - } else if (params.bootlh_test) { - // run Arndt's plot of tree likelihoods against bootstrap alignments -// runBootLhTest(params, alignment, *tree); - outError("Obsolete feature"); - } else if (params.num_bootstrap_samples == 0) { - /******************************************************************************** + } else if (params.aln_output) { + /************ convert alignment to other format and write to output file *************/ + convertAlignment(params, tree); + } else if (params.gbo_replicates > 0 && params.user_file && params.second_tree) { + // run one of the UFBoot analysis +// runGuidedBootstrap(params, alignment, *tree); + outError("Obsolete feature"); + } else if (params.avh_test) { + // run one of the wondering test for Arndt +// runAvHTest(params, alignment, *tree); + outError("Obsolete feature"); + } else if (params.bootlh_test) { + // run Arndt's plot of tree likelihoods against bootstrap alignments +// runBootLhTest(params, alignment, *tree); + outError("Obsolete feature"); + } else if (params.num_bootstrap_samples == 0) { + /******************************************************************************** THE MAIN MAXIMUM LIKELIHOOD TREE RECONSTRUCTION - 
********************************************************************************/ - ModelCheckpoint *model_info = new ModelCheckpoint; - alignment->checkGappySeq(params.remove_empty_seq); + ********************************************************************************/ + ModelCheckpoint *model_info = new ModelCheckpoint; + alignment->checkGappySeq(params.remove_empty_seq); - // remove identical sequences + // remove identical sequences + cout << "params.ignore_identical_seqs = " << params.ignore_identical_seqs << endl << flush; if (params.ignore_identical_seqs) { tree->removeIdenticalSeqs(params); if (tree->removed_seqs.size() > 0 && MPIHelper::getInstance().isMaster() && (params.suppress_output_flags & OUT_UNIQUESEQ) == 0) { string filename = (string)params.out_prefix + ".uniqueseq.phy"; - if (tree->isSuperTree()) - ((SuperAlignment*)tree->aln)->printCombinedAlignment(filename.c_str()); - else - tree->aln->printPhylip(filename.c_str()); + tree->aln->printAlignment(params.aln_output_format, filename.c_str()); cout << endl << "For your convenience alignment with unique sequences printed to " << filename << endl; } } alignment = NULL; // from now on use tree->aln instead - // call main tree reconstruction - runTreeReconstruction(params, original_model, tree, *model_info); - + cout << "start tree reconstruction" << endl << flush; + startTreeReconstruction(params, tree, *model_info); + // call main tree reconstruction + cout << "call main tree reconstruction. 
params.num_runs = " << params.num_runs << endl << flush; + if (params.num_runs == 1) + runTreeReconstruction(params, tree); + else + runMultipleTreeReconstruction(params, tree->aln, tree); + + cout << "finish main tree reconstruction" << endl << flush; + if (MPIHelper::getInstance().isMaster()) { + reportPhyloAnalysis(params, *tree, *model_info); + } - if (params.gbo_replicates && params.online_bootstrap) { - if (params.print_ufboot_trees) - tree->writeUFBootTrees(params); - - cout << endl << "Computing bootstrap consensus tree..." << endl; - string splitsfile = params.out_prefix; - splitsfile += ".splits.nex"; - double weight_threshold = (params.split_threshold<1) ? params.split_threshold : (params.gbo_replicates-1.0)/params.gbo_replicates; - weight_threshold *= 100.0; - computeConsensusTree(splitsfile.c_str(), 0, 1e6, -1, - weight_threshold, NULL, params.out_prefix, NULL, ¶ms); - // now optimize branch lengths of the consensus tree - string current_tree = tree->getTreeString(); - optimizeConTree(params, tree); - // revert the best tree - tree->readTreeString(current_tree); - } - if (Params::getInstance().writeDistImdTrees) { - cout << endl; - cout << "Recomputing the log-likelihood of the intermediate trees ... 
" << endl; - tree->intermediateTrees.recomputeLoglOfAllTrees(*tree); - } - reportPhyloAnalysis(params, original_model, *tree, *model_info); - } - - // reinsert identical sequences - if (tree->removed_seqs.size() > 0) { - // BUG FIX: dont use reinsertIdenticalSeqs anymore - tree->insertTaxa(tree->removed_seqs, tree->twin_seqs); - tree->printResultTree(); - } + // reinsert identical sequences + if (tree->removed_seqs.size() > 0) { + // BUG FIX: dont use reinsertIdenticalSeqs anymore + tree->insertTaxa(tree->removed_seqs, tree->twin_seqs); + tree->printResultTree(); + } delete model_info; - } else { - // the classical non-parameter bootstrap (SBS) - if (params.model_name.find("LINK") != string::npos || params.model_name.find("MERGE") != string::npos) - outError("-m TESTMERGE is not allowed when doing standard bootstrap. Please first\nfind partition scheme on the original alignment and use it for bootstrap analysis"); + + if (params.dating_method != "") { + doTimeTree(tree); + } + + } else { + // the classical non-parameter bootstrap (SBS) +// if (params.model_name.find("LINK") != string::npos || params.model_name.find("MERGE") != string::npos) +// outError("-m TESTMERGE is not allowed when doing standard bootstrap. 
Please first\nfind partition scheme on the original alignment and use it for bootstrap analysis"); if (alignment->getNSeq() < 4) outError("It makes no sense to perform bootstrap with less than 4 sequences."); - runStandardBootstrap(params, original_model, alignment, tree); - } - -// if (params.upper_bound) { -// UpperBounds(¶ms, alignment, tree); -// } - - if(verbose_mode >= VB_MED){ - if(tree->isSuperTree() && params.partition_type != BRLEN_OPTIMIZE){ - ((PhyloSuperTreePlen*) tree)->printNNIcasesNUM(); - } - } + runStandardBootstrap(params, alignment, tree); + } + +// if (params.upper_bound) { +// UpperBounds(¶ms, alignment, tree); +// } + + if(verbose_mode >= VB_MED){ + if(tree->isSuperTree() && params.partition_type != BRLEN_OPTIMIZE){ + ((PhyloSuperTreePlen*) tree)->printNNIcasesNUM(); + } + } // 2015-09-22: bug fix, move this line to before deleting tree alignment = tree->aln; - delete tree; - // BUG FIX: alignment can be changed, should delete tree->aln instead + delete tree; + // BUG FIX: alignment can be changed, should delete tree->aln instead // 2015-09-22: THIS IS STUPID: after deleting tree, one cannot access tree->aln anymore -// alignment = tree->aln; - delete alignment; +// alignment = tree->aln; + delete alignment; checkpoint->putBool("finished", true); checkpoint->dump(true); } +/** + Perform separate tree reconstruction when tree topologies + are unlinked between partitions + */ +void runUnlinkedPhyloAnalysis(Params ¶ms, Checkpoint *checkpoint) { + SuperAlignment *super_aln; + + ASSERT(params.partition_file); + + /****************** read in alignment **********************/ + // Partition model analysis + super_aln = new SuperAlignmentUnlinked(params); + PhyloSuperTree *super_tree = new PhyloSuperTree(super_aln); + + /**** do separate tree reconstruction for each partition ***/ + + MTreeSet part_trees; + + if (params.user_file) { + // reading user tree file for all partitions + bool is_rooted = false; + part_trees.readTrees(params.user_file, 
is_rooted, 0, super_aln->partitions.size()); + if (is_rooted) + outError("Rooted trees not allowed: ", params.user_file); + if (part_trees.size() != super_aln->partitions.size()) + outError("User tree file does not have the same number of trees as partitions"); + params.user_file = NULL; + } + + ModelCheckpoint *model_info = new ModelCheckpoint; + int part = 0; + for (auto alnit = super_aln->partitions.begin(); alnit != super_aln->partitions.end(); alnit++, part++) { + + checkpoint->startStruct((*alnit)->name); + + // allocate heterotachy tree if neccessary + int pos = posRateHeterotachy((*alnit)->model_name); + IQTree *tree; + + if (params.num_mixlen > 1) { + tree = new PhyloTreeMixlen((*alnit), params.num_mixlen); + } else if (pos != string::npos) { + tree = new PhyloTreeMixlen((*alnit), 0); + } else + tree = new IQTree((*alnit)); + + tree->setCheckpoint(checkpoint); + if (checkpoint->getBool("finished")) { + tree->restoreCheckpoint(); + } else { + if (!part_trees.empty()) + tree->copyTree(part_trees[part]); + + startTreeReconstruction(params, tree, *model_info); + // call main tree reconstruction + if (params.num_runs == 1) + runTreeReconstruction(params, tree); + else + runMultipleTreeReconstruction(params, tree->aln, tree); + checkpoint->putBool("finished", true); + checkpoint->dump(); + } + + super_tree->at(part)->copyTree(tree); + + delete tree; + checkpoint->endStruct(); + } + + IQTree *iqtree = super_tree; + super_tree->setCheckpoint(checkpoint); + startTreeReconstruction(params, iqtree, *model_info); + runTreeReconstruction(params, iqtree); + if (MPIHelper::getInstance().isMaster()) + reportPhyloAnalysis(params, *iqtree, *model_info); + + delete super_tree; + delete super_aln; + delete model_info; +} + void assignBranchSupportNew(Params ¶ms) { - if (!params.user_file) - outError("No trees file provided"); - if (!params.second_tree) - outError("No target tree file provided"); - cout << "Reading tree " << params.second_tree << " ..." 
<< endl; - MTree tree(params.second_tree, params.is_rooted); - cout << tree.leafNum << " taxa and " << tree.branchNum << " branches" << endl; - tree.assignBranchSupport(params.user_file); - string str = params.second_tree; - str += ".suptree"; - tree.printTree(str.c_str()); - cout << "Tree with assigned branch supports written to " << str << endl; - if (verbose_mode >= VB_DEBUG) - tree.drawTree(cout); + if (!params.user_file) + outError("No target tree file provided"); + if (params.num_threads == 0) + outError("-nt AUTO is not supported for concordance factor analysis, please specify no. cores"); + PhyloTree *tree; + Alignment *aln = NULL; + if (params.site_concordance) { + if (!params.aln_file && !params.partition_file) + outError("Please provide an alignment (-s) or partition file"); + if (params.partition_file) { + params.compute_seq_composition = false; + aln = new SuperAlignment(params); + tree = new PhyloSuperTree((SuperAlignment*)aln); + } else { + aln = createAlignment(params.aln_file, params.sequence_type, params.intype, params.model_name); + tree = new PhyloTree; + } + } else { + tree = new PhyloTree; + } + tree->setParams(¶ms); + + cout << "Reading tree " << params.user_file << " ..." << endl; + bool rooted = params.is_rooted; + tree->readTree(params.user_file, rooted); + cout << ((tree->rooted) ? 
"rooted" : "un-rooted") << " tree with " + << tree->leafNum - tree->rooted << " taxa and " << tree->branchNum << " branches" << endl; + + // 2018-12-13: move initialisation to fix rooted vs unrooted tree + if (params.site_concordance) { + tree->setAlignment(aln); + if (tree->isSuperTree()) + ((PhyloSuperTree*)tree)->mapTrees(); + } + + BranchVector branches; + tree->getInnerBranches(branches); + BranchVector::iterator brit; + for (brit = branches.begin(); brit != branches.end(); brit++) { + Neighbor *branch = brit->second->findNeighbor(brit->first); + string label = brit->second->name; + if (!label.empty()) + PUT_ATTR(branch, label); + } + + map meanings; + + if (!params.treeset_file.empty()) { + bool rooted = params.is_rooted; + MTreeSet trees(params.treeset_file.c_str(), rooted, params.tree_burnin, params.tree_max_count); + double start_time = getRealTime(); + cout << "Computing gene concordance factor..." << endl; + tree->computeGeneConcordance(trees, meanings); + if (params.internode_certainty) + tree->computeQuartetConcordance(trees); + cout << getRealTime() - start_time << " sec" << endl; + } + if (params.site_concordance) { + cout << "Computing site concordance factor..." << endl; + double start_time = getRealTime(); + tree->computeSiteConcordance(meanings); + cout << getRealTime() - start_time << " sec" << endl; + delete aln; + } + string prefix = (params.out_prefix) ? params.out_prefix : params.user_file; + string str = prefix + ".cf.tree"; + tree->printTree(str.c_str()); + cout << "Tree with concordance factors written to " << str << endl; + str = prefix + ".cf.tree.nex"; + string filename = prefix + ".cf.stat"; + tree->printNexus(str, WT_BR_LEN, "See " + filename + " for branch annotation meanings." 
+ + " This file is best viewed in FigTree."); + cout << "Annotated tree (best viewed in FigTree) written to " << str << endl; + if (verbose_mode >= VB_DEBUG) + tree->drawTree(cout); + str = prefix + ".cf.branch"; + tree->printTree(str.c_str(), WT_BR_LEN + WT_INT_NODE + WT_NEWLINE); + cout << "Tree with branch IDs written to " << str << endl; + ofstream out; + out.open(filename.c_str()); + out << "# Concordance factor statistics" << endl + << "# This file can be read in MS Excel or in R with command:" << endl + << "# tab=read.table('" << filename << "',header=TRUE)" << endl + << "# Columns are tab-separated with following meaning:" << endl + << "# ID: Branch ID" << endl; + map::iterator mit; + for (mit = meanings.begin(); mit != meanings.end(); mit++) + if (mit->first[0] != '*') + out << "# " << mit->first << ": " << mit->second << endl; + out << "# Label: Existing branch label" << endl; + out << "# Length: Branch length" << endl; + for (mit = meanings.begin(); mit != meanings.end(); mit++) + if (mit->first[0] == '*') + out << "# " << mit->first << ": " << mit->second << endl; + out << "ID"; + for (mit = meanings.begin(); mit != meanings.end(); mit++) + if (mit->first[0] != '*') + out << "\t" << mit->first; + out << "\tLabel\tLength" << endl; + for (brit = branches.begin(); brit != branches.end(); brit++) { + Neighbor *branch = brit->second->findNeighbor(brit->first); + int ID = brit->second->id; + out << ID; + for (mit = meanings.begin(); mit != meanings.end(); mit++) { + if (mit->first[0] == '*') + continue; // ignore NOTES + out << '\t'; + string val; + if (branch->getAttr(mit->first, val)) + out << val; + else + out << "NA"; + } + double length = branch->length; + string label; + GET_ATTR(branch, label); + out << '\t' << label << '\t' << length << endl; + } + out.close(); + cout << "Concordance factors per branch printed to " << filename << endl; + + if (params.print_cf_quartets) { + filename = prefix + ".cf.quartet"; + out.open(filename); + out << "# Site 
concordance factor for all resampled quartets (with replacement)" << endl + << "# This file can be read in MS Excel or in R with command:" << endl + << "# tab=read.table('" << filename << "',header=TRUE)" << endl + << "# Columns are tab-separated with following meaning:" << endl + << "# ID: Branch ID" << endl + << "# QuartID: Quartet ID" << endl + << "# Seq1: ID of sequence 1 on 'left' side of the branch" << endl + << "# Seq2: ID of sequence 2 on 'left' side of the branch" << endl + << "# Seq3: ID of sequence 3 on 'right' side of the branch" << endl + << "# Seq4: ID of sequence 4 on 'right' side of the branch" << endl + << "# qCF: Fraction of concordant sites supporting quartet Seq1,Seq2|Seq3,Seq4 (=qCF_N/qN)" << endl + << "# qCF_N: Number of concordant sites supporting quartet Seq1,Seq2|Seq3,Seq4" << endl + << "# qDF1: Fraction of discordant sites supporting quartet Seq1,Seq3|Seq2,Seq4 (=qDF1_N/qN)" << endl + << "# qDF1_N: Number of discordant sites supporting quartet Seq1,Seq3|Seq2,Seq4" << endl + << "# qDF2: Fraction of discordant sites supporting quartet Seq1,Seq4|Seq2,Seq3 (=qDF2_N/qN)" << endl + << "# qDF2_N: Number of discordant sites supporting quartet Seq1,Seq4|Seq2,Seq3" << endl + << "# qN: Number of decisive sites with four taxa Seq1,Seq2,Seq3,Seq4 (=qCF_N+qDF1_N+qDF2_N)" << endl + << "ID\tQuartID\tSeq1\tSeq2\tSeq3\tSeq4\tqCF\tqCF_N\tqDF1\tqDF1_N\tqDF2\tqDF2_N\tqN" << endl; + for (brit = branches.begin(); brit != branches.end(); brit++) { + Neighbor *branch = brit->second->findNeighbor(brit->first); + int ID = brit->second->id; + for (int qid = 0; ; qid++) { + string qstr; + if (branch->attributes.find("q" + convertIntToString(qid)) == branch->attributes.end()) + break; + out << ID << '\t' << qid+1 << '\t' << branch->attributes["q" + convertIntToString(qid)] << endl; + } + } + out.close(); + cout << "Site concordance factors for quartets printed to " << filename << endl; + } + + if (!params.site_concordance_partition) + return; + + // print 
concordant/discordant gene trees + filename = prefix + ".cf.stat_tree"; + out.open(filename); + out << "# Concordance factor statistics for decisive trees" << endl + << "# This file can be read in MS Excel or in R with command:" << endl + << "# tab2=read.table('" << filename << "',header=TRUE)" << endl + << "# Columns are tab-separated with following meaning:" << endl + << "# ID: Branch ID" << endl + << "# TreeID: Tree ID" << endl + << "# gC: 1/0 if tree is concordant/discordant with branch" << endl + << "# gD1: 1/0 if NNI-1 tree is concordant/discordant with branch" << endl + << "# gD2: 1/0 if NNI-2 tree is concordant/discordant with branch" << endl + << "# NOTE: NA means that tree is not decisive for branch" << endl + << "ID\tTreeID\tgC\tgD1\tgD2" << endl; + for (brit = branches.begin(); brit != branches.end(); brit++) { + Neighbor *branch = brit->second->findNeighbor(brit->first); + int ID = brit->second->id; + for (int part = 1; ; part++) { + string gC, gD1, gD2; + if (!branch->getAttr("gC" + convertIntToString(part), gC)) + break; + branch->getAttr("gD1" + convertIntToString(part), gD1); + branch->getAttr("gD2" + convertIntToString(part), gD2); + out << ID << '\t' << part << '\t' << gC << '\t' << gD1 << '\t' << gD2 << endl; + } + } + out.close(); + cout << "Concordance factors per branch and tree printed to " << filename << endl; + + if (!params.site_concordance_partition || !tree->isSuperTree()) + return; + // print partition-wise concordant/discordant sites + filename = prefix + ".cf.stat_loci"; + out.open(filename); + out << "# Concordance factor statistics for loci" << endl + << "# This file can be read in MS Excel or in R with command:" << endl + << "# tab2=read.table('" << filename << "',header=TRUE)" << endl + << "# Columns are tab-separated with following meaning:" << endl + << "# ID: Branch ID" << endl + << "# PartID: Locus ID" << endl + << "# sC: Number of concordant sites averaged over " << params.site_concordance << " quartets" << endl + << "# sD1: 
Number of discordant sites for alternative quartet 1" << endl + << "# sD2: Number of discordant sites for alternative quartet 2" << endl + << "# NOTE: NA means that locus is not decisive for branch" << endl + << "ID\tPartID\tsC\tsD1\tsD2" << endl; + for (brit = branches.begin(); brit != branches.end(); brit++) { + Neighbor *branch = brit->second->findNeighbor(brit->first); + int ID = brit->second->id; + for (int part = 1; ; part++) { + string sC, sD1, sD2; + if (!branch->getAttr("sC" + convertIntToString(part), sC)) + break; + if (!branch->getAttr("sD1" + convertIntToString(part), sD1)) + break; + if (!branch->getAttr("sD2" + convertIntToString(part), sD2)) + break; + out << ID << '\t' << part << '\t' << sC << '\t' << sD1 << '\t' << sD2 << endl; + } + } + out.close(); + cout << "Concordance factors per branch and locus printed to " << filename << endl; } @@ -3265,139 +4235,158 @@ void assignBranchSupportNew(Params ¶ms) { * @param params program parameters */ void assignBootstrapSupport(const char *input_trees, int burnin, int max_count, - const char *target_tree, bool rooted, const char *output_tree, - const char *out_prefix, MExtTree &mytree, const char* tree_weight_file, - Params *params) { - //bool rooted = false; - // read the tree file - cout << "Reading tree " << target_tree << " ..." 
<< endl; - mytree.init(target_tree, rooted); - // reindex the taxa in the tree to aphabetical names - NodeVector taxa; - mytree.getTaxa(taxa); - sort(taxa.begin(), taxa.end(), nodenamecmp); - int i = 0; - for (NodeVector::iterator it = taxa.begin(); it != taxa.end(); it++) { - (*it)->id = i++; - } - - /* - string filename = params.boot_trees; - filename += ".nolen"; - boot_trees.printTrees(filename.c_str(), false); - return; - */ - SplitGraph sg; - SplitIntMap hash_ss; - // make the taxa name - vector taxname; - taxname.resize(mytree.leafNum); - mytree.getTaxaName(taxname); - - // read the bootstrap tree file - double scale = 100.0; - if (params->scaling_factor > 0) - scale = params->scaling_factor; - - MTreeSet boot_trees; - if (params && detectInputFile(input_trees) == IN_NEXUS) { - sg.init(*params); - for (SplitGraph::iterator it = sg.begin(); it != sg.end(); it++) - hash_ss.insertSplit((*it), (*it)->getWeight()); - StrVector sgtaxname; - sg.getTaxaName(sgtaxname); - i = 0; - for (StrVector::iterator sit = sgtaxname.begin(); - sit != sgtaxname.end(); sit++, i++) { - Node *leaf = mytree.findLeafName(*sit); - if (!leaf) - outError("Tree does not contain taxon ", *sit); - leaf->id = i; - } - scale /= sg.maxWeight(); - } else { - boot_trees.init(input_trees, rooted, burnin, max_count, - tree_weight_file); - boot_trees.convertSplits(taxname, sg, hash_ss, SW_COUNT, -1, params->support_tag); - scale /= boot_trees.sumTreeWeights(); - } - //sg.report(cout); - cout << "Rescaling split weights by " << scale << endl; - if (params->scaling_factor < 0) - sg.scaleWeight(scale, true); - else { - sg.scaleWeight(scale, false, params->numeric_precision); - } - - cout << sg.size() << " splits found" << endl; - // compute the percentage of appearance - // printSplitSet(sg, hash_ss); - //sg.report(cout); - cout << "Creating bootstrap support values..." 
<< endl; - mytree.createBootstrapSupport(taxname, boot_trees, sg, hash_ss, params->support_tag); - //mytree.scaleLength(100.0/boot_trees.size(), true); - string out_file; - if (output_tree) - out_file = output_tree; - else { - if (out_prefix) - out_file = out_prefix; - else - out_file = target_tree; - out_file += ".suptree"; - } - - mytree.printTree(out_file.c_str()); - cout << "Tree with assigned bootstrap support written to " << out_file - << endl; - /* - if (out_prefix) - out_file = out_prefix; - else - out_file = target_tree; - out_file += ".supval"; - mytree.writeInternalNodeNames(out_file); - - cout << "Support values written to " << out_file << endl; - */ + const char *target_tree, bool rooted, const char *output_tree, + const char *out_prefix, MExtTree &mytree, const char* tree_weight_file, + Params *params) { + bool myrooted = rooted; + // read the tree file + cout << "Reading tree " << target_tree << " ..." << endl; + mytree.init(target_tree, myrooted); + if (mytree.rooted) + cout << "rooted tree detected" << endl; + else + cout << "unrooted tree detected" << endl; + // reindex the taxa in the tree to aphabetical names + NodeVector taxa; + mytree.getTaxa(taxa); + sort(taxa.begin(), taxa.end(), nodenamecmp); + int i = 0; + for (NodeVector::iterator it = taxa.begin(); it != taxa.end(); it++) { + (*it)->id = i++; + } + + /* + string filename = params.boot_trees; + filename += ".nolen"; + boot_trees.printTrees(filename.c_str(), false); + return; + */ + SplitGraph sg; + SplitIntMap hash_ss; + // make the taxa name + vector taxname; + taxname.resize(mytree.leafNum); + mytree.getTaxaName(taxname); + + // read the bootstrap tree file + double scale = 100.0; + if (params->scaling_factor > 0) + scale = params->scaling_factor; + + MTreeSet boot_trees; + if (params && detectInputFile(input_trees) == IN_NEXUS) { + sg.init(*params); + for (SplitGraph::iterator it = sg.begin(); it != sg.end(); it++) + hash_ss.insertSplit((*it), (*it)->getWeight()); + StrVector 
sgtaxname; + sg.getTaxaName(sgtaxname); + i = 0; + for (StrVector::iterator sit = sgtaxname.begin(); + sit != sgtaxname.end(); sit++, i++) { + Node *leaf = mytree.findLeafName(*sit); + if (!leaf) + outError("Tree does not contain taxon ", *sit); + leaf->id = i; + } + scale /= sg.maxWeight(); + } else { + myrooted = rooted; + boot_trees.init(input_trees, myrooted, burnin, max_count, + tree_weight_file); + if (mytree.rooted != boot_trees.isRooted()) + outError("Target tree and tree set have different rooting"); + if (boot_trees.equal_taxon_set) { + boot_trees.convertSplits(taxname, sg, hash_ss, SW_COUNT, -1, params->support_tag); + scale /= boot_trees.sumTreeWeights(); + } + } + //sg.report(cout); + if (!sg.empty()) { + cout << "Rescaling split weights by " << scale << endl; + if (params->scaling_factor < 0) + sg.scaleWeight(scale, true); + else { + sg.scaleWeight(scale, false, params->numeric_precision); + } + + cout << sg.size() << " splits found" << endl; + } + // compute the percentage of appearance + // printSplitSet(sg, hash_ss); + //sg.report(cout); + cout << "Creating " << RESAMPLE_NAME << " support values..." << endl; + if (!sg.empty()) + mytree.createBootstrapSupport(taxname, boot_trees, hash_ss, params->support_tag); + else { + //mytree.createBootstrapSupport(boot_trees); + cout << "Unequal taxon sets, rereading trees..." 
<< endl; + DoubleVector rfdist; + mytree.computeRFDist(input_trees, rfdist, 1); + } + + //mytree.scaleLength(100.0/boot_trees.size(), true); + string out_file; + if (output_tree) + out_file = output_tree; + else { + if (out_prefix) + out_file = out_prefix; + else + out_file = target_tree; + out_file += ".suptree"; + } + + mytree.printTree(out_file.c_str()); + cout << "Tree with assigned support written to " << out_file + << endl; + /* + if (out_prefix) + out_file = out_prefix; + else + out_file = target_tree; + out_file += ".supval"; + mytree.writeInternalNodeNames(out_file); + + cout << "Support values written to " << out_file << endl; + */ } void computeConsensusTree(const char *input_trees, int burnin, int max_count, - double cutoff, double weight_threshold, const char *output_tree, - const char *out_prefix, const char *tree_weight_file, Params *params) { - bool rooted = false; - - // read the bootstrap tree file - /* - MTreeSet boot_trees(input_trees, rooted, burnin, tree_weight_file); - string first_taxname = boot_trees.front()->root->name; - //if (params.root) first_taxname = params.root; - - SplitGraph sg; - - boot_trees.convertSplits(sg, cutoff, SW_COUNT, weight_threshold);*/ - - //sg.report(cout); - SplitGraph sg; - SplitIntMap hash_ss; - // make the taxa name - //vector taxname; - //taxname.resize(mytree.leafNum); - //mytree.getTaxaName(taxname); - - // read the bootstrap tree file - double scale = 100.0; - if (params->scaling_factor > 0) - scale = params->scaling_factor; - - MTreeSet boot_trees; - if (params && detectInputFile(input_trees) == IN_NEXUS) { - char *user_file = params->user_file; - params->user_file = (char*) input_trees; - params->split_weight_summary = SW_COUNT; // count number of splits - sg.init(*params); - params->user_file = user_file; - for (SplitGraph::iterator it = sg.begin(); it != sg.end();) + double cutoff, double weight_threshold, const char *output_tree, + const char *out_prefix, const char *tree_weight_file, Params *params) { 
+ bool rooted = false; + + // read the bootstrap tree file + /* + MTreeSet boot_trees(input_trees, rooted, burnin, tree_weight_file); + string first_taxname = boot_trees.front()->root->name; + //if (params.root) first_taxname = params.root; + + SplitGraph sg; + + boot_trees.convertSplits(sg, cutoff, SW_COUNT, weight_threshold);*/ + + //sg.report(cout); + SplitGraph sg; + SplitIntMap hash_ss; + // make the taxa name + //vector taxname; + //taxname.resize(mytree.leafNum); + //mytree.getTaxaName(taxname); + + // read the bootstrap tree file + double scale = 100.0; + if (params->scaling_factor > 0) + scale = params->scaling_factor; + + MTreeSet boot_trees; + if (params && detectInputFile(input_trees) == IN_NEXUS) { + char *user_file = params->user_file; + params->user_file = (char*) input_trees; + params->split_weight_summary = SW_COUNT; // count number of splits + sg.init(*params); + params->user_file = user_file; + for (SplitGraph::iterator it = sg.begin(); it != sg.end();) if ((*it)->getWeight() > weight_threshold) { hash_ss.insertSplit((*it), (*it)->getWeight()); it++; @@ -3409,133 +4398,135 @@ void computeConsensusTree(const char *input_trees, int burnin, int max_count, delete sg.back(); sg.pop_back(); } - /* StrVector sgtaxname; - sg.getTaxaName(sgtaxname); - i = 0; - for (StrVector::iterator sit = sgtaxname.begin(); sit != sgtaxname.end(); sit++, i++) { - Node *leaf = mytree.findLeafName(*sit); - if (!leaf) outError("Tree does not contain taxon ", *sit); - leaf->id = i; - }*/ - scale /= sg.maxWeight(); - } else { - boot_trees.init(input_trees, rooted, burnin, max_count, - tree_weight_file); - boot_trees.convertSplits(sg, cutoff, SW_COUNT, weight_threshold); - scale /= boot_trees.sumTreeWeights(); - cout << sg.size() << " splits found" << endl; - } - //sg.report(cout); - if (verbose_mode >= VB_MED) - cout << "Rescaling split weights by " << scale << endl; - if (params->scaling_factor < 0) - sg.scaleWeight(scale, true); - else { - sg.scaleWeight(scale, false, 
params->numeric_precision); - } - - - - //cout << "Creating greedy consensus tree..." << endl; - MTree mytree; - SplitGraph maxsg; - sg.findMaxCompatibleSplits(maxsg); - - if (verbose_mode >= VB_MAX) - maxsg.saveFileStarDot(cout); - //cout << "convert compatible split system into tree..." << endl; - mytree.convertToTree(maxsg); - //cout << "done" << endl; - string taxname; - if (params->root) - taxname = params->root; - else - taxname = sg.getTaxa()->GetTaxonLabel(0); - Node *node = mytree.findLeafName(taxname); - if (node) - mytree.root = node; - // mytree.scaleLength(100.0 / boot_trees.sumTreeWeights(), true); - - // mytree.getTaxaID(maxsg.getSplitsBlock()->getCycle()); - //maxsg.saveFile(cout); - - string out_file; - - if (output_tree) - out_file = output_tree; - else { - if (out_prefix) - out_file = out_prefix; - else - out_file = input_trees; - out_file += ".contree"; - } - -// if (removed_seqs.size() > 0) -// mytree.insertTaxa(removed_seqs, twin_seqs); - - mytree.printTree(out_file.c_str(), WT_BR_CLADE); - cout << "Consensus tree written to " << out_file << endl; - - if (output_tree) - out_file = output_tree; - else { - if (out_prefix) - out_file = out_prefix; - else - out_file = input_trees; - out_file += ".splits"; - } + /* StrVector sgtaxname; + sg.getTaxaName(sgtaxname); + i = 0; + for (StrVector::iterator sit = sgtaxname.begin(); sit != sgtaxname.end(); sit++, i++) { + Node *leaf = mytree.findLeafName(*sit); + if (!leaf) outError("Tree does not contain taxon ", *sit); + leaf->id = i; + }*/ + scale /= sg.maxWeight(); + } else { + boot_trees.init(input_trees, rooted, burnin, max_count, + tree_weight_file); + boot_trees.convertSplits(sg, cutoff, SW_COUNT, weight_threshold); + scale /= boot_trees.sumTreeWeights(); + cout << sg.size() << " splits found" << endl; + } + //sg.report(cout); + if (verbose_mode >= VB_MED) + cout << "Rescaling split weights by " << scale << endl; + if (params->scaling_factor < 0) + sg.scaleWeight(scale, true); + else { + 
sg.scaleWeight(scale, false, params->numeric_precision); + } + + + + //cout << "Creating greedy consensus tree..." << endl; + MTree mytree; + SplitGraph maxsg; + sg.findMaxCompatibleSplits(maxsg); + + if (verbose_mode >= VB_MAX) + maxsg.saveFileStarDot(cout); + //cout << "convert compatible split system into tree..." << endl; + mytree.convertToTree(maxsg); + //cout << "done" << endl; + if (!mytree.rooted) { + string taxname; + if (params->root) + taxname = params->root; + else + taxname = sg.getTaxa()->GetTaxonLabel(0); + Node *node = mytree.findLeafName(taxname); + if (node) + mytree.root = node; + } + // mytree.scaleLength(100.0 / boot_trees.sumTreeWeights(), true); + + // mytree.getTaxaID(maxsg.getSplitsBlock()->getCycle()); + //maxsg.saveFile(cout); + + string out_file; + + if (output_tree) + out_file = output_tree; + else { + if (out_prefix) + out_file = out_prefix; + else + out_file = input_trees; + out_file += ".contree"; + } + +// if (removed_seqs.size() > 0) +// mytree.insertTaxa(removed_seqs, twin_seqs); + + mytree.printTree(out_file.c_str(), WT_BR_CLADE); + cout << "Consensus tree written to " << out_file << endl; + + if (output_tree) + out_file = output_tree; + else { + if (out_prefix) + out_file = out_prefix; + else + out_file = input_trees; + out_file += ".splits"; + } //sg.scaleWeight(0.01, false, 4); - if (params->print_splits_file) { - sg.saveFile(out_file.c_str(), IN_OTHER, true); - cout << "Non-trivial split supports printed to star-dot file " << out_file << endl; - } + if (params->print_splits_file) { + sg.saveFile(out_file.c_str(), IN_OTHER, true); + cout << "Non-trivial split supports printed to star-dot file " << out_file << endl; + } } void computeConsensusNetwork(const char *input_trees, int burnin, int max_count, - double cutoff, int weight_summary, double weight_threshold, const char *output_tree, - const char *out_prefix, const char* tree_weight_file) { - bool rooted = false; - - // read the bootstrap tree file - MTreeSet 
boot_trees(input_trees, rooted, burnin, max_count, - tree_weight_file); - - SplitGraph sg; - //SplitIntMap hash_ss; - - boot_trees.convertSplits(sg, cutoff, weight_summary, weight_threshold); - - string out_file; - - if (output_tree) - out_file = output_tree; - else { - if (out_prefix) - out_file = out_prefix; - else - out_file = input_trees; - out_file += ".nex"; - } - - sg.saveFile(out_file.c_str(), IN_NEXUS); - cout << "Consensus network printed to " << out_file << endl; - - if (output_tree) - out_file = output_tree; - else { - if (out_prefix) - out_file = out_prefix; - else - out_file = input_trees; - out_file += ".splits"; - } - if (verbose_mode >= VB_MED) { - sg.saveFile(out_file.c_str(), IN_OTHER, true); - cout << "Non-trivial split supports printed to star-dot file " << out_file << endl; - } + double cutoff, int weight_summary, double weight_threshold, const char *output_tree, + const char *out_prefix, const char* tree_weight_file) { + bool rooted = false; + + // read the bootstrap tree file + MTreeSet boot_trees(input_trees, rooted, burnin, max_count, + tree_weight_file); + + SplitGraph sg; + //SplitIntMap hash_ss; + + boot_trees.convertSplits(sg, cutoff, weight_summary, weight_threshold); + + string out_file; + + if (output_tree) + out_file = output_tree; + else { + if (out_prefix) + out_file = out_prefix; + else + out_file = input_trees; + out_file += ".nex"; + } + + sg.saveFile(out_file.c_str(), IN_NEXUS); + cout << "Consensus network printed to " << out_file << endl; + + if (output_tree) + out_file = output_tree; + else { + if (out_prefix) + out_file = out_prefix; + else + out_file = input_trees; + out_file += ".splits"; + } + if (verbose_mode >= VB_MED) { + sg.saveFile(out_file.c_str(), IN_OTHER, true); + cout << "Non-trivial split supports printed to star-dot file " << out_file << endl; + } } diff --git a/main/phyloanalysis.h b/main/phyloanalysis.h index 86f1b2743..d592bba8f 100644 --- a/main/phyloanalysis.h +++ b/main/phyloanalysis.h @@ -26,6 +26,7 
@@ #include "utils/tools.h" #include "tree/mexttree.h" #include "phylotesting.h" +#include "treetesting.h" #include "tree/upperbounds.h" // Olga: functions for Upper Bounds analysis #include "utils/pllnni.h" @@ -38,8 +39,15 @@ class IQTree; */ void runPhyloAnalysis(Params ¶ms, Checkpoint *checkpoint); -void runTreeReconstruction(Params ¶ms, string &original_model, - IQTree* &tree, ModelCheckpoint &model_info); +/** + Perform separate tree inference across partitions + */ +void runUnlinkedPhyloAnalysis(Params ¶ms, Checkpoint *checkpoint); + +void startTreeReconstruction(Params ¶ms, IQTree* &iqtree, + ModelCheckpoint &model_info); + +void runTreeReconstruction(Params ¶ms, IQTree* &tree); /** take the collection of trees from input_trees, it assign support values to target_tree diff --git a/main/phylotesting.cpp b/main/phylotesting.cpp index e5bd22a63..43aaeb694 100644 --- a/main/phylotesting.cpp +++ b/main/phylotesting.cpp @@ -1,6 +1,6 @@ /* * phylotesting.cpp - * + * implementation of ModelFinder and PartitionFinder * Created on: Aug 23, 2013 * Author: minh */ @@ -11,9 +11,9 @@ #include #endif #include +#include #include "tree/phylotree.h" #include "tree/iqtree.h" -#include "tree/phylosupertree.h" #include "tree/phylotreemixlen.h" #include "phylotesting.h" @@ -36,97 +36,121 @@ #include "model/modelpomo.h" #include "utils/timeutil.h" #include "model/modelfactorymixlen.h" +#include "tree/phylosupertreeplen.h" +#include "tree/phylosupertreeunlinked.h" #include "phyloanalysis.h" #include "gsl/mygsl.h" +#include "utils/MPIHelper.h" //#include "vectorclass/vectorclass.h" +#include "nn/neuralnetwork.h" + +// *********for MPI communication********* +#define ONESIDE_COMM + +// the ratio of the total jobs first distributed to the processors +#define DIST_RATIO 0.8 + +// for one-side communication, how often perform synchronization between the master and the workers +#define TIME_SYN 10 // in seconds /******* Binary model set ******/ -const char* bin_model_names[] = { "JC2", 
"GTR2" }; +const char* bin_model_names[] = {"GTR2", "JC2"}; /******* Morphological model set ******/ -const char* morph_model_names[] = {"MK", "ORDERED"}; +// 2018-08-20: don't test ORDERED model due to lots of numerical issues +//const char* morph_model_names[] = {"MK", "ORDERED"}; +const char* morph_model_names[] = {"MK"}; /******* DNA model set ******/ -const char* dna_model_names[] = { "JC", "F81", "K80", "HKY", "TNe", - "TN", "K81", "K81u", "TPM2", "TPM2u", "TPM3", "TPM3u", "TIMe", "TIM", - "TIM2e", "TIM2", "TIM3e", "TIM3", "TVMe", "TVM", "SYM", "GTR" }; +const char* dna_model_names[] = {"GTR", "SYM", "TVM", "TVMe", "TIM3", + "TIM3e", "TIM2", "TIM2e", "TIM", "TIMe", "TPM3u", "TPM3", + "TPM2u", "TPM2", "K81u", "K81", "TN", "TNe", "HKY", "K80", "F81", "JC"}; /* DNA models supported by PhyML/PartitionFinder */ -const char* dna_model_names_old[] ={"JC", "F81", "K80", "HKY", "TNe", - "TN", "K81", "K81u", "TIMe", "TIM", "TVMe", "TVM", "SYM", "GTR"}; +const char* dna_model_names_old[] ={"GTR", "SYM", "TVM", "TVMe", "TIM", "TIMe", + "K81u", "K81", "TN", "TNe", "HKY", "K80", "F81", "JC"}; /* DNA model supported by RAxML */ const char* dna_model_names_rax[] ={"GTR"}; /* DNA model supported by MrBayes */ -const char *dna_model_names_mrbayes[] = {"JC", "F81", "K80", "HKY", "SYM", "GTR"}; +const char *dna_model_names_mrbayes[] = {"GTR", "SYM", "HKY", "K80", "F81", "JC"}; + +/* DNA model supported by ModelOMatic */ +const char *dna_model_names_modelomatic[] = {"GTR", "HKY", "K80", "F81", "JC"}; //const char* dna_freq_names[] = {"+FO"}; // Lie-Markov models without an RY, WS or MK prefix -const char *dna_model_names_lie_markov_fullsym[] = +const char *dna_model_names_lie_markov_fullsym[] = {"1.1", "3.3a", "4.4a", "6.7a", "9.20b", "12.12"}; // Lie-Markov models with RY symmetry/distinguished pairing const char *dna_model_names_lie_markov_ry[] = { - "RY2.2b", "RY3.3b", "RY3.3c", "RY3.4", "RY4.4b", + "RY2.2b", "RY3.3b", "RY3.3c", "RY3.4", "RY4.4b", "RY4.5a", "RY4.5b", "RY5.6a", 
"RY5.6b", "RY5.7a", - "RY5.7b", "RY5.7c", "RY5.11a", "RY5.11b", "RY5.11c", - "RY5.16", "RY6.6", "RY6.7b", "RY6.8a", "RY6.8b", - "RY6.17a", "RY6.17b","RY8.8", "RY8.10a", "RY8.10b", + "RY5.7b", "RY5.7c", "RY5.11a", "RY5.11b", "RY5.11c", + "RY5.16", "RY6.6", "RY6.7b", "RY6.8a", "RY6.8b", + "RY6.17a", "RY6.17b","RY8.8", "RY8.10a", "RY8.10b", "RY8.16", "RY8.17", "RY8.18", "RY9.20a", "RY10.12", "RY10.34" }; // Lie-Markov models with WS symmetry/distinguished pairing const char *dna_model_names_lie_markov_ws[] = { - "WS2.2b", "WS3.3b", "WS3.3c", "WS3.4", "WS4.4b", + "WS2.2b", "WS3.3b", "WS3.3c", "WS3.4", "WS4.4b", "WS4.5a", "WS4.5b", "WS5.6a", "WS5.6b", "WS5.7a", - "WS5.7b", "WS5.7c", "WS5.11a", "WS5.11b", "WS5.11c", - "WS5.16", "WS6.6", "WS6.7b", "WS6.8a", "WS6.8b", - "WS6.17a", "WS6.17b","WS8.8", "WS8.10a", "WS8.10b", + "WS5.7b", "WS5.7c", "WS5.11a", "WS5.11b", "WS5.11c", + "WS5.16", "WS6.6", "WS6.7b", "WS6.8a", "WS6.8b", + "WS6.17a", "WS6.17b","WS8.8", "WS8.10a", "WS8.10b", "WS8.16", "WS8.17", "WS8.18", "WS9.20a", "WS10.12", "WS10.34" }; // Lie-Markov models with MK symmetry/distinguished pairing const char *dna_model_names_lie_markov_mk[] = { - "MK2.2b", "MK3.3b", "MK3.3c", "MK3.4", "MK4.4b", + "MK2.2b", "MK3.3b", "MK3.3c", "MK3.4", "MK4.4b", "MK4.5a", "MK4.5b", "MK5.6a", "MK5.6b", "MK5.7a", - "MK5.7b", "MK5.7c", "MK5.11a", "MK5.11b", "MK5.11c", - "MK5.16", "MK6.6", "MK6.7b", "MK6.8a", "MK6.8b", - "MK6.17a", "MK6.17b","MK8.8", "MK8.10a", "MK8.10b", + "MK5.7b", "MK5.7c", "MK5.11a", "MK5.11b", "MK5.11c", + "MK5.16", "MK6.6", "MK6.7b", "MK6.8a", "MK6.8b", + "MK6.17a", "MK6.17b","MK8.8", "MK8.10a", "MK8.10b", "MK8.16", "MK8.17", "MK8.18", "MK9.20a", "MK10.12", "MK10.34" }; // Lie-Markov models which are strand symmetric const char *dna_model_names_lie_markov_strsym[] = { - "1.1", "WS2.2b", "3.3a", "WS3.3b", "WS3.3c", "WS3.4", + "1.1", "WS2.2b", "3.3a", "WS3.3b", "WS3.3c", "WS3.4", "WS4.4b", "WS4.5a", "WS4.5b", "WS5.6a", "WS6.6" }; /****** Protein model set ******/ -const 
char* aa_model_names[] = { "Dayhoff", "mtMAM", "JTT", "WAG", - "cpREV", "mtREV", "rtREV", "mtART", "mtZOA", "VT", "LG", "DCMut", "PMB", - "HIVb", "HIVw", "JTTDCMut", "FLU", "Blosum62" , "mtMet" , "mtVer" , "mtInv" }; - +const char* aa_model_names[] = {"LG", "WAG", "JTT", "JTTDCMut", "DCMut", "VT", "PMB", "Blosum62", "Dayhoff", + "mtREV", "mtART", "mtZOA", "mtMet" , "mtVer" , "mtInv", "mtMAM", + "HIVb", "HIVw", "FLU", "rtREV", "cpREV"}; + /* Protein models supported by PhyML/PartitionFinder */ -const char *aa_model_names_phyml[] = { "Dayhoff", "mtMAM", "JTT", "WAG", - "cpREV", "mtREV", "rtREV", "mtART", "VT", "LG", "DCMut", - "HIVb", "HIVw", "Blosum62" }; +const char *aa_model_names_phyml[] = {"LG", "WAG", "JTT", "DCMut", "VT", "Blosum62", "Dayhoff", + "mtREV", "mtART", "mtMAM", + "HIVb", "HIVw", "rtREV", "cpREV"}; /* Protein models supported by RAxML */ -const char *aa_model_names_rax[] = { "Dayhoff", "mtMAM", "JTT", "WAG", - "cpREV", "mtREV", "rtREV", "mtART", "mtZOA", "PMB", "HIVb", "HIVw", "JTTDCMut", "FLU", "VT", "LG", "DCMut", "Blosum62" }; +const char *aa_model_names_rax[] = {"LG", "WAG", "JTT", "JTTDCMut", "DCMut", "VT", "PMB", "Blosum62", "Dayhoff", + "mtREV", "mtART", "mtZOA", "mtMAM", + "HIVb", "HIVw", "FLU", "rtREV", "cpREV"}; + +const char* aa_model_names_mrbayes[] = {"WAG", "JTT", "VT", "Blosum62", "Dayhoff", + "mtREV", "mtMAM", + "rtREV", "cpREV"}; -const char* aa_model_names_mrbayes[] = {"Poisson", "Dayhoff", "mtMAM", "JTT", "WAG", - "cpREV", "mtREV", "rtREV", "VT", "Blosum62" }; +const char* aa_model_names_modelomatic[] = {"LG", "WAG", "JTT", "VT", "Blosum62", "Dayhoff", + "mtART", "mtMAM", "mtREV", + "HIVb", "HIVw", "rtREV", "cpREV"}; -const char *aa_model_names_nuclear[] = {"WAG", "Dayhoff","JTT", "LG", "VT", "DCMut", "PMB", "JTTDCMut", "Blosum62"}; +const char *aa_model_names_nuclear[] = {"LG", "WAG", "JTT", "JTTDCMut","DCMut", "VT", "PMB", "Blosum62", "Dayhoff"}; -const char *aa_model_names_mitochondrial[] = {"mtREV", "mtMAM", "mtART", "mtZOA", 
"mtMet" , "mtVer" , "mtInv" }; +const char *aa_model_names_mitochondrial[] = {"mtREV", "mtART", "mtZOA", "mtMet" , "mtVer" , "mtInv", "mtMAM"}; const char *aa_model_names_chloroplast[] = {"cpREV"}; @@ -138,39 +162,166 @@ const char* aa_freq_names[] = {"", "+F"}; /****** Codon models ******/ //const char *codon_model_names[] = {"GY", "MG", "MGK", "KOSI07", "SCHN05","KOSI07_GY1KTV","SCHN05_GY1KTV"}; //short int std_genetic_code[] = { 0, 0, 0, 1, 1, 1, 1}; -const char *codon_model_names[] = {"MG", "MGK", "GY", "KOSI07", "SCHN05"}; +const char *codon_model_names[] = { "GY", "MGK", "MG", "KOSI07", "SCHN05"}; short int std_genetic_code[] = { 0, 0, 0, 1, 1}; +const char *codon_model_names_modelomatic[] = {"GY"}; +short int std_genetic_code_modelomatic[] = { 0}; -const char *codon_freq_names[] = {"", "+F1X4", "+F3X4", "+F"}; +const char *codon_freq_names[] = {"+F3X4", "+F1X4", "+F", ""}; -const double TOL_LIKELIHOOD_MODELTEST = 0.1; +//const double TOL_LIKELIHOOD_MODELTEST = 0.1; const double TOL_GRADIENT_MODELTEST = 0.0001; -void ModelInfo::computeICScores(size_t sample_size) { +extern double RunKMeans1D(int n, int k, double *points, int *weights, double *centers, int *assignments); + + +string getSeqTypeName(SeqType seq_type) { + switch (seq_type) { + case SEQ_BINARY: return "binary"; + case SEQ_DNA: return "DNA"; + case SEQ_PROTEIN: return "protein"; + case SEQ_CODON: return "codon"; + case SEQ_MORPH: return "morphological"; + case SEQ_POMO: return "PoMo"; + case SEQ_UNKNOWN: return "unknown"; + case SEQ_MULTISTATE: return "MultiState"; + } +} + +string getUsualModelSubst(SeqType seq_type) { + switch (seq_type) { + case SEQ_DNA: return dna_model_names[0]; + case SEQ_PROTEIN: return aa_model_names[0]; + case SEQ_CODON: return string(codon_model_names[0]) + codon_freq_names[0]; + case SEQ_BINARY: return bin_model_names[0]; + case SEQ_MORPH: return morph_model_names[0]; + case SEQ_POMO: return string(dna_model_names[0]) + "+P"; + default: ASSERT(0 && "Unprocessed 
seq_type"); return ""; + } +} + +void getRateHet(SeqType seq_type, string model_name, double frac_invariant_sites, + string rate_set, StrVector &ratehet); + +size_t CandidateModel::getUsualModel(Alignment *aln) { + size_t aln_len = 0; + if (aln->isSuperAlignment()) { + SuperAlignment *super_aln = (SuperAlignment*)aln; + for (auto it = super_aln->partitions.begin(); it != super_aln->partitions.end(); it++) { + CandidateModel usual_model(*it); + if (!subst_name.empty()) + subst_name += ','; + subst_name += usual_model.subst_name; + if (!rate_name.empty()) + rate_name += ','; + rate_name += usual_model.rate_name; + aln_len += (*it)->getNSite(); + } + } else { + subst_name = getUsualModelSubst(aln->seq_type); + StrVector ratehet; + getRateHet(aln->seq_type, Params::getInstance().model_name, aln->frac_invariant_sites, "1", ratehet); + ASSERT(!ratehet.empty()); + rate_name = ratehet[0]; + aln_len = aln->getNSite(); + } + orig_subst_name = subst_name; + orig_rate_name = rate_name; + return aln_len; +} + +void CandidateModel::computeICScores(size_t sample_size) { computeInformationScores(logl, df, sample_size, AIC_score, AICc_score, BIC_score); } -double ModelInfo::computeICScore(size_t sample_size) { +void CandidateModel::computeICScores() { + size_t sample_size = aln->getNSite(); + if (aln->isSuperAlignment()) { + sample_size = 0; + SuperAlignment *super_aln = (SuperAlignment*)aln; + for (auto a : super_aln->partitions) + sample_size += a->getNSite(); + } + if (hasFlag(MF_SAMPLE_SIZE_TRIPLE)) + sample_size /= 3; + computeInformationScores(logl, df, sample_size, AIC_score, AICc_score, BIC_score); +} + +double CandidateModel::computeICScore(size_t sample_size) { return computeInformationScore(logl, df, sample_size, Params::getInstance().model_test_criterion); } -bool ModelCheckpoint::getBestModel(string &best_model) { +double CandidateModel::getScore(ModelTestCriterion mtc) { + switch (mtc) { + case MTC_AIC: + return AIC_score; + case MTC_AICC: + return AICc_score; + case 
MTC_BIC: + return BIC_score; + case MTC_ALL: + ASSERT(0 && "Unhandled case"); + return 0.0; + } +} + +double CandidateModel::getScore() { + return getScore(Params::getInstance().model_test_criterion); +} + +int CandidateModelSet::getBestModelID(ModelTestCriterion mtc) { + double best_score = DBL_MAX; + int best_model = -1; + for (int model = 0; model < size(); model++) + if (at(model).hasFlag(MF_DONE) && best_score > at(model).getScore(mtc)) { + best_score = at(model).getScore(mtc); + best_model = model; + } + return best_model; +} + +/** + restore model from checkpoint + */ +bool CandidateModel::restoreCheckpointRminus1(Checkpoint *ckp, CandidateModel *model) { + size_t posR; + const char *rates[] = {"+R", "*R", "+H", "*H"}; + for (int i = 0; i < sizeof(rates)/sizeof(char*); i++) { + if ((posR = model->rate_name.find(rates[i])) != string::npos) { + int cat = convert_int(model->rate_name.substr(posR+2).c_str()); + subst_name = model->subst_name; + rate_name = model->rate_name.substr(0, posR+2) + convertIntToString(cat-1); + return restoreCheckpoint(ckp); + } + } + return false; +} + +bool ModelCheckpoint::getBestModel(string &best_model) { return getString("best_model_" + criterionName(Params::getInstance().model_test_criterion), best_model); } +bool ModelCheckpoint::getBestModelList(string &best_model_list) { + return getString("best_model_list_" + criterionName(Params::getInstance().model_test_criterion), best_model_list); +} + +void ModelCheckpoint::putBestModelList(string &best_model_list) { + return put("best_model_list_" + criterionName(Params::getInstance().model_test_criterion), best_model_list); +} + bool ModelCheckpoint::getBestTree(string &best_tree) { return getString("best_tree_" + criterionName(Params::getInstance().model_test_criterion), best_tree); } -bool ModelCheckpoint::getOrderedModels(PhyloTree *tree, vector &ordered_models) { +bool ModelCheckpoint::getOrderedModels(PhyloTree *tree, CandidateModelSet &ordered_models) { double best_score_AIC, 
best_score_AICc, best_score_BIC; if (tree->isSuperTree()) { PhyloSuperTree *stree = (PhyloSuperTree*)tree; ordered_models.clear(); for (int part = 0; part != stree->size(); part++) { - startStruct(stree->part_info[part].name); - ModelInfo info; - if (!getBestModel(info.name)) return false; + startStruct(stree->at(part)->aln->name); + CandidateModel info; + if (!getBestModel(info.subst_name)) return false; info.restoreCheckpoint(this); info.computeICScores(stree->at(part)->getAlnNSite()); endStruct(); @@ -183,14 +334,14 @@ bool ModelCheckpoint::getOrderedModels(PhyloTree *tree, vector &order CKP_RESTORE2(this, best_score_BIC); double sum_AIC = 0, sum_AICc = 0, sum_BIC = 0; string str; - bool ret = getString("best_model_list_" + criterionName(Params::getInstance().model_test_criterion), str); + bool ret = getBestModelList(str); if (!ret) return false; istringstream istr(str); string model; ordered_models.clear(); while (istr >> model) { - ModelInfo info; - info.name = model; + CandidateModel info; + info.subst_name = model; info.restoreCheckpoint(this); info.computeICScores(tree->getAlnNSite()); sum_AIC += info.AIC_weight = exp(-0.5*(info.AIC_score-best_score_AIC)); @@ -238,7 +389,7 @@ void appendCString(const char **cvec, int n, StrVector &strvec, bool touppercase } -int getSeqType(const char *model_name, SeqType &seq_type) { +int detectSeqType(const char *model_name, SeqType &seq_type) { bool empirical_model = false; int i; string model_str = model_name; @@ -246,7 +397,7 @@ int getSeqType(const char *model_name, SeqType &seq_type) { StrVector model_list; seq_type = SEQ_UNKNOWN; - + copyCString(bin_model_names, sizeof(bin_model_names)/sizeof(char*), model_list, true); for (i = 0; i < model_list.size(); i++) if (model_str == model_list[i]) { @@ -279,13 +430,13 @@ int getSeqType(const char *model_name, SeqType &seq_type) { if (std_genetic_code[i]) empirical_model = true; break; } - + return (empirical_model) ? 
2 : 1; } -string getSeqType(string model_name) { +string detectSeqTypeName(string model_name) { SeqType seq_type; - getSeqType(model_name.c_str(), seq_type); + detectSeqType(model_name.c_str(), seq_type); switch (seq_type) { case SEQ_BINARY: return "BIN"; break; case SEQ_MORPH: return "MORPH"; break; @@ -322,615 +473,654 @@ string criterionName(ModelTestCriterion mtc) { return "AICc"; if (mtc == MTC_BIC) return "BIC"; + if (mtc == NN) + return "the neural network"; return ""; } -void printSiteLh(const char*filename, PhyloTree *tree, double *ptn_lh, - bool append, const char *linename) { - int i; - double *pattern_lh; - if (!ptn_lh) { - pattern_lh = new double[tree->getAlnNPattern()]; - tree->computePatternLikelihood(pattern_lh); - } else - pattern_lh = ptn_lh; - - try { - ofstream out; - out.exceptions(ios::failbit | ios::badbit); - if (append) { - out.open(filename, ios::out | ios::app); - } else { - out.open(filename); - out << 1 << " " << tree->getAlnNSite() << endl; - } - IntVector pattern_index; - tree->aln->getSitePatternIndex(pattern_index); - if (!linename) - out << "Site_Lh "; - else { - out.width(10); - out << left << linename; - } - for (i = 0; i < tree->getAlnNSite(); i++) - out << " " << pattern_lh[pattern_index[i]]; - out << endl; - out.close(); - if (!append) - cout << "Site log-likelihoods printed to " << filename << endl; - } catch (ios::failure) { - outError(ERR_WRITE_OUTPUT, filename); - } - - if (!ptn_lh) - delete[] pattern_lh; -} -void printPartitionLh(const char*filename, PhyloTree *tree, double *ptn_lh, - bool append, const char *linename) { +/** + * select models for all partitions + * @param[in,out] model_info (IN/OUT) all model information + * @return total number of parameters + */ +void testPartitionModel(Params ¶ms, PhyloSuperTree* in_tree, ModelCheckpoint &model_info, + ModelsBlock *models_block, int num_threads); - ASSERT(tree->isSuperTree()); - PhyloSuperTree *stree = (PhyloSuperTree*)tree; - int i; - double *pattern_lh; - if 
(!ptn_lh) { - pattern_lh = new double[tree->getAlnNPattern()]; - tree->computePatternLikelihood(pattern_lh); - } else - pattern_lh = ptn_lh; - double partition_lh[stree->size()]; - int part; - double *pattern_lh_ptr = pattern_lh; - for (part = 0; part < stree->size(); part++) { - size_t nptn = stree->at(part)->getAlnNPattern(); - partition_lh[part] = 0.0; - for (i = 0; i < nptn; i++) - partition_lh[part] += pattern_lh_ptr[i] * stree->at(part)->ptn_freq[i]; - pattern_lh_ptr += nptn; - } - - try { - ofstream out; - out.exceptions(ios::failbit | ios::badbit); - if (append) { - out.open(filename, ios::out | ios::app); - } else { - out.open(filename); - out << 1 << " " << stree->size() << endl; - } - if (!linename) - out << "Part_Lh "; - else { - out.width(10); - out << left << linename; - } - for (i = 0; i < stree->size(); i++) - out << " " << partition_lh[i]; - out << endl; - out.close(); - if (!append) - cout << "Partition log-likelihoods printed to " << filename << endl; - } catch (ios::failure) { - outError(ERR_WRITE_OUTPUT, filename); - } +/** + compute log-adapter function according to Whelan et al. 
2015 + @param orig_aln original codon alignment + @param newaln AA alignment + @param[out] adjusted_df adjusted degree of freedom factor + @return adjusted log-likelihood factor + */ +double computeAdapter(Alignment *orig_aln, Alignment *newaln, int &adjusted_df) { + int aa, codon; + + // count codon occurences + unsigned int codon_counts[orig_aln->num_states]; + orig_aln->computeAbsoluteStateFreq(codon_counts); + + // compute AA frequency +// double aa_freq[newaln->num_states]; +// newaln->computeStateFreq(aa_freq); + + // compute codon frequency + double codon_freq[orig_aln->num_states]; + //orig_aln->computeStateFreq(codon_freq); + + double sum = 0.0; + for (codon = 0; codon < orig_aln->num_states; codon++) + sum += codon_counts[codon]; + sum = 1.0/sum; + for (codon = 0; codon < orig_aln->num_states; codon++) + codon_freq[codon] = sum*codon_counts[codon]; + + // new rescale codon_freq s.t. codons coding for the same AA + // have f summing up to the frequency of this AA + for (aa = 0; aa < newaln->num_states; aa++) { + double sum = 0; + for (codon = 0; codon < orig_aln->num_states; codon++) + if (newaln->convertState(orig_aln->genetic_code[(int)orig_aln->codon_table[codon]]) == aa) + sum += codon_freq[codon]; + sum = 1.0/sum; + for (codon = 0; codon < orig_aln->num_states; codon++) + if (newaln->convertState(orig_aln->genetic_code[(int)orig_aln->codon_table[codon]]) == aa) + codon_freq[codon] *= sum; + } - if (!ptn_lh) - delete[] pattern_lh; + // now compute adapter function + double adapter = 0.0; + adjusted_df = 0; + vector has_AA; + has_AA.resize(newaln->num_states, false); + + for (codon = 0; codon < orig_aln->num_states; codon++) { + if (codon_counts[codon] == 0) + continue; + has_AA[newaln->convertState(orig_aln->genetic_code[(int)orig_aln->codon_table[codon]])] = true; + adapter += codon_counts[codon]*log(codon_freq[codon]); + adjusted_df++; + } + for (aa = 0; aa < has_AA.size(); aa++) + if (has_AA[aa]) + adjusted_df--; + return adapter; } -void 
printSiteLhCategory(const char*filename, PhyloTree *tree, SiteLoglType wsl) { +/** + compute fast ML tree by stepwise addition MP + ML-NNI + @return the tree string + */ +string computeFastMLTree(Params ¶ms, Alignment *aln, + ModelCheckpoint &model_info, ModelsBlock *models_block, + int &num_threads, int brlen_type, string dist_file) { + //string model_name; + CandidateModel usual_model(aln); + StrVector subst_names; + StrVector rate_names; + convert_string_vec(usual_model.subst_name.c_str(), subst_names); + convert_string_vec(usual_model.rate_name.c_str(), rate_names); + ASSERT(subst_names.size() == rate_names.size()); + //set model_set; + + string concat_tree; - if (wsl == WSL_NONE || wsl == WSL_SITE) - return; - int ncat = tree->getNumLhCat(wsl); - if (tree->isSuperTree()) { - PhyloSuperTree *stree = (PhyloSuperTree*)tree; - for (auto it = stree->begin(); it != stree->end(); it++) { - int part_ncat = (*it)->getNumLhCat(wsl); - if (part_ncat > ncat) - ncat = part_ncat; + IQTree *iqtree = NULL; + + StrVector saved_model_names; + + if (aln->isSuperAlignment()) { + SuperAlignment *saln = (SuperAlignment*)aln; + if (params.partition_type == TOPO_UNLINKED) + iqtree = new PhyloSuperTreeUnlinked(saln); + else if (params.partition_type == BRLEN_OPTIMIZE) + iqtree = new PhyloSuperTree(saln); + else + iqtree = new PhyloSuperTreePlen(saln, brlen_type); + for (int part = 0; part != subst_names.size(); part++) { + saved_model_names.push_back(saln->partitions[part]->model_name); + saln->partitions[part]->model_name = subst_names[part] + rate_names[part]; } + } else if (posRateHeterotachy(rate_names[0]) != string::npos) { + iqtree = new PhyloTreeMixlen(aln, 0); + } else { + iqtree = new IQTree(aln); + } + if ((params.start_tree == STT_PLL_PARSIMONY || params.start_tree == STT_RANDOM_TREE || params.pll) && !iqtree->isInitializedPLL()) { + /* Initialized all data structure for PLL*/ + iqtree->initializePLL(params); } - int i; + iqtree->setParams(¶ms); + 
iqtree->setLikelihoodKernel(params.SSE); + iqtree->optimize_by_newton = params.optimize_by_newton; + iqtree->setNumThreads(num_threads); + iqtree->setCheckpoint(&model_info); - - try { - ofstream out; - out.exceptions(ios::failbit | ios::badbit); - out.open(filename); - out << "# Site likelihood per rate/mixture category" << endl - << "# This file can be read in MS Excel or in R with command:" << endl - << "# tab=read.table('" << filename << "',header=TRUE,fill=TRUE)" << endl - << "# Columns are tab-separated with following meaning:" << endl; - if (tree->isSuperTree()) { - out << "# Part: Partition ID (1=" << ((PhyloSuperTree*)tree)->part_info[0].name << ", etc)" << endl - << "# Site: Site ID within partition (starting from 1 for each partition)" << endl; - } else - out << "# Site: Alignment site ID" << endl; + iqtree->dist_file = dist_file; + iqtree->computeInitialTree(params.SSE); + iqtree->restoreCheckpoint(); - out << "# LnL: Logarithm of site likelihood" << endl - << "# Thus, sum of LnL is equal to tree log-likelihood" << endl - << "# LnLW_k: Logarithm of (category-k site likelihood times category-k weight)" << endl - << "# Thus, sum of exp(LnLW_k) is equal to exp(LnL)" << endl; + //ASSERT(iqtree->root); + iqtree->initializeModel(params, usual_model.getName(), models_block); + if (!iqtree->getModel()->isMixture() || aln->seq_type == SEQ_POMO) { + usual_model.subst_name = iqtree->getSubstName(); + usual_model.rate_name = iqtree->getRateName(); + } - if (tree->isSuperTree()) { - out << "Part\tSite\tLnL"; - } else - out << "Site\tLnL"; - for (i = 0; i < ncat; i++) - out << "\tLnLW_" << i+1; - out << endl; - out.precision(4); - out.setf(ios::fixed); + iqtree->getModelFactory()->restoreCheckpoint(); + iqtree->ensureNumberOfThreadsIsSet(nullptr); + iqtree->initializeAllPartialLh(); + double saved_modelEps = params.modelEps; + params.modelEps = params.modelfinder_eps; + string initTree; - tree->writeSiteLh(out, wsl); + double start_time = getRealTime(); - 
out.close(); - cout << "Site log-likelihoods per category printed to " << filename << endl; - /* - if (!tree->isSuperTree()) { - cout << "Log-likelihood of constant sites: " << endl; - double const_prob = 0.0; - for (i = 0; i < tree->aln->getNPattern(); i++) - if (tree->aln->at(i).isConst()) { - Pattern pat = tree->aln->at(i); - for (Pattern::iterator it = pat.begin(); it != pat.end(); it++) - cout << tree->aln->convertStateBackStr(*it); - cout << ": " << pattern_lh[i] << endl; - const_prob += exp(pattern_lh[i]); + cout << "Perform fast likelihood tree search using " << subst_names[0]+rate_names[0] << " model..." << endl; + + if (iqtree->getCheckpoint()->getBool("finishedFastMLTree")) { + // model optimization already done: ignore this step + iqtree->setCurScore(iqtree->computeLikelihood()); + initTree = iqtree->getTreeString(); + cout << "CHECKPOINT: Tree restored, LogL: " << iqtree->getCurScore() << endl; + } else { + bool saved_opt_gammai = params.opt_gammai; + // disable thorough I+G optimization + params.opt_gammai = false; + initTree = iqtree->optimizeModelParameters(false, params.modelEps*50.0); + if (iqtree->isMixlen()) + initTree = ((ModelFactoryMixlen*)iqtree->getModelFactory())->sortClassesByTreeLength(); + + // do quick NNI search + if (params.start_tree != STT_USER_TREE) { + cout << "Perform nearest neighbor interchange..." 
<< endl; + iqtree->doNNISearch(true); + + // For MPI, we compared between the iqtree objects from all processes and select the optimal one +#ifdef _IQTREE_MPI + int worker, winner; + struct { + double like; + int rank; + } in, out; + if (MPIHelper::getInstance().getNumProcesses() > 1) { + // find out which process has the maximum likelihood + in.like = iqtree->getCurScore(); + in.rank = MPIHelper::getInstance().getProcessID(); + MPI_Allreduce(&in, &out, 1, MPI_DOUBLE_INT, MPI_MAXLOC, MPI_COMM_WORLD); + winner = out.rank; + if (MPIHelper::getInstance().isMaster()) + cout << "Optimal Fast-NNI log-likelihood: " << out.like << " from process " << winner << endl; + + // the winner sends the checkpoint to others + if (MPIHelper::getInstance().getProcessID() == winner) { + iqtree->saveCheckpoint(); + iqtree->getModelFactory()->saveCheckpoint(); + for (worker=0; workergetCheckpoint(), worker); + } + } else { + // receive the checkpoint from the winner + // and update the iqtree object + MPIHelper::getInstance().recvCheckpoint(iqtree->getCheckpoint()); + iqtree->restoreCheckpoint(); + iqtree->getModelFactory()->restoreCheckpoint(); + iqtree->initializeAllPartialLh(); + iqtree->setCurScore(iqtree->computeLikelihood()); } - cout << "Probability of const sites: " << const_prob << endl; + } +#endif + + initTree = iqtree->getTreeString(); } - */ - } catch (ios::failure) { - outError(ERR_WRITE_OUTPUT, filename); - } + params.opt_gammai = saved_opt_gammai; + + iqtree->saveCheckpoint(); + iqtree->getModelFactory()->saveCheckpoint(); + iqtree->getCheckpoint()->putBool("finishedFastMLTree", true); + iqtree->getCheckpoint()->dump(); + // cout << "initTree: " << initTree << endl; + cout << "Time for fast ML tree search: " << getRealTime() - start_time << " seconds" << endl; + cout << endl; + } + + // restore model epsilon + params.modelEps = saved_modelEps; + + // save information to the checkpoint for later retrieval + if (iqtree->isSuperTree()) { + PhyloSuperTree *stree = 
(PhyloSuperTree*)iqtree; + int part = 0; + for (auto it = stree->begin(); it != stree->end(); it++, part++) { + model_info.startStruct((*it)->aln->name); + (*it)->saveCheckpoint(); + (*it)->getModelFactory()->saveCheckpoint(); + model_info.endStruct(); + } + SuperAlignment *saln = (SuperAlignment*)aln; + // restore model_names + for (int i = 0; i < saln->partitions.size(); i++) + saln->partitions[i]->model_name = saved_model_names[i]; + } else { + iqtree->saveCheckpoint(); + iqtree->getModelFactory()->saveCheckpoint(); + } + + delete iqtree; + return initTree; } -void printAncestralSequences(const char *out_prefix, PhyloTree *tree, AncestralSeqType ast) { - -// int *joint_ancestral = NULL; -// -// if (tree->params->print_ancestral_sequence == AST_JOINT) { -// joint_ancestral = new int[nptn*tree->leafNum]; -// tree->computeJointAncestralSequences(joint_ancestral); -// } - - string filename = (string)out_prefix + ".state"; -// string filenameseq = (string)out_prefix + ".stateseq"; - - try { - ofstream out; - out.exceptions(ios::failbit | ios::badbit); - out.open(filename.c_str()); - out.setf(ios::fixed, ios::floatfield); - out.precision(5); - -// ofstream outseq; -// outseq.exceptions(ios::failbit | ios::badbit); -// outseq.open(filenameseq.c_str()); - - NodeVector nodes; - tree->getInternalNodes(nodes); - - double *marginal_ancestral_prob; - int *marginal_ancestral_seq; - -// if (tree->params->print_ancestral_sequence == AST_JOINT) -// outseq << 2*(tree->nodeNum-tree->leafNum) << " " << nsites << endl; -// else -// outseq << (tree->nodeNum-tree->leafNum) << " " << nsites << endl; -// -// int name_width = max(tree->aln->getMaxSeqNameLength(),6)+10; - - out << "# Ancestral state reconstruction for all nodes in " << tree->params->out_prefix << ".treefile" << endl - << "# This file can be read in MS Excel or in R with command:" << endl - << "# tab=read.table('" << tree->params->out_prefix << ".state',header=TRUE)" << endl - << "# Columns are tab-separated with following 
meaning:" << endl - << "# Node: Node name in the tree" << endl; - if (tree->isSuperTree()) { - PhyloSuperTree *stree = (PhyloSuperTree*)tree; - out << "# Part: Partition ID (1=" << stree->part_info[0].name << ", etc)" << endl - << "# Site: Site ID within partition (starting from 1 for each partition)" << endl; - } else - out << "# Site: Alignment site ID" << endl; +/** + Transfer parameters from ModelFinder into the a checkpoint to speed up later stage + */ +void transferModelFinderParameters(IQTree *iqtree, Checkpoint *target) { - out << "# State: Most likely state assignment" << endl - << "# p_X: Posterior probability for state X (empirical Bayesian method)" << endl; + Checkpoint *source = iqtree->getCheckpoint(); - if (tree->isSuperTree()) { - PhyloSuperTree *stree = (PhyloSuperTree*)tree; - out << "Node\tPart\tSite\tState"; - for (size_t i = 0; i < stree->front()->aln->num_states; i++) - out << "\tp_" << stree->front()->aln->convertStateBackStr(i); - } else { - out << "Node\tSite\tState"; - for (size_t i = 0; i < tree->aln->num_states; i++) - out << "\tp_" << tree->aln->convertStateBackStr(i); + // transfer the substitution model and site-rate parameters + if (iqtree->isSuperTree()) { + DoubleVector tree_lens; + string struct_name; + if (iqtree->params->partition_type == BRLEN_SCALE || iqtree->params->partition_type == BRLEN_FIX) + struct_name = "PartitionModelPlen"; + else + struct_name = "PartitionModel"; + target->startStruct(struct_name); + SuperAlignment *super_aln = (SuperAlignment*)iqtree->aln; + for (auto aln : super_aln->partitions) { + source->transferSubCheckpoint(target, aln->name + CKP_SEP + "Model"); + source->transferSubCheckpoint(target, aln->name + CKP_SEP + "Rate"); + + // transfer partition rates + if (iqtree->params->partition_type == BRLEN_SCALE) { + source->startStruct(aln->name); + CandidateModel info; + info.subst_name = aln->model_name; + if (info.restoreCheckpoint(source)) + tree_lens.push_back(info.tree_len); + else + ASSERT(0 && 
"Could not restore tree_len"); + source->endStruct(); + } } - out << endl; + if (iqtree->params->partition_type == BRLEN_SCALE) { + // now normalize the rates + PhyloSuperTree *tree = (PhyloSuperTree*)iqtree; + double sum = 0.0; + size_t nsite = 0; + int i; + for (i = 0; i < tree->size(); i++) { + sum += tree_lens[i] * tree->at(i)->aln->getNSite(); + if (tree->at(i)->aln->seq_type == SEQ_CODON && tree->rescale_codon_brlen) + nsite += 3*tree->at(i)->aln->getNSite(); + else + nsite += tree->at(i)->aln->getNSite(); + } - bool orig_kernel_nonrev; - tree->initMarginalAncestralState(out, orig_kernel_nonrev, marginal_ancestral_prob, marginal_ancestral_seq); + sum /= nsite; + iqtree->restoreCheckpoint(); + iqtree->scaleLength(sum/iqtree->treeLength()); + iqtree->saveCheckpoint(); + sum = 1.0/sum; + for (i = 0; i < tree->size(); i++) + tree_lens[i] *= sum; + target->putVector("part_rates", tree_lens); + } + target->endStruct(); + } else { + source->transferSubCheckpoint(target, "Model"); + source->transferSubCheckpoint(target, "Rate"); + } - for (NodeVector::iterator it = nodes.begin(); it != nodes.end(); it++) { - PhyloNode *node = (PhyloNode*)(*it); - PhyloNode *dad = (PhyloNode*)node->neighbors[0]->node; - - tree->computeMarginalAncestralState((PhyloNeighbor*)dad->findNeighbor(node), dad, - marginal_ancestral_prob, marginal_ancestral_seq); - -// int *joint_ancestral_node = joint_ancestral + (node->id - tree->leafNum)*nptn; + // transfer tree + source->transferSubCheckpoint(target, "PhyloTree"); +} - // set node name if neccessary - if (node->name.empty() || !isalpha(node->name[0])) { - node->name = "Node" + convertIntToString(node->id-tree->leafNum+1); +void runModelFinder(Params ¶ms, IQTree &iqtree, ModelCheckpoint &model_info) +{ + // iqtree.setCurScore(-DBL_MAX); + bool test_only = (params.model_name.find("ONLY") != string::npos) || + (params.model_name.substr(0,2) == "MF" && params.model_name.substr(0,3) != "MFP"); + + bool empty_model_found = 
params.model_name.empty() && !iqtree.isSuperTree(); + + if (params.model_name.empty() && iqtree.isSuperTree()) { + // check whether any partition has empty model_name + PhyloSuperTree *stree = (PhyloSuperTree*)&iqtree; + for (auto i = stree->begin(); i != stree->end(); i++) + if ((*i)->aln->model_name.empty()) { + empty_model_found = true; + break; } - - // print ancestral state probabilities - tree->writeMarginalAncestralState(out, node, marginal_ancestral_prob, marginal_ancestral_seq); - - // print ancestral sequences -// outseq.width(name_width); -// outseq << left << node->name << " "; -// for (i = 0; i < nsites; i++) -// outseq << tree->aln->convertStateBackStr(marginal_ancestral_seq[pattern_index[i]]); -// outseq << endl; -// -// if (tree->params->print_ancestral_sequence == AST_JOINT) { -// outseq.width(name_width); -// outseq << left << (node->name+"_joint") << " "; -// for (i = 0; i < nsites; i++) -// outseq << tree->aln->convertStateBackStr(joint_ancestral_node[pattern_index[i]]); -// outseq << endl; -// } - } - - tree->endMarginalAncestralState(orig_kernel_nonrev, marginal_ancestral_prob, marginal_ancestral_seq); - - out.close(); -// outseq.close(); - cout << "Ancestral state probabilities printed to " << filename << endl; -// cout << "Ancestral sequences printed to " << filenameseq << endl; - - } catch (ios::failure) { - outError(ERR_WRITE_OUTPUT, filename); - } - -// if (joint_ancestral) -// delete[] joint_ancestral; - -} + } -void printSiteProbCategory(const char*filename, PhyloTree *tree, SiteLoglType wsl) { + if (params.model_joint) + empty_model_found = false; - if (wsl == WSL_NONE || wsl == WSL_SITE) + // Model already specifed, nothing to do here + if (!empty_model_found && params.model_name.substr(0, 4) != "TEST" && params.model_name.substr(0, 2) != "MF") return; - // error checking - if (!tree->getModel()->isMixture()) { - if (wsl != WSL_RATECAT) { - outWarning("Switch now to '-wspr' as it is the only option for non-mixture model"); - wsl = 
WSL_RATECAT; - } + // if (MPIHelper::getInstance().getNumProcesses() > 1) + // outError("Please use only 1 MPI process! We are currently working on the MPI parallelization of model selection."); + // TODO: check if necessary + // if (iqtree.isSuperTree()) + // ((PhyloSuperTree*) &iqtree)->mapTrees(); + double cpu_time = getCPUTime(); + double real_time = getRealTime(); + + model_info.setFileName((string)params.out_prefix + ".model.gz"); + model_info.setDumpInterval(params.checkpoint_dump_interval); + + bool ok_model_file = false; + if (!params.model_test_again) { + ok_model_file = model_info.load(); + } + + cout << endl; + + ok_model_file &= model_info.size() > 0; + if (ok_model_file) + cout << "NOTE: Restoring information from model checkpoint file " << model_info.getFileName() << endl; + + // after loading, workers are not allowed to write checkpoint anymore + if (MPIHelper::getInstance().isWorker()) + model_info.setFileName(""); + + Checkpoint *orig_checkpoint = iqtree.getCheckpoint(); + iqtree.setCheckpoint(&model_info); + iqtree.restoreCheckpoint(); + + int partition_type; + if (CKP_RESTORE2((&model_info), partition_type)) { + if (partition_type != params.partition_type) + outError("Mismatch partition type between checkpoint and partition file command option\nRerun with -mredo to ignore .model.gz checkpoint file"); } else { - // mixture model - if (wsl == WSL_MIXTURE_RATECAT && tree->getModelFactory()->fused_mix_rate) { - outWarning("-wspmr is not suitable for fused mixture model, switch now to -wspm"); - wsl = WSL_MIXTURE; - } + partition_type = params.partition_type; + CKP_SAVE2((&model_info), partition_type); } - size_t cat, ncat = tree->getNumLhCat(wsl); - double *ptn_prob_cat = new double[((size_t)tree->getAlnNPattern())*ncat]; - tree->computePatternProbabilityCategory(ptn_prob_cat, wsl); - - try { - ofstream out; - out.exceptions(ios::failbit | ios::badbit); - out.open(filename); - if (tree->isSuperTree()) - out << "Set\t"; - out << "Site"; - for (cat = 
0; cat < ncat; cat++) - out << "\tp" << cat+1; - out << endl; - IntVector pattern_index; - if (tree->isSuperTree()) { - PhyloSuperTree *super_tree = (PhyloSuperTree*)tree; - size_t offset = 0; - for (PhyloSuperTree::iterator it = super_tree->begin(); it != super_tree->end(); it++) { - size_t part_ncat = (*it)->getNumLhCat(wsl); - (*it)->aln->getSitePatternIndex(pattern_index); - size_t site, nsite = (*it)->aln->getNSite(); - for (site = 0; site < nsite; site++) { - out << (it-super_tree->begin())+1 << "\t" << site+1; - double *prob_cat = ptn_prob_cat + (offset+pattern_index[site]*part_ncat); - for (cat = 0; cat < part_ncat; cat++) - out << "\t" << prob_cat[cat]; - out << endl; - } - offset += (*it)->aln->getNPattern()*(*it)->getNumLhCat(wsl); - } + + ModelsBlock *models_block = readModelsDefinition(params); + + if (!params.use_nn_model) { + // compute initial tree + // cout << "params.modelfinder_ml_tree = " << params.modelfinder_ml_tree << endl << flush; + if (params.modelfinder_ml_tree) { + // 2019-09-10: Now perform NNI on the initial tree + string tree_str = computeFastMLTree(params, iqtree.aln, model_info, + models_block, params.num_threads, params.partition_type, iqtree.dist_file); + iqtree.restoreCheckpoint(); } else { - tree->aln->getSitePatternIndex(pattern_index); - int nsite = tree->getAlnNSite(); - for (int site = 0; site < nsite; site++) { - out << site+1; - double *prob_cat = ptn_prob_cat + pattern_index[site]*ncat; - for (cat = 0; cat < ncat; cat++) { - out << "\t" << prob_cat[cat]; + iqtree.computeInitialTree(params.SSE); + + if (iqtree.isSuperTree()) { + PhyloSuperTree *stree = (PhyloSuperTree*)&iqtree; + int part = 0; + for (auto it = stree->begin(); it != stree->end(); it++, part++) { + model_info.startStruct((*it)->aln->name); + (*it)->saveCheckpoint(); + model_info.endStruct(); } - out << endl; + } else { + iqtree.saveCheckpoint(); } } - out.close(); - cout << "Site probabilities per category printed to " << filename << endl; - } catch 
(ios::failure) { - outError(ERR_WRITE_OUTPUT, filename); - } -} + // also save initial tree to the original .ckp.gz checkpoint + // string initTree = iqtree.getTreeString(); + // CKP_SAVE(initTree); + // iqtree.saveCheckpoint(); + // checkpoint->dump(true); + CandidateModelSet candidate_models; + int max_cats = candidate_models.generate(params, iqtree.aln, params.model_test_separate_rate, false); -void printSiteStateFreq(const char*filename, PhyloTree *tree, double *state_freqs) { + uint64_t mem_size = iqtree.getMemoryRequiredThreaded(max_cats); + cout << "NOTE: ModelFinder requires " << (mem_size / 1024) / 1024 << " MB RAM!" << endl; + if (mem_size >= getMemorySize()) { + outError("Memory required exceeds your computer RAM size!"); + } + } +#ifdef BINARY32 + if (mem_size >= 2000000000) { + outError("Memory required exceeds 2GB limit of 32-bit executable"); + } +#endif - int i, j, nsites = tree->getAlnNSite(), nstates = tree->aln->num_states; - double *ptn_state_freq; - if (state_freqs) { - ptn_state_freq = state_freqs; + if (iqtree.isSuperTree()) { + // partition model selection + PhyloSuperTree *stree = (PhyloSuperTree*)&iqtree; + // TD: modified this function to allow for nn model selection + testPartitionModel(params, stree, model_info, models_block, params.num_threads); + stree->mapTrees(); + string res_models = ""; + for (auto it = stree->begin(); it != stree->end(); it++) { + if (it != stree->begin()) res_models += ","; + res_models += (*it)->aln->model_name; + } + iqtree.aln->model_name = res_models; } else { - ptn_state_freq = new double[((size_t)tree->getAlnNPattern()) * nstates]; - tree->computePatternStateFreq(ptn_state_freq); - } - - try { - ofstream out; - out.exceptions(ios::failbit | ios::badbit); - out.open(filename); - IntVector pattern_index; - tree->aln->getSitePatternIndex(pattern_index); - for (i = 0; i < nsites; i++) { - out.width(6); - out << left << i+1 << " "; - double *state_freq = &ptn_state_freq[pattern_index[i]*nstates]; - for (j = 0; 
j < nstates; j++) { - out.width(15); - out << state_freq[j] << " "; - } - out << endl; - } - out.close(); - cout << "Site state frequency vectors printed to " << filename << endl; - } catch (ios::failure) { - outError(ERR_WRITE_OUTPUT, filename); - } - if (!state_freqs) - delete [] ptn_state_freq; -} + // single model selection + CandidateModel best_model; + Checkpoint *checkpoint = &model_info; + // neural network model selection (added by TD) + if (params.use_nn_model) { + cout << "We are using the neural network to select the model of sequence evolution because " + "option --use-nn-model is set to " << params.use_nn_model << endl; + Alignment *alignment = (iqtree.aln->removeAndFillUpGappySites())->replaceAmbiguousChars(); + NeuralNetwork nn(alignment); + iqtree.aln->model_name = nn.doModelInference(); + double alpha = nn.doAlphaInference(); + if (alpha >= 0) { // +G + iqtree.aln->model_name += "+G{" + to_string(alpha) + "}"; + } + string best_model_NN; + CKP_RESTORE(best_model_NN); + delete alignment; + } + else { + if (params.openmp_by_model) + best_model = CandidateModelSet().evaluateAll(params, &iqtree, + model_info, models_block, params.num_threads, + BRLEN_OPTIMIZE); + else + best_model = CandidateModelSet().test(params, &iqtree, + model_info, models_block, params.num_threads, BRLEN_OPTIMIZE); + iqtree.aln->model_name = best_model.getName(); + + Checkpoint *checkpoint = &model_info; + string best_model_AIC, best_model_AICc, best_model_BIC; + CKP_RESTORE(best_model_AIC); + CKP_RESTORE(best_model_AICc); + CKP_RESTORE(best_model_BIC); + cout << "Akaike Information Criterion: " << best_model_AIC << endl; + cout << "Corrected Akaike Information Criterion: " << best_model_AICc << endl; + cout << "Bayesian Information Criterion: " << best_model_BIC << endl; + } + cout << "Best-fit model: " << iqtree.aln->model_name << " chosen according to " + << criterionName(params.model_test_criterion) << endl; + } -void printSiteStateFreq(const char* filename, Alignment *aln) 
{ - if (aln->site_state_freq.empty()) - return; - int i, j, nsites = aln->getNSite(), nstates = aln->num_states; - try { - ofstream out; - out.exceptions(ios::failbit | ios::badbit); - out.open(filename); - IntVector pattern_index; - aln->getSitePatternIndex(pattern_index); - for (i = 0; i < nsites; i++) { - out.width(6); - out << left << i+1 << " "; - double *state_freq = aln->site_state_freq[pattern_index[i]]; - for (j = 0; j < nstates; j++) { - out.width(15); - out << state_freq[j] << " "; - } - out << endl; - } - out.close(); - cout << "Site state frequency vectors printed to " << filename << endl; - } catch (ios::failure) { - outError(ERR_WRITE_OUTPUT, filename); - } -} + delete models_block; -/* -bool checkModelFile(ifstream &in, bool is_partitioned, ModelCheckpoint &infos) { - if (!in.is_open()) return false; - in.exceptions(ios::badbit); - string str; - if (is_partitioned) { - in >> str; - if (str != "Charset") - return false; - } - in >> str; - if (str != "Model") - return false; - in >> str; - if (str != "df") - return false; - in >> str; - if (str != "LnL") - return false; - in >> str; - if (str != "TreeLen") { - outWarning(".model file was produced from a previous version of IQ-TREE"); - return false; - } - safeGetline(in, str); - while (!in.eof()) { - in >> str; - if (in.eof()) - break; - ModelInfo info; - if (is_partitioned) { - info.set_name = str; - in >> str; - } - info.name = str; - in >> info.df >> info.logl >> info.tree_len; - safeGetline(in, str); - info.tree = ""; - if (*str.rbegin() == ';') { - size_t pos = str.rfind('\t'); - if (pos != string::npos) - info.tree = str.substr(pos+1); -// else -// outWarning(".model file was produced from a previous version of IQ-TREE"); - } - infos.push_back(info); - //cout << str << " " << df << " " << logl << endl; - } - in.clear(); - return true; -} -bool checkModelFile(string model_file, bool is_partitioned, ModelCheckpoint &infos) { - if (!fileExists(model_file)) - return false; - //cout << model_file << 
" exists, checking this file" << endl; - ifstream in; - try { - in.exceptions(ios::failbit | ios::badbit); - in.open(model_file.c_str()); - if (!checkModelFile(in, is_partitioned, infos)) - throw false; - // set the failbit again - in.exceptions(ios::failbit | ios::badbit); - in.close(); - } catch (bool ret) { - in.close(); - return ret; - } catch (ios::failure) { - outError("Cannot read file ", model_file); - } - return true; + // force to dump all checkpointing information + model_info.dump(true); + + // transfer models parameters + transferModelFinderParameters(&iqtree, orig_checkpoint); + iqtree.setCheckpoint(orig_checkpoint); + + params.startCPUTime = cpu_time; + params.start_real_time = real_time; + cpu_time = getCPUTime() - cpu_time; + real_time = getRealTime() - real_time; + cout << endl; + cout << "All model information printed to " << model_info.getFileName() << endl; + cout << "CPU time for ModelFinder: " << cpu_time << " seconds (" << convert_time(cpu_time) << ")" << endl; + cout << "Wall-clock time for ModelFinder: " << real_time << " seconds (" << convert_time(real_time) << ")" << endl; + + // alignment = iqtree.aln; + if (test_only) { + params.min_iterations = 0; + } } -*/ +// added by TD +void runModelFinderNN(Params ¶ms, IQTree &iqtree, ModelCheckpoint &model_info) { /** - * get the list of model - * @param models (OUT) vectors of model names - * @return maximum number of rate categories + * Legacy code, remove after discussion with Minh */ -int getModelList(Params ¶ms, Alignment *aln, StrVector &models, bool separate_rate = false) { - StrVector model_names; - StrVector freq_names; - SeqType seq_type = aln->seq_type; - - const char *rate_options[] = { "", "+I", "+ASC", "+G", "+I+G", "+ASC+G", "+R", "+ASC+R"}; - bool test_options_default[] = {true, true, false, true, true, false,false, false}; - bool test_options_morph[] = {true,false, true, true, false, true,false, false}; - bool test_options_noASC_I[] = {true,false, false, true, false, 
false,false, false}; - bool test_options_asc[] ={false,false, true,false, false, true,false, false}; - bool test_options_new[] = {true, true, false, true, true, false, true, false}; - bool test_options_morph_new[] = {true,false, true, true, false, true, true, true}; - bool test_options_noASC_I_new[] = {true,false, false, true, false, false, true, false}; - bool test_options_asc_new[] ={false,false, true,false, false, true,false, true}; - bool test_options_pomo[] = {true, false, false, true, false, false,false, false}; - bool *test_options = test_options_default; -// bool test_options_codon[] = {true,false, false,false, false, false}; - const int noptions = sizeof(rate_options) / sizeof(char*); - int i, j; - - if (seq_type == SEQ_BINARY) { - if (params.model_set == NULL) { + if (iqtree.isSuperTree()) { + // partition model selection + PhyloSuperTree *stree = (PhyloSuperTree*)&iqtree; + // testPartitionModel(params, stree, model_info, models_block, params.num_threads); + stree->mapTrees(); + string res_models = ""; + for (auto it = stree->begin(); it != stree->end(); it++) { + if (it != stree->begin()) res_models += ","; + Alignment alignment = *((*it)->aln->removeAndFillUpGappySites()); + NeuralNetwork nn(&alignment); + (*it)->aln->model_name = nn.doModelInference(); + res_models += (*it)->aln->model_name; + } + iqtree.aln->model_name = res_models; + } + + } + +/** + * get the list of substitution models + */ +void getModelSubst(SeqType seq_type, bool standard_code, string model_name, + string model_set, char *model_subset, StrVector &model_names) { + int i, j; + + if (model_set == "1") { + model_names.push_back(getUsualModelSubst(seq_type)); + return; + } + + if (iEquals(model_set, "ALL") || iEquals(model_set, "AUTO")) + model_set = ""; + + if (seq_type == SEQ_BINARY) { + if (model_set.empty()) { copyCString(bin_model_names, sizeof(bin_model_names) / sizeof(char*), model_names); - } else { - convert_string_vec(params.model_set, model_names); - } - } else if 
(seq_type == SEQ_MORPH) { - if (params.model_set == NULL) { + } else if (model_set[0] == '+') { + // append model_set into existing models + convert_string_vec(model_set.c_str()+1, model_names); + appendCString(bin_model_names, sizeof(bin_model_names) / sizeof(char*), model_names); + } else { + convert_string_vec(model_set.c_str(), model_names); + } + } else if (seq_type == SEQ_MORPH) { + if (model_set.empty()) { copyCString(morph_model_names, sizeof(morph_model_names) / sizeof(char*), model_names); - } else { - convert_string_vec(params.model_set, model_names); - } - } else if (seq_type == SEQ_DNA || seq_type == SEQ_POMO) { - if (params.model_set == NULL) { - copyCString(dna_model_names, sizeof(dna_model_names) / sizeof(char*), model_names); -// copyCString(dna_freq_names, sizeof(dna_freq_names)/sizeof(char*), freq_names); - } else if (strcmp(params.model_set, "partitionfinder") == 0 || strcmp(params.model_set, "phyml") == 0) { - copyCString(dna_model_names_old, sizeof(dna_model_names_old) / sizeof(char*), model_names); -// copyCString(dna_freq_names, sizeof(dna_freq_names)/sizeof(char*), freq_names); - } else if (strcmp(params.model_set, "raxml") == 0) { - copyCString(dna_model_names_rax, sizeof(dna_model_names_rax) / sizeof(char*), model_names); -// copyCString(dna_freq_names, sizeof(dna_freq_names)/sizeof(char*), freq_names); - } else if (strcmp(params.model_set, "mrbayes") == 0) { - copyCString(dna_model_names_mrbayes, sizeof(dna_model_names_mrbayes) / sizeof(char*), model_names); -// copyCString(dna_freq_names, sizeof(dna_freq_names)/sizeof(char*), freq_names); - } else if (strcmp(params.model_set, "liemarkov") == 0) { - copyCString(dna_model_names_lie_markov_fullsym, sizeof(dna_model_names_lie_markov_fullsym) / sizeof(char*), model_names); - appendCString(dna_model_names_lie_markov_ry, sizeof(dna_model_names_lie_markov_ry) / sizeof(char*), model_names); - appendCString(dna_model_names_lie_markov_ws, sizeof(dna_model_names_lie_markov_ws) / sizeof(char*), 
model_names); - appendCString(dna_model_names_lie_markov_mk, sizeof(dna_model_names_lie_markov_mk) / sizeof(char*), model_names); - } else if (strcmp(params.model_set, "liemarkovry") == 0) { - copyCString(dna_model_names_lie_markov_fullsym, sizeof(dna_model_names_lie_markov_fullsym) / sizeof(char*), model_names); - appendCString(dna_model_names_lie_markov_ry, sizeof(dna_model_names_lie_markov_ry) / sizeof(char*), model_names); - } else if (strcmp(params.model_set, "liemarkovws") == 0) { - copyCString(dna_model_names_lie_markov_fullsym, sizeof(dna_model_names_lie_markov_fullsym) / sizeof(char*), model_names); - appendCString(dna_model_names_lie_markov_ws, sizeof(dna_model_names_lie_markov_ws) / sizeof(char*), model_names); - } else if (strcmp(params.model_set, "liemarkovmk") == 0) { - copyCString(dna_model_names_lie_markov_fullsym, sizeof(dna_model_names_lie_markov_fullsym) / sizeof(char*), model_names); - appendCString(dna_model_names_lie_markov_mk, sizeof(dna_model_names_lie_markov_mk) / sizeof(char*), model_names); - } else if (strcmp(params.model_set, "strandsymmetric") == 0) { - copyCString(dna_model_names_lie_markov_strsym, sizeof(dna_model_names_lie_markov_strsym) / sizeof(char*), model_names); - // IMPORTANT NOTE: If you add any more -mset names for sets of Lie Markov models, - // you also need to change getPrototypeModel function. 
- } else { - convert_string_vec(params.model_set, model_names); -// copyCString(dna_freq_names, sizeof(dna_freq_names)/sizeof(char*), freq_names); - } + } else if (model_set[0] == '+') { + // append model_set into existing models + convert_string_vec(model_set.c_str()+1, model_names); + appendCString(morph_model_names, sizeof(morph_model_names) / sizeof(char*), model_names); + } else { + convert_string_vec(model_set.c_str(), model_names); + } + } else if (seq_type == SEQ_DNA || seq_type == SEQ_POMO) { + if (model_set.empty()) { + copyCString(dna_model_names, sizeof(dna_model_names) / sizeof(char*), model_names); + // copyCString(dna_freq_names, sizeof(dna_freq_names)/sizeof(char*), freq_names); + } else if (model_set == "partitionfinder" || model_set== "phyml") { + copyCString(dna_model_names_old, sizeof(dna_model_names_old) / sizeof(char*), model_names); + // copyCString(dna_freq_names, sizeof(dna_freq_names)/sizeof(char*), freq_names); + } else if (model_set == "raxml") { + copyCString(dna_model_names_rax, sizeof(dna_model_names_rax) / sizeof(char*), model_names); + // copyCString(dna_freq_names, sizeof(dna_freq_names)/sizeof(char*), freq_names); + } else if (model_set == "mrbayes") { + copyCString(dna_model_names_mrbayes, sizeof(dna_model_names_mrbayes) / sizeof(char*), model_names); + // copyCString(dna_freq_names, sizeof(dna_freq_names)/sizeof(char*), freq_names); + } else if (model_set == "modelomatic") { + copyCString(dna_model_names_modelomatic, sizeof(dna_model_names_modelomatic) / sizeof(char*), model_names); + } else if (model_set == "liemarkov") { + copyCString(dna_model_names_lie_markov_fullsym, sizeof(dna_model_names_lie_markov_fullsym) / sizeof(char*), model_names); + appendCString(dna_model_names_lie_markov_ry, sizeof(dna_model_names_lie_markov_ry) / sizeof(char*), model_names); + appendCString(dna_model_names_lie_markov_ws, sizeof(dna_model_names_lie_markov_ws) / sizeof(char*), model_names); + appendCString(dna_model_names_lie_markov_mk, 
sizeof(dna_model_names_lie_markov_mk) / sizeof(char*), model_names); + } else if (model_set == "liemarkovry") { + copyCString(dna_model_names_lie_markov_fullsym, sizeof(dna_model_names_lie_markov_fullsym) / sizeof(char*), model_names); + appendCString(dna_model_names_lie_markov_ry, sizeof(dna_model_names_lie_markov_ry) / sizeof(char*), model_names); + } else if (model_set == "liemarkovws") { + copyCString(dna_model_names_lie_markov_fullsym, sizeof(dna_model_names_lie_markov_fullsym) / sizeof(char*), model_names); + appendCString(dna_model_names_lie_markov_ws, sizeof(dna_model_names_lie_markov_ws) / sizeof(char*), model_names); + } else if (model_set == "liemarkovmk") { + copyCString(dna_model_names_lie_markov_fullsym, sizeof(dna_model_names_lie_markov_fullsym) / sizeof(char*), model_names); + appendCString(dna_model_names_lie_markov_mk, sizeof(dna_model_names_lie_markov_mk) / sizeof(char*), model_names); + } else if (model_set == "strandsymmetric") { + copyCString(dna_model_names_lie_markov_strsym, sizeof(dna_model_names_lie_markov_strsym) / sizeof(char*), model_names); + // IMPORTANT NOTE: If you add any more -mset names for sets of Lie Markov models, + // you also need to change getPrototypeModel function. 
+ } else if (model_set[0] == '+') { + // append model_set into existing models + convert_string_vec(model_set.c_str()+1, model_names); + appendCString(dna_model_names, sizeof(dna_model_names) / sizeof(char*), model_names); + } else { + convert_string_vec(model_set.c_str(), model_names); + } - if (params.model_name.find("+LMRY") != string::npos) { - appendCString(dna_model_names_lie_markov_fullsym, sizeof(dna_model_names_lie_markov_fullsym) / sizeof(char*), model_names); - appendCString(dna_model_names_lie_markov_ry, sizeof(dna_model_names_lie_markov_ry) / sizeof(char*), model_names); - } else if (params.model_name.find("+LMWS") != string::npos) { - appendCString(dna_model_names_lie_markov_fullsym, sizeof(dna_model_names_lie_markov_fullsym) / sizeof(char*), model_names); - appendCString(dna_model_names_lie_markov_ws, sizeof(dna_model_names_lie_markov_ws) / sizeof(char*), model_names); - } else if (params.model_name.find("+LMMK") != string::npos) { - appendCString(dna_model_names_lie_markov_fullsym, sizeof(dna_model_names_lie_markov_fullsym) / sizeof(char*), model_names); - appendCString(dna_model_names_lie_markov_mk, sizeof(dna_model_names_lie_markov_mk) / sizeof(char*), model_names); - } else if (params.model_name.find("+LMSS") != string::npos) { - appendCString(dna_model_names_lie_markov_strsym, sizeof(dna_model_names_lie_markov_strsym) / sizeof(char*), model_names); - } else if (params.model_name.find("+LM") != string::npos) { - appendCString(dna_model_names_lie_markov_fullsym, sizeof(dna_model_names_lie_markov_fullsym) / sizeof(char*), model_names); - appendCString(dna_model_names_lie_markov_ry, sizeof(dna_model_names_lie_markov_ry) / sizeof(char*), model_names); - appendCString(dna_model_names_lie_markov_ws, sizeof(dna_model_names_lie_markov_ws) / sizeof(char*), model_names); - appendCString(dna_model_names_lie_markov_mk, sizeof(dna_model_names_lie_markov_mk) / sizeof(char*), model_names); - } - } else if (seq_type == SEQ_PROTEIN) { - if (params.model_set == 
NULL) { - copyCString(aa_model_names, sizeof(aa_model_names) / sizeof(char*), model_names); - } else if (strcmp(params.model_set, "partitionfinder") == 0 || strcmp(params.model_set, "phyml") == 0) { - copyCString(aa_model_names_phyml, sizeof(aa_model_names_phyml) / sizeof(char*), model_names); - } else if (strcmp(params.model_set, "raxml") == 0) { - copyCString(aa_model_names_rax, sizeof(aa_model_names_rax) / sizeof(char*), model_names); - } else if (strcmp(params.model_set, "mrbayes") == 0) { - copyCString(aa_model_names_mrbayes, sizeof(aa_model_names_mrbayes) / sizeof(char*), model_names); - } else { - convert_string_vec(params.model_set, model_names); - } - copyCString(aa_freq_names, sizeof(aa_freq_names)/sizeof(char*), freq_names); - - if (params.model_subset) { + if (model_name.find("+LMRY") != string::npos) { + appendCString(dna_model_names_lie_markov_fullsym, sizeof(dna_model_names_lie_markov_fullsym) / sizeof(char*), model_names); + appendCString(dna_model_names_lie_markov_ry, sizeof(dna_model_names_lie_markov_ry) / sizeof(char*), model_names); + } else if (model_name.find("+LMWS") != string::npos) { + appendCString(dna_model_names_lie_markov_fullsym, sizeof(dna_model_names_lie_markov_fullsym) / sizeof(char*), model_names); + appendCString(dna_model_names_lie_markov_ws, sizeof(dna_model_names_lie_markov_ws) / sizeof(char*), model_names); + } else if (model_name.find("+LMMK") != string::npos) { + appendCString(dna_model_names_lie_markov_fullsym, sizeof(dna_model_names_lie_markov_fullsym) / sizeof(char*), model_names); + appendCString(dna_model_names_lie_markov_mk, sizeof(dna_model_names_lie_markov_mk) / sizeof(char*), model_names); + } else if (model_name.find("+LMSS") != string::npos) { + appendCString(dna_model_names_lie_markov_strsym, sizeof(dna_model_names_lie_markov_strsym) / sizeof(char*), model_names); + } else if (model_name.find("+LM") != string::npos) { + appendCString(dna_model_names_lie_markov_fullsym, sizeof(dna_model_names_lie_markov_fullsym) / 
sizeof(char*), model_names); + appendCString(dna_model_names_lie_markov_ry, sizeof(dna_model_names_lie_markov_ry) / sizeof(char*), model_names); + appendCString(dna_model_names_lie_markov_ws, sizeof(dna_model_names_lie_markov_ws) / sizeof(char*), model_names); + appendCString(dna_model_names_lie_markov_mk, sizeof(dna_model_names_lie_markov_mk) / sizeof(char*), model_names); + } + } else if (seq_type == SEQ_PROTEIN) { + if (model_set.empty()) { + copyCString(aa_model_names, sizeof(aa_model_names) / sizeof(char*), model_names); + } else if (model_set == "partitionfinder" || model_set == "phyml") { + copyCString(aa_model_names_phyml, sizeof(aa_model_names_phyml) / sizeof(char*), model_names); + } else if (model_set == "raxml") { + copyCString(aa_model_names_rax, sizeof(aa_model_names_rax) / sizeof(char*), model_names); + } else if (model_set == "mrbayes") { + copyCString(aa_model_names_mrbayes, sizeof(aa_model_names_mrbayes) / sizeof(char*), model_names); + } else if (model_set == "modelomatic") { + copyCString(aa_model_names_modelomatic, sizeof(aa_model_names_modelomatic) / sizeof(char*), model_names); + } else if (model_set[0] == '+') { + // append model_set into existing models + convert_string_vec(model_set.c_str()+1, model_names); + appendCString(aa_model_names, sizeof(aa_model_names) / sizeof(char*), model_names); + } else { + convert_string_vec(model_set.c_str(), model_names); + } + + if (model_subset) { StrVector submodel_names; - if (strncmp(params.model_subset, "nuclear", 3) == 0) { + if (strncmp(model_subset, "nuclear", 3) == 0) { copyCString(aa_model_names_nuclear, sizeof(aa_model_names_nuclear) / sizeof(char*), submodel_names); - } else if (strncmp(params.model_subset, "mitochondrial", 3) == 0) { + } else if (strncmp(model_subset, "mitochondrial", 3) == 0) { copyCString(aa_model_names_mitochondrial, sizeof(aa_model_names_mitochondrial) / sizeof(char*), submodel_names); - } else if (strncmp(params.model_subset, "chloroplast", 3) == 0) { + } else if 
(strncmp(model_subset, "chloroplast", 3) == 0) { copyCString(aa_model_names_chloroplast, sizeof(aa_model_names_chloroplast) / sizeof(char*), submodel_names); - } else if (strncmp(params.model_subset, "viral",3) == 0) { + } else if (strncmp(model_subset, "viral",3) == 0) { copyCString(aa_model_names_viral, sizeof(aa_model_names_viral) / sizeof(char*), submodel_names); } else { outError("Wrong -msub option"); } for (i = 0; i < model_names.size(); i++) { bool appear = false; - for (j = 0; j < submodel_names.size(); j++) + for (j = 0; j < submodel_names.size(); j++) if (model_names[i] == submodel_names[j]) { appear = true; break; @@ -942,73 +1132,92 @@ int getModelList(Params ¶ms, Alignment *aln, StrVector &models, bool separat } } - } else if (seq_type == SEQ_CODON) { - if (params.model_set == NULL) { - if (aln->isStandardGeneticCode()) - copyCString(codon_model_names, sizeof(codon_model_names) / sizeof(char*), model_names); - else { + } else if (seq_type == SEQ_CODON) { + if (model_set.empty()) { + if (standard_code) + copyCString(codon_model_names, sizeof(codon_model_names) / sizeof(char*), model_names); + else { i = sizeof(codon_model_names) / sizeof(char*); for (j = 0; j < i; j++) if (!std_genetic_code[j]) model_names.push_back(codon_model_names[j]); -// copyCString(codon_model_names, sizeof(codon_model_names) / sizeof(char*) - 1, model_names); + // copyCString(codon_model_names, sizeof(codon_model_names) / sizeof(char*) - 1, model_names); } - } else - convert_string_vec(params.model_set, model_names); - copyCString(codon_freq_names, sizeof(codon_freq_names) / sizeof(char*), freq_names); - } - - if (model_names.empty()) - return 1; - - if (params.state_freq_set) - convert_string_vec(params.state_freq_set, freq_names); + } else if (model_set == "modelomatic") { + copyCString(codon_model_names_modelomatic, sizeof(codon_model_names_modelomatic) / sizeof(char*), model_names); + } else if (model_set[0] == '+') { + // append model_set into existing models + 
convert_string_vec(model_set.c_str()+1, model_names); + if (standard_code) + appendCString(codon_model_names, sizeof(codon_model_names) / sizeof(char*), model_names); + else { + i = sizeof(codon_model_names) / sizeof(char*); + for (j = 0; j < i; j++) + if (!std_genetic_code[j]) + model_names.push_back(codon_model_names[j]); + } + } else + convert_string_vec(model_set.c_str(), model_names); + } +} + +void getStateFreqs(SeqType seq_type, char *state_freq_set, StrVector &freq_names) { + int j; + + switch (seq_type) { + case SEQ_PROTEIN: + copyCString(aa_freq_names, sizeof(aa_freq_names)/sizeof(char*), freq_names); + break; + case SEQ_CODON: + copyCString(codon_freq_names, sizeof(codon_freq_names) / sizeof(char*), freq_names); + break; + default: + break; + } + if (state_freq_set) + convert_string_vec(state_freq_set, freq_names); for (j = 0; j < freq_names.size(); j++) { std::transform(freq_names[j].begin(), freq_names[j].end(), freq_names[j].begin(), ::toupper); -// for (i = 0; i < freq_names.size(); i++) -// cout << " " << freq_names[i]; -// cout << endl; if (freq_names[j] != "" && freq_names[j][0] != '+') freq_names[j] = "+" + freq_names[j]; } - - if (freq_names.size() > 0) { - StrVector orig_model_names = model_names; - model_names.clear(); - for (j = 0; j < orig_model_names.size(); j++) { - if (aln->seq_type == SEQ_CODON) { - SeqType seq_type; - int model_type = getSeqType(orig_model_names[j].c_str(), seq_type); - for (i = 0; i < freq_names.size(); i++) { - // disallow MG+F - if (freq_names[i] == "+F" && orig_model_names[j].find("MG") != string::npos) - continue; - if (freq_names[i] != "" || (model_type == 2 && orig_model_names[j].find("MG") == string::npos)) - // empirical model also allow "" - model_names.push_back(orig_model_names[j] + freq_names[i]); - } - } else { - for (i = 0; i < freq_names.size(); i++) - model_names.push_back(orig_model_names[j] + freq_names[i]); - } - } - } +} + +/** + get list of rate heterogeneity + */ +void getRateHet(SeqType seq_type, 
string model_name, double frac_invariant_sites, + string rate_set, StrVector &ratehet) { + const char *rate_options[] = { "", "+I", "+ASC", "+G", "+I+G", "+ASC+G", "+R", "+ASC+R"}; + bool test_options_default[] = {true, true, false, true, true, false, false, false}; + bool test_options_fast[] = {false, false, false, false, true, false, false, false}; + bool test_options_morph[] = {true, false, true, true, false, true, false, false}; + bool test_options_morph_fast[]= {false, false, false, false, false, true, false, false}; + bool test_options_noASC_I[] = {true, false, false, true, false, false, false, false}; + bool test_options_noASC_I_fast[]={false,false, false, true, false, false, false, false}; + bool test_options_asc[] ={false, false, true, false, false, true, false, false}; + bool test_options_new[] = {true, true, false, true, true, false, true, false}; + bool test_options_morph_new[] = {true, false, true, true, false, true, true, true}; + bool test_options_noASC_I_new[]= {true, false, false, true, false, false, true, false}; + bool test_options_asc_new[] = {false, false, true, false, false, true, false, true}; + bool test_options_pomo[] = {true, false, false, true, false, false, false, false}; + bool test_options_norate[] = {true, false, false, false, false, false, false, false}; + bool *test_options = test_options_default; + // bool test_options_codon[] = {true,false, false,false, false, false}; + const int noptions = sizeof(rate_options) / sizeof(char*); + int i, j; - bool with_new = (params.model_name.find("NEW") != string::npos || params.model_name.substr(0,2) == "MF" || params.model_name.empty()); - bool with_asc = params.model_name.find("ASC") != string::npos; + bool with_new = (model_name.find("NEW") != string::npos || model_name.substr(0,2) == "MF" || model_name.empty()); + bool with_asc = model_name.find("ASC") != string::npos; -// if (seq_type == SEQ_CODON) { -// for (i = 0; i < noptions; i++) -// test_options[i] = test_options_codon[i]; -// } else 
if (seq_type == SEQ_POMO) { - for (i = 0; i < noptions; i++) - test_options[i] = test_options_pomo[i]; + for (i = 0; i < noptions; i++) + test_options[i] = test_options_pomo[i]; } // If not PoMo, go on with normal treatment. - else if (aln->frac_invariant_sites == 0.0) { + else if (frac_invariant_sites == 0.0) { // morphological or SNP data: activate +ASC - if (with_new) { + if (with_new && rate_set != "1") { if (with_asc) test_options = test_options_asc_new; else if (seq_type == SEQ_DNA || seq_type == SEQ_BINARY || seq_type == SEQ_MORPH) @@ -1017,13 +1226,23 @@ int getModelList(Params ¶ms, Alignment *aln, StrVector &models, bool separat test_options = test_options_noASC_I_new; } else if (with_asc) test_options = test_options_asc; - else if (seq_type == SEQ_DNA || seq_type == SEQ_BINARY || seq_type == SEQ_MORPH) - test_options = test_options_morph; - else - test_options = test_options_noASC_I; - } else { + else if (seq_type == SEQ_DNA || seq_type == SEQ_BINARY || seq_type == SEQ_MORPH) { + if (rate_set == "1") + test_options = test_options_morph_fast; + else + test_options = test_options_morph; + } else { + if (rate_set == "1") + test_options = test_options_noASC_I_fast; + else + test_options = test_options_noASC_I; + } + } else if (frac_invariant_sites >= 1.0) { + // 2018-06-12: alignment with only invariant sites, no rate variation added + test_options = test_options_norate; + } else { // normal data, use +I instead - if (with_new) { + if (with_new && rate_set != "1") { // change +I+G to +R if (with_asc) test_options = test_options_asc_new; @@ -1031,44 +1250,104 @@ int getModelList(Params ¶ms, Alignment *aln, StrVector &models, bool separat test_options = test_options_new; } else if (with_asc) { test_options = test_options_asc; - } else + } else if (rate_set == "1") + test_options = test_options_fast; + else test_options = test_options_default; - if (aln->frac_const_sites == 0.0) { + if (frac_invariant_sites == 0.0) { // deactivate +I for (j = 0; j < noptions; 
j++) if (strstr(rate_options[j], "+I")) test_options[j] = false; } } - - - StrVector ratehet; - int max_cats = params.num_rate_cats; - - if (params.ratehet_set) { - // take the rate_options from user-specified models - convert_string_vec(params.ratehet_set, ratehet); - if (!ratehet.empty() && ratehet[0] == "default") { - ratehet.erase(ratehet.begin()); - StrVector ratedef; - for (j = 0; j < noptions; j++) - if (test_options[j]) - ratedef.push_back(rate_options[j]); - ratehet.insert(ratehet.begin(), ratedef.begin(), ratedef.end()); - } + if (!rate_set.empty() && rate_set != "1" && !iEquals(rate_set, "ALL") && !iEquals(rate_set, "AUTO")) { + // take the rate_options from user-specified models + convert_string_vec(rate_set.c_str(), ratehet); + if (!ratehet.empty() && iEquals(ratehet[0], "ALL")) { + ratehet.erase(ratehet.begin()); + StrVector ratedef; + for (j = 0; j < noptions; j++) + if (test_options[j]) + ratedef.push_back(rate_options[j]); + ratehet.insert(ratehet.begin(), ratedef.begin(), ratedef.end()); + } for (j = 0; j < ratehet.size(); j++) { if (ratehet[j] != "" && ratehet[j][0] != '+' && ratehet[j][0] != '*') ratehet[j] = "+" + ratehet[j]; - if (ratehet[j] == "+E") // for equal rate model + if (ratehet[j] == "+E") // for equal rate model ratehet[j] = ""; } } else { for (j = 0; j < noptions; j++) if (test_options[j]) ratehet.push_back(rate_options[j]); - + + } +} + +int CandidateModelSet::generate(Params ¶ms, Alignment *aln, bool separate_rate, bool merge_phase) { + StrVector model_names; + StrVector freq_names; + SeqType seq_type = aln->seq_type; + + int i, j; + string model_set; + + if (merge_phase) { + model_set = params.merge_models; + } else + model_set = params.model_set; + + bool auto_model = iEquals(model_set, "AUTO"); + + getModelSubst(seq_type, aln->isStandardGeneticCode(), params.model_name, + model_set, params.model_subset, model_names); + + if (model_names.empty()) + return 1; + + getStateFreqs(seq_type, params.state_freq_set, freq_names); + + // 
combine model_names with freq_names + if (freq_names.size() > 0) { + StrVector orig_model_names = model_names; + model_names.clear(); + for (j = 0; j < orig_model_names.size(); j++) { + if (aln->seq_type == SEQ_CODON) { + SeqType seq_type; + int model_type = detectSeqType(orig_model_names[j].c_str(), seq_type); + for (i = 0; i < freq_names.size(); i++) { + // disallow MG+F + if (freq_names[i] == "+F" && orig_model_names[j].find("MG") != string::npos) + continue; + if (freq_names[i] != "" || (model_type == 2 && orig_model_names[j].find("MG") == string::npos)) + // empirical model also allow "" + model_names.push_back(orig_model_names[j] + freq_names[i]); + } + } else { + for (i = 0; i < freq_names.size(); i++) + model_names.push_back(orig_model_names[j] + freq_names[i]); + } + } } + + + + StrVector ratehet; + int max_cats = params.num_rate_cats; + string ratehet_set; + if (merge_phase) { + ratehet_set = params.merge_rates; + } else + ratehet_set = params.ratehet_set; + + //bool auto_rate = iEquals(ratehet_set, "AUTO"); + + getRateHet(seq_type, params.model_name, aln->frac_invariant_sites, ratehet_set, ratehet); + + // add number of rate cateogories for special rate models const char *rates[] = {"+R", "*R", "+H", "*H"}; for (i = 0; i < sizeof(rates)/sizeof(char*); i++) @@ -1077,6 +1356,9 @@ int getModelList(Params ¶ms, Alignment *aln, StrVector &models, bool separat size_t pos; + vector flags; + flags.resize(ratehet.size(), 0); + for (i = 0; i < sizeof(rates)/sizeof(char*); i++) for (j = 0; j < ratehet.size(); j++) if ((pos = ratehet[j].find(rates[i])) != string::npos && @@ -1086,29 +1368,50 @@ int getModelList(Params ¶ms, Alignment *aln, StrVector &models, bool separat ratehet[j].insert(pos+2, convertIntToString(params.min_rate_cats)); max_cats = max(max_cats, params.max_rate_cats); for (int k = params.min_rate_cats+1; k <= params.max_rate_cats; k++) { - ratehet.insert(ratehet.begin()+j+k-params.min_rate_cats, str.substr(0, pos+2) + convertIntToString(k) + 
str.substr(pos+2)); + int ins_pos = j+k-params.min_rate_cats; + ratehet.insert(ratehet.begin() + ins_pos, str.substr(0, pos+2) + convertIntToString(k) + str.substr(pos+2)); + flags.insert(flags.begin() + ins_pos, MF_WAITING); } } + ASSERT(ratehet.size() == flags.size()); + string pomo_suffix = (seq_type == SEQ_POMO) ? "+P" : ""; // TODO DS: should we allow virtual population size? + // combine substitution models with rate heterogeneity if (separate_rate) { - for (i = 0; i < model_names.size(); i++) - models.push_back(model_names[i]); + for (i = 0; i < model_names.size(); i++) + push_back(CandidateModel(model_names[i], ratehet[0] + pomo_suffix, aln)); for (j = 0; j < ratehet.size(); j++) if (ratehet[j] != "") - models.push_back(ratehet[j] + pomo_suffix); + push_back(CandidateModel("", ratehet[j] + pomo_suffix, aln)); } else { - for (i = 0; i < model_names.size(); i++) - for (j = 0; j < ratehet.size(); j++) { - models.push_back(model_names[i] + ratehet[j] + pomo_suffix); - } + if (auto_model) { + // all rate heterogeneity for the first model + for (j = 0; j < ratehet.size(); j++) + push_back(CandidateModel(model_names[0], ratehet[j] + pomo_suffix, aln, flags[j])); + // now all models the first RHAS + for (i = 1; i < model_names.size(); i++) + push_back(CandidateModel(model_names[i], ratehet[0] + pomo_suffix, aln, flags[0])); + // all remaining models + for (i = 1; i < model_names.size(); i++) + for (j = 1; j < ratehet.size(); j++) { + push_back(CandidateModel(model_names[i], ratehet[j] + pomo_suffix, aln, flags[j])); + } + } else { + // testing all models + for (i = 0; i < model_names.size(); i++) + for (j = 0; j < ratehet.size(); j++) { + push_back(CandidateModel(model_names[i], ratehet[j] + pomo_suffix, aln, flags[j])); + } + } } if (params.model_extra_set) { StrVector extra_model_names; - convert_string_vec(params.model_extra_set, extra_model_names); - models.insert(models.end(), extra_model_names.begin(), extra_model_names.end()); + 
convert_string_vec(params.model_extra_set, extra_model_names); + for (auto s : extra_model_names) + push_back(CandidateModel(s, "", aln)); } return max_cats; } @@ -1119,10 +1422,102 @@ void replaceModelInfo(string &set_name, ModelCheckpoint &model_info, ModelCheckp } } -void extractModelInfo(string &set_name, ModelCheckpoint &model_info, ModelCheckpoint &part_model_info) { +void extractModelInfo(string &orig_set_name, ModelCheckpoint &model_info, ModelCheckpoint &part_model_info) { + string set_name = orig_set_name + CKP_SEP; int len = set_name.length(); for (auto it = model_info.lower_bound(set_name); it != model_info.end() && it->first.substr(0, len) == set_name; it++) { - part_model_info.put(it->first.substr(len+1), it->second); + part_model_info.put(it->first.substr(len), it->second); + } +} + +string getSubsetName(PhyloSuperTree *super_tree, set &subset) { + string set_name; + for (auto it = subset.begin(); it != subset.end(); it++) { + if (it != subset.begin()) + set_name += "+"; + set_name += super_tree->at(*it)->aln->name; + } + return set_name; +} + +int getSubsetAlnLength(PhyloSuperTree *super_tree, set &subset) { + int len = 0; + for (auto i : subset) { + len += super_tree->at(i)->aln->getNSite(); + } + return len; +} + +/** + * transfer model parameters from two subsets to the target subsets + */ +void transferModelParameters(PhyloSuperTree *super_tree, ModelCheckpoint &model_info, ModelCheckpoint &part_model_info, + set &gene_set1, set &gene_set2) +{ + set merged_set; + merged_set.insert(gene_set1.begin(), gene_set1.end()); + merged_set.insert(gene_set2.begin(), gene_set2.end()); + string set_name = getSubsetName(super_tree, merged_set); + string set1_name = getSubsetName(super_tree, gene_set1); + string set2_name = getSubsetName(super_tree, gene_set2); + double weight1 = getSubsetAlnLength(super_tree, gene_set1); + double weight2 = getSubsetAlnLength(super_tree, gene_set2); + double weight_sum = weight1 + weight2; + weight1 = weight1/weight_sum; + 
weight2 = weight2/weight_sum; + enum MeanComp {GEOM_MEAN, ARIT_MEAN}; + enum ValType {VAL_SINGLE, VAL_VECTOR}; + vector > info_strings = { + make_tuple(VAL_SINGLE, ARIT_MEAN, (string)"RateGamma" + CKP_SEP + "gamma_shape"), + make_tuple(VAL_SINGLE, ARIT_MEAN, (string)"RateGammaInvar" + CKP_SEP + "gamma_shape"), + make_tuple(VAL_SINGLE, ARIT_MEAN, (string)"RateGammaInvar" + CKP_SEP + "p_invar"), + make_tuple(VAL_SINGLE, ARIT_MEAN, (string)"RateInvar" + CKP_SEP + "p_invar") + //make_tuple(VAL_VECTOR, GEOM_MEAN, (string)"ModelDNA" + CKP_SEP + "rates") + }; + for (auto info : info_strings) { + switch (std::get<0>(info)) { + case VAL_SINGLE: { + double value1, value2, value; + bool ok1 = model_info.get(set1_name + CKP_SEP + std::get<2>(info), value1); + bool ok2 = model_info.get(set2_name + CKP_SEP + std::get<2>(info), value2); + if (!ok1 || !ok2) + continue; + if (part_model_info.get(std::get<2>(info), value)) + continue; // value already exist + switch (std::get<1>(info)) { + case ARIT_MEAN: + value = weight1*value1 + weight2*value2; + break; + case GEOM_MEAN: + value = sqrt(value1*value2); + break; + } + part_model_info.put(std::get<2>(info), value); + break; + } + case VAL_VECTOR: { + DoubleVector value1, value2, value; + bool ok1 = model_info.getVector(set1_name + CKP_SEP + std::get<2>(info), value1); + bool ok2 = model_info.getVector(set2_name + CKP_SEP + std::get<2>(info), value2); + if (!ok1 || !ok2) + continue; + ASSERT(value1.size() == value2.size()); + if (part_model_info.getVector(std::get<2>(info), value)) + continue; // value already exist + value.reserve(value1.size()); + for (int i = 0; i < value1.size(); i++) + switch (std::get<1>(info)) { + case ARIT_MEAN: + value.push_back(weight1*value1[i] + weight2*value2[i]); + break; + case GEOM_MEAN: + value.push_back(sqrt(value1[i]*value2[i])); + break; + } + part_model_info.putVector(std::get<2>(info), value); + break; + } + } } } @@ -1132,34 +1527,33 @@ void mergePartitions(PhyloSuperTree* super_tree, vector > 
&gene_sets, S SuperAlignment *super_aln = (SuperAlignment*)super_tree->aln; vector part_info; vector tree_vec; + SuperAlignment *new_super_aln = new SuperAlignment(); for (it = gene_sets.begin(); it != gene_sets.end(); it++) { + Alignment *aln = super_aln->concatenateAlignments(*it); PartitionInfo info; - info.name = ""; - info.position_spec = ""; - info.aln_file = ""; - info.sequence_type = ""; - info.model_name = model_names[it-gene_sets.begin()]; + aln->model_name = model_names[it-gene_sets.begin()]; info.part_rate = 1.0; // BIG FIX: make -spp works with -m TESTMERGE now! info.evalNNIs = 0; for (set::iterator i = it->begin(); i != it->end(); i++) { if (i != it->begin()) { - info.name += "+"; - info.position_spec += ", "; + aln->name += "+"; + if (!super_aln->partitions[*i]->position_spec.empty()) + aln->position_spec += ", "; } - info.name += super_tree->part_info[*i].name; - info.position_spec += super_tree->part_info[*i].position_spec; - if (!super_tree->part_info[*i].aln_file.empty()) { - if (info.aln_file.empty()) - info.aln_file = super_tree->part_info[*i].aln_file; - else if (info.aln_file != super_tree->part_info[*i].aln_file) { - info.aln_file = "__NA__"; + aln->name += super_aln->partitions[*i]->name; + aln->position_spec += super_aln->partitions[*i]->position_spec; + if (!super_aln->partitions[*i]->aln_file.empty()) { + if (aln->aln_file.empty()) + aln->aln_file = super_aln->partitions[*i]->aln_file; + else if (aln->aln_file != super_aln->partitions[*i]->aln_file) { + aln->aln_file = aln->aln_file + ',' + super_aln->partitions[*i]->aln_file; } } - if (!super_tree->part_info[*i].sequence_type.empty()) { - if (info.sequence_type.empty()) - info.sequence_type = super_tree->part_info[*i].sequence_type; - else if (info.sequence_type != super_tree->part_info[*i].sequence_type) { - info.sequence_type = "__NA__"; + if (!super_aln->partitions[*i]->sequence_type.empty()) { + if (aln->sequence_type.empty()) + aln->sequence_type = 
super_aln->partitions[*i]->sequence_type; + else if (aln->sequence_type != super_aln->partitions[*i]->sequence_type) { + aln->sequence_type = "__NA__"; } } } @@ -1167,13 +1561,21 @@ void mergePartitions(PhyloSuperTree* super_tree, vector > &gene_sets, S info.nniMoves[0].ptnlh = NULL; info.nniMoves[1].ptnlh = NULL; part_info.push_back(info); - Alignment *aln = super_aln->concatenateAlignments(*it); PhyloTree *tree = super_tree->extractSubtree(*it); tree->setParams(super_tree->params); tree->setAlignment(aln); tree_vec.push_back(tree); + new_super_aln->partitions.push_back(aln); } + // BUG FIX 2016-11-29: when merging partitions with -m TESTMERGE, sequence order is changed + // get the taxa names from existing tree + StrVector seq_names; + if (super_tree->root) { + super_tree->getTaxaName(seq_names); + } + new_super_aln->init(&seq_names); + for (PhyloSuperTree::reverse_iterator tit = super_tree->rbegin(); tit != super_tree->rend(); tit++) delete (*tit); super_tree->clear(); @@ -1181,101 +1583,90 @@ void mergePartitions(PhyloSuperTree* super_tree, vector > &gene_sets, S super_tree->part_info = part_info; delete super_tree->aln; - super_tree->aln = new SuperAlignment(super_tree); - super_tree->setAlignment(super_tree->aln); -} - -void printModelFile(ostream &fmodel, Params ¶ms, PhyloTree *tree, ModelInfo &info, string &set_name) { - string sitelh_file = params.out_prefix; - sitelh_file += ".sitelh"; - SeqType seq_type = tree->aln->seq_type; - if (tree->isSuperTree()) - seq_type = ((PhyloSuperTree*)tree)->front()->aln->seq_type; - - fmodel.precision(4); - fmodel << fixed; - if (set_name != "") - fmodel << set_name << "\t"; - fmodel << info.name << "\t" << info.df << "\t" << info.logl << "\t" << info.tree_len; - if (seq_type == SEQ_DNA) { - int nrates = tree->getModel()->getNumRateEntries(); - double *rate_mat = new double[nrates]; - tree->getModel()->getRateMatrix(rate_mat); - for (int rate = 0; rate < nrates; rate++) - fmodel << "\t" << rate_mat[rate]; - delete [] 
rate_mat; - } - if (seq_type == SEQ_DNA || seq_type == SEQ_BINARY) { - int nstates = (seq_type == SEQ_DNA) ? 4 : 2; - double *freqs = new double[nstates]; - tree->getModel()->getStateFrequency(freqs); - for (int freq = 0; freq < nstates; freq++) - fmodel << "\t" << freqs[freq]; - delete [] freqs; - } - double alpha = tree->getRate()->getGammaShape(); - fmodel << "\t"; - if (alpha > 0) fmodel << alpha; else fmodel << "NA"; - fmodel << "\t"; - double pinvar = tree->getRate()->getPInvar(); - if (pinvar > 0) fmodel << pinvar; else fmodel << "NA"; - fmodel << "\t"; -// tree->printTree(fmodel); - fmodel << info.tree; - fmodel << endl; - fmodel.precision(4); - const char *model_name = (params.print_site_lh) ? info.name.c_str() : NULL; - if (params.print_site_lh) - printSiteLh(sitelh_file.c_str(), tree, NULL, true, model_name); - if (params.model_test_and_tree) { - delete tree; - tree = NULL; - } +// super_tree->aln = new SuperAlignment(super_tree); + super_tree->setAlignment(new_super_aln); } /** - test one single model - @param model_name model to be tested - @param params program parameters - @param in_tree input tree - @param model_info checkpointing information - @param[out] info output model information - @param models_block models block - @param num_thread number of threads - @return tree string -*/ -string testOneModel(string &model_name, Params ¶ms, Alignment *in_aln, - ModelCheckpoint &model_info, ModelInfo &info, ModelsBlock *models_block, + called when some partition is changed + */ +void fixPartitions(PhyloSuperTree* super_tree) { + SuperAlignment *super_aln = (SuperAlignment*)super_tree->aln; + int part; + bool aln_changed = false; + for (part = 0; part < super_tree->size(); part++) + if (super_aln->partitions[part] != super_tree->at(part)->aln) { + aln_changed = true; + super_aln->partitions[part] = super_tree->at(part)->aln; + } + if (!aln_changed) + return; + super_aln->buildPattern(); + super_aln->orderPatternByNumChars(PAT_VARIANT); + 
super_tree->deleteAllPartialLh(); +} + +string CandidateModel::evaluate(Params ¶ms, + ModelCheckpoint &in_model_info, ModelCheckpoint &out_model_info, + ModelsBlock *models_block, int &num_threads, int brlen_type) { + //string model_name = name; + Alignment *in_aln = aln; IQTree *iqtree = NULL; - if (posRateHeterotachy(model_name) != string::npos) + if (in_aln->isSuperAlignment()) { + SuperAlignment *saln = (SuperAlignment*)in_aln; + if (params.partition_type == BRLEN_OPTIMIZE) + iqtree = new PhyloSuperTree(saln); + else + iqtree = new PhyloSuperTreePlen(saln, brlen_type); + StrVector subst_names; + StrVector rate_names; + convert_string_vec(subst_name.c_str(), subst_names); + convert_string_vec(rate_name.c_str(), rate_names); + ASSERT(subst_names.size() == rate_names.size()); + for (int part = 0; part != subst_names.size(); part++) + saln->partitions[part]->model_name = subst_names[part]+rate_names[part]; + } else if (posRateHeterotachy(getName()) != string::npos) iqtree = new PhyloTreeMixlen(in_aln, 0); else iqtree = new IQTree(in_aln); iqtree->setParams(¶ms); - iqtree->sse = params.SSE; + iqtree->setLikelihoodKernel(params.SSE); iqtree->optimize_by_newton = params.optimize_by_newton; - iqtree->num_threads = num_threads; + iqtree->setNumThreads(num_threads); - iqtree->setCheckpoint(&model_info); + iqtree->setCheckpoint(&in_model_info); +#ifdef _OPENMP +#pragma omp critical +#endif iqtree->restoreCheckpoint(); ASSERT(iqtree->root); - iqtree->initializeModel(params, model_name, models_block); - if (!iqtree->getModel()->isMixture() || in_aln->seq_type == SEQ_POMO) - model_name = iqtree->getModelName(); + iqtree->initializeModel(params, getName(), models_block); + if (!iqtree->getModel()->isMixture() || in_aln->seq_type == SEQ_POMO) { + subst_name = iqtree->getSubstName(); + rate_name = iqtree->getRateName(); + } - info.name = model_name; - if (info.restoreCheckpoint(&model_info)) { + if (restoreCheckpoint(&in_model_info)) { delete iqtree; return ""; } +#ifdef 
_OPENMP +#pragma omp critical +#endif + iqtree->getModelFactory()->restoreCheckpoint(); + + // now switch to the output checkpoint + iqtree->getModelFactory()->setCheckpoint(&out_model_info); + iqtree->setCheckpoint(&out_model_info); + + double new_logl; + if (params.model_test_and_tree) { //--- PERFORM FULL TREE SEARCH PER MODEL ----// - - string original_model = params.model_name; // BQM 2017-03-29: disable bootstrap int orig_num_bootstrap_samples = params.num_bootstrap_samples; int orig_gbo_replicates = params.gbo_replicates; @@ -1285,25 +1676,9 @@ string testOneModel(string &model_name, Params ¶ms, Alignment *in_aln, if (params.stop_condition == SC_BOOTSTRAP_CORRELATION) params.stop_condition = SC_UNSUCCESS_ITERATION; - params.model_name = model_name; -// char *orig_user_tree = params.user_file; -// string new_user_tree = (string)params.out_prefix+".treefile"; -// if (params.model_test_and_tree == 1 && model>0 && fileExists(new_user_tree)) { -// params.user_file = (char*)new_user_tree.c_str(); -// } - // set checkpoint - // commented out: using model_info as checkpoint -// iqtree->setCheckpoint(in_tree->getCheckpoint()); -// iqtree->num_precision = in_tree->num_precision; - - // clear all checkpointed information -// Checkpoint *newCheckpoint = new Checkpoint; -// iqtree->getCheckpoint()->getSubCheckpoint(newCheckpoint, "iqtree"); -// iqtree->getCheckpoint()->clear(); -// iqtree->getCheckpoint()->insert(newCheckpoint->begin(), newCheckpoint->end()); -// delete newCheckpoint; - - cout << endl << "===> Testing model " << model_name << endl; + iqtree->aln->model_name = getName(); + + cout << endl << "===> Testing model " << getName() << endl; if (iqtree->root) { // start from previous tree @@ -1312,28 +1687,19 @@ string testOneModel(string &model_name, Params ¶ms, Alignment *in_aln, iqtree->saveCheckpoint(); } - runTreeReconstruction(params, original_model, iqtree, model_info); - info.logl = iqtree->computeLikelihood(); - info.tree_len = iqtree->treeLength(); - 
info.tree = iqtree->getTreeString(); + iqtree->ensureNumberOfThreadsIsSet(nullptr); + + runTreeReconstruction(params, iqtree); + new_logl = iqtree->computeLikelihood(); + tree_len = iqtree->treeLength(); + tree = iqtree->getTreeString(); // restore original parameters - params.model_name = original_model; -// params.user_file = orig_user_tree; // 2017-03-29: restore bootstrap replicates params.num_bootstrap_samples = orig_num_bootstrap_samples; params.gbo_replicates = orig_gbo_replicates; params.stop_condition = orig_stop_condition; - // clear all checkpointed information -// newCheckpoint = new Checkpoint; -// iqtree->getCheckpoint()->getSubCheckpoint(newCheckpoint, "iqtree"); -// iqtree->getCheckpoint()->clear(); -// iqtree->getCheckpoint()->insert(newCheckpoint->begin(), newCheckpoint->end()); -// iqtree->getCheckpoint()->putBool("finished", false); -// iqtree->getCheckpoint()->dump(true); -// delete newCheckpoint; - int count = iqtree->getCheckpoint()->eraseKeyPrefix("finished"); cout << count << " finished checkpoint entries erased" << endl; iqtree->getCheckpoint()->eraseKeyPrefix("CandidateSet"); @@ -1342,710 +1708,439 @@ string testOneModel(string &model_name, Params ¶ms, Alignment *in_aln, //--- FIX TREE TOPOLOGY AND ESTIMATE MODEL PARAMETERS ----// if (verbose_mode >= VB_MED) - cout << "Optimizing model " << info.name << endl; - iqtree->getModelFactory()->restoreCheckpoint(); - - #ifdef _OPENMP - if (num_threads <= 0) { - num_threads = iqtree->testNumThreads(); - omp_set_num_threads(num_threads); - } else - iqtree->warnNumThreads(); - #endif + cout << "Optimizing model " << getName() << endl; + iqtree->ensureNumberOfThreadsIsSet(nullptr); iqtree->initializeAllPartialLh(); for (int step = 0; step < 2; step++) { - info.logl = iqtree->getModelFactory()->optimizeParameters(brlen_type, false, - TOL_LIKELIHOOD_MODELTEST, TOL_GRADIENT_MODELTEST); - info.tree_len = iqtree->treeLength(); + iqtree->getModelFactory()->syncChkPoint = this->syncChkPoint; + new_logl = 
iqtree->getModelFactory()->optimizeParameters(brlen_type, false, + params.modelfinder_eps, TOL_GRADIENT_MODELTEST); + tree_len = iqtree->treeLength(); iqtree->getModelFactory()->saveCheckpoint(); iqtree->saveCheckpoint(); // check if logl(+R[k]) is worse than logl(+R[k-1]) - ModelInfo prev_info; - if (!prev_info.restoreCheckpointRminus1(&model_info, info.name)) break; - if (prev_info.logl < info.logl + TOL_GRADIENT_MODELTEST) break; + CandidateModel prev_info; + if (!prev_info.restoreCheckpointRminus1(&in_model_info, this)) break; + if (prev_info.logl < new_logl + params.modelfinder_eps) break; if (step == 0) { iqtree->getRate()->initFromCatMinusOne(); - } else if (info.logl < prev_info.logl - TOL_LIKELIHOOD_MODELTEST) { - outWarning("Log-likelihood of " + info.name + " worse than " + prev_info.name); + } else if (new_logl < prev_info.logl - params.modelfinder_eps*10.0) { + outWarning("Log-likelihood " + convertDoubleToString(new_logl) + " of " + + getName() + " worse than " + prev_info.getName() + " " + convertDoubleToString(prev_info.logl)); } } } - - info.df = iqtree->getModelFactory()->getNParameters(brlen_type); + // sum in case of adjusted df and logl already stored + df += iqtree->getModelFactory()->getNParameters(brlen_type); + logl += new_logl; string tree_string = iqtree->getTreeString(); - +#ifdef _OPENMP +#pragma omp critical + { +#endif + saveCheckpoint(&in_model_info); +#ifdef _OPENMP + } +#endif delete iqtree; return tree_string; } -/** model information by merging two partitions */ -struct ModelPair { - /** score after merging */ - double score; - /** ID of partition 1 */ - int part1; - /** ID of partition 2 */ - int part2; - /** log-likelihood */ - double logl; - /** degree of freedom */ - int df; - /** tree length */ - double tree_len; - /** IDs of merged partitions */ - set merged_set; - /** set name */ - string set_name; - /* best model name */ - string model_name; -}; - -class ModelPairSet : public multimap { - -public: +string 
CandidateModel::evaluateConcatenation(Params ¶ms, SuperAlignment *super_aln, + ModelCheckpoint &model_info, ModelsBlock *models_block, int num_threads) +{ + aln = super_aln->concatenateAlignments(); + size_t ssize = getUsualModel(aln); - /** insert a partition pair */ - void insertPair(ModelPair &pair) { - insert(value_type(pair.score, pair)); - } + string concat_tree; - /** - find the maximum compatible partition pairs - @param num max number of pairs to return - */ - void getCompatiblePairs(int num, ModelPairSet &res) { - set part_ids; + cout << "Testing " << getName() << " on supermatrix..." << endl; + concat_tree = evaluate(params, model_info, model_info, + models_block, num_threads, BRLEN_OPTIMIZE); - for (auto it = begin(); it != end() && res.size() < num; it++) { + computeICScores(ssize); - // check for compatibility - vector overlap; - set_intersection(part_ids.begin(), part_ids.end(), - it->second.merged_set.begin(), it->second.merged_set.end(), - std::back_inserter(overlap)); + delete aln; + aln = NULL; + return concat_tree; +} - if (!overlap.empty()) continue; +/** + * k-means clustering of partitions using partition-specific tree length + * @return score (AIC/BIC/etc.) of the clustering + * @param[out] gene_sets + * @param[out[ model_names + */ +double doKmeansClustering(Params ¶ms, PhyloSuperTree *in_tree, + int ncluster, DoubleVector &lenvec, + ModelCheckpoint &model_info, ModelsBlock *models_block, + int num_threads, + vector > &gene_sets, StrVector &model_names) +{ - // take the union - part_ids.insert(it->second.merged_set.begin(), it->second.merged_set.end()); + cout << "k-means merging into " << ncluster << " partitions..." 
<< endl; - // put the compatible pair to the set - res.insertPair(it->second); - } - } + ASSERT(lenvec.size() == in_tree->size()); + int npart = in_tree->size(); + IntVector weights; + weights.resize(npart, 1); + int *clusters = new int[npart]; + double *centers = new double[ncluster]; + RunKMeans1D(npart, ncluster, lenvec.data(), weights.data(), centers, clusters); -}; + SuperAlignment *super_aln = ((SuperAlignment*)in_tree->aln); -/** - * select models for all partitions - * @param[in,out] model_info (IN/OUT) all model information - * @return total number of parameters - */ -void testPartitionModel(Params ¶ms, PhyloSuperTree* in_tree, ModelCheckpoint &model_info, ModelsBlock *models_block, int num_threads) { -// params.print_partition_info = true; -// params.print_conaln = true; - int i = 0; -// PhyloSuperTree::iterator it; - DoubleVector lhvec; // log-likelihood for each partition - DoubleVector dfvec; // number of parameters for each partition - DoubleVector lenvec; // tree length for each partition - double lhsum = 0.0; - int dfsum = 0; - if (params.partition_type != BRLEN_OPTIMIZE) { + double lhsum = 0.0; + int dfsum = 0; + if (params.partition_type == BRLEN_FIX || params.partition_type == BRLEN_SCALE) { dfsum = in_tree->getNBranchParameters(BRLEN_OPTIMIZE); if (params.partition_type == BRLEN_SCALE) dfsum -= 1; } - int ssize = in_tree->getAlnNSite(); - int64_t num_model = 0; - int64_t total_num_model = in_tree->size(); - - // 2017-06-07: -rcluster-max for max absolute number of pairs - if (params.partfinder_rcluster_max == 0) - params.partfinder_rcluster_max = max((size_t)1000, 10*in_tree->size()); - if (params.model_name.find("LINK") != string::npos || params.model_name.find("MERGE") != string::npos) { - double p = params.partfinder_rcluster/100.0; - size_t num_pairs = round(in_tree->size()*(in_tree->size()-1)*p/2); - if (p < 1.0) - num_pairs = min(num_pairs, params.partfinder_rcluster_max); - total_num_model += num_pairs; - for (i = in_tree->size()-2; i > 
0; i--) - total_num_model += max(round(i*p), 1.0); + for (int cluster = 0; cluster < ncluster; cluster++) { + string set_name; + set merged_set; + for (int i = 0; i < in_tree->size(); i++) + if (clusters[i] == cluster) { + if (!set_name.empty()) + set_name += "+"; + set_name += in_tree->at(i)->aln->name; + merged_set.insert(i); + } + gene_sets.push_back(merged_set); + CandidateModel best_model; + bool done_before = false; + { + // if pairs previously examined, reuse the information + model_info.startStruct(set_name); + if (model_info.getBestModel(best_model.subst_name)) { + best_model.restoreCheckpoint(&model_info); + done_before = true; + } + model_info.endStruct(); + } + ModelCheckpoint part_model_info; + if (!done_before) { + Alignment *aln = super_aln->concatenateAlignments(merged_set); + PhyloTree *tree = in_tree->extractSubtree(merged_set); + tree->setAlignment(aln); + extractModelInfo(set_name, model_info, part_model_info); + tree->num_precision = in_tree->num_precision; + tree->setParams(¶ms); + tree->sse = params.SSE; + tree->optimize_by_newton = params.optimize_by_newton; + tree->setNumThreads(params.model_test_and_tree ? num_threads : 1); + /*if (params.model_test_and_tree) { + tree->setCheckpoint(new Checkpoint()); + tree->saveCheckpoint(); + } else*/ + { + tree->setCheckpoint(&part_model_info); + // trick to restore checkpoint + tree->restoreCheckpoint(); + tree->saveCheckpoint(); + } + best_model = CandidateModelSet().test(params, tree, part_model_info, models_block, + params.model_test_and_tree ? 
num_threads : 1, params.partition_type, + set_name, "", true); + best_model.restoreCheckpoint(&part_model_info); + model_names.push_back(best_model.getName()); + delete tree; + delete aln; + } + lhsum += best_model.logl; + dfsum += best_model.df; + { + if (!done_before) { + replaceModelInfo(set_name, model_info, part_model_info); + model_info.dump(); + cout.width(4); + cout << right << cluster+1 << " "; + cout.width(12); + cout << left << best_model.getName() << " "; + cout.width(11); + cout << best_model.logl << " " << set_name; + cout << endl; + } + } } + size_t ssize = in_tree->getAlnNSite(); + double score = computeInformationScore(lhsum, dfsum, ssize, params.model_test_criterion); + cout << "k-means score for " << ncluster << " partitions: " << score << " (LnL: " << lhsum << " " << "df: " << dfsum << ")" << endl; -#ifdef _OPENMP - if (num_threads <= 0) { - // partition selection scales well with many cores - num_threads = min((int64_t)countPhysicalCPUCores(), total_num_model); - omp_set_num_threads(num_threads); - cout << "NUMBER OF THREADS FOR PARTITION FINDING: " << num_threads << endl; - } -#endif + delete [] centers; + delete [] clusters; + return score; +} - double start_time = getRealTime(); +class SubsetPair : public pair { +public: + // distance between two partition pairs + double distance; +}; - SuperAlignment *super_aln = ((SuperAlignment*)in_tree->aln); +bool comparePairs(const SubsetPair &a, const SubsetPair &b) { + return a.distance < b.distance; +} - string concat_tree; - ModelInfo concat_info; +/* +bool comparePartition(const pair &a, const pair &b) { + return a.second > b.second; +} +*/ - // Analysis on supermatrix - { - Alignment *conaln = super_aln->concatenateAlignments(); - string model_name; - switch (conaln->seq_type) { - case SEQ_DNA: model_name = "GTR+F+G"; break; - case SEQ_PROTEIN: model_name = "LG+F+G"; break; - case SEQ_CODON: model_name = "GY"; break; // too much computation, thus no +G - case SEQ_BINARY: model_name = "GTR2+G"; 
break; - case SEQ_MORPH: model_name = "MK+G"; break; - case SEQ_POMO: model_name = "GTR+P"; break; - default: ASSERT(0 && "Unprocessed seq_type"); - - } - cout << "Testing " << model_name << " on supermatrix..." << endl; - concat_tree = testOneModel(model_name, params, conaln, model_info, concat_info, - models_block, num_threads, BRLEN_OPTIMIZE); - concat_info.computeICScores(ssize); - concat_info.saveCheckpoint(&model_info); - - - // read tree with branch lengths for linked partition model - if (params.partition_type != BRLEN_OPTIMIZE && !concat_tree.empty()) { - in_tree->readTreeString(concat_tree); - int part = 0; - for (auto it = in_tree->begin(); it != in_tree->end(); it++, part++) { - model_info.startStruct(in_tree->part_info[part].name); - (*it)->saveCheckpoint(); - model_info.endStruct(); - } - } - model_info.dump(); +bool compareJob(const pair &a, const pair &b) { + return (a.second == b.second)?(a.first < b.first):(a.second > b.second); +} - cout << concat_info.name << " / LnL: " << concat_info.logl - << " / df: " << concat_info.df << " / AIC: " << concat_info.AIC_score - << " / AICc: " << concat_info.AICc_score << " / BIC: " << concat_info.BIC_score << endl; - delete conaln; +/** + find k-closest partition pairs for rcluster algorithm + */ +void findClosestPairs(SuperAlignment *super_aln, DoubleVector &lenvec, vector > &gene_sets, + double log_transform, vector &closest_pairs) { + + for (int part1 = 0; part1 < lenvec.size()-1; part1++) + for (int part2 = part1+1; part2 < lenvec.size(); part2++) + if (super_aln->partitions[*gene_sets[part1].begin()]->seq_type == super_aln->partitions[*gene_sets[part2].begin()]->seq_type && + super_aln->partitions[*gene_sets[part1].begin()]->genetic_code == super_aln->partitions[*gene_sets[part2].begin()]->genetic_code) { + // only merge partitions of the same data type + SubsetPair pair; + pair.first = part1; + pair.second = part2; + if (log_transform) + pair.distance = fabs(log(lenvec[part1]) - log(lenvec[part2])); + 
else + pair.distance = fabs(lenvec[part1] - lenvec[part2]); + closest_pairs.push_back(pair); + } + if (!closest_pairs.empty() && Params::getInstance().partfinder_rcluster < 100) { + // sort distance + std::sort(closest_pairs.begin(), closest_pairs.end(), comparePairs); + size_t num_pairs = round(closest_pairs.size() * (Params::getInstance().partfinder_rcluster/100.0)); + num_pairs = min(num_pairs, Params::getInstance().partfinder_rcluster_max); + if (num_pairs <= 0) num_pairs = 1; + closest_pairs.erase(closest_pairs.begin() + num_pairs, closest_pairs.end()); } +} - cout << "Selecting individual models for " << in_tree->size() << " charsets using " << criterionName(params.model_test_criterion) << "..." << endl; - //cout << " No. AIC AICc BIC Charset" << endl; - cout << " No. Model Score Charset" << endl; +/** + merge vector src into dest, eliminating duplicates + */ +void mergePairs(vector &dest, vector &src) { + unordered_set dest_set; + for (SubsetPair s: dest) + dest_set.insert(convertIntToString(s.first) + "-" + convertIntToString(s.second)); + for (SubsetPair s: src) + if (dest_set.find(convertIntToString(s.first) + "-" + convertIntToString(s.second)) == dest_set.end()) + dest.push_back(s); +} - lhvec.resize(in_tree->size()); - dfvec.resize(in_tree->size()); - lenvec.resize(in_tree->size()); +struct workloadcmp { + bool operator() (const pair& a, const pair& b) const + {return (a.second==b.second)?a.firstsize()*(in_tree->size()-1)/2]; - pair *distID = new pair[in_tree->size()*(in_tree->size()-1)/2]; - - // sort partition by computational cost for OpenMP effciency - for (i = 0; i < in_tree->size(); i++) { - distID[i].first = i; - Alignment *this_aln = in_tree->at(i)->aln; - // computation cost is proportional to #sequences, #patterns, and #states - dist[i] = -((double)this_aln->getNSeq())*this_aln->getNPattern()*this_aln->num_states; +void replaceModelInfo(ModelCheckpoint* model_info, ModelCheckpoint &new_info) { + for (auto it = new_info.begin(); it != 
new_info.end(); it++) { + model_info->put(it->first, it->second); + } +} + +bool isMixtureModel(ModelsBlock *models_block, string &model_str) { + size_t mix_pos; + for (mix_pos = 0; mix_pos < model_str.length(); mix_pos++) { + size_t next_mix_pos = model_str.find_first_of("+*", mix_pos); + string sub_model_str = model_str.substr(mix_pos, next_mix_pos-mix_pos); + if (models_block->findMixModel(sub_model_str)) + return true; + if (next_mix_pos == string::npos) + break; + mix_pos = next_mix_pos; } + return false; +} + +void CandidateModelSet::filterRates(int finished_model) { + if (Params::getInstance().score_diff_thres < 0) + return; + double best_score = DBL_MAX; + ASSERT(finished_model >= 0); + int model; + for (model = 0; model <= finished_model; model++) + if (at(model).subst_name == at(0).subst_name) { + if (!at(model).hasFlag(MF_DONE + MF_IGNORED)) + return; // only works if all models done + best_score = min(best_score, at(model).getScore()); + } - if (num_threads > 1) - { - quicksort(dist, 0, in_tree->size()-1, distID); - if (verbose_mode >= VB_MED) { - for (i = 0; i < in_tree->size(); i++) { - cout << i+1 << "\t" << in_tree->part_info[distID[i].first].name << endl; - } + double ok_score = best_score + Params::getInstance().score_diff_thres; + set ok_rates; + for (model = 0; model <= finished_model; model++) + if (at(model).getScore() <= ok_score) { + string rate_name = at(model).orig_rate_name; + ok_rates.insert(rate_name); } + for (model = finished_model+1; model < size(); model++) + if (ok_rates.find(at(model).orig_rate_name) == ok_rates.end()) + at(model).setFlag(MF_IGNORED); +} + +void CandidateModelSet::filterSubst(int finished_model) { + if (Params::getInstance().score_diff_thres < 0) + return; + double best_score = DBL_MAX; + ASSERT(finished_model >= 0); + int model; + for (model = 0; model <= finished_model; model++) + if (at(model).rate_name == at(0).rate_name) + best_score = min(best_score, at(model).getScore()); + + double ok_score = best_score + 
Params::getInstance().score_diff_thres; + set ok_model; + for (model = 0; model <= finished_model; model++) { + if (at(model).rate_name != at(0).rate_name) + continue; + if (at(model).getScore() <= ok_score) { + string subst_name = at(model).orig_subst_name; + ok_model.insert(subst_name); + } else + at(model).setFlag(MF_IGNORED); } + for (model = finished_model+1; model < size(); model++) + if (ok_model.find(at(model).orig_subst_name) == ok_model.end()) + at(model).setFlag(MF_IGNORED); +} - bool parallel_over_partitions = false; -#ifdef _OPENMP - parallel_over_partitions = !params.model_test_and_tree && (in_tree->size() >= num_threads); -#pragma omp parallel for private(i) schedule(dynamic) reduction(+: lhsum, dfsum) if(parallel_over_partitions) -#endif - for (int j = 0; j < in_tree->size(); j++) { - i = distID[j].first; - PhyloTree *this_tree = in_tree->at(i); - // scan through models for this partition, assuming the information occurs consecutively - ModelCheckpoint part_model_info; - extractModelInfo(in_tree->part_info[i].name, model_info, part_model_info); - // do the computation - string part_model_name; - if (params.model_name.empty()) - part_model_name = in_tree->part_info[i].model_name; - ModelInfo best_model; - best_model.name = testModel(params, this_tree, part_model_info, models_block, (parallel_over_partitions ? 
1 : num_threads), params.partition_type, in_tree->part_info[i].name, false, part_model_name); +CandidateModel CandidateModelSet::test(Params ¶ms, PhyloTree* in_tree, ModelCheckpoint &model_info, + ModelsBlock *models_block, int num_threads, int brlen_type, + string set_name, string in_model_name, bool merge_phase) +{ - ASSERT(best_model.restoreCheckpoint(&part_model_info)); + ModelCheckpoint *checkpoint = &model_info; - double score = best_model.computeICScore(this_tree->getAlnNSite()); - in_tree->part_info[i].model_name = best_model.name; - lhsum += (lhvec[i] = best_model.logl); - dfsum += (dfvec[i] = best_model.df); - lenvec[i] = best_model.tree_len; + in_tree->params = ¶ms; + + // for ModelOMatic + Alignment *prot_aln = NULL; + Alignment *dna_aln = NULL; + bool do_modelomatic = params.modelomatic && in_tree->aln->seq_type == SEQ_CODON; + if (in_model_name.empty()) { + // TODO: in case of Neural network, return a CandidateModel instance of the model here + if (params.use_nn_model) { + // generate models with neural network + string subst_name = "GTR"; + string rate_name = "+G{0.1}"; // check if this really works + push_back(CandidateModel(subst_name, rate_name, in_tree->aln)); + } else { + // generate all models the normal way + generate(params, in_tree->aln, params.model_test_separate_rate, merge_phase); + } + if (do_modelomatic) { + ASSERT(!params.use_nn_model); + // generate models for protein + // adapter coefficient according to Whelan et al. 
2015 + prot_aln = in_tree->aln->convertCodonToAA(); + int adjusted_df; + double adjusted_logl = computeAdapter(in_tree->aln, prot_aln, adjusted_df); + if (set_name.empty()) + cout << "Adjusted LnL: " << adjusted_logl << " df: " << adjusted_df << endl; + size_t start = size(); + generate(params, prot_aln, params.model_test_separate_rate, merge_phase); + size_t i; + for (i = start; i < size(); i++) { + at(i).logl = adjusted_logl; + at(i).df = adjusted_df; + } -#ifdef _OPENMP -#pragma omp critical -#endif - { - num_model++; - cout.width(4); - cout << right << num_model << " "; - cout.width(12); - cout << left << best_model.name << " "; - cout.width(11); - cout << score << " " << in_tree->part_info[i].name; - if (num_model >= 10) { - double remain_time = (total_num_model-num_model)*(getRealTime()-start_time)/num_model; - cout << "\t" << convert_time(getRealTime()-start_time) << " (" - << convert_time(remain_time) << " left)"; - } - cout << endl; - replaceModelInfo(in_tree->part_info[i].name, model_info, part_model_info); - model_info.dump(); - } - } - - double inf_score = computeInformationScore(lhsum, dfsum, ssize, params.model_test_criterion); - cout << "Full partition model " << criterionName(params.model_test_criterion) << " score: " << inf_score << " (LnL: " << lhsum << " df:" << dfsum << ")" << endl; - - if (params.model_name.find("LINK") == string::npos && params.model_name.find("MERGE") == string::npos) { - in_tree->printBestPartition((string(params.out_prefix) + ".best_scheme.nex").c_str()); - in_tree->printBestPartitionRaxml((string(params.out_prefix) + ".best_scheme").c_str()); - delete [] distID; - delete [] dist; - model_info.dump(true); - if (inf_score > concat_info.computeICScore(ssize) + 1.0) { - cout << endl; - outWarning("Partition model has worse fit than single model!"); - outWarning("Add MERGE to -m option to increase model fit!"); - } - return; - } - - /* following implements the greedy algorithm of Lanfear et al. 
(2012) */ -// int part1, part2; - vector > gene_sets; - gene_sets.resize(in_tree->size()); - StrVector model_names; - model_names.resize(in_tree->size()); - StrVector greedy_model_trees; - greedy_model_trees.resize(in_tree->size()); - for (i = 0; i < gene_sets.size(); i++) { - gene_sets[i].insert(i); - model_names[i] = in_tree->part_info[i].model_name; - greedy_model_trees[i] = in_tree->part_info[i].name; - } - cout << "Merging models to increase model fit (about " << total_num_model << " total partition schemes)..." << endl; - - while (gene_sets.size() >= 2) { - // stepwise merging charsets - - // list of all better pairs of partitions than current partitioning scheme - ModelPairSet better_pairs; - - size_t num_pairs = 0; - // 2015-06-24: begin rcluster algorithm - // compute distance between gene_sets - for (int part1 = 0; part1 < gene_sets.size()-1; part1++) - for (int part2 = part1+1; part2 < gene_sets.size(); part2++) - if (super_aln->partitions[*gene_sets[part1].begin()]->seq_type == super_aln->partitions[*gene_sets[part2].begin()]->seq_type && - super_aln->partitions[*gene_sets[part1].begin()]->genetic_code == super_aln->partitions[*gene_sets[part2].begin()]->genetic_code) - { - // only merge partitions of the same data type - dist[num_pairs] = fabs(lenvec[part1] - lenvec[part2]); - distID[num_pairs].first = part1; - distID[num_pairs].second = part2; - num_pairs++; - } - if (num_pairs > 0 && params.partfinder_rcluster < 100) { - // sort distance - quicksort(dist, 0, num_pairs-1, distID); - num_pairs = round(num_pairs * (params.partfinder_rcluster/100.0)); - num_pairs = min(num_pairs, params.partfinder_rcluster_max); - if (num_pairs <= 0) num_pairs = 1; - } - // sort partition by computational cost for OpenMP effciency - for (i = 0; i < num_pairs; i++) { - // computation cost is proportional to #sequences, #patterns, and #states - Alignment *this_aln = in_tree->at(distID[i].first)->aln; - dist[i] = 
-((double)this_aln->getNSeq())*this_aln->getNPattern()*this_aln->num_states; - this_aln = in_tree->at(distID[i].second)->aln; - dist[i] -= ((double)this_aln->getNSeq())*this_aln->getNPattern()*this_aln->num_states; - } - if (num_threads > 1 && num_pairs >= 1) - quicksort(dist, 0, num_pairs-1, distID); - -#ifdef _OPENMP -#pragma omp parallel for private(i) schedule(dynamic) if(!params.model_test_and_tree) -#endif - for (int pair = 0; pair < num_pairs; pair++) { - // information of current partitions pair - ModelPair cur_pair; - cur_pair.part1 = distID[pair].first; - cur_pair.part2 = distID[pair].second; - ASSERT(cur_pair.part1 < cur_pair.part2); - cur_pair.merged_set.insert(gene_sets[cur_pair.part1].begin(), gene_sets[cur_pair.part1].end()); - cur_pair.merged_set.insert(gene_sets[cur_pair.part2].begin(), gene_sets[cur_pair.part2].end()); - for (auto it = cur_pair.merged_set.begin(); it != cur_pair.merged_set.end(); it++) { - if (it != cur_pair.merged_set.begin()) - cur_pair.set_name += "+"; - cur_pair.set_name += in_tree->part_info[*it].name; - } - ModelInfo best_model; - bool done_before = false; - { - // if pairs previously examined, reuse the information - model_info.startStruct(cur_pair.set_name); - if (model_info.getBestModel(best_model.name)) { - best_model.restoreCheckpoint(&model_info); - done_before = true; - } - model_info.endStruct(); - } - ModelCheckpoint part_model_info; - if (!done_before) { - Alignment *aln = super_aln->concatenateAlignments(cur_pair.merged_set); - PhyloTree *tree = in_tree->extractSubtree(cur_pair.merged_set); - tree->setAlignment(aln); - extractModelInfo(cur_pair.set_name, model_info, part_model_info); - tree->num_precision = in_tree->num_precision; - tree->setParams(¶ms); - tree->sse = params.SSE; - tree->optimize_by_newton = params.optimize_by_newton; - tree->num_threads = params.model_test_and_tree ? 
num_threads : 1; - if (params.model_test_and_tree) { - tree->setCheckpoint(new Checkpoint()); - tree->saveCheckpoint(); - } else { - tree->setCheckpoint(&part_model_info); - // trick to restore checkpoint - tree->restoreCheckpoint(); - tree->saveCheckpoint(); - } - best_model.name = testModel(params, tree, part_model_info, models_block, - params.model_test_and_tree ? num_threads : 1, params.partition_type, cur_pair.set_name); - best_model.restoreCheckpoint(&part_model_info); - if (params.model_test_and_tree) { - delete tree->getCheckpoint(); - } - delete tree; - delete aln; - } - cur_pair.logl = best_model.logl; - cur_pair.df = best_model.df; - cur_pair.model_name = best_model.name; - cur_pair.tree_len = best_model.tree_len; - double lhnew = lhsum - lhvec[cur_pair.part1] - lhvec[cur_pair.part2] + best_model.logl; - int dfnew = dfsum - dfvec[cur_pair.part1] - dfvec[cur_pair.part2] + best_model.df; - cur_pair.score = computeInformationScore(lhnew, dfnew, ssize, params.model_test_criterion); -#ifdef _OPENMP -#pragma omp critical -#endif - { - if (!done_before) { - replaceModelInfo(cur_pair.set_name, model_info, part_model_info); - model_info.dump(); - num_model++; - cout.width(4); - cout << right << num_model << " "; - cout.width(12); - cout << left << best_model.name << " "; - cout.width(11); - cout << cur_pair.score << " " << cur_pair.set_name; - if (num_model >= 10) { - double remain_time = max(total_num_model-num_model, (int64_t)0)*(getRealTime()-start_time)/num_model; - cout << "\t" << convert_time(getRealTime()-start_time) << " (" - << convert_time(remain_time) << " left)"; - } - cout << endl; - } - if (cur_pair.score < inf_score) - better_pairs.insertPair(cur_pair); - } - - } - if (better_pairs.empty()) break; - ModelPairSet compatible_pairs; - - int num_comp_pairs = params.partfinder_rcluster_fast ? 
gene_sets.size()/2 : 1; - better_pairs.getCompatiblePairs(num_comp_pairs, compatible_pairs); - if (compatible_pairs.size() > 1) - cout << compatible_pairs.size() << " compatible better partition pairs found" << endl; - - // 2017-12-21: simultaneously merging better pairs - for (auto it_pair = compatible_pairs.begin(); it_pair != compatible_pairs.end(); it_pair++) { - ModelPair opt_pair = it_pair->second; - - lhsum = lhsum - lhvec[opt_pair.part1] - lhvec[opt_pair.part2] + opt_pair.logl; - dfsum = dfsum - dfvec[opt_pair.part1] - dfvec[opt_pair.part2] + opt_pair.df; - inf_score = computeInformationScore(lhsum, dfsum, ssize, params.model_test_criterion); - ASSERT(inf_score <= opt_pair.score + 0.1); - - cout << "Merging " << opt_pair.set_name << " with " << criterionName(params.model_test_criterion) - << " score: " << inf_score << " (LnL: " << lhsum << " df: " << dfsum << ")" << endl; - // change entry opt_part1 to merged one - gene_sets[opt_pair.part1] = opt_pair.merged_set; - lhvec[opt_pair.part1] = opt_pair.logl; - dfvec[opt_pair.part1] = opt_pair.df; - lenvec[opt_pair.part1] = opt_pair.tree_len; - model_names[opt_pair.part1] = opt_pair.model_name; - greedy_model_trees[opt_pair.part1] = "(" + greedy_model_trees[opt_pair.part1] + "," + - greedy_model_trees[opt_pair.part2] + ")" + - convertIntToString(in_tree->size()-gene_sets.size()+1) + ":" + - convertDoubleToString(inf_score); - - // delete entry opt_part2 - lhvec.erase(lhvec.begin() + opt_pair.part2); - dfvec.erase(dfvec.begin() + opt_pair.part2); - lenvec.erase(lenvec.begin() + opt_pair.part2); - gene_sets.erase(gene_sets.begin() + opt_pair.part2); - model_names.erase(model_names.begin() + opt_pair.part2); - greedy_model_trees.erase(greedy_model_trees.begin() + opt_pair.part2); - - // decrease part ID for all pairs beyond opt_pair.part2 - auto next_pair = it_pair; - for (next_pair++; next_pair != compatible_pairs.end(); next_pair++) { - if (next_pair->second.part1 > opt_pair.part2) - next_pair->second.part1--; - 
if (next_pair->second.part2 > opt_pair.part2) - next_pair->second.part2--; - } - } - } - - string final_model_tree; - if (greedy_model_trees.size() == 1) - final_model_tree = greedy_model_trees[0]; - else { - final_model_tree = "("; - for (i = 0; i < greedy_model_trees.size(); i++) { - if (i>0) - final_model_tree += ","; - final_model_tree += greedy_model_trees[i]; - } - final_model_tree += ")"; - } - - cout << "BEST-FIT PARTITION MODEL: " << endl; - cout << " charpartition " << criterionName(params.model_test_criterion) << " = "; - for (i = 0; i < gene_sets.size(); i++) { - if (i > 0) - cout << ", "; - cout << model_names[i] << ":"; - for (auto j = gene_sets[i].begin(); j != gene_sets[i].end(); j++) { - cout << " " << in_tree->part_info[*j].name; - } - } - cout << ";" << endl; - cout << "Agglomerative model selection: " << final_model_tree << endl; - - delete [] distID; - delete [] dist; - if (gene_sets.size() < in_tree->size()) - mergePartitions(in_tree, gene_sets, model_names); - in_tree->printBestPartition((string(params.out_prefix) + ".best_scheme.nex").c_str()); - in_tree->printBestPartitionRaxml((string(params.out_prefix) + ".best_scheme").c_str()); - model_info.dump(true); - if (inf_score > concat_info.computeICScore(ssize) + 1.0) { - cout << endl; - outWarning("Partition merging found worse model than single model!"); - outWarning("Please do not use partition model!"); - } -} - -bool isMixtureModel(ModelsBlock *models_block, string &model_str) { - size_t mix_pos; - for (mix_pos = 0; mix_pos < model_str.length(); mix_pos++) { - size_t next_mix_pos = model_str.find_first_of("+*", mix_pos); - string sub_model_str = model_str.substr(mix_pos, next_mix_pos-mix_pos); - if (models_block->findMixModel(sub_model_str)) - return true; - if (next_mix_pos == string::npos) - break; - mix_pos = next_mix_pos; - } - return false; -} - -/* - * OBSOLETE: Helper function for testModels. 
- * Uses seq_type to return a model of the required class, which can then - * be used by a ModelFactory to produce more such objects. - * Gets a little complex in the case of DNA models, as - * Lie-Markov models are their own class distinct from time reversible models. - */ -/* -ModelMarkov* getPrototypeModel(SeqType seq_type, PhyloTree* tree, char *model_set) { - ModelMarkov *subst_model = NULL; - switch (seq_type) { - case SEQ_BINARY: - subst_model = new ModelBIN("JC2", "", FREQ_UNKNOWN, "", tree); - break; - case SEQ_PROTEIN: - subst_model = new ModelProtein("WAG", "", FREQ_UNKNOWN, "", tree); - break; - case SEQ_MORPH: - subst_model = new ModelMorphology("MK", "", FREQ_UNKNOWN, "", tree); - break; - case SEQ_CODON: - subst_model = new ModelCodon("GY", "", FREQ_UNKNOWN, "", tree); - break; - case SEQ_POMO: - // subst_model = new ModelPoMo("JC", "", FREQ_UNKNOWN, "", tree, ""); - // TODO DS: Implement model finder. - cout << "ERROR: Automatic model selection with PoMo not yet supported." << endl; - outError("Please provide a substitution model with, e.g., \"-m HKY+P\"."); - break; - case SEQ_DNA: - // This is the complicated case. Need to return either a ModelDNA, or a - // ModelLieMarkov - if (model_set && (strncmp(model_set, "liemarkov", 9) == 0 || strcmp(model_set,"strandsymmetric")==0)) { - // "liemarkov", "liemarkovry", "liemarkovws", "liemarkovmk", "strandsymmetric" - subst_model = new ModelLieMarkov("1.1", tree, "", FREQ_ESTIMATE, ""); - } else { - StrVector model_names; - bool foundLM = false; - bool foundTR = false; - if (model_set) { - convert_string_vec(model_set, model_names); - for (StrVector::iterator it = model_names.begin() ; it != model_names.end(); ++it) { - bool valid = ModelLieMarkov::validModelName(*it); - foundLM = foundLM || valid; - foundTR = foundTR || !valid; - } - } - if (foundLM && foundTR) { - outError("Currently we can't model test both Lie-Markov and non-Lie-Markov models\nat the same time. 
(You may have misspelled the name of a Lie-Markov model."); - } else if (foundLM) { - subst_model = new ModelLieMarkov("1.1", tree, "", FREQ_ESTIMATE, ""); - } else { - subst_model = new ModelDNA("JC", "", FREQ_UNKNOWN, "", tree); + // generate models for DNA + dna_aln = in_tree->aln->convertCodonToDNA(); + start = size(); + generate(params, dna_aln, params.model_test_separate_rate, merge_phase); + for (i = start; i < size(); i++) { + at(i).setFlag(MF_SAMPLE_SIZE_TRIPLE); } } - break; - default: - outError("Unrecognized seq_type, can't happen"); - } - return(subst_model); -} -*/ -string getSeqTypeName(SeqType seq_type) { - switch (seq_type) { - case SEQ_BINARY: return "binary"; - case SEQ_DNA: return "DNA"; - case SEQ_PROTEIN: return "protein"; - case SEQ_CODON: return "codon"; - case SEQ_MORPH: return "morphological"; - case SEQ_POMO: return "PoMo"; - case SEQ_UNKNOWN: return "unknown"; - case SEQ_MULTISTATE: return "MultiState"; + } else { + push_back(CandidateModel(in_model_name, "", in_tree->aln)); } -} - -string testModel(Params ¶ms, PhyloTree* in_tree, ModelCheckpoint &model_info, ModelsBlock *models_block, - int num_threads, int brlen_type, string set_name, bool print_mem_usage, string in_model_name) -{ - ModelCheckpoint *checkpoint = &model_info; - - SeqType seq_type = in_tree->aln->seq_type; - if (in_tree->isSuperTree()) - seq_type = ((PhyloSuperTree*)in_tree)->front()->aln->seq_type; - if (seq_type == SEQ_UNKNOWN) - outError("Unknown data for model testing."); - string sitelh_file = params.out_prefix; - sitelh_file += ".sitelh"; - in_tree->params = ¶ms; - StrVector model_names; DoubleVector model_scores; - int max_cats; - if (in_model_name.empty()) - max_cats = getModelList(params, in_tree->aln, model_names, params.model_test_separate_rate); - else { - max_cats = params.max_rate_cats; - model_names.push_back(in_model_name); - } - int model; - - if (print_mem_usage) { - uint64_t mem_size = in_tree->getMemoryRequired(max_cats); - cout << "NOTE: ModelFinder 
requires " << (mem_size / 1024) / 1024 << " MB RAM!" << endl; - if (mem_size >= getMemorySize()) { - outError("Memory required exceeds your computer RAM size!"); - } -#ifdef BINARY32 - if (mem_size >= 2000000000) { - outError("Memory required exceeds 2GB limit of 32-bit executable"); - } -#endif - } - - string best_model; - - if (in_tree->isSuperTree()) { - // select model for each partition - PhyloSuperTree *stree = (PhyloSuperTree*)in_tree; - testPartitionModel(params, stree, model_info, models_block, num_threads); -// stree->linkTrees(); - stree->mapTrees(); - string res_models = ""; - for (vector::iterator it = stree->part_info.begin(); it != stree->part_info.end(); it++) { - if (it != stree->part_info.begin()) res_models += ","; - res_models += (*it).model_name; - } - return res_models; - } + int model; + int best_model = -1; + Alignment *best_aln = in_tree->aln; int ssize = in_tree->aln->getNSite(); // sample size + //if (adjust) + // ssize = adjust->sample_size; if (params.model_test_sample_size) ssize = params.model_test_sample_size; if (set_name == "") { - cout << "ModelFinder will test " << model_names.size() << " " - << getSeqTypeName(seq_type) - << " models (sample size: " << ssize << ") ..." << endl; - if (verbose_mode >= VB_MED) { - for (auto i = model_names.begin(); i != model_names.end(); i++) - cout << *i << " "; - cout << endl; - } + cout << "ModelFinder will test up to " << size() << " "; + if (do_modelomatic) + cout << "codon/AA/DNA"; + else + cout << getSeqTypeName(in_tree->aln->seq_type); + cout << " models (sample size: " << ssize << ") ..." << endl; if (params.model_test_and_tree == 0) cout << " No. 
Model -LnL df AIC AICc BIC" << endl; } - if (params.print_site_lh) { - ofstream sitelh_out(sitelh_file.c_str()); - if (!sitelh_out.is_open()) - outError("Cannot write to file ", sitelh_file); - sitelh_out << model_names.size() << " " << in_tree->getAlnNSite() << endl; - sitelh_out.close(); - } -// uint64_t RAM_requirement = 0; - string best_model_AIC, best_model_AICc, best_model_BIC; + // uint64_t RAM_requirement = 0; + int best_model_AIC = -1, best_model_AICc = -1, best_model_BIC = -1; double best_score_AIC = DBL_MAX, best_score_AICc = DBL_MAX, best_score_BIC = DBL_MAX; string best_tree_AIC, best_tree_AICc, best_tree_BIC; +// CKP_RESTORE(best_score_AIC); +// CKP_RESTORE(best_score_AICc); +// CKP_RESTORE(best_score_BIC); +// CKP_RESTORE(best_model_AIC); +// CKP_RESTORE(best_model_AICc); +// CKP_RESTORE(best_model_BIC); + CKP_RESTORE(best_tree_AIC); CKP_RESTORE(best_tree_AICc); CKP_RESTORE(best_tree_BIC); -// string prev_tree_string = ""; -// int prev_model_id = -1; -// int skip_model = 0; - + // detect rate hetegeneity automatically or not + bool auto_rate = merge_phase ? iEquals(params.merge_rates, "AUTO") : iEquals(params.ratehet_set, "AUTO"); + bool auto_subst = merge_phase ? 
iEquals(params.merge_models, "AUTO") : iEquals(params.model_set, "AUTO"); + int rate_block = size(); + if (auto_rate) { + for (rate_block = 0; rate_block < size(); rate_block++) + if (rate_block+1 < size() && at(rate_block+1).subst_name != at(rate_block).subst_name) + break; + } + + int subst_block = size(); + if (auto_subst) { + for (subst_block = size()-1; subst_block >= 0; subst_block--) + if (at(subst_block).rate_name == at(0).rate_name) + break; + } + + //------------- MAIN FOR LOOP GOING THROUGH ALL MODELS TO BE TESTED ---------// - for (model = 0; model < model_names.size(); model++) { + for (model = 0; model < size(); model++) { + if (model == rate_block+1) + filterRates(rate_block); // auto filter rate models + if (model == subst_block+1) + filterSubst(subst_block); // auto filter substitution model + if (at(model).hasFlag(MF_IGNORED)) { + model_scores.push_back(DBL_MAX); + continue; + } //cout << model_names[model] << endl; - if (model_names[model][0] == '+') { + if (at(model).subst_name == "") { // now switching to test rate heterogeneity - if (best_model == "") + if (best_model == -1) switch (params.model_test_criterion) { case MTC_AIC: best_model = best_model_AIC; @@ -2058,52 +2153,57 @@ string testModel(Params ¶ms, PhyloTree* in_tree, ModelCheckpoint &model_info break; default: ASSERT(0); } - model_names[model] = best_model + model_names[model]; + at(model).subst_name = at(best_model).subst_name; } // optimize model parameters - string orig_model_name = model_names[model]; - ModelInfo info; - info.set_name = set_name; + string orig_model_name = at(model).getName(); + // keep separate output model_info to only update model_info if better model found + ModelCheckpoint out_model_info; + //CandidateModel info; + //info.set_name = set_name; + at(model).set_name = set_name; string tree_string; - /***** main call to estimate model parameters ******/ - tree_string = testOneModel(model_names[model], params, in_tree->aln, - model_info, info, models_block, 
num_threads, brlen_type); + at(model).syncChkPoint = this->syncChkPoint; + tree_string = at(model).evaluate(params, + model_info, out_model_info, models_block, num_threads, brlen_type); + at(model).computeICScores(ssize); + at(model).setFlag(MF_DONE); - info.computeICScores(ssize); - info.saveCheckpoint(checkpoint); - - ModelInfo prev_info; + // for testing + CandidateModel prev_info; bool skip_model = false; + + bool check_condition = prev_info.restoreCheckpointRminus1(checkpoint, &at(model)); - if (prev_info.restoreCheckpointRminus1(checkpoint, info.name)) { + if (check_condition) { // check stop criterion for +R prev_info.computeICScores(ssize); switch (params.model_test_criterion) { case MTC_ALL: - if (info.AIC_score > prev_info.AIC_score && - info.AICc_score > prev_info.AICc_score && - info.BIC_score > prev_info.BIC_score) { + if (at(model).AIC_score > prev_info.AIC_score && + at(model).AICc_score > prev_info.AICc_score && + at(model).BIC_score > prev_info.BIC_score) { // skip remaining model skip_model = true; } break; case MTC_AIC: - if (info.AIC_score > prev_info.AIC_score) { + if (at(model).AIC_score > prev_info.AIC_score) { // skip remaining model skip_model = true; } break; case MTC_AICC: - if (info.AICc_score > prev_info.AICc_score) { + if (at(model).AICc_score > prev_info.AICc_score) { // skip remaining model skip_model = true; } break; case MTC_BIC: - if (info.BIC_score > prev_info.BIC_score) { + if (at(model).BIC_score > prev_info.BIC_score) { // skip remaining model skip_model = true; } @@ -2111,54 +2211,66 @@ string testModel(Params ¶ms, PhyloTree* in_tree, ModelCheckpoint &model_info } } - if (info.AIC_score < best_score_AIC) { - best_model_AIC = info.name; - best_score_AIC = info.AIC_score; + if (at(model).AIC_score < best_score_AIC) { + best_model_AIC = model; + best_score_AIC = at(model).AIC_score; if (!tree_string.empty()) best_tree_AIC = tree_string; + // only update model_info with better model + if (params.model_test_criterion == MTC_AIC) { 
+ model_info.putSubCheckpoint(&out_model_info, ""); + best_aln = at(model).aln; + } } - if (info.AICc_score < best_score_AICc) { - best_model_AICc = info.name; - best_score_AICc = info.AICc_score; + if (at(model).AICc_score < best_score_AICc) { + best_model_AICc = model; + best_score_AICc = at(model).AICc_score; if (!tree_string.empty()) best_tree_AICc = tree_string; + // only update model_info with better model + if (params.model_test_criterion == MTC_AICC) { + model_info.putSubCheckpoint(&out_model_info, ""); + best_aln = at(model).aln; + } } - if (info.BIC_score < best_score_BIC) { - best_model_BIC = info.name; - best_score_BIC = info.BIC_score; + if (at(model).BIC_score < best_score_BIC) { + best_model_BIC = model; + best_score_BIC = at(model).BIC_score; if (!tree_string.empty()) best_tree_BIC = tree_string; + // only update model_info with better model + if (params.model_test_criterion == MTC_BIC) { + model_info.putSubCheckpoint(&out_model_info, ""); + best_aln = at(model).aln; + } } switch (params.model_test_criterion) { - case MTC_AIC: model_scores.push_back(info.AIC_score); break; - case MTC_AICC: model_scores.push_back(info.AICc_score); break; - default: model_scores.push_back(info.BIC_score); break; + case MTC_AIC: model_scores.push_back(at(model).AIC_score); break; + case MTC_AICC: model_scores.push_back(at(model).AICc_score); break; + default: model_scores.push_back(at(model).BIC_score); break; } - CKP_SAVE(best_tree_AIC); CKP_SAVE(best_tree_AICc); CKP_SAVE(best_tree_BIC); checkpoint->dump(); - - if (set_name == "") { cout.width(3); cout << right << model+1 << " "; cout.width(13); - cout << left << info.name << " "; + cout << left << at(model).getName() << " "; cout.precision(3); cout << fixed; cout.width(12); - cout << -info.logl << " "; + cout << -at(model).logl << " "; cout.width(3); - cout << info.df << " "; + cout << at(model).df << " "; cout.width(12); - cout << info.AIC_score << " "; + cout << at(model).AIC_score << " "; cout.width(12); - cout << 
info.AICc_score << " " << info.BIC_score; + cout << at(model).AICc_score << " " << at(model).BIC_score; cout << endl; } @@ -2170,25 +2282,16 @@ string testModel(Params ¶ms, PhyloTree* in_tree, ModelCheckpoint &model_info if ((posR = orig_model_name.find(rates[i])) != string::npos) break; string first_part = orig_model_name.substr(0, posR+2); - while (model < model_names.size()-1 && model_names[model+1].substr(0, posR+2) == first_part) { - model++; - model_scores.push_back(DBL_MAX); + for (int next = model+1; next < size() && at(next).getName().substr(0, posR+2) == first_part; next++) { + at(next).setFlag(MF_IGNORED); } } - } + ASSERT(model_scores.size() == size()); - ASSERT(model_scores.size() == model_names.size()); - - if (best_model_BIC.empty()) + if (best_model_BIC == -1) { outError("No models were examined! Please check messages above"); - - if (set_name == "") { - cout << "Akaike Information Criterion: " << best_model_AIC << endl; - cout << "Corrected Akaike Information Criterion: " << best_model_AICc << endl; - cout << "Bayesian Information Criterion: " << best_model_BIC << endl; - } - + } int *model_rank = new int[model_scores.size()]; // string best_tree; // BQM 2015-07-21: With Lars find best model @@ -2213,21 +2316,35 @@ string testModel(Params ¶ms, PhyloTree* in_tree, ModelCheckpoint &model_info break; if (model > 0) model_list += " "; - model_list += model_names[model_rank[model]]; + model_list += at(model_rank[model]).getName(); } - checkpoint->put("best_model_list_" + criterionName(params.model_test_criterion), model_list); - CKP_SAVE(best_model_AIC); - CKP_SAVE(best_model_AICc); - CKP_SAVE(best_model_BIC); + model_info.putBestModelList(model_list); + model_info.put("best_model_AIC", at(best_model_AIC).getName()); + model_info.put("best_model_AICc", at(best_model_AICc).getName()); + model_info.put("best_model_BIC", at(best_model_BIC).getName()); CKP_SAVE(best_score_AIC); CKP_SAVE(best_score_AICc); CKP_SAVE(best_score_BIC); - - checkpoint->dump(true); 
+ checkpoint->dump(); delete [] model_rank; + + // update alignment if best data type changed + if (best_aln != in_tree->aln) { + delete in_tree->aln; + in_tree->aln = best_aln; + if (best_aln == prot_aln) + prot_aln = NULL; + else + dna_aln = NULL; + } + + if (dna_aln) + delete dna_aln; + if (prot_aln) + delete prot_aln; // in_tree->deleteAllPartialLh(); @@ -2239,1033 +2356,1951 @@ string testModel(Params ¶ms, PhyloTree* in_tree, ModelCheckpoint &model_info in_tree->readTreeString(best_tree); - if (set_name == "") { - cout << "Best-fit model: " << best_model << " chosen according to " - << criterionName(params.model_test_criterion) << endl; - } - if (params.print_site_lh) - cout << "Site log-likelihoods per model printed to " << sitelh_file << endl; - return best_model; -} - -int countDistinctTrees(const char *filename, bool rooted, IQTree *tree, IntVector &distinct_ids, bool exclude_duplicate) { - StringIntMap treels; - try { - ifstream in; - in.exceptions(ios::failbit | ios::badbit); - in.open(filename); - // remove the failbit - in.exceptions(ios::badbit); - int tree_id; - for (tree_id = 0; !in.eof(); tree_id++) { - if (exclude_duplicate) { - tree->freeNode(); - tree->readTree(in, rooted); - tree->setAlignment(tree->aln); - tree->setRootNode(tree->params->root); - StringIntMap::iterator it = treels.end(); - ostringstream ostr; - tree->printTree(ostr, WT_TAXON_ID | WT_SORT_TAXA); - it = treels.find(ostr.str()); - if (it != treels.end()) { // already in treels - distinct_ids.push_back(it->second); - } else { - distinct_ids.push_back(-1); - treels[ostr.str()] = tree_id; - } - } else { - // ignore tree - char ch; - do { - in >> ch; - } while (!in.eof() && ch != ';'); - distinct_ids.push_back(-1); - } - char ch; - in.exceptions(ios::goodbit); - (in) >> ch; - if (in.eof()) break; - in.unget(); - in.exceptions(ios::failbit | ios::badbit); + return at(best_model); +} - } - in.close(); - } catch (ios::failure) { - outError("Cannot read file ", filename); - } - if 
(exclude_duplicate) - return treels.size(); - else - return distinct_ids.size(); +int64_t CandidateModelSet::getNextModel() { + int64_t next_model; +#pragma omp critical + { + if (size() == 0) + next_model = -1; + else if (current_model == -1) + next_model = 0; + else { + for (next_model = current_model+1; next_model != current_model; next_model++) { + if (next_model == size()) + next_model = 0; + if (!at(next_model).hasFlag(MF_IGNORED + MF_WAITING + MF_RUNNING)) { + break; + } + } + } + } + if (next_model != current_model) { + current_model = next_model; + at(next_model).setFlag(MF_RUNNING); + return next_model; + } else + return -1; } -//const double TOL_RELL_SCORE = 0.01; +CandidateModel CandidateModelSet::evaluateAll(Params ¶ms, PhyloTree* in_tree, ModelCheckpoint &model_info, + ModelsBlock *models_block, int num_threads, int brlen_type, + string in_model_name, bool merge_phase, bool write_info) +{ + //ModelCheckpoint *checkpoint = &model_info; -/* - Problem: solve the following linear system equation: - a_1*x + b_1*y = c_1 - a_2*x + b_2*y = c_2 - .... 
- a_n*x + b_n*y = c_n + in_tree->params = ¶ms; -becomes minimizing weighted least square: - - sum_k { w_k*[ c_k - (a_k*x + b_k*y) ]^2 } - - -the solution is: - - x = [(sum_k w_k*b_k*c_k)*(sum_k w_k*a_k*b_k) - (sum_k w_k*a_k*c_k)(sum_k w_k*b_k^2)] / - [ (sum_k w_k*a_k*b_k)^2 - (sum_k w_k*a_k^2)*(sum_k w_k*b_k^2) ] + Alignment *prot_aln = NULL; + Alignment *dna_aln = NULL; + bool do_modelomatic = params.modelomatic && in_tree->aln->seq_type == SEQ_CODON; - y = [(sum_k w_k*a_k*c_k)*(sum_k w_k*a_k*b_k) - (sum_k w_k*b_k*c_k)(sum_k w_k*a_k^2)] / - [ (sum_k w_k*a_k*b_k)^2 - (sum_k w_k*a_k^2)*(sum_k w*k*b_k^2) ] - @param n number of data points - @param w weight vector of length n - @param a a value vector of length n - @param b b value vector of length n - @param c c value vector of length n - @param[out] x x-value - @param[out] y y-value - @return least square value -*/ -void doWeightedLeastSquare(int n, double *w, double *a, double *b, double *c, double &x, double &y, double &se) { - int k; - double BC = 0.0, AB = 0.0, AC = 0.0, A2 = 0.0, B2 = 0.0; - double denom; - for (k = 0; k < n; k++) { - double wa = w[k]*a[k]; - double wb = w[k]*b[k]; - AB += wa*b[k]; - BC += wb*c[k]; - AC += wa*c[k]; - A2 += wa*a[k]; - B2 += wb*b[k]; - } - denom = 1.0/(AB*AB - A2*B2); - x = (BC*AB - AC*B2) * denom; - y = (AC*AB - BC*A2) * denom; - se = -denom*(B2+A2+2*AB); - ASSERT(se >= 0.0); -} - -/** - MLE estimates for AU test -*/ -class OptimizationAUTest : public Optimization { - -public: - - OptimizationAUTest(double d, double c, int nscales, double *bp, double *rr, double *rr_inv) { - this->d = d; - this->c = c; - this->bp = bp; - this->rr = rr; - this->rr_inv = rr_inv; - this->nscales = nscales; - + if (in_model_name.empty()) { + generate(params, in_tree->aln, params.model_test_separate_rate, merge_phase); + if (do_modelomatic) { + // generate models for protein + // adapter coefficient according to Whelan et al. 
2015 + prot_aln = in_tree->aln->convertCodonToAA(); + int adjusted_df; + double adjusted_logl = computeAdapter(in_tree->aln, prot_aln, adjusted_df); + if (write_info) + cout << "Adjusted LnL: " << adjusted_logl << " df: " << adjusted_df << endl; + size_t start = size(); + generate(params, prot_aln, params.model_test_separate_rate, merge_phase); + size_t i; + for (i = start; i < size(); i++) { + at(i).logl = adjusted_logl; + at(i).df = adjusted_df; + } + + // generate models for DNA + dna_aln = in_tree->aln->convertCodonToDNA(); + start = size(); + generate(params, dna_aln, params.model_test_separate_rate, merge_phase); + for (i = start; i < size(); i++) { + at(i).setFlag(MF_SAMPLE_SIZE_TRIPLE); + } + } + } else { + push_back(CandidateModel(in_model_name, "", in_tree->aln)); } - /** - return the number of dimensions - */ - virtual int getNDim() { return 2; } - - - /** - the target function which needs to be optimized - @param x the input vector x - @return the function value at x - */ - virtual double targetFunk(double x[]) { - d = x[1]; - c = x[2]; - double res = 0.0; - for (int k = 0; k < nscales; k++) { - double cdf = gsl_cdf_ugaussian_P(d*rr[k] + c*rr_inv[k]); - res += bp[k] * log(1.0 - cdf) + (1.0-bp[k])*log(cdf); - } - return res; - } - - void optimizeDC() { - double x[3], lower[3], upper[3]; - bool bound_check[3]; - x[1] = d; - x[2] = c; - lower[1] = lower[2] = 1e-4; - upper[1] = upper[2] = 100.0; - bound_check[1] = bound_check[2] = false; - minimizeMultiDimen(x, 2, lower, upper, bound_check, 1e-4); - d = x[1]; - c = x[2]; - } - - double d, c; - int nscales; - double *bp; - double *rr; - double *rr_inv; -}; - -/* BEGIN CODE WAS TAKEN FROM CONSEL PROGRAM */ - -/* binary search for a sorted vector - find k s.t. 
vec[k-1] <= t < vec[k] - */ -int cntdist2(double *vec, int bb, double t) -{ - int i,i0,i1; - - i0=0; i1=bb-1; - if(t < vec[0]) return 0; - else if(vec[bb-1] <= t) return bb; - - while(i1-i0>1) { - i=(i0+i1)/2; - if(vec[i] <= t) i0=i; - else i1=i; - } - - return i1; -} - -/* - smoothing the counting for a sorted vector - the piecewise linear function connecting - F(v[i]) = 1/(2n) + i/n, for i=0,...,n-1 - F(1.5v[0]-0.5v[1]) = 0 - F(1.5v[n-1]-0.5v[n-2]) = 1. - - 1. F(x)=0 for x<=1.5v[0]-0.5v[1] - - 2. F(x)=1/(2n) + (1/n)*(x-v[0])/(v[1]-v[0]) - for 1.5v[0]-0.5v[1] < x <= v[0] - - 3. F(x)=1/(2n) + i/n + (1/n)*(x-v[i])/(v[i]-v[i+1]) - for v[i] < x <= v[i+1], i=0,..., - - 4. F(x)=1-(1/2n) + (1/n)*(x-v[n-1])/(v[n-1]-v[n-2]) - for v[n-1] < x <= 1.5v[n-1]-0.5v[n-2] - - 5. F(x)=1 for x > 1.5v[n-1]-0.5v[n-2] - */ -double cntdist3(double *vec, int bb, double t) -{ - double p,n; - int i; - i=cntdist2(vec,bb,t)-1; /* to find vec[i] <= t < vec[i+1] */ - n=(double)bb; - if(i<0) { - if(vec[1]>vec[0]) p=0.5+(t-vec[0])/(vec[1]-vec[0]); - else p=0.0; - } else if(ivec[i]) p=0.5+(double)i+(t-vec[i])/(vec[i+1]-vec[i]); - else p=0.5+(double)i; /* <- should never happen */ - } else { - if(vec[bb-1]-vec[bb-2]>0) p=n-0.5+(t-vec[bb-1])/(vec[bb-1]-vec[bb-2]); - else p=n; - } - if(p>n) p=n; else if(p<0.0) p=0.0; - return p; -} - -double log3(double x) -{ - double y,z1,z2,z3,z4,z5; - if(fabs(x)>1.0e-3) { - y=-log(1.0-x); - } else { - z1=x; z2=z1*x; z3=z2*x; z4=z3*x; z5=z4*x; - y=((((z5/5.0)+z4/4.0)+z3/3.0)+z2/2.0)+z1; - } - return y; -} - -int mleloopmax=30; -double mleeps=1e-10; -int mlecoef(double *cnts, double *rr, double bb, int kk, - double *coef0, /* set initinal value (size=2) */ - double *lrt, double *df, /* LRT statistic */ - double *se - ) -{ - int i,m,loop; - double coef[2], update[2]; - double d1f, d2f, d11f, d12f, d22f; /* derivatives */ - double v11, v12, v22; /* inverse of -d??f */ - double a,e; - double s[kk], r[kk],c[kk], b[kk],z[kk],p[kk],d[kk],g[kk],h[kk]; - - m=0; - 
for(i=0;ialn->seq_type); + cout << " models (sample size: " << in_tree->aln->getNSite() << ") ..." << endl; + cout << " No. Model -LnL df AIC AICc BIC" << endl; } - if(m<2) return 1; - coef[0]=coef0[0]; /* signed distance */ - coef[1]=coef0[1]; /* curvature */ + double best_score = DBL_MAX; - for(loop=0;loop0.0 && p[i]<1.0) { - g[i]=d[i]*( d[i]*(-c[i]+2.0*c[i]*p[i]-b[i]*p[i]*p[i])/ - (p[i]*p[i]*(1.0-p[i])*(1.0-p[i])) - + z[i]*(c[i]-b[i]*p[i])/(p[i]*(1.0-p[i])) ); - h[i]=d[i]*(c[i]-b[i]*p[i])/(p[i]*(1.0-p[i])); - } else { g[i]=h[i]=0.0; } - d1f+= -h[i]*s[i]; d2f+= -h[i]/s[i]; - d11f+= g[i]*r[i]; d12f+= g[i]; d22f+= g[i]/r[i]; + // detect rate hetegeneity automatically or not + bool auto_rate = merge_phase ? iEquals(params.merge_rates, "AUTO") : iEquals(params.ratehet_set, "AUTO"); + bool auto_subst = merge_phase ? iEquals(params.merge_models, "AUTO") : iEquals(params.model_set, "AUTO"); + int rate_block = size(); + if (auto_rate) { + for (rate_block = 0; rate_block < size(); rate_block++) + if (rate_block+1 < size() && at(rate_block+1).subst_name != at(rate_block).subst_name) + break; } - - a=d11f*d22f-d12f*d12f; - if(a==0.0) { - return 2; + + int subst_block = size(); + if (auto_subst) { + for (subst_block = size()-1; subst_block >= 0; subst_block--) + if (at(subst_block).rate_name == at(0).rate_name) + break; } - v11=-d22f/a; v12=d12f/a; v22=-d11f/a; - - /* Newton-Raphson update */ - update[0]=v11*d1f+v12*d2f; update[1]=v12*d1f+v22*d2f; - coef[0]+=update[0]; coef[1]+=update[1]; - - /* check convergence */ - e=-d11f*update[0]*update[0]-2.0*d12f*update[0]*update[1] - -d22f*update[1]*update[1]; - - if(e0.0 && p[i]<1.0) { - *df+=1.0; - if(c[i]>0.0) a=c[i]*log(c[i]/b[i]/p[i]); else a=0.0; - if(c[i]name; + string tree_string; + + // main call to estimate model parameters + tree_string = at(model).evaluate(params, model_info, out_model_info, + models_block, num_threads, brlen_type); + at(model).computeICScores(); + at(model).setFlag(MF_DONE); + + int lower_model = 
getLowerKModel(model); + if (lower_model >= 0 && at(lower_model).getScore() < at(model).getScore()) { + // ignore all +R_k model with higher category + for (int higher_model = model; higher_model != -1; + higher_model = getHigherKModel(higher_model)) { + at(higher_model).setFlag(MF_IGNORED); + } + + } +#ifdef _OPENMP +#pragma omp critical + { +#endif + if (best_score > at(model).getScore()) { + best_score = at(model).getScore(); + if (!tree_string.empty()) { + //model_info.put("best_tree_" + criterionName(params.model_test_criterion), tree_string); + } + // only update model_info with better model + model_info.putSubCheckpoint(&out_model_info, ""); + } + model_info.dump(); + if (write_info) { + cout.width(3); + cout << right << model+1 << " "; + cout.width(13); + cout << left << at(model).getName() << " "; + + cout.precision(3); + cout << fixed; + cout.width(12); + cout << -at(model).logl << " "; + cout.width(3); + cout << at(model).df << " "; + cout.width(12); + cout << at(model).AIC_score << " "; + cout.width(12); + cout << at(model).AICc_score << " " << at(model).BIC_score; + cout << endl; + + } + if (model >= rate_block) + filterRates(model); // auto filter rate models + if (model >= subst_block) + filterSubst(model); // auto filter substitution model +#ifdef _OPENMP + } +#endif + } while (model != -1); + } + + // store the best model + ModelTestCriterion criteria[] = {MTC_AIC, MTC_AICC, MTC_BIC}; + for (auto mtc : criteria) { + int best_model = getBestModelID(mtc); + model_info.put("best_score_" + criterionName(mtc), at(best_model).getScore(mtc)); + model_info.put("best_model_" + criterionName(mtc), at(best_model).getName()); + } + + + /* sort models by their scores */ + multimap model_sorted; + for (int64_t model = 0; model < num_models; model++) + if (at(model).hasFlag(MF_DONE)) { + model_sorted.insert(multimap::value_type(at(model).getScore(), model)); + } + string model_list; + for (auto it = model_sorted.begin(); it != model_sorted.end(); it++) { + if (it 
!= model_sorted.begin()) + model_list += " "; + model_list += at(it->second).getName(); + } + + model_info.putBestModelList(model_list); + model_info.dump(); + + // update alignment if best data type changed + int best_model = getBestModelID(params.model_test_criterion); + if (at(best_model).aln != in_tree->aln) { + delete in_tree->aln; + in_tree->aln = at(best_model).aln; + if (in_tree->aln == prot_aln) + prot_aln = NULL; + else + dna_aln = NULL; + } + + if (dna_aln) + delete dna_aln; + if (prot_aln) + delete prot_aln; + + return at(best_model); +} + + +/** + * select models for all partitions + * @param[in,out] model_info (IN/OUT) all model information + * @return total number of parameters + */ +void testPartitionModel(Params ¶ms, PhyloSuperTree* in_tree, ModelCheckpoint &model_info, + ModelsBlock *models_block, int num_threads) +{ + + PartitionFinder partitionFinder(¶ms, in_tree, &model_info, models_block, num_threads); + partitionFinder.test_PartitionModel(); +} + + +/* + * send an integer vector from the master to a worker + */ +void sendVector(vector& data, int worker) { + + if (MPIHelper::getInstance().isWorker()) + return; + +#ifdef _IQTREE_MPI + int n = data.size(); + + // send the number of elements to the worker + MPI_Send(&n, 1, MPI_INT, worker, 0, MPI_COMM_WORLD); + if (n == 0) + return; + + int* buff = new int[n]; + int i; + for (i=0; i& data) { + + if (MPIHelper::getInstance().isMaster()) + return; + +#ifdef _IQTREE_MPI + int n; + data.clear(); + + // receive the number of elements from the master + MPI_Recv(&n, 1, MPI_INT, PROC_MASTER, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + if (n == 0) + return; + + int* buff = new int[n]; + // receive the number array from the master + MPI_Recv(buff, n, MPI_INT, PROC_MASTER, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + int i; + for (i=0; idump(); + + num_model++; + cout.width(4); + cout << right << num_model << " "; + if (tag != -1) + cout << tag << " "; + cout.width(12); + cout << left << model_name << " "; + 
cout.width(11); + cout << score << " "; + cout.width(11); + cout << tree_len << " " << set_name; + if (num_model >= 10) { + remain_time = max(total_num_model-num_model, (int64_t)0)*(getRealTime()-start_time)/num_model; + cout << "\t" << convert_time(getRealTime()-start_time) << " (" + << convert_time(remain_time) << " left)"; + } + cout << endl; + } + + // update the number of jobs done + jobdone++; + } +} + +/* + * Show the the other worker's result of best model for the merge + */ +void PartitionFinder::showMergeResults(ModelCheckpoint& part_model_info, vector& tree_len, vector& model_name, vector& score, vector& set_name, vector& tag, int tot_jobs_done) { + + double remain_time; + int i; + +#ifdef _OPENMP +#pragma omp critical +#endif + { + replaceModelInfo(model_info, part_model_info); + model_info->dump(); + + for (i=0; i= 10) { + remain_time = max(total_num_model-num_model, (int64_t)0)*(getRealTime()-start_time)/num_model; + cout << "\t" << convert_time(getRealTime()-start_time) << " (" << convert_time(remain_time) << " left)"; + } + cout << endl; + } + + // update the number of jobs done + jobdone += tot_jobs_done; + } + +} + +/** + * Process the computation of the best model for a merge with MPI + * Find the best model for merging two partitions in job_id + * @param job_id ID of closest_pairs array + * @param nthreads number of threads available for this job + * @param need_next_jobID whether it is needed to get the next tree ID + + * @return next job ID if need_next_treeID and (MASTER or IS_ASYN_COMM = 0), otherwise -1 + */ +int PartitionFinder::getBestModelForOneMergeMPI(int job_id, int nthreads, bool need_next_jobID, SyncChkPoint& syncChkPt) { + + CandidateModel best_model; + ModelPair cur_pair; + ModelCheckpoint part_model_info; + CandidateModelSet candModelSet; + double weight1, weight2, sum, cur_tree_len; + bool under_mpi, done_before, check; + string key; + double lhnew; + int dfnew; + int next_job_id = -1; + bool noChkMessage = false; + int job_type 
= 2; // compute the best model for the merge + + // information of current partitions pair + cur_pair.part1 = closest_pairs[job_id].first; + cur_pair.part2 = closest_pairs[job_id].second; + ASSERT(cur_pair.part1 < cur_pair.part2); + cur_pair.merged_set.insert(gene_sets[cur_pair.part1].begin(), gene_sets[cur_pair.part1].end()); + cur_pair.merged_set.insert(gene_sets[cur_pair.part2].begin(), gene_sets[cur_pair.part2].end()); + cur_pair.set_name = getSubsetName(in_tree, cur_pair.merged_set); + weight1 = getSubsetAlnLength(in_tree, gene_sets[cur_pair.part1]); + weight2 = getSubsetAlnLength(in_tree, gene_sets[cur_pair.part2]); + sum = 1.0 / (weight1 + weight2); + weight1 *= sum; + weight2 *= sum; + done_before = false; + + #ifdef _OPENMP + #pragma omp critical + #endif + { + // if pairs previously examined, reuse the information + model_info->startStruct(cur_pair.set_name); + if (model_info->getBestModel(best_model.subst_name)) { + best_model.restoreCheckpoint(model_info); + done_before = true; + } + model_info->endStruct(); + } + + cur_tree_len = 0.0; + if (!done_before) { + Alignment *aln = super_aln->concatenateAlignments(cur_pair.merged_set); + PhyloTree *tree = in_tree->extractSubtree(cur_pair.merged_set); + //tree->scaleLength((weight1*lenvec[cur_pair.part1] + weight2*lenvec[cur_pair.part2])/tree->treeLength()); + tree->scaleLength(sqrt(lenvec[cur_pair.part1]*lenvec[cur_pair.part2])/tree->treeLength()); + cur_tree_len = tree->treeLength(); + tree->setAlignment(aln); +#ifdef _OPENMP +#pragma omp critical +#endif +{ + extractModelInfo(cur_pair.set_name, *model_info, part_model_info); + transferModelParameters(in_tree, *model_info, part_model_info, gene_sets[cur_pair.part1], gene_sets[cur_pair.part2]); +} + tree->num_precision = in_tree->num_precision; + tree->setParams(params); + tree->sse = params->SSE; + tree->optimize_by_newton = params->optimize_by_newton; + tree->setNumThreads(params->model_test_and_tree ? 
num_threads : 1); + + tree->setCheckpoint(&part_model_info); + // trick to restore checkpoint + tree->restoreCheckpoint(); + tree->saveCheckpoint(); + + candModelSet.syncChkPoint = &(syncChkPt); + best_model = candModelSet.test(*params, tree, part_model_info, models_block, + nthreads, params->partition_type, cur_pair.set_name, "", true); + candModelSet.syncChkPoint = nullptr; + check = (best_model.restoreCheckpoint(&part_model_info)); + ASSERT(check); + delete tree; + delete aln; + } + + cur_pair.logl = best_model.logl; + cur_pair.df = best_model.df; + cur_pair.model_name = best_model.getName(); + cur_pair.tree_len = best_model.tree_len; + lhnew = lhsum - lhvec[cur_pair.part1] - lhvec[cur_pair.part2] + best_model.logl; + dfnew = dfsum - dfvec[cur_pair.part1] - dfvec[cur_pair.part2] + best_model.df; + cur_pair.score = computeInformationScore(lhnew, dfnew, ssize, params->model_test_criterion); + + if (MPIHelper::getInstance().isMaster()) { + // for Master + showMergeResult(part_model_info, cur_pair.tree_len, cur_pair.model_name, cur_pair.score, cur_pair.set_name, done_before, syncChkPt.mytag); + if (need_next_jobID) { + next_job_id = syncChkPt.getNextJobID(); + } + } else { + + // for Worker -- SYN communication + #ifdef SYN_COMM + + key = "pf_tree_id"; part_model_info.put(key, job_id); + key = "pf_tree_len"; part_model_info.put(key, cur_pair.tree_len); + key = "pf_model_name"; part_model_info.put(key, cur_pair.model_name); + key = "pf_score"; part_model_info.put(key, cur_pair.score); + key = "pf_set_name"; part_model_info.put(key, cur_pair.set_name); + key = "pf_done_before"; part_model_info.putBool(key, done_before); + next_job_id = syncChkPt.sendChkptToMaster(part_model_info, need_next_jobID, job_type); + + #endif + + // for Worker -- ONESIDE communication + #ifdef ONESIDE_COMM + + #ifdef _OPENMP + #pragma omp critical + #endif + { + if (!done_before) { + // consolidate part_model_info into the process_model_info + replaceModelInfo(cur_pair.set_name, 
process_model_info, part_model_info); + + tree_id_vec.push_back(job_id); + tree_len_vec.push_back(cur_pair.tree_len); + model_name_vec.push_back(cur_pair.model_name); + score_vec.push_back(cur_pair.score); + set_name_vec.push_back(cur_pair.set_name); + tag_vec.push_back(syncChkPt.mytag); + } + tot_jobs_done++; + } + + // send the process_model_info to master if time is long enough + next_job_id = syncChkPt.sendChkptToMaster(process_model_info, need_next_jobID, job_type); + + #endif + } + + return next_job_id; +} + +/* + * Show the result of best model for the partition + */ +void PartitionFinder::showPartitionResult(ModelCheckpoint& part_model_info, int tree_id, double tree_len, const string& model_name, double score, int tag) { + + PhyloTree *this_tree = in_tree->at(tree_id); + double remain_time; + +#ifdef _OPENMP +#pragma omp critical +#endif + { + num_model++; + cout.width(4); + cout << right << num_model << " "; + if (tag != -1) + cout << tag << " "; + cout.width(12); + cout << left << model_name << " "; + cout.width(11); + cout << score << " "; + cout.width(11); + cout << tree_len << " "; + cout << this_tree->aln->name; + if (num_model >= 10) { + remain_time = (double)(total_num_model-num_model)*(getRealTime()-start_time)/num_model; + cout << "\t" << convert_time(getRealTime()-start_time) << " (" + << convert_time(remain_time) << " left)"; + } + cout << endl; + replaceModelInfo(this_tree->aln->name, *model_info, part_model_info); + model_info->dump(); + + // update the number of jobs done + jobdone++; + } +} + +/* + * Show a set of best-model results for the partition + */ +void PartitionFinder::showPartitionResults(ModelCheckpoint& part_model_info, vector& tree_id, vector& tree_len, vector& model_name, vector& score, vector& tag) { + + PhyloTree *this_tree; + double remain_time; + int i; + +#ifdef _OPENMP +#pragma omp critical +#endif + { + replaceModelInfo(model_info, part_model_info); + model_info->dump(); + + for (i=0; iat(tree_id[i]); + num_model++; + 
cout.width(4); + cout << right << num_model << " "; + if (tag[i] != -1) + cout << tag[i] << " "; + cout.width(12); + cout << left << model_name[i] << " "; + cout.width(11); + cout << score[i] << " "; + cout.width(11); + cout << tree_len[i] << " "; + cout << this_tree->aln->name; + if (num_model >= 10) { + remain_time = (double)(total_num_model-num_model)*(getRealTime()-start_time)/num_model; + cout << "\t" << convert_time(getRealTime()-start_time) << " (" + << convert_time(remain_time) << " left)"; + } + cout << endl; + } + + // update the number of jobs done + jobdone += tree_id.size(); + } +} + + +/** + * Process the computation of the best model for a single partition with MPI + * + * nthreads : number of threads available for this job + * need_next_treeID : whether it is needed to get the next tree ID + * + * if need_next_treeID and (MASTER or IS_ASYN_COMM = 0) + * return the next Job ID from master + * else + * return -1 + */ +int PartitionFinder::computeBestModelforOnePartitionMPI(int tree_id, int nthreads, bool need_next_treeID, SyncChkPoint& syncChkPt) { + + CandidateModel best_model; + PhyloTree *this_tree; + ModelCheckpoint part_model_info; + CandidateModelSet candModelSet; + string part_model_name, key; + bool under_mpi, check; + double score, remain_time; + ostringstream ss; + int next_tree_id = -1; + bool noChkMessage = false; + int job_type = 1; // compute the best model for partition + + this_tree = in_tree->at(tree_id); + +#ifdef _OPENMP +#pragma omp critical +#endif +{ + extractModelInfo(this_tree->aln->name, *model_info, part_model_info); +} + + // do the computation + if (params->model_name.empty()) + part_model_name = this_tree->aln->model_name; + + candModelSet.syncChkPoint = &(syncChkPt); + + best_model = candModelSet.test(*params, this_tree, part_model_info, models_block, + nthreads, brlen_type, this_tree->aln->name, part_model_name, test_merge); + + candModelSet.syncChkPoint = nullptr; + + check = 
(best_model.restoreCheckpoint(&part_model_info)); + ASSERT(check); + score = best_model.computeICScore(this_tree->getAlnNSite()); + this_tree->aln->model_name = best_model.getName(); + + if (MPIHelper::getInstance().isMaster()) { + // for Master + + showPartitionResult(part_model_info, tree_id, best_model.tree_len, best_model.getName(), score, syncChkPt.mytag); + if (need_next_treeID) { + next_tree_id = syncChkPt.getNextJobID(); + } + + } else { + + // for Worker -- SYN communication + #ifdef SYN_COMM + + key = "pf_tree_id"; part_model_info.put(key, tree_id); + key = "pf_tree_len"; part_model_info.put(key, best_model.tree_len); + key = "pf_model_name"; part_model_info.put(key, best_model.getName()); + key = "pf_score"; part_model_info.put(key, score); + + // send the part_model_info to master if time is long enough + next_tree_id = syncChkPt.sendChkptToMaster(part_model_info, need_next_treeID, job_type); + + #endif + + // for Worker -- ONESIDE communication + #ifdef ONESIDE_COMM + + // consolidate part_model_info into the process_model_info + #ifdef _OPENMP + #pragma omp critical + #endif + { + replaceModelInfo(this_tree->aln->name, process_model_info, part_model_info); + tree_id_vec.push_back(tree_id); + tree_len_vec.push_back(best_model.tree_len); + model_name_vec.push_back(best_model.getName()); + score_vec.push_back(score); + tag_vec.push_back(syncChkPt.mytag); + } + + // send the process_model_info to master if time is long enough + next_tree_id = syncChkPt.sendChkptToMaster(process_model_info, need_next_treeID, job_type); + + #endif + } + + return next_tree_id; +} + +/** + * compute and process the best model for partitions (for MPI) + */ +void PartitionFinder::getBestModelforPartitionsMPI(int nthreads, vector* >& jobs) { + + if (jobs.empty()) + return; + + bool parallel_job = false; + +#ifdef _OPENMP + parallel_job = (jobs.size() > 1); + #pragma omp parallel for schedule(dynamic) if(parallel_job) +#endif + for (int i=0; i* curr_jobs = jobs[i]; + bool 
need_next_jobID = false; + for (int j=0; j < curr_jobs->size()-1; j++) { + computeBestModelforOnePartitionMPI(curr_jobs->at(j), (parallel_job ? 1 : nthreads), need_next_jobID, syncChkPt); + } + need_next_jobID = true; + int next_job_id = curr_jobs->at(curr_jobs->size()-1); // the last job in the list + while (next_job_id != -1) { + next_job_id = computeBestModelforOnePartitionMPI(next_job_id, (parallel_job ? 1 : nthreads), need_next_jobID, syncChkPt); + } + } + + // collect all the answers from workers + + if (MPIHelper::getInstance().isMaster()) { + SyncChkPoint syncChkPt(this, 0); + while (jobdone < tot_job_num) { + syncChkPt.masterSyncOtherChkpts(false); + } + } else { + + #ifdef ONESIDE_COMM + // worker sends the final process_model_info to master + bool need_nextJobID = false; + bool forceToSyn = true; + int job_type = 1; // partition + if (tree_id_vec.size() > 0) { + SyncChkPoint syncChkPt(this, 0); + syncChkPt.sendChkptToMaster(process_model_info, need_nextJobID, job_type, forceToSyn); + } + #endif + + } + +} + +/** + * compute and process the best model for merges (for MPI) + */ +void PartitionFinder::getBestModelforMergesMPI(int nthreads, vector* >& jobs) { + + if (jobs.empty()) + return; + + bool parallel_job = false; + +#ifdef _OPENMP + parallel_job = (jobs.size() > 1); + #pragma omp parallel for schedule(dynamic) if(parallel_job) +#endif + for (int i=0; i* curr_jobs = jobs[i]; + bool need_next_jobID = false; + for (int j=0; j < curr_jobs->size()-1; j++) { + getBestModelForOneMergeMPI(curr_jobs->at(j), (parallel_job ? 1 : nthreads), need_next_jobID, syncChkPt); + } + need_next_jobID = true; + int next_job_id = curr_jobs->at(curr_jobs->size()-1); // the last job in the list + while (next_job_id != -1) { + next_job_id = getBestModelForOneMergeMPI(next_job_id, (parallel_job ? 
1 : nthreads), need_next_jobID, syncChkPt); + } + } + + // collect all the answers from workers + + if (MPIHelper::getInstance().isMaster()) { + SyncChkPoint syncChkPt(this, 0); + while (jobdone < tot_job_num) { + syncChkPt.masterSyncOtherChkpts(false); + } + } else { + + #ifdef ONESIDE_COMM + // worker sends the final process_model_info to master + bool need_nextJobID = false; + bool forceToSyn = true; + int job_type = 2; // merge + if (tot_jobs_done > 0) { + SyncChkPoint syncChkPt(this, 0); + syncChkPt.sendChkptToMaster(process_model_info, need_nextJobID, job_type, forceToSyn); + } + #endif + + } + +} + +/** + * compute and process the best model for partitions (without MPI) + * nthreads : the number of threads available for these jobs + */ +void PartitionFinder::getBestModelforPartitionsNoMPI(int nthreads, vector >& jobs) { + + if (jobs.empty()) + return; + + bool parallel_job = false; + +#ifdef _OPENMP + parallel_job = ((!params->model_test_and_tree) && nthreads > 1 && jobs.size() > nthreads); +#pragma omp parallel for schedule(dynamic) reduction(+: lhsum, dfsum) if (parallel_job) +#endif + for (int j = 0; j < jobs.size(); j++) { + int tree_id = jobs[j].first; + PhyloTree *this_tree = in_tree->at(tree_id); + // scan through models for this partition, assuming the information occurs consecutively + ModelCheckpoint part_model_info; + + #ifdef _OPENMP + #pragma omp critical + #endif + { + extractModelInfo(this_tree->aln->name, *model_info, part_model_info); + } + + // do the computation + string part_model_name; + if (params->model_name.empty()) + part_model_name = this_tree->aln->model_name; + CandidateModel best_model; + + best_model = CandidateModelSet().test(*params, this_tree, part_model_info, models_block, + (parallel_job ? 
1 : nthreads), brlen_type, this_tree->aln->name, part_model_name, test_merge); + + bool check = (best_model.restoreCheckpoint(&part_model_info)); + ASSERT(check); + + double score = best_model.computeICScore(this_tree->getAlnNSite()); + this_tree->aln->model_name = best_model.getName(); + lhsum += (lhvec[tree_id] = best_model.logl); + dfsum += (dfvec[tree_id] = best_model.df); + lenvec[tree_id] = best_model.tree_len; + + #ifdef _OPENMP + #pragma omp critical + #endif + { + num_model++; + cout.width(4); + cout << right << num_model << " "; + cout.width(12); + cout << left << best_model.getName() << " "; + cout.width(11); + cout << score << " "; + cout.width(11); + cout << best_model.tree_len << " "; + cout << this_tree->aln->name; + if (num_model >= 10) { + double remain_time = (total_num_model-num_model)*(getRealTime()-start_time)/num_model; + cout << "\t" << convert_time(getRealTime()-start_time) << " (" + << convert_time(remain_time) << " left)"; + } + cout << endl; + replaceModelInfo(this_tree->aln->name, *model_info, part_model_info); + model_info->dump(); + } + } +} + +/** + * compute and process the best model for merges (without MPI) + * nthreads : the number of threads available for these jobs + */ +void PartitionFinder::getBestModelforMergesNoMPI(int nthreads, vector >& jobs) { + if (jobs.empty()) + return; + + bool parallel_job = false; + +#ifdef _OPENMP + parallel_job = ((!params->model_test_and_tree) && nthreads > 1 && jobs.size() > nthreads); +#pragma omp parallel for schedule(dynamic) if (parallel_job) +#endif + for (int j = 0; j < jobs.size(); j++) { + // information of current partitions pair + int pair = jobs[j].first; + ModelPair cur_pair; + cur_pair.part1 = closest_pairs[pair].first; + cur_pair.part2 = closest_pairs[pair].second; + ASSERT(cur_pair.part1 < cur_pair.part2); + cur_pair.merged_set.insert(gene_sets[cur_pair.part1].begin(), gene_sets[cur_pair.part1].end()); + cur_pair.merged_set.insert(gene_sets[cur_pair.part2].begin(), 
gene_sets[cur_pair.part2].end()); + cur_pair.set_name = getSubsetName(in_tree, cur_pair.merged_set); + double weight1 = getSubsetAlnLength(in_tree, gene_sets[cur_pair.part1]); + double weight2 = getSubsetAlnLength(in_tree, gene_sets[cur_pair.part2]); + double sum = 1.0 / (weight1 + weight2); + weight1 *= sum; + weight2 *= sum; + CandidateModel best_model; + bool done_before = false; +#ifdef _OPENMP +#pragma omp critical +#endif + { + // if pairs previously examined, reuse the information + model_info->startStruct(cur_pair.set_name); + if (model_info->getBestModel(best_model.subst_name)) { + best_model.restoreCheckpoint(model_info); + done_before = true; + } + model_info->endStruct(); + } + ModelCheckpoint part_model_info; + double cur_tree_len = 0.0; + if (!done_before) { + Alignment *aln = super_aln->concatenateAlignments(cur_pair.merged_set); + PhyloTree *tree = in_tree->extractSubtree(cur_pair.merged_set); + //tree->scaleLength((weight1*lenvec[cur_pair.part1] + weight2*lenvec[cur_pair.part2])/tree->treeLength()); + tree->scaleLength(sqrt(lenvec[cur_pair.part1]*lenvec[cur_pair.part2])/tree->treeLength()); + cur_tree_len = tree->treeLength(); + tree->setAlignment(aln); + +#ifdef _OPENMP +#pragma omp critical +#endif + { + extractModelInfo(cur_pair.set_name, *model_info, part_model_info); + transferModelParameters(in_tree, *model_info, part_model_info, gene_sets[cur_pair.part1], gene_sets[cur_pair.part2]); + } + + tree->num_precision = in_tree->num_precision; + tree->setParams(params); + tree->sse = params->SSE; + tree->optimize_by_newton = params->optimize_by_newton; + tree->setNumThreads(params->model_test_and_tree ? num_threads : 1); + { + tree->setCheckpoint(&part_model_info); + // trick to restore checkpoint + tree->restoreCheckpoint(); + tree->saveCheckpoint(); + } + best_model = CandidateModelSet().test(*params, tree, part_model_info, models_block, + parallel_job ? 
1 : nthreads, params->partition_type, cur_pair.set_name, "", true); + best_model.restoreCheckpoint(&part_model_info); + delete tree; + delete aln; + } + cur_pair.logl = best_model.logl; + cur_pair.df = best_model.df; + cur_pair.model_name = best_model.getName(); + cur_pair.tree_len = best_model.tree_len; + double lhnew = lhsum - lhvec[cur_pair.part1] - lhvec[cur_pair.part2] + best_model.logl; + int dfnew = dfsum - dfvec[cur_pair.part1] - dfvec[cur_pair.part2] + best_model.df; + cur_pair.score = computeInformationScore(lhnew, dfnew, ssize, params->model_test_criterion); +#ifdef _OPENMP +#pragma omp critical +#endif + { + if (!done_before) { + replaceModelInfo(cur_pair.set_name, *model_info, part_model_info); + model_info->dump(); + num_model++; + cout.width(4); + cout << right << num_model << " "; + cout.width(12); + cout << left << best_model.getName() << " "; + cout.width(11); + cout << cur_pair.score << " "; + cout.width(11); + cout << cur_pair.tree_len << " " << cur_pair.set_name; + if (num_model >= 10) { + double remain_time = max(total_num_model-num_model, (int64_t)0)*(getRealTime()-start_time)/num_model; + cout << "\t" << convert_time(getRealTime()-start_time) << " (" + << convert_time(remain_time) << " left)"; + } + cout << endl; + } + if (cur_pair.score < inf_score) + better_pairs.insertPair(cur_pair); + } + } +} + +/** + * compute the best model + * job_type = 1 : for all partitions + * job_type = 2 : for all merges + */ +void PartitionFinder::getBestModel(int job_type) { + + vector > jobIDs; + vector* > currJobs; + vector* jobs; + vector closest_p_vector; + bool run_parallel; + int i,w; + + if (job_type == 1) { + // for partitions + // sort partition by computational cost for OpenMP effciency + for (i = 0; i < in_tree->size(); i++) { + Alignment *this_aln = in_tree->at(i)->aln; + // computation cost is proportional to #sequences, #patterns, and #states + jobIDs.push_back({i, ((double)this_aln->getNSeq())*this_aln->getNPattern()*this_aln->num_states}); + } 
+ } else { + // for merges + ASSERT(gene_sets.size() == lenvec.size()); + better_pairs.clear(); + // find closest partition pairs + closest_pairs.clear(); + findClosestPairs(super_aln, lenvec, gene_sets, false, closest_pairs); + if (params->partfinder_log_rate) { + // additional consider pairs by log-rate + vector log_closest_pairs; + findClosestPairs(super_aln, lenvec, gene_sets, true, log_closest_pairs); + mergePairs(closest_pairs, log_closest_pairs); + } + +/* +#ifdef _IQTREE_MPI + if (MPIHelper::getInstance().getNumProcesses() > 0) { + if (MPIHelper::getInstance().isMaster()) { + // for Master + // send to each worker + for (i=0; iat(closest_pairs[i].first)->aln; + closest_pairs[i].distance = -((double)this_aln->getNSeq())*this_aln->getNPattern()*this_aln->num_states; + this_aln = in_tree->at(closest_pairs[i].second)->aln; + closest_pairs[i].distance -= ((double)this_aln->getNSeq())*this_aln->getNPattern()*this_aln->num_states; + jobIDs.push_back({i, -closest_pairs[i].distance}); + } + } + tot_job_num = jobIDs.size(); + jobdone = 0; + + if (!params->model_test_and_tree && jobIDs.size() > 1) { + if ((num_threads > 1 && num_processes == 1)|| (num_processes > 1 && MPIHelper::getInstance().isMaster())) { + std::sort(jobIDs.begin(), jobIDs.end(), compareJob); + } + } + +#ifdef _IQTREE_MPI + if (num_processes == 1 || params->model_test_and_tree) { + // not using MPI +#endif + + if (job_type == 1) { + getBestModelforPartitionsNoMPI(num_threads, jobIDs); + } else { + getBestModelforMergesNoMPI(num_threads, jobIDs); + } + +#ifdef _IQTREE_MPI + } else { + + // assign the initial jobs to processors + // "currJobs" will contain the set of initial jobs this processor needs to work on + jobAssignment(jobIDs, currJobs); + + // if the number of job lists < num_threads, then consolid all the jobs to the first list + if (currJobs.size() > 1 && currJobs.size() < num_threads) { + for (i=1; iinsert(currJobs[0]->end(),currJobs[i]->begin(),currJobs[i]->end()); + delete(currJobs[i]); 
+ } + currJobs.resize(1); + } + + // initialize the value of base + base = MPIHelper::getInstance().getProcessID() * num_threads; + + // initialize the syn time + last_syn_time = getRealTime(); + + // initialize the vectors + tree_id_vec.clear(); + tree_len_vec.clear(); + model_name_vec.clear(); + score_vec.clear(); + set_name_vec.clear(); + tag_vec.clear(); + tot_jobs_done = 0; + + // initialize the checkpoint for the whole processor + process_model_info.clear(); + + // compute the best model + if (job_type == 1) { + getBestModelforPartitionsMPI(num_threads, currJobs); + } else { + getBestModelforMergesMPI(num_threads, currJobs); + } + + MPI_Barrier(MPI_COMM_WORLD); + + // distribute the checkpoints from Master to Workers + + if (MPIHelper::getInstance().isMaster()) { + // Master processor broadcast model_info to other processors + for (i = 1; i < num_processes; i++) { + MPIHelper::getInstance().sendCheckpoint(model_info, i); + } + } else { + MPIHelper::getInstance().recvCheckpoint(model_info, PROC_MASTER); + } + + // consolidate the results + if (job_type == 1) { + consolidPartitionResults(); + } else { + consolidMergeResults(); + } + } +#endif + +} + +/* + * Consolidate the partition results (for MPI) + */ +void PartitionFinder::consolidPartitionResults() { + int i; + +#ifdef _IQTREE_MPI + for (i = 0; i < in_tree->size(); i++) { + PhyloTree *this_tree = in_tree->at(i); + + string bestModel_key = this_tree->aln->name + CKP_SEP + "best_model_" + criterionName(params->model_test_criterion); + string bestModel; + string bestScore_key = this_tree->aln->name + CKP_SEP + "best_score_" + criterionName(params->model_test_criterion); + double bestScore; + + ASSERT(model_info->getString(bestModel_key, bestModel)); + ASSERT(model_info->get(bestScore_key, bestScore)); + + string info_key = this_tree->aln->name + CKP_SEP + bestModel; + string info; + double logL; + int df; + double treeLen; + + ASSERT(model_info->getString(info_key, info)); + size_t pos1 = 
info.find_first_of(" "); + ASSERT (pos1 != string::npos && pos1 > 0); + size_t pos2 = info.find_first_of(" ", pos1+1); + ASSERT (pos2 != string::npos && pos2 > pos1+1); + logL = atof(info.substr(0,pos1).c_str()); + df = atoi(info.substr(pos1+1,pos2-pos1-1).c_str()); + treeLen = atof(info.substr(pos2+1).c_str()); + + this_tree->aln->model_name = bestModel; + lhsum += (lhvec[i] = logL); + dfsum += (dfvec[i] = df); + lenvec[i] = treeLen; + } +#endif +} + +/* + * Consolidate the merge results (for MPI) + */ +void PartitionFinder::consolidMergeResults() { + +#ifdef _IQTREE_MPI + better_pairs.clear(); + for (size_t pair = 0; pair < closest_pairs.size(); pair++) { + // information of current partitions pair + ModelPair cur_pair; + cur_pair.part1 = closest_pairs[pair].first; + cur_pair.part2 = closest_pairs[pair].second; + ASSERT(cur_pair.part1 < cur_pair.part2); + cur_pair.merged_set.clear(); + cur_pair.merged_set.insert(gene_sets[cur_pair.part1].begin(), gene_sets[cur_pair.part1].end()); + cur_pair.merged_set.insert(gene_sets[cur_pair.part2].begin(), gene_sets[cur_pair.part2].end()); + cur_pair.set_name = getSubsetName(in_tree, cur_pair.merged_set); + double weight1 = getSubsetAlnLength(in_tree, gene_sets[cur_pair.part1]); + double weight2 = getSubsetAlnLength(in_tree, gene_sets[cur_pair.part2]); + double sum = 1.0 / (weight1 + weight2); + weight1 *= sum; + weight2 *= sum; + CandidateModel best_model; + + model_info->startStruct(cur_pair.set_name); + ASSERT(model_info->getBestModel(best_model.subst_name)); + best_model.restoreCheckpoint(model_info); + model_info->endStruct(); + + cur_pair.logl = best_model.logl; + cur_pair.df = best_model.df; + cur_pair.model_name = best_model.getName(); + cur_pair.tree_len = best_model.tree_len; + + double lhnew = lhsum - lhvec[cur_pair.part1] - lhvec[cur_pair.part2] + best_model.logl; + int dfnew = dfsum - dfvec[cur_pair.part1] - dfvec[cur_pair.part2] + best_model.df; + cur_pair.score = computeInformationScore(lhnew, dfnew, ssize, 
params->model_test_criterion); + + if (cur_pair.score < inf_score) { + better_pairs.insertPair(cur_pair); + } + } + +#endif +} + +void PartitionFinder::test_PartitionModel() { + + int i, job_type; + + lhsum = 0.0; + dfsum = 0; + if (params->partition_type == BRLEN_FIX || params->partition_type == BRLEN_SCALE) { + dfsum = in_tree->getNBranchParameters(BRLEN_OPTIMIZE); + if (params->partition_type == BRLEN_SCALE) + dfsum -= 1; + } + ssize = in_tree->getAlnNSite(); + num_model = 0; + total_num_model = in_tree->size(); + + // initialize the shared memory space + initialMPIShareMemory(); + + // 2017-06-07: -rcluster-max for max absolute number of pairs + if (params->partfinder_rcluster_max == 0) { + params->partfinder_rcluster_max = max((size_t)1000, 10 * in_tree->size()); + } + + if (params->partition_merge != MERGE_NONE) { + double p = params->partfinder_rcluster/100.0; + size_t num_pairs = round(in_tree->size()*(in_tree->size()-1)*p/2); + if (p < 1.0) + num_pairs = min(num_pairs, params->partfinder_rcluster_max); + total_num_model += num_pairs; + for (i = in_tree->size()-2; i > 0; i--) + total_num_model += max(round(i*p), 1.0); + } + +#ifdef _OPENMP + if (num_threads <= 0) { + // partition selection scales well with many cores + num_threads = min((int64_t)countPhysicalCPUCores(), total_num_model); + num_threads = min(num_threads, params->num_threads_max); + omp_set_num_threads(num_threads); + cout << "NUMBER OF THREADS FOR PARTITION FINDING: " << num_threads << endl; + } +#endif + + start_time = getRealTime(); + + super_aln = ((SuperAlignment*)in_tree->aln); + + cout << "Selecting individual models for " << in_tree->size() << " charsets using " << criterionName(params->model_test_criterion) << "..." << endl; + //cout << " No. AIC AICc BIC Charset" << endl; + cout << " No. 
Model Score TreeLen Charset" << endl; + + lhvec.resize(in_tree->size()); + dfvec.resize(in_tree->size()); + lenvec.resize(in_tree->size()); + + test_merge = (params->partition_merge != MERGE_NONE) && params->partition_type != TOPO_UNLINKED && (in_tree->size() > 1); + + brlen_type = params->partition_type; + if (brlen_type == TOPO_UNLINKED) { + brlen_type = BRLEN_OPTIMIZE; + } + + // compute the best model for all partitions + job_type = 1; // for all partitions + getBestModel(job_type); + + // in case ModelOMatic change the alignment + fixPartitions(in_tree); + + inf_score = computeInformationScore(lhsum, dfsum, ssize, params->model_test_criterion); + cout << "Full partition model " << criterionName(params->model_test_criterion) + << " score: " << inf_score << " (LnL: " << lhsum << " df:" << dfsum << ")" << endl; + + if (!test_merge) { + super_aln->printBestPartition((string(params->out_prefix) + ".best_scheme.nex").c_str()); + super_aln->printBestPartitionRaxml((string(params->out_prefix) + ".best_scheme").c_str()); + model_info->dump(); + return; + } + + StrVector model_names; + StrVector greedy_model_trees; + + gene_sets.resize(in_tree->size()); + model_names.resize(in_tree->size()); + greedy_model_trees.resize(in_tree->size()); + for (i = 0; i < gene_sets.size(); i++) { + gene_sets[i].insert(i); + model_names[i] = in_tree->at(i)->aln->model_name; + greedy_model_trees[i] = in_tree->at(i)->aln->name; + } + + if (params->partition_merge == MERGE_KMEANS) { + // kmeans cluster based on parition tree length + double cur_score = inf_score; + for (int ncluster = in_tree->size()-1; ncluster >= 1; ncluster--) { + vector > this_gene_sets; + StrVector this_model_names; + //double sum = in_tree->size()/std::accumulate(lenvec.begin(), lenvec.end(), 0.0); + double score = doKmeansClustering(*params, in_tree, ncluster, lenvec, *model_info, + models_block, num_threads, this_gene_sets, this_model_names); + if (score < cur_score) { + cout << "Better score found: " << score << 
endl; + cur_score = score; + gene_sets = this_gene_sets; + model_names = this_model_names; + } else { + //break; + } + } + } else { + cout << "Merging models to increase model fit (about " << total_num_model << " total partition schemes)..." << endl; } - } - *lrt *= 2.0; *df -= 2.0; - - /* write back the results */ - coef0[0]=coef[0]; coef0[1]=coef[1]; - *se = v11 + v22 - 2*v12; -// vmat[0][0]=v11;vmat[0][1]=vmat[1][0]=v12;vmat[1][1]=v22; - if(loop==mleloopmax || *df< -0.01) i=1; else i=0; - return i; -} -/* END CODE WAS TAKEN FROM CONSEL PROGRAM */ + /* following implements the greedy algorithm of Lanfear et al. (2012) */ + while (params->partition_merge != MERGE_KMEANS && gene_sets.size() >= 2) { + // stepwise merging charsets -/** - @param tree_lhs RELL score matrix of size #trees x #replicates -*/ -void performAUTest(Params ¶ms, PhyloTree *tree, double *pattern_lhs, vector &info) { - - if (params.topotest_replicates < 10000) - outWarning("Too few replicates for AU test. At least -zb 10000 for reliable results!"); - - /* STEP 1: specify scale factors */ - size_t nscales = 10; - double r[] = {0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4}; - double rr[] = {sqrt(0.5), sqrt(0.6), sqrt(0.7), sqrt(0.8), sqrt(0.9), 1.0, - sqrt(1.1), sqrt(1.2), sqrt(1.3), sqrt(1.4)}; - double rr_inv[] = {sqrt(1/0.5), sqrt(1/0.6), sqrt(1/0.7), sqrt(1/0.8), sqrt(1/0.9), 1.0, - sqrt(1/1.1), sqrt(1/1.2), sqrt(1/1.3), sqrt(1/1.4)}; + // get the closest partition pairs, and + // compute the best model for each pair + job_type = 2; // for all merges + getBestModel(job_type); - /* STEP 2: compute bootstrap proportion */ - size_t ntrees = info.size(); - size_t nboot = params.topotest_replicates; -// double nboot_inv = 1.0 / nboot; - - size_t nptn = tree->getAlnNPattern(); - size_t maxnptn = get_safe_upper_limit(nptn); - -// double *bp = new double[ntrees*nscales]; -// memset(bp, 0, sizeof(double)*ntrees*nscales); - - double *treelhs; - cout << (ntrees*nscales*nboot*sizeof(double) >> 20) << " 
MB required for AU test" << endl; - treelhs = new double[ntrees*nscales*nboot]; - if (!treelhs) - outError("Not enough memory to perform AU test!"); - - size_t k, tid, ptn; + if (better_pairs.empty()) break; + ModelPairSet compatible_pairs; - double start_time = getRealTime(); + int num_comp_pairs = params->partition_merge == MERGE_RCLUSTERF ? gene_sets.size()/2 : 1; + better_pairs.getCompatiblePairs(num_comp_pairs, compatible_pairs); + if (compatible_pairs.size() > 1) + cout << compatible_pairs.size() << " compatible better partition pairs found" << endl; - cout << "Generating " << nscales << " x " << nboot << " multiscale bootstrap replicates... "; + // 2017-12-21: simultaneously merging better pairs + for (auto it_pair = compatible_pairs.begin(); it_pair != compatible_pairs.end(); it_pair++) { + ModelPair opt_pair = it_pair->second; -#ifdef _OPENMP - #pragma omp parallel private(k, tid, ptn) - { - int *rstream; - init_random(params.ran_seed + omp_get_thread_num(), false, &rstream); -#else - int *rstream = randstream; -#endif - size_t boot; - int *boot_sample = aligned_alloc(maxnptn); - memset(boot_sample, 0, maxnptn*sizeof(int)); + lhsum = lhsum - lhvec[opt_pair.part1] - lhvec[opt_pair.part2] + opt_pair.logl; + dfsum = dfsum - dfvec[opt_pair.part1] - dfvec[opt_pair.part2] + opt_pair.df; + inf_score = computeInformationScore(lhsum, dfsum, ssize, params->model_test_criterion); + ASSERT(inf_score <= opt_pair.score + 0.1); + + cout << "Merging " << opt_pair.set_name << " with " << criterionName(params->model_test_criterion) + << " score: " << inf_score << " (LnL: " << lhsum << " df: " << dfsum << ")" << endl; + // change entry opt_part1 to merged one + gene_sets[opt_pair.part1] = opt_pair.merged_set; + lhvec[opt_pair.part1] = opt_pair.logl; + dfvec[opt_pair.part1] = opt_pair.df; + lenvec[opt_pair.part1] = opt_pair.tree_len; + model_names[opt_pair.part1] = opt_pair.model_name; + greedy_model_trees[opt_pair.part1] = "(" + greedy_model_trees[opt_pair.part1] + "," + + 
greedy_model_trees[opt_pair.part2] + ")" + + convertIntToString(in_tree->size()-gene_sets.size()+1) + ":" + + convertDoubleToString(inf_score); + + // delete entry opt_part2 + lhvec.erase(lhvec.begin() + opt_pair.part2); + dfvec.erase(dfvec.begin() + opt_pair.part2); + lenvec.erase(lenvec.begin() + opt_pair.part2); + gene_sets.erase(gene_sets.begin() + opt_pair.part2); + model_names.erase(model_names.begin() + opt_pair.part2); + greedy_model_trees.erase(greedy_model_trees.begin() + opt_pair.part2); + + // decrease part ID for all pairs beyond opt_pair.part2 + auto next_pair = it_pair; + for (next_pair++; next_pair != compatible_pairs.end(); next_pair++) { + if (next_pair->second.part1 > opt_pair.part2) + next_pair->second.part1--; + if (next_pair->second.part2 > opt_pair.part2) + next_pair->second.part2--; + } + } + } + + string final_model_tree; + if (greedy_model_trees.size() == 1) + final_model_tree = greedy_model_trees[0]; + else { + final_model_tree = "("; + for (i = 0; i < greedy_model_trees.size(); i++) { + if (i>0) + final_model_tree += ","; + final_model_tree += greedy_model_trees[i]; + } + final_model_tree += ")"; + } + + cout << "Agglomerative model selection: " << final_model_tree << endl; - double *boot_sample_dbl = aligned_alloc(maxnptn); + if (gene_sets.size() < in_tree->size()) + mergePartitions(in_tree, gene_sets, model_names); + + if (!iEquals(params->merge_models, "all")) { + // test all candidate models again + lhsum = 0.0; + dfsum = 0; + if (params->partition_type == BRLEN_FIX || params->partition_type == BRLEN_SCALE) { + dfsum = in_tree->getNBranchParameters(BRLEN_OPTIMIZE); + if (params->partition_type == BRLEN_SCALE) + dfsum -= 1; + } + // compute the best model for all partitions + // but this time "test_merge = false" + test_merge = false; + job_type = 1; // for all partitions + getBestModel(job_type); + } + + inf_score = computeInformationScore(lhsum, dfsum, ssize, params->model_test_criterion); + cout << "Best partition model " << 
criterionName(params->model_test_criterion) << " score: " << inf_score << " (LnL: " << lhsum << " df:" << dfsum << ")" << endl; + + ((SuperAlignment*)in_tree->aln)->printBestPartition((string(params->out_prefix) + ".best_scheme.nex").c_str()); + ((SuperAlignment*)in_tree->aln)->printBestPartitionRaxml((string(params->out_prefix) + ".best_scheme").c_str()); + model_info->dump(); -#ifdef _OPENMP - #pragma omp for schedule(dynamic) + // free the MPI share memory + freeMPIShareMemory(); +} + +/* + * initialize the shared memory space to be accessed by the other processors + */ +void PartitionFinder::initialMPIShareMemory() { +#ifdef _IQTREE_MPI + #ifdef ONESIDE_COMM + if (MPIHelper::getInstance().getProcessID()==PROC_MASTER) { + val_ptr = (int*) malloc(sizeof(int)); + MPI_Win_create(val_ptr, sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win); + } else { + val_ptr = NULL; + MPI_Win_create(val_ptr, 0, sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win); + } + #else + win = NULL; + val_ptr = NULL; + #endif +#endif +} + +/* + * free the shared memory space + */ +void PartitionFinder::freeMPIShareMemory() { +#ifdef _IQTREE_MPI + #ifdef ONESIDE_COMM + MPI_Win_free(&win); + #endif + if (val_ptr != nullptr) { + delete[] val_ptr; + val_ptr = nullptr; + } #endif - for (k = 0; k < nscales; k++) { - string str = "SCALE=" + convertDoubleToString(r[k]); - for (boot = 0; boot < nboot; boot++) { - tree->aln->createBootstrapAlignment(boot_sample, str.c_str(), rstream); - for (ptn = 0; ptn < maxnptn; ptn++) - boot_sample_dbl[ptn] = boot_sample[ptn]; - double max_lh = -DBL_MAX, second_max_lh = -DBL_MAX; - int max_tid = -1; - for (tid = 0; tid < ntrees; tid++) { - double *pattern_lh = pattern_lhs + (tid*maxnptn); - double tree_lh; - if (params.SSE == LK_386) { - tree_lh = 0.0; - for (ptn = 0; ptn < nptn; ptn++) - tree_lh += pattern_lh[ptn] * boot_sample_dbl[ptn]; +} + +/* + * For MPI + * assign initial jobs to processors + * input: a set of jobs ordered by the estimated 
computational costs + * + * DIST_RATIO: the ratio of the total jobs distributed to the processors + */ +void PartitionFinder::jobAssignment(vector > &job_ids, vector* >&currJobs) { + + int num_job_to_dist = (int) (DIST_RATIO * job_ids.size()); // the number of jobs (at least) going to distribe + if (num_job_to_dist > job_ids.size()) + num_job_to_dist = job_ids.size(); + int num_job_assigned = 0; + int n = num_processes * num_threads; + multimap*,jobcomp> assignJobs; + multimap*,jobcomp>::iterator itr, itr2; + int i,w; + + nextjob = 0; + currJobs.clear(); + remain_job_list.clear(); + + if (MPIHelper::getInstance().isMaster()) { + // MASTER: assign one job to every thread of the processors + for (i=0; i; + job->push_back(job_ids[i].first); + assignJobs.insert(pair* > (job_ids[i].second, job)); + num_job_assigned++; + } + while (num_job_assigned < num_job_to_dist) { + itr = assignJobs.begin(); + itr->second->push_back(job_ids[num_job_assigned].first); + assignJobs.insert(pair* > (itr->first + job_ids[num_job_assigned].second, itr->second)); + assignJobs.erase(itr); + num_job_assigned++; + } + /* + cout << "!! assignment of the jobs:" << endl; + i=0; + for (itr=assignJobs.begin(); itr!=assignJobs.end(); itr++) { + cout << "!! 
job list " << ++i << " has " << itr->second->size() << " jobs with estimated cost " << itr->first << endl; + } + */ + // place all the unassigned jobs to the array remain_job_list + for (i=num_job_assigned; isecond); + itr++; + } + } + +#ifdef _IQTREE_MPI + // MASTER: assign num_threads jobs to each worker + for (w=1; wsecond), w); + delete (itr->second); + itr++; } else { - tree_lh = tree->dotProductDoubleCall(pattern_lh, boot_sample_dbl, nptn); + vector empty_array; + sendVector(empty_array, w); } - // rescale lh - tree_lh /= r[k]; - - // find the max and second max - if (tree_lh > max_lh) { - second_max_lh = max_lh; - max_lh = tree_lh; - max_tid = tid; - } else if (tree_lh > second_max_lh) - second_max_lh = tree_lh; - - treelhs[(tid*nscales+k)*nboot + boot] = tree_lh; } - - // compute difference from max_lh - for (tid = 0; tid < ntrees; tid++) - if (tid != max_tid) - treelhs[(tid*nscales+k)*nboot + boot] = max_lh - treelhs[(tid*nscales+k)*nboot + boot]; - else - treelhs[(tid*nscales+k)*nboot + boot] = second_max_lh - max_lh; -// bp[k*ntrees+max_tid] += nboot_inv; + // send the remain job list to each worker + sendVector(remain_job_list, w); } - - // sort the replicates - for (tid = 0; tid < ntrees; tid++) { - quicksort(treelhs + (tid*nscales+k)*nboot, 0, nboot-1); + if (val_ptr != nullptr) { + val_ptr[0] = 0; } - - } +#endif - aligned_free(boot_sample_dbl); - aligned_free(boot_sample); + } else { -#ifdef _OPENMP - finish_random(rstream); - } +#ifdef _IQTREE_MPI + // WORKER: receive jobs from the master + for (i=0; i* job_array = new vector; + recVector(*job_array); + if (job_array->size() > 0) + currJobs.push_back(job_array); + else + delete(job_array); + } + // receive the remain job list from the master + recVector(remain_job_list); #endif -// if (verbose_mode >= VB_MED) { -// cout << "scale"; -// for (k = 0; k < nscales; k++) -// cout << "\t" << r[k]; -// cout << endl; -// for (tid = 0; tid < ntrees; tid++) { -// cout << tid; -// for (k = 0; k < nscales; k++) 
{ -// cout << "\t" << bp[tid+k*ntrees]; -// } -// cout << endl; -// } -// } - - cout << getRealTime() - start_time << " seconds" << endl; + } +} + +/* constructor + */ +SyncChkPoint::SyncChkPoint(PartitionFinder* pf, int thres_id) { + + pfinder = pf; + mytag = thres_id + pf->base; +} + +/* + * FOR MASTER + * Show the other worker's result of best model + */ +void SyncChkPoint::showResult(ModelCheckpoint& part_model_info, int work_tag) { + string key, data_num; + int job_type; - /* STEP 3: weighted least square fit */ + key = "pf_data_num"; + ASSERT(part_model_info.get(key, data_num)); + key = "pf_job_type"; + ASSERT(part_model_info.get(key, job_type)); - double *cc = new double[nscales]; - double *w = new double[nscales]; - double *this_bp = new double[nscales]; - cout << "TreeID\tAU\tRSS\td\tc" << endl; - for (tid = 0; tid < ntrees; tid++) { - double *this_stat = treelhs + tid*nscales*nboot; - double xn = this_stat[(nscales/2)*nboot + nboot/2], x; - double c, d; // c, d in original paper - int idf0 = -2; - double z = 0.0, z0 = 0.0, thp = 0.0, th = 0.0, ze = 0.0, ze0 = 0.0; - double pval, se; - int df; - double rss = 0.0; - int step; - const int max_step = 30; - bool failed = false; - for (step = 0; step < max_step; step++) { - x = xn; - int num_k = 0; - for (k = 0; k < nscales; k++) { - this_bp[k] = cntdist3(this_stat + k*nboot, nboot, x) / nboot; - if (this_bp[k] <= 0 || this_bp[k] >= 1) { - cc[k] = w[k] = 0.0; - } else { - double bp_val = this_bp[k]; - cc[k] = -gsl_cdf_ugaussian_Pinv(bp_val); - double bp_pdf = gsl_ran_ugaussian_pdf(cc[k]); - w[k] = bp_pdf*bp_pdf*nboot / (bp_val*(1.0-bp_val)); - num_k++; - } - } - df = num_k-2; - if (num_k >= 2) { - // first obtain d and c by weighted least square - doWeightedLeastSquare(nscales, w, rr, rr_inv, cc, d, c, se); - - se = gsl_ran_ugaussian_pdf(d-c)*sqrt(se); - - // second, perform MLE estimate of d and c - // OptimizationAUTest mle(d, c, nscales, this_bp, rr, rr_inv); - // mle.optimizeDC(); - // d = mle.d; - // c = 
mle.c; - - /* STEP 4: compute p-value according to Eq. 11 */ - pval = 1.0 - gsl_cdf_ugaussian_P(d-c); - z = -pval; - ze = se; - // compute sum of squared difference - rss = 0.0; - for (k = 0; k < nscales; k++) { - double diff = cc[k] - (rr[k]*d + rr_inv[k]*c); - rss += w[k] * diff * diff; - } - - } else { - // not enough data for WLS - double sum = 0.0; - for (k = 0; k < nscales; k++) - sum += cc[k]; - if (sum >= 0.0) - pval = 0.0; - else - pval = 1.0; - se = 0.0; - d = c = 0.0; - rss = 0.0; - if (verbose_mode >= VB_MED) - cout << " error in wls" << endl; - } - - // maximum likelhood fit -// double coef0[2] = {d, c}; -// double df; -// int mlefail = mlecoef(this_bp, r, nboot, nscales, coef0, &rss, &df, &se); -// -// if (!mlefail) { -// d = coef0[0]; -// c = coef0[1]; -// pval = 1.0 - gsl_cdf_ugaussian_P(d-c); -// z = -pval; -// ze = se; -// } - - if (verbose_mode >= VB_MED) { - cout.unsetf(ios::fixed); - cout << "\t" << step << "\t" << th << "\t" << x << "\t" << pval << "\t" << se << "\t" << nscales-2 << "\t" << d << "\t" << c << "\t" << z << "\t" << ze << "\t" << rss << endl; - } - - if(df < 0 && idf0 < 0) { failed = true; break;} /* degenerated */ - - if ((df < 0) || (idf0 >= 0 && (z-z0)*(x-thp) > 0.0 && fabs(z-z0)>0.1*ze0)) { - if (verbose_mode >= VB_MED) - cout << " non-monotone" << endl; - th=x; - xn=0.5*x+0.5*thp; - continue; - } - if(idf0 >= 0 && (fabs(z-z0)<0.01*ze0)) { - if(fabs(th)<1e-10) - xn=th; - else th=x; - } else - xn=0.5*th+0.5*x; - info[tid].au_pvalue = pval; - thp=x; - z0=z; - ze0=ze; - idf0 = nscales-2; - if(fabs(x-th)<1e-10) break; + if (data_num == "single") { + double tree_len,score; + string model_name, set_name; + int tree_id; + bool done_before; + + key = "pf_tree_len"; + ASSERT(part_model_info.get(key, tree_len)); + key = "pf_model_name"; + ASSERT(part_model_info.get(key, model_name)); + key = "pf_score"; + ASSERT(part_model_info.get(key, score)); + + if (job_type == 1) { + // partition + key = "pf_tree_id"; + 
ASSERT(part_model_info.get(key, tree_id)); + pfinder->showPartitionResult(part_model_info, tree_id, tree_len, model_name, score, work_tag); + } else { + // merge + key = "pf_set_name"; + ASSERT(part_model_info.get(key, set_name)); + key = "pf_done_before"; + ASSERT(part_model_info.getBool(key, done_before)); + pfinder->showMergeResult(part_model_info, tree_len, model_name, score, set_name, done_before, work_tag); } - if (failed && verbose_mode >= VB_MED) - cout << " degenerated" << endl; + } else { + vector tree_id_vec; + vector tree_len_vec; + vector model_name_vec; + vector score_vec; + vector tag_vec; + vector set_name_vec; + int tot_jobsdone; + key = "pf_tree_len"; + ASSERT(part_model_info.getVector(key, tree_len_vec)); + key = "pf_model_name"; + ASSERT(part_model_info.getVector(key, model_name_vec)); + key = "pf_score"; + ASSERT(part_model_info.getVector(key, score_vec)); + key = "pf_tag"; + ASSERT(part_model_info.getVector(key, tag_vec)); - if (step == max_step) { - if (verbose_mode >= VB_MED) - cout << " non-convergence" << endl; - failed = true; + if (job_type == 1) { + // partition + key = "pf_tree_id"; + ASSERT(part_model_info.getVector(key, tree_id_vec)); + pfinder->showPartitionResults(part_model_info, tree_id_vec, tree_len_vec, model_name_vec, score_vec, tag_vec); + } else { + // merge + key = "pf_set_name"; + ASSERT(part_model_info.getVector(key, set_name_vec)); + key = "pf_tot_jobs_done"; + ASSERT(part_model_info.get(key, tot_jobsdone)); + pfinder->showMergeResults(part_model_info, tree_len_vec, model_name_vec, score_vec, set_name_vec, tag_vec, tot_jobsdone); } - - double pchi2 = (failed) ? 
0.0 : computePValueChiSquare(rss, df); - cout << tid+1 << "\t" << info[tid].au_pvalue << "\t" << rss << "\t" << d << "\t" << c; - - // warning if p-value of chi-square < 0.01 (rss too high) - if (pchi2 < 0.01) - cout << " !!!"; - cout << endl; } - - delete [] this_bp; - delete [] w; - delete [] cc; -// delete [] bp; } +/* + * FOR MASTER - synchronize the checkpoints from the other processors + * Receive checkpoint from worker and send the next Job ID to workers + * increase the value of next_job and job_done by 1 + * update the master's checkpoint: model_info + */ +void SyncChkPoint::masterSyncOtherChkpts(bool chk_gotMessage) { + + if (MPIHelper::getInstance().isWorker() || mytag > 0) + return; -void evaluateTrees(Params ¶ms, IQTree *tree, vector &info, IntVector &distinct_ids) -{ - if (!params.treeset_file) - return; - cout << endl; - //MTreeSet trees(params.treeset_file, params.is_rooted, params.tree_burnin, params.tree_max_count); - cout << "Reading trees in " << params.treeset_file << " ..." << endl; - size_t ntrees = countDistinctTrees(params.treeset_file, params.is_rooted, tree, distinct_ids, params.distinct_trees); - if (ntrees < distinct_ids.size()) { - cout << "WARNING: " << distinct_ids.size() << " trees detected but only " << ntrees << " distinct trees will be evaluated" << endl; - } else { - cout << ntrees << (params.distinct_trees ? 
" distinct" : "") << " trees detected" << endl; - } - if (ntrees == 0) return; - ifstream in(params.treeset_file); - - //if (trees.size() == 1) return; - //string tree_file = params.treeset_file; - string tree_file = params.out_prefix; - tree_file += ".trees"; - ofstream treeout; - //if (!params.fixed_branch_length) { - treeout.open(tree_file.c_str()); - //} - string score_file = params.out_prefix; - score_file += ".treelh"; - ofstream scoreout; - if (params.print_tree_lh) - scoreout.open(score_file.c_str()); - string site_lh_file = params.out_prefix; - site_lh_file += ".sitelh"; - if (params.print_site_lh) { - ofstream site_lh_out(site_lh_file.c_str()); - site_lh_out << ntrees << " " << tree->getAlnNSite() << endl; - site_lh_out.close(); - } + ModelCheckpoint proc_model_info; + string key; + int worker, next_jobID, job_type, work_tag, tree_id; + bool job_finished, need_nextJobID, proceed, thread_finished, is_old_result; + map::iterator itr; - if (params.print_partition_lh && !tree->isSuperTree()) { - outWarning("-wpl does not work with non-partition model"); - params.print_partition_lh = false; - } - string part_lh_file = params.out_prefix; - part_lh_file += ".partlh"; - if (params.print_partition_lh) { - ofstream part_lh_out(part_lh_file.c_str()); - part_lh_out << ntrees << " " << ((PhyloSuperTree*)tree)->size() << endl; - part_lh_out.close(); - } + next_jobID = -1; + job_finished = false; + is_old_result = false; - double time_start = getRealTime(); - - int *boot_samples = NULL; - size_t boot; - //double *saved_tree_lhs = NULL; - double *tree_lhs = NULL; // RELL score matrix of size #trees x #replicates - double *pattern_lh = NULL; - double *pattern_lhs = NULL; - double *orig_tree_lh = NULL; // Original tree log-likelihoods - double *max_lh = NULL; - double *lhdiff_weights = NULL; - size_t nptn = tree->getAlnNPattern(); - size_t maxnptn = get_safe_upper_limit(nptn); - - if (params.topotest_replicates && ntrees > 1) { - size_t mem_size = 
(size_t)params.topotest_replicates*nptn*sizeof(int) + - ntrees*params.topotest_replicates*sizeof(double) + - (nptn + ntrees*3 + params.topotest_replicates*2)*sizeof(double) + - ntrees*sizeof(TreeInfo) + - params.do_weighted_test*(ntrees * nptn * sizeof(double) + ntrees*ntrees*sizeof(double)); - cout << "Note: " << ((double)mem_size/1024)/1024 << " MB of RAM required!" << endl; - if (mem_size > getMemorySize()-100000) - outWarning("The required memory does not fit in RAM!"); - cout << "Creating " << params.topotest_replicates << " bootstrap replicates..." << endl; - if (!(boot_samples = new int [params.topotest_replicates*nptn])) - outError(ERR_NO_MEMORY); -#ifdef _OPENMP - #pragma omp parallel private(boot) if(nptn > 10000) - { - int *rstream; - init_random(params.ran_seed + omp_get_thread_num(), false, &rstream); - #pragma omp for schedule(static) -#else - int *rstream = randstream; -#endif - for (boot = 0; boot < params.topotest_replicates; boot++) - tree->aln->createBootstrapAlignment(boot_samples + (boot*nptn), params.bootstrap_spec, rstream); -#ifdef _OPENMP - finish_random(rstream); - } -#endif - cout << "done" << endl; - //if (!(saved_tree_lhs = new double [ntrees * params.topotest_replicates])) - // outError(ERR_NO_MEMORY); - if (!(tree_lhs = new double [ntrees * params.topotest_replicates])) - outError(ERR_NO_MEMORY); - if (params.do_weighted_test || params.do_au_test) { - if (!(lhdiff_weights = new double [ntrees * ntrees])) - outError(ERR_NO_MEMORY); - pattern_lhs = aligned_alloc(ntrees*maxnptn); -// if (!(pattern_lhs = new double[ntrees* nptn])) -// outError(ERR_NO_MEMORY); - } - pattern_lh = aligned_alloc(maxnptn); -// if (!(pattern_lh = new double[nptn])) -// outError(ERR_NO_MEMORY); - if (!(orig_tree_lh = new double[ntrees])) - outError(ERR_NO_MEMORY); - if (!(max_lh = new double[params.topotest_replicates])) - outError(ERR_NO_MEMORY); - } - int tree_index, tid, tid2; - info.resize(ntrees); - //for (MTreeSet::iterator it = trees.begin(); it != 
trees.end(); it++, tree_index++) { - for (tree_index = 0, tid = 0; tree_index < distinct_ids.size(); tree_index++) { - - cout << "Tree " << tree_index + 1; - if (distinct_ids[tree_index] >= 0) { - cout << " / identical to tree " << distinct_ids[tree_index]+1 << endl; - // ignore tree - char ch; - do { - in >> ch; - } while (!in.eof() && ch != ';'); - continue; - } - tree->freeNode(); - bool rooted = tree->rooted; - tree->readTree(in, rooted); - tree->setAlignment(tree->aln); - tree->setRootNode(params.root); - if (tree->isSuperTree()) - ((PhyloSuperTree*) tree)->mapTrees(); - - tree->initializeAllPartialLh(); - tree->fixNegativeBranch(false); - if (!params.fixed_branch_length) { - tree->setCurScore(tree->optimizeAllBranches(100, 0.001)); - } else { - tree->setCurScore(tree->computeLikelihood()); - } - treeout << "[ tree " << tree_index+1 << " lh=" << tree->getCurScore() << " ]"; - tree->printTree(treeout); - treeout << endl; - if (params.print_tree_lh) - scoreout << tree->getCurScore() << endl; - - cout << " / LogL: " << tree->getCurScore() << endl; - - if (pattern_lh) { - double curScore = tree->getCurScore(); - memset(pattern_lh, 0, maxnptn*sizeof(double)); - tree->computePatternLikelihood(pattern_lh, &curScore); - if (params.do_weighted_test || params.do_au_test) - memcpy(pattern_lhs + tid*maxnptn, pattern_lh, maxnptn*sizeof(double)); - } - if (params.print_site_lh) { - string tree_name = "Tree" + convertIntToString(tree_index+1); - printSiteLh(site_lh_file.c_str(), tree, pattern_lh, true, tree_name.c_str()); - } - if (params.print_partition_lh) { - string tree_name = "Tree" + convertIntToString(tree_index+1); - printPartitionLh(part_lh_file.c_str(), tree, pattern_lh, true, tree_name.c_str()); - } - info[tid].logl = tree->getCurScore(); +#ifdef _IQTREE_MPI - if (!params.topotest_replicates || ntrees <= 1) { - tid++; - continue; - } - // now compute RELL scores - orig_tree_lh[tid] = tree->getCurScore(); - double *tree_lhs_offset = tree_lhs + 
(tid*params.topotest_replicates); - for (boot = 0; boot < params.topotest_replicates; boot++) { - double lh = 0.0; - int *this_boot_sample = boot_samples + (boot*nptn); - for (size_t ptn = 0; ptn < nptn; ptn++) - lh += pattern_lh[ptn] * this_boot_sample[ptn]; - tree_lhs_offset[boot] = lh; - } - tid++; - } + if (chk_gotMessage) { + // only proceed if there is a message + while (gotMessage(work_tag, worker)) { + + // receive checkpoint from the WORKER + recvCheckpoint(&proc_model_info, worker, work_tag); + + #ifdef SYN_COMM + key = "need_nextJobID"; + need_nextJobID = proc_model_info.getBool(key); + if (need_nextJobID) { + // get the next Job ID + next_jobID = getNextJobID(); + // send the next job ID to the WORKER + MPI_Send(&next_jobID, 1, MPI_INT, worker, work_tag, MPI_COMM_WORLD); + } + #endif - ASSERT(tid == ntrees); - - if (params.topotest_replicates && ntrees > 1) { - double *tree_probs = new double[ntrees]; - memset(tree_probs, 0, ntrees*sizeof(double)); - int *tree_ranks = new int[ntrees]; - - /* perform RELL BP method */ - cout << "Performing RELL-BP test..." 
<< endl; - int *maxtid = new int[params.topotest_replicates]; - double *maxL = new double[params.topotest_replicates]; - int *maxcount = new int[params.topotest_replicates]; - memset(maxtid, 0, params.topotest_replicates*sizeof(int)); - memcpy(maxL, tree_lhs, params.topotest_replicates*sizeof(double)); - for (boot = 0; boot < params.topotest_replicates; boot++) - maxcount[boot] = 1; - for (tid = 1; tid < ntrees; tid++) { - double *tree_lhs_offset = tree_lhs + (tid * params.topotest_replicates); - for (boot = 0; boot < params.topotest_replicates; boot++) - if (tree_lhs_offset[boot] > maxL[boot] + params.ufboot_epsilon) { - maxL[boot] = tree_lhs_offset[boot]; - maxtid[boot] = tid; - maxcount[boot] = 1; - } else if (tree_lhs_offset[boot] > maxL[boot] - params.ufboot_epsilon && - random_double() <= 1.0/(maxcount[boot]+1)) { - maxL[boot] = max(maxL[boot],tree_lhs_offset[boot]); - maxtid[boot] = tid; - maxcount[boot]++; - } - } - for (boot = 0; boot < params.topotest_replicates; boot++) - tree_probs[maxtid[boot]] += 1.0; - for (tid = 0; tid < ntrees; tid++) { - tree_probs[tid] /= params.topotest_replicates; - info[tid].rell_confident = false; - info[tid].rell_bp = tree_probs[tid]; - } - sort_index(tree_probs, tree_probs + ntrees, tree_ranks); - double prob_sum = 0.0; - // obtain the confidence set - for (tid = ntrees-1; tid >= 0; tid--) { - info[tree_ranks[tid]].rell_confident = true; - prob_sum += tree_probs[tree_ranks[tid]]; - if (prob_sum > 0.95) break; - } + showResult(proc_model_info, work_tag); - // sanity check - for (tid = 0, prob_sum = 0.0; tid < ntrees; tid++) - prob_sum += tree_probs[tid]; - if (fabs(prob_sum-1.0) > 0.01) - outError("Internal error: Wrong ", __func__); - - delete [] maxcount; - delete [] maxL; - delete [] maxtid; - - /* now do the SH test */ - cout << "Performing KH and SH test..." 
<< endl; - // SH centering step - for (boot = 0; boot < params.topotest_replicates; boot++) - max_lh[boot] = -DBL_MAX; - double *avg_lh = new double[ntrees]; - for (tid = 0; tid < ntrees; tid++) { - avg_lh[tid] = 0.0; - double *tree_lhs_offset = tree_lhs + (tid * params.topotest_replicates); - for (boot = 0; boot < params.topotest_replicates; boot++) - avg_lh[tid] += tree_lhs_offset[boot]; - avg_lh[tid] /= params.topotest_replicates; - for (boot = 0; boot < params.topotest_replicates; boot++) { - max_lh[boot] = max(max_lh[boot], tree_lhs_offset[boot] - avg_lh[tid]); - } - } + proc_model_info.clear(); - double orig_max_lh = orig_tree_lh[0]; - size_t orig_max_id = 0; - double orig_2ndmax_lh = -DBL_MAX; - size_t orig_2ndmax_id = -1; - // find the max tree ID - for (tid = 1; tid < ntrees; tid++) - if (orig_max_lh < orig_tree_lh[tid]) { - orig_max_lh = orig_tree_lh[tid]; - orig_max_id = tid; - } - // find the 2nd max tree ID - for (tid = 0; tid < ntrees; tid++) - if (tid != orig_max_id && orig_2ndmax_lh < orig_tree_lh[tid]) { - orig_2ndmax_lh = orig_tree_lh[tid]; - orig_2ndmax_id = tid; - } + } + } else { + + // receive checkpoint from any worker with any tag + recvAnyCheckpoint(&proc_model_info, worker, work_tag); + + #ifdef SYN_COMM + key = "need_nextJobID"; + need_nextJobID = proc_model_info.getBool(key); + if (need_nextJobID) { + // get the next Job ID + next_jobID = getNextJobID(); + // send the next job ID to the WORKER + MPI_Send(&next_jobID, 1, MPI_INT, worker, work_tag, MPI_COMM_WORLD); + } + #endif + showResult(proc_model_info, work_tag); + } - // SH compute p-value - for (tid = 0; tid < ntrees; tid++) { - double *tree_lhs_offset = tree_lhs + (tid * params.topotest_replicates); - // SH compute original deviation from max_lh - info[tid].kh_pvalue = 0.0; - info[tid].sh_pvalue = 0.0; - size_t max_id = (tid != orig_max_id) ? 
orig_max_id : orig_2ndmax_id; - double orig_diff = orig_tree_lh[max_id] - orig_tree_lh[tid] - avg_lh[tid]; - double *max_kh = tree_lhs + (max_id * params.topotest_replicates); - for (boot = 0; boot < params.topotest_replicates; boot++) { - if (max_lh[boot] - tree_lhs_offset[boot] > orig_diff) - info[tid].sh_pvalue += 1.0; - //double max_kh_here = max(max_kh[boot]-avg_lh[max_id], tree_lhs_offset[boot]-avg_lh[tid]); - double max_kh_here = (max_kh[boot]-avg_lh[max_id]); - if (max_kh_here - tree_lhs_offset[boot] > orig_diff) - info[tid].kh_pvalue += 1.0; - } - info[tid].sh_pvalue /= params.topotest_replicates; - info[tid].kh_pvalue /= params.topotest_replicates; - } +#endif +} - if (params.do_weighted_test) { - - cout << "Computing pairwise logl difference variance ..." << endl; - /* computing lhdiff_weights as 1/sqrt(lhdiff_variance) */ - for (tid = 0; tid < ntrees; tid++) { - double *pattern_lh1 = pattern_lhs + (tid * maxnptn); - lhdiff_weights[tid*ntrees+tid] = 0.0; - for (tid2 = tid+1; tid2 < ntrees; tid2++) { - double lhdiff_variance = tree->computeLogLDiffVariance(pattern_lh1, pattern_lhs + (tid2*maxnptn)); - lhdiff_weights[tid*ntrees+tid2] = 1.0/sqrt(lhdiff_variance); - lhdiff_weights[tid2*ntrees+tid] = lhdiff_weights[tid*ntrees+tid2]; - } - } +/* + * FOR WORKER + * send checkpoint to master + * + * return the next Job ID from master if necessary + */ +int SyncChkPoint::sendChkptToMaster(ModelCheckpoint &model_info, bool need_nextJobID, int job_type, bool forceToSyn) { + + if (MPIHelper::getInstance().getNumProcesses() == 1 || MPIHelper::getInstance().isMaster()) { + return -1; + } - // Weighted KH and SH test - cout << "Performing WKH and WSH test..." 
<< endl; - for (tid = 0; tid < ntrees; tid++) { - double *tree_lhs_offset = tree_lhs + (tid * params.topotest_replicates); - info[tid].wkh_pvalue = 0.0; - info[tid].wsh_pvalue = 0.0; - double worig_diff = -DBL_MAX; - size_t max_id = -1; - for (tid2 = 0; tid2 < ntrees; tid2++) - if (tid2 != tid) { - double wdiff = (orig_tree_lh[tid2] - orig_tree_lh[tid])*lhdiff_weights[tid*ntrees+tid2]; - if (wdiff > worig_diff) { - worig_diff = wdiff; - max_id = tid2; - } - } - for (boot = 0; boot < params.topotest_replicates; boot++) { - double wmax_diff = -DBL_MAX; - for (tid2 = 0; tid2 < ntrees; tid2++) - if (tid2 != tid) - wmax_diff = max(wmax_diff, - (tree_lhs[tid2*params.topotest_replicates+boot] - avg_lh[tid2] - - tree_lhs_offset[boot] + avg_lh[tid]) * lhdiff_weights[tid*ntrees+tid2]); - if (wmax_diff > worig_diff) - info[tid].wsh_pvalue += 1.0; - wmax_diff = (tree_lhs[max_id*params.topotest_replicates+boot] - avg_lh[max_id] - - tree_lhs_offset[boot] + avg_lh[tid]); - if (wmax_diff > orig_tree_lh[max_id] - orig_tree_lh[tid]) - info[tid].wkh_pvalue += 1.0; - } - info[tid].wsh_pvalue /= params.topotest_replicates; - info[tid].wkh_pvalue /= params.topotest_replicates; - } - } - - delete [] avg_lh; - - /* now to ELW - Expected Likelihood Weight method */ - cout << "Performing ELW test..." 
<< endl; - - for (boot = 0; boot < params.topotest_replicates; boot++) - max_lh[boot] = -DBL_MAX; - for (tid = 0; tid < ntrees; tid++) { - double *tree_lhs_offset = tree_lhs + (tid * params.topotest_replicates); - for (boot = 0; boot < params.topotest_replicates; boot++) - max_lh[boot] = max(max_lh[boot], tree_lhs_offset[boot]); - } - double *sumL = new double[params.topotest_replicates]; - memset(sumL, 0, sizeof(double) * params.topotest_replicates); - for (tid = 0; tid < ntrees; tid++) { - double *tree_lhs_offset = tree_lhs + (tid * params.topotest_replicates); - for (boot = 0; boot < params.topotest_replicates; boot++) { - tree_lhs_offset[boot] = exp(tree_lhs_offset[boot] - max_lh[boot]); - sumL[boot] += tree_lhs_offset[boot]; - } - } - for (tid = 0; tid < ntrees; tid++) { - double *tree_lhs_offset = tree_lhs + (tid * params.topotest_replicates); - tree_probs[tid] = 0.0; - for (boot = 0; boot < params.topotest_replicates; boot++) { - tree_probs[tid] += (tree_lhs_offset[boot] / sumL[boot]); - } - tree_probs[tid] /= params.topotest_replicates; - info[tid].elw_confident = false; - info[tid].elw_value = tree_probs[tid]; - } + int next_jobID = -1; + string key; - sort_index(tree_probs, tree_probs + ntrees, tree_ranks); - prob_sum = 0.0; - // obtain the confidence set - for (tid = ntrees-1; tid >= 0; tid--) { - info[tree_ranks[tid]].elw_confident = true; - prob_sum += tree_probs[tree_ranks[tid]]; - if (prob_sum > 0.95) break; - } +#ifdef _IQTREE_MPI + + #ifdef SYN_COMM + // workers: send checkpoint to MASTER synchronously + key = "need_nextJobID"; model_info.putBool(key, need_nextJobID); + key = "pf_job_type"; model_info.put(key, job_type); + key = "pf_data_num"; model_info.put(key, "single"); + + #ifdef _OPENMP + #pragma omp critical + #endif + { + sendCheckpoint(&model_info, PROC_MASTER, mytag); + if (need_nextJobID) { + // receive the next job ID from MASTER synchronously + MPI_Recv(&next_jobID, 1, MPI_INT, PROC_MASTER, mytag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + 
} + } + model_info.clear(); + #endif - // sanity check - for (tid = 0, prob_sum = 0.0; tid < ntrees; tid++) - prob_sum += tree_probs[tid]; - if (fabs(prob_sum-1.0) > 0.01) - outError("Internal error: Wrong ", __func__); - delete [] sumL; + #ifdef ONESIDE_COMM + + if (need_nextJobID) { + next_jobID = getNextJobID(); + } + + string str = ""; - if (params.do_au_test) { - cout << "Performing approximately unbiased (AU) test..." << endl; - performAUTest(params, tree, pattern_lhs, info); + #ifdef _OPENMP + #pragma omp critical + #endif + { + if (forceToSyn || (getRealTime() - pfinder->last_syn_time > TIME_SYN && (pfinder->tot_jobs_done > 0 || pfinder->tree_id_vec.size() > 0))) { + // prepare to do synchronization + pfinder->last_syn_time = getRealTime(); + key = "pf_tree_id"; model_info.putVector(key, pfinder->tree_id_vec); + key = "pf_tree_len"; model_info.putVector(key, pfinder->tree_len_vec); + key = "pf_model_name"; model_info.putVector(key, pfinder->model_name_vec); + key = "pf_score"; model_info.putVector(key, pfinder->score_vec); + key = "pf_tag"; model_info.putVector(key, pfinder->tag_vec); + key = "pf_job_type"; model_info.put(key, job_type); + key = "pf_data_num"; model_info.put(key, "multiple"); + key = "pf_tot_jobs_done"; model_info.put(key, pfinder->tot_jobs_done); + key = "pf_set_name"; model_info.putVector(key, pfinder->set_name_vec); + + stringstream ss; + model_info.dump(ss); + str = ss.str(); + + // do synchronization with Master + MPIHelper::getInstance().sendString(str, PROC_MASTER, mytag); + + // clear all vectors + pfinder->tree_id_vec.clear(); + pfinder->tree_len_vec.clear(); + pfinder->model_name_vec.clear(); + pfinder->score_vec.clear(); + pfinder->tag_vec.clear(); + pfinder->tot_jobs_done = 0; + + // clear the checkpoint for this process + model_info.clear(); + } } - delete [] tree_ranks; - delete [] tree_probs; - } - if (max_lh) - delete [] max_lh; - if (orig_tree_lh) - delete [] orig_tree_lh; - if (pattern_lh) - aligned_free(pattern_lh); - if 
(pattern_lhs) - aligned_free(pattern_lhs); - if (lhdiff_weights) - delete [] lhdiff_weights; - if (tree_lhs) - delete [] tree_lhs; - //if (saved_tree_lhs) - // delete [] saved_tree_lhs; - if (boot_samples) - delete [] boot_samples; - - if (params.print_tree_lh) { - scoreout.close(); - } + #endif + +#endif - treeout.close(); - in.close(); + return next_jobID; +} + +/* + * receive an integer from the master (for synchronous communication) + */ +int SyncChkPoint::recvInt(int tag) { + int mesg = -1; +#ifdef _IQTREE_MPI + MPI_Recv(&mesg, 1, MPI_INT, PROC_MASTER, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); +#endif + return mesg; +} + +/* + * get the next Job ID + */ +int SyncChkPoint::getNextJobID() { + int one = 1, indx = -1, nxtJobID = -1; + +#ifdef _IQTREE_MPI - cout << "Time for evaluating all trees: " << getRealTime() - time_start << " sec." << endl; + #ifdef SYN_COMM + if (MPIHelper::getInstance().isMaster()) { + // get the next Job ID + #ifdef _OPENMP + #pragma omp critical + #endif + { + if (pfinder->nextjob < pfinder->remain_job_list.size()) { + nxtJobID = pfinder->remain_job_list[pfinder->nextjob]; + pfinder->nextjob++; + } + } + } + #endif + + #ifdef ONESIDE_COMM + + #ifdef _OPENMP + #pragma omp critical + #endif + { + MPI_Win_lock(MPI_LOCK_EXCLUSIVE, PROC_MASTER, 0, pfinder->win); + MPI_Fetch_and_op(&one, &indx, MPI_INT, PROC_MASTER, 0, MPI_SUM, pfinder->win); + MPI_Win_unlock(PROC_MASTER, pfinder->win); + if (indx >= 0 && indx < pfinder->remain_job_list.size()) { + nxtJobID = pfinder->remain_job_list[indx]; + } + } + + #endif +#endif + + return nxtJobID; } +#ifdef _IQTREE_MPI +void SyncChkPoint::sendCheckpoint(Checkpoint *ckp, int dest, int tag) { + stringstream ss; + ckp->dump(ss); + string str = ss.str(); + MPIHelper::getInstance().sendString(str, dest, tag); +} -void evaluateTrees(Params ¶ms, IQTree *tree) { - vector info; - IntVector distinct_ids; - evaluateTrees(params, tree, info, distinct_ids); +void SyncChkPoint::recvCheckpoint(Checkpoint *ckp, int src, 
int tag) { + string str; + MPIHelper::getInstance().recvString(str, src, tag); + stringstream ss(str); + ckp->load(ss); } +void SyncChkPoint::recvAnyCheckpoint(Checkpoint *ckp, int& src, int& tag) { + string str; + recvAnyString(str, src, tag); + stringstream ss(str); + ckp->load(ss); +} +void SyncChkPoint::recvAnyString(string &str, int& src, int& tag) { + MPI_Status status; + MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); + int msgCount; + MPI_Get_count(&status, MPI_CHAR, &msgCount); + // receive the message + char *recvBuffer = new char[msgCount]; + MPI_Recv(recvBuffer, msgCount, MPI_CHAR, status.MPI_SOURCE, status.MPI_TAG, MPI_COMM_WORLD, &status); + str = recvBuffer; + src = status.MPI_SOURCE; + tag = status.MPI_TAG; + delete [] recvBuffer; +} +/* + * Check for incoming messages + * if there is a message, collect the tag value and the source + */ +bool SyncChkPoint::gotMessage(int& tag, int& source) { + if (MPIHelper::getInstance().getNumProcesses() == 1 || mytag > 0) + return false; + int flag = 0; + MPI_Status status; + MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, &status); + if (flag) { + tag = status.MPI_TAG; + source = status.MPI_SOURCE; + return true; + } else + return false; +} +#endif diff --git a/main/phylotesting.h b/main/phylotesting.h index f0ca8d722..daf8eaa48 100644 --- a/main/phylotesting.h +++ b/main/phylotesting.h @@ -8,76 +8,293 @@ #ifndef PHYLOTESTING_H_ #define PHYLOTESTING_H_ +#ifdef _IQTREE_MPI + #include +#endif + #include "utils/tools.h" #include "utils/checkpoint.h" +#include "nclextra/modelsblock.h" +#include "alignment/superalignment.h" +#include "utils/MPIHelper.h" class PhyloTree; class IQTree; +class ModelCheckpoint; +class SyncChkPoint; +class PhyloSuperTree; +class SubsetPair; +const int MF_SAMPLE_SIZE_TRIPLE = 1; +const int MF_IGNORED = 2; +const int MF_RUNNING = 4; +const int MF_WAITING = 8; +const int MF_DONE = 16; -class ModelInfo { +/** + Candidate model under testing + */ +class 
CandidateModel { + public: - string set_name; // subset name - string name; // model name - double logl; // tree log likelihood - int df; // #parameters - double tree_len; // tree length, added 2015-06-24 for rcluster algorithm - string tree; // added 2015-04-28: tree string - double AIC_score, AICc_score, BIC_score; // scores - double AIC_weight, AICc_weight, BIC_weight; // weights - bool AIC_conf, AICc_conf, BIC_conf; // in confidence set? + + /** constructor */ + CandidateModel(int flag = 0) { + logl = 0.0; + df = 0; + tree_len = 0.0; + aln = NULL; + AIC_score = DBL_MAX; + AICc_score = DBL_MAX; + BIC_score = DBL_MAX; + this->flag = flag; + syncChkPoint = nullptr; + } + + CandidateModel(string subst_name, string rate_name, Alignment *aln, int flag = 0) : CandidateModel(flag) { + this->subst_name = orig_subst_name = subst_name; + this->rate_name = orig_rate_name = rate_name; + this->aln = aln; + syncChkPoint = nullptr; + } + + CandidateModel(Alignment *aln, int flag = 0) : CandidateModel(flag) { + this->aln = aln; + getUsualModel(aln); + syncChkPoint = nullptr; + } + + string getName() { + return subst_name + rate_name; + } + + /** + get usual model for a given alignment + @param aln input alignment + @return length of the alignment + */ + size_t getUsualModel(Alignment *aln); + + /** + evaluate this model + @param params program parameters + @param in_aln input alignment + @param[in] in_model_info input checkpointing information + @param[out] out_model_info output checkpointing information + @param models_block models block + @param num_thread number of threads + @param brlen_type BRLEN_OPTIMIZE | BRLEN_FIX | BRLEN_SCALE | TOPO_UNLINKED + @return tree string + */ + string evaluate(Params ¶ms, + ModelCheckpoint &in_model_info, ModelCheckpoint &out_model_info, + ModelsBlock *models_block, int &num_threads, int brlen_type); + + /** + evaluate concatenated alignment + */ + string evaluateConcatenation(Params ¶ms, SuperAlignment *super_aln, + ModelCheckpoint 
&model_info, ModelsBlock *models_block, int num_threads); /** - compute information criterion scores (AIC, AICc, BIC) - */ + compute information criterion scores (AIC, AICc, BIC) + */ void computeICScores(size_t sample_size); + void computeICScores(); /** - compute information criterion scores (AIC, AICc, BIC) - */ + compute information criterion scores (AIC, AICc, BIC) + */ double computeICScore(size_t sample_size); + + /** @return model score */ + double getScore(); + + /** @return model score */ + double getScore(ModelTestCriterion mtc); /** - save model into checkpoint - */ + save model into checkpoint + */ void saveCheckpoint(Checkpoint *ckp) { stringstream ostr; ostr.precision(10); ostr << logl << " " << df << " " << tree_len; if (!tree.empty()) ostr << " " << tree; - ckp->put(name, ostr.str()); + ckp->put(getName(), ostr.str()); } - + /** - restore model from checkpoint - */ + restore model from checkpoint + */ bool restoreCheckpoint(Checkpoint *ckp) { string val; - if (ckp->getString(name, val)) { + if (ckp->getString(getName(), val)) { stringstream str(val); str >> logl >> df >> tree_len; return true; } return false; } + + /** + restore model from checkpoint + */ + bool restoreCheckpointRminus1(Checkpoint *ckp, CandidateModel *model); + + /** turn on some flag with OR operator */ + void setFlag(int flag) { + this->flag |= flag; + } + + bool hasFlag(int flag) { + return (this->flag & flag) != 0; + } + + string set_name; // subset name + string subst_name; // substitution matrix name + string orig_subst_name; // original substitution name + string rate_name; // rate heterogeneity name + string orig_rate_name; // original rate heterogeneity name + double logl; // tree log likelihood + int df; // #parameters + double tree_len; // tree length, added 2015-06-24 for rcluster algorithm + string tree; // added 2015-04-28: tree string + double AIC_score, AICc_score, BIC_score; // scores + double AIC_weight, AICc_weight, BIC_weight; // weights + bool AIC_conf, 
AICc_conf, BIC_conf; // in confidence set? + + Alignment *aln; // associated alignment /** - restore model from checkpoint - */ - bool restoreCheckpointRminus1(Checkpoint *ckp, string &model_name) { + Synchronization of check point for MPI + */ + SyncChkPoint* syncChkPoint; + +protected: + + /** flag */ + int flag; +}; + +/** + set of candidate models + */ +class CandidateModelSet : public vector { +public: + + CandidateModelSet() : vector() { + current_model = -1; + syncChkPoint = nullptr; + } + + /** get ID of the best model */ + int getBestModelID(ModelTestCriterion mtc); + + /** + * get the list of model + * @param params program parameters + * @param aln alignment + * param separate_rate true to separate rates from models + * @param merge_phase true to consider models for merging phase + * @return maximum number of rate categories + */ + int generate(Params ¶ms, Alignment *aln, bool separate_rate, bool merge_phase); + + /** + Filter out all "non-promissing" rate models + */ + void filterRates(int finished_model); + + /** + Filter out all "non-promissing" substitution models + */ + void filterSubst(int finished_model); + + /** + testing the best-fit model + return in params.freq_type and params.rate_type + @param params global program parameters + @param in_tree phylogenetic tree + @param model_info (IN/OUT) information for all models considered + @param models_block global model definition + @param num_threads number of threads + @param brlen_type BRLEN_OPTIMIZE | BRLEN_FIX | BRLEN_SCALE | TOPO_UNLINK + @param set_name for partition model selection + @param in_model_name a specific model name if testing one model + @param adjust model adjustment for modelomatic + @param merge_phase true to consider models for merging phase + @return name of best-fit-model + */ + CandidateModel test(Params ¶ms, PhyloTree* in_tree, ModelCheckpoint &model_info, + ModelsBlock *models_block, int num_threads, int brlen_type, + string set_name = "", string in_model_name = "", + bool 
merge_phase = false); + + /** + for a rate model XXX+R[k], return XXX+R[k-j] that finished + @return the index of fewer category +R model that finished + */ + int getLowerKModel(int model) { size_t posR; const char *rates[] = {"+R", "*R", "+H", "*H"}; for (int i = 0; i < sizeof(rates)/sizeof(char*); i++) { - if ((posR = model_name.find(rates[i])) != string::npos) { - int cat = convert_int(model_name.substr(posR+2).c_str()); - name = model_name.substr(0, posR+2) + convertIntToString(cat-1); - return restoreCheckpoint(ckp); + if ((posR = at(model).rate_name.find(rates[i])) == string::npos) + continue; + int cat = convert_int(at(model).rate_name.substr(posR+2).c_str()); + for (int prev_model = model-1; prev_model >= 0; prev_model--, cat--) { + string name = at(model).rate_name.substr(0, posR+2) + convertIntToString(cat-1); + if (at(prev_model).rate_name != name) + break; + if (!at(prev_model).hasFlag(MF_DONE)) + continue; + return prev_model; } } - return false; + return -1; + } + + int getHigherKModel(int model) { + size_t posR; + const char *rates[] = {"+R", "*R", "+H", "*H"}; + for (int i = 0; i < sizeof(rates)/sizeof(char*); i++) { + if ((posR = at(model).rate_name.find(rates[i])) == string::npos) + continue; + size_t this_posR = at(model).rate_name.find(rates[i]); + ASSERT(this_posR != string::npos); + int cat = convert_int(at(model).rate_name.substr(this_posR+2).c_str()); + for (int next_model = model+1; next_model < size(); next_model++, cat++) { +// if (at(next_model).name.substr(0, posR) != orig_name.substr(0, posR)) +// break; + string rate_name = at(model).rate_name.substr(posR, 2) + convertIntToString(cat+1); + if (at(next_model).rate_name.find(rate_name) == string::npos) + break; + return next_model; + } + } + return -1; } + /** get the next model to evaluate in parallel */ + int64_t getNextModel(); + + /** + evaluate all models in parallel + */ + CandidateModel evaluateAll(Params ¶ms, PhyloTree* in_tree, ModelCheckpoint &model_info, + ModelsBlock 
*models_block, int num_threads, int brlen_type, + string in_model_name = "", bool merge_phase = false, bool write_info = true); + + /** + Synchronization of check point for MPI + */ + SyncChkPoint* syncChkPoint; + +private: + + /** current model */ + int64_t current_model; }; //typedef vector ModelCheckpoint; @@ -93,13 +310,27 @@ class ModelCheckpoint : public Checkpoint { */ bool getBestModel(string &best_model); + /* + get the best model list + @param[out] best_model_list list of the best model + @return TRUE if best model found, FALSE otherwise (unfinished job) + */ + bool getBestModelList(string &best_model_list); + + /* + put the best model list + @param best_model_list list of the best model + @return TRUE if best model found, FALSE otherwise (unfinished job) + */ + void putBestModelList(string &best_model_list); + /* get the ordered model list according to AIC, AICc or BIC @param tree associated tree @param[out] ordered_models list of models ordered by specified criterion @return TRUE if ordered_models found, FALSE otherwise (unfinished job) */ - bool getOrderedModels(PhyloTree *tree, vector &ordered_models); + bool getOrderedModels(PhyloTree *tree, CandidateModelSet &ordered_models); /* get the best tree @@ -110,20 +341,309 @@ class ModelCheckpoint : public Checkpoint { }; -struct TreeInfo { - double logl; // log likelihood - double se; // standard error of deltaL (logl difference to max), or square root of variance - double rell_bp; // bootstrap proportion by RELL method - bool rell_confident; // confidence set for RELL-BP - double sh_pvalue; // p-value by Shimodaira-Hasegawa test - double wsh_pvalue; // p-value by weighted Shimodaira-Hasegawa test - double kh_pvalue; // p-value by Kishino-Hasegawa test - double wkh_pvalue; // p-value by weighted Kishino-Hasegawa test - double elw_value; // ELW - expected likelihood weights test - bool elw_confident; // to represent confidence set of ELW test - double au_pvalue; // p-value by approximately unbiased (AU) 
test +/** model information by merging two partitions */ +struct ModelPair { + /** score after merging */ + double score; + /** ID of partition 1 */ + int part1; + /** ID of partition 2 */ + int part2; + /** log-likelihood */ + double logl; + /** degree of freedom */ + int df; + /** tree length */ + double tree_len; + /** IDs of merged partitions */ + set merged_set; + /** set name */ + string set_name; + /* best model name */ + string model_name; }; +class ModelPairSet : public multimap { + +public: + + /** insert a partition pair */ + void insertPair(ModelPair &pair) { + insert(value_type(pair.score, pair)); + } + + /** + find the maximum compatible partition pairs + @param num max number of pairs to return + */ + void getCompatiblePairs(int num, ModelPairSet &res) { + set part_ids; + + for (auto it = begin(); it != end() && res.size() < num; it++) { + + // check for compatibility + vector overlap; + set_intersection(part_ids.begin(), part_ids.end(), + it->second.merged_set.begin(), it->second.merged_set.end(), + std::back_inserter(overlap)); + + if (!overlap.empty()) continue; + + // take the union + part_ids.insert(it->second.merged_set.begin(), it->second.merged_set.end()); + + // put the compatible pair to the set + res.insertPair(it->second); + } + } + +}; + +/* + * This class is designed for partition finder + */ +class PartitionFinder { + +private: + int brlen_type; + bool test_merge; + SuperAlignment *super_aln; + + /** + * Process the computation of the best model for a single partition with MPI + * + * nthreads : number of threads available for this job + * need_next_treeID : whether it is needed to get the next tree ID + * + * if need_next_treeID, then + * if WORKER and IS_ASYN_COMM = 1 (i.e. 
asynchronous communication) + * return the index of the array storing MPI_Request + * else + * return the next Job ID from master + * else + * return -1 + */ + int computeBestModelforOnePartitionMPI(int tree_id, int nthreads, bool need_next_treeID, SyncChkPoint& syncChkPt); + + /** + * Process the computation of the best model for a merge with MPI + * + * nthreads : number of threads available for this job + * need_next_treeID : whether it is needed to get the next tree ID + * + * if need_next_treeID and (MASTER or IS_ASYN_COMM = 0) + * return the next Job ID from master + * else + * return -1 + */ + int getBestModelForOneMergeMPI(int job_id, int nthreads, bool need_next_jobID, SyncChkPoint& syncChkPt); + + /** + * compute and process the best model for partitions (for MPI) + */ + void getBestModelforPartitionsMPI(int nthreads, vector* >& jobs); + + /** + * compute and process the best model for merges (for MPI) + */ + void getBestModelforMergesMPI(int nthreads, vector* >& jobs); + + /** + * compute and process the best model for partitions (without MPI) + * nthreads : the number of threads available for these jobs + */ + void getBestModelforPartitionsNoMPI(int nthreads, vector >& jobs); + + /** + * compute and process the best model for merges (without MPI) + * nthreads : the number of threads available for these jobs + */ + void getBestModelforMergesNoMPI(int nthreads, vector >& jobs); + + /** + * compute the best model + * job_type = 1 : for all partitions + * job_type = 2 : for all merges + */ + void getBestModel(int job_type); + + /* + * Consolidate the partition results (for MPI) + */ + void consolidPartitionResults(); + + /* + * Consolidate the merge results (for MPI) + */ + void consolidMergeResults(); + +public: + ModelCheckpoint *model_info; + DoubleVector lhvec; // log-likelihood for each partition + IntVector dfvec; // number of parameters for each partition + DoubleVector lenvec; // tree length for each partition + double lhsum; + int dfsum; + double 
start_time; + int64_t total_num_model; + int64_t num_model; + vector closest_pairs; + vector > gene_sets; + PhyloSuperTree* in_tree; + size_t ssize; + Params *params; + double inf_score; + ModelPairSet better_pairs; // list of all better pairs of partitions than current partitioning scheme + + ModelsBlock *models_block; + int num_threads; + int num_processes; + + int nextjob; + int jobdone; + int tot_job_num; + vector remain_job_list; + + int base; + + // for ONE-SIDE communication + double last_syn_time; + vector tree_id_vec; + vector tree_len_vec; + vector model_name_vec; + vector score_vec; + vector set_name_vec; + vector tag_vec; + int tot_jobs_done; + ModelCheckpoint process_model_info; + +#ifdef _IQTREE_MPI + // shared memory space to be accessed by the other processors + int *val_ptr; + MPI_Win win; +#endif + + /* Constructor + */ + PartitionFinder(Params *inparams, PhyloSuperTree* intree, ModelCheckpoint *modelinfo, + ModelsBlock *modelsblock, int numthreads); + + /* + * Perform the computation + */ + void test_PartitionModel(); + + /* + * initialize the shared memory space to be accessed by the other processors + */ + void initialMPIShareMemory(); + + /* + * free the shared memory space + */ + void freeMPIShareMemory(); + + /* + * For MPI + * assign initial jobs to processors + * input: a set of jobs ordered by the estimated computational costs + * + * DIST_RATIO: the ratio of the total jobs distributed to the processors + */ + void jobAssignment(vector > &job_ids, vector* >&currJobs); + + /* + * Show the result of best model for the partition + */ + void showPartitionResult(ModelCheckpoint& part_model_info, int tree_id, double tree_len, const string& model_name, double score, int tag); + + /* + * Show the result of best model for the partition + */ + void showPartitionResults(ModelCheckpoint& part_model_info, vector& tree_id, vector& tree_len, vector& model_name, vector& score, vector& tag); + + /* + * Show the the other worker's result of best model for 
the merge + */ + void showMergeResult(ModelCheckpoint& part_model_info, double tree_len, const string& model_name, double score, string& set_name, bool done_before, int tag); + + /* + * Show the the other worker's result of best model for the merge + */ + void showMergeResults(ModelCheckpoint& part_model_info, vector& tree_len, vector& model_name, vector& score, vector& set_name, vector& tag, int tot_jobs_done); +}; + + +/* + * This class is designed for synchronization of checkpoints for partition finder + */ +class SyncChkPoint { + +private: + // shared among threads + PartitionFinder* pfinder; + +public: + + int mytag; + + /* constructor + */ + SyncChkPoint(PartitionFinder* pf, int thres_id); + + /* + * Show the result of best model + */ + void showResult(ModelCheckpoint& part_model_info, int tag); + + /* + * FOR MASTER - synchronize the checkpoints from the other processors + * Receive checkpoint from worker and send the next Job ID to workers + * increase the value of next_job and job_done by 1 + * update the master's checkpoint: model_info + */ + void masterSyncOtherChkpts(bool chk_gotMessage = true); + + /* + * FOR WORKER + * send checkpoint to master + * clear the checkpoint + * + * if need_nextJobID and NOT ASYN_COMM (i.e. 
non-asynchronous communication) + * return the next Job ID from master + * else -1 + */ + int sendChkptToMaster(ModelCheckpoint &model_info, bool need_nextJobID, int job_type, bool forceToSyn = false); + + /* + * receive an integer from the master (for synchronous communication) + */ + int recvInt(int tag); + + /* + * get the next Job ID by accessing the shared memory in the master process + */ + int getNextJobID(); + +#ifdef _IQTREE_MPI + + void sendCheckpoint(Checkpoint *ckp, int dest, int tag); + + void recvCheckpoint(Checkpoint *ckp, int src, int tag); + + void recvAnyCheckpoint(Checkpoint *ckp, int& src, int& tag); + + void recvAnyString(string &str, int& src, int& tag); + + /* + * Check for incoming messages + * if there is a message, collect the tag value and the source + */ + bool gotMessage(int& tag, int& source); + +#endif + +}; /** * computing AIC, AICc, and BIC scores @@ -135,15 +655,21 @@ double computeInformationScore(double tree_lh, int df, int ssize, ModelTestCrite string criterionName(ModelTestCriterion mtc); /** - * check if the model file contains correct information - * @param model_file model file names - * @param model_name (OUT) vector of model names - * @param lh_scores (OUT) vector of tree log-likelihoods - * @param df_vec (OUT) vector of degrees of freedom (or K) - * @return TRUE if success, FALSE failed. 
+ perform ModelFinder to find the best-fit model + @param params program parameters + @param iqtree phylogenetic tree + @param model_info (IN/OUT) information for all models considered */ +void runModelFinder(Params ¶ms, IQTree &iqtree, ModelCheckpoint &model_info); -bool checkModelFile(string model_file, bool is_partitioned, ModelCheckpoint &infos); +/** + perform ModelFinderNN to find the best-fit model (uses neural network for model inference) + @param params program parameters + @param iqtree phylogenetic tree + @param model_info (IN/OUT) information for all models considered + */ + // added by TD +void runModelFinderNN(Params ¶ms, IQTree &iqtree, ModelCheckpoint &model_info); /** testing the best-fit model @@ -155,78 +681,10 @@ bool checkModelFile(string model_file, bool is_partitioned, ModelCheckpoint &inf @param print_mem_usage true to print RAM memory used (default: false) @return name of best-fit-model */ -string testModel(Params ¶ms, PhyloTree* in_tree, ModelCheckpoint &model_info, - ModelsBlock *models_block, int num_threads, int brlen_type, - string set_name = "", bool print_mem_usage = false, string in_model_name = ""); - -/** - * print site log likelihoods to a fileExists - * @param filename output file name - * @param tree phylogenetic tree - * @param ptn_lh pattern log-likelihoods, will be computed if NULL - * @param append TRUE to append to existing file, FALSE otherwise - * @param linename name of the line, default "Site_Lh" if NULL - */ -void printSiteLh(const char*filename, PhyloTree *tree, double *ptn_lh = NULL, - bool append = false, const char *linename = NULL); - -/** - * print partition log likelihoods to a file - * @param filename output file name - * @param tree phylogenetic tree - * @param ptn_lh pattern log-likelihoods, will be computed if NULL - * @param append TRUE to append to existing file, FALSE otherwise - * @param linename name of the line, default "Site_Lh" if NULL - */ -void printPartitionLh(const char*filename, PhyloTree 
*tree, double *ptn_lh = NULL, - bool append = false, const char *linename = NULL); +//string testModel(Params ¶ms, PhyloTree* in_tree, ModelCheckpoint &model_info, +// ModelsBlock *models_block, int num_threads, int brlen_type, +// string set_name = "", bool print_mem_usage = false, string in_model_name = ""); -/** - * print site log likelihoods per category to a file - * @param filename output file name - * @param tree phylogenetic tree - */ -void printSiteLhCategory(const char*filename, PhyloTree *tree, SiteLoglType wsl); - -/** - * print site posterior probabilities per rate/mixture category to a file - * @param filename output file name - * @param tree phylogenetic tree - */ -void printSiteProbCategory(const char*filename, PhyloTree *tree, SiteLoglType wsl); - -/** - * print site state frequency vectors (for Huaichun) - * @param filename output file name - * @param tree phylogenetic tree -*/ -void printSiteStateFreq(const char*filename, PhyloTree *tree, double *state_freqs = NULL); - -/** - * print site state frequency vectors (for Huaichun) - * @param filename output file name - * @param aln alignment -*/ -void printSiteStateFreq(const char* filename, Alignment *aln); - -/** - print ancestral sequences - @param filename output file name - @param tree phylogenetic tree - @param ast either AST_MARGINAL or AST_JOINT -*/ -void printAncestralSequences(const char*filename, PhyloTree *tree, AncestralSeqType ast); - -/** - * Evaluate user-trees with possibility of tree topology tests - * @param params program parameters - * @param tree current tree - * @param info (OUT) output information - * @param distinct_ids IDs of distinct trees - */ -void evaluateTrees(Params ¶ms, IQTree *tree, vector &info, IntVector &distinct_ids); - -void evaluateTrees(Params ¶ms, IQTree *tree); /** get sequence type for a model name @@ -234,9 +692,8 @@ void evaluateTrees(Params ¶ms, IQTree *tree); @param seq_type (OUT) sequence type, SEQ_UNKNOWN if is not determined @return 1 for parametric 
model, 2 for empirical model */ -int getSeqType(const char *model_name, SeqType &seq_type); - -string getSeqType(string model_name); +int detectSeqType(const char *model_name, SeqType &seq_type); +string detectSeqTypeName(string model_name); #endif /* PHYLOTESTING_H_ */ diff --git a/main/timetree.cpp b/main/timetree.cpp new file mode 100644 index 000000000..1ba54a4bc --- /dev/null +++ b/main/timetree.cpp @@ -0,0 +1,348 @@ +/* + * timetree.cpp + * Interface to call dating method incl. LSD2 + * Created on: Apr 4, 2020 + * Author: minh + */ + +#include "timetree.h" + +#ifdef USE_LSD2 +#include "lsd2/src/lsd.h" +#endif + +/** map from taxon name to date */ +typedef unordered_map TaxonDateMap; +#define YEAR_SCALE 100000 + +/** + @param[in] date date string + @return converted date as a float or YYYY-MM[-DD] format + */ +string convertDate(string date) { + // check for range in x:y format + if (date.find(':') != string::npos) { + StrVector vec; + convert_string_vec(date.c_str(), vec, ':'); + if (vec.size() != 2) + outError("Invalid date range " + date); + if (vec[0].empty() || vec[0] == "NA") + return "u(" + vec[1] + ")"; + if (vec[1].empty() || vec[1] == "NA") + return "l(" + vec[0] + ")"; + + return "b(" + vec[0] + "," + vec[1] + ")"; + } + if (date.empty() || !isdigit(date[0]) || date[0] == '-') + return date; + DoubleVector vec; + try { + convert_double_vec(date.c_str(), vec, '-'); + } catch (...) { + outError("Invalid date " + date); + } + // otherwise, return the original date string + return date; +} + +/** + read a date file. Each line has two strings: name and date + */ +void readDateFile(string date_file, set &node_names, TaxonDateMap &dates) { + try { + cout << "Reading date file " << date_file << " ..." 
<< endl; + ifstream in; + in.exceptions(ios::failbit | ios::badbit); + in.open(date_file); + int line_num; + for (line_num = 1; !in.eof(); line_num++) { + string line_out = "Line " + convertIntToString(line_num) + ": "; + string line; + if (!safeGetline(in, line)) + break; + // ignore comment + if (line.find('#') != string::npos) + line = line.substr(0, line.find('#')); + trimString(line); + if (line.empty()) // ignore empty line + continue; + string name, date; + istringstream line_in(line); + if (!(line_in >> name >> date)) + throw line_out + "'" + line + "' does not contain name and date"; + // error checking, make sure that name appear in tree + StrVector name_vec; + convert_string_vec(name.c_str(), name_vec); + for (auto s : name_vec) + if (node_names.find(s) == node_names.end()) + throw line_out + "'" + s + "' does not appear in tree"; + // error checking, make sure is date is valid + if (date.empty()) + throw line_out + "date is empty"; + try { + int end_pos; + convert_double(date.c_str(), end_pos); + } catch (string str) { + throw line_out + str; + } + dates[name] = date; + } + in.clear(); + // set the failbit again + in.exceptions(ios::failbit | ios::badbit); + in.close(); + } catch (ios::failure) { + outError(ERR_READ_INPUT, date_file); + } catch (string str) { + outError(str); + } catch (...) { + outError("Error reading date file " + date_file); + } +} + +/** read the date information from the alignment taxon names */ +void readDateTaxName(set &nodenames, TaxonDateMap &dates) { + cout << "Extracting date from node names..." << endl; + for (string name : nodenames) { + // get the date in the taxon name after the '|' sign + auto pos = name.rfind('|'); + if (pos == string::npos) + continue; + string date = name.substr(pos+1); + try { + // try to parse + int end_pos; + convert_double(date.c_str(), end_pos); + // it works! so get the date + dates[name] = date; + } catch (...) 
{ + // does not work, ignore the taxon name + continue; + } + } +} + +void writeOutgroup(ostream &out, const char *outgroup) { + StrVector outgroup_names; + convert_string_vec(outgroup, outgroup_names); + try { + out << outgroup_names.size() << endl; + for (auto outgroup : outgroup_names) { + out << outgroup << endl; + } + } catch (...) { + ASSERT(0 && "Error writing outgroup stream"); + } +} + +void writeDate(string date_file, ostream &out, set &nodenames) { + TaxonDateMap dates; + if (date_file == "TAXNAME") { + // read the dates from alignment taxon names + readDateTaxName(nodenames, dates); + } else { + readDateFile(date_file, nodenames, dates); + } + // only retain taxon appearing in alignment + TaxonDateMap retained_dates; + set outgroup_set; + if (Params::getInstance().root) { + StrVector outgroup_names; + convert_string_vec(Params::getInstance().root, outgroup_names); + for (auto name : outgroup_names) + outgroup_set.insert(name); + } + if (verbose_mode >= VB_MED) + cout << "Node\tDate" << endl; + for (auto name: nodenames) { + string date = "NA"; + if (dates.find(name) == dates.end()) { + // taxon present in the dates +// if (!Params::getInstance().date_tip.empty()) +// date = Params::getInstance().date_tip; + } else if (outgroup_set.find(name) == outgroup_set.end() || Params::getInstance().date_with_outgroup) { + // ignore the date of the outgroup + date = dates[name]; + } + if (date != "NA") { + retained_dates[name] = date; + dates.erase(name); + } + if (verbose_mode >= VB_MED) + cout << name << "\t" << date << endl; + } + + // add remaining ancestral dates + for (auto date : dates) { + if (date.first.substr(0,4) == "mrca" || date.first.substr(0,8) == "ancestor") + retained_dates[date.first] = date.second; + else if (date.first.find(',') != string::npos) { + retained_dates["ancestor(" + date.first + ")"] = date.second; + } else if (outgroup_set.find(date.first) == outgroup_set.end() || Params::getInstance().date_with_outgroup) { + 
retained_dates[date.first] = date.second; + } + } + +// if (!Params::getInstance().date_root.empty()) { +// retained_dates["root"] = Params::getInstance().date_root; +// } + + cout << retained_dates.size() << " dates extracted" << endl; + try { + out << retained_dates.size() << endl; + for (auto date : retained_dates) { + out << date.first << " " << convertDate(date.second) << endl; + } + } catch (...) { + ASSERT(0 && "Error writing date stream"); + } +} + +#ifdef USE_LSD2 +void runLSD2(PhyloTree *tree) { + string basename = (string)Params::getInstance().out_prefix + ".timetree"; + string treefile = basename + ".subst"; + stringstream tree_stream, outgroup_stream, date_stream; + tree->printTree(tree_stream); + StrVector arg = {"lsd", "-i", treefile, "-s", convertIntToString(tree->getAlnNSite()), "-o", basename}; + if (Params::getInstance().date_debug) { + ofstream out(treefile); + out << tree_stream.str(); + out.close(); + cout << "Tree printed to " << treefile << endl; + } + + if (Params::getInstance().date_replicates > 0) { + arg.push_back("-f"); + arg.push_back(convertIntToString(Params::getInstance().date_replicates)); + if (Params::getInstance().clock_stddev >= 0) { + arg.push_back("-q"); + arg.push_back(convertDoubleToString(Params::getInstance().clock_stddev)); + } + } + + if (Params::getInstance().date_outlier >= 0) { + arg.push_back("-e"); + arg.push_back(convertIntToString(Params::getInstance().date_outlier)); + } + + if (Params::getInstance().root) { + // print outgroup file for LSD + writeOutgroup(outgroup_stream, Params::getInstance().root); + string outgroup_file = basename + ".outgroup"; + arg.push_back("-g"); + arg.push_back(outgroup_file); // only fake file + if (!Params::getInstance().date_with_outgroup) + arg.push_back("-G"); + if (Params::getInstance().date_debug) { + ofstream out(outgroup_file); + out << outgroup_stream.str(); + out.close(); + cout << "Outgroup printed to " << outgroup_file << endl; + } + } else { + // search for all possible 
rootings + arg.push_back("-r"); + arg.push_back("a"); + } + + if (Params::getInstance().date_file != "") { + // parse the date file + set nodenames; + tree->getNodeName(nodenames); + writeDate(Params::getInstance().date_file, date_stream, nodenames); + string date_file = basename + ".date"; + arg.push_back("-d"); + arg.push_back(date_file); // only fake file + if (Params::getInstance().date_debug) { + ofstream out(date_file); + out << date_stream.str(); + out.close(); + cout << "Date file printed to " << date_file << endl; + } + } + // input tip and root date + if (Params::getInstance().date_root != "") { + arg.push_back("-a"); + arg.push_back(convertDate(Params::getInstance().date_root)); + } + + if (Params::getInstance().date_tip != "") { + arg.push_back("-z"); + arg.push_back(convertDate(Params::getInstance().date_tip)); + } + + lsd::InputOutputStream io(tree_stream.str(), outgroup_stream.str(), date_stream.str(), "", ""); + + if (Params::getInstance().dating_options != "") { + // extra options for LSD + StrVector options; + convert_string_vec(Params::getInstance().dating_options.c_str(), options, ' '); + for (auto opt : options) + if (!opt.empty()) + arg.push_back(opt); + } + + cout << "Building time tree by least-square dating (LSD) with command:" << endl; + + int argc = arg.size(); + char *argv[argc]; + for (int i = 0; i < argc; i++) + argv[i] = (char*)arg[i].c_str(); + std::copy(arg.begin(), arg.end(), std::ostream_iterator(std::cout, " ")); + cout << endl; + + // main call to LSD! 
+ lsd::buildTimeTree(argc, argv, &io); + + // fetch the output + string report_file = basename + ".lsd"; + //string tree1_file = basename + ".raw"; + string tree2_file = basename + ".nex"; + string tree3_file = basename + ".nwk"; + try { + ofstream out; + out.open(report_file); + out << ((ostringstream*)io.outResult)->str(); + out.close(); +// out.open(tree1_file); +// out << ((ostringstream*)io.outTree1)->str(); +// out.close(); + out.open(tree2_file); + out << ((ostringstream*)io.outTree2)->str(); + out.close(); + out.open(tree3_file); + out << ((stringstream*)io.outTree3)->str(); + out.close(); + } catch (...) { + outError("Couldn't write LSD output files"); + } + + if (((stringstream*)io.outTree3)->str().empty()) { + outError("Something went wrong, LSD could not date the tree"); + } else { + cout << "LSD results written to:" << endl; + cout << " LSD report: " << report_file << endl; + // cout << " Time tree in nexus format: " << tree1_file << endl; + cout << " Time tree in nexus format: " << tree2_file << endl; + cout << " Time tree in newick format: " << tree3_file << endl; + cout << endl; + } +} +#endif + +void doTimeTree(PhyloTree *tree) { + + cout << "--- Start phylogenetic dating ---" << endl; + cout.unsetf(ios::fixed); + +#ifdef USE_LSD2 + if (Params::getInstance().dating_method == "LSD") { + runLSD2(tree); + cout << "--- End phylogenetic dating ---" << endl; + return; + } +#endif + // This line shouldn't be reached + outError("Unsupported " + Params::getInstance().dating_method + " dating method"); +} diff --git a/main/timetree.h b/main/timetree.h new file mode 100644 index 000000000..e21e07a27 --- /dev/null +++ b/main/timetree.h @@ -0,0 +1,19 @@ +/* + * Time tree for phylogenetic dating + * + * Created on: Apr 4, 2020 + * Author: minh + */ + +#ifndef TIMETREE_H_ +#define TIMETREE_H_ + +#include "tree/phylotree.h" + +/** + main function to build time-tree + @param tree input phylogenetic tree + */ +void doTimeTree(PhyloTree *tree); + +#endif diff --git 
a/main/treetesting.cpp b/main/treetesting.cpp new file mode 100644 index 000000000..50a806dea --- /dev/null +++ b/main/treetesting.cpp @@ -0,0 +1,1465 @@ +/* + * phylotesting.cpp + * + * Created on: Sep 21, 2019 + * Author: minh + */ + + + +#ifdef HAVE_CONFIG_H +#include +#endif +#include + +#include "treetesting.h" +#include "tree/phylotree.h" +#include "tree/phylosupertree.h" +#include "gsl/mygsl.h" +#include "utils/timeutil.h" + + +void printSiteLh(const char*filename, PhyloTree *tree, double *ptn_lh, + bool append, const char *linename) { + double *pattern_lh; + if (!ptn_lh) { + pattern_lh = new double[tree->getAlnNPattern()]; + tree->computePatternLikelihood(pattern_lh); + } else + pattern_lh = ptn_lh; + + try { + ofstream out; + out.exceptions(ios::failbit | ios::badbit); + if (append) { + out.open(filename, ios::out | ios::app); + } else { + out.open(filename); + out << 1 << " " << tree->getAlnNSite() << endl; + } + IntVector pattern_index; + tree->aln->getSitePatternIndex(pattern_index); + if (!linename) + out << "Site_Lh "; + else { + out.width(10); + out << left << linename; + } + for (size_t i = 0; i < tree->getAlnNSite(); i++) + out << " " << pattern_lh[pattern_index[i]]; + out << endl; + out.close(); + if (!append) + cout << "Site log-likelihoods printed to " << filename << endl; + } catch (ios::failure) { + outError(ERR_WRITE_OUTPUT, filename); + } + + if (!ptn_lh) + delete[] pattern_lh; +} + +void printPartitionLh(const char*filename, PhyloTree *tree, double *ptn_lh, + bool append, const char *linename) { + + ASSERT(tree->isSuperTree()); + PhyloSuperTree *stree = (PhyloSuperTree*)tree; + double *pattern_lh; + if (!ptn_lh) { + pattern_lh = new double[tree->getAlnNPattern()]; + tree->computePatternLikelihood(pattern_lh); + } else + pattern_lh = ptn_lh; + + double partition_lh[stree->size()]; + double *pattern_lh_ptr = pattern_lh; + for (int part = 0; part < stree->size(); part++) { + size_t nptn = stree->at(part)->getAlnNPattern(); + partition_lh[part] 
= 0.0; + for (int i = 0; i < nptn; i++) + partition_lh[part] += pattern_lh_ptr[i] * stree->at(part)->ptn_freq[i]; + pattern_lh_ptr += nptn; + } + + try { + ofstream out; + out.exceptions(ios::failbit | ios::badbit); + if (append) { + out.open(filename, ios::out | ios::app); + } else { + out.open(filename); + out << 1 << " " << stree->size() << endl; + } + if (!linename) + out << "Part_Lh "; + else { + out.width(10); + out << left << linename; + } + for (int i = 0; i < stree->size(); ++i) { + out << " " << partition_lh[i]; + } + out << endl; + out.close(); + if (!append) + cout << "Partition log-likelihoods printed to " << filename << endl; + } catch (ios::failure) { + outError(ERR_WRITE_OUTPUT, filename); + } + delete[] pattern_lh; +} + +void printSiteLhCategory(const char*filename, PhyloTree *tree, SiteLoglType wsl) { + + if (wsl == WSL_NONE || wsl == WSL_SITE) + return; + int ncat = tree->getNumLhCat(wsl); + if (tree->isSuperTree()) { + PhyloSuperTree *stree = (PhyloSuperTree*)tree; + for (auto it = stree->begin(); it != stree->end(); it++) { + int part_ncat = (*it)->getNumLhCat(wsl); + if (part_ncat > ncat) + ncat = part_ncat; + } + } + int i; + + + try { + ofstream out; + out.exceptions(ios::failbit | ios::badbit); + out.open(filename); + out << "# Site likelihood per rate/mixture category" << endl + << "# This file can be read in MS Excel or in R with command:" << endl + << "# tab=read.table('" << filename << "',header=TRUE,fill=TRUE)" << endl + << "# Columns are tab-separated with following meaning:" << endl; + if (tree->isSuperTree()) { + out << "# Part: Partition ID (1=" << ((PhyloSuperTree*)tree)->at(0)->aln->name << ", etc)" << endl + << "# Site: Site ID within partition (starting from 1 for each partition)" << endl; + } else + out << "# Site: Alignment site ID" << endl; + + out << "# LnL: Logarithm of site likelihood" << endl + << "# Thus, sum of LnL is equal to tree log-likelihood" << endl + << "# LnLW_k: Logarithm of (category-k site likelihood times 
category-k weight)" << endl + << "# Thus, sum of exp(LnLW_k) is equal to exp(LnL)" << endl; + + if (tree->isSuperTree()) { + out << "Part\tSite\tLnL"; + } else + out << "Site\tLnL"; + for (i = 0; i < ncat; i++) + out << "\tLnLW_" << i+1; + out << endl; + out.precision(4); + out.setf(ios::fixed); + + tree->writeSiteLh(out, wsl); + + out.close(); + cout << "Site log-likelihoods per category printed to " << filename << endl; + /* + if (!tree->isSuperTree()) { + cout << "Log-likelihood of constant sites: " << endl; + double const_prob = 0.0; + for (i = 0; i < tree->aln->getNPattern(); i++) + if (tree->aln->at(i).isConst()) { + Pattern pat = tree->aln->at(i); + for (Pattern::iterator it = pat.begin(); it != pat.end(); it++) + cout << tree->aln->convertStateBackStr(*it); + cout << ": " << pattern_lh[i] << endl; + const_prob += exp(pattern_lh[i]); + } + cout << "Probability of const sites: " << const_prob << endl; + } + */ + } catch (ios::failure) { + outError(ERR_WRITE_OUTPUT, filename); + } + +} + +void printAncestralSequences(const char *out_prefix, PhyloTree *tree, AncestralSeqType ast) { + + // int *joint_ancestral = NULL; + // + // if (tree->params->print_ancestral_sequence == AST_JOINT) { + // joint_ancestral = new int[nptn*tree->leafNum]; + // tree->computeJointAncestralSequences(joint_ancestral); + // } + + string filename = (string)out_prefix + ".state"; + // string filenameseq = (string)out_prefix + ".stateseq"; + + try { + ofstream out; + out.exceptions(ios::failbit | ios::badbit); + out.open(filename.c_str()); + out.setf(ios::fixed, ios::floatfield); + out.precision(5); + + // ofstream outseq; + // outseq.exceptions(ios::failbit | ios::badbit); + // outseq.open(filenameseq.c_str()); + + NodeVector nodes; + tree->getInternalNodes(nodes); + + double *marginal_ancestral_prob; + int *marginal_ancestral_seq; + + // if (tree->params->print_ancestral_sequence == AST_JOINT) + // outseq << 2*(tree->nodeNum-tree->leafNum) << " " << nsites << endl; + // else + // outseq 
<< (tree->nodeNum-tree->leafNum) << " " << nsites << endl; + // + // int name_width = max(tree->aln->getMaxSeqNameLength(),6)+10; + + out << "# Ancestral state reconstruction for all nodes in " << tree->params->out_prefix << ".treefile" << endl + << "# This file can be read in MS Excel or in R with command:" << endl + << "# tab=read.table('" << tree->params->out_prefix << ".state',header=TRUE)" << endl + << "# Columns are tab-separated with following meaning:" << endl + << "# Node: Node name in the tree" << endl; + if (tree->isSuperTree()) { + PhyloSuperTree *stree = (PhyloSuperTree*)tree; + out << "# Part: Partition ID (1=" << stree->at(0)->aln->name << ", etc)" << endl + << "# Site: Site ID within partition (starting from 1 for each partition)" << endl; + } else + out << "# Site: Alignment site ID" << endl; + + out << "# State: Most likely state assignment" << endl + << "# p_X: Posterior probability for state X (empirical Bayesian method)" << endl; + + if (tree->isSuperTree()) { + PhyloSuperTree *stree = (PhyloSuperTree*)tree; + out << "Node\tPart\tSite\tState"; + for (size_t i = 0; i < stree->front()->aln->num_states; i++) + out << "\tp_" << stree->front()->aln->convertStateBackStr(i); + } else { + out << "Node\tSite\tState"; + for (size_t i = 0; i < tree->aln->num_states; i++) + out << "\tp_" << tree->aln->convertStateBackStr(i); + } + out << endl; + + + bool orig_kernel_nonrev; + tree->initMarginalAncestralState(out, orig_kernel_nonrev, marginal_ancestral_prob, marginal_ancestral_seq); + + for (NodeVector::iterator it = nodes.begin(); it != nodes.end(); it++) { + PhyloNode *node = (PhyloNode*)(*it); + PhyloNode *dad = (PhyloNode*)node->neighbors[0]->node; + + tree->computeMarginalAncestralState((PhyloNeighbor*)dad->findNeighbor(node), dad, + marginal_ancestral_prob, marginal_ancestral_seq); + + // int *joint_ancestral_node = joint_ancestral + (node->id - tree->leafNum)*nptn; + + // set node name if neccessary + if (node->name.empty() || 
!isalpha(node->name[0])) { + node->name = "Node" + convertIntToString(node->id-tree->leafNum+1); + } + + // print ancestral state probabilities + tree->writeMarginalAncestralState(out, node, marginal_ancestral_prob, marginal_ancestral_seq); + + // print ancestral sequences + // outseq.width(name_width); + // outseq << left << node->name << " "; + // for (i = 0; i < nsites; i++) + // outseq << tree->aln->convertStateBackStr(marginal_ancestral_seq[pattern_index[i]]); + // outseq << endl; + // + // if (tree->params->print_ancestral_sequence == AST_JOINT) { + // outseq.width(name_width); + // outseq << left << (node->name+"_joint") << " "; + // for (i = 0; i < nsites; i++) + // outseq << tree->aln->convertStateBackStr(joint_ancestral_node[pattern_index[i]]); + // outseq << endl; + // } + } + + tree->endMarginalAncestralState(orig_kernel_nonrev, marginal_ancestral_prob, marginal_ancestral_seq); + + out.close(); + // outseq.close(); + cout << "Ancestral state probabilities printed to " << filename << endl; + // cout << "Ancestral sequences printed to " << filenameseq << endl; + + } catch (ios::failure) { + outError(ERR_WRITE_OUTPUT, filename); + } + + // if (joint_ancestral) + // delete[] joint_ancestral; + +} + +void printSiteProbCategory(const char*filename, PhyloTree *tree, SiteLoglType wsl) { + + if (wsl == WSL_NONE || wsl == WSL_SITE) + return; + // error checking + if (!tree->getModel()->isMixture()) { + if (wsl != WSL_RATECAT) { + outWarning("Switch now to '-wspr' as it is the only option for non-mixture model"); + wsl = WSL_RATECAT; + } + } else { + // mixture model + if (wsl == WSL_MIXTURE_RATECAT && tree->getModelFactory()->fused_mix_rate) { + outWarning("-wspmr is not suitable for fused mixture model, switch now to -wspm"); + wsl = WSL_MIXTURE; + } + } + size_t cat, ncat = tree->getNumLhCat(wsl); + double *ptn_prob_cat = new double[((size_t)tree->getAlnNPattern())*ncat]; + tree->computePatternProbabilityCategory(ptn_prob_cat, wsl); + + try { + ofstream out; + 
out.exceptions(ios::failbit | ios::badbit); + out.open(filename); + if (tree->isSuperTree()) + out << "Set\t"; + out << "Site"; + for (cat = 0; cat < ncat; cat++) + out << "\tp" << cat+1; + out << endl; + IntVector pattern_index; + if (tree->isSuperTree()) { + PhyloSuperTree *super_tree = (PhyloSuperTree*)tree; + size_t offset = 0; + for (PhyloSuperTree::iterator it = super_tree->begin(); it != super_tree->end(); it++) { + size_t part_ncat = (*it)->getNumLhCat(wsl); + (*it)->aln->getSitePatternIndex(pattern_index); + size_t nsite = (*it)->aln->getNSite(); + for (size_t site = 0; site < nsite; ++site) { + out << (it-super_tree->begin())+1 << "\t" << site+1; + double *prob_cat = ptn_prob_cat + (offset+pattern_index[site]*part_ncat); + for (cat = 0; cat < part_ncat; cat++) + out << "\t" << prob_cat[cat]; + out << endl; + } + offset += (*it)->aln->getNPattern()*(*it)->getNumLhCat(wsl); + } + } else { + tree->aln->getSitePatternIndex(pattern_index); + size_t nsite = tree->getAlnNSite(); + for (size_t site = 0; site < nsite; ++site) { + out << site+1; + double *prob_cat = ptn_prob_cat + pattern_index[site]*ncat; + for (cat = 0; cat < ncat; cat++) { + out << "\t" << prob_cat[cat]; + } + out << endl; + } + } + out.close(); + cout << "Site probabilities per category printed to " << filename << endl; + } catch (ios::failure) { + outError(ERR_WRITE_OUTPUT, filename); + } + +} + + +void printSiteStateFreq(const char*filename, PhyloTree *tree, double *state_freqs) { + size_t nsites = tree->getAlnNSite(); + size_t nstates = tree->aln->num_states; + double *ptn_state_freq; + if (state_freqs) { + ptn_state_freq = state_freqs; + } else { + ptn_state_freq = new double[((size_t)tree->getAlnNPattern()) * nstates]; + tree->computePatternStateFreq(ptn_state_freq); + } + + try { + ofstream out; + out.exceptions(ios::failbit | ios::badbit); + out.open(filename); + IntVector pattern_index; + tree->aln->getSitePatternIndex(pattern_index); + for (size_t i = 0; i < nsites; ++i) { + 
out.width(6); + out << left << i+1 << " "; + double *state_freq = &ptn_state_freq[pattern_index[i]*nstates]; + for (size_t j = 0; j < nstates; ++j) { + out.width(15); + out << state_freq[j] << " "; + } + out << endl; + } + out.close(); + cout << "Site state frequency vectors printed to " << filename << endl; + } catch (ios::failure) { + outError(ERR_WRITE_OUTPUT, filename); + } + if (!state_freqs) + delete [] ptn_state_freq; +} + +void printSiteStateFreq(const char* filename, Alignment *aln) { + if (aln->site_state_freq.empty()) + return; + size_t nsites = aln->getNSite(); + int nstates = aln->num_states; + try { + ofstream out; + out.exceptions(ios::failbit | ios::badbit); + out.open(filename); + IntVector pattern_index; + aln->getSitePatternIndex(pattern_index); + for (size_t i = 0; i < nsites; ++i) { + out.width(6); + out << left << i+1 << " "; + double *state_freq = aln->site_state_freq[pattern_index[i]]; + for (size_t j = 0; j < nstates; ++j) { + out.width(15); + out << state_freq[j] << " "; + } + out << endl; + } + out.close(); + cout << "Site state frequency vectors printed to " << filename << endl; + } catch (ios::failure) { + outError(ERR_WRITE_OUTPUT, filename); + } +} + +int countDistinctTrees(const char *filename, bool rooted, IQTree *tree, IntVector &distinct_ids, bool exclude_duplicate) { + StringIntMap treels; + try { + ifstream in; + in.exceptions(ios::failbit | ios::badbit); + in.open(filename); + // remove the failbit + in.exceptions(ios::badbit); + int tree_id; + for (tree_id = 0; !in.eof(); tree_id++) { + if (exclude_duplicate) { + tree->freeNode(); + tree->readTree(in, rooted); + tree->setAlignment(tree->aln); + tree->setRootNode(tree->params->root); + StringIntMap::iterator it = treels.end(); + ostringstream ostr; + tree->printTree(ostr, WT_TAXON_ID | WT_SORT_TAXA); + it = treels.find(ostr.str()); + if (it != treels.end()) { // already in treels + distinct_ids.push_back(it->second); + } else { + distinct_ids.push_back(-1); + treels[ostr.str()] 
= tree_id; + } + } else { + // ignore tree + char ch; + do { + in >> ch; + } while (!in.eof() && ch != ';'); + distinct_ids.push_back(-1); + } + char ch; + in.exceptions(ios::goodbit); + (in) >> ch; + if (in.eof()) break; + in.unget(); + in.exceptions(ios::failbit | ios::badbit); + + } + in.close(); + } catch (ios::failure) { + outError("Cannot read file ", filename); + } + if (exclude_duplicate) + return treels.size(); + else + return distinct_ids.size(); +} + +//const double TOL_RELL_SCORE = 0.01; + +/* + Problem: solve the following linear system equation: + a_1*x + b_1*y = c_1 + a_2*x + b_2*y = c_2 + .... + a_n*x + b_n*y = c_n + + becomes minimizing weighted least square: + + sum_k { w_k*[ c_k - (a_k*x + b_k*y) ]^2 } + + + the solution is: + + x = [(sum_k w_k*b_k*c_k)*(sum_k w_k*a_k*b_k) - (sum_k w_k*a_k*c_k)(sum_k w_k*b_k^2)] / + [ (sum_k w_k*a_k*b_k)^2 - (sum_k w_k*a_k^2)*(sum_k w_k*b_k^2) ] + + y = [(sum_k w_k*a_k*c_k)*(sum_k w_k*a_k*b_k) - (sum_k w_k*b_k*c_k)(sum_k w_k*a_k^2)] / + [ (sum_k w_k*a_k*b_k)^2 - (sum_k w_k*a_k^2)*(sum_k w*k*b_k^2) ] + + @param n number of data points + @param w weight vector of length n + @param a a value vector of length n + @param b b value vector of length n + @param c c value vector of length n + @param[out] x x-value + @param[out] y y-value + @return least square value + */ +void doWeightedLeastSquare(int n, double *w, double *a, double *b, double *c, double &x, double &y, double &se) { + int k; + double BC = 0.0, AB = 0.0, AC = 0.0, A2 = 0.0, B2 = 0.0; + double denom; + for (k = 0; k < n; k++) { + double wa = w[k]*a[k]; + double wb = w[k]*b[k]; + AB += wa*b[k]; + BC += wb*c[k]; + AC += wa*c[k]; + A2 += wa*a[k]; + B2 += wb*b[k]; + } + denom = 1.0/(AB*AB - A2*B2); + x = (BC*AB - AC*B2) * denom; + y = (AC*AB - BC*A2) * denom; + + se = -denom*(B2+A2+2*AB); + ASSERT(se >= 0.0); +} + +/** + MLE estimates for AU test + */ +class OptimizationAUTest : public Optimization { + +public: + + OptimizationAUTest(double d, double c, int 
nscales, double *bp, double *rr, double *rr_inv) { + this->d = d; + this->c = c; + this->bp = bp; + this->rr = rr; + this->rr_inv = rr_inv; + this->nscales = nscales; + + } + + /** + return the number of dimensions + */ + virtual int getNDim() { return 2; } + + + /** + the target function which needs to be optimized + @param x the input vector x + @return the function value at x + */ + virtual double targetFunk(double x[]) { + d = x[1]; + c = x[2]; + double res = 0.0; + for (int k = 0; k < nscales; k++) { + double cdf = gsl_cdf_ugaussian_P(d*rr[k] + c*rr_inv[k]); + res += bp[k] * log(1.0 - cdf) + (1.0-bp[k])*log(cdf); + } + return res; + } + + void optimizeDC() { + double x[3], lower[3], upper[3]; + bool bound_check[3]; + x[1] = d; + x[2] = c; + lower[1] = lower[2] = 1e-4; + upper[1] = upper[2] = 100.0; + bound_check[1] = bound_check[2] = false; + minimizeMultiDimen(x, 2, lower, upper, bound_check, 1e-4); + d = x[1]; + c = x[2]; + } + + double d, c; + int nscales; + double *bp; + double *rr; + double *rr_inv; +}; + + +/* BEGIN CODE WAS TAKEN FROM CONSEL PROGRAM */ + +/* binary search for a sorted vector + find k s.t. vec[k-1] <= t < vec[k] + */ +int cntdist2(double *vec, int bb, double t) +{ + int i,i0,i1; + + i0=0; i1=bb-1; + if(t < vec[0]) return 0; + else if(vec[bb-1] <= t) return bb; + + while(i1-i0>1) { + i=(i0+i1)/2; + if(vec[i] <= t) i0=i; + else i1=i; + } + + return i1; +} + +/* + smoothing the counting for a sorted vector + the piecewise linear function connecting + F(v[i]) = 1/(2n) + i/n, for i=0,...,n-1 + F(1.5v[0]-0.5v[1]) = 0 + F(1.5v[n-1]-0.5v[n-2]) = 1. + + 1. F(x)=0 for x<=1.5v[0]-0.5v[1] + + 2. F(x)=1/(2n) + (1/n)*(x-v[0])/(v[1]-v[0]) + for 1.5v[0]-0.5v[1] < x <= v[0] + + 3. F(x)=1/(2n) + i/n + (1/n)*(x-v[i])/(v[i]-v[i+1]) + for v[i] < x <= v[i+1], i=0,..., + + 4. F(x)=1-(1/2n) + (1/n)*(x-v[n-1])/(v[n-1]-v[n-2]) + for v[n-1] < x <= 1.5v[n-1]-0.5v[n-2] + + 5. 
F(x)=1 for x > 1.5v[n-1]-0.5v[n-2] + */ +double cntdist3(double *vec, int bb, double t) +{ + double p,n; + int i; + i=cntdist2(vec,bb,t)-1; /* to find vec[i] <= t < vec[i+1] */ + n=(double)bb; + if(i<0) { + if(vec[1]>vec[0]) p=0.5+(t-vec[0])/(vec[1]-vec[0]); + else p=0.0; + } else if(ivec[i]) p=0.5+(double)i+(t-vec[i])/(vec[i+1]-vec[i]); + else p=0.5+(double)i; /* <- should never happen */ + } else { + if(vec[bb-1]-vec[bb-2]>0) p=n-0.5+(t-vec[bb-1])/(vec[bb-1]-vec[bb-2]); + else p=n; + } + if(p>n) p=n; else if(p<0.0) p=0.0; + return p; +} + +double log3(double x) +{ + double y,z1,z2,z3,z4,z5; + if(fabs(x)>1.0e-3) { + y=-log(1.0-x); + } else { + z1=x; z2=z1*x; z3=z2*x; z4=z3*x; z5=z4*x; + y=((((z5/5.0)+z4/4.0)+z3/3.0)+z2/2.0)+z1; + } + return y; +} + +int mleloopmax=30; +double mleeps=1e-10; +int mlecoef(double *cnts, double *rr, double bb, int kk, + double *coef0, /* set initinal value (size=2) */ + double *lrt, int *df, /* LRT statistic */ + double *se + ) +{ + int i,m,loop; + double coef[2], update[2]; + double d1f, d2f, d11f, d12f, d22f; /* derivatives */ + double v11, v12, v22; /* inverse of -d??f */ + double a,e; + double s[kk], r[kk],c[kk], b[kk],z[kk],p[kk],d[kk],g[kk],h[kk]; + + m=0; + for(i=0;i0.0 && p[i]<1.0) { + g[i]=d[i]*( d[i]*(-c[i]+2.0*c[i]*p[i]-b[i]*p[i]*p[i])/ + (p[i]*p[i]*(1.0-p[i])*(1.0-p[i])) + + z[i]*(c[i]-b[i]*p[i])/(p[i]*(1.0-p[i])) ); + h[i]=d[i]*(c[i]-b[i]*p[i])/(p[i]*(1.0-p[i])); + } else { g[i]=h[i]=0.0; } + d1f+= -h[i]*s[i]; d2f+= -h[i]/s[i]; + d11f+= g[i]*r[i]; d12f+= g[i]; d22f+= g[i]/r[i]; + } + + a=d11f*d22f-d12f*d12f; + if(a==0.0) { + return 2; + } + v11=-d22f/a; v12=d12f/a; v22=-d11f/a; + + /* Newton-Raphson update */ + update[0]=v11*d1f+v12*d2f; update[1]=v12*d1f+v22*d2f; + coef[0]+=update[0]; coef[1]+=update[1]; + + /* check convergence */ + e=-d11f*update[0]*update[0]-2.0*d12f*update[0]*update[1] + -d22f*update[1]*update[1]; + + if(e0.0 && p[i]<1.0) { + *df+=1; + if(c[i]>0.0) a=c[i]*log(c[i]/b[i]/p[i]); else a=0.0; + if(c[i] 
&info) { + + if (params.topotest_replicates < 10000) + outWarning("Too few replicates for AU test. At least -zb 10000 for reliable results!"); + + /* STEP 1: specify scale factors */ + size_t nscales = 10; + double r[] = {0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4}; + double rr[] = {sqrt(0.5), sqrt(0.6), sqrt(0.7), sqrt(0.8), sqrt(0.9), 1.0, + sqrt(1.1), sqrt(1.2), sqrt(1.3), sqrt(1.4)}; + double rr_inv[] = {sqrt(1/0.5), sqrt(1/0.6), sqrt(1/0.7), sqrt(1/0.8), sqrt(1/0.9), 1.0, + sqrt(1/1.1), sqrt(1/1.2), sqrt(1/1.3), sqrt(1/1.4)}; + + /* STEP 2: compute bootstrap proportion */ + size_t ntrees = info.size(); + size_t nboot = params.topotest_replicates; + // double nboot_inv = 1.0 / nboot; + + size_t nptn = tree->getAlnNPattern(); + size_t maxnptn = get_safe_upper_limit(nptn); + + // double *bp = new double[ntrees*nscales]; + // memset(bp, 0, sizeof(double)*ntrees*nscales); + + double *treelhs; + cout << (ntrees*nscales*nboot*sizeof(double) >> 20) << " MB required for AU test" << endl; + treelhs = new double[ntrees*nscales*nboot]; + if (!treelhs) + outError("Not enough memory to perform AU test!"); + + size_t k, tid, ptn; + + double start_time = getRealTime(); + + cout << "Generating " << nscales << " x " << nboot << " multiscale bootstrap replicates... 
"; + +#ifdef _OPENMP +#pragma omp parallel private(k, tid, ptn) + { + int *rstream; + init_random(params.ran_seed + omp_get_thread_num(), false, &rstream); +#else + int *rstream = randstream; +#endif + size_t boot; + int *boot_sample = aligned_alloc(maxnptn); + memset(boot_sample, 0, maxnptn*sizeof(int)); + + double *boot_sample_dbl = aligned_alloc(maxnptn); + +#ifdef _OPENMP +#pragma omp for schedule(dynamic) +#endif + for (int k = 0; k < nscales; ++k) { + string str = "SCALE=" + convertDoubleToString(r[k]); + for (boot = 0; boot < nboot; boot++) { + if (r[k] == 1.0 && boot == 0) + // 2018-10-23: get one of the bootstrap sample as the original alignment + tree->aln->getPatternFreq(boot_sample); + else + tree->aln->createBootstrapAlignment(boot_sample, str.c_str(), rstream); + for (ptn = 0; ptn < maxnptn; ptn++) + boot_sample_dbl[ptn] = boot_sample[ptn]; + double max_lh = -DBL_MAX, second_max_lh = -DBL_MAX; + int max_tid = -1; + for (tid = 0; tid < ntrees; tid++) { + double *pattern_lh = pattern_lhs + (tid*maxnptn); + double tree_lh; + if (params.SSE == LK_386) { + tree_lh = 0.0; + for (ptn = 0; ptn < nptn; ptn++) + tree_lh += pattern_lh[ptn] * boot_sample_dbl[ptn]; + } else { + tree_lh = tree->dotProductDoubleCall(pattern_lh, boot_sample_dbl, nptn); + } + // rescale lh + tree_lh /= r[k]; + + // find the max and second max + if (tree_lh > max_lh) { + second_max_lh = max_lh; + max_lh = tree_lh; + max_tid = tid; + } else if (tree_lh > second_max_lh) + second_max_lh = tree_lh; + + treelhs[(tid*nscales+k)*nboot + boot] = tree_lh; + } + + // compute difference from max_lh + for (tid = 0; tid < ntrees; tid++) + if (tid != max_tid) + treelhs[(tid*nscales+k)*nboot + boot] = max_lh - treelhs[(tid*nscales+k)*nboot + boot]; + else + treelhs[(tid*nscales+k)*nboot + boot] = second_max_lh - max_lh; + // bp[k*ntrees+max_tid] += nboot_inv; + } // for boot + + // sort the replicates + for (tid = 0; tid < ntrees; tid++) { + quicksort(treelhs + (tid*nscales+k)*nboot, 0, nboot-1); + } 
+ + } // for scale + + aligned_free(boot_sample_dbl); + aligned_free(boot_sample); + +#ifdef _OPENMP + finish_random(rstream); + } +#endif + + // if (verbose_mode >= VB_MED) { + // cout << "scale"; + // for (k = 0; k < nscales; k++) + // cout << "\t" << r[k]; + // cout << endl; + // for (tid = 0; tid < ntrees; tid++) { + // cout << tid; + // for (k = 0; k < nscales; k++) { + // cout << "\t" << bp[tid+k*ntrees]; + // } + // cout << endl; + // } + // } + + cout << getRealTime() - start_time << " seconds" << endl; + + /* STEP 3: weighted least square fit */ + + double *cc = new double[nscales]; + double *w = new double[nscales]; + double *this_bp = new double[nscales]; + cout << "TreeID\tAU\tRSS\td\tc" << endl; + for (tid = 0; tid < ntrees; tid++) { + double *this_stat = treelhs + tid*nscales*nboot; + double xn = this_stat[(nscales/2)*nboot + nboot/2], x; + double c, d; // c, d in original paper + int idf0 = -2; + double z = 0.0, z0 = 0.0, thp = 0.0, th = 0.0, ze = 0.0, ze0 = 0.0; + double pval, se; + int df; + double rss = 0.0; + int step; + const int max_step = 30; + bool failed = false; + for (step = 0; step < max_step; step++) { + x = xn; + int num_k = 0; + for (k = 0; k < nscales; k++) { + this_bp[k] = cntdist3(this_stat + k*nboot, nboot, x) / nboot; + if (this_bp[k] <= 0 || this_bp[k] >= 1) { + cc[k] = w[k] = 0.0; + } else { + double bp_val = this_bp[k]; + cc[k] = -gsl_cdf_ugaussian_Pinv(bp_val); + double bp_pdf = gsl_ran_ugaussian_pdf(cc[k]); + w[k] = bp_pdf*bp_pdf*nboot / (bp_val*(1.0-bp_val)); + num_k++; + } + } + df = num_k-2; + if (num_k >= 2) { + // first obtain d and c by weighted least square + doWeightedLeastSquare(nscales, w, rr, rr_inv, cc, d, c, se); + + // maximum likelhood fit + double coef0[2] = {d, c}; + int mlefail = mlecoef(this_bp, r, nboot, nscales, coef0, &rss, &df, &se); + + if (!mlefail) { + d = coef0[0]; + c = coef0[1]; + } + + se = gsl_ran_ugaussian_pdf(d-c)*sqrt(se); + + // second, perform MLE estimate of d and c + // OptimizationAUTest 
mle(d, c, nscales, this_bp, rr, rr_inv); + // mle.optimizeDC(); + // d = mle.d; + // c = mle.c; + + /* STEP 4: compute p-value according to Eq. 11 */ + pval = gsl_cdf_ugaussian_Q(d-c); + z = -pval; + ze = se; + // compute sum of squared difference + rss = 0.0; + for (k = 0; k < nscales; k++) { + double diff = cc[k] - (rr[k]*d + rr_inv[k]*c); + rss += w[k] * diff * diff; + } + + } else { + // not enough data for WLS + int num0 = 0; + for (k = 0; k < nscales; k++) + if (this_bp[k] <= 0.0) num0++; + if (num0 > nscales/2) + pval = 0.0; + else + pval = 1.0; + se = 0.0; + d = c = 0.0; + rss = 0.0; + if (verbose_mode >= VB_MED) + cout << " error in wls" << endl; + //info[tid].au_pvalue = pval; + //break; + } + + + if (verbose_mode >= VB_MED) { + cout.unsetf(ios::fixed); + cout << "\t" << step << "\t" << th << "\t" << x << "\t" << pval << "\t" << se << "\t" << nscales-2 << "\t" << d << "\t" << c << "\t" << z << "\t" << ze << "\t" << rss << endl; + } + + if(df < 0 && idf0 < 0) { failed = true; break;} /* degenerated */ + + if ((df < 0) || (idf0 >= 0 && (z-z0)*(x-thp) > 0.0 && fabs(z-z0)>0.1*ze0)) { + if (verbose_mode >= VB_MED) + cout << " non-monotone" << endl; + th=x; + xn=0.5*x+0.5*thp; + continue; + } + if(idf0 >= 0 && (fabs(z-z0)<0.01*ze0)) { + if(fabs(th)<1e-10) + xn=th; + else th=x; + } else + xn=0.5*th+0.5*x; + info[tid].au_pvalue = pval; + thp=x; + z0=z; + ze0=ze; + idf0 = df; + if(fabs(x-th)<1e-10) break; + } // for step + + if (failed && verbose_mode >= VB_MED) + cout << " degenerated" << endl; + + if (step == max_step) { + if (verbose_mode >= VB_MED) + cout << " non-convergence" << endl; + failed = true; + } + + double pchi2 = (failed) ? 
0.0 : computePValueChiSquare(rss, df); + cout << tid+1 << "\t" << info[tid].au_pvalue << "\t" << rss << "\t" << d << "\t" << c; + + // warning if p-value of chi-square < 0.01 (rss too high) + if (pchi2 < 0.01) + cout << " !!!"; + cout << endl; + } + + delete [] this_bp; + delete [] w; + delete [] cc; + + cout << "Time for AU test: " << getRealTime() - start_time << " seconds" << endl; + // delete [] bp; +} + + +void evaluateTrees(string treeset_file, Params ¶ms, IQTree *tree, vector &info, IntVector &distinct_ids) +{ + if (treeset_file.empty()) + return; + cout << endl; + //MTreeSet trees(treeset_file, params.is_rooted, params.tree_burnin, params.tree_max_count); + cout << "Reading trees in " << treeset_file << " ..." << endl; + size_t ntrees = countDistinctTrees(treeset_file.c_str(), params.is_rooted, tree, distinct_ids, params.distinct_trees); + if (ntrees < distinct_ids.size()) { + cout << "WARNING: " << distinct_ids.size() << " trees detected but only " << ntrees << " distinct trees will be evaluated" << endl; + } else { + cout << ntrees << (params.distinct_trees ? 
" distinct" : "") << " trees detected" << endl; + } + if (ntrees == 0) return; + ifstream in(treeset_file); + + //if (trees.size() == 1) return; + //string tree_file = treeset_file; + string tree_file = params.out_prefix; + tree_file += ".trees"; + ofstream treeout; + //if (!params.fixed_branch_length) { + treeout.open(tree_file.c_str()); + //} + string score_file = params.out_prefix; + score_file += ".treelh"; + ofstream scoreout; + if (params.print_tree_lh) + scoreout.open(score_file.c_str()); + string site_lh_file = params.out_prefix; + site_lh_file += ".sitelh"; + if (params.print_site_lh) { + ofstream site_lh_out(site_lh_file.c_str()); + site_lh_out << ntrees << " " << tree->getAlnNSite() << endl; + site_lh_out.close(); + } + + if (params.print_partition_lh && !tree->isSuperTree()) { + outWarning("-wpl does not work with non-partition model"); + params.print_partition_lh = false; + } + string part_lh_file = params.out_prefix; + part_lh_file += ".partlh"; + if (params.print_partition_lh) { + ofstream part_lh_out(part_lh_file.c_str()); + part_lh_out << ntrees << " " << ((PhyloSuperTree*)tree)->size() << endl; + part_lh_out.close(); + } + + double time_start = getRealTime(); + + int *boot_samples = NULL; + //double *saved_tree_lhs = NULL; + double *tree_lhs = NULL; // RELL score matrix of size #trees x #replicates + double *pattern_lh = NULL; + double *pattern_lhs = NULL; + double *orig_tree_lh = NULL; // Original tree log-likelihoods + double *max_lh = NULL; + double *lhdiff_weights = NULL; + size_t nptn = tree->getAlnNPattern(); + size_t maxnptn = get_safe_upper_limit(nptn); + + if (params.topotest_replicates && ntrees > 1) { + size_t mem_size = (size_t)params.topotest_replicates*nptn*sizeof(int) + + ntrees*params.topotest_replicates*sizeof(double) + + (nptn + ntrees*3 + params.topotest_replicates*2)*sizeof(double) + + ntrees*sizeof(TreeInfo) + + params.do_weighted_test*(ntrees * nptn * sizeof(double) + ntrees*ntrees*sizeof(double)); + cout << "Note: " << 
((double)mem_size/1024)/1024 << " MB of RAM required!" << endl; + if (mem_size > getMemorySize()-100000) + outWarning("The required memory does not fit in RAM!"); + cout << "Creating " << params.topotest_replicates << " bootstrap replicates..." << endl; + if (!(boot_samples = new int [params.topotest_replicates*nptn])) + outError(ERR_NO_MEMORY); +#ifdef _OPENMP +#pragma omp parallel if(nptn > 10000) + { + int *rstream; + init_random(params.ran_seed + omp_get_thread_num(), false, &rstream); +#pragma omp for schedule(static) +#else + int *rstream = randstream; +#endif + for (size_t boot = 0; boot < params.topotest_replicates; boot++) + if (boot == 0) + tree->aln->getPatternFreq(boot_samples + (boot*nptn)); + else + tree->aln->createBootstrapAlignment(boot_samples + (boot*nptn), params.bootstrap_spec, rstream); +#ifdef _OPENMP + finish_random(rstream); + } +#endif + cout << "done" << endl; + //if (!(saved_tree_lhs = new double [ntrees * params.topotest_replicates])) + // outError(ERR_NO_MEMORY); + if (!(tree_lhs = new double [ntrees * params.topotest_replicates])) + outError(ERR_NO_MEMORY); + if (params.do_weighted_test || params.do_au_test) { + if (!(lhdiff_weights = new double [ntrees * ntrees])) + outError(ERR_NO_MEMORY); + pattern_lhs = aligned_alloc(ntrees*maxnptn); + // if (!(pattern_lhs = new double[ntrees* nptn])) + // outError(ERR_NO_MEMORY); + } + pattern_lh = aligned_alloc(maxnptn); + // if (!(pattern_lh = new double[nptn])) + // outError(ERR_NO_MEMORY); + if (!(orig_tree_lh = new double[ntrees])) + outError(ERR_NO_MEMORY); + if (!(max_lh = new double[params.topotest_replicates])) + outError(ERR_NO_MEMORY); + } + int tree_index, tid, tid2; + info.resize(ntrees); + //for (MTreeSet::iterator it = trees.begin(); it != trees.end(); it++, tree_index++) { + for (tree_index = 0, tid = 0; tree_index < distinct_ids.size(); tree_index++) { + + cout << "Tree " << tree_index + 1; + if (distinct_ids[tree_index] >= 0) { + cout << " / identical to tree " << 
distinct_ids[tree_index]+1 << endl; + // ignore tree + char ch; + do { + in >> ch; + } while (!in.eof() && ch != ';'); + continue; + } + tree->freeNode(); + tree->readTree(in, tree->rooted); + if (!tree->findNodeName(tree->aln->getSeqName(0))) { + outError("Taxon " + tree->aln->getSeqName(0) + " not found in tree"); + } + + if (tree->rooted && tree->getModel()->isReversible()) { + if (tree->leafNum != tree->aln->getNSeq()+1) + outError("Tree does not have same number of taxa as alignment"); + tree->convertToUnrooted(); + } else if (!tree->rooted && !tree->getModel()->isReversible()) { + if (tree->leafNum != tree->aln->getNSeq()) + outError("Tree does not have same number of taxa as alignment"); + tree->convertToRooted(); + } + tree->setAlignment(tree->aln); + tree->setRootNode(params.root); + if (tree->isSuperTree()) + ((PhyloSuperTree*) tree)->mapTrees(); + + tree->initializeAllPartialLh(); + tree->fixNegativeBranch(false); + if (params.fixed_branch_length) { + tree->setCurScore(tree->computeLikelihood()); + } else if (params.topotest_optimize_model) { + tree->getModelFactory()->optimizeParameters(BRLEN_OPTIMIZE, false, params.modelEps); + tree->setCurScore(tree->computeLikelihood()); + } else { + tree->setCurScore(tree->optimizeAllBranches(100, 0.001)); + } + treeout << "[ tree " << tree_index+1 << " lh=" << tree->getCurScore() << " ]"; + tree->printTree(treeout); + treeout << endl; + if (params.print_tree_lh) + scoreout << tree->getCurScore() << endl; + + cout << " / LogL: " << tree->getCurScore() << endl; + + if (pattern_lh) { + double curScore = tree->getCurScore(); + memset(pattern_lh, 0, maxnptn*sizeof(double)); + tree->computePatternLikelihood(pattern_lh, &curScore); + if (params.do_weighted_test || params.do_au_test) + memcpy(pattern_lhs + tid*maxnptn, pattern_lh, maxnptn*sizeof(double)); + } + if (params.print_site_lh) { + string tree_name = "Tree" + convertIntToString(tree_index+1); + printSiteLh(site_lh_file.c_str(), tree, pattern_lh, true, 
tree_name.c_str()); + } + if (params.print_partition_lh) { + string tree_name = "Tree" + convertIntToString(tree_index+1); + printPartitionLh(part_lh_file.c_str(), tree, pattern_lh, true, tree_name.c_str()); + } + info[tid].logl = tree->getCurScore(); + + if (!params.topotest_replicates || ntrees <= 1) { + tid++; + continue; + } + // now compute RELL scores + orig_tree_lh[tid] = tree->getCurScore(); + double *tree_lhs_offset = tree_lhs + (tid*params.topotest_replicates); + for (size_t boot = 0; boot < params.topotest_replicates; boot++) { + double lh = 0.0; + int *this_boot_sample = boot_samples + (boot*nptn); + for (size_t ptn = 0; ptn < nptn; ptn++) + lh += pattern_lh[ptn] * this_boot_sample[ptn]; + tree_lhs_offset[boot] = lh; + } + tid++; + } + + ASSERT(tid == ntrees); + + if (params.topotest_replicates && ntrees > 1) { + double *tree_probs = new double[ntrees]; + memset(tree_probs, 0, ntrees*sizeof(double)); + int *tree_ranks = new int[ntrees]; + + /* perform RELL BP method */ + cout << "Performing RELL-BP test..." 
<< endl; + int *maxtid = new int[params.topotest_replicates]; + double *maxL = new double[params.topotest_replicates]; + int *maxcount = new int[params.topotest_replicates]; + memset(maxtid, 0, params.topotest_replicates*sizeof(int)); + memcpy(maxL, tree_lhs, params.topotest_replicates*sizeof(double)); + for (size_t boot = 0; boot < params.topotest_replicates; ++boot) + maxcount[boot] = 1; + for (tid = 1; tid < ntrees; tid++) { + double *tree_lhs_offset = tree_lhs + (tid * params.topotest_replicates); + for (size_t boot = 0; boot < params.topotest_replicates; ++boot) + if (tree_lhs_offset[boot] > maxL[boot] + params.ufboot_epsilon) { + maxL[boot] = tree_lhs_offset[boot]; + maxtid[boot] = tid; + maxcount[boot] = 1; + } else if (tree_lhs_offset[boot] > maxL[boot] - params.ufboot_epsilon && + random_double() <= 1.0/(maxcount[boot]+1)) { + maxL[boot] = max(maxL[boot],tree_lhs_offset[boot]); + maxtid[boot] = tid; + maxcount[boot]++; + } + } + for ( size_t boot = 0; boot < params.topotest_replicates; ++boot) + tree_probs[maxtid[boot]] += 1.0; + for (tid = 0; tid < ntrees; tid++) { + tree_probs[tid] /= params.topotest_replicates; + info[tid].rell_confident = false; + info[tid].rell_bp = tree_probs[tid]; + } + sort_index(tree_probs, tree_probs + ntrees, tree_ranks); + double prob_sum = 0.0; + // obtain the confidence set + for (tid = ntrees-1; tid >= 0; tid--) { + info[tree_ranks[tid]].rell_confident = true; + prob_sum += tree_probs[tree_ranks[tid]]; + if (prob_sum > 0.95) break; + } + + // sanity check + for (tid = 0, prob_sum = 0.0; tid < ntrees; tid++) + prob_sum += tree_probs[tid]; + if (fabs(prob_sum-1.0) > 0.01) + outError("Internal error: Wrong ", __func__); + + delete [] maxcount; + delete [] maxL; + delete [] maxtid; + + /* now do the SH test */ + cout << "Performing KH and SH test..." 
<< endl; + // SH centering step + for (size_t boot = 0; boot < params.topotest_replicates; ++boot) + max_lh[boot] = -DBL_MAX; + double *avg_lh = new double[ntrees]; + for (tid = 0; tid < ntrees; tid++) { + avg_lh[tid] = 0.0; + double *tree_lhs_offset = tree_lhs + (tid * params.topotest_replicates); + for (size_t boot = 0; boot < params.topotest_replicates; ++boot) + avg_lh[tid] += tree_lhs_offset[boot]; + avg_lh[tid] /= params.topotest_replicates; + for (size_t boot = 0; boot < params.topotest_replicates; ++boot) { + max_lh[boot] = max(max_lh[boot], tree_lhs_offset[boot] - avg_lh[tid]); + } + } + + double orig_max_lh = orig_tree_lh[0]; + size_t orig_max_id = 0; + double orig_2ndmax_lh = -DBL_MAX; + size_t orig_2ndmax_id = -1; + // find the max tree ID + for (tid = 1; tid < ntrees; tid++) + if (orig_max_lh < orig_tree_lh[tid]) { + orig_max_lh = orig_tree_lh[tid]; + orig_max_id = tid; + } + // find the 2nd max tree ID + for (tid = 0; tid < ntrees; tid++) + if (tid != orig_max_id && orig_2ndmax_lh < orig_tree_lh[tid]) { + orig_2ndmax_lh = orig_tree_lh[tid]; + orig_2ndmax_id = tid; + } + + + // SH compute p-value + for (tid = 0; tid < ntrees; tid++) { + double *tree_lhs_offset = tree_lhs + (tid * params.topotest_replicates); + // SH compute original deviation from max_lh + info[tid].kh_pvalue = 0.0; + info[tid].sh_pvalue = 0.0; + size_t max_id = (tid != orig_max_id) ? 
orig_max_id : orig_2ndmax_id; + double orig_diff = orig_tree_lh[max_id] - orig_tree_lh[tid] - avg_lh[tid]; + double *max_kh = tree_lhs + (max_id * params.topotest_replicates); + for (size_t boot = 0; boot < params.topotest_replicates; ++boot) { + if (max_lh[boot] - tree_lhs_offset[boot] > orig_diff) + info[tid].sh_pvalue += 1.0; + //double max_kh_here = max(max_kh[boot]-avg_lh[max_id], tree_lhs_offset[boot]-avg_lh[tid]); + double max_kh_here = (max_kh[boot]-avg_lh[max_id]); + if (max_kh_here - tree_lhs_offset[boot] > orig_diff) + info[tid].kh_pvalue += 1.0; + } + info[tid].sh_pvalue /= params.topotest_replicates; + info[tid].kh_pvalue /= params.topotest_replicates; + } + + if (params.do_weighted_test) { + + cout << "Computing pairwise logl difference variance ..." << endl; + /* computing lhdiff_weights as 1/sqrt(lhdiff_variance) */ + for (tid = 0; tid < ntrees; tid++) { + double *pattern_lh1 = pattern_lhs + (tid * maxnptn); + lhdiff_weights[tid*ntrees+tid] = 0.0; + for (tid2 = tid+1; tid2 < ntrees; tid2++) { + double lhdiff_variance = tree->computeLogLDiffVariance(pattern_lh1, pattern_lhs + (tid2*maxnptn)); + lhdiff_weights[tid*ntrees+tid2] = 1.0/sqrt(lhdiff_variance); + lhdiff_weights[tid2*ntrees+tid] = lhdiff_weights[tid*ntrees+tid2]; + } + } + + // Weighted KH and SH test + cout << "Performing WKH and WSH test..." 
<< endl; + for (tid = 0; tid < ntrees; tid++) { + double *tree_lhs_offset = tree_lhs + (tid * params.topotest_replicates); + info[tid].wkh_pvalue = 0.0; + info[tid].wsh_pvalue = 0.0; + double worig_diff = -DBL_MAX; + size_t max_id = -1; + for (tid2 = 0; tid2 < ntrees; tid2++) + if (tid2 != tid) { + double wdiff = (orig_tree_lh[tid2] - orig_tree_lh[tid])*lhdiff_weights[tid*ntrees+tid2]; + if (wdiff > worig_diff) { + worig_diff = wdiff; + max_id = tid2; + } + } + for (size_t boot = 0; boot < params.topotest_replicates; ++boot) { + double wmax_diff = -DBL_MAX; + for (tid2 = 0; tid2 < ntrees; tid2++) + if (tid2 != tid) + wmax_diff = max(wmax_diff, + (tree_lhs[tid2*params.topotest_replicates+boot] - avg_lh[tid2] - + tree_lhs_offset[boot] + avg_lh[tid]) * lhdiff_weights[tid*ntrees+tid2]); + if (wmax_diff > worig_diff) + info[tid].wsh_pvalue += 1.0; + wmax_diff = (tree_lhs[max_id*params.topotest_replicates+boot] - avg_lh[max_id] - + tree_lhs_offset[boot] + avg_lh[tid]); + if (wmax_diff > orig_tree_lh[max_id] - orig_tree_lh[tid]) + info[tid].wkh_pvalue += 1.0; + } + info[tid].wsh_pvalue /= params.topotest_replicates; + info[tid].wkh_pvalue /= params.topotest_replicates; + } + } + + delete [] avg_lh; + + /* now to ELW - Expected Likelihood Weight method */ + cout << "Performing ELW test..." 
<< endl; + + for (size_t boot = 0; boot < params.topotest_replicates; ++boot) + max_lh[boot] = -DBL_MAX; + for (tid = 0; tid < ntrees; tid++) { + double *tree_lhs_offset = tree_lhs + (tid * params.topotest_replicates); + for (size_t boot = 0; boot < params.topotest_replicates; ++boot) + max_lh[boot] = max(max_lh[boot], tree_lhs_offset[boot]); + } + double *sumL = new double[params.topotest_replicates]; + memset(sumL, 0, sizeof(double) * params.topotest_replicates); + for (tid = 0; tid < ntrees; tid++) { + double *tree_lhs_offset = tree_lhs + (tid * params.topotest_replicates); + for (size_t boot = 0; boot < params.topotest_replicates; ++boot) { + tree_lhs_offset[boot] = exp(tree_lhs_offset[boot] - max_lh[boot]); + sumL[boot] += tree_lhs_offset[boot]; + } + } + for (tid = 0; tid < ntrees; tid++) { + double *tree_lhs_offset = tree_lhs + (tid * params.topotest_replicates); + tree_probs[tid] = 0.0; + for (size_t boot = 0; boot < params.topotest_replicates; ++boot) { + tree_probs[tid] += (tree_lhs_offset[boot] / sumL[boot]); + } + tree_probs[tid] /= params.topotest_replicates; + info[tid].elw_confident = false; + info[tid].elw_value = tree_probs[tid]; + } + + sort_index(tree_probs, tree_probs + ntrees, tree_ranks); + prob_sum = 0.0; + // obtain the confidence set + for (tid = ntrees-1; tid >= 0; tid--) { + info[tree_ranks[tid]].elw_confident = true; + prob_sum += tree_probs[tree_ranks[tid]]; + if (prob_sum > 0.95) break; + } + + // sanity check + for (tid = 0, prob_sum = 0.0; tid < ntrees; tid++) + prob_sum += tree_probs[tid]; + if (fabs(prob_sum-1.0) > 0.01) + outError("Internal error: Wrong ", __func__); + delete [] sumL; + + if (params.do_au_test) { + cout << "Performing approximately unbiased (AU) test..." 
<< endl; + performAUTest(params, tree, pattern_lhs, info); + } + + delete [] tree_ranks; + delete [] tree_probs; + + } + delete [] max_lh; + delete [] orig_tree_lh; + aligned_free(pattern_lh); + aligned_free(pattern_lhs); + delete [] lhdiff_weights; + delete [] tree_lhs; + delete [] boot_samples; + + if (params.print_tree_lh) { + scoreout.close(); + } + + treeout.close(); + in.close(); + + cout << "Time for evaluating all trees: " << getRealTime() - time_start << " sec." << endl; + +} + + +void evaluateTrees(string treeset_file, Params ¶ms, IQTree *tree) { + vector info; + IntVector distinct_ids; + evaluateTrees(treeset_file, params, tree, info, distinct_ids); +} + + + diff --git a/main/treetesting.h b/main/treetesting.h new file mode 100644 index 000000000..2f6e739a5 --- /dev/null +++ b/main/treetesting.h @@ -0,0 +1,102 @@ +/* + * treetesting.h + * + * Created on: Sep 21, 2019 + * Author: minh + */ + +#ifndef TREETESTING_H_ +#define TREETESTING_H_ + +#include "utils/tools.h" +#include "alignment/alignment.h" + +class PhyloTree; +class IQTree; + +struct TreeInfo { + double logl; // log likelihood + double se; // standard error of deltaL (logl difference to max), or square root of variance + double rell_bp; // bootstrap proportion by RELL method + bool rell_confident; // confidence set for RELL-BP + double sh_pvalue; // p-value by Shimodaira-Hasegawa test + double wsh_pvalue; // p-value by weighted Shimodaira-Hasegawa test + double kh_pvalue; // p-value by Kishino-Hasegawa test + double wkh_pvalue; // p-value by weighted Kishino-Hasegawa test + double elw_value; // ELW - expected likelihood weights test + bool elw_confident; // to represent confidence set of ELW test + double au_pvalue; // p-value by approximately unbiased (AU) test +}; + + +/** + * print site log likelihoods to a fileExists + * @param filename output file name + * @param tree phylogenetic tree + * @param ptn_lh pattern log-likelihoods, will be computed if NULL + * @param append TRUE to append to 
existing file, FALSE otherwise + * @param linename name of the line, default "Site_Lh" if NULL + */ +void printSiteLh(const char*filename, PhyloTree *tree, double *ptn_lh = NULL, + bool append = false, const char *linename = NULL); + +/** + * print partition log likelihoods to a file + * @param filename output file name + * @param tree phylogenetic tree + * @param ptn_lh pattern log-likelihoods, will be computed if NULL + * @param append TRUE to append to existing file, FALSE otherwise + * @param linename name of the line, default "Site_Lh" if NULL + */ +void printPartitionLh(const char*filename, PhyloTree *tree, double *ptn_lh = NULL, + bool append = false, const char *linename = NULL); + +/** + * print site log likelihoods per category to a file + * @param filename output file name + * @param tree phylogenetic tree + */ +void printSiteLhCategory(const char*filename, PhyloTree *tree, SiteLoglType wsl); + +/** + * print site posterior probabilities per rate/mixture category to a file + * @param filename output file name + * @param tree phylogenetic tree + */ +void printSiteProbCategory(const char*filename, PhyloTree *tree, SiteLoglType wsl); + +/** + * print site state frequency vectors (for Huaichun) + * @param filename output file name + * @param tree phylogenetic tree +*/ +void printSiteStateFreq(const char*filename, PhyloTree *tree, double *state_freqs = NULL); + +/** + * print site state frequency vectors (for Huaichun) + * @param filename output file name + * @param aln alignment +*/ +void printSiteStateFreq(const char* filename, Alignment *aln); + +/** + print ancestral sequences + @param filename output file name + @param tree phylogenetic tree + @param ast either AST_MARGINAL or AST_JOINT +*/ +void printAncestralSequences(const char*filename, PhyloTree *tree, AncestralSeqType ast); + +/** + * Evaluate user-trees with possibility of tree topology tests + * @param params program parameters + * @param tree current tree + * @param info (OUT) output information 
+ * @param distinct_ids IDs of distinct trees + */ +void evaluateTrees(string treeset_file, Params ¶ms, IQTree *tree, vector &info, IntVector &distinct_ids); + +void evaluateTrees(string treeset_file, Params ¶ms, IQTree *tree); + + +#endif // TREETESTING_H_ diff --git a/model/CMakeLists.txt b/model/CMakeLists.txt index ded00eec0..8564be1f6 100644 --- a/model/CMakeLists.txt +++ b/model/CMakeLists.txt @@ -2,6 +2,7 @@ add_library(model modelmarkov.cpp modelmarkov.h modelbin.cpp modelbin.h modeldna.cpp modeldna.h +modeldnaerror.cpp modeldnaerror.h modelfactory.cpp modelfactory.h modelprotein.cpp modelprotein.h modelset.cpp modelset.h diff --git a/model/modelbin.cpp b/model/modelbin.cpp index e948ff163..b3dbbb39c 100644 --- a/model/modelbin.cpp +++ b/model/modelbin.cpp @@ -32,12 +32,18 @@ void ModelBIN::init(const char *model_name, string model_params, StateFreqType f name = model_name; full_name = model_name; if (name == "JC2") { - freq = FREQ_EQUAL; + def_freq = FREQ_EQUAL; } else if (name == "GTR2") { - freq = FREQ_ESTIMATE; + def_freq = FREQ_ESTIMATE; } else { readParameters(model_name); } + if (freq_params != "") { + readStateFreq(freq_params); + } + if (model_params != "") { + readRates(model_params); + } if (freq == FREQ_UNKNOWN || def_freq == FREQ_EQUAL) freq = def_freq; ModelMarkov::init(freq); } @@ -45,3 +51,28 @@ void ModelBIN::init(const char *model_name, string model_params, StateFreqType f void ModelBIN::startCheckpoint() { checkpoint->startStruct("ModelBIN"); } + +string ModelBIN::getNameParams() { + //if (num_params == 0) return name; + ostringstream retname; + retname << name; +// if (!fixed_parameters) { +// retname << '{'; +// int nrates = getNumRateEntries(); +// for (int i = 0; i < nrates; i++) { +// if (i>0) retname << ','; +// retname << rates[i]; +// } +// retname << '}'; +// } +// getNameParamsFreq(retname); + retname << freqTypeString(freq_type, phylo_tree->aln->seq_type, true); + if (freq_type == FREQ_EMPIRICAL || freq_type == FREQ_ESTIMATE || 
+ (freq_type == FREQ_USER_DEFINED)) { + retname << "{" << state_freq[0]; + for (int i = 1; i < num_states; i++) + retname << "," << state_freq[i]; + retname << "}"; + } + return retname.str(); +} diff --git a/model/modelbin.h b/model/modelbin.h index 1eba72a38..ef93e6b8c 100644 --- a/model/modelbin.h +++ b/model/modelbin.h @@ -48,7 +48,7 @@ class ModelBIN : public ModelMarkov /** * @return model name with parameters in form of e.g. GTR{a,b,c,d,e,f} */ - virtual string getNameParams() { return name; } + virtual string getNameParams(); /** start structure for checkpointing diff --git a/model/modelcodon.cpp b/model/modelcodon.cpp index 803f66789..759651434 100644 --- a/model/modelcodon.cpp +++ b/model/modelcodon.cpp @@ -10,7 +10,7 @@ const double MIN_OMEGA_KAPPA = 0.001; -const double MAX_OMEGA_KAPPA = 100.0; +const double MAX_OMEGA_KAPPA = 50.0; /* Empirical codon model restricted (Kosiol et al. 2007), source: http://www.ebi.ac.uk/goldman/ECM/ */ string model_ECMrest1 = @@ -670,9 +670,9 @@ void ModelCodon::readCodonModel(istream &in, bool reset_params) { } memset(state_freq, 0, num_states*sizeof(double)); for (i = 0; i < num_states; i++) - state_freq[i] = MIN_FREQUENCY; + state_freq[i] = Params::getInstance().min_state_freq; for (i = 0; i < nscodons; i++) - state_freq[state_map[i]] = f[i]-(num_states-nscodons)*MIN_FREQUENCY/nscodons; + state_freq[state_map[i]] = f[i]-(num_states-nscodons)*Params::getInstance().min_state_freq/nscodons; if (reset_params) { fix_omega = fix_kappa = fix_kappa2 = true; @@ -981,7 +981,7 @@ void ModelCodon::setBounds(double *lower_bound, double *upper_bound, bool *bound for (i = ndim-num_states+2; i <= ndim; i++) { // lower_bound[i] = MIN_FREQUENCY/state_freq[highest_freq_state]; // upper_bound[i] = state_freq[highest_freq_state]/MIN_FREQUENCY; - lower_bound[i] = MIN_FREQUENCY; + lower_bound[i] = Params::getInstance().min_state_freq; // upper_bound[i] = 100.0; upper_bound[i] = 1.0; bound_check[i] = false; @@ -989,34 +989,38 @@ void 
ModelCodon::setBounds(double *lower_bound, double *upper_bound, bool *bound } } -double ModelCodon::optimizeParameters(double gradient_epsilon) { - int ndim = getNDim(); - - // return if nothing to be optimized - if (ndim == 0) return 0.0; - - if (verbose_mode >= VB_MAX) - cout << "Optimizing " << name << " model parameters..." << endl; - - +double ModelCodon::optimizeParameters(double gradient_epsilon) { + if (fixed_parameters) { + return 0.0; + } + int ndim = getNDim(); + if (ndim == 0) { + // return if nothing to be optimized + return 0.0; + } + if (verbose_mode >= VB_MAX) { + cout << "Optimizing " << name << " model parameters..." << endl; + } double *variables = new double[ndim+1]; double *upper_bound = new double[ndim+1]; double *lower_bound = new double[ndim+1]; bool *bound_check = new bool[ndim+1]; double score; - for (int i = 0; i < num_states; i++) - if (state_freq[i] > state_freq[highest_freq_state]) + for (int i = 0; i < num_states; i++) { + if (state_freq[i] > state_freq[highest_freq_state]) { highest_freq_state = i; - + } + } // by BFGS algorithm setVariables(variables); setBounds(lower_bound, upper_bound, bound_check); - if (phylo_tree->params->optimize_alg.find("BFGS-B") == string::npos) + if (phylo_tree->params->optimize_alg_freerate.find("BFGS-B") == string::npos) { score = -minimizeMultiDimen(variables, ndim, lower_bound, upper_bound, bound_check, max(gradient_epsilon, TOL_RATE)); - else - score = -L_BFGS_B(ndim, variables+1, lower_bound+1, upper_bound+1, max(gradient_epsilon, TOL_RATE)); - + } + else { + score = -L_BFGS_B(ndim, variables + 1, lower_bound + 1, upper_bound + 1, max(gradient_epsilon, TOL_RATE)); + } bool changed = getVariables(variables); // BQM 2015-09-07: normalize state_freq if (freq_type == FREQ_ESTIMATE) { diff --git a/model/modeldna.cpp b/model/modeldna.cpp index df9650b46..f23451a24 100644 --- a/model/modeldna.cpp +++ b/model/modeldna.cpp @@ -183,6 +183,8 @@ void ModelDNA::init(const char *model_name, string model_params, 
StateFreqType f full_name = "Time reversible ("+name+")"; } else { readParameters(model_name); + name = full_name = model_name; + freq = FREQ_USER_DEFINED; //name += " (user-defined)"; } } @@ -210,7 +212,8 @@ void ModelDNA::saveCheckpoint() { // up to model_parameters[num_params] // setVariables(model_parameters); startCheckpoint(); - CKP_ARRAY_SAVE(6, rates); + if (!fixed_parameters) + CKP_ARRAY_SAVE(6, rates); endCheckpoint(); ModelMarkov::saveCheckpoint(); } @@ -219,13 +222,17 @@ void ModelDNA::restoreCheckpoint() { // curiously, this seems to be the only plase ModelDNA uses model_parameters. ModelMarkov::restoreCheckpoint(); startCheckpoint(); - CKP_ARRAY_RESTORE(6, rates); + if (!fixed_parameters) + CKP_ARRAY_RESTORE(6, rates); endCheckpoint(); // getVariables(model_parameters); // updates rates and state_freq string rate_spec = param_spec; for (auto i = rate_spec.begin(); i != rate_spec.end(); i++) *i = *i + '0'; - ASSERT(setRateType(rate_spec)); + + if (!rate_spec.empty()) + if (!setRateType(rate_spec)) + ASSERT(0 && "Cannot set rate_spec"); decomposeRateMatrix(); if (phylo_tree) @@ -239,24 +246,25 @@ void ModelDNA::readRates(string str) throw(const char*) { for (j = 0; j < param_spec.length(); j++) rates[j] = 1.0; num_params = 0; - for (i = 0; i < nrates && end_pos < str.length(); i++) { + for (i = 0; i <= nrates && end_pos < str.length(); i++) { int new_end_pos; double rate = 0; + int id = (i < nrates) ? 
i+1 : 0; if (str[end_pos] == '?') { - param_fixed[i+1] = false; + param_fixed[id] = false; end_pos++; rate = 1.0; num_params++; } else { if (Params::getInstance().optimize_rate_matrix) { num_params++; - param_fixed[i+1] = false; + param_fixed[id] = false; } else if (Params::getInstance().optimize_from_given_params) { num_params++; - param_fixed[i+1] = false; + param_fixed[id] = false; } else { - param_fixed[i+1] = true; + param_fixed[id] = true; } try { rate = convert_double(str.substr(end_pos).c_str(), new_end_pos); @@ -267,7 +275,7 @@ void ModelDNA::readRates(string str) throw(const char*) { } if (rate < 0.0) outError("Negative rates found"); - if (i == nrates-1 && end_pos < str.length()) + if (i == nrates && end_pos < str.length()) outError("More than " + convertIntToString(nrates) + " rate parameters specified in " + str); if (i < nrates-1 && end_pos >= str.length()) outError("Unexpected end of string ", str); @@ -275,7 +283,7 @@ void ModelDNA::readRates(string str) throw(const char*) { outError("Comma to separate rates not found in ", str); end_pos++; for (j = 0; j < param_spec.length(); j++) - if (param_spec[j] == i+1) + if (param_spec[j] == id) rates[j] = rate; } } @@ -284,17 +292,20 @@ void ModelDNA::readRates(string str) throw(const char*) { string ModelDNA::getNameParams() { if (num_params == 0) return name; ostringstream retname; - retname << name << '{'; - int nrates = getNumRateEntries(); - int k = 0; - for (int i = 0; i < nrates; i++) { - if (param_spec[i] > k) { - if (k>0) retname << ','; - retname << rates[i]; - k++; - } - } - retname << '}'; + retname << name; + if (!fixed_parameters) { + retname << '{'; + int nrates = getNumRateEntries(); + int k = 0; + for (int i = 0; i < nrates; i++) { + if (param_spec[i] > k) { + if (k>0) retname << ','; + retname << rates[i]; + k++; + } + } + retname << '}'; + } getNameParamsFreq(retname); return retname.str(); } @@ -360,7 +371,10 @@ bool ModelDNA::setRateType(string rate_str) { for (i = 0; i <= num_params; 
i++) avg_rates[i] /= num_rates[i]; for (i = 0; i < param_spec.size(); i++) { - rates[i] = avg_rates[(int)param_spec[i]] / avg_rates[0]; + if (avg_rates[0] > 0.0) + rates[i] = avg_rates[(int)param_spec[i]] / avg_rates[0]; + else + rates[i] = avg_rates[(int)param_spec[i]]; } if (verbose_mode >= VB_DEBUG) { cout << "Initialized rates: "; @@ -368,8 +382,14 @@ bool ModelDNA::setRateType(string rate_str) { cout << rates[i] << " "; cout << endl; } - param_fixed.resize(num_params+1, false); - param_fixed[0] = true; // fix the last entry + if (param_fixed.size() == num_params + 1) { + num_params = 0; + for (auto p : param_fixed) + if (!p) num_params++; + } else { + param_fixed.resize(num_params+1, false); + param_fixed[0] = true; // fix the last entry + } delete [] num_rates; delete [] avg_rates; return true; @@ -377,27 +397,38 @@ bool ModelDNA::setRateType(string rate_str) { int ModelDNA::getNDim() { + if (fixed_parameters) + { + return 0; + } ASSERT(freq_type != FREQ_UNKNOWN); // possible TO-DO: cache nFreqParams(freq_type) to avoid repeat calls. 
// return (num_params+nFreqParams(freq_type)); +// if (linked_model && linked_model != this) +// return 0; + int ndim = num_params; - if (freq_type == FREQ_ESTIMATE) - ndim += num_states-1; - else - ndim += nFreqParams(freq_type); + if (freq_type == FREQ_ESTIMATE) { + ndim += num_states - 1; + } else { + ndim += nFreqParams(freq_type); + } return ndim; } -void ModelDNA::writeParameters(ostream &out) { +void ModelDNA::writeParameters(ostream& out) { int i; if (freq_type == FREQ_ESTIMATE) { for (i = 0; i < num_states; i++) out << "\t" << state_freq[i]; } - if (num_params == 0) return; - if (num_params <= 1) + if (num_params == 0) { + return; + } + if (num_params <= 1) { out << "\t" << rates[1]; + } else { int nrateout = getNumRateEntries() - 1; for (i = 0; i < nrateout; i++) @@ -405,6 +436,21 @@ void ModelDNA::writeParameters(ostream &out) { } } +void ModelDNA::computeTipLikelihood(PML::StateType state, double *state_lk) { + if (state < num_states || state >= 18) { + ModelSubst::computeTipLikelihood(state, state_lk); + return; + } + + // special treatment for ambiguous (polymorphic) state + memset(state_lk, 0, num_states*sizeof(double)); + int cstate = state-num_states+1; + for (int i = 0; i < num_states; i++) { + if ((cstate) & (1 << i)) + state_lk[i] = 1.0; + } +} + /* * getVariables *changes* the state of the model, setting from *variables * Returns true if the model state has changed, false if not. 
@@ -429,10 +475,10 @@ bool ModelDNA::getVariables(double *variables) { // 2015-09-07: relax the sum of state_freq to be 1, this will be done at the end of optimization int ndim = getNDim(); changed |= memcmpcpy(state_freq, variables+(ndim-num_states+2), (num_states-1)*sizeof(double)); - double sum = 0; - for (i = 0; i < num_states-1; i++) - sum += state_freq[i]; - state_freq[num_states-1] = 1.0 - sum; +// double sum = 0; +// for (i = 0; i < num_states-1; i++) +// sum += state_freq[i]; +// state_freq[num_states-1] = 1.0 - sum; } else { // BQM: for special DNA freq stuffs from MDW changed |= freqsFromParams(state_freq,variables+num_params+1,freq_type); diff --git a/model/modeldna.h b/model/modeldna.h index 82730c00d..0deec1f94 100644 --- a/model/modeldna.h +++ b/model/modeldna.h @@ -108,6 +108,12 @@ class ModelDNA : public ModelMarkov */ virtual void writeParameters(ostream &out); + /** compute the tip likelihood vector of a state for Felsenstein's pruning algorithm + @param state character state + @param[out] state_lk state likehood vector of size num_states + */ + virtual void computeTipLikelihood(PML::StateType state, double *state_lk); + protected: /** diff --git a/model/modeldnaerror.cpp b/model/modeldnaerror.cpp new file mode 100644 index 000000000..269a85b5c --- /dev/null +++ b/model/modeldnaerror.cpp @@ -0,0 +1,173 @@ +// +// modeldnaerror.cpp +// model +// +// Created by Minh Bui on 26/6/20. 
+// + +#include "modeldnaerror.h" + +// Bound for sequencing error probability (epsilon) +#define MIN_EPSILON 0.0001 +#define MAX_EPSILON 0.5 + +ModelDNAError::ModelDNAError(PhyloTree *tree) +: ModelDNA(tree) +{ + epsilon = 0.05; + fix_epsilon = false; +} + +ModelDNAError::ModelDNAError(const char *model_name, string model_params, + StateFreqType freq, string freq_params, string seqerr, PhyloTree *tree) +: ModelDNA(model_name, model_params, freq, freq_params, tree) +{ + epsilon = 0.05; + fix_epsilon = false; + seqerr_name = seqerr; + // now parse the epsilon parameter + string::size_type pos; + if ((pos = seqerr.find(OPEN_BRACKET)) != string::npos) { + auto end_pos = seqerr.find(CLOSE_BRACKET); + if (end_pos == string::npos) + outError("Missing closing bracket in " + seqerr); + epsilon = convert_double(seqerr.substr(pos+1, end_pos-pos-1).c_str()); + if (epsilon < 0.0 || epsilon > 1.0) + outError("Sequencing error probability " + convertDoubleToString(epsilon) + " is not between 0 and 1"); + if (!Params::getInstance().optimize_from_given_params) + fix_epsilon = true; + seqerr_name = seqerr.substr(0, pos); + } +} + +void ModelDNAError::startCheckpoint() { + checkpoint->startStruct("ModelDNAError"); +} + +void ModelDNAError::saveCheckpoint() { + startCheckpoint(); + if (!fix_epsilon) + CKP_SAVE(epsilon); + endCheckpoint(); + ModelDNA::saveCheckpoint(); +} + +void ModelDNAError::restoreCheckpoint() { + ModelDNA::restoreCheckpoint(); + startCheckpoint(); + if (!fix_epsilon) + CKP_RESTORE(epsilon); + endCheckpoint(); +} + +string ModelDNAError::getName() { + string retname = ModelDNA::getName(); + retname += seqerr_name; + return retname; +} + +string ModelDNAError::getNameParams() { + string retname = ModelDNA::getNameParams(); + retname += seqerr_name + "{" + convertDoubleToString(epsilon) + "}"; + return retname; +} + +void ModelDNAError::writeInfo(ostream &out) { + ModelDNA::writeInfo(out); + auto prec = out.precision(6); + out << "Sequencing error probability: " << 
epsilon << endl; + out.precision(prec); +} + +int ModelDNAError::getNDim() { + if (fix_epsilon) + return ModelDNA::getNDim(); + else + return ModelDNA::getNDim() + 1; +} + +void ModelDNAError::computeTipLikelihood(PML::StateType state, double *state_lk) { + if (epsilon == 0.0) + return ModelDNA::ModelSubst::computeTipLikelihood(state, state_lk); + + int i; + + int b = -1; + if (seqerr_name == "+EA") + b = 0; + else if (seqerr_name == "+EC") + b = 1; + else if (seqerr_name == "+EG") + b = 2; + else if (seqerr_name == "+ET") + b = 3; + else if (seqerr_name == "+E") + b = -1; + else { + outError("Unknown sequencing error model " + seqerr_name); + } + + // true for observed states, false for unobserved state + bool observed[4] = {false, false, false, false}; + int num_observed = 0; // number of observed states + if (state < 4) { + // single state + observed[state] = true; + num_observed = 1; + } else if (state < 18) { + // ambiguous (polymorphic) state + int cstate = state-num_states+1; + for (i = 0; i < num_states; i++) + if ((cstate) & (1 << i)) { + observed[i] = true; + num_observed++; + } + } else { + // unknown state + for (i = 0; i < num_states; i++) + observed[i] = true; + num_observed = num_states; + } + + double observed_lk; // likelihood of observed state + double unobserved_lk; // likelihood of unobserved state + if (b >= 0) { + // nucleotide-specific error towards nucleotide b (Nicola de Maio) + observed_lk = observed[b] ? 1.0 : 1.0-epsilon; + unobserved_lk = observed[b] ? epsilon : 0.0; + } else { + // uniform error model (Felsenstein 2004) + observed_lk = 1.0 - (4-num_observed)*epsilon/3.0; + unobserved_lk = num_observed*epsilon/3.0; + } + for (i = 0; i < num_states; i++) + state_lk[i] = observed[i] ? 
observed_lk : unobserved_lk; +} + +void ModelDNAError::setBounds(double *lower_bound, double *upper_bound, bool *bound_check) { + ModelDNA::setBounds(lower_bound, upper_bound, bound_check); + if (!fix_epsilon) { + int id = ModelDNA::getNDim()+1; + lower_bound[id] = MIN_EPSILON; + upper_bound[id] = MAX_EPSILON; + bound_check[id] = false; + } +} + +bool ModelDNAError::getVariables(double *variables) { + bool changed = ModelDNA::getVariables(variables); + if (!fix_epsilon) { + int id = ModelDNA::getNDim()+1; + changed |= (epsilon != variables[id]); + epsilon = variables[id]; + } + return changed; +} + +void ModelDNAError::setVariables(double *variables) { + ModelDNA::setVariables(variables); + if (!fix_epsilon) { + int id = ModelDNA::getNDim()+1; + variables[id] = epsilon; + } +} diff --git a/model/modeldnaerror.h b/model/modeldnaerror.h new file mode 100644 index 000000000..fb9ea64d4 --- /dev/null +++ b/model/modeldnaerror.h @@ -0,0 +1,115 @@ +// +// modeldnaerror.hpp +// model +// +// Created by Minh Bui on 26/6/20. +// + +#ifndef modeldnaerror_hpp +#define modeldnaerror_hpp + +#include "modeldna.h" + +/** +DNA models with sequecing error + + @author BUI Quang Minh +*/ +class ModelDNAError : public ModelDNA +{ +public: + + /** + constructor + @param tree associated tree for the model + */ + ModelDNAError(PhyloTree *tree); + + /** + constructor + @param model_name model name, e.g., JC, HKY. 
+ @param freq state frequency type + @param tree associated phylogenetic tree + */ + ModelDNAError(const char *model_name, string model_params, StateFreqType freq, string freq_params, string seqerr, PhyloTree *tree); + + /** + start structure for checkpointing + */ + virtual void startCheckpoint(); + + /** + save object into the checkpoint + */ + virtual void saveCheckpoint(); + + /** + restore object from the checkpoint + */ + virtual void restoreCheckpoint(); + + /** + * @return model name + */ + virtual string getName(); + + /** + * @return model name with parameters in form of e.g. GTR{a,b,c,d,e,f} + */ + virtual string getNameParams(); + + /** + write information + @param out output stream + */ + virtual void writeInfo(ostream &out); + + + /** + return the number of dimensions + */ + virtual int getNDim(); + + /** + * setup the bounds for joint optimization with BFGS + */ + virtual void setBounds(double *lower_bound, double *upper_bound, bool *bound_check); + + + /** compute the tip likelihood vector of a state for Felsenstein's pruning algorithm + @param state character state + @param[out] state_lk state likehood vector of size num_states + */ + virtual void computeTipLikelihood(PML::StateType state, double *state_lk); + +protected: + + /** + this function is served for the multi-dimension optimization. It should pack the model parameters + into a vector that is index from 1 (NOTE: not from 0) + @param variables (OUT) vector of variables, indexed from 1 + */ + virtual void setVariables(double *variables); + + /** + this function is served for the multi-dimension optimization. 
It should assign the model parameters + from a vector of variables that is index from 1 (NOTE: not from 0) + @param variables vector of variables, indexed from 1 + @return TRUE if parameters are changed, FALSE otherwise (2015-10-20) + */ + virtual bool getVariables(double *variables); + +private: + + /** sequencing error */ + double epsilon; + + /** true to fix epsilon */ + bool fix_epsilon; + + /** error model name */ + string seqerr_name; + +}; + +#endif /* modeldnaerror_hpp */ diff --git a/model/modelfactory.cpp b/model/modelfactory.cpp index 60a7803e4..18f0e2383 100644 --- a/model/modelfactory.cpp +++ b/model/modelfactory.cpp @@ -44,150 +44,138 @@ #include "nclextra/myreader.h" #include -string::size_type posRateHeterotachy(string &model_name) { - string::size_type pos1 = 0, pos2 = 0; - do { - pos1 = model_name.find("+H", pos1); - if (pos1 == string::npos) break; - } while (pos1 < model_name.length()-2 && isalpha(model_name[pos1+2])); - - do { - pos2 = model_name.find("*H"); - if (pos2 == string::npos) break; - } while (pos2 < model_name.length()-2 && isalpha(model_name[pos2+2])); - +string::size_type findSubStr(string &name, string sub1, string sub2) { + string::size_type pos1, pos2; + for (pos1 = 0; pos1 != string::npos; pos1++) { + pos1 = name.find(sub1, pos1); + if (pos1 == string::npos) + break; + if (pos1+2 >= name.length() || !isalpha(name[pos1+2])) { + break; + } + } + + for (pos2 = 0; pos2 != string::npos; pos2++) { + pos2 = name.find(sub2, pos2); + if (pos2 == string::npos) + break; + if (pos2+2 >= name.length() ||!isalpha(name[pos2+2])) + break; + } + if (pos1 != string::npos && pos2 != string::npos) { return min(pos1, pos2); } else if (pos1 != string::npos) return pos1; else return pos2; +} +string::size_type posRateHeterotachy(string model_name) { + return findSubStr(model_name, "+H", "*H"); } string::size_type posRateFree(string &model_name) { - string::size_type pos1 = 0, pos2 = 0; - do { - pos1 = model_name.find("+R", pos1); - if (pos1 == 
string::npos) break; - } while (pos1 < model_name.length()-2 && isalpha(model_name[pos1+2])); - - do { - pos2 = model_name.find("*R"); - if (pos2 == string::npos) break; - } while (pos2 < model_name.length()-2 && isalpha(model_name[pos2+2])); - - if (pos1 != string::npos && pos2 != string::npos) { - return min(pos1, pos2); - } else if (pos1 != string::npos) - return pos1; - else - return pos2; - + return findSubStr(model_name, "+R", "*R"); } string::size_type posPOMO(string &model_name) { - string::size_type pos1 = 0, pos2 = 0; - do { - pos1 = model_name.find("+P", pos1); - if (pos1 == string::npos) break; - } while (pos1 < model_name.length()-2 && isalpha(model_name[pos1+2])); - - do { - pos2 = model_name.find("*P"); - if (pos2 == string::npos) break; - } while (pos2 < model_name.length()-2 && isalpha(model_name[pos2+2])); - - if (pos1 != string::npos && pos2 != string::npos) { - return min(pos1, pos2); - } else if (pos1 != string::npos) - return pos1; - else - return pos2; - + return findSubStr(model_name, "+P", "*P"); } ModelsBlock *readModelsDefinition(Params ¶ms) { - ModelsBlock *models_block = new ModelsBlock; + ModelsBlock *models_block = new ModelsBlock; + + try + { + // loading internal model definitions + stringstream in(builtin_mixmodels_definition); + ASSERT(in && "stringstream is OK"); + NxsReader nexus; + nexus.Add(models_block); + MyToken token(in); + nexus.Execute(token); + } catch (...) 
{ + ASSERT(0 && "predefined mixture models not initialized"); + } - try - { - // loading internal model definitions - stringstream in(builtin_mixmodels_definition); + try + { + // loading internal protei model definitions + stringstream in(builtin_prot_models); ASSERT(in && "stringstream is OK"); - NxsReader nexus; - nexus.Add(models_block); - MyToken token(in); - nexus.Execute(token); -// int num_model = 0, num_freq = 0; -// for (ModelsBlock::iterator it = models_block->begin(); it != models_block->end(); it++) -// if ((*it).flag & NM_FREQ) num_freq++; else num_model++; -// cout << num_model << " models and " << num_freq << " frequency vectors loaded" << endl; - } catch (...) { - ASSERT(0 && "predefined mixture models initialized"); + NxsReader nexus; + nexus.Add(models_block); + MyToken token(in); + nexus.Execute(token); + } catch (...) { + ASSERT(0 && "predefined protein models not initialized"); } - if (params.model_def_file) { - cout << "Reading model definition file " << params.model_def_file << " ... "; - MyReader nexus(params.model_def_file); - nexus.Add(models_block); - MyToken token(nexus.inf); - nexus.Execute(token); - int num_model = 0, num_freq = 0; - for (ModelsBlock::iterator it = models_block->begin(); it != models_block->end(); it++) - if ((*it).flag & NM_FREQ) num_freq++; else num_model++; - cout << num_model << " models and " << num_freq << " frequency vectors loaded" << endl; - } - return models_block; + if (params.model_def_file) { + cout << "Reading model definition file " << params.model_def_file << " ... 
"; + MyReader nexus(params.model_def_file); + nexus.Add(models_block); + MyToken token(nexus.inf); + nexus.Execute(token); + int num_model = 0, num_freq = 0; + for (ModelsBlock::iterator it = models_block->begin(); it != models_block->end(); it++) + if (it->second.flag & NM_FREQ) num_freq++; else num_model++; + cout << num_model << " models and " << num_freq << " frequency vectors loaded" << endl; + } + return models_block; } ModelFactory::ModelFactory() : CheckpointFactory() { - model = NULL; - site_rate = NULL; - store_trans_matrix = false; - is_storing = false; - joint_optimize = false; - fused_mix_rate = false; - unobserved_ptns = ""; + model = NULL; + site_rate = NULL; + store_trans_matrix = false; + is_storing = false; + joint_optimize = false; + fused_mix_rate = false; + ASC_type = ASC_NONE; + syncChkPoint = nullptr; } size_t findCloseBracket(string &str, size_t start_pos) { - int counter = 0; - for (size_t pos = start_pos+1; pos < str.length(); pos++) { - if (str[pos] == '{') counter++; - if (str[pos] == '}') { - if (counter == 0) return pos; else counter--; - } + int counter = 0; + for (size_t pos = start_pos+1; pos < str.length(); pos++) { + if (str[pos] == '{') counter++; + if (str[pos] == '}') { + if (counter == 0) return pos; else counter--; + } } - return string::npos; + return string::npos; } ModelFactory::ModelFactory(Params ¶ms, string &model_name, PhyloTree *tree, ModelsBlock *models_block) : CheckpointFactory() { - store_trans_matrix = params.store_trans_matrix; - is_storing = false; - joint_optimize = params.optimize_model_rate_joint; - fused_mix_rate = false; + store_trans_matrix = params.store_trans_matrix; + is_storing = false; + joint_optimize = params.optimize_model_rate_joint; + fused_mix_rate = false; + ASC_type = ASC_NONE; + syncChkPoint = nullptr; string model_str = model_name; - string rate_str; + string rate_str; - try { + try { - if (model_str == "") { - if (tree->aln->seq_type == SEQ_DNA) model_str = "HKY"; - else if 
(tree->aln->seq_type == SEQ_PROTEIN) model_str = "LG"; - else if (tree->aln->seq_type == SEQ_BINARY) model_str = "GTR2"; - else if (tree->aln->seq_type == SEQ_CODON) model_str = "GY"; - else if (tree->aln->seq_type == SEQ_MORPH) model_str = "MK"; + if (model_str == "") { + if (tree->aln->seq_type == SEQ_DNA) model_str = "HKY"; + else if (tree->aln->seq_type == SEQ_PROTEIN) model_str = "LG"; + else if (tree->aln->seq_type == SEQ_BINARY) model_str = "GTR2"; + else if (tree->aln->seq_type == SEQ_CODON) model_str = "GY"; + else if (tree->aln->seq_type == SEQ_MORPH) model_str = "MK"; else if (tree->aln->seq_type == SEQ_POMO) model_str = "HKY+P"; - else model_str = "JC"; - if (tree->aln->seq_type != SEQ_POMO) + else model_str = "JC"; + if (tree->aln->seq_type != SEQ_POMO && !params.model_joint) outWarning("Default model "+model_str + " may be under-fitting. Use option '-m TEST' to determine the best-fit model."); - } + } - /********* preprocessing model string ****************/ - NxsModel *nxsmodel = NULL; + /********* preprocessing model string ****************/ + NxsModel *nxsmodel = NULL; string new_model_str = ""; size_t mix_pos; @@ -207,11 +195,11 @@ ModelFactory::ModelFactory(Params ¶ms, string &model_name, PhyloTree *tree, cout << "Model " << model_str << " is alias for " << new_model_str << endl; model_str = new_model_str; - // nxsmodel = models_block->findModel(model_str); - // if (nxsmodel && nxsmodel->description.find_first_of("+*") != string::npos) { - // cout << "Model " << model_str << " is alias for " << nxsmodel->description << endl; - // model_str = nxsmodel->description; - // } + // nxsmodel = models_block->findModel(model_str); + // if (nxsmodel && nxsmodel->description.find_first_of("+*") != string::npos) { + // cout << "Model " << model_str << " is alias for " << nxsmodel->description << endl; + // model_str = nxsmodel->description; + // } // Detect PoMo and throw error if sequence type is PoMo but +P is // not given. 
This makes the model string cleaner and @@ -224,20 +212,62 @@ ModelFactory::ModelFactory(Params ¶ms, string &model_name, PhyloTree *tree, outError("Provided alignment is exclusively used by PoMo but model string does not contain, e.g., \"+P\"."); // Decompose model string into model_str and rate_str string. - size_t spec_pos = model_str.find_first_of("{+*"); - if (spec_pos != string::npos) { - if (model_str[spec_pos] == '{') { - // Scan for the corresponding '}'. - size_t pos = findCloseBracket(model_str, spec_pos); - if (pos == string::npos) - outError("Model name has wrong bracket notation '{...}'"); - rate_str = model_str.substr(pos+1); - model_str = model_str.substr(0, pos+1); + size_t spec_pos = model_str.find_first_of("{+*"); + if (spec_pos != string::npos) { + if (model_str[spec_pos] == '{') { + // Scan for the corresponding '}'. + size_t pos = findCloseBracket(model_str, spec_pos); + if (pos == string::npos) + outError("Model name has wrong bracket notation '{...}'"); + rate_str = model_str.substr(pos+1); + model_str = model_str.substr(0, pos+1); } else { rate_str = model_str.substr(spec_pos); model_str = model_str.substr(0, spec_pos); } } + + // decompose +F from rate_str + string freq_str = ""; + while ((spec_pos = rate_str.find("+F")) != string::npos) { + size_t end_pos = rate_str.find_first_of("+*", spec_pos+1); + if (end_pos == string::npos) { + freq_str += rate_str.substr(spec_pos); + rate_str = rate_str.substr(0, spec_pos); + } else { + freq_str += rate_str.substr(spec_pos, end_pos - spec_pos); + rate_str = rate_str.substr(0, spec_pos) + rate_str.substr(end_pos); + } + } + + // set to model_joint if set + if (Params::getInstance().model_joint) { + model_str = Params::getInstance().model_joint; + freq_str = ""; + while ((spec_pos = model_str.find("+F")) != string::npos) { + size_t end_pos = model_str.find_first_of("+*", spec_pos+1); + if (end_pos == string::npos) { + freq_str += model_str.substr(spec_pos); + model_str = model_str.substr(0, spec_pos); 
+ } else { + freq_str += model_str.substr(spec_pos, end_pos - spec_pos); + model_str = model_str.substr(0, spec_pos) + model_str.substr(end_pos); + } + } + } + + // move error model +E from rate_str to model_str + //string seqerr_str = ""; + while ((spec_pos = rate_str.find("+E")) != string::npos) { + size_t end_pos = rate_str.find_first_of("+*", spec_pos+1); + if (end_pos == string::npos) { + model_str += rate_str.substr(spec_pos); + rate_str = rate_str.substr(0, spec_pos); + } else { + model_str += rate_str.substr(spec_pos, end_pos - spec_pos); + rate_str = rate_str.substr(0, spec_pos) + rate_str.substr(end_pos); + } + } // PoMo; +NXX and +W or +S because those flags are handled when // reading in the data. Set PoMo parameters (heterozygosity). @@ -295,7 +325,6 @@ ModelFactory::ModelFactory(Params ¶ms, string &model_name, PhyloTree *tree, // models, the heterozygosity can be set separately for each model and the // +P{}, +GXX and +I flags should already be inside the model definition. if (model_str.substr(0, 3) != "MIX" && pomo) { - // +P{} flag. 
p_pos = posPOMO(rate_str); if (p_pos != string::npos) { @@ -347,207 +376,206 @@ ModelFactory::ModelFactory(Params ¶ms, string &model_name, PhyloTree *tree, // } } - // nxsmodel = models_block->findModel(model_str); - // if (nxsmodel && nxsmodel->description.find("MIX") != string::npos) { - // cout << "Model " << model_str << " is alias for " << nxsmodel->description << endl; - // model_str = nxsmodel->description; - // } + // nxsmodel = models_block->findModel(model_str); + // if (nxsmodel && nxsmodel->description.find("MIX") != string::npos) { + // cout << "Model " << model_str << " is alias for " << nxsmodel->description << endl; + // model_str = nxsmodel->description; + // } - /******************** initialize state frequency ****************************/ + /******************** initialize state frequency ****************************/ - StateFreqType freq_type = params.freq_type; + StateFreqType freq_type = params.freq_type; - if (freq_type == FREQ_UNKNOWN) { - switch (tree->aln->seq_type) { - case SEQ_BINARY: freq_type = FREQ_ESTIMATE; break; // default for binary: optimized frequencies - case SEQ_PROTEIN: freq_type = FREQ_USER_DEFINED; break; // default for protein: frequencies of the empirical AA matrix - case SEQ_MORPH: freq_type = FREQ_EQUAL; break; - case SEQ_CODON: freq_type = FREQ_UNKNOWN; break; + if (freq_type == FREQ_UNKNOWN) { + switch (tree->aln->seq_type) { + case SEQ_BINARY: freq_type = FREQ_ESTIMATE; break; // default for binary: optimized frequencies + case SEQ_PROTEIN: break; // let ModelProtein decide by itself + case SEQ_MORPH: freq_type = FREQ_EQUAL; break; + case SEQ_CODON: freq_type = FREQ_UNKNOWN; break; break; - default: freq_type = FREQ_EMPIRICAL; break; // default for DNA, PoMo and others: counted frequencies from alignment - } - } + default: freq_type = FREQ_EMPIRICAL; break; // default for DNA, PoMo and others: counted frequencies from alignment + } + } // first handle mixture frequency - string::size_type posfreq = 
rate_str.find("+FMIX"); - string freq_params; + string::size_type posfreq = freq_str.find("+FMIX"); + string freq_params; size_t close_bracket; if (posfreq != string::npos) { - string freq_str; - size_t last_pos = rate_str.find_first_of("+*", posfreq+1); - - if (last_pos == string::npos) { - freq_str = rate_str.substr(posfreq); - rate_str = rate_str.substr(0, posfreq); - } else { - freq_str = rate_str.substr(posfreq, last_pos-posfreq); - rate_str = rate_str.substr(0, posfreq) + rate_str.substr(last_pos); - } - - if (freq_str[5] != OPEN_BRACKET) + string fmix_str; + size_t last_pos = freq_str.find_first_of("+*", posfreq+1); + + if (last_pos == string::npos) { + fmix_str = freq_str.substr(posfreq); + freq_str = freq_str.substr(0, posfreq); + } else { + fmix_str = freq_str.substr(posfreq, last_pos-posfreq); + freq_str = freq_str.substr(0, posfreq) + freq_str.substr(last_pos); + } + + if (fmix_str[5] != OPEN_BRACKET) outError("Mixture-frequency must start with +FMIX{"); - close_bracket = freq_str.find(CLOSE_BRACKET); + close_bracket = fmix_str.find(CLOSE_BRACKET); if (close_bracket == string::npos) - outError("Close bracket not found in ", freq_str); - if (close_bracket != freq_str.length()-1) - outError("Wrong close bracket position ", freq_str); + outError("Close bracket not found in ", fmix_str); + if (close_bracket != fmix_str.length()-1) + outError("Wrong close bracket position ", fmix_str); freq_type = FREQ_MIXTURE; - freq_params = freq_str.substr(6, close_bracket-6); + freq_params = fmix_str.substr(6, close_bracket-6); } // then normal frequency - if (rate_str.find("+FO") != string::npos) - posfreq = rate_str.find("+FO"); - else if (rate_str.find("+Fo") != string::npos) - posfreq = rate_str.find("+Fo"); + if (freq_str.find("+FO") != string::npos) + posfreq = freq_str.find("+FO"); + else if (freq_str.find("+Fo") != string::npos) + posfreq = freq_str.find("+Fo"); else - posfreq = rate_str.find("+F"); + posfreq = freq_str.find("+F"); bool optimize_mixmodel_weight = 
params.optimize_mixmodel_weight; - if (posfreq != string::npos) { - string freq_str; - size_t last_pos = rate_str.find_first_of("+*", posfreq+1); - if (last_pos == string::npos) { - freq_str = rate_str.substr(posfreq); - rate_str = rate_str.substr(0, posfreq); - } else { - freq_str = rate_str.substr(posfreq, last_pos-posfreq); - rate_str = rate_str.substr(0, posfreq) + rate_str.substr(last_pos); - } - - if (freq_str.length() > 2 && freq_str[2] == OPEN_BRACKET) { + if (posfreq != string::npos) { + string fstr; + size_t last_pos = freq_str.find_first_of("+*", posfreq+1); + if (last_pos == string::npos) { + fstr = freq_str.substr(posfreq); + freq_str = freq_str.substr(0, posfreq); + } else { + fstr = freq_str.substr(posfreq, last_pos-posfreq); + freq_str = freq_str.substr(0, posfreq) + freq_str.substr(last_pos); + } + + if (fstr.length() > 2 && fstr[2] == OPEN_BRACKET) { if (freq_type == FREQ_MIXTURE) outError("Mixture frequency with user-defined frequency is not allowed"); - close_bracket = freq_str.find(CLOSE_BRACKET); + close_bracket = fstr.find(CLOSE_BRACKET); if (close_bracket == string::npos) - outError("Close bracket not found in ", freq_str); - if (close_bracket != freq_str.length()-1) - outError("Wrong close bracket position ", freq_str); + outError("Close bracket not found in ", fstr); + if (close_bracket != fstr.length()-1) + outError("Wrong close bracket position ", fstr); freq_type = FREQ_USER_DEFINED; - freq_params = freq_str.substr(3, close_bracket-3); - } else if (freq_str == "+FC" || freq_str == "+Fc" || freq_str == "+F") { + freq_params = fstr.substr(3, close_bracket-3); + } else if (fstr == "+FC" || fstr == "+Fc" || fstr == "+F") { if (freq_type == FREQ_MIXTURE) { freq_params = "empirical," + freq_params; optimize_mixmodel_weight = true; } else freq_type = FREQ_EMPIRICAL; - } else if (freq_str == "+FU" || freq_str == "+Fu") { + } else if (fstr == "+FU" || fstr == "+Fu") { if (freq_type == FREQ_MIXTURE) outError("Mixture frequency with user-defined 
frequency is not allowed"); else freq_type = FREQ_USER_DEFINED; - } else if (freq_str == "+FQ" || freq_str == "+Fq") { + } else if (fstr == "+FQ" || fstr == "+Fq") { if (freq_type == FREQ_MIXTURE) outError("Mixture frequency with equal frequency is not allowed"); else freq_type = FREQ_EQUAL; - } else if (freq_str == "+FO" || freq_str == "+Fo") { + } else if (fstr == "+FO" || fstr == "+Fo") { if (freq_type == FREQ_MIXTURE) { freq_params = "optimize," + freq_params; optimize_mixmodel_weight = true; } else freq_type = FREQ_ESTIMATE; - } else if (freq_str == "+F1x4" || freq_str == "+F1X4") { + } else if (fstr == "+F1x4" || fstr == "+F1X4") { if (freq_type == FREQ_MIXTURE) - outError("Mixture frequency with " + freq_str + " is not allowed"); + outError("Mixture frequency with " + fstr + " is not allowed"); else freq_type = FREQ_CODON_1x4; - } else if (freq_str == "+F3x4" || freq_str == "+F3X4") { + } else if (fstr == "+F3x4" || fstr == "+F3X4") { if (freq_type == FREQ_MIXTURE) - outError("Mixture frequency with " + freq_str + " is not allowed"); + outError("Mixture frequency with " + fstr + " is not allowed"); else freq_type = FREQ_CODON_3x4; - } else if (freq_str == "+F3x4C" || freq_str == "+F3x4c" || freq_str == "+F3X4C" || freq_str == "+F3X4c") { + } else if (fstr == "+F3x4C" || fstr == "+F3x4c" || fstr == "+F3X4C" || fstr == "+F3X4c") { if (freq_type == FREQ_MIXTURE) - outError("Mixture frequency with " + freq_str + " is not allowed"); + outError("Mixture frequency with " + fstr + " is not allowed"); else freq_type = FREQ_CODON_3x4C; - } else if (freq_str == "+FRY") { - // MDW to Minh: I don't know how these should interact with FREQ_MIXTURE, - // so as nearly everything else treats it as an error, I do too. + } else if (fstr == "+FRY") { + // MDW to Minh: I don't know how these should interact with FREQ_MIXTURE, + // so as nearly everything else treats it as an error, I do too. 
// BQM answer: that's fine if (freq_type == FREQ_MIXTURE) - outError("Mixture frequency with " + freq_str + " is not allowed"); + outError("Mixture frequency with " + fstr + " is not allowed"); else freq_type = FREQ_DNA_RY; - } else if (freq_str == "+FWS") { + } else if (fstr == "+FWS") { if (freq_type == FREQ_MIXTURE) - outError("Mixture frequency with " + freq_str + " is not allowed"); + outError("Mixture frequency with " + fstr + " is not allowed"); else freq_type = FREQ_DNA_WS; - } else if (freq_str == "+FMK") { + } else if (fstr == "+FMK") { if (freq_type == FREQ_MIXTURE) - outError("Mixture frequency with " + freq_str + " is not allowed"); + outError("Mixture frequency with " + fstr + " is not allowed"); else freq_type = FREQ_DNA_MK; } else { // might be "+F####" where # are digits try { - freq_type = parseStateFreqDigits(freq_str.substr(2)); // throws an error if not in +F#### format + freq_type = parseStateFreqDigits(fstr.substr(2)); // throws an error if not in +F#### format } catch (...) 
{ - outError("Unknown state frequency type ",freq_str); + outError("Unknown state frequency type ",fstr); } } // model_str = model_str.substr(0, posfreq); } - /******************** initialize model ****************************/ - - if (tree->aln->site_state_freq.empty()) { - if (model_str.substr(0, 3) == "MIX" || freq_type == FREQ_MIXTURE) { - string model_list; - if (model_str.substr(0, 3) == "MIX") { - if (model_str[3] != OPEN_BRACKET) - outError("Mixture model name must start with 'MIX{'"); - if (model_str.rfind(CLOSE_BRACKET) != model_str.length()-1) - outError("Close bracket not found at the end of ", model_str); - model_list = model_str.substr(4, model_str.length()-5); - model_str = model_str.substr(0, 3); - } - model = new ModelMixture(model_name, model_str, model_list, models_block, freq_type, freq_params, tree, optimize_mixmodel_weight); - } else { - // string model_desc; - // NxsModel *nxsmodel = models_block->findModel(model_str); - // if (nxsmodel) model_desc = nxsmodel->description; - model = createModel(model_str, models_block, freq_type, freq_params, tree); - } -// fused_mix_rate &= model->isMixture() && site_rate->getNRate() > 1; - } else { - // site-specific model - if (model_str == "JC" || model_str == "POISSON") - outError("JC is not suitable for site-specific model"); - model = new ModelSet(model_str.c_str(), tree); - ModelSet *models = (ModelSet*)model; // assign pointer for convenience - models->init((params.freq_type != FREQ_UNKNOWN) ? 
params.freq_type : FREQ_EMPIRICAL); - int i; - models->pattern_model_map.resize(tree->aln->getNPattern(), -1); - for (i = 0; i < tree->aln->getNSite(); i++) { - models->pattern_model_map[tree->aln->getPatternID(i)] = tree->aln->site_model[i]; - //cout << "site " << i << " ptn " << tree->aln->getPatternID(i) << " -> model " << site_model[i] << endl; - } - double *state_freq = new double[model->num_states]; - double *rates = new double[model->getNumRateEntries()]; - for (i = 0; i < tree->aln->site_state_freq.size(); i++) { - ModelMarkov *modeli; - if (i == 0) { - modeli = (ModelMarkov*)createModel(model_str, models_block, (params.freq_type != FREQ_UNKNOWN) ? params.freq_type : FREQ_EMPIRICAL, "", tree); - modeli->getStateFrequency(state_freq); - modeli->getRateMatrix(rates); - } else { - modeli = (ModelMarkov*)createModel(model_str, models_block, FREQ_EQUAL, "", tree); - modeli->setStateFrequency(state_freq); - modeli->setRateMatrix(rates); - } - if (tree->aln->site_state_freq[i]) - modeli->setStateFrequency (tree->aln->site_state_freq[i]); - - modeli->init(FREQ_USER_DEFINED); - models->push_back(modeli); - } - delete [] rates; - delete [] state_freq; + /******************** initialize model ****************************/ + + if (tree->aln->site_state_freq.empty()) { + if (model_str.substr(0, 3) == "MIX" || freq_type == FREQ_MIXTURE) { + string model_list; + if (model_str.substr(0, 3) == "MIX") { + if (model_str[3] != OPEN_BRACKET) + outError("Mixture model name must start with 'MIX{'"); + if (model_str.rfind(CLOSE_BRACKET) != model_str.length()-1) + outError("Close bracket not found at the end of ", model_str); + model_list = model_str.substr(4, model_str.length()-5); + model_str = model_str.substr(0, 3); + } + model = new ModelMixture(model_name, model_str, model_list, models_block, freq_type, freq_params, tree, optimize_mixmodel_weight); + } else { + // string model_desc; + // NxsModel *nxsmodel = models_block->findModel(model_str); + // if (nxsmodel) model_desc = 
nxsmodel->description; + model = createModel(model_str, models_block, freq_type, freq_params, tree); + } +// fused_mix_rate &= model->isMixture() && site_rate->getNRate() > 1; + } else { + // site-specific model + if (model_str == "JC" || model_str == "POISSON") + outError("JC is not suitable for site-specific model"); + model = new ModelSet(model_str.c_str(), tree); + ModelSet *models = (ModelSet*)model; // assign pointer for convenience + models->init((params.freq_type != FREQ_UNKNOWN) ? params.freq_type : FREQ_EMPIRICAL); + models->pattern_model_map.resize(tree->aln->getNPattern(), -1); + for (size_t i = 0; i < tree->aln->getNSite(); ++i) { + models->pattern_model_map[tree->aln->getPatternID(i)] = tree->aln->site_model[i]; + //cout << "site " << i << " ptn " << tree->aln->getPatternID(i) << " -> model " << site_model[i] << endl; + } + double *state_freq = new double[model->num_states]; + double *rates = new double[model->getNumRateEntries()]; + for (size_t i = 0; i < tree->aln->site_state_freq.size(); ++i) { + ModelMarkov *modeli; + if (i == 0) { + modeli = (ModelMarkov*)createModel(model_str, models_block, (params.freq_type != FREQ_UNKNOWN) ? 
params.freq_type : FREQ_EMPIRICAL, "", tree); + modeli->getStateFrequency(state_freq); + modeli->getRateMatrix(rates); + } else { + modeli = (ModelMarkov*)createModel(model_str, models_block, FREQ_EQUAL, "", tree); + modeli->setStateFrequency(state_freq); + modeli->setRateMatrix(rates); + } + if (tree->aln->site_state_freq[i]) + modeli->setStateFrequency (tree->aln->site_state_freq[i]); + + modeli->init(FREQ_USER_DEFINED); + models->push_back(modeli); + } + delete [] rates; + delete [] state_freq; models->joinEigenMemory(); models->decomposeRateMatrix(); @@ -555,23 +583,67 @@ ModelFactory::ModelFactory(Params ¶ms, string &model_name, PhyloTree *tree, // delete information of the old alignment // tree->aln->ordered_pattern.clear(); // tree->deleteAllPartialLh(); - } + } -// if (model->isMixture()) -// cout << "Mixture model with " << model->getNMixtures() << " components!" << endl; +// if (model->isMixture()) +// cout << "Mixture model with " << model->getNMixtures() << " components!" << endl; - /******************** initialize ascertainment bias correction model ****************************/ + /******************** initialize ascertainment bias correction model ****************************/ - string::size_type posasc; + string::size_type posasc; - if ((posasc = rate_str.find("+ASC")) != string::npos) { - // ascertainment bias correction - unobserved_ptns = tree->aln->getUnobservedConstPatterns(); - // rebuild the seq_states to contain states of unobserved constant patterns - tree->aln->buildSeqStates(true); -// if (unobserved_ptns.size() <= 0) -// outError("Invalid use of +ASC because all constant patterns are observed in the alignment"); - if (tree->aln->frac_invariant_sites > 0) { + if ((posasc = rate_str.find("+ASC_INF")) != string::npos) { + // ascertainment bias correction + ASC_type = ASC_INFORMATIVE; + tree->aln->getUnobservedConstPatterns(ASC_type, unobserved_ptns); + + // rebuild the seq_states to contain states of unobserved constant patterns + 
//tree->aln->buildSeqStates(model->seq_states, true); + if (tree->aln->num_informative_sites != tree->getAlnNSite()) { + if (!params.partition_file) { + string infsites_file = ((string)params.out_prefix + ".infsites.phy"); + tree->aln->printAlignment(params.aln_output_format, infsites_file.c_str(), false, NULL, EXCLUDE_UNINF); + cerr << "For your convenience alignment with parsimony-informative sites printed to " << infsites_file << endl; + } + outError("Invalid use of +ASC_INF because of " + convertIntToString(tree->getAlnNSite() - tree->aln->num_informative_sites) + + " parsimony-uninformative sites in the alignment"); + } + if (verbose_mode >= VB_MED) + cout << "Ascertainment bias correction: " << unobserved_ptns.size() << " unobservable uninformative patterns"<< endl; + rate_str = rate_str.substr(0, posasc) + rate_str.substr(posasc+8); + } else if ((posasc = rate_str.find("+ASC_MIS")) != string::npos) { + // initialize Holder's ascertainment bias correction model + ASC_type = ASC_VARIANT_MISSING; + tree->aln->getUnobservedConstPatterns(ASC_type, unobserved_ptns); + // rebuild the seq_states to contain states of unobserved constant patterns + //tree->aln->buildSeqStates(model->seq_states, true); + if (tree->aln->frac_invariant_sites > 0) { + if (!params.partition_file) { + string varsites_file = ((string)params.out_prefix + ".varsites.phy"); + tree->aln->printAlignment(params.aln_output_format, varsites_file.c_str(), false, NULL, EXCLUDE_INVAR); + cerr << "For your convenience alignment with variable sites printed to " << varsites_file << endl; + } + outError("Invalid use of +ASC_MIS because of " + convertIntToString(tree->aln->frac_invariant_sites*tree->aln->getNSite()) + + " invariant sites in the alignment"); + } + if (verbose_mode >= VB_MED) + cout << "Holder's ascertainment bias correction: " << unobserved_ptns.size() << " unobservable constant patterns" << endl; + rate_str = rate_str.substr(0, posasc) + rate_str.substr(posasc+8); + } else if ((posasc = 
rate_str.find("+ASC")) != string::npos) { + // ascertainment bias correction + ASC_type = ASC_VARIANT; + tree->aln->getUnobservedConstPatterns(ASC_type, unobserved_ptns); + + // delete rarely observed state + for (int i = unobserved_ptns.size()-1; i >= 0; i--) + if (model->state_freq[(int)unobserved_ptns[i][0]] < 1e-8) + unobserved_ptns.erase(unobserved_ptns.begin() + i); + + // rebuild the seq_states to contain states of unobserved constant patterns + //tree->aln->buildSeqStates(model->seq_states, true); +// if (unobserved_ptns.size() <= 0) +// outError("Invalid use of +ASC because all constant patterns are observed in the alignment"); + if (tree->aln->frac_invariant_sites > 0) { // cerr << tree->aln->frac_invariant_sites*tree->aln->getNSite() << " invariant sites observed in the alignment" << endl; // for (Alignment::iterator pit = tree->aln->begin(); pit != tree->aln->end(); pit++) // if (pit->isInvariant()) { @@ -582,24 +654,24 @@ ModelFactory::ModelFactory(Params ¶ms, string &model_name, PhyloTree *tree, // } if (!params.partition_file) { string varsites_file = ((string)params.out_prefix + ".varsites.phy"); - tree->aln->printPhylip(varsites_file.c_str(), false, NULL, false, true); + tree->aln->printAlignment(params.aln_output_format, varsites_file.c_str(), false, NULL, EXCLUDE_INVAR); cerr << "For your convenience alignment with variable sites printed to " << varsites_file << endl; } outError("Invalid use of +ASC because of " + convertIntToString(tree->aln->frac_invariant_sites*tree->aln->getNSite()) + " invariant sites in the alignment"); } - cout << "Ascertainment bias correction: " << unobserved_ptns.size() << " unobservable constant patterns"<< endl; + if (verbose_mode >= VB_MED) + cout << "Ascertainment bias correction: " << unobserved_ptns.size() << " unobservable constant patterns"<< endl; rate_str = rate_str.substr(0, posasc) + rate_str.substr(posasc+4); - } else { - tree->aln->buildSeqStates(false); + } else { + 
//tree->aln->buildSeqStates(model->seq_states, false); } + /******************** initialize site rate heterogeneity ****************************/ - /******************** initialize site rate heterogeneity ****************************/ - - string::size_type posI = rate_str.find("+I"); - string::size_type posG = rate_str.find("+G"); - string::size_type posG2 = rate_str.find("*G"); + string::size_type posI = rate_str.find("+I"); + string::size_type posG = rate_str.find("+G"); + string::size_type posG2 = rate_str.find("*G"); if (posG != string::npos && posG2 != string::npos) { cout << "NOTE: both +G and *G were specified, continue with " << ((posG < posG2)? rate_str.substr(posG,2) : rate_str.substr(posG2,2)) << endl; @@ -609,8 +681,8 @@ ModelFactory::ModelFactory(Params ¶ms, string &model_name, PhyloTree *tree, fused_mix_rate = true; } - string::size_type posR = rate_str.find("+R"); // FreeRate model - string::size_type posR2 = rate_str.find("*R"); // FreeRate model + string::size_type posR = rate_str.find("+R"); // FreeRate model + string::size_type posR2 = rate_str.find("*R"); // FreeRate model if (posG != string::npos && (posR != string::npos || posR2 != string::npos)) { outWarning("Both Gamma and FreeRate models were specified, continue with FreeRate model"); @@ -628,8 +700,8 @@ ModelFactory::ModelFactory(Params ¶ms, string &model_name, PhyloTree *tree, fused_mix_rate = true; } - string::size_type posH = rate_str.find("+H"); // heterotachy model - string::size_type posH2 = rate_str.find("*H"); // heterotachy model + string::size_type posH = rate_str.find("+H"); // heterotachy model + string::size_type posH2 = rate_str.find("*H"); // heterotachy model if (posG != string::npos && (posH != string::npos || posH2 != string::npos)) { outWarning("Both Gamma and heterotachy models were specified, continue with heterotachy model"); @@ -652,161 +724,161 @@ ModelFactory::ModelFactory(Params ¶ms, string &model_name, PhyloTree *tree, fused_mix_rate = true; } - string::size_type 
posX; - /* create site-rate heterogeneity */ - int num_rate_cats = params.num_rate_cats; - if (fused_mix_rate && model->isMixture()) num_rate_cats = model->getNMixtures(); - double gamma_shape = params.gamma_shape; - double p_invar_sites = params.p_invar_sites; - string freerate_params = ""; - if (posI != string::npos) { - // invariable site model - if (rate_str.length() > posI+2 && rate_str[posI+2] == OPEN_BRACKET) { - close_bracket = rate_str.find(CLOSE_BRACKET, posI); - if (close_bracket == string::npos) - outError("Close bracket not found in ", rate_str); - p_invar_sites = convert_double(rate_str.substr(posI+3, close_bracket-posI-3).c_str()); - if (p_invar_sites < 0 || p_invar_sites >= 1) - outError("p_invar must be in [0,1)"); - } else if (rate_str.length() > posI+2 && rate_str[posI+2] != '+' && rate_str[posI+2] != '*') - outError("Wrong model name ", rate_str); - } - if (posG != string::npos) { - // Gamma rate model - int end_pos = 0; - if (rate_str.length() > posG+2 && isdigit(rate_str[posG+2])) { - num_rate_cats = convert_int(rate_str.substr(posG+2).c_str(), end_pos); - if (num_rate_cats < 1) outError("Wrong number of rate categories"); - } - if (rate_str.length() > posG+2+end_pos && rate_str[posG+2+end_pos] == OPEN_BRACKET) { - close_bracket = rate_str.find(CLOSE_BRACKET, posG); - if (close_bracket == string::npos) - outError("Close bracket not found in ", rate_str); - gamma_shape = convert_double(rate_str.substr(posG+3+end_pos, close_bracket-posG-3-end_pos).c_str()); -// if (gamma_shape < MIN_GAMMA_SHAPE || gamma_shape > MAX_GAMMA_SHAPE) { -// stringstream str; -// str << "Gamma shape parameter " << gamma_shape << "out of range [" -// << MIN_GAMMA_SHAPE << ',' << MAX_GAMMA_SHAPE << "]" << endl; -// outError(str.str()); -// } - } else if (rate_str.length() > posG+2+end_pos && rate_str[posG+2+end_pos] != '+') - outError("Wrong model name ", rate_str); - } - if (posR != string::npos) { - // FreeRate model - int end_pos = 0; - if (rate_str.length() > posR+2 
&& isdigit(rate_str[posR+2])) { - num_rate_cats = convert_int(rate_str.substr(posR+2).c_str(), end_pos); - if (num_rate_cats < 1) outError("Wrong number of rate categories"); - } - if (rate_str.length() > posR+2+end_pos && rate_str[posR+2+end_pos] == OPEN_BRACKET) { - close_bracket = rate_str.find(CLOSE_BRACKET, posR); - if (close_bracket == string::npos) - outError("Close bracket not found in ", rate_str); - freerate_params = rate_str.substr(posR+3+end_pos, close_bracket-posR-3-end_pos).c_str(); - } else if (rate_str.length() > posR+2+end_pos && rate_str[posR+2+end_pos] != '+') - outError("Wrong model name ", rate_str); - } - - string heterotachy_params = ""; - if (posH != string::npos) { - // Heterotachy model - int end_pos = 0; - if (rate_str.length() > posH+2 && isdigit(rate_str[posH+2])) { - num_rate_cats = convert_int(rate_str.substr(posH+2).c_str(), end_pos); - if (num_rate_cats < 1) outError("Wrong number of rate categories"); + string::size_type posX; + /* create site-rate heterogeneity */ + int num_rate_cats = params.num_rate_cats; + if (fused_mix_rate && model->isMixture()) num_rate_cats = model->getNMixtures(); + double gamma_shape = params.gamma_shape; + double p_invar_sites = params.p_invar_sites; + string freerate_params = ""; + if (posI != string::npos) { + // invariable site model + if (rate_str.length() > posI+2 && rate_str[posI+2] == OPEN_BRACKET) { + close_bracket = rate_str.find(CLOSE_BRACKET, posI); + if (close_bracket == string::npos) + outError("Close bracket not found in ", rate_str); + p_invar_sites = convert_double(rate_str.substr(posI+3, close_bracket-posI-3).c_str()); + if (p_invar_sites < 0 || p_invar_sites >= 1) + outError("p_invar must be in [0,1)"); + } else if (rate_str.length() > posI+2 && rate_str[posI+2] != '+' && rate_str[posI+2] != '*') + outError("Wrong model name ", rate_str); + } + if (posG != string::npos) { + // Gamma rate model + int end_pos = 0; + if (rate_str.length() > posG+2 && isdigit(rate_str[posG+2])) { + 
num_rate_cats = convert_int(rate_str.substr(posG+2).c_str(), end_pos); + if (num_rate_cats < 1) outError("Wrong number of rate categories"); + } + if (rate_str.length() > posG+2+end_pos && rate_str[posG+2+end_pos] == OPEN_BRACKET) { + close_bracket = rate_str.find(CLOSE_BRACKET, posG); + if (close_bracket == string::npos) + outError("Close bracket not found in ", rate_str); + gamma_shape = convert_double(rate_str.substr(posG+3+end_pos, close_bracket-posG-3-end_pos).c_str()); +// if (gamma_shape < MIN_GAMMA_SHAPE || gamma_shape > MAX_GAMMA_SHAPE) { +// stringstream str; +// str << "Gamma shape parameter " << gamma_shape << "out of range [" +// << MIN_GAMMA_SHAPE << ',' << MAX_GAMMA_SHAPE << "]" << endl; +// outError(str.str()); +// } + } else if (rate_str.length() > posG+2+end_pos && rate_str[posG+2+end_pos] != '+') + outError("Wrong model name ", rate_str); + } + if (posR != string::npos) { + // FreeRate model + int end_pos = 0; + if (rate_str.length() > posR+2 && isdigit(rate_str[posR+2])) { + num_rate_cats = convert_int(rate_str.substr(posR+2).c_str(), end_pos); + if (num_rate_cats < 1) outError("Wrong number of rate categories"); + } + if (rate_str.length() > posR+2+end_pos && rate_str[posR+2+end_pos] == OPEN_BRACKET) { + close_bracket = rate_str.find(CLOSE_BRACKET, posR); + if (close_bracket == string::npos) + outError("Close bracket not found in ", rate_str); + freerate_params = rate_str.substr(posR+3+end_pos, close_bracket-posR-3-end_pos).c_str(); + } else if (rate_str.length() > posR+2+end_pos && rate_str[posR+2+end_pos] != '+') + outError("Wrong model name ", rate_str); + } + + string heterotachy_params = ""; + if (posH != string::npos) { + // Heterotachy model + int end_pos = 0; + if (rate_str.length() > posH+2 && isdigit(rate_str[posH+2])) { + num_rate_cats = convert_int(rate_str.substr(posH+2).c_str(), end_pos); + if (num_rate_cats < 1) outError("Wrong number of rate categories"); } else { if (!model->isMixture() || !fused_mix_rate) outError("Please 
specify number of heterotachy classes (e.g., +H2)"); } - if (rate_str.length() > posH+2+end_pos && rate_str[posH+2+end_pos] == OPEN_BRACKET) { - close_bracket = rate_str.find(CLOSE_BRACKET, posH); - if (close_bracket == string::npos) - outError("Close bracket not found in ", rate_str); - heterotachy_params = rate_str.substr(posH+3+end_pos, close_bracket-posH-3-end_pos).c_str(); - } else if (rate_str.length() > posH+2+end_pos && rate_str[posH+2+end_pos] != '+') - outError("Wrong model name ", rate_str); - } - - - if (rate_str.find('+') != string::npos || rate_str.find('*') != string::npos) { - //string rate_str = model_str.substr(pos); + if (rate_str.length() > posH+2+end_pos && rate_str[posH+2+end_pos] == OPEN_BRACKET) { + close_bracket = rate_str.find(CLOSE_BRACKET, posH); + if (close_bracket == string::npos) + outError("Close bracket not found in ", rate_str); + heterotachy_params = rate_str.substr(posH+3+end_pos, close_bracket-posH-3-end_pos).c_str(); + } else if (rate_str.length() > posH+2+end_pos && rate_str[posH+2+end_pos] != '+') + outError("Wrong model name ", rate_str); + } + + + if (rate_str.find('+') != string::npos || rate_str.find('*') != string::npos) { + //string rate_str = model_str.substr(pos); if (posI != string::npos && posH != string::npos) { - site_rate = new RateHeterotachyInvar(num_rate_cats, heterotachy_params, p_invar_sites, tree); + site_rate = new RateHeterotachyInvar(num_rate_cats, heterotachy_params, p_invar_sites, tree); } else if (posH != string::npos) { - site_rate = new RateHeterotachy(num_rate_cats, heterotachy_params, tree); - } else if (posI != string::npos && posG != string::npos) { - site_rate = new RateGammaInvar(num_rate_cats, gamma_shape, params.gamma_median, - p_invar_sites, params.optimize_alg_gammai, tree, false); - } else if (posI != string::npos && posR != string::npos) { - site_rate = new RateFreeInvar(num_rate_cats, gamma_shape, freerate_params, !fused_mix_rate, p_invar_sites, params.optimize_alg, tree); - } else if 
(posI != string::npos) { - site_rate = new RateInvar(p_invar_sites, tree); - } else if (posG != string::npos) { - site_rate = new RateGamma(num_rate_cats, gamma_shape, params.gamma_median, tree); - } else if (posR != string::npos) { - site_rate = new RateFree(num_rate_cats, gamma_shape, freerate_params, !fused_mix_rate, params.optimize_alg, tree); -// } else if ((posX = rate_str.find("+M")) != string::npos) { -// tree->setLikelihoodKernel(LK_NORMAL); -// params.rate_mh_type = true; -// if (rate_str.length() > posX+2 && isdigit(rate_str[posX+2])) { -// num_rate_cats = convert_int(rate_str.substr(posX+2).c_str()); -// if (num_rate_cats < 0) outError("Wrong number of rate categories"); -// } else num_rate_cats = -1; -// if (num_rate_cats >= 0) -// site_rate = new RateMeyerDiscrete(num_rate_cats, params.mcat_type, -// params.rate_file, tree, params.rate_mh_type); -// else -// site_rate = new RateMeyerHaeseler(params.rate_file, tree, params.rate_mh_type); -// site_rate->setTree(tree); -// } else if ((posX = rate_str.find("+D")) != string::npos) { -// tree->setLikelihoodKernel(LK_NORMAL); -// params.rate_mh_type = false; -// if (rate_str.length() > posX+2 && isdigit(rate_str[posX+2])) { -// num_rate_cats = convert_int(rate_str.substr(posX+2).c_str()); -// if (num_rate_cats < 0) outError("Wrong number of rate categories"); -// } else num_rate_cats = -1; -// if (num_rate_cats >= 0) -// site_rate = new RateMeyerDiscrete(num_rate_cats, params.mcat_type, -// params.rate_file, tree, params.rate_mh_type); -// else -// site_rate = new RateMeyerHaeseler(params.rate_file, tree, params.rate_mh_type); -// site_rate->setTree(tree); -// } else if ((posX = rate_str.find("+NGS")) != string::npos) { -// tree->setLikelihoodKernel(LK_NORMAL); -// if (rate_str.length() > posX+4 && isdigit(rate_str[posX+4])) { -// num_rate_cats = convert_int(rate_str.substr(posX+4).c_str()); -// if (num_rate_cats < 0) outError("Wrong number of rate categories"); -// } else num_rate_cats = -1; -// site_rate = 
new NGSRateCat(tree, num_rate_cats); -// site_rate->setTree(tree); -// } else if ((posX = rate_str.find("+NGS")) != string::npos) { -// tree->setLikelihoodKernel(LK_NORMAL); -// if (rate_str.length() > posX+4 && isdigit(rate_str[posX+4])) { -// num_rate_cats = convert_int(rate_str.substr(posX+4).c_str()); -// if (num_rate_cats < 0) outError("Wrong number of rate categories"); -// } else num_rate_cats = -1; -// site_rate = new NGSRate(tree); -// site_rate->setTree(tree); - } else if ((posX = rate_str.find("+K")) != string::npos) { - if (rate_str.length() > posX+2 && isdigit(rate_str[posX+2])) { - num_rate_cats = convert_int(rate_str.substr(posX+2).c_str()); - if (num_rate_cats < 1) outError("Wrong number of rate categories"); - } - site_rate = new RateKategory(num_rate_cats, tree); - } else - outError("Invalid rate heterogeneity type"); -// if (model_str.find('+') != string::npos) -// model_str = model_str.substr(0, model_str.find('+')); -// else -// model_str = model_str.substr(0, model_str.find('*')); - } else { - site_rate = new RateHeterogeneity(); - site_rate->setTree(tree); - } - - if (fused_mix_rate) { - if (!model->isMixture()) { + site_rate = new RateHeterotachy(num_rate_cats, heterotachy_params, tree); + } else if (posI != string::npos && posG != string::npos) { + site_rate = new RateGammaInvar(num_rate_cats, gamma_shape, params.gamma_median, + p_invar_sites, params.optimize_alg_gammai, tree, false); + } else if (posI != string::npos && posR != string::npos) { + site_rate = new RateFreeInvar(num_rate_cats, gamma_shape, freerate_params, !fused_mix_rate, p_invar_sites, params.optimize_alg_freerate, tree); + } else if (posI != string::npos) { + site_rate = new RateInvar(p_invar_sites, tree); + } else if (posG != string::npos) { + site_rate = new RateGamma(num_rate_cats, gamma_shape, params.gamma_median, tree); + } else if (posR != string::npos) { + site_rate = new RateFree(num_rate_cats, gamma_shape, freerate_params, !fused_mix_rate, 
params.optimize_alg_freerate, tree); +// } else if ((posX = rate_str.find("+M")) != string::npos) { +// tree->setLikelihoodKernel(LK_NORMAL); +// params.rate_mh_type = true; +// if (rate_str.length() > posX+2 && isdigit(rate_str[posX+2])) { +// num_rate_cats = convert_int(rate_str.substr(posX+2).c_str()); +// if (num_rate_cats < 0) outError("Wrong number of rate categories"); +// } else num_rate_cats = -1; +// if (num_rate_cats >= 0) +// site_rate = new RateMeyerDiscrete(num_rate_cats, params.mcat_type, +// params.rate_file, tree, params.rate_mh_type); +// else +// site_rate = new RateMeyerHaeseler(params.rate_file, tree, params.rate_mh_type); +// site_rate->setTree(tree); +// } else if ((posX = rate_str.find("+D")) != string::npos) { +// tree->setLikelihoodKernel(LK_NORMAL); +// params.rate_mh_type = false; +// if (rate_str.length() > posX+2 && isdigit(rate_str[posX+2])) { +// num_rate_cats = convert_int(rate_str.substr(posX+2).c_str()); +// if (num_rate_cats < 0) outError("Wrong number of rate categories"); +// } else num_rate_cats = -1; +// if (num_rate_cats >= 0) +// site_rate = new RateMeyerDiscrete(num_rate_cats, params.mcat_type, +// params.rate_file, tree, params.rate_mh_type); +// else +// site_rate = new RateMeyerHaeseler(params.rate_file, tree, params.rate_mh_type); +// site_rate->setTree(tree); +// } else if ((posX = rate_str.find("+NGS")) != string::npos) { +// tree->setLikelihoodKernel(LK_NORMAL); +// if (rate_str.length() > posX+4 && isdigit(rate_str[posX+4])) { +// num_rate_cats = convert_int(rate_str.substr(posX+4).c_str()); +// if (num_rate_cats < 0) outError("Wrong number of rate categories"); +// } else num_rate_cats = -1; +// site_rate = new NGSRateCat(tree, num_rate_cats); +// site_rate->setTree(tree); +// } else if ((posX = rate_str.find("+NGS")) != string::npos) { +// tree->setLikelihoodKernel(LK_NORMAL); +// if (rate_str.length() > posX+4 && isdigit(rate_str[posX+4])) { +// num_rate_cats = convert_int(rate_str.substr(posX+4).c_str()); +// 
if (num_rate_cats < 0) outError("Wrong number of rate categories"); +// } else num_rate_cats = -1; +// site_rate = new NGSRate(tree); +// site_rate->setTree(tree); + } else if ((posX = rate_str.find("+K")) != string::npos) { + if (rate_str.length() > posX+2 && isdigit(rate_str[posX+2])) { + num_rate_cats = convert_int(rate_str.substr(posX+2).c_str()); + if (num_rate_cats < 1) outError("Wrong number of rate categories"); + } + site_rate = new RateKategory(num_rate_cats, tree); + } else + outError("Invalid rate heterogeneity type"); +// if (model_str.find('+') != string::npos) +// model_str = model_str.substr(0, model_str.find('+')); +// else +// model_str = model_str.substr(0, model_str.find('*')); + } else { + site_rate = new RateHeterogeneity(); + site_rate->setTree(tree); + } + + if (fused_mix_rate) { + if (!model->isMixture()) { if (verbose_mode >= VB_MED) cout << endl << "NOTE: Using mixture model with unlinked " << model_str << " parameters" << endl; string model_list = model_str; @@ -815,37 +887,38 @@ ModelFactory::ModelFactory(Params ¶ms, string &model_name, PhyloTree *tree, model_list += "," + model_str; model = new ModelMixture(model_name, model_str, model_list, models_block, freq_type, freq_params, tree, optimize_mixmodel_weight); } - if (model->getNMixtures() != site_rate->getNRate()) - outError("Mixture model and site rate model do not have the same number of categories"); -// if (!tree->isMixlen()) { - // reset mixture model - model->setFixMixtureWeight(true); - int mix, nmix = model->getNMixtures(); - for (mix = 0; mix < nmix; mix++) { - ((ModelMarkov*)model->getMixtureClass(mix))->total_num_subst = 1.0; - model->setMixtureWeight(mix, 1.0); - } - model->decomposeRateMatrix(); + if (model->getNMixtures() != site_rate->getNRate()) { + outError("Mixture model and site rate model do not have the same number of categories"); + } + //if (!tree->isMixlen()) { + // reset mixture model + model->setFixMixtureWeight(true); + int mix, nmix = 
model->getNMixtures(); + for (mix = 0; mix < nmix; mix++) { + ((ModelMarkov*)model->getMixtureClass(mix))->total_num_subst = 1.0; + model->setMixtureWeight(mix, 1.0); + } + model->decomposeRateMatrix(); // } else { // site_rate->setFixParams(1); // int c, ncat = site_rate->getNRate(); // for (c = 0; c < ncat; c++) // site_rate->setProp(c, 1.0); // } - } + } - tree->discardSaturatedSite(params.discard_saturated_site); + tree->discardSaturatedSite(params.discard_saturated_site); - } catch (const char* str) { - outError(str); - } + } catch (const char* str) { + outError(str); + } } void ModelFactory::setCheckpoint(Checkpoint *checkpoint) { - CheckpointFactory::setCheckpoint(checkpoint); - model->setCheckpoint(checkpoint); - site_rate->setCheckpoint(checkpoint); + CheckpointFactory::setCheckpoint(checkpoint); + model->setCheckpoint(checkpoint); + site_rate->setCheckpoint(checkpoint); } void ModelFactory::startCheckpoint() { @@ -874,7 +947,7 @@ void ModelFactory::restoreCheckpoint() { } int ModelFactory::getNParameters(int brlen_type) { - int df = model->getNDim() + model->getNDimFreq() + site_rate->getNDim() + + int df = model->getNDim() + model->getNDimFreq() + site_rate->getNDim() + site_rate->getTree()->getNBranchParameters(brlen_type); return df; @@ -896,9 +969,9 @@ double ModelFactory::initGTRGammaIParameters(RateHeterogeneity *rate, ModelSubst */ double ModelFactory::optimizeParametersOnly(int num_steps, double gradient_epsilon, double cur_logl) { - double logl; - /* Optimize substitution and heterogeneity rates independently */ - if (!joint_optimize) { + double logl; + /* Optimize substitution and heterogeneity rates independently */ + if (!joint_optimize) { // more steps for fused mix rate model int steps; if (false && fused_mix_rate && model->getNDim() > 0 && site_rate->getNDim() > 0) { @@ -910,7 +983,10 @@ double ModelFactory::optimizeParametersOnly(int num_steps, double gradient_epsil } double prev_logl = cur_logl; for (int step = 0; step < steps; step++) { 
- double model_lh = model->optimizeParameters(gradient_epsilon); + double model_lh = 0.0; + // only optimized if model is not linked + model_lh = model->optimizeParameters(gradient_epsilon); + double rate_lh = site_rate->optimizeParameters(gradient_epsilon); if (rate_lh == 0.0) @@ -921,11 +997,11 @@ double ModelFactory::optimizeParametersOnly(int num_steps, double gradient_epsil break; prev_logl = logl; } - } else { - /* Optimize substitution and heterogeneity rates jointly using BFGS */ - logl = optimizeAllParameters(gradient_epsilon); - } - return logl; + } else { + /* Optimize substitution and heterogeneity rates jointly using BFGS */ + logl = optimizeAllParameters(gradient_epsilon); + } + return logl; } double ModelFactory::optimizeAllParameters(double gradient_epsilon) { @@ -980,38 +1056,38 @@ double ModelFactory::optimizeParametersGammaInvar(int fixed_len, bool write_info if (!site_rate->isGammai() || site_rate->isFixPInvar() || site_rate->isFixGammaShape() || site_rate->getTree()->aln->frac_const_sites == 0.0 || model->isMixture()) return optimizeParameters(fixed_len, write_info, logl_epsilon, gradient_epsilon); - double begin_time = getRealTime(); + double begin_time = getRealTime(); PhyloTree *tree = site_rate->getTree(); - double frac_const = tree->aln->frac_const_sites; + double frac_const = tree->aln->frac_const_sites; tree->setCurScore(tree->computeLikelihood()); - /* Back up branch lengths and substitutional rates */ - DoubleVector initBranLens; - DoubleVector bestLens; - tree->saveBranchLengths(initBranLens); + /* Back up branch lengths and substitutional rates */ + DoubleVector initBranLens; + DoubleVector bestLens; + tree->saveBranchLengths(initBranLens); bestLens = initBranLens; -// int numRateEntries = tree->getModel()->getNumRateEntries(); +// int numRateEntries = tree->getModel()->getNumRateEntries(); Checkpoint *model_ckp = new Checkpoint; Checkpoint *best_ckp = new Checkpoint; Checkpoint *saved_ckp = model->getCheckpoint(); *model_ckp = 
*saved_ckp; -// double *rates = new double[numRateEntries]; -// double *bestRates = new double[numRateEntries]; -// tree->getModel()->getRateMatrix(rates); -// int numStates = tree->aln->num_states; -// double *state_freqs = new double[numStates]; -// tree->getModel()->getStateFrequency(state_freqs); - - /* Best estimates found */ -// double *bestStateFreqs = new double[numStates]; - double bestLogl = -DBL_MAX; - double bestAlpha = 0.0; - double bestPInvar = 0.0; - - double testInterval = (frac_const - MIN_PINVAR * 2) / 9; - double initPInv = MIN_PINVAR; - double initAlpha = site_rate->getGammaShape(); +// double *rates = new double[numRateEntries]; +// double *bestRates = new double[numRateEntries]; +// tree->getModel()->getRateMatrix(rates); +// int numStates = tree->aln->num_states; +// double *state_freqs = new double[numStates]; +// tree->getModel()->getStateFrequency(state_freqs); + + /* Best estimates found */ +// double *bestStateFreqs = new double[numStates]; + double bestLogl = -DBL_MAX; + double bestAlpha = 0.0; + double bestPInvar = 0.0; + + double testInterval = (frac_const - MIN_PINVAR * 2) / 9; + double initPInv = MIN_PINVAR; + double initAlpha = site_rate->getGammaShape(); if (Params::getInstance().opt_gammai_fast) { initPInv = frac_const/2; @@ -1107,31 +1183,31 @@ double ModelFactory::optimizeParametersGammaInvar(int fixed_len, bool write_info // ((ModelGTR*) tree->getModel())->setStateFrequency(bestStateFreqs); // -- - tree->restoreBranchLengths(bestLens); + tree->restoreBranchLengths(bestLens); // tree->getModel()->decomposeRateMatrix(); - tree->clearAllPartialLH(); - tree->setCurScore(tree->computeLikelihood()); + tree->clearAllPartialLH(); + tree->setCurScore(tree->computeLikelihood()); if (write_info) { cout << "Optimal pinv,alpha: " << bestPInvar << ", " << bestAlpha << " / "; cout << "LogL: " << tree->getCurScore() << endl << endl; } ASSERT(fabs(tree->getCurScore() - bestLogl) < 1.0); -// delete [] rates; -// delete [] state_freqs; -// 
delete [] bestRates; -// delete [] bestStateFreqs; +// delete [] rates; +// delete [] state_freqs; +// delete [] bestRates; +// delete [] bestStateFreqs; delete model_ckp; delete best_ckp; - double elapsed_secs = getRealTime() - begin_time; - if (write_info) - cout << "Parameters optimization took " << elapsed_secs << " sec" << endl; + double elapsed_secs = getRealTime() - begin_time; + if (write_info) + cout << "Parameters optimization took " << elapsed_secs << " sec" << endl; // updating global variable is not safe! -// Params::getInstance().testAlpha = false; +// Params::getInstance().testAlpha = false; // 2016-03-14: this was missing! return tree->getCurScore(); @@ -1169,75 +1245,113 @@ vector ModelFactory::optimizeGammaInvWithInitValue(int fixed_len, double double ModelFactory::optimizeParameters(int fixed_len, bool write_info, double logl_epsilon, double gradient_epsilon) { - ASSERT(model); - ASSERT(site_rate); + ASSERT(model); + ASSERT(site_rate); // double defaultEpsilon = logl_epsilon; - double begin_time = getRealTime(); - double cur_lh; - PhyloTree *tree = site_rate->getTree(); - ASSERT(tree); + double begin_time = getRealTime(); + double cur_lh; + PhyloTree *tree = site_rate->getTree(); + ASSERT(tree); + + stopStoringTransMatrix(); - stopStoringTransMatrix(); - // modified by Thomas Wong on Sept 11, 15 // no optimization of branch length in the first round + double optimizeStartTime = getRealTime(); cur_lh = tree->computeLikelihood(); tree->setCurScore(cur_lh); - if (verbose_mode >= VB_MED || write_info) { - cout << "1. Initial log-likelihood: " << cur_lh << endl; + if (verbose_mode >= VB_MED || write_info) { + int p = -1; + + // SET precision to 17 (temporarily) + if (verbose_mode >= VB_DEBUG) p = cout.precision(17); + + // PRINT Log-Likelihood + if (verbose_mode >= VB_MED) { + cout << "1. Initial log-likelihood: " << cur_lh << " (took " << + (getRealTime() - optimizeStartTime) << " wall-clock sec)" << endl; + } else { + cout << "1. 
Initial log-likelihood: " << cur_lh << endl; + } + + // RESTORE previous precision + if (verbose_mode >= VB_DEBUG) cout.precision(p); + if (verbose_mode >= VB_MAX) { tree->printTree(cout); cout << endl; } } - // For UpperBounds ----------- - //cout<<"MLCheck = "<mlCheck <mlCheck == 0){ - tree->mlInitial = cur_lh; - } - // --------------------------- + // For UpperBounds ----------- + //cout<<"MLCheck = "<mlCheck <mlCheck == 0){ + tree->mlInitial = cur_lh; + } + // --------------------------- - int i; - //bool optimize_rate = true; -// double gradient_epsilon = min(logl_epsilon, 0.01); // epsilon for parameters starts at epsilon for logl - for (i = 2; i < tree->params->num_param_iterations; i++) { + int i; + //bool optimize_rate = true; +// double gradient_epsilon = min(logl_epsilon, 0.01); // epsilon for parameters starts at epsilon for logl + for (i = 2; i < tree->params->num_param_iterations; i++) { double new_lh; - // changed to opimise edge length first, and then Q,W,R inside the loop by Thomas on Sept 11, 15 - if (fixed_len == BRLEN_OPTIMIZE) - new_lh = tree->optimizeAllBranches(min(i,3), logl_epsilon); // loop only 3 times in total (previously in v0.9.6 5 times) + // synchronize the checkpoints of the other processors + if (syncChkPoint != nullptr) { + syncChkPoint->masterSyncOtherChkpts(); + } + + // changed to opimise edge length first, and then Q,W,R inside the loop + if (fixed_len == BRLEN_OPTIMIZE) + new_lh = tree->optimizeAllBranches(min(i,3), logl_epsilon); // loop only 3 times in total (previously in v0.9.6 5 times) else if (fixed_len == BRLEN_SCALE) { double scaling = 1.0; new_lh = tree->optimizeTreeLengthScaling(MIN_BRLEN_SCALE, scaling, MAX_BRLEN_SCALE, gradient_epsilon); } else new_lh = cur_lh; + // synchronize the checkpoints of the other processors + if (syncChkPoint != nullptr) { + syncChkPoint->masterSyncOtherChkpts(); + } + new_lh = optimizeParametersOnly(i, gradient_epsilon, new_lh); - if (new_lh == 0.0) { + // synchronize the checkpoints of 
the other processors + if (syncChkPoint != nullptr) { + syncChkPoint->masterSyncOtherChkpts(); + } + + if (new_lh == 0.0) { if (fixed_len == BRLEN_OPTIMIZE) cur_lh = tree->optimizeAllBranches(tree->params->num_param_iterations, logl_epsilon); else if (fixed_len == BRLEN_SCALE) { double scaling = 1.0; cur_lh = tree->optimizeTreeLengthScaling(MIN_BRLEN_SCALE, scaling, MAX_BRLEN_SCALE, gradient_epsilon); } - break; - } - if (verbose_mode >= VB_MED) { - model->writeInfo(cout); - site_rate->writeInfo(cout); + break; + } + if (verbose_mode >= VB_MED) { + model->writeInfo(cout); + site_rate->writeInfo(cout); if (fixed_len == BRLEN_SCALE) cout << "Scaled tree length: " << tree->treeLength() << endl; - } - if (new_lh > cur_lh + logl_epsilon) { - cur_lh = new_lh; - if (write_info) - cout << i << ". Current log-likelihood: " << cur_lh << endl; - } else { - site_rate->classifyRates(new_lh); + } + if (new_lh > cur_lh + logl_epsilon) { + cur_lh = new_lh; + if (write_info) { + if (verbose_mode >= VB_MED) { + cout << i << ". Current log-likelihood: " << cur_lh + << " (after " << (getRealTime() - optimizeStartTime) << " wall-clock sec)" + << endl; + } else { + cout << i << ". Current log-likelihood: " << cur_lh << endl; + } + } + } else { + site_rate->classifyRates(new_lh); if (fixed_len == BRLEN_OPTIMIZE) cur_lh = tree->optimizeAllBranches(100, logl_epsilon); else if (fixed_len == BRLEN_SCALE) { @@ -1245,10 +1359,10 @@ double ModelFactory::optimizeParameters(int fixed_len, bool write_info, cur_lh = tree->optimizeTreeLengthScaling(MIN_BRLEN_SCALE, scaling, MAX_BRLEN_SCALE, gradient_epsilon); } break; - } - } + } + } - // normalize rates s.t. branch lengths are #subst per site + // normalize rates s.t. 
branch lengths are #subst per site // if (Params::getInstance().optimize_alg_gammai != "EM") { double mean_rate = site_rate->rescaleRates(); @@ -1260,140 +1374,144 @@ double ModelFactory::optimizeParameters(int fixed_len, bool write_info, } } - - - if (verbose_mode >= VB_MED || write_info) - cout << "Optimal log-likelihood: " << cur_lh << endl; - - // For UpperBounds ----------- - if(tree->mlCheck == 0) - tree->mlFirstOpt = cur_lh; - // --------------------------- - - if (verbose_mode <= VB_MIN && write_info) { - model->writeInfo(cout); - site_rate->writeInfo(cout); + if (Params::getInstance().root_find && tree->rooted && Params::getInstance().root_move_dist > 0) { + cur_lh = tree->optimizeRootPosition(Params::getInstance().root_move_dist, write_info, logl_epsilon); + if (verbose_mode >= VB_MED || write_info) + cout << "Rooting log-likelihood: " << cur_lh << endl; + } + + if (verbose_mode >= VB_MED || write_info) + cout << "Optimal log-likelihood: " << cur_lh << endl; + + // For UpperBounds ----------- + if(tree->mlCheck == 0) + tree->mlFirstOpt = cur_lh; + // --------------------------- + + if (verbose_mode <= VB_MIN && write_info) { + model->writeInfo(cout); + site_rate->writeInfo(cout); if (fixed_len == BRLEN_SCALE) cout << "Scaled tree length: " << tree->treeLength() << endl; - } - double elapsed_secs = getRealTime() - begin_time; - if (write_info) - cout << "Parameters optimization took " << i-1 << " rounds (" << elapsed_secs << " sec)" << endl; - startStoringTransMatrix(); - - // For UpperBounds ----------- - tree->mlCheck = 1; - // --------------------------- - - tree->setCurScore(cur_lh); - return cur_lh; + } + double elapsed_secs = getRealTime() - begin_time; + if (write_info) + cout << "Parameters optimization took " << i-1 << " rounds (" << elapsed_secs << " sec)" << endl; + startStoringTransMatrix(); + + // For UpperBounds ----------- + tree->mlCheck = 1; + // --------------------------- + + tree->setCurScore(cur_lh); + return cur_lh; } /** * @return 
TRUE if parameters are at the boundary that may cause numerical unstability */ bool ModelFactory::isUnstableParameters() { - if (model->isUnstableParameters()) return true; - return false; + if (model->isUnstableParameters()) return true; + return false; } void ModelFactory::startStoringTransMatrix() { - if (!store_trans_matrix) return; - is_storing = true; + if (!store_trans_matrix) return; + is_storing = true; } void ModelFactory::stopStoringTransMatrix() { - if (!store_trans_matrix) return; - is_storing = false; - if (!empty()) { - for (iterator it = begin(); it != end(); it++) - delete it->second; - clear(); - } + if (!store_trans_matrix) return; + is_storing = false; + if (!empty()) { + for (iterator it = begin(); it != end(); it++) + delete it->second; + clear(); + } } double ModelFactory::computeTrans(double time, int state1, int state2) { - return model->computeTrans(time, state1, state2); + return model->computeTrans(time, state1, state2); } double ModelFactory::computeTrans(double time, int state1, int state2, double &derv1, double &derv2) { - return model->computeTrans(time, state1, state2, derv1, derv2); + return model->computeTrans(time, state1, state2, derv1, derv2); } void ModelFactory::computeTransMatrix(double time, double *trans_matrix, int mixture) { - if (!store_trans_matrix || !is_storing || model->isSiteSpecificModel()) { - model->computeTransMatrix(time, trans_matrix, mixture); - return; - } - int mat_size = model->num_states * model->num_states; - iterator ass_it = find(round(time * 1e6)); - if (ass_it == end()) { - // allocate memory for 3 matricies - double *trans_entry = new double[mat_size * 3]; - trans_entry[mat_size] = trans_entry[mat_size+1] = 0.0; - model->computeTransMatrix(time, trans_entry, mixture); - ass_it = insert(value_type(round(time * 1e6), trans_entry)).first; - } else { - //if (verbose_mode >= VB_MAX) - //cout << "ModelFactory bingo" << endl; - } - - memcpy(trans_matrix, ass_it->second, mat_size * sizeof(double)); + if 
(!store_trans_matrix || !is_storing || model->isSiteSpecificModel()) { + model->computeTransMatrix(time, trans_matrix, mixture); + return; + } + int mat_size = model->num_states * model->num_states; + iterator ass_it = find(round(time * 1e6)); + if (ass_it == end()) { + // allocate memory for 3 matricies + double *trans_entry = new double[mat_size * 3]; + trans_entry[mat_size] = trans_entry[mat_size+1] = 0.0; + model->computeTransMatrix(time, trans_entry, mixture); + ass_it = insert(value_type(round(time * 1e6), trans_entry)).first; + } else { + //if (verbose_mode >= VB_MAX) + //cout << "ModelFactory bingo" << endl; + } + + memcpy(trans_matrix, ass_it->second, mat_size * sizeof(double)); } void ModelFactory::computeTransDerv(double time, double *trans_matrix, - double *trans_derv1, double *trans_derv2, int mixture) { - if (!store_trans_matrix || !is_storing || model->isSiteSpecificModel()) { - model->computeTransDerv(time, trans_matrix, trans_derv1, trans_derv2, mixture); - return; - } - int mat_size = model->num_states * model->num_states; - iterator ass_it = find(round(time * 1e6)); - if (ass_it == end()) { - // allocate memory for 3 matricies - double *trans_entry = new double[mat_size * 3]; - trans_entry[mat_size] = trans_entry[mat_size+1] = 0.0; - model->computeTransDerv(time, trans_entry, trans_entry+mat_size, trans_entry+(mat_size*2), mixture); - ass_it = insert(value_type(round(time * 1e6), trans_entry)).first; - } else if (ass_it->second[mat_size] == 0.0 && ass_it->second[mat_size+1] == 0.0) { - double *trans_entry = ass_it->second; - model->computeTransDerv(time, trans_entry, trans_entry+mat_size, trans_entry+(mat_size*2), mixture); - } - memcpy(trans_matrix, ass_it->second, mat_size * sizeof(double)); - memcpy(trans_derv1, ass_it->second + mat_size, mat_size * sizeof(double)); - memcpy(trans_derv2, ass_it->second + (mat_size*2), mat_size * sizeof(double)); + double *trans_derv1, double *trans_derv2, int mixture) { + if (!store_trans_matrix || !is_storing 
|| model->isSiteSpecificModel()) { + model->computeTransDerv(time, trans_matrix, trans_derv1, trans_derv2, mixture); + return; + } + int mat_size = model->num_states * model->num_states; + iterator ass_it = find(round(time * 1e6)); + if (ass_it == end()) { + // allocate memory for 3 matricies + double *trans_entry = new double[mat_size * 3]; + trans_entry[mat_size] = trans_entry[mat_size+1] = 0.0; + model->computeTransDerv(time, trans_entry, trans_entry+mat_size, trans_entry+(mat_size*2), mixture); + ass_it = insert(value_type(round(time * 1e6), trans_entry)).first; + } else if (ass_it->second[mat_size] == 0.0 && ass_it->second[mat_size+1] == 0.0) { + double *trans_entry = ass_it->second; + model->computeTransDerv(time, trans_entry, trans_entry+mat_size, trans_entry+(mat_size*2), mixture); + } + memcpy(trans_matrix, ass_it->second, mat_size * sizeof(double)); + memcpy(trans_derv1, ass_it->second + mat_size, mat_size * sizeof(double)); + memcpy(trans_derv2, ass_it->second + (mat_size*2), mat_size * sizeof(double)); } ModelFactory::~ModelFactory() { - for (iterator it = begin(); it != end(); it++) - delete it->second; - clear(); + for (iterator it = begin(); it != end(); it++) + delete it->second; + clear(); } /************* FOLLOWING SERVE FOR JOINT OPTIMIZATION OF MODEL AND RATE PARAMETERS *******/ int ModelFactory::getNDim() { - return model->getNDim() + site_rate->getNDim(); + return model->getNDim() + site_rate->getNDim(); } double ModelFactory::targetFunk(double x[]) { - model->getVariables(x); - // need to compute rates again if p_inv or Gamma shape changes! - if (model->state_freq[model->num_states-1] < MIN_RATE) return 1.0e+12; - model->decomposeRateMatrix(); - site_rate->phylo_tree->clearAllPartialLH(); - return site_rate->targetFunk(x + model->getNDim()); + model->getVariables(x); + // need to compute rates again if p_inv or Gamma shape changes! 
+ if (model->state_freq[model->num_states-1] < MIN_RATE) return 1.0e+12; + model->decomposeRateMatrix(); + site_rate->phylo_tree->clearAllPartialLH(); + return site_rate->targetFunk(x + model->getNDim()); } void ModelFactory::setVariables(double *variables) { - model->setVariables(variables); - site_rate->setVariables(variables + model->getNDim()); + model->setVariables(variables); + site_rate->setVariables(variables + model->getNDim()); } bool ModelFactory::getVariables(double *variables) { - bool changed = model->getVariables(variables); - changed |= site_rate->getVariables(variables + model->getNDim()); + bool changed = model->getVariables(variables); + changed |= site_rate->getVariables(variables + model->getNDim()); return changed; } diff --git a/model/modelfactory.h b/model/modelfactory.h index 23ad14040..a158e1155 100644 --- a/model/modelfactory.h +++ b/model/modelfactory.h @@ -26,6 +26,7 @@ #include "nclextra/modelsblock.h" #include "utils/checkpoint.h" #include "alignment/alignment.h" +#include "main/phylotesting.h" const double MIN_BRLEN_SCALE = 0.01; const double MAX_BRLEN_SCALE = 100.0; @@ -37,7 +38,7 @@ ModelsBlock *readModelsDefinition(Params ¶ms); @param model_name model name string @return position of +H or *H in the model string, string::npos if not found */ -string::size_type posRateHeterotachy(string &model_name); +string::size_type posRateHeterotachy(string model_name); /** return the position of +R or *R in the model name @@ -225,8 +226,13 @@ class ModelFactory : public unordered_map, public Optimization, pu * encoded constant sites that are unobservable and added in the alignment * this involves likelihood function for ascertainment bias correction for morphological or SNP data (Lewis 2001) */ - string unobserved_ptns; + vector unobserved_ptns; + /** ascertainment bias correction type */ + ASCType ASC_type; + + ASCType getASC() { return ASC_type; } + /** * optimize model and site_rate parameters * @param gradient_epsilon to control stop @@ 
-258,6 +264,10 @@ class ModelFactory : public unordered_map, public Optimization, pu double optimizeAllParameters(double gradient_epsilon); + /** + Synchronization of check point for MPI + */ + SyncChkPoint* syncChkPoint; protected: @@ -278,6 +288,7 @@ class ModelFactory : public unordered_map, public Optimization, pu vector optimizeGammaInvWithInitValue(int fixed_len, double logl_epsilon, double gradient_epsilon, double initPInv, double initAlpha, DoubleVector &lenvec, Checkpoint *model_ckp); + }; #endif diff --git a/model/modelfactorymixlen.cpp b/model/modelfactorymixlen.cpp index 2350c12eb..79998d794 100644 --- a/model/modelfactorymixlen.cpp +++ b/model/modelfactorymixlen.cpp @@ -125,14 +125,11 @@ string ModelFactoryMixlen::sortClassesByTreeLength() { ((ModelMarkov*)model->getMixtureClass(m))->setEigenvalues(&model->getEigenvalues()[m*num_states]); ((ModelMarkov*)model->getMixtureClass(m))->setEigenvectors(&model->getEigenvectors()[m*num_states*num_states]); ((ModelMarkov*)model->getMixtureClass(m))->setInverseEigenvectors(&model->getInverseEigenvectors()[m*num_states*num_states]); + ((ModelMarkov*)model->getMixtureClass(m))->setInverseEigenvectorsTransposed(&model->getInverseEigenvectorsTransposed()[m*num_states*num_states]); } model->decomposeRateMatrix(); - -// model->writeInfo(cout); site_rate->writeInfo(cout); - } - tree->clearAllPartialLH(); ASSERT(fabs(score - tree->computeLikelihood()) < 0.1); } @@ -151,9 +148,11 @@ string ModelFactoryMixlen::sortClassesByTreeLength() { int ModelFactoryMixlen::getNParameters(int brlen_type) { int df = ModelFactory::getNParameters(brlen_type); - if (brlen_type == BRLEN_OPTIMIZE) - df += site_rate->phylo_tree->branchNum * (site_rate->phylo_tree->getMixlen()-1); - else if (brlen_type == BRLEN_SCALE) - df += (site_rate->phylo_tree->getMixlen()-1); + if (brlen_type == BRLEN_OPTIMIZE) { + df += site_rate->phylo_tree->branchNum * (site_rate->phylo_tree->getMixlen() - 1); + } + else if (brlen_type == BRLEN_SCALE) { + df += 
(site_rate->phylo_tree->getMixlen() - 1); + } return df; } diff --git a/model/modelliemarkov.cpp b/model/modelliemarkov.cpp index fe79bcc33..e5622f8db 100644 --- a/model/modelliemarkov.cpp +++ b/model/modelliemarkov.cpp @@ -17,11 +17,9 @@ * Currently symmetry permutation is applied every time setRates is called. * Would be more efficient to apply it just once to basis in constructor. */ -#ifdef USE_EIGEN3 #include #include using namespace Eigen; -#endif #include "modelliemarkov.h" #include #undef NDEBUG @@ -330,7 +328,6 @@ void ModelLieMarkov::init(const char *model_name, string model_params, StateFreq { // TODO: why is freq_params not handled here? - nondiagonalizable = false; ASSERT(NUM_RATES==getNumRateEntries()); StateFreqType expected_freq_type; // returned by getLieMarkovModelInfo but not used here getLieMarkovModelInfo((string)model_name, name, full_name, model_num, symmetry, expected_freq_type); @@ -375,13 +372,14 @@ void ModelLieMarkov::init(const char *model_name, string model_params, StateFreq // we could make virtual setRates in ModelMarkov and // perhaps move this code all into there. - MDW void ModelLieMarkov::startCheckpoint() { - checkpoint->startStruct("ModelLieMarkov"); + checkpoint->startStruct("ModelLieMarkov" + name); } void ModelLieMarkov::saveCheckpoint() { // saves model_parameters startCheckpoint(); - CKP_ARRAY_SAVE(num_params, model_parameters); + if (num_params > 0) + CKP_ARRAY_SAVE(num_params, model_parameters); endCheckpoint(); ModelMarkov::saveCheckpoint(); } @@ -390,7 +388,8 @@ void ModelLieMarkov::restoreCheckpoint() { ModelMarkov::restoreCheckpoint(); // restores model_parameters startCheckpoint(); - CKP_ARRAY_RESTORE(num_params, model_parameters); + if (num_params > 0) + CKP_ARRAY_RESTORE(num_params, model_parameters); endCheckpoint(); setRates(); // updates rate matrix decomposeRateMatrix(); // updates eigen system. 
@@ -995,7 +994,8 @@ void ModelLieMarkov::setRates() { } void ModelLieMarkov::decomposeRateMatrix() { - ModelMarkov::decomposeRateMatrix(); + return ModelMarkov::decomposeRateMatrix(); + /* if (phylo_tree->params->matrix_exp_technique == MET_SCALING_SQUARING) return; if (phylo_tree->params->matrix_exp_technique == MET_EIGEN3LIB_DECOMPOSITION) { @@ -1007,10 +1007,10 @@ void ModelLieMarkov::decomposeRateMatrix() { decomposeRateMatrixClosedForm(); return; } + */ } void ModelLieMarkov::decomposeRateMatrixEigen3lib() { -#ifdef USE_EIGEN3 nondiagonalizable = false; // until proven otherwise Matrix4d mat(rate_matrix); mat.transpose(); @@ -1051,10 +1051,6 @@ void ModelLieMarkov::decomposeRateMatrixEigen3lib() { for (int i = 0; i < 4; i++) for (int j = 0; j < 4; j++) ASSERT(abs(check(i,j)) < 1e-4); -#else - outError("Please install Eigen3 library for this option ", __func__); -#endif - } const static int a2index[] = {-1, 0, 0, 0, 0, 0,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; @@ -2046,7 +2042,8 @@ void ModelLieMarkov::decomposeRateMatrixClosedForm() { } void ModelLieMarkov::computeTransMatrix(double time, double *trans_matrix, int mixture) { -#ifdef USE_EIGEN3 + return ModelMarkov::computeTransMatrix(time, trans_matrix, mixture); + /* MatrixExpTechnique technique = phylo_tree->params->matrix_exp_technique; if (technique == MET_SCALING_SQUARING || nondiagonalizable ) { Matrix4d A = Map(rate_matrix); @@ -2106,9 +2103,6 @@ void ModelLieMarkov::computeTransMatrix(double time, double *trans_matrix, int m } else ModelMarkov::computeTransMatrix(time, trans_matrix); - -#else - ModelMarkov::computeTransMatrix(time, trans_matrix); -#endif + */ } diff --git a/model/modelliemarkov.h b/model/modelliemarkov.h index 825f99094..f9b811e18 100644 --- a/model/modelliemarkov.h +++ b/model/modelliemarkov.h @@ -75,7 +75,7 @@ class ModelLieMarkov: public ModelMarkov { void decomposeRateMatrixClosedForm(); /** decompose rate matrix using Eigen 
library */ - void decomposeRateMatrixEigen3lib(); + virtual void decomposeRateMatrixEigen3lib(); /** compute the transition probability matrix. @@ -102,9 +102,8 @@ class ModelLieMarkov: public ModelMarkov { int model_num; // 0->1.1, etc to 36->12.12 void setBasis(); virtual void setRates(); - bool nondiagonalizable; // will be set true for nondiagonalizable rate matrices, then will use scaled squaring method for matrix exponentiation. - /** + /** this function is served for the multi-dimension optimization. It should pack the model parameters into a vector that is index from 1 (NOTE: not from 0) @param variables (OUT) vector of variables, indexed from 1 diff --git a/model/modelmarkov.cpp b/model/modelmarkov.cpp index 00509c8d9..7a10e4032 100644 --- a/model/modelmarkov.cpp +++ b/model/modelmarkov.cpp @@ -23,37 +23,44 @@ #include "modelliemarkov.h" #include "modelunrest.h" +#include +#include +using namespace Eigen; + +#include +#include /** number of squaring for scaling-squaring technique */ -const int TimeSquare = 10; +//const int TimeSquare = 10; //----- declaration of some helper functions -----/ -int matexp (double Q[], double t, int n, int TimeSquare, double space[]); -int computeStateFreqFromQMatrix (double Q[], double pi[], int n, double space[]); +//int matexp (double Q[], double t, int n, int TimeSquare); +int computeStateFreqFromQMatrix (double Q[], double pi[], int n); //const double MIN_FREQ_RATIO = MIN_FREQUENCY; //const double MAX_FREQ_RATIO = 1.0/MIN_FREQUENCY; -ModelMarkov::ModelMarkov(PhyloTree *tree, bool reversible) +ModelMarkov::ModelMarkov(PhyloTree *tree, bool reversible, bool adapt_tree) : ModelSubst(tree->aln->num_states), EigenDecomposition() { phylo_tree = tree; - rates = NULL; + rates = nullptr; // variables for reversible model - eigenvalues = eigenvectors = inv_eigenvectors = NULL; + eigenvalues = nullptr; + eigenvectors = nullptr; + inv_eigenvectors = nullptr; + inv_eigenvectors_transposed = nullptr; highest_freq_state = num_states-1; 
freq_type = FREQ_UNKNOWN; half_matrix = true; highest_freq_state = num_states-1; // variables for non-reversible model - fixed_parameters = false; -// model_parameters = NULL; - rate_matrix = NULL; - temp_space = NULL; - eigenvalues_imag = NULL; - ceval = cevec = cinv_evec = NULL; + rate_matrix = nullptr; + eigenvalues_imag = nullptr; + ceval = cevec = cinv_evec = nullptr; + nondiagonalizable = false; if (reversible) { name = "Rev"; @@ -62,10 +69,11 @@ ModelMarkov::ModelMarkov(PhyloTree *tree, bool reversible) name = "NonRev"; full_name = "General non-reversible model"; } - setReversible(reversible); + setReversible(reversible, adapt_tree); } -void ModelMarkov::setReversible(bool reversible) { +void ModelMarkov::setReversible(bool reversible, bool adapt_tree) { + bool old_reversible = is_reversible; is_reversible = reversible; if (reversible) { @@ -73,23 +81,23 @@ void ModelMarkov::setReversible(bool reversible) { int i; int nrate = getNumRateEntries(); - if (rates) - delete [] rates; + delete [] rates; rates = new double[nrate]; - for (i=0; i < nrate; i++) + for (i=0; i < nrate; i++) { rates[i] = 1.0; - - if (!eigenvalues) - eigenvalues = aligned_alloc(num_states); - if (!eigenvectors) - eigenvectors = aligned_alloc(num_states*num_states); - if (!inv_eigenvectors) - inv_eigenvectors = aligned_alloc(num_states*num_states); - + } + size_t num_states_squared = num_states * num_states; + ensure_aligned_allocated(eigenvalues, num_states); + ensure_aligned_allocated(eigenvectors, num_states_squared); + ensure_aligned_allocated(inv_eigenvectors, num_states_squared); + ensure_aligned_allocated(inv_eigenvectors_transposed, num_states_squared); + num_params = nrate - 1; - if (phylo_tree->rooted) { - cout << "Converting rooted to unrooted tree..." << endl; + if (adapt_tree && phylo_tree && phylo_tree->rooted) { + if (verbose_mode >= VB_MED) { + cout << "Converting rooted to unrooted tree..." 
<< endl; + } phylo_tree->convertToUnrooted(); } } else { @@ -97,31 +105,49 @@ void ModelMarkov::setReversible(bool reversible) { ignore_state_freq = true; int num_rates = getNumRateEntries(); - - // reallocate the mem spaces - if (rates) - delete [] rates; - rates = new double [num_rates]; - memset(rates, 0, sizeof(double) * (num_rates)); - if (!rate_matrix) rate_matrix = aligned_alloc(num_states*num_states); - if (!temp_space) - temp_space = aligned_alloc(num_states*num_states); - if (!eigenvalues_imag) - eigenvalues_imag = aligned_alloc(num_states); - - if (!ceval) - ceval = aligned_alloc >(num_states); - if (!cevec) - cevec = aligned_alloc >(num_states*num_states); - if (!cinv_evec) - cinv_evec = aligned_alloc >(num_states*num_states); + + // reallocate the mem spaces + if (rates && old_reversible) { + // copy old reversible rates into new non-reversible + for (int i = 0, k = 0; i < num_states; i++) { + for (int j = i+1; j < num_states; j++, k++) { + rate_matrix[i*num_states+j] = rates[k] * state_freq[j]; + rate_matrix[j*num_states+i] = rates[k] * state_freq[i]; + } + } + delete [] rates; + rates = new double[num_rates]; + int k = 0; + int pos = 0; + for (int i = 0; i < num_states; i++) { + for (int j = 0; j < num_states; j++, pos++) { + if (i!=j) { + rates[k] = rate_matrix[pos]; + ++k; + } + } + } + ASSERT(k == num_rates); + } else { + delete [] rates; + rates = new double [num_rates]; + memset(rates, 0, sizeof(double) * (num_rates)); + } - if (!phylo_tree->rooted) { - cout << "Converting unrooted to rooted tree..." << endl; + size_t num_states_squared = num_states * num_states; + ensure_aligned_allocated(eigenvalues_imag, num_states); + ensure_aligned_allocated(ceval, num_states); + ensure_aligned_allocated(cevec, num_states_squared); + ensure_aligned_allocated(cinv_evec, num_states_squared); + + if (adapt_tree && phylo_tree && !phylo_tree->rooted) { + if (verbose_mode >= VB_MED) + cout << "Converting unrooted to rooted tree..." 
<< endl; phylo_tree->convertToRooted(); } + num_params = num_rates - 1; } } @@ -162,7 +188,6 @@ void ModelMarkov::restoreCheckpoint() { endCheckpoint(); } - void ModelMarkov::setTree(PhyloTree *tree) { phylo_tree = tree; } @@ -241,13 +266,15 @@ string ModelMarkov::getNameParams() { ostringstream retname; retname << name; // if (num_states != 4) retname << num_states; - retname << '{'; - int nrates = getNumRateEntries(); - for (int i = 0; i < nrates; i++) { - if (i>0) retname << ','; - retname << rates[i]; - } - retname << '}'; + if (!fixed_parameters) { + retname << '{'; + int nrates = getNumRateEntries(); + for (int i = 0; i < nrates; i++) { + if (i>0) retname << ','; + retname << rates[i]; + } + retname << '}'; + } getNameParamsFreq(retname); return retname.str(); } @@ -255,7 +282,8 @@ string ModelMarkov::getNameParams() { void ModelMarkov::getNameParamsFreq(ostream &retname) { // "+F..." but without {frequencies} retname << freqTypeString(freq_type, phylo_tree->aln->seq_type, true); - + if (fixed_parameters) + return; if (freq_type == FREQ_EMPIRICAL || freq_type == FREQ_ESTIMATE || (freq_type == FREQ_USER_DEFINED && phylo_tree->aln->seq_type == SEQ_DNA)) { retname << "{" << state_freq[0]; @@ -274,10 +302,10 @@ void ModelMarkov::init_state_freq(StateFreqType type) { case FREQ_EQUAL: if (phylo_tree->aln->seq_type == SEQ_CODON) { int nscodon = phylo_tree->aln->getNumNonstopCodons(); - double freq_codon = (1.0-(num_states-nscodon)*MIN_FREQUENCY)/(nscodon); + double freq_codon = (1.0-(num_states-nscodon)*Params::getInstance().min_state_freq)/(nscodon); for (i = 0; i < num_states; i++) if (phylo_tree->aln->isStopCodon(i)) - state_freq[i] = MIN_FREQUENCY; + state_freq[i] = Params::getInstance().min_state_freq; else state_freq[i] = freq_codon; } else { @@ -292,9 +320,11 @@ void ModelMarkov::init_state_freq(StateFreqType type) { double ntfreq[12]; phylo_tree->aln->computeCodonFreq(freq_type, state_freq, ntfreq); // phylo_tree->aln->computeCodonFreq(state_freq); - } else 
if (phylo_tree->aln->seq_type != SEQ_POMO) - phylo_tree->aln->computeStateFreq(state_freq); - for (i = 0; i < num_states; i++) + } else if (phylo_tree->aln->seq_type != SEQ_POMO) { + double emp_state_freq[num_states]; + phylo_tree->aln->computeStateFreq(emp_state_freq); + setStateFrequency(emp_state_freq); + } for (i = 0; i < num_states; i++) if (state_freq[i] > state_freq[highest_freq_state]) highest_freq_state = i; break; @@ -322,9 +352,11 @@ void ModelMarkov::init(StateFreqType type) { void ModelMarkov::writeInfo(ostream &out) { if (is_reversible && num_states == 4) { - report_rates(out, "Rate parameters", rates); - report_state_freqs(out); + report_rates(out, "Rate parameters", rates); + report_state_freqs(out); //if (freq_type != FREQ_ESTIMATE) return; + } else if (is_reversible && num_states == 2) { + report_state_freqs(out); } else if (!is_reversible) { // non-reversible // int i; @@ -335,7 +367,7 @@ void ModelMarkov::writeInfo(ostream &out) { if (num_states != 4) return; report_rates(out, "Substitution rates", rates); - report_state_freqs(out, state_freq); + report_state_freqs(out, state_freq); } } @@ -372,42 +404,99 @@ void ModelMarkov::report_rates(ostream& out, string title, double *r) { } void ModelMarkov::report_state_freqs(ostream& out, double *custom_state_freq) { - double *f; - if (custom_state_freq) f = custom_state_freq; - else f = state_freq; - out << setprecision(3); - out << "Base frequencies:"; - out << " A: " << f[0]; - out << " C: " << f[1]; - out << " G: " << f[2]; - out << " T: " << f[3]; - out << endl; + double *f; + if (custom_state_freq) f = custom_state_freq; + else f = state_freq; + if (num_states == 4) { + out << setprecision(3); + out << "Base frequencies:"; + out << " A: " << f[0]; + out << " C: " << f[1]; + out << " G: " << f[2]; + out << " T: " << f[3]; + out << endl; + } else if (num_states == 2) { + out << setprecision(3); + out << "State frequencies:"; + out << " 0: " << f[0]; + out << " 1: " << f[1]; + out << endl; + } +} + 
+void ModelMarkov::computeTransMatrixNonrev(double time, double *trans_matrix, int mixture) { + auto technique = phylo_tree->params->matrix_exp_technique; + if (technique == MET_SCALING_SQUARING || nondiagonalizable) { + // scaling and squaring technique + Map,Aligned >rate_mat(rate_matrix, num_states, num_states); + Map >trans_mat(trans_matrix, num_states, num_states); + MatrixXd mat = rate_mat; + mat = (mat*time).exp(); + if (mat.minCoeff() < 0) { + outWarning("negative trans_mat"); + } + // sanity check rows sum to 1 + VectorXd row_sum = mat.rowwise().sum(); + double mincoeff = row_sum.minCoeff(); + double maxcoeff = row_sum.maxCoeff(); + ASSERT(maxcoeff < 1.001 && mincoeff > 0.999); + trans_mat = mat; + } else if (phylo_tree->params->matrix_exp_technique == MET_EIGEN3LIB_DECOMPOSITION) { + VectorXcd ceval_exp(num_states); + ArrayXcd eval = Map(ceval, num_states); + ceval_exp = (eval*time).exp().matrix(); + Map cevectors(cevec, num_states, num_states); + Map cinv_evectors(cinv_evec, num_states, num_states); + MatrixXcd res = cevectors * ceval_exp.asDiagonal() * cinv_evectors; + Map >map_trans(trans_matrix,num_states,num_states); + map_trans = res.real(); + // sanity check rows sum to 1 + VectorXd row_sum = map_trans.rowwise().sum(); + double mincoeff = row_sum.minCoeff(); + double maxcoeff = row_sum.maxCoeff(); + if (maxcoeff > 1.0001 || mincoeff < 0.9999) { + if (verbose_mode >= VB_MED) + cout << "INFO: Switch to scaling-squaring due to unstable eigen-decomposition rowsum: " + << mincoeff << " to " << maxcoeff << endl; + nondiagonalizable = true; + computeTransMatrixNonrev(time, trans_matrix, mixture); + nondiagonalizable = false; + } + } else { + ASSERT(0 && "this line should not be reached"); + } + } void ModelMarkov::computeTransMatrix(double time, double *trans_matrix, int mixture) { if (!is_reversible) { - if (phylo_tree->params->matrix_exp_technique == MET_EIGEN_DECOMPOSITION) { - computeTransMatrixEigen(time, trans_matrix); - } else if 
(phylo_tree->params->matrix_exp_technique == MET_SCALING_SQUARING) { - // scaling and squaring technique - int statesqr = num_states*num_states; - memcpy(trans_matrix, rate_matrix, statesqr*sizeof(double)); - matexp(trans_matrix, time, num_states, TimeSquare, temp_space); - } else { - ASSERT(0 && "this line should not be reached"); - } + computeTransMatrixNonrev(time, trans_matrix, mixture); return; - // 2016-04-05: 2nd version - // for (int i = 0; i < statesqr; i++) - // trans_matrix[i] *= time; - // double space[NCODE*NCODE*3] = {0}; - // matexp2(trans_matrix, num_states, 7, 5, space); } /* compute P(t) */ double evol_time = time / total_num_subst; - double exptime[num_states]; + + if (Params::getInstance().experimental) { + double eval_exp[num_states]; + calculateExponentOfScalarMultiply(eigenvalues, num_states, evol_time, eval_exp); + aTimesDiagonalBTimesTransposeOfC( eigenvectors, eval_exp + , inv_eigenvectors_transposed, num_states, trans_matrix); + return; + } else { + VectorXd eval_exp(num_states); + ArrayXd eval = Map(eigenvalues, num_states); + eval_exp = (eval*evol_time).exp().matrix(); + Map,Aligned> evectors(eigenvectors, num_states, num_states); + Map,Aligned> inv_evectors(inv_eigenvectors, num_states, num_states); + MatrixXd res = evectors * eval_exp.asDiagonal() * inv_evectors; + Map >map_trans(trans_matrix,num_states,num_states); + map_trans = res; + return; + } + /* + double exptime[num_states]; int i, j, k; for (i = 0; i < num_states; i++) @@ -416,7 +505,7 @@ void ModelMarkov::computeTransMatrix(double time, double *trans_matrix, int mixt int row_offset; for (i = 0, row_offset = 0; i < num_states; i++, row_offset+=num_states) { double *trans_row = trans_matrix + row_offset; - for (j = i+1; j < num_states; j ++) { + for (j = i+1; j < num_states; j ++) { // compute upper triangle entries double *trans_entry = trans_row + j; // double *coeff_entry = eigen_coeff + ((row_offset+j)*num_states); @@ -437,6 +526,7 @@ void 
ModelMarkov::computeTransMatrix(double time, double *trans_matrix, int mixt sum += trans_row[j]; trans_row[i] = 1.0 - sum; // update diagonal entry } + */ // delete [] exptime; } @@ -452,60 +542,219 @@ double ModelMarkov::computeTrans(double time, int state1, int state2) { return trans_prob; } else { // non-reversible -// double *trans_matrix = new double[num_states*num_states]; - computeTransMatrix(time, temp_space); - double trans = temp_space[state1*num_states+state2]; -// delete [] trans_matrix; + double *trans_matrix = new double[num_states*num_states]; + computeTransMatrix(time, trans_matrix); + double trans = trans_matrix[state1*num_states+state2]; + delete [] trans_matrix; return trans; } } double ModelMarkov::computeTrans(double time, int state1, int state2, double &derv1, double &derv2) { - double evol_time = time / total_num_subst; - int i; + double evol_time = time / total_num_subst; + double trans_prob = 0.0; + derv1 = derv2 = 0.0; + for (int i = 0; i < num_states; i++) { + double trans = eigenvectors[state1*num_states+i] + * inv_eigenvectors[i*num_states+state2] + * exp(evol_time * eigenvalues[i]); + double trans2 = trans * eigenvalues[i]; + trans_prob += trans; + derv1 += trans2; + derv2 += trans2 * eigenvalues[i]; + } + return trans_prob; +} -// double *coeff_entry = eigen_coeff + ((state1*num_states+state2)*num_states); - double trans_prob = 0.0; - derv1 = derv2 = 0.0; - for (i = 0; i < num_states; i++) { - double trans = eigenvectors[state1*num_states+i] * inv_eigenvectors[i*num_states+state2] * exp(evol_time * eigenvalues[i]); - double trans2 = trans * eigenvalues[i]; - trans_prob += trans; - derv1 += trans2; - derv2 += trans2 * eigenvalues[i]; - } - return trans_prob; +void ModelMarkov::calculateExponentOfScalarMultiply ( const double* source, int size + , double scalar, double* dest) { + if (size == 4) { + Vec4d v; + v.load(source); + exp(v * scalar).store(dest); + return; + } + int offset=0; + if (4 < size) { + Vec4d v; + int step = 
Vec4d::size(); + int integralSize = size - (size & (step - 1)); + for (; offset < integralSize; offset+=step) { + v.load(source+offset); + exp(v * scalar).store(dest+offset); + } + } + //Do the last few operations one at a time + for (; offset > trans_mat(trans_matrix, num_states, num_states); + Map > rate_mat(rate_matrix, num_states, num_states); + MatrixXd prod = rate_mat * trans_mat; + Map > derv1_mat(trans_derv1, num_states, num_states); + derv1_mat = prod; + + // Second derivative = Q * Q * e^(Qt) + prod = rate_mat * prod; + Map > derv2_mat(trans_derv2, num_states, num_states); + derv2_mat = prod; + + /* + for (int i = 0; i < num_states; i++) + for (int j = 0; j < num_states; j++) { double val = 0.0; - for (k = 0; k < num_states; k++) + for (int k = 0; k < num_states; k++) val += rate_matrix[i*num_states+k] * trans_matrix[k*num_states+j]; trans_derv1[i*num_states+j] = val; } // Second derivative = Q * Q * e^(Qt) - for (i = 0; i < num_states; i++) - for (j = 0; j < num_states; j++) { + for (int i = 0; i < num_states; i++) + for (int j = 0; j < num_states; j++) { double val = 0.0; - for (k = 0; k < num_states; k++) + for (int k = 0; k < num_states; k++) val += rate_matrix[i*num_states+k] * trans_derv1[k*num_states+j]; trans_derv2[i*num_states+j] = val; } + */ return; } double evol_time = time / total_num_subst; + + if (Params::getInstance().experimental) { + //James' version + double eval_exp[num_states]; + calculateExponentOfScalarMultiply(eigenvalues, num_states, evol_time, eval_exp); + aTimesDiagonalBTimesTransposeOfC( eigenvectors, eval_exp + , inv_eigenvectors_transposed, num_states, trans_matrix); + + double eval_exp_derv1[num_states]; + calculateHadamardProduct(eigenvalues, eval_exp, num_states, eval_exp_derv1); + aTimesDiagonalBTimesTransposeOfC( eigenvectors, eval_exp_derv1 + , inv_eigenvectors_transposed, num_states, trans_derv1); + + double* eval_exp_derv2 = &eval_exp[0]; //reuse it, why not? 
+ calculateHadamardProduct(eigenvalues, eval_exp_derv1, num_states, eval_exp_derv2); + aTimesDiagonalBTimesTransposeOfC( eigenvectors, eval_exp_derv2 + , inv_eigenvectors_transposed, num_states, trans_derv2); + } + else + { + //EIGEN version + ArrayXd eval = Map(eigenvalues, num_states); + ArrayXd eval_exp = (eval*evol_time).exp(); + ArrayXd eval_exp_derv1 = eval_exp*eval; + ArrayXd eval_exp_derv2 = eval_exp_derv1*eval; + Map,Aligned> evectors(eigenvectors, num_states, num_states); + Map,Aligned> inv_evectors(inv_eigenvectors, num_states, num_states); + MatrixXd res = evectors * eval_exp.matrix().asDiagonal() * inv_evectors; + Map >map_trans(trans_matrix,num_states,num_states); + map_trans = res; + + res = evectors * eval_exp_derv1.matrix().asDiagonal() * inv_evectors; + Map >map_derv1(trans_derv1,num_states,num_states); + map_derv1 = res; + + res = evectors * eval_exp_derv2.matrix().asDiagonal() * inv_evectors; + Map >map_derv2(trans_derv2,num_states,num_states); + map_derv2 = res; + } + + /* + //Flat version double exptime[num_states]; for (i = 0; i < num_states; i++) @@ -535,6 +784,7 @@ void ModelMarkov::computeTransDerv(double time, double *trans_matrix, } } } + */ // delete [] exptime; } @@ -549,6 +799,23 @@ void ModelMarkov::setRateMatrix(double* rate_mat) memcpy(rates, rate_mat, nrate * sizeof(double)); } +void ModelMarkov::setFullRateMatrix(double* rate_mat, double *freq) +{ + int i, j, k; + if (isReversible()) { + for (i = 0, k = 0; i < num_states; i++) + for (j = i+1; j < num_states; j++) + rates[k++] = rate_mat[i*num_states+j] / freq[j]; + memcpy(state_freq, freq, sizeof(double)*num_states); + } else { + // non-reversible + for (i = 0, k = 0; i < num_states; i++) + for (j = 0; j < num_states; j++) + if (i != j) + rates[k++] = rate_mat[i*num_states+j]; + } +} + void ModelMarkov::getStateFrequency(double *freq, int mixture) { ASSERT(state_freq); ASSERT(freq_type != FREQ_UNKNOWN); @@ -571,7 +838,39 @@ void ModelMarkov::getStateFrequency(double *freq, int 
mixture) { void ModelMarkov::setStateFrequency(double* freq) { ASSERT(state_freq); - memcpy(state_freq, freq, sizeof(double) * num_states); + /* + if (!isReversible()) { + // integrate out state_freq from rate_matrix + int i, j, k = 0; + for (i = 0, k = 0; i < num_states; i++) + for (j = 0; j < num_states; j++) + if (i != j) { + rates[k] = (rates[k])*freq[j]; + if (state_freq[j] != 0.0) + rates[k] /= state_freq[j]; + k++; + } + } + */ + ModelSubst::setStateFrequency(freq); +} + +void ModelMarkov::adaptStateFrequency(double* freq) +{ + ASSERT(state_freq); + if (!isReversible()) { + // integrate out state_freq from rate_matrix + int i, j, k = 0; + for (i = 0, k = 0; i < num_states; i++) + for (j = 0; j < num_states; j++) + if (i != j) { + rates[k] = (rates[k])*freq[j]; + if (state_freq[j] > ZERO_FREQ) + rates[k] /= state_freq[j]; + k++; + } + } + ModelSubst::setStateFrequency(freq); } void ModelMarkov::getQMatrix(double *q_mat) { @@ -611,10 +910,10 @@ int ModelMarkov::getNDim() { if (fixed_parameters) return 0; if (!is_reversible) - return (num_params); + return num_params; // reversible model - int ndim = num_params; + int ndim = num_params; if (freq_type == FREQ_ESTIMATE) ndim += num_states-1; return ndim; @@ -625,13 +924,18 @@ int ModelMarkov::getNDimFreq() { // BQM, 2017-05-02: getNDimFreq should return degree of freedom, which is not included in getNDim() // That's why 0 is returned for FREQ_ESTIMATE, num_states-1 for FREQ_EMPIRICAL - if (freq_type == FREQ_EMPIRICAL) - return num_states-1; - else if (freq_type == FREQ_CODON_1x4) + if (fixed_parameters) + return 0; + + if (freq_type == FREQ_EMPIRICAL) { + return num_states - 1; + } + else if (freq_type == FREQ_CODON_1x4) { return 3; - else if (freq_type == FREQ_CODON_3x4 || freq_type == FREQ_CODON_3x4C) + } + else if (freq_type == FREQ_CODON_3x4 || freq_type == FREQ_CODON_3x4C) { return 9; - + } // commented out due to reason above // if (phylo_tree->aln->seq_type == SEQ_DNA) { // return nFreqParams(freq_type); 
@@ -665,10 +969,10 @@ void ModelMarkov::setVariables(double *variables) { // return; // } - if (freq_type == FREQ_ESTIMATE) nrate -= (num_states-1); + if (is_reversible && freq_type == FREQ_ESTIMATE) nrate -= (num_states-1); if (nrate > 0) memcpy(variables+1, rates, nrate*sizeof(double)); - if (freq_type == FREQ_ESTIMATE) { + if (is_reversible && freq_type == FREQ_ESTIMATE) { // 2015-09-07: relax the sum of state_freq to be 1, this will be done at the end of optimization int ndim = getNDim(); memcpy(variables+(ndim-num_states+2), state_freq, (num_states-1)*sizeof(double)); @@ -691,14 +995,14 @@ bool ModelMarkov::getVariables(double *variables) { // return changed; // } - if (freq_type == FREQ_ESTIMATE) nrate -= (num_states-1); + if (is_reversible && freq_type == FREQ_ESTIMATE) nrate -= (num_states-1); if (nrate > 0) { for (i = 0; i < nrate; i++) changed |= (rates[i] != variables[i+1]); memcpy(rates, variables+1, nrate * sizeof(double)); } - if (freq_type == FREQ_ESTIMATE) { + if (is_reversible && freq_type == FREQ_ESTIMATE) { // 2015-09-07: relax the sum of state_freq to be 1, this will be done at the end of optimization // 2015-09-07: relax the sum of state_freq to be 1, this will be done at the end of optimization int ndim = getNDim(); @@ -731,18 +1035,27 @@ bool ModelMarkov::getVariables(double *variables) { double ModelMarkov::targetFunk(double x[]) { bool changed = getVariables(x); - if (state_freq[num_states-1] < 0) return 1.0e+12; - if (changed) { decomposeRateMatrix(); ASSERT(phylo_tree); phylo_tree->clearAllPartialLH(); +// if (nondiagonalizable) // matrix is ill-formed +// return 1.0e+30; } // avoid numerical issue if state_freq is too small - for (int i = 0; i < num_states; i++) - if (state_freq[i] < 0) - return 1.0e+12; + for (int i = 0; i < num_states; i++) { + if (state_freq[i] < 0 || (state_freq[i] > 0 && state_freq[i] < Params::getInstance().min_state_freq)) { + //outWarning("Weird state_freq[" + convertIntToString(i) + "]=" + 
convertDoubleToString(state_freq[i])); + return 1.0e+30; + } + } + +// if (!is_reversible) { +// for (int i = 0; i < num_states; i++) +// if (state_freq[i] < MIN_FREQUENCY) +// return 1.0e+30; +// } return -phylo_tree->computeLikelihood(); @@ -752,19 +1065,23 @@ bool ModelMarkov::isUnstableParameters() { int nrates = getNumRateEntries(); int i; // NOTE: zero rates are not consider unstable anymore - for (i = 0; i < nrates; i++) - if (/*rates[i] < MIN_RATE+TOL_RATE || */rates[i] > MAX_RATE*0.99) - return true; - - if (freq_type == FREQ_ESTIMATE) - for (i = 0; i < num_states; i++) - if (state_freq[i] > 0.0 && state_freq[i] < MIN_RATE+TOL_RATE) - return true; + for (i = 0; i < nrates; i++) { + if (/*rates[i] < MIN_RATE+TOL_RATE || */rates[i] > MAX_RATE * 0.99) { + return true; + } + } + if (freq_type == FREQ_ESTIMATE) { + for (i = 0; i < num_states; i++) { + if (state_freq[i] > 0.0 && state_freq[i] < MIN_RATE + TOL_RATE) { + return true; + } + } + } return false; } void ModelMarkov::setBounds(double *lower_bound, double *upper_bound, bool *bound_check) { - ASSERT(is_reversible && "setBounds should only be called on subclass of ModelMarkov"); +// ASSERT(is_reversible && "setBounds should only be called on subclass of ModelMarkov"); int i, ndim = getNDim(); @@ -775,30 +1092,34 @@ void ModelMarkov::setBounds(double *lower_bound, double *upper_bound, bool *boun bound_check[i] = false; } - if (freq_type == FREQ_ESTIMATE) { - for (i = ndim-num_states+2; i <= ndim; i++) { + if (is_reversible && freq_type == FREQ_ESTIMATE) { + for (i = num_params+1; i <= num_params+num_states-1; i++) { // lower_bound[i] = MIN_FREQUENCY/state_freq[highest_freq_state]; // upper_bound[i] = state_freq[highest_freq_state]/MIN_FREQUENCY; - lower_bound[i] = MIN_FREQUENCY; + lower_bound[i] = Params::getInstance().min_state_freq; // upper_bound[i] = 100.0; upper_bound[i] = 1.0; bound_check[i] = false; } } else if (phylo_tree->aln->seq_type == SEQ_DNA) { setBoundsForFreqType(&lower_bound[num_params+1], 
&upper_bound[num_params+1], - &bound_check[num_params+1], MIN_FREQUENCY, freq_type); + &bound_check[num_params+1], Params::getInstance().min_state_freq, freq_type); } } double ModelMarkov::optimizeParameters(double gradient_epsilon) { + + if (fixed_parameters) { + return 0.0; + } int ndim = getNDim(); // return if nothing to be optimized if (ndim == 0) return 0.0; - if (verbose_mode >= VB_MAX) - cout << "Optimizing " << name << " model parameters..." << endl; - + if (verbose_mode >= VB_MAX) { + cout << "Optimizing " << name << " model parameters..." << endl; + } //if (freq_type == FREQ_ESTIMATE) scaleStateFreq(false); double *variables = new double[ndim+1]; // used for BFGS numerical recipes @@ -808,19 +1129,24 @@ double ModelMarkov::optimizeParameters(double gradient_epsilon) { bool *bound_check = new bool[ndim+1]; double score; - for (int i = 0; i < num_states; i++) - if (state_freq[i] > state_freq[highest_freq_state]) + for (int i = 0; i < num_states; i++) { + if (state_freq[i] > state_freq[highest_freq_state]) { highest_freq_state = i; + } + } // by BFGS algorithm setVariables(variables); setVariables(variables2); setBounds(lower_bound, upper_bound, bound_check); -// if (phylo_tree->params->optimize_alg.find("BFGS-B") == string::npos) -// score = -minimizeMultiDimen(variables, ndim, lower_bound, upper_bound, bound_check, max(gradient_epsilon, TOL_RATE)); -// else -// score = -L_BFGS_B(ndim, variables+1, lower_bound+1, upper_bound+1, max(gradient_epsilon, TOL_RATE)); + if (phylo_tree->params->optimize_alg.find("BFGS-B") == string::npos) + score = -minimizeMultiDimen(variables, ndim, lower_bound, upper_bound, bound_check, max(gradient_epsilon, TOL_RATE)); + else + score = -L_BFGS_B(ndim, variables+1, lower_bound+1, upper_bound+1, max(gradient_epsilon, TOL_RATE)); + bool changed = getVariables(variables); + + /* 2019-09-05: REMOVED due to numerical issue (NAN) with L-BFGS-B // 2017-12-06: more robust optimization using 2 different routines // when estimates are at 
boundary score = -minimizeMultiDimen(variables, ndim, lower_bound, upper_bound, bound_check, max(gradient_epsilon, TOL_RATE)); @@ -839,6 +1165,7 @@ double ModelMarkov::optimizeParameters(double gradient_epsilon) { changed = getVariables(variables); } } + */ // BQM 2015-09-07: normalize state_freq if (is_reversible && freq_type == FREQ_ESTIMATE) { @@ -860,48 +1187,153 @@ double ModelMarkov::optimizeParameters(double gradient_epsilon) { return score; } -void ModelMarkov::decomposeRateMatrix(){ - int i, j, k = 0; - - if (!is_reversible) { - double sum; - //double m[num_states]; - double *space = new double[num_states*(num_states+1)]; - - for (i = 0; i < num_states; i++) - state_freq[i] = 1.0/num_states; - - for (i = 0, k = 0; i < num_states; i++) { - rate_matrix[i*num_states+i] = 0.0; - double row_sum = 0.0; - for (j = 0; j < num_states; j++) - if (j != i) { - row_sum += (rate_matrix[i*num_states+j] = rates[k++]); - } - rate_matrix[i*num_states+i] = -row_sum; +void ModelMarkov::decomposeRateMatrixNonrev() { + int i, j, k = 0; + double sum; + //double m[num_states]; + double freq = 1.0/num_states; + + for (i = 0; i < num_states; i++) + state_freq[i] = freq; + + for (i = 0, k = 0; i < num_states; i++) { + double *rate_row = rate_matrix+(i*num_states); + double row_sum = 0.0; + for (j = 0; j < num_states; j++) + if (j != i) { + row_sum += (rate_row[j] = rates[k++]); + } + rate_row[i] = -row_sum; + } + computeStateFreqFromQMatrix(rate_matrix, state_freq, num_states); + + + for (i = 0, sum = 0.0; i < num_states; i++) { + sum -= rate_matrix[i*num_states+i] * state_freq[i]; /* exp. rate */ + } + + if (sum == 0.0) throw "Empty Q matrix"; + + double delta = total_num_subst / sum; /* 0.01 subst. 
per unit time */ + + for (i = 0; i < num_states; i++) { + double *rate_row = rate_matrix+(i*num_states); + for (j = 0; j < num_states; j++) { + rate_row[j] *= delta; } - computeStateFreqFromQMatrix(rate_matrix, state_freq, num_states, space); - - - for (i = 0, sum = 0.0; i < num_states; i++) { - sum -= rate_matrix[i*num_states+i] * state_freq[i]; /* exp. rate */ + } + + if (phylo_tree->params->matrix_exp_technique == MET_EIGEN_DECOMPOSITION) { + eigensystem_nonrev(rate_matrix, state_freq, eigenvalues, eigenvalues_imag, eigenvectors, inv_eigenvectors, num_states); + calculateSquareMatrixTranspose(inv_eigenvectors, num_states + , inv_eigenvectors_transposed); + return; + } + + + /******** using Eigen3 library ***********/ + + nondiagonalizable = false; // until proven otherwise + int n = 0; // the number of states where freq is non-zero + for (i = 0; i < num_states; i++) + if (state_freq[i] > ZERO_FREQ) + n++; + int ii, jj; + MatrixXd Q(n, n); + VectorXd pi(n); + for (i = 0, ii = 0; i < num_states; i++) + if (state_freq[i] > ZERO_FREQ) { + pi(ii) = state_freq[i]; + ii++; } - - if (sum == 0.0) throw "Empty Q matrix"; - - double delta = total_num_subst / sum; /* 0.01 subst. 
per unit time */ - - for (i = 0; i < num_states; i++) { - for (j = 0; j < num_states; j++) { - rate_matrix[i*num_states+j] *= delta; + // normalize pi to sum=1 + pi = pi*(1.0/pi.sum()); + // RowMajor for rate_matrix + if (n == num_states) + Q = Map,Aligned >(rate_matrix, num_states, num_states); + else { + for (i = 0, ii = 0; i < num_states; i++) + if (state_freq[i] > ZERO_FREQ) { + for (j = 0, jj = 0; j < num_states; j++) + if (state_freq[j] > ZERO_FREQ) { + Q(ii,jj) = rate_matrix[i*num_states+j]; + jj++; + } + ii++; } + } + EigenSolver eigensolver(Q); + ASSERT (eigensolver.info() == Eigen::Success); + if (n == num_states) { + Map eval(ceval, num_states); + eval = eigensolver.eigenvalues(); + Map evec(cevec, num_states, num_states); + evec = eigensolver.eigenvectors(); + FullPivLU lu(evec); + if (lu.isInvertible()) { + Map inv_evec(cinv_evec, num_states, num_states); + inv_evec = lu.inverse(); + } else { + nondiagonalizable = true; + outWarning("evec not invertible"); } - delete [] space; - - if (phylo_tree->params->matrix_exp_technique == MET_EIGEN_DECOMPOSITION) { - eigensystem_nonrev(rate_matrix, state_freq, eigenvalues, eigenvalues_imag, eigenvectors, inv_eigenvectors, num_states); + } else { + // manual copy non-zero entries + for (i = 0, ii = 0; i < num_states; i++) { + if (state_freq[i] > ZERO_FREQ) { + ceval[i] = eigensolver.eigenvalues()(ii); + ii++; + } else { + ceval[i] = 0.0; + } + } + MatrixXcd evec = eigensolver.eigenvectors(); + MatrixXcd inv_evec; + FullPivLU lu(evec); + if (lu.isInvertible()) { + inv_evec = lu.inverse(); + } else { + nondiagonalizable = true; + outWarning("evec not invertible"); } - } else if (num_params == -1) { + for (i = 0, ii = 0; i < num_states; i++) { + auto *eigenvectors_ptr = cevec + (i*num_states); + auto *inv_eigenvectors_ptr = cinv_evec + (i*num_states); + if (state_freq[i] > ZERO_FREQ) { + for (j = 0, jj = 0; j < num_states; j++) { + if (state_freq[j] > ZERO_FREQ) { + eigenvectors_ptr[j] = evec(ii,jj); + 
inv_eigenvectors_ptr[j] = inv_evec(ii,jj); + jj++; + } else { + eigenvectors_ptr[j] = inv_eigenvectors_ptr[j] = (i == j); + } + } + ii++; + } else { + for (j = 0; j < num_states; j++) { + eigenvectors_ptr[j] = inv_eigenvectors_ptr[j] = (i == j); + } + } + } + calculateSquareMatrixTranspose(inv_eigenvectors, num_states + , inv_eigenvectors_transposed); + } + // sanity check + // MatrixXcd eval_diag = eval.asDiagonal(); + // MatrixXd check = (inv_evec * mat * evec - eval_diag).cwiseAbs(); + // ASSERT(check.maxCoeff() < 1e-4); +} + +void ModelMarkov::decomposeRateMatrix(){ + int i, j, k = 0; + + if (!is_reversible) { + decomposeRateMatrixNonrev(); + return; + } + + if (num_params == -1) { // reversible model // manual compute eigenvalues/vectors for F81-style model eigenvalues[0] = 0.0; @@ -928,9 +1360,14 @@ void ModelMarkov::decomposeRateMatrix(){ eigenvectors[i*num_states+i] = state_freq[0]/state_freq[i]; } - for (i = 0; i < num_states; i++) - for (j = 0; j < num_states; j++) - inv_eigenvectors[i*num_states+j] = state_freq[j]*eigenvectors[j*num_states+i]; + for (i = 0; i < num_states; i++) { + for (j = 0; j < num_states; j++) { + inv_eigenvectors[i*num_states+j] + = state_freq[j]*eigenvectors[j*num_states+i]; + } + } + calculateSquareMatrixTranspose(inv_eigenvectors, num_states + , inv_eigenvectors_transposed); writeInfo(cout); // sanity check double *q = new double[num_states*num_states]; @@ -947,36 +1384,183 @@ void ModelMarkov::decomposeRateMatrix(){ } } delete [] q; - } else { - - // general reversible model - double **rate_matrix = new double*[num_states]; - - for (i = 0; i < num_states; i++) - rate_matrix[i] = new double[num_states]; + return; + } + auto technique = phylo_tree->params->matrix_exp_technique; + if (technique == MET_EIGEN3LIB_DECOMPOSITION) { + // Use Eigen3 library for eigen decomposition of symmetric matrix + int n = 0; // the number of states where freq is non-zero + for (i = 0; i < num_states; i++) + if (state_freq[i] > ZERO_FREQ) + n++; + 
int ii, jj; + MatrixXd Q(n, n); + VectorXd pi(n); + for (i = 0, ii = 0; i < num_states; i++) + if (state_freq[i] > ZERO_FREQ) { + pi(ii) = state_freq[i]; + ii++; + } + // normalize pi to sum=1 + pi = pi*(1.0/pi.sum()); + + ArrayXd pi_sqrt_arr = pi.array().sqrt(); + auto pi_sqrt = pi_sqrt_arr.matrix().asDiagonal(); + auto pi_sqrt_inv = pi_sqrt_arr.inverse().matrix().asDiagonal(); if (half_matrix) { - for (i = 0, k = 0; i < num_states; i++) { - rate_matrix[i][i] = 0.0; - for (j = i+1; j < num_states; j++, k++) { - rate_matrix[i][j] = (state_freq[i] <= ZERO_FREQ || state_freq[j] <= ZERO_FREQ) ? 0 : rates[k]; - rate_matrix[j][i] = rate_matrix[i][j]; + for (i = 0, k = 0, ii = 0; i < num_states; i++) + if (state_freq[i] > ZERO_FREQ){ + Q(ii,ii) = 0.0; + for (j = i+1, jj = ii+1; j < num_states; j++, k++) + if (state_freq[j] > ZERO_FREQ) { + Q(ii,jj) = Q(jj,ii) = rates[k]; + jj++; } - } + ASSERT(jj == n); + ii++; + } else + k += num_states-i-1; // 2019-04-27 BUG FIX: k is not increased properly! 
} else { // full matrix - for (i = 0; i < num_states; i++) { - memcpy(rate_matrix[i], &rates[i*num_states], num_states*sizeof(double)); - rate_matrix[i][i] = 0.0; + if (n == num_states) + Q = Map >(rates,num_states,num_states); + else { + for (i = 0, ii = 0; i < num_states; i++) + if (state_freq[i] > ZERO_FREQ) { + for (j = 0, jj = 0; j < num_states; j++) + if (state_freq[j] > ZERO_FREQ) { + Q(ii,jj) = rates[i*num_states+j]; + jj++; + } + ii++; + } } } - /* eigensystem of 1 PAM rate matrix */ - eigensystem_sym(rate_matrix, state_freq, eigenvalues, eigenvectors, inv_eigenvectors, num_states); - //eigensystem(rate_matrix, state_freq, eigenvalues, eigenvectors, inv_eigenvectors, num_states); - for (i = num_states-1; i >= 0; i--) - delete [] rate_matrix[i]; - delete [] rate_matrix; - } + + // compute rate matrix + if (!ignore_state_freq) + Q *= pi.asDiagonal(); + + //make row sum equal zero + VectorXd Q_row_sum = Q.rowwise().sum(); + Q -= Q_row_sum.asDiagonal(); + + // normalize rat_mat + if (normalize_matrix) { + double scale_factor = total_num_subst / (Q_row_sum.dot(pi)); + Q *= scale_factor; + } + +// if (verbose_mode >= VB_DEBUG) +// cout << Q << endl; + + //symmetrize rate matrix + Q = pi_sqrt * Q * pi_sqrt_inv; + if (verbose_mode >= VB_DEBUG) + cout << "Symmetric rate matrix:" << endl << Q << endl; + if ((Q - Q.transpose()).cwiseAbs().maxCoeff() >= 0.01) { + cout << "Q: " << endl << Q << endl; + cout << "pi: " << pi << endl; + writeInfo(cout); + } + if ((Q - Q.transpose()).cwiseAbs().maxCoeff() > 0.1) { + // Somehow transformed Q is non-symmetric, revert to the old function + decomposeRateMatrixRev(); + return; + } + + // eigensolver + SelfAdjointEigenSolver eigensolver(Q); + if (eigensolver.info() != Eigen::Success) { + // Eigen3 failed, revert to the old function + decomposeRateMatrixRev(); + return; + } + if (eigensolver.eigenvalues().maxCoeff() > 1e-4) { + // "eigenvalues are not positive", revert to the old function + decomposeRateMatrixRev(); + return; + } 
+ + if (n == num_states) { + Map eval(eigenvalues,num_states); + eval = eigensolver.eigenvalues(); + if (verbose_mode >= VB_DEBUG) + cout << "eval: " << eval << endl; + + Map,Aligned> evec(eigenvectors,num_states,num_states); + evec = pi_sqrt_inv * eigensolver.eigenvectors(); + + Map,Aligned> inv_evec(inv_eigenvectors,num_states,num_states); + inv_evec = eigensolver.eigenvectors().transpose() * pi_sqrt; + } else { + // manual copy non-zero entries + for (i = 0, ii = 0; i < num_states; i++) + if (state_freq[i] > ZERO_FREQ) { + eigenvalues[i] = eigensolver.eigenvalues()(ii); + ii++; + } else + eigenvalues[i] = 0.0; + MatrixXd evec = pi_sqrt_inv * eigensolver.eigenvectors(); + MatrixXd inv_evec = eigensolver.eigenvectors().transpose() * pi_sqrt; + for (i = 0, ii = 0; i < num_states; i++) { + double *eigenvectors_ptr = eigenvectors + (i*num_states); + double *inv_eigenvectors_ptr = inv_eigenvectors + (i*num_states); + if (state_freq[i] > ZERO_FREQ) { + for (j = 0, jj = 0; j < num_states; j++) + if (state_freq[j] > ZERO_FREQ) { + eigenvectors_ptr[j] = evec(ii,jj); + inv_eigenvectors_ptr[j] = inv_evec(ii,jj); + jj++; + } else { + eigenvectors_ptr[j] = inv_eigenvectors_ptr[j] = (i == j); + } + ii++; + } else { + for (j = 0; j < num_states; j++) { + eigenvectors_ptr[j] = inv_eigenvectors_ptr[j] = (i == j); + } + } + } + } + calculateSquareMatrixTranspose(inv_eigenvectors, num_states + , inv_eigenvectors_transposed); + return; + } + decomposeRateMatrixRev(); +} + +void ModelMarkov::decomposeRateMatrixRev() { + // general reversible model + double **rate_matrix = new double*[num_states]; + + for (int i = 0; i < num_states; i++) { + rate_matrix[i] = new double[num_states]; + } + if (half_matrix) { + for (int i = 0, k = 0; i < num_states; i++) { + rate_matrix[i][i] = 0.0; + for (int j = i+1; j < num_states; j++, k++) { + rate_matrix[i][j] = (state_freq[i] <= ZERO_FREQ || state_freq[j] <= ZERO_FREQ) ? 
0 : rates[k]; + rate_matrix[j][i] = rate_matrix[i][j]; + } + } + } else { + // full matrix + for (int i = 0; i < num_states; i++) { + memcpy(rate_matrix[i], &rates[i*num_states], num_states*sizeof(double)); + rate_matrix[i][i] = 0.0; + } + } + /* eigensystem of 1 PAM rate matrix */ + eigensystem_sym(rate_matrix, state_freq, eigenvalues, eigenvectors, inv_eigenvectors, num_states); + calculateSquareMatrixTranspose(inv_eigenvectors, num_states + , inv_eigenvectors_transposed); + for (int i = num_states-1; i >= 0; i--) { + delete [] rate_matrix[i]; + } + delete [] rate_matrix; } void ModelMarkov::readRates(istream &in) throw(const char*, string) { @@ -985,8 +1569,11 @@ void ModelMarkov::readRates(istream &in) throw(const char*, string) { in >> str; if (str == "equalrate") { for (int i = 0; i < nrates; i++) + { rates[i] = 1.0; - } else { + } + } else if (is_reversible ){ + // reversible model try { rates[0] = convert_double(str.c_str()); } catch (string &str) { @@ -1000,7 +1587,37 @@ void ModelMarkov::readRates(istream &in) throw(const char*, string) { if (rates[i] < 0.0) throw "Negative rates not allowed"; } - } + } else { + // non-reversible model, read the whole rate matrix + int i = 0, row, col; + for (row = 0; row < num_states; row++) { + double row_sum = 0.0; + for (col = 0; col < num_states; col++) + if (row == 0 && col == 0) { + // top-left element was already red + try { + row_sum = convert_double(str.c_str()); + } catch (string &str) { + outError(str); + } + } else if (row != col) { + // non-diagonal element + if (!(in >> rates[i])) + throw name+string(": Rate entries could not be read"); + if (rates[i] < 0.0) + throw "Negative rates found"; + row_sum += rates[i]; + i++; + } else { + // diagonal element + double d; + in >> d; + row_sum += d; + } + if (fabs(row_sum) > 1e-3) + throw "Row " + convertIntToString(row) + " does not sum to 0"; + } + } } void ModelMarkov::readRates(string str) throw(const char*) { @@ -1044,6 +1661,9 @@ void 
ModelMarkov::readStateFreq(istream &in) throw(const char*) { for (i = 0; i < num_states; i++) sum += state_freq[i]; if (fabs(sum-1.0) > 1e-2) throw "State frequencies do not sum up to 1.0"; + sum = 1.0/sum; + for (i = 0; i < num_states; i++) + state_freq[i] *= sum; } void ModelMarkov::readStateFreq(string str) throw(const char*) { @@ -1066,15 +1686,37 @@ void ModelMarkov::readStateFreq(string str) throw(const char*) { for (i = 0; i < num_states; i++) sum += state_freq[i]; if (fabs(sum-1.0) > 1e-2) outError("State frequencies do not sum up to 1.0 in ", str); + sum = 1.0/sum; + for (i = 0; i < num_states; i++) + state_freq[i] *= sum; } -void ModelMarkov::readParameters(const char *file_name) { +void ModelMarkov::readParameters(const char *file_name, bool adapt_tree) { + if (!fileExists(file_name)) + outError("File not found ", file_name); + + cout << "Reading model parameters from file " << file_name << endl; + + // if detect if reading full matrix or half matrix by the first entry + try { + ifstream in(file_name); + double d; + in >> d; + if (d < 0) { + setReversible(false, adapt_tree); + } else + setReversible(true, adapt_tree); + in.close(); + } + catch (...) 
{ + outError(ERR_READ_ANY, file_name); + } + try { ifstream in(file_name); if (in.fail()) { outError("Invalid model name ", file_name); } - cout << "Reading model parameters from file " << file_name << endl; readRates(in); readStateFreq(in); in.close(); @@ -1084,38 +1726,78 @@ void ModelMarkov::readParameters(const char *file_name) { } num_params = 0; writeInfo(cout); + + if (!is_reversible) { + // check consistency of state_freq + double saved_state_freq[num_states]; + memcpy(saved_state_freq, state_freq, sizeof(double)*num_states); + decomposeRateMatrix(); + for (int i = 0; i < num_states; i++) + if (fabs(state_freq[i] - saved_state_freq[i]) > 1e-3) + cout << "WARNING: State " << i << " frequency " << state_freq[i] + << " does not match " << saved_state_freq[i] << endl; + } +} + +void ModelMarkov::readParametersString(string &model_str, bool adapt_tree) { + + // if detect if reading full matrix or half matrix by the first entry + int end_pos; + double d = 0.0; + d = convert_double(model_str.c_str(), end_pos); + if (d < 0) { + setReversible(false, adapt_tree); + } + else { + setReversible(true, adapt_tree); + } + try { + stringstream in(model_str); + readRates(in); + readStateFreq(in); + } + catch (const char *str) { + outError(str); + } + num_params = 0; + writeInfo(cout); + + if (!is_reversible) { + // check consistency of state_freq + double saved_state_freq[num_states]; + memcpy(saved_state_freq, state_freq, sizeof(double)*num_states); + decomposeRateMatrix(); + for (int i = 0; i < num_states; i++) + if (fabs(state_freq[i] - saved_state_freq[i]) > 1e-3) + cout << "WARNING: State " << i << " frequency " << state_freq[i] + << " does not match " << saved_state_freq[i] << endl; + } } ModelMarkov::~ModelMarkov() { - freeMem(); + // mem space pointing to target model and thus avoid double free here + internalFreeMem(); +} + +void ModelMarkov::internalFreeMem() { + aligned_free(inv_eigenvectors); + aligned_free(inv_eigenvectors_transposed); + 
aligned_free(eigenvectors); + aligned_free(eigenvalues); + + delete[] rates; + + aligned_free(cinv_evec); + aligned_free(cevec); + aligned_free(ceval); + aligned_free(eigenvalues_imag); + aligned_free(rate_matrix); } void ModelMarkov::freeMem() { - if (inv_eigenvectors) - aligned_free(inv_eigenvectors); - if (eigenvectors) - aligned_free(eigenvectors); - if (eigenvalues) - aligned_free(eigenvalues); - - if (rates) delete [] rates; - - if (cinv_evec) - aligned_free(cinv_evec); - if (cevec) - aligned_free(cevec); - if (ceval) - aligned_free(ceval); - if (eigenvalues_imag) - aligned_free(eigenvalues_imag); - if (temp_space) - aligned_free(temp_space); - if (rate_matrix) - aligned_free(rate_matrix); -// if (model_parameters) -// delete [] model_parameters; + internalFreeMem(); } double *ModelMarkov::getEigenvalues() const @@ -1132,31 +1814,34 @@ double* ModelMarkov::getInverseEigenvectors() const { return inv_eigenvectors; } -//void ModelGTR::setEigenCoeff(double *eigenCoeff) -//{ -// eigen_coeff = eigenCoeff; -//} +double* ModelMarkov::getInverseEigenvectorsTransposed() const { + return inv_eigenvectors_transposed; +} + +void ModelMarkov::setEigenvalues(double *eigenValues) +{ + this->eigenvalues = eigenValues; +} -void ModelMarkov::setEigenvalues(double *eigenvalues) +void ModelMarkov::setEigenvectors(double *eigenVectors) { - this->eigenvalues = eigenvalues; + this->eigenvectors = eigenVectors; } -void ModelMarkov::setEigenvectors(double *eigenvectors) +void ModelMarkov::setInverseEigenvectors(double *eigenV) { - this->eigenvectors = eigenvectors; + this->inv_eigenvectors = eigenV; } -void ModelMarkov::setInverseEigenvectors(double *inv_eigenvectors) +void ModelMarkov::setInverseEigenvectorsTransposed(double *eigenVTranspose) { - this->inv_eigenvectors = inv_eigenvectors; + this->inv_eigenvectors_transposed = eigenVTranspose; } /****************************************************/ /* NON-REVERSIBLE STUFFS */ /****************************************************/ - 
void ModelMarkov::setRates() { // I don't know the proper C++ way to handle this: got error if I didn't define something here. ASSERT(0 && "setRates should only be called on subclass of ModelMarkov"); @@ -1182,11 +1867,13 @@ int ModelMarkov::get_num_states_total() { return num_states; } -void ModelMarkov::update_eigen_pointers(double *eval, double *evec, double *inv_evec) { - eigenvalues = eval; - eigenvectors = evec; - inv_eigenvectors = inv_evec; - return; +void ModelMarkov::update_eigen_pointers(double *eval, double *evec + , double *inv_evec, double *inv_evec_transposed) { + eigenvalues = eval; + eigenvectors = evec; + inv_eigenvectors = inv_evec; + inv_eigenvectors_transposed = inv_evec_transposed; + return; } void ModelMarkov::computeTransMatrixEigen(double time, double *trans_matrix) { @@ -1194,10 +1881,9 @@ void ModelMarkov::computeTransMatrixEigen(double time, double *trans_matrix) { double evol_time = time / total_num_subst; int nstates_2 = num_states*num_states; double *exptime = new double[nstates_2]; - int i, j, k; memset(exptime, 0, sizeof(double)*nstates_2); - for (i = 0; i < num_states; i++) + for (int i = 0; i < num_states; i++) { if (eigenvalues_imag[i] == 0.0) { exptime[i*num_states+i] = exp(evol_time * eigenvalues[i]); } else { @@ -1210,25 +1896,24 @@ void ModelMarkov::computeTransMatrixEigen(double time, double *trans_matrix) { exptime[i*num_states+i] = exp_eval.real(); exptime[i*num_states+i-1] = -exp_eval.imag(); } - - + } // compute V * exp(L t) - for (i = 0; i < num_states; i++) - for (j = 0; j < num_states; j++) { + for (int i = 0; i < num_states; i++) { + for (int j = 0; j < num_states; j++) { double val = 0; - for (k = 0; k < num_states; k++) + for (int k = 0; k < num_states; k++) val += eigenvectors[i*num_states+k] * exptime[k*num_states+j]; trans_matrix[i*num_states+j] = val; } - + } memcpy(exptime, trans_matrix, sizeof(double)*nstates_2); // then compute V * exp(L t) * V^{-1} - for (i = 0; i < num_states; i++) { + for (int i = 0; i < 
num_states; i++) { double row_sum = 0.0; - for (j = 0; j < num_states; j++) { + for (int j = 0; j < num_states; j++) { double val = 0; - for (k = 0; k < num_states; k++) + for (int k = 0; k < num_states; k++) val += exptime[i*num_states+k] * inv_eigenvectors[k*num_states+j]; // make sure that trans_matrix are non-negative ASSERT(val >= -0.001); @@ -1238,7 +1923,6 @@ void ModelMarkov::computeTransMatrixEigen(double time, double *trans_matrix) { } ASSERT(fabs(row_sum-1.0) < 1e-4); } - delete [] exptime; } @@ -1271,7 +1955,7 @@ int matinv (double x[], int n, int m, double space[]) det *= x[irow[i]*m+i]; if (xmax < ee) { cout << endl << "xmax = " << xmax << " close to zero at " << i+1 << "!\t" << endl; - exit(-1); + ASSERT(0); } if (irow[i] != i) { for (j=0; j < m; j++) { @@ -1302,81 +1986,102 @@ int matinv (double x[], int n, int m, double space[]) return(0); } -int computeStateFreqFromQMatrix (double Q[], double pi[], int n, double space[]) +/* +int computeStateFreqFromQMatrix (double Q[], double pi[], int n) { - /* from rate matrix Q[] to pi, the stationary frequencies: - Q' * pi = 0 pi * 1 = 1 - space[] is of size n*(n+1). - */ + double *space = new double[n*(n+1)]; + + // from rate matrix Q[] to pi, the stationary frequencies: + // Q' * pi = 0 pi * 1 = 1 + // space[] is of size n*(n+1). int i,j; - double *T = space; /* T[n*(n+1)] */ + double *T = space; // T[n*(n+1)] for (i=0;i(Q, n, n); + VectorXd b(n+1); + b.setZero(); + b(0) = 1.0; + Map freq(pi, n); + freq = A.colPivHouseholderQr().solve(b); + double sum = freq.sum(); + ASSERT(fabs(sum-1.0) < 1e-4); + return 0; } -int matexp (double Q[], double t, int n, int TimeSquare, double space[]) -{ - /* This calculates the matrix exponential P(t) = exp(t*Q). - Input: Q[] has the rate matrix, and t is the time or branch length. - TimeSquare is the number of times the matrix is squared and should - be from 5 to 31. - Output: Q[] has the transition probability matrix, that is P(Qt). - space[n*n]: required working space. 
- P(t) = (I + Qt/m + (Qt/m)^2/2)^m, with m = 2^TimeSquare. - T[it=0] is the current matrix, and T[it=1] is the squared result matrix, - used to avoid copying matrices. - Use an even TimeSquare to avoid one round of matrix copying. - */ - int it, i; - double *T[2]; - - if (TimeSquare<2 || TimeSquare>31) cout << "TimeSquare not good" << endl; - T[0]=Q; - T[1]=space; - for (i=0; i31) cout << "TimeSquare not good" << endl; +// T[0]=Q; +// T[1]=space; +// for (i=0; iunrooted tree */ - ModelMarkov(PhyloTree *tree, bool reversible = true); + ModelMarkov(PhyloTree *tree, bool reversible = true, bool adapt_tree = true); /** @return TRUE if model is time-reversible, FALSE otherwise @@ -62,8 +65,9 @@ class ModelMarkov : public ModelSubst, public EigenDecomposition /** set the reversibility of the model @param reversible TRUE to make model reversible, FALSE otherwise + @param adapt_tree TRUE (default) to convert between rooted and unrooted tree */ - virtual void setReversible(bool reversible); + virtual void setReversible(bool reversible, bool adapt_tree = true); /** @@ -105,7 +109,7 @@ class ModelMarkov : public ModelSubst, public EigenDecomposition restore object from the checkpoint */ virtual void restoreCheckpoint(); - + /** * @return model name */ @@ -164,10 +168,15 @@ class ModelMarkov : public ModelSubst, public EigenDecomposition /** read model parameters from a file - @param file_name file containing upper-triangle rate matrix and state frequencies + @param file_name file containing rate matrix and state frequencies */ - void readParameters(const char *file_name); + void readParameters(const char *file_name, bool adapt_tree = true); + /** + read model parameters from a string + @param model_str string containing rate matrix and state frequencies + */ + void readParametersString(string &model_str, bool adapt_tree = true); /** compute the transition probability matrix. 
@@ -178,6 +187,15 @@ class ModelMarkov : public ModelSubst, public EigenDecomposition */ virtual void computeTransMatrix(double time, double *trans_matrix, int mixture = 0); + /** + compute the transition probability matrix for non-reversible model + @param time time between two events + @param mixture (optional) class for mixture model + @param trans_matrix (OUT) the transition matrix between all pairs of states. + Assume trans_matrix has size of num_states * num_states. + */ + virtual void computeTransMatrixNonrev(double time, double *trans_matrix, int mixture = 0); + /** compute the transition probability between two states @param time time between two events @@ -208,6 +226,13 @@ class ModelMarkov : public ModelSubst, public EigenDecomposition */ virtual void setRateMatrix(double *rate_mat); + /** + Set the full rate matrix of size num_states*num_states + @param rate_mat full rate matrix + @param freq state frequency + */ + virtual void setFullRateMatrix(double *rate_mat, double *freq); + /** compute the state frequency vector @param mixture (optional) class for mixture model @@ -221,6 +246,12 @@ class ModelMarkov : public ModelSubst, public EigenDecomposition */ virtual void setStateFrequency(double *state_freq); + /** + set the state frequency vector + @param state_freq (IN) state frequency vector. 
Assume state_freq has size of num_states + */ + virtual void adaptStateFrequency(double *state_freq); + /** * compute Q matrix * @param q_mat (OUT) Q matrix, assuming of size num_states * num_states @@ -309,6 +340,11 @@ class ModelMarkov : public ModelSubst, public EigenDecomposition */ virtual void writeParameters(ostream &out){} + /** decompose rate matrix for non-reversible models */ + virtual void decomposeRateMatrixNonrev(); + + /** old version of decompose rate matrix for reversible models */ + void decomposeRateMatrixRev(); /** decompose the rate matrix into eigenvalues and eigenvectors @@ -321,6 +357,7 @@ class ModelMarkov : public ModelSubst, public EigenDecomposition virtual double *getEigenvectors() const; virtual double *getInverseEigenvectors() const; + virtual double *getInverseEigenvectorsTransposed() const; // void setEigenCoeff(double *eigenCoeff); @@ -330,6 +367,23 @@ class ModelMarkov : public ModelSubst, public EigenDecomposition void setInverseEigenvectors(double *inv_eigenvectors); + void setInverseEigenvectorsTransposed(double *inv_eigenvectors); + + static void calculateExponentOfScalarMultiply(const double* source, int size + , double scalar, double* dest); + + static void calculateHadamardProduct(const double* first, const double* second + , int size, double *dest); + + static double dotProduct(const double* first, const double* second, int size); + + static void calculateSquareMatrixTranspose(const double* original, int rank + , double* transpose); + + static void aTimesDiagonalBTimesTransposeOfC + ( const double* matrixA, const double* rowB + , const double* matrixCTranspose, int rank + , double* dest); /** * compute the memory size for the model, can be large for site-specific models * @return memory size required in bytes @@ -371,8 +425,24 @@ class ModelMarkov : public ModelSubst, public EigenDecomposition // need to be updated recursively, if the model is a mixture model. 
For a // normal Markov model, only the standard pointers are set. This was done in // `ModelMixture::initMem()` before. - virtual void update_eigen_pointers(double *eval, double *evec, double *inv_evec); + virtual void update_eigen_pointers(double *eval, double *evec + , double *inv_evec, double *inv_evec_transposed); + + /** + set num_params variable + */ + virtual void setNParams(int num_params) { + this->num_params = num_params; + } + + /** + get num_params variable + */ + virtual int getNParams() { + return num_params; + } + protected: /** @@ -398,8 +468,14 @@ class ModelMarkov : public ModelSubst, public EigenDecomposition virtual void setRates(); /** - free all allocated memory + free all allocated memory + (internal version not affected by virtual dispatch; safe to call from destructor) */ + void internalFreeMem(); + + /** + free all allocated memory + */ virtual void freeMem(); /** TRUE if model is reversible */ @@ -436,11 +512,11 @@ class ModelMarkov : public ModelSubst, public EigenDecomposition */ double *inv_eigenvectors; - /** - coefficient cache, served for fast computation of the P(t) matrix - */ -// double *eigen_coeff; - + /** + transpose of the matrix of the inverse eigenvectors of the rate matrix Q + */ + double *inv_eigenvectors_transposed; + /** state with highest frequency, used when optimizing state frequencies +FO */ int highest_freq_state; @@ -456,9 +532,6 @@ class ModelMarkov : public ModelSubst, public EigenDecomposition */ void computeTransMatrixEigen(double time, double *trans_matrix); - /** true to fix parameters, otherwise false */ - bool fixed_parameters; - /** unrestricted Q matrix. Note that Q is normalized to 1 and has row sums of 0. no state frequencies are involved here since Q is a general matrix. 
@@ -468,17 +541,17 @@ class ModelMarkov : public ModelSubst, public EigenDecomposition /** imaginary part of eigenvalues */ double *eigenvalues_imag; - /** - temporary working space - */ - double *temp_space; - /** complex eigenvalues and eigenvectors, pointing to the same pointer to the previous double *eigenvalues and double *eigenvectors */ std::complex *ceval, *cevec, *cinv_evec; + /** will be set true for nondiagonalizable rate matrices, + then will use scaled squaring method for matrix exponentiation. + */ + bool nondiagonalizable; + }; #endif diff --git a/model/modelmixture.cpp b/model/modelmixture.cpp index cb692fea5..abef7b631 100644 --- a/model/modelmixture.cpp +++ b/model/modelmixture.cpp @@ -7,6 +7,7 @@ #include "modelmarkov.h" #include "modeldna.h" +#include "modeldnaerror.h" #include "modelprotein.h" #include "modelbin.h" #include "modelcodon.h" @@ -19,970 +20,972 @@ using namespace std; -const string builtin_mixmodels_definition = -"#nexus\n\ -\n\ -begin models;\n\ -\n\ -[ ---------------------------------------------------------\n\ - EX2 mixture model of Le, Lartillot & Gascuel (2008) \n\ - --------------------------------------------------------- ]\n\ -\n\ -[ Exposed component ]\n\ -model ExpEX2 =\n\ -0.526738 \n\ -0.483150 0.505837 \n\ -0.658902 0.051052 3.902456 \n\ -2.051872 2.214326 0.961103 0.129989 \n\ -1.280002 2.039552 1.301786 0.399061 0.456521 \n\ -1.306565 0.137928 0.285806 3.100403 0.033946 2.514377 \n\ -1.370782 0.363365 1.820100 0.885317 0.886564 0.320746 0.303966 \n\ -0.540809 2.288922 4.949307 0.700890 2.172284 3.755421 0.270957 0.401311 \n\ -0.171986 0.237023 0.337226 0.018315 1.037046 0.212032 0.084442 0.012279 0.317239 \n\ -0.430511 0.670514 0.158937 0.021949 1.702066 1.261113 0.110508 0.052946 0.869247 8.675343 \n\ -0.697731 3.881079 1.677194 0.105450 0.146263 2.570254 0.730337 0.279865 0.598289 0.338782 0.313102 \n\ -1.043937 0.656943 0.539827 0.066925 1.846562 1.973592 0.188160 0.158136 0.519993 9.483497 14.176858 1.013268 \n\ 
-0.265209 0.097443 0.182522 0.026918 3.002586 0.080193 0.023999 0.084663 2.047163 2.193062 4.802817 0.044792 3.261401 \n\ -1.270693 0.166534 0.068692 0.228829 0.156216 0.362501 0.214847 0.148900 0.323141 0.071992 0.343919 0.195470 0.099252 0.087020 \n\ -4.826665 0.751947 4.412265 0.975564 5.294149 1.033459 0.382235 1.970857 0.993310 0.190509 0.389101 0.592156 0.557254 0.668834 1.223981 \n\ -2.131819 0.584329 2.133604 0.368887 2.067387 1.013613 0.511390 0.174527 0.580960 2.563630 0.522334 1.147459 2.960091 0.244420 0.413148 7.384701 \n\ -0.143081 0.475590 0.061094 0.042618 1.603125 0.210329 0.048276 0.186382 0.961546 0.208313 1.130724 0.052858 1.328785 5.210001 0.045945 0.316078 0.144393 \n\ -0.208643 0.196271 0.599369 0.121313 3.842632 0.158470 0.064648 0.039280 8.230282 0.517123 0.713426 0.084962 0.812142 23.228875 0.043249 0.405310 0.234217 4.903887 \n\ -2.544463 0.313443 0.172264 0.073705 4.207648 0.497398 0.484620 0.132496 0.329895 23.711178 3.466991 0.348362 4.136445 1.199764 0.368231 0.266531 3.184874 0.252132 0.459187 \n\ -\n\ -0.088367 0.078147 0.047163 0.087976 0.004517 0.058526 0.128039 0.056993 0.024856 0.025277 0.045202 0.094639 0.012338 0.016158 0.060124 0.055346 0.051290 0.006771 0.021554 0.036718;\n\ -\n\ -[ Buried component ]\n\ -model BurEX2 =\n\ -0.338649 \n\ -0.201335 0.981635 \n\ -0.283859 0.247537 6.505182 \n\ -2.640244 0.904730 1.353325 0.312005 \n\ -0.543136 4.570308 2.439639 0.682052 0.216787 \n\ -0.748479 0.917979 0.804756 10.030310 0.024055 8.670112 \n\ -2.700465 0.539246 0.810739 0.810727 0.701320 0.330139 0.636675 \n\ -0.237686 3.175221 6.308043 1.540002 0.469875 8.675492 0.750683 0.183743 \n\ -0.044209 0.099241 0.162644 0.020816 0.166986 0.082745 0.030581 0.005017 0.075820 \n\ -0.124047 0.314159 0.088243 0.017526 0.449241 0.641784 0.073392 0.017752 0.277023 2.383760 \n\ -0.433721 17.781822 2.851914 0.459939 0.117548 6.815411 3.482941 0.484653 1.247888 0.161658 0.219757 \n\ -0.497479 0.448773 0.380964 0.057176 0.815999 2.089412 0.291379 
0.054491 0.307450 2.817174 4.759683 1.082403 \n\ -0.093991 0.055530 0.098936 0.026160 0.662517 0.091948 0.022760 0.034431 0.675645 0.521416 1.672365 0.077917 1.296869 \n\ -0.986621 0.356417 0.214521 0.246129 0.164228 0.654039 0.295079 0.179095 0.428213 0.037671 0.170780 0.347219 0.074086 0.057233 \n\ -5.925588 0.979993 4.725421 1.158990 5.111992 1.120931 0.737456 2.279470 0.886126 0.051057 0.089611 0.925355 0.275366 0.274582 1.151114 \n\ -1.958501 0.630713 2.007592 0.289641 2.284140 0.787821 0.539892 0.097432 0.467489 0.644041 0.202812 1.401676 1.340732 0.103118 0.601281 8.190534 \n\ -0.068357 0.784449 0.109073 0.085810 0.457880 0.297731 0.155877 0.157418 0.708743 0.054134 0.374568 0.115777 0.477495 2.362999 0.047127 0.209085 0.097054 \n\ -0.084768 0.312038 0.615093 0.202611 0.788164 0.293543 0.137306 0.035497 4.938330 0.101803 0.180086 0.280737 0.264540 8.142914 0.059308 0.264401 0.133054 2.905674 \n\ -1.387752 0.140091 0.112176 0.058637 1.575057 0.203946 0.239406 0.044011 0.085226 6.427279 1.035942 0.244336 1.033583 0.278010 0.213475 0.079878 1.592560 0.081135 0.108383 \n\ -\n\ -0.123119 0.019475 0.019852 0.018583 0.018711 0.017275 0.018723 0.050388 0.016402 0.119697 0.161399 0.012776 0.035838 0.057019 0.030913 0.043472 0.049935 0.012600 0.039929 0.133894;\n\ -\n\ -[ main definition of EX2 with fixed component rates ]\n\ -model EX2 =MIX{BurEX2:0.672020808818762,ExpEX2:1.6413466609931};\n\ -\n\ -\n\ -[ ---------------------------------------------------------\n\ - EX3 mixture model of Le, Lartillot & Gascuel (2008) \n\ - --------------------------------------------------------- ]\n\ -\n\ -[ Buried component ]\n\ -model BurEX3 =\n\ -0.352598 \n\ -0.216996 1.087422 \n\ -0.292440 0.323465 7.797086 \n\ -2.610812 0.913640 1.460331 0.344397 \n\ -0.510610 5.128748 2.811070 0.773241 0.220223 \n\ -0.753729 1.090823 0.956820 12.012282 0.021022 10.123412 \n\ -2.838061 0.595013 0.884971 0.922298 0.707214 0.351856 0.713974 \n\ -0.239679 3.625577 7.108377 1.826237 0.481109 
10.246488 0.839852 0.219310 \n\ -0.051496 0.102940 0.168735 0.024207 0.162795 0.087881 0.036973 0.004515 0.079975 \n\ -0.119849 0.316151 0.091984 0.018800 0.422679 0.648064 0.075035 0.016317 0.282195 2.225363 \n\ -0.443183 20.766910 3.194817 0.568138 0.132784 7.478955 4.176123 0.551523 1.415394 0.163276 0.207613 \n\ -0.460570 0.458210 0.398615 0.059146 0.765112 2.134261 0.313124 0.053192 0.340474 2.609469 4.476961 1.014674 \n\ -0.089411 0.056698 0.104720 0.027913 0.630095 0.094857 0.023275 0.034031 0.691151 0.491179 1.606618 0.077868 1.226530 \n\ -0.993370 0.419898 0.217106 0.273526 0.181230 0.729534 0.311152 0.192454 0.483200 0.040002 0.170402 0.376998 0.075002 0.057218 \n\ -6.108406 1.066008 5.182562 1.216396 5.236005 1.159086 0.763810 2.404073 0.924395 0.048875 0.084247 0.923997 0.260340 0.260617 1.208454 \n\ -1.992855 0.687262 2.181095 0.312299 2.276505 0.829879 0.551397 0.101409 0.480998 0.610331 0.198919 1.407257 1.292634 0.096955 0.648250 8.527249 \n\ -0.063159 0.855332 0.134012 0.099769 0.468450 0.329372 0.136731 0.169991 0.745868 0.056715 0.377293 0.137955 0.463394 2.343596 0.058650 0.211406 0.085948 \n\ -0.078057 0.341493 0.655744 0.241264 0.762740 0.302096 0.142491 0.040257 5.226086 0.092084 0.180292 0.311130 0.249838 8.141649 0.062812 0.267992 0.128044 3.047417 \n\ -1.339724 0.144916 0.125078 0.062854 1.481083 0.194081 0.225389 0.043663 0.090575 5.973306 0.993888 0.222252 0.964622 0.262045 0.207448 0.083450 1.544911 0.078358 0.105286 \n\ -\n\ -0.123992 0.016529 0.017595 0.015784 0.019325 0.015552 0.015939 0.049573 0.014540 0.126555 0.167605 0.011083 0.037438 0.058363 0.028849 0.042324 0.049207 0.011962 0.037833 0.139953;\n\ -\n\ -[ Intermediate component ]\n\ -model IntEX3 =\n\ -0.489239 \n\ -0.466919 0.536794 \n\ -0.601908 0.069474 4.603441 \n\ -2.430552 1.807414 0.997223 0.166431 \n\ -1.101971 2.081359 1.299123 0.508086 0.393348 \n\ -1.227777 0.215899 0.345545 3.579383 0.046861 3.113235 \n\ -1.873072 0.390054 1.528288 0.941969 0.867139 0.349219 
0.406414 \n\ -0.519003 1.930915 5.003737 0.781887 1.630085 3.567804 0.324903 0.315383 \n\ -0.158722 0.180317 0.295816 0.013254 0.642786 0.179498 0.090830 0.013181 0.209208 \n\ -0.345026 0.503290 0.138767 0.024393 1.107569 1.027755 0.123806 0.048549 0.592981 5.439892 \n\ -0.610178 4.322929 1.524318 0.121994 0.181609 2.674484 0.792405 0.276766 0.591509 0.301836 0.294950 \n\ -0.949957 0.472702 0.502710 0.091008 1.283305 1.905885 0.242081 0.140301 0.378459 6.259505 9.391081 1.074513 \n\ -0.247271 0.069820 0.161809 0.028611 2.065479 0.077874 0.025753 0.065388 1.541097 1.306479 3.015722 0.048689 2.243101 \n\ -1.334722 0.170174 0.099375 0.211869 0.163190 0.349495 0.155436 0.186099 0.300496 0.065625 0.265961 0.162529 0.088677 0.083754 \n\ -5.316955 0.699036 4.526191 1.143652 5.249370 0.970695 0.438792 2.366185 0.939629 0.138819 0.275119 0.532771 0.521510 0.547761 1.187779 \n\ -1.963809 0.535034 2.034583 0.383040 2.012437 0.891145 0.531018 0.180104 0.467342 1.861944 0.395319 1.071879 2.340268 0.183984 0.400373 7.243848 \n\ -0.145693 0.378596 0.046601 0.048388 1.074147 0.174525 0.063777 0.168836 0.822524 0.110645 0.677913 0.062047 0.796395 3.502387 0.046950 0.290501 0.107097 \n\ -0.195764 0.149382 0.534652 0.105996 2.446201 0.150150 0.071967 0.031908 6.198893 0.299207 0.413150 0.090874 0.492692 15.039152 0.044765 0.328289 0.175204 3.125850 \n\ -2.227504 0.220361 0.150316 0.066496 3.112801 0.393451 0.444469 0.108811 0.224352 15.532696 2.152640 0.302279 2.658339 0.738053 0.322254 0.197018 2.507055 0.175763 0.276642 \n\ -\n\ -0.086346 0.080808 0.041727 0.064440 0.006654 0.052795 0.092110 0.048527 0.028831 0.040497 0.071679 0.079687 0.018007 0.025901 0.052632 0.052778 0.056138 0.010733 0.034744 0.054964;\n\ -\n\ -[ Highly exposed component ]\n\ -model HExEX3 =\n\ -0.557500 \n\ -0.467024 0.508965 \n\ -0.660464 0.044039 3.386724 \n\ -1.332582 3.667491 1.440486 0.185886 \n\ -1.402485 2.156104 1.297398 0.333117 0.789370 \n\ -1.259192 0.111162 0.245837 2.707953 0.058650 2.098300 \n\ 
-0.934526 0.393780 2.196372 0.868249 1.336358 0.322363 0.252359 \n\ -0.518929 3.157422 5.392488 0.748008 3.827563 4.517669 0.284167 0.634601 \n\ -0.279723 0.407537 0.535113 0.054030 3.345087 0.427624 0.148200 0.015686 0.658979 \n\ -0.715094 1.182387 0.270883 0.035162 3.520931 2.366650 0.172395 0.100089 1.779380 18.830270 \n\ -0.694526 3.728628 1.747648 0.083685 0.100399 2.477205 0.623294 0.280977 0.694965 0.569776 0.493141 \n\ -1.338414 1.261833 0.818216 0.054313 3.918703 2.383718 0.219943 0.228757 0.867786 19.605444 31.431195 1.089056 \n\ -0.295523 0.190129 0.263800 0.044853 5.266468 0.120909 0.042178 0.194665 3.494314 5.825792 11.527190 0.044361 6.237844 \n\ -1.085021 0.168461 0.041147 0.203765 0.185173 0.353420 0.218194 0.120292 0.375260 0.116875 0.705493 0.190747 0.139085 0.108823 \n\ -4.090024 0.852803 4.335615 0.829194 6.499129 1.095446 0.336922 1.733724 1.144100 0.413986 0.878828 0.631498 0.730416 1.167593 1.195720 \n\ -2.318400 0.650016 2.351068 0.385247 1.883085 1.167877 0.532167 0.187062 0.796107 4.825759 0.838744 1.268311 4.445757 0.381760 0.419944 7.677284 \n\ -0.134371 1.021826 0.151293 0.065183 3.716538 0.530580 0.077516 0.396559 1.324147 0.443432 3.290145 0.064651 4.411035 13.056874 0.056705 0.534908 0.408415 \n\ -0.212989 0.424870 1.115762 0.268883 8.874037 0.255572 0.125866 0.107717 14.436023 1.292209 1.491799 0.104026 2.063744 49.760746 0.057618 0.756357 0.396791 12.032322 \n\ -3.112666 0.544010 0.214411 0.125541 5.301703 0.868794 0.839508 0.215758 0.533676 46.074660 7.301056 0.557248 9.151909 2.634769 0.523205 0.564572 4.519860 0.456880 0.670812 \n\ -\n\ -0.094155 0.070537 0.052200 0.112406 0.002213 0.062733 0.165272 0.062302 0.019853 0.011154 0.019829 0.108860 0.006503 0.006873 0.070091 0.057931 0.046183 0.002449 0.008629 0.019827;\n\ -\n\ -[ main definition of EX3 with fixed component rates ]\n\ -model EX3 = MIX{BurEX3:0.427672756793791,IntEX3:0.837595938019774,HExEX3:1.51863631431518};\n\ -\n\ -[ 
---------------------------------------------------------\n\ - EHO mixture model of Le, Lartillot & Gascuel (2008)\n\ - --------------------------------------------------------- ]\n\ -\n\ -[ extended component ]\n\ -model ExtEHO = \n\ -0.221750 \n\ -0.256487 0.595368 \n\ -0.447755 0.112310 7.769815 \n\ -4.893140 0.929131 1.061884 0.164472 \n\ -0.542660 2.886791 1.927072 0.497273 0.133291 \n\ -0.549459 0.290798 0.518264 5.393249 0.003776 4.326528 \n\ -5.411319 0.302948 0.907713 0.961651 1.249183 0.173873 0.316780 \n\ -0.283752 2.760038 5.159285 0.978418 0.737799 5.086066 0.421812 0.209276 \n\ -0.026683 0.053027 0.166715 0.016491 0.151942 0.055934 0.026726 0.001780 0.098605 \n\ -0.226816 0.251641 0.062256 0.015837 0.763554 0.537705 0.042909 0.032938 0.321607 3.217159 \n\ -0.235513 6.017300 2.543177 0.223507 0.023575 3.432847 1.211039 0.160545 0.671045 0.082221 0.106179 \n\ -0.992834 0.351969 0.415447 0.041511 1.271632 1.700679 0.111984 0.117596 0.326393 3.329162 7.496635 0.519821 \n\ -0.191967 0.041219 0.090517 0.014810 1.004694 0.042779 0.011177 0.040989 0.641267 0.813011 2.233318 0.023173 1.863238 \n\ -1.876507 0.395175 0.362650 0.550534 0.174031 0.731229 0.412907 0.205341 0.381717 0.011597 0.315127 0.393303 0.135360 0.043846 \n\ -6.066032 1.083228 5.612711 1.035540 4.263932 1.429211 0.766802 2.266299 1.074108 0.047896 0.147065 0.683291 0.352118 0.382422 1.462674 \n\ -1.827471 0.645132 1.883173 0.287521 1.395928 1.013709 0.781080 0.055140 0.512000 0.588357 0.142327 1.256445 1.435179 0.079647 0.417388 6.092548 \n\ -0.101419 0.452274 0.065206 0.034173 0.592031 0.164037 0.049674 0.183473 0.741383 0.069289 0.429275 0.050856 0.545447 2.178510 0.022770 0.304839 0.111242 \n\ -0.091914 0.112094 0.451176 0.108762 1.183567 0.132194 0.042952 0.030418 4.373360 0.122828 0.186938 0.096667 0.344096 8.276255 0.053251 0.325231 0.135310 2.597897 \n\ -1.970427 0.119016 0.091863 0.041044 1.750822 0.222903 0.225961 0.053387 0.123318 6.815243 1.427658 0.124284 1.427074 0.341263 0.127045 
0.076658 1.052442 0.073165 0.101733 \n\ -\n\ -0.062087 0.053435 0.023743 0.032063 0.013132 0.034151 0.061042 0.030664 0.022696 0.104732 0.099541 0.054991 0.022312 0.045996 0.025392 0.045673 0.072789 0.012691 0.043790 0.139079;\n\ -\n\ -[ Helix component ]\n\ -model HelEHO = \n\ -0.346476 \n\ -0.374362 0.664870 \n\ -0.557349 0.079157 3.710526 \n\ -3.192474 1.027228 0.891196 0.006722 \n\ -0.776545 1.902860 1.561002 0.517360 0.112028 \n\ -0.841893 0.158406 0.443065 3.792847 0.000006 2.320685 \n\ -4.037113 0.661209 1.866962 1.144918 1.465540 0.511489 0.573208 \n\ -0.394225 2.123760 5.845902 0.737868 1.084909 3.960964 0.270146 0.380762 \n\ -0.111350 0.099645 0.233216 0.005627 0.839533 0.089484 0.019520 0.021251 0.132153 \n\ -0.193017 0.307622 0.115495 0.009651 1.136538 0.584189 0.039838 0.048105 0.485901 4.915707 \n\ -0.481682 3.827872 1.926308 0.163314 0.021755 2.487895 0.768919 0.327002 0.534206 0.147053 0.136159 \n\ -0.610432 0.344033 0.452639 0.035659 1.624032 1.146169 0.103241 0.171164 0.364836 6.260678 7.738615 0.549401 \n\ -0.147278 0.035167 0.106276 0.018468 1.864906 0.047207 0.010268 0.086543 1.244539 0.927331 3.243633 0.016265 2.326533 \n\ -1.090575 0.181605 0.093658 0.386490 0.097655 0.462559 0.290152 0.568098 0.458437 0.043237 0.207460 0.198291 0.061027 0.067592 \n\ -6.243684 0.836138 5.633664 0.952131 6.398291 1.267404 0.430602 5.463144 1.088326 0.102127 0.193860 0.707365 0.438507 0.470620 1.534272 \n\ -2.847158 0.566364 2.984732 0.347047 3.711971 1.083181 0.495700 0.500029 0.642773 1.698955 0.402699 1.111399 2.483456 0.231119 0.685164 8.832473 \n\ -0.090983 0.369015 0.085583 0.046821 0.950521 0.183299 0.040785 0.391093 0.950288 0.075780 0.624335 0.041505 0.980672 3.915972 0.053806 0.299723 0.100663 \n\ -0.152848 0.170981 0.594708 0.106099 2.051641 0.121416 0.047614 0.064377 8.167042 0.195540 0.352598 0.069186 0.465779 15.178886 0.058255 0.405459 0.201603 4.035822 \n\ -2.140511 0.136453 0.145376 0.046174 4.011687 0.191618 0.192292 0.202844 0.174981 
14.460840 2.175028 0.136317 2.393838 0.659302 0.418505 0.180248 3.585329 0.175143 0.281722 \n\ -\n\ -0.121953 0.076798 0.032215 0.066765 0.006842 0.061304 0.131841 0.026596 0.020392 0.047287 0.087919 0.084679 0.020970 0.024145 0.025871 0.042103 0.038715 0.008346 0.023841 0.051421;\n\ -\n\ -[ Other component ]\n\ -model OthEHO =\n\ -0.529263 \n\ -0.379476 0.612335 \n\ -0.516691 0.067732 4.012914 \n\ -3.774890 1.615176 0.888663 0.165810 \n\ -1.312262 2.913667 1.533683 0.442262 0.337571 \n\ -1.403437 0.154460 0.333334 3.815893 0.015567 3.743866 \n\ -1.272402 0.389317 1.243222 0.661976 0.554904 0.332656 0.319770 \n\ -0.558733 2.816641 4.803000 0.761339 1.223662 4.889028 0.323617 0.300981 \n\ -0.124057 0.155080 0.219635 0.019097 0.560959 0.100743 0.038076 0.005599 0.184752 \n\ -0.340362 0.580087 0.119838 0.015948 1.192857 1.156516 0.083154 0.031031 0.646292 7.873544 \n\ -0.706732 5.734632 1.847806 0.128114 0.050896 3.616626 1.131071 0.283950 0.643558 0.179831 0.224320 \n\ -1.056749 0.665355 0.399943 0.053900 1.893946 2.299714 0.168079 0.085094 0.556024 8.136055 14.213193 0.931689 \n\ -0.233961 0.079465 0.130295 0.016768 1.902244 0.077611 0.012655 0.048906 1.403178 1.581816 4.275863 0.036062 2.888633 \n\ -1.518830 0.252482 0.049484 0.171011 0.108909 0.501196 0.346600 0.058913 0.299924 0.073007 0.297573 0.249478 0.091619 0.068920 \n\ -5.595735 0.861017 3.749627 0.987083 4.952776 1.045071 0.463265 1.190738 0.897478 0.131753 0.265701 0.607097 0.399537 0.408758 0.993614 \n\ -2.157458 0.613623 1.733380 0.361861 2.145775 1.011592 0.523086 0.091023 0.450662 1.492403 0.408418 1.143233 2.378569 0.131777 0.381007 7.574340 \n\ -0.151895 0.544292 0.060182 0.043433 1.259614 0.228038 0.045082 0.134804 0.748147 0.134416 0.979277 0.038787 0.908253 4.850762 0.052415 0.249753 0.114232 \n\ -0.219509 0.243507 0.580103 0.130214 2.325021 0.196580 0.079660 0.037482 6.907609 0.299245 0.552917 0.067894 0.685250 19.404995 0.047839 0.323207 0.183044 4.704884 \n\ -3.049976 0.278740 0.134120 
0.055382 4.149385 0.500946 0.435957 0.067170 0.214393 22.435652 2.883298 0.323886 3.369448 0.722571 0.315978 0.152899 2.423398 0.186495 0.303833 \n\ -\n\ -0.076458 0.052393 0.055429 0.088634 0.007473 0.040671 0.080952 0.100192 0.025439 0.031730 0.053100 0.070835 0.014039 0.023159 0.087111 0.063636 0.055346 0.007033 0.023779 0.042590;\n\ -\n\ -[ main definition of EHO with fixed component rates ]\n\ -model EHO = MIX{ExtEHO:0.720274356,HelEHO:0.976798797,OthEHO:0.783109376};\n\ -\n\ -\n\ -[ ---------------------------------------------------------\n\ - UL2 mixture model of Le, Lartillot & Gascuel (2008)\n\ - --------------------------------------------------------- ]\n\ -\n\ -model M1_UL2 =\n\ -0.267149 \n\ -0.211944 0.816250 \n\ -0.156648 0.336150 3.110967 \n\ -2.402535 1.001114 1.287205 0.467161 \n\ -0.301870 3.168646 1.844180 0.571540 0.394361 \n\ -0.503678 1.529332 0.788530 3.920399 0.234553 8.502278 \n\ -3.124853 0.171548 0.220006 0.250690 0.766651 0.174653 0.399019 \n\ -0.139279 1.597241 5.622886 2.146897 0.349557 8.097306 1.211287 0.044878 \n\ -0.037158 0.139068 0.189483 0.049336 0.147864 0.122799 0.153664 0.006928 0.085276 \n\ -0.108752 0.387538 0.092568 0.035815 0.399254 0.617370 0.225586 0.018972 0.202328 2.343778 \n\ -0.255267 15.176345 1.030178 0.196011 0.396427 3.731061 2.642525 0.142626 0.878376 0.319044 0.422741 \n\ -0.430988 0.522887 0.351960 0.102916 0.683070 2.247889 0.621957 0.070803 0.228871 2.780325 4.767336 1.450453 \n\ -0.088392 0.116382 0.114044 0.066251 0.668683 0.133418 0.075116 0.039034 0.780377 0.488538 1.586897 0.143427 1.211385 \n\ -1.303487 0.178064 0.192016 0.065259 0.315140 0.406966 0.144065 0.135536 0.273070 0.087171 0.298010 0.087701 0.165232 0.104423 \n\ -7.472990 0.579607 3.004054 0.854304 5.789930 0.930019 0.709540 2.018826 0.527351 0.051443 0.070322 0.432286 0.281917 0.286341 0.473611 \n\ -2.276542 0.392852 1.332166 0.193248 2.577504 0.541748 0.690939 0.052900 0.272814 0.634227 0.224553 0.795413 1.360016 0.120449 0.745729 
6.088861 \n\ -0.048841 0.673695 0.076107 0.073261 0.377566 0.284556 0.284138 0.130136 0.649073 0.047797 0.324911 0.148403 0.390301 2.189403 0.122493 0.131225 0.080727 \n\ -0.073190 0.425791 0.503951 0.250485 0.577049 0.306036 0.198368 0.024991 3.987606 0.083215 0.127898 0.372637 0.179514 7.784255 0.089874 0.175724 0.117177 2.629196 \n\ -1.351002 0.175990 0.120675 0.105544 1.491339 0.203270 0.463186 0.055506 0.065132 6.411609 1.020423 0.337618 1.047308 0.272790 0.407545 0.079844 1.634833 0.077263 0.083195 \n\ -\n\ -0.122413 0.017757 0.020209 0.012086 0.018894 0.014525 0.009897 0.045663 0.020120 0.124002 0.168915 0.011684 0.037631 0.063612 0.023347 0.039268 0.046707 0.015603 0.050968 0.136701;\n\ -\n\ -\n\ -model M2_UL2 =\n\ -0.557363 \n\ -0.539068 0.465628 \n\ -0.696831 0.032997 3.879799 \n\ -1.480953 4.566841 1.777582 0.310752 \n\ -1.402193 1.920868 1.276554 0.327085 0.972350 \n\ -1.335667 0.096752 0.255510 2.685052 0.088385 2.281328 \n\ -1.056193 0.423348 2.171283 0.933450 1.398738 0.369406 0.334900 \n\ -0.729300 2.712485 5.461073 0.679965 5.202985 4.012284 0.282038 0.585359 \n\ -0.267035 0.493033 0.523699 0.023230 2.563394 0.459103 0.176281 0.010013 0.551901 \n\ -0.700687 0.932999 0.206875 0.025161 3.939537 1.918986 0.154733 0.085684 1.446302 8.189198 \n\ -0.736759 3.603558 1.676442 0.070721 0.292188 2.403019 0.611829 0.307607 0.675279 0.627044 0.410941 \n\ -1.505101 0.819561 0.736222 0.089302 4.462071 2.539203 0.250970 0.204790 0.654198 11.105816 15.171688 1.258549 \n\ -0.541573 0.185468 0.343735 0.042217 5.958046 0.156533 0.064557 0.188906 3.891682 3.152154 5.098336 0.088022 4.518197 \n\ -1.155460 0.142408 0.044854 0.175385 0.123605 0.316005 0.157783 0.157894 0.347393 0.047328 0.344717 0.153954 0.054635 0.108793 \n\ -3.823040 0.733964 4.846938 0.890611 7.416660 0.987912 0.343107 2.296896 1.193558 0.368432 0.667347 0.535051 0.754875 1.469714 1.242760 \n\ -1.897039 0.590040 2.371940 0.347041 1.619173 1.025240 0.479587 0.210934 0.728868 5.106169 0.726618 1.152768 
3.985684 0.433442 0.358997 9.007029 \n\ -0.296375 0.833840 0.091310 0.080326 5.217767 0.363445 0.078944 0.378088 1.571919 0.351013 2.139511 0.098671 2.796573 6.102504 0.023698 0.665667 0.292919 \n\ -0.297444 0.206563 0.871576 0.173621 11.803422 0.181973 0.110832 0.073892 12.757344 1.161331 1.646025 0.101481 1.732368 29.335598 0.037045 0.706902 0.346859 5.666524 \n\ -2.765737 0.415803 0.194725 0.093474 5.264577 0.734884 0.683342 0.156374 0.517626 26.038986 3.741256 0.457775 5.253478 1.999427 0.297563 0.344932 4.012753 0.385172 0.870088 \n\ -\n\ -0.087622 0.083588 0.048847 0.098882 0.002815 0.062809 0.143166 0.055391 0.023310 0.015495 0.032465 0.102135 0.009511 0.008409 0.069323 0.057733 0.051876 0.003945 0.014462 0.028216;\n\ -\n\ -model UL2 = MIX{M1_UL2:0.581348617,M2_UL2:1.465482789};\n\ -\n\ -\n\ -[ ---------------------------------------------------------\n\ - UL3 mixture model of Le, Lartillot & Gascuel (2008)\n\ - --------------------------------------------------------- ]\n\ -\n\ -model Q1_UL3 =\n\ -0.514865 \n\ -0.774348 0.583403 \n\ -0.854291 0.046141 2.011233 \n\ -1.019817 5.652322 2.260587 0.057603 \n\ -1.095968 1.696154 1.296536 0.417322 0.967032 \n\ -1.054599 0.084924 0.368384 3.592374 0.063073 1.885301 \n\ -3.510012 0.797055 1.759631 1.421695 2.627911 0.743770 0.772359 \n\ -0.694799 2.596186 4.214186 0.654590 6.673533 3.664595 0.294967 0.608220 \n\ -0.344837 0.543739 0.965435 0.062495 2.500862 0.452448 0.155720 0.083334 0.905291 \n\ -0.593987 0.857922 0.351903 0.045358 3.290242 1.421539 0.109100 0.230693 1.595696 5.042430 \n\ -0.708843 2.012940 1.662582 0.106190 0.329149 2.268825 0.579185 0.365374 0.696286 0.701896 0.398546 \n\ -0.990080 0.754111 0.910436 0.143464 3.570847 1.708803 0.181804 0.706982 0.789517 8.138995 13.390024 1.137779 \n\ -0.085639 0.012721 0.098898 0.018361 2.148695 0.012425 0.009316 0.135782 0.921964 1.006572 2.479349 0.014715 1.418875 \n\ -0.655013 0.150052 0.120388 0.698261 0.254951 0.353826 0.250818 0.715043 0.329691 0.170251 
0.827093 0.187804 0.178490 0.048299 \n\ -2.863328 0.657706 3.761619 0.619692 9.817007 0.810603 0.344050 6.758412 0.997214 0.414623 0.625678 0.555290 0.647617 0.392859 0.929152 \n\ -1.373936 0.392433 2.711122 0.237865 2.460302 0.701472 0.319136 0.607889 0.728133 3.705396 0.412346 0.953939 2.446017 0.054119 0.279699 9.934970 \n\ -0.247598 0.514750 0.144529 0.157484 5.383077 0.199950 0.045688 0.790171 1.116595 0.243053 1.738186 0.070214 3.427855 3.275850 0.007577 0.583988 0.205721 \n\ -0.090644 0.046952 0.326197 0.089450 7.475195 0.018555 0.020706 0.016617 3.728614 0.404819 0.617948 0.029889 0.956437 47.933104 0.050416 0.181180 0.070113 5.242459 \n\ -2.093798 0.323334 0.307076 0.101486 8.553531 0.473023 0.410909 0.459941 0.568017 13.906640 1.778101 0.426825 2.763369 0.570421 0.311278 0.389524 2.915452 0.252168 0.268516 \n\ -\n\ -0.104307 0.092553 0.043722 0.085643 0.003218 0.074342 0.163928 0.024243 0.022216 0.016012 0.038591 0.105577 0.011434 0.016126 0.018057 0.061232 0.061373 0.004086 0.020876 0.032465;\n\ -\n\ -model Q2_UL3 =\n\ -1.709484 \n\ -0.184309 0.860448 \n\ -0.660851 0.182073 4.471383 \n\ -4.554487 2.843438 1.801073 1.068728 \n\ -3.425703 6.092362 2.868388 0.790473 0.794773 \n\ -4.278840 0.359055 0.585031 4.176143 0.121031 6.860012 \n\ -0.625715 1.054231 1.222442 0.492366 1.418419 0.796035 0.643251 \n\ -1.089116 6.396197 8.965630 1.915247 2.033352 11.058341 0.768162 0.523196 \n\ -0.024545 0.023433 0.014686 0.002204 0.628823 0.008720 0.008363 0.002485 0.046726 \n\ -0.150945 0.140520 0.002514 0.000212 1.903535 0.384413 0.015127 0.010251 0.210723 5.066207 \n\ -1.751314 12.981698 3.641808 0.278298 0.036599 7.677610 2.744099 0.612733 1.686490 0.042380 0.023858 \n\ -0.475876 0.364580 0.063143 0.001486 3.890832 0.754732 0.041044 0.024222 0.236955 5.752463 12.019762 0.229898 \n\ -0.142125 0.051255 0.006503 0.000593 5.397699 0.064190 0.006871 0.015588 0.424840 1.005341 5.458275 0.021422 1.779060 \n\ -5.433246 1.051312 0.012611 0.027267 0.635181 1.765792 0.849429 
0.023324 0.610884 0.000184 0.037705 0.604166 0.001415 0.003197 \n\ -6.267113 1.750009 5.986041 1.411952 5.482009 1.923966 0.595886 0.943724 1.786620 0.043381 0.066093 0.813893 0.053557 0.199095 1.723045 \n\ -6.389458 1.828974 2.044599 1.561907 2.083626 2.070125 1.210529 0.217976 1.192222 0.515450 0.199809 2.020941 1.238100 0.150760 1.727569 9.882473 \n\ -0.281689 1.180712 0.000006 0.017218 3.696424 0.146508 0.068518 0.222418 0.497727 0.199828 1.849405 0.001429 1.394852 2.473491 0.016401 0.288550 0.190290 \n\ -0.302638 0.475135 0.196905 0.067615 6.355457 0.576342 0.232832 0.059485 4.525509 0.571811 1.194578 0.006674 0.467694 8.107893 0.024556 0.394389 0.441794 4.067825 \n\ -11.333027 0.298555 0.053673 0.009846 5.743238 0.296166 0.413471 0.120393 0.105418 9.130937 2.674960 0.165290 4.417978 1.811161 0.492985 0.042803 3.284174 0.844277 2.327679 \n\ -\n\ -0.044015 0.021591 0.056258 0.102405 0.003260 0.018409 0.041364 0.168549 0.015843 0.064277 0.118096 0.036061 0.027358 0.036695 0.124914 0.047807 0.022696 0.005866 0.018644 0.025892;\n\ -\n\ -\n\ -model Q3_UL3 =\n\ -0.063622 \n\ -0.118948 0.528684 \n\ -0.065502 0.142677 12.092355 \n\ -2.010382 0.302352 1.127688 0.014546 \n\ -0.169022 2.026184 1.256016 0.417582 0.170493 \n\ -0.172876 0.453837 0.454428 1.882165 0.045799 4.705997 \n\ -5.254550 0.174422 0.364886 0.192790 0.891120 0.148450 0.195211 \n\ -0.090586 1.258840 5.523808 0.313487 0.211550 4.734918 0.466811 0.096529 \n\ -0.034911 0.065167 0.222440 0.023060 0.132230 0.122571 0.075521 0.003942 0.065261 \n\ -0.146425 0.219004 0.136129 0.028165 0.448432 0.795591 0.146014 0.016718 0.240024 2.387089 \n\ -0.041117 11.082325 0.783756 0.049843 0.039616 1.828161 0.649991 0.069199 0.271006 0.094864 0.140698 \n\ -0.599734 0.230551 0.595641 0.059404 0.699534 2.765355 0.391569 0.089210 0.206188 3.020110 2.806927 0.516762 \n\ -0.148889 0.145244 0.408811 0.089283 0.724422 0.260910 0.101509 0.101882 1.508086 0.693424 0.709933 0.061880 1.887041 \n\ -0.378131 0.225548 0.181924 0.038283 
0.146379 0.511097 0.151769 0.166424 0.386101 0.186116 0.753595 0.182723 0.420131 0.341199 \n\ -8.340091 0.495564 3.010756 0.463573 5.601734 0.985082 0.415256 2.532014 0.720035 0.067860 0.121784 0.279698 0.626080 0.788724 0.890779 \n\ -1.932342 0.358779 1.069003 0.093122 2.028674 0.637861 0.597230 0.019551 0.211054 0.870341 0.381670 0.461083 2.131453 0.255120 0.604567 3.450887 \n\ -0.031419 0.378744 0.085588 0.008303 0.277628 0.277906 0.122038 0.055246 0.420382 0.021973 0.127345 0.027776 0.139098 3.077241 0.163299 0.177114 0.062650 \n\ -0.106283 0.366786 1.453278 0.404793 0.753053 0.475024 0.273992 0.055936 7.367304 0.094629 0.126746 0.148716 0.369726 5.251757 0.232549 0.676796 0.247828 2.910495 \n\ -1.104848 0.069287 0.110149 0.084224 1.280832 0.175361 0.342585 0.047507 0.038768 8.415916 1.577573 0.054532 1.537983 0.409619 0.309028 0.094413 1.483411 0.060147 0.076595 \n\ -\n\ -0.134055 0.044392 0.020730 0.020801 0.021008 0.024509 0.028587 0.029069 0.028138 0.105826 0.117458 0.049732 0.026120 0.038984 0.016306 0.038369 0.055334 0.017276 0.039905 0.143398;\n\ -\n\ -model UL3 = MIX{Q1_UL3:0.484340397,Q2_UL3:0.492780514,Q3_UL3:1.15597274};\n\ -\n\ -\n\ -[ ---------------------------------------------------------\n\ - EX_EHO mixture model of Le & Gascuel (2010)\n\ - --------------------------------------------------------- ]\n\ -\n\ -\n\ -model BUR_EXT =\n\ -0.228492 \n\ -0.165543 0.916344 \n\ -0.238509 0.258514 8.498064 \n\ -3.374029 1.037434 1.667702 0.332072 \n\ -0.344742 4.971495 2.471912 0.654950 0.130301 \n\ -0.417921 1.039226 0.875808 13.073209 0.040759 9.834742 \n\ -4.248714 0.411876 0.585570 0.748848 0.908311 0.221633 0.593504 \n\ -0.182762 3.872065 6.999812 1.719470 0.493863 8.695395 0.749303 0.137367 \n\ -0.011705 0.090751 0.149898 0.021996 0.077693 0.043664 0.013820 0.001527 0.073342 \n\ -0.133793 0.286232 0.065118 0.015540 0.456304 0.546974 0.052641 0.024196 0.226460 2.160734 \n\ -0.249141 17.756919 3.385483 0.343780 0.093875 6.677050 2.745017 0.295602 
1.481997 0.100576 0.167406 \n\ -0.641194 0.342577 0.427146 0.059345 0.867233 2.306480 0.218260 0.058613 0.358032 2.187901 5.151337 0.750049 \n\ -0.118366 0.068606 0.102572 0.009357 0.633943 0.033356 0.012944 0.024474 0.497973 0.534407 1.581972 0.063281 1.329239 \n\ -1.561052 0.483968 0.385170 0.261437 0.310131 0.913924 0.355871 0.175520 0.512823 0.019789 0.295416 0.348527 0.104569 0.059641 \n\ -5.891807 1.320618 5.737159 1.074011 4.702782 1.389531 0.878480 2.178078 1.111068 0.033343 0.094349 1.035903 0.327901 0.292022 1.344678 \n\ -2.059884 0.976165 2.166428 0.369522 1.951862 0.815145 0.575774 0.060834 0.558388 0.422299 0.153549 1.793263 1.268126 0.085468 0.780914 9.031309 \n\ -0.081683 0.814216 0.057557 0.055146 0.450959 0.191881 0.109420 0.144367 0.651978 0.068649 0.345622 0.169527 0.387902 1.883741 0.023466 0.309129 0.111568 \n\ -0.052650 0.248907 0.570101 0.180267 0.701260 0.253975 0.061388 0.025465 4.206114 0.083799 0.147600 0.226848 0.254720 6.549427 0.027521 0.283138 0.141408 2.561108 \n\ -1.355342 0.137437 0.104597 0.051387 1.203830 0.218892 0.194527 0.031054 0.088935 4.577473 1.003647 0.153722 0.883283 0.242657 0.191295 0.068785 0.990922 0.056276 0.078264 \n\ -\n\ -0.087158 0.015906 0.012970 0.012566 0.020325 0.013301 0.013777 0.039603 0.014597 0.161107 0.147775 0.011033 0.031334 0.064281 0.013322 0.035417 0.048583 0.012672 0.045210 0.199064;\n\ -\n\ -\n\ -model BUR_HEL =\n\ -0.317211 \n\ -0.209784 1.120865 \n\ -0.315205 0.301050 7.439896 \n\ -2.214446 0.884449 1.356293 0.110768 \n\ -0.465495 4.319791 2.843187 1.082540 0.215988 \n\ -0.668735 0.901135 0.986572 11.245156 0.009874 7.561773 \n\ -3.614157 0.568883 0.972660 1.036117 0.894733 0.409083 0.780808 \n\ -0.249929 3.138701 7.344935 1.747672 0.379845 9.559763 0.842239 0.146008 \n\ -0.059633 0.103290 0.206475 0.017492 0.286194 0.123433 0.037593 0.010910 0.071273 \n\ -0.096230 0.285199 0.113728 0.015874 0.439724 0.547078 0.063675 0.021607 0.303531 2.097349 \n\ -0.380075 15.783354 2.780107 0.569108 0.093004 
6.179905 3.209588 0.413960 1.002075 0.185911 0.185249 \n\ -0.371379 0.411553 0.398602 0.076761 0.727245 1.665645 0.249045 0.068128 0.256194 2.940308 3.649539 0.972247 \n\ -0.075616 0.043519 0.096446 0.041118 0.636688 0.102460 0.039991 0.041269 0.839126 0.376556 1.551814 0.064774 1.173962 \n\ -1.100574 0.385197 0.319458 0.353000 0.112549 0.805706 0.369483 0.482895 0.520098 0.058167 0.144341 0.361488 0.074069 0.057968 \n\ -6.832958 0.955160 5.296628 1.265211 6.144756 1.315182 0.902504 3.903795 0.862633 0.072343 0.080478 0.979654 0.330305 0.328917 1.924898 \n\ -2.223205 0.445571 2.461831 0.299635 2.943208 0.830637 0.621903 0.184055 0.468356 0.911139 0.208091 1.343261 1.515339 0.158763 0.915879 9.298787 \n\ -0.062541 0.806724 0.110928 0.132125 0.414525 0.388313 0.191952 0.271274 0.909529 0.025790 0.343842 0.099137 0.543577 2.467147 0.044938 0.215329 0.087955 \n\ -0.082948 0.329591 0.693402 0.286594 0.866329 0.259566 0.167425 0.049038 6.332054 0.093136 0.177755 0.275998 0.261754 8.344684 0.088981 0.335859 0.137177 3.125017 \n\ -1.390479 0.142986 0.175068 0.106294 1.687293 0.159520 0.297915 0.080925 0.085103 6.414688 0.953785 0.240157 1.097345 0.264988 0.373870 0.144230 2.572837 0.089110 0.115941 \n\ -\n\ -0.158060 0.021566 0.016487 0.014079 0.016937 0.020232 0.023096 0.032822 0.014618 0.114447 0.198900 0.014668 0.042840 0.053434 0.015640 0.037275 0.043095 0.012211 0.036330 0.113263;\n\ -\n\ -model BUR_OTH =\n\ -0.406682 \n\ -0.246649 0.848592 \n\ -0.364260 0.198690 4.535840 \n\ -3.292044 0.837291 1.295138 0.420726 \n\ -0.735862 4.205085 2.062501 0.427451 0.259335 \n\ -0.954795 0.673046 0.671062 8.395674 0.048284 8.922739 \n\ -1.958847 0.573207 0.632317 0.572264 0.486274 0.345345 0.650009 \n\ -0.312042 2.699661 4.969855 1.181781 0.551188 7.620453 0.701108 0.195346 \n\ -0.071000 0.127041 0.184028 0.030240 0.180591 0.065984 0.039235 0.005033 0.098525 \n\ -0.142298 0.338853 0.086876 0.026095 0.484427 0.867777 0.087780 0.017129 0.309774 3.477136 \n\ -0.624622 18.390649 
2.748646 0.442886 0.238266 6.993941 3.906971 0.652336 1.365814 0.219252 0.288480 \n\ -0.610604 0.581287 0.382156 0.048508 0.963147 2.672887 0.384585 0.051334 0.386066 3.752286 6.858529 1.524446 \n\ -0.124670 0.047666 0.102656 0.031532 0.699124 0.129867 0.004923 0.039185 0.701690 0.643782 2.019473 0.104308 1.568249 \n\ -1.126387 0.321347 0.107738 0.137858 0.150346 0.601413 0.310374 0.073794 0.332910 0.056230 0.208204 0.368816 0.078902 0.062410 \n\ -5.908551 0.834735 3.611589 0.969189 4.765870 0.881934 0.528944 1.439305 0.746876 0.060111 0.114374 0.784754 0.235963 0.219009 0.710100 \n\ -1.856381 0.574277 1.573584 0.223054 2.038789 0.763848 0.461329 0.076195 0.396095 0.701247 0.249302 1.091322 1.282643 0.070553 0.419070 6.616977 \n\ -0.069294 0.654056 0.127255 0.078896 0.517561 0.188732 0.125541 0.104279 0.547504 0.066927 0.454998 0.056498 0.425274 2.668838 0.050943 0.151483 0.062698 \n\ -0.128158 0.354167 0.640140 0.182565 0.793990 0.368725 0.157796 0.037084 4.307140 0.140691 0.241076 0.323966 0.293629 9.711414 0.060323 0.207489 0.111492 2.857446 \n\ -1.982761 0.158227 0.115545 0.051117 2.065903 0.338262 0.258245 0.045770 0.089942 10.113118 1.382024 0.431385 1.456614 0.295718 0.273919 0.066465 1.668063 0.113899 0.144981 \n\ -\n\ -0.102123 0.021199 0.032404 0.032350 0.018985 0.017469 0.017625 0.089270 0.021090 0.083642 0.123866 0.012720 0.029789 0.055399 0.072705 0.061298 0.061705 0.013496 0.039682 0.093184;\n\ -\n\ -model EXP_EXT=\n\ -0.464716 \n\ -0.597009 0.420578 \n\ -1.010693 0.048553 5.944290 \n\ -3.915828 2.088244 0.878468 0.236108 \n\ -1.156023 1.882317 1.435926 0.338823 0.482742 \n\ -1.131098 0.127150 0.346338 3.317186 0.061060 2.724696 \n\ -4.638659 0.351041 1.379174 1.216518 1.396050 0.199361 0.353970 \n\ -0.657615 2.215990 4.150252 0.717363 1.853969 3.768864 0.347165 0.313421 \n\ -0.078558 0.127092 0.347281 0.032361 0.605448 0.171553 0.104678 0.010608 0.309418 \n\ -0.516672 0.510585 0.105529 0.039188 1.808273 1.017577 0.112010 0.044661 0.772131 5.693102 
\n\ -0.519389 3.571104 1.844049 0.109305 0.103105 2.232749 0.653339 0.195325 0.547017 0.219311 0.253086 \n\ -1.658261 0.640712 0.558751 0.063591 1.694880 2.088441 0.194697 0.291701 0.321392 6.220456 12.392618 0.862547 \n\ -0.426071 0.064894 0.132019 0.034872 2.076573 0.085745 0.026972 0.099963 1.388250 1.765294 3.859637 0.032198 3.134107 \n\ -3.082729 0.250470 0.232578 0.376163 0.290522 0.502379 0.240501 0.302007 0.283950 0.013574 0.606936 0.248475 0.226716 0.058246 \n\ -7.012884 0.866957 5.008997 0.814153 4.758346 1.192080 0.595351 2.514269 0.993487 0.135167 0.349525 0.542021 0.512591 0.744682 1.258172 \n\ -2.037755 0.446367 1.618299 0.203392 1.177421 0.840646 0.583757 0.071515 0.466886 1.503883 0.260405 0.934230 2.245607 0.123552 0.258896 4.504833 \n\ -0.171334 0.385971 0.087717 0.019596 1.015512 0.127027 0.037725 0.217844 0.822780 0.095756 0.777332 0.039952 0.977419 3.217291 0.015240 0.301259 0.102153 \n\ -0.194998 0.091803 0.433021 0.086495 3.074882 0.111578 0.041481 0.048438 4.904785 0.336528 0.411742 0.087476 0.640594 14.126821 0.061656 0.338111 0.129249 2.902137 \n\ -2.811391 0.216605 0.127240 0.061503 2.320268 0.390874 0.450783 0.132513 0.234279 12.181354 2.539512 0.233848 3.363159 0.717467 0.138035 0.159602 1.615372 0.132268 0.186175 \n\ -\n\ -0.043140 0.090761 0.034408 0.052848 0.006370 0.053817 0.107749 0.024812 0.029498 0.049134 0.050167 0.098127 0.013722 0.025841 0.037395 0.056505 0.094326 0.012045 0.039238 0.080099;\n\ -\n\ -model EXP_HEL =\n\ -0.434227 \n\ -0.551823 0.569806 \n\ -0.698268 0.056291 3.064314 \n\ -2.026002 2.379205 1.077282 0.016649 \n\ -0.986617 1.606282 1.331570 0.426399 0.409724 \n\ -1.005936 0.120122 0.390888 2.999742 0.021217 1.881156 \n\ -3.221202 0.736168 2.269617 1.272893 1.771711 0.622430 0.656603 \n\ -0.515574 2.032567 5.484997 0.666491 2.985549 3.380526 0.265244 0.557878 \n\ -0.200810 0.241566 0.441585 0.009830 1.541200 0.198621 0.069562 0.043838 0.339616 \n\ -0.328669 0.583849 0.178015 0.022077 2.045404 1.046125 0.089148 
0.104708 0.875298 8.628242 \n\ -0.598864 3.090263 1.682415 0.113637 0.207957 2.085253 0.582536 0.376534 0.554395 0.371883 0.290692 \n\ -0.799278 0.528354 0.704087 0.062290 2.303849 1.507620 0.173293 0.356580 0.492228 10.028453 12.162732 0.867109 \n\ -0.256227 0.083117 0.192262 0.030759 4.328951 0.078062 0.022890 0.181917 2.406824 2.014776 4.856941 0.041675 3.521229 \n\ -1.118844 0.147481 0.061969 0.323498 0.171678 0.387521 0.237715 0.641036 0.433529 0.069102 0.359935 0.164055 0.063832 0.126592 \n\ -5.069051 0.749554 5.245486 0.840686 7.114530 1.177802 0.382956 6.139836 1.086779 0.194824 0.424579 0.655759 0.682174 0.753148 1.355810 \n\ -2.949741 0.623328 3.248881 0.406219 3.345739 1.214278 0.538553 0.867954 0.747654 3.316346 0.754081 1.193593 3.516479 0.366653 0.622665 7.975653 \n\ -0.115446 0.394156 0.090971 0.055309 1.947845 0.185912 0.046886 0.451084 1.173014 0.277029 1.078778 0.054622 1.516237 5.813526 0.071865 0.359167 0.106921 \n\ -0.205680 0.197878 0.678775 0.118188 4.183184 0.139485 0.059999 0.051336 10.200670 0.507328 0.721921 0.086974 0.741023 24.191458 0.046460 0.489820 0.247367 4.904042 \n\ -2.494211 0.280293 0.235248 0.083648 5.509932 0.429196 0.409105 0.447130 0.351675 23.404006 3.840750 0.300727 4.126659 1.483049 0.675560 0.336101 4.426709 0.309940 0.588217 \n\ -\n\ -0.115826 0.094038 0.037357 0.085821 0.003363 0.073078 0.167709 0.025416 0.021634 0.024147 0.050238 0.106612 0.013318 0.013330 0.029895 0.044902 0.037901 0.006460 0.018548 0.030407;\n\ -\n\ -model EXP_OTH =\n\ -0.603175 \n\ -0.478745 0.562615 \n\ -0.608325 0.056553 3.755571 \n\ -2.371839 2.480665 0.889513 0.170707 \n\ -1.551117 2.685995 1.462350 0.424139 0.669728 \n\ -1.624084 0.129505 0.314826 3.404205 0.049823 3.375473 \n\ -0.987777 0.356744 1.294077 0.640234 0.583980 0.331879 0.304731 \n\ -0.667236 2.788429 4.719171 0.731257 1.872668 4.612209 0.316233 0.320454 \n\ -0.186911 0.269245 0.318538 0.028464 0.987958 0.242926 0.090427 0.007312 0.327205 \n\ -0.527992 0.844027 0.167295 0.021423 
1.623589 1.636879 0.135662 0.044560 0.939347 10.338048 \n\ -0.842575 5.076266 1.736167 0.106076 0.132985 3.365869 0.969736 0.270931 0.669196 0.356829 0.352830 \n\ -1.296147 0.863599 0.469732 0.075018 1.832599 2.642602 0.217378 0.107935 0.624941 10.670411 17.593544 1.247987 \n\ -0.325034 0.135328 0.192352 0.021631 2.731423 0.103263 0.027708 0.060740 2.148472 2.344767 5.497995 0.057563 3.278627 \n\ -1.670091 0.235642 0.042844 0.164518 0.112539 0.479958 0.326780 0.057540 0.291899 0.110067 0.380466 0.240061 0.109541 0.083760 \n\ -5.098150 0.831455 3.661924 0.978777 4.500240 1.064732 0.455496 1.095629 0.915898 0.226713 0.405000 0.608323 0.525496 0.593321 1.035726 \n\ -2.174502 0.630453 1.791747 0.396219 1.681712 1.083797 0.556968 0.100584 0.457070 2.361119 0.543612 1.211816 2.987220 0.198957 0.368383 7.505908 \n\ -0.203719 0.615713 0.044203 0.046952 1.745090 0.303876 0.050920 0.155176 0.920001 0.165182 1.385828 0.055323 1.274920 5.896599 0.059081 0.303111 0.156402 \n\ -0.271220 0.253084 0.643377 0.142691 3.763228 0.209729 0.093004 0.035856 8.167503 0.490579 0.894778 0.077103 1.029700 26.210400 0.045876 0.373529 0.218567 5.726440 \n\ -3.470639 0.410713 0.180011 0.081584 4.323431 0.751254 0.686467 0.086874 0.318032 29.800262 3.856040 0.482930 4.862267 1.182403 0.390522 0.268937 2.836818 0.229423 0.453335 \n\ -\n\ -0.071716 0.058979 0.060316 0.101089 0.005039 0.044673 0.093349 0.105394 0.026228 0.020220 0.037831 0.081647 0.010677 0.015875 0.090566 0.065046 0.054453 0.005546 0.019924 0.031432;\n\ -\n\ -\n\ -model EX_EHO = MIX{BUR_EXT:0.761816796788931,BUR_HEL:0.744425646802117,BUR_OTH:0.532457759429489,EXP_EXT:1.5639387472863,EXP_HEL:2.06403411829438,EXP_OTH:1.43336795177594};\n\ -\n\ -\n\ -[ ---------------------------------------------------------\n\ - LG4M mixture model of Le, Dang & Gascuel (2012)\n\ - --------------------------------------------------------- ]\n\ -\n\ -model LG4M1 =\n\ - 0.269343\n\ - 0.254612 0.150988\n\ - 0.236821 0.031863 0.659648\n\ - 2.506547 
0.938594 0.975736 0.175533\n\ - 0.359080 0.348288 0.697708 0.086573 0.095967\n\ - 0.304674 0.156000 0.377704 0.449140 0.064706 4.342595\n\ - 1.692015 0.286638 0.565095 0.380358 0.617945 0.202058 0.264342\n\ - 0.251974 0.921633 1.267609 0.309692 0.390429 2.344059 0.217750 0.104842\n\ - 1.085220 0.325624 0.818658 0.037814 1.144150 0.534567 0.222793 0.062682 0.567431\n\ - 0.676353 0.602366 0.217027 0.007533 1.595775 0.671143 0.158424 0.070463 0.764255 8.226528\n\ - 0.179155 0.971338 1.343718 0.133744 0.122468 0.983857 0.994128 0.220916 0.410581 0.387487 0.181110\n\ - 1.636817 0.515217 0.670461 0.071252 1.534848 5.288642 0.255628 0.094198 0.257229 25.667158 6.819689 1.591212\n\ - 0.235498 0.123932 0.099793 0.030425 0.897279 0.112229 0.022529 0.047488 0.762914 1.344259 0.865691 0.038921 2.030833\n\ - 1.265605 0.040163 0.173354 0.027579 0.259961 0.580374 0.088041 0.145595 0.143676 0.298859 1.020117 0.000714 0.190019 0.093964\n\ - 5.368405 0.470952 5.267140 0.780505 4.986071 0.890554 0.377949 1.755515 0.786352 0.527246 0.667783 0.659948 0.731921 0.837669 1.355630\n\ - 1.539394 0.326789 1.688169 0.283738 1.389282 0.329821 0.231770 0.117017 0.449977 3.531600 0.721586 0.497588 2.691697 0.152088 0.698040 16.321298\n\ - 0.140944 0.375611 0.025163 0.002757 0.801456 0.257253 0.103678 0.132995 0.345834 0.377156 0.839647 0.176970 0.505682 1.670170 0.091298 0.210096 0.013165\n\ - 0.199836 0.146857 0.806275 0.234246 1.436970 0.319669 0.010076 0.036859 3.503317 0.598632 0.738969 0.154436 0.579000 4.245524 0.074524 0.454195 0.232913 1.178490\n\ - 9.435529 0.285934 0.395670 0.130890 6.097263 0.516259 0.503665 0.222960 0.149143 13.666175 2.988174 0.162725 5.973826 0.843416 0.597394 0.701149 4.680002 0.300085 0.416262\n\ -\n\ -0.082276 0.055172 0.043853 0.053484 0.018957 0.028152 0.046679 0.157817 0.033297 0.028284 0.054284 0.025275 0.023665 0.041874 0.063071 0.066501 0.065424 0.023837 0.038633 0.049465;\n\ -\n\ -model LG4M2 =\n\ -0.133720\n\ - 0.337212 0.749052\n\ - 0.110918 0.105087 
4.773487\n\ - 3.993460 0.188305 1.590332 0.304942\n\ - 0.412075 2.585774 1.906884 0.438367 0.242076\n\ - 0.435295 0.198278 0.296366 7.470333 0.008443 3.295515\n\ - 7.837540 0.164607 0.431724 0.153850 1.799716 0.269744 0.242866\n\ - 0.203872 2.130334 9.374479 1.080878 0.152458 12.299133 0.279589 0.089714\n\ - 0.039718 0.024553 0.135254 0.014979 0.147498 0.033964 0.005585 0.007248 0.022746\n\ - 0.075784 0.080091 0.084971 0.014128 0.308347 0.500836 0.022833 0.022999 0.161270 1.511682\n\ - 0.177662 10.373708 1.036721 0.038303 0.043030 2.181033 0.321165 0.103050 0.459502 0.021215 0.078395\n\ - 0.420784 0.192765 0.329545 0.008331 0.883142 1.403324 0.168673 0.160728 0.612573 1.520889 7.763266 0.307903\n\ - 0.071268 0.019652 0.088753 0.013547 0.566609 0.071878 0.020050 0.041022 0.625361 0.382806 1.763059 0.044644 1.551911\n\ - 0.959127 1.496585 0.377794 0.332010 0.318192 1.386970 0.915904 0.224255 2.611479 0.029351 0.068250 1.542356 0.047525 0.182715\n\ - 11.721512 0.359408 2.399158 0.219464 9.104192 0.767563 0.235229 3.621219 0.971955 0.033780 0.043035 0.236929 0.319964 0.124977 0.840651\n\ - 2.847068 0.218463 1.855386 0.109808 4.347048 0.765848 0.164569 0.312024 0.231569 0.356327 0.159597 0.403210 1.135162 0.106903 0.269190 9.816481\n\ - 0.030203 0.387292 0.118878 0.067287 0.190240 0.122113 0.007023 0.137411 0.585141 0.020634 0.228824 0.000122 0.474862 3.135128 0.030313 0.093830 0.119152\n\ - 0.067183 0.130101 0.348730 0.061798 0.301198 0.095382 0.095764 0.044628 2.107384 0.046105 0.100117 0.017073 0.192383 8.367641 0.000937 0.137416 0.044722 4.179782\n\ - 0.679398 0.041567 0.092408 0.023701 1.271187 0.115566 0.055277 0.086988 0.060779 8.235167 0.609420 0.061764 0.581962 0.184187 0.080246 0.098033 1.438350 0.023439 0.039124\n\ -\n\ -0.120900 0.036460 0.026510 0.040410 0.015980 0.021132 0.025191 0.036369 0.015884 0.111029 0.162852 0.024820 0.028023 0.074058 0.012065 0.041963 0.039072 0.012666 0.040478 0.114137;\n\ -\n\ -model LG4M3 =\n\ -0.421017\n\ - 0.316236 0.693340\n\ 
- 0.285984 0.059926 6.158219\n\ - 4.034031 1.357707 0.708088 0.063669\n\ - 0.886972 2.791622 1.701830 0.484347 0.414286\n\ - 0.760525 0.233051 0.378723 4.032667 0.081977 4.940411\n\ - 0.754103 0.402894 2.227443 1.102689 0.416576 0.459376 0.508409\n\ - 0.571422 2.319453 5.579973 0.885376 1.439275 4.101979 0.576745 0.428799\n\ - 0.162152 0.085229 0.095692 0.006129 0.490937 0.104843 0.045514 0.004705 0.098934\n\ - 0.308006 0.287051 0.056994 0.007102 0.958988 0.578990 0.067119 0.024403 0.342983 3.805528\n\ - 0.390161 7.663209 1.663641 0.105129 0.135029 3.364474 0.652618 0.457702 0.823674 0.129858 0.145630\n\ - 1.042298 0.364551 0.293222 0.037983 1.486520 1.681752 0.192414 0.070498 0.222626 4.529623 4.781730 0.665308\n\ - 0.362476 0.073439 0.129245 0.020078 1.992483 0.114549 0.023272 0.064490 1.491794 1.113437 2.132006 0.041677 1.928654\n\ - 1.755491 0.087050 0.099325 0.163817 0.242851 0.322939 0.062943 0.198698 0.192904 0.062948 0.180283 0.059655 0.129323 0.065778\n\ - 3.975060 0.893398 5.496314 1.397313 3.575120 1.385297 0.576191 1.733288 1.021255 0.065131 0.129115 0.600308 0.387276 0.446001 1.298493\n\ - 2.565079 0.534056 2.143993 0.411388 2.279084 0.893006 0.528209 0.135731 0.518741 0.972662 0.280700 0.890086 1.828755 0.189028 0.563778 7.788147\n\ - 0.283631 0.497926 0.075454 0.043794 1.335322 0.308605 0.140137 0.150797 1.409726 0.119868 0.818331 0.080591 1.066017 3.754687 0.073415 0.435046 0.197272\n\ - 0.242513 0.199157 0.472207 0.085937 2.039787 0.262751 0.084578 0.032247 7.762326 0.153966 0.299828 0.117255 0.438215 14.506235 0.089180 0.352766 0.215417 5.054245\n\ - 2.795818 0.107130 0.060909 0.029724 2.986426 0.197267 0.196977 0.044327 0.116751 7.144311 1.848622 0.118020 1.999696 0.705747 0.272763 0.096935 1.820982 0.217007 0.172975\n\ -\n\ -0.072639 0.051691 0.038642 0.055580 0.009829 0.031374 0.048731 0.065283 0.023791 0.086640 0.120847 0.052177 0.026728 0.032589 0.039238 0.046748 0.053361 0.008024 0.037426 0.098662;\n\ -\n\ -model LG4M4 =\n\ -0.576160\n\ - 
0.567606 0.498643\n\ - 0.824359 0.050698 3.301401\n\ - 0.822724 4.529235 1.291808 0.101930\n\ - 1.254238 2.169809 1.427980 0.449474 0.868679\n\ - 1.218615 0.154502 0.411471 3.172277 0.050239 2.138661\n\ - 1.803443 0.604673 2.125496 1.276384 1.598679 0.502653 0.479490\n\ - 0.516862 2.874265 4.845769 0.719673 3.825677 4.040275 0.292773 0.596643\n\ - 0.180898 0.444586 0.550969 0.023542 2.349573 0.370160 0.142187 0.016618 0.500788\n\ - 0.452099 0.866322 0.201033 0.026731 2.813990 1.645178 0.135556 0.072152 1.168817 5.696116\n\ - 0.664186 2.902886 2.101971 0.127988 0.200218 2.505933 0.759509 0.333569 0.623100 0.547454 0.363656\n\ - 0.864415 0.835049 0.632649 0.079201 2.105931 1.633544 0.216462 0.252419 0.665406 7.994105 11.751178 1.096842\n\ - 0.324478 0.208947 0.280339 0.041683 4.788477 0.107022 0.067711 0.171320 3.324779 2.965328 5.133843 0.084856 4.042591\n\ - 1.073043 0.173826 0.041985 0.270336 0.121299 0.351384 0.228565 0.225318 0.376089 0.058027 0.390354 0.214230 0.058954 0.126299\n\ - 3.837562 0.884342 4.571911 0.942751 6.592827 1.080063 0.465397 3.137614 1.119667 0.362516 0.602355 0.716940 0.506796 1.444484 1.432558\n\ - 2.106026 0.750016 2.323325 0.335915 1.654673 1.194017 0.617231 0.318671 0.801030 4.455842 0.580191 1.384210 3.522468 0.473128 0.432718 5.716300\n\ - 0.163720 0.818102 0.072322 0.068275 3.305436 0.373790 0.054323 0.476587 1.100360 0.392946 1.703323 0.085720 1.725516 5.436253 0.053108 0.498594 0.231832\n\ - 0.241167 0.302440 1.055095 0.246940 9.741942 0.249895 0.129973 0.052363 11.542498 1.047449 1.319667 0.139770 1.330225 26.562270 0.046986 0.737653 0.313460 5.165098\n\ - 1.824586 0.435795 0.179086 0.091739 3.609570 0.649507 0.656681 0.225234 0.473437 19.897252 3.001995 0.452926 3.929598 1.692159 0.370204 0.373501 3.329822 0.326593 0.860743\n\ -\n\ -0.104843 0.078835 0.043513 0.090498 0.002924 0.066163 0.151640 0.038843 0.022556 0.018383 0.038687 0.104462 0.010166 0.009089 0.066950 0.053667 0.049486 0.004409 0.012924 0.031963;\n\ -\n\ -model LG4M 
= MIX{LG4M1,LG4M2,LG4M3,LG4M4}*G4;\n\ -model LG4 = MIX{LG4M1,LG4M2,LG4M3,LG4M4}*G4;\n\ -\n\ -\n\ -[ ---------------------------------------------------------\n\ - LG4X mixture model of Le, Dang & Gascuel (2012)\n\ - --------------------------------------------------------- ]\n\ -\n\ -model LG4X1 =\n\ -0.295719\n\ -0.067388 0.448317\n\ -0.253712 0.457483 2.358429\n\ -1.029289 0.576016 0.251987 0.189008\n\ -0.107964 1.741924 0.216561 0.599450 0.029955\n\ -0.514644 0.736017 0.503084 109.901504 0.084794 4.117654\n\ -10.868848 0.704334 0.435271 1.070052 1.862626 0.246260 1.202023\n\ -0.380498 5.658311 4.873453 5.229858 0.553477 6.508329 1.634845 0.404968\n\ -0.084223 0.123387 0.090748 0.052764 0.151733 0.054187 0.060194 0.048984 0.204296\n\ -0.086976 0.221777 0.033310 0.021407 0.230320 0.195703 0.069359 0.069963 0.504221 1.495537\n\ -0.188789 93.433377 0.746537 0.621146 0.096955 1.669092 2.448827 0.256662 1.991533 0.091940 0.122332\n\ -0.286389 0.382175 0.128905 0.081091 0.352526 0.810168 0.232297 0.228519 0.655465 1.994320 3.256485 0.457430\n\ -0.155567 0.235965 0.127321 0.205164 0.590018 0.066081 0.064822 0.241077 6.799829 0.754940 2.261319 0.163849 1.559944\n\ -1.671061 6.535048 0.904011 5.164456 0.386853 2.437439 3.537387 4.320442 11.291065 0.170343 0.848067 5.260446 0.426508 0.438856\n\ -2.132922 0.525521 0.939733 0.747330 1.559564 0.165666 0.435384 3.656545 0.961142 0.050315 0.064441 0.360946 0.132547 0.306683 4.586081\n\ -0.529591 0.303537 0.435450 0.308078 0.606648 0.106333 0.290413 0.290216 0.448965 0.372166 0.102493 0.389413 0.498634 0.109129 2.099355 3.634276\n\ -0.115551 0.641259 0.046646 0.260889 0.587531 0.093417 0.280695 0.307466 6.227274 0.206332 0.459041 0.033291 0.559069 18.392863 0.411347 0.101797 0.034710\n\ -0.102453 0.289466 0.262076 0.185083 0.592318 0.035149 0.105999 0.096556 20.304886 0.097050 0.133091 0.115301 0.264728 66.647302 0.476350 0.148995 0.063603 20.561407\n\ -0.916683 0.102065 0.043986 0.080708 0.885230 0.072549 0.206603 0.306067 
0.205944 5.381403 0.561215 0.112593 0.693307 0.400021 0.584622 0.089177 0.755865 0.133790 0.154902\n\ -\n\ -0.147383 0.017579 0.058208 0.017707 0.026331 0.041582 0.017494 0.027859 0.011849 0.076971 0.147823 0.019535 0.037132 0.029940 0.008059 0.088179 0.089653 0.006477 0.032308 0.097931;\n\ -\n\ -model LG4X2 =\n\ - 0.066142\n\ - 0.590377 0.468325\n\ - 0.069930 0.013688 2.851667\n\ - 9.850951 0.302287 3.932151 0.146882\n\ - 1.101363 1.353957 8.159169 0.249672 0.582670\n\ - 0.150375 0.028386 0.219934 0.560142 0.005035 3.054085\n\ - 0.568586 0.037750 0.421974 0.046719 0.275844 0.129551 0.037250\n\ - 0.051668 0.262130 2.468752 0.106259 0.098208 4.210126 0.029788 0.013513\n\ - 0.127170 0.016923 0.344765 0.003656 0.445038 0.165753 0.008541 0.002533 0.031779\n\ - 0.292429 0.064289 0.210724 0.004200 1.217010 1.088704 0.014768 0.005848 0.064558 7.278994\n\ - 0.071458 0.855973 1.172204 0.014189 0.033969 1.889645 0.125869 0.031390 0.065585 0.029917 0.042762\n\ - 1.218562 0.079621 0.763553 0.009876 1.988516 3.344809 0.056702 0.021612 0.079927 7.918203 14.799537 0.259400\n\ - 0.075144 0.011169 0.082464 0.002656 0.681161 0.111063 0.004186 0.004854 0.095591 0.450964 1.506485 0.009457 1.375871\n\ - 7.169085 0.161937 0.726566 0.040244 0.825960 2.067758 0.110993 0.129497 0.196886 0.169797 0.637893 0.090576 0.457399 0.143327\n\ - 30.139501 0.276530 11.149790 0.267322 18.762977 3.547017 0.201148 0.976631 0.408834 0.104288 0.123793 0.292108 0.598048 0.328689 3.478333\n\ - 13.461692 0.161053 4.782635 0.053740 11.949233 2.466507 0.139705 0.053397 0.126088 1.578530 0.641351 0.297913 4.418398 0.125011 2.984862 13.974326\n\ - 0.021372 0.081472 0.058046 0.006597 0.286794 0.188236 0.009201 0.019475 0.037226 0.015909 0.154810 0.017172 0.239749 0.562720 0.061299 0.154326 0.060703\n\ - 0.045779 0.036742 0.498072 0.027639 0.534219 0.203493 0.012095 0.004964 0.452302 0.094365 0.140750 0.021976 0.168432 1.414883 0.077470 0.224675 0.123480 0.447011\n\ - 4.270235 0.030342 0.258487 0.012745 4.336817 
0.281953 0.043812 0.015539 0.016212 16.179952 3.416059 0.032578 2.950318 0.227807 1.050562 0.112000 5.294490 0.033381 0.045528\n\ -\n\ -0.063139 0.066357 0.011586 0.066571 0.010800 0.009276 0.053984 0.146986 0.034214 0.088822 0.098196 0.032390 0.021263 0.072697 0.016761 0.020711 0.020797 0.025463 0.045615 0.094372;\n\ -\n\ -model LG4X3 =\n\ - 0.733336\n\ - 0.558955 0.597671\n\ - 0.503360 0.058964 5.581680\n\ - 4.149599 2.863355 1.279881 0.225860\n\ - 1.415369 2.872594 1.335650 0.434096 1.043232\n\ - 1.367574 0.258365 0.397108 2.292917 0.209978 4.534772\n\ - 1.263002 0.366868 1.840061 1.024707 0.823594 0.377181 0.496780\n\ - 0.994098 2.578946 5.739035 0.821921 3.039380 4.877840 0.532488 0.398817\n\ - 0.517204 0.358350 0.284730 0.027824 1.463390 0.370939 0.232460 0.008940 0.349195\n\ - 0.775054 0.672023 0.109781 0.021443 1.983693 1.298542 0.169219 0.043707 0.838324 5.102837\n\ - 0.763094 5.349861 1.612642 0.088850 0.397640 3.509873 0.755219 0.436013 0.888693 0.561690 0.401070\n\ - 1.890137 0.691594 0.466979 0.060820 2.831098 2.646440 0.379926 0.087640 0.488389 7.010411 8.929538 1.357738\n\ - 0.540460 0.063347 0.141582 0.018288 4.102068 0.087872 0.020447 0.064863 1.385133 3.054968 5.525874 0.043394 3.135353\n\ - 0.200122 0.032875 0.019509 0.042687 0.059723 0.072299 0.023282 0.036426 0.050226 0.039318 0.067505 0.023126 0.012695 0.015631\n\ - 4.972745 0.821562 4.670980 1.199607 5.901348 1.139018 0.503875 1.673207 0.962470 0.204155 0.273372 0.567639 0.570771 0.458799 0.233109\n\ - 1.825593 0.580847 1.967383 0.420710 2.034980 0.864479 0.577513 0.124068 0.502294 2.653232 0.437116 1.048288 2.319555 0.151684 0.077004 8.113282\n\ - 0.450842 0.661866 0.088064 0.037642 2.600668 0.390688 0.109318 0.218118 1.065585 0.564368 1.927515 0.120994 1.856122 4.154750 0.011074 0.377578 0.222293\n\ - 0.526135 0.265730 0.581928 0.141233 5.413080 0.322761 0.153776 0.039217 8.351808 0.854294 0.940458 0.180650 0.975427 11.429924 0.026268 0.429221 0.273138 4.731579\n\ - 3.839269 0.395134 
0.145401 0.090101 4.193725 0.625409 0.696533 0.104335 0.377304 15.559906 2.508169 0.449074 3.404087 1.457957 0.052132 0.260296 2.903836 0.564762 0.681215\n\ -\n\ - 0.062457 0.066826 0.049332 0.065270 0.006513 0.041231 0.058965 0.080852 0.028024 0.037024 0.075925 0.064131 0.019620 0.028710 0.104579 0.056388 0.062027 0.008241 0.033124 0.050760;\n\ -\n\ -model LG4X4 =\n\ - 0.658412\n\ - 0.566269 0.540749\n\ - 0.854111 0.058015 3.060574\n\ - 0.884454 5.851132 1.279257 0.160296\n\ - 1.309554 2.294145 1.438430 0.482619 0.992259\n\ - 1.272639 0.182966 0.431464 2.992763 0.086318 2.130054\n\ - 1.874713 0.684164 2.075952 1.296206 2.149634 0.571406 0.507160\n\ - 0.552007 3.192521 4.840271 0.841829 5.103188 4.137385 0.351381 0.679853\n\ - 0.227683 0.528161 0.644656 0.031467 3.775817 0.437589 0.189152 0.025780 0.665865\n\ - 0.581512 1.128882 0.266076 0.048542 3.954021 2.071689 0.217780 0.082005 1.266791 8.904999\n\ - 0.695190 3.010922 2.084975 0.132774 0.190734 2.498630 0.767361 0.326441 0.680174 0.652629 0.440178\n\ - 0.967985 1.012866 0.720060 0.133055 1.776095 1.763546 0.278392 0.343977 0.717301 10.091413 14.013035 1.082703\n\ - 0.344015 0.227296 0.291854 0.056045 4.495841 0.116381 0.092075 0.195877 4.001286 2.671718 5.069337 0.091278 4.643214\n\ - 0.978992 0.156635 0.028961 0.209188 0.264277 0.296578 0.177263 0.217424 0.362942 0.086367 0.539010 0.172734 0.121821 0.161015\n\ - 3.427163 0.878405 4.071574 0.925172 7.063879 1.033710 0.451893 3.057583 1.189259 0.359932 0.742569 0.693405 0.584083 1.531223 1.287474\n\ - 2.333253 0.802754 2.258357 0.360522 2.221150 1.283423 0.653836 0.377558 0.964545 4.797423 0.780580 1.422571 4.216178 0.599244 0.444362 5.231362\n\ - 0.154701 0.830884 0.073037 0.094591 3.017954 0.312579 0.074620 0.401252 1.350568 0.336801 1.331875 0.068958 1.677263 5.832025 0.076328 0.548763 0.208791\n\ - 0.221089 0.431617 1.238426 0.313945 8.558815 0.305772 0.181992 0.072258 12.869737 1.021885 1.531589 0.163829 1.575754 33.873091 0.079916 0.831890 0.307846 
5.910440\n\ - 2.088785 0.456530 0.199728 0.118104 4.310199 0.681277 0.752277 0.241015 0.531100 23.029406 4.414850 0.481711 5.046403 1.914768 0.466823 0.382271 3.717971 0.282540 0.964421\n\ -\n\ -0.106471 0.074171 0.044513 0.096390 0.002148 0.066733 0.158908 0.037625 0.020691 0.014608 0.028797 0.105352 0.007864 0.007477 0.083595 0.055726 0.047711 0.003975 0.010088 0.027159;\n\ -\n\ -model LG4X = MIX{LG4X1,LG4X2,LG4X3,LG4X4}*R4;\n\ -\n\ -[ ---------------------------------------------------------\n\ - +cF class frequency mixture model of Wang et al. (2008)\n\ - --------------------------------------------------------- ]\n\ -\n\ -frequency Fclass1 = 0.02549352 0.01296012 0.005545202 0.006005566 0.01002193 0.01112289 0.008811948 0.001796161 0.004312188 0.2108274 0.2730413 0.01335451 0.07862202 0.03859909 0.005058205 0.008209453 0.03210019 0.002668138 0.01379098 0.2376598;\n\ -frequency Fclass2 = 0.09596966 0.008786096 0.02805857 0.01880183 0.005026264 0.006454635 0.01582725 0.7215719 0.003379354 0.002257725 0.003013483 0.01343441 0.001511657 0.002107865 0.006751404 0.04798539 0.01141559 0.000523736 0.002188483 0.004934972;\n\ -frequency Fclass3 = 0.01726065 0.005467988 0.01092937 0.3627871 0.001046402 0.01984758 0.5149206 0.004145081 0.002563289 0.002955213 0.005286931 0.01558693 0.002693098 0.002075771 0.003006167 0.01263069 0.01082144 0.000253451 0.001144787 0.004573568;\n\ -frequency Fclass4 = 0.1263139 0.09564027 0.07050061 0.03316681 0.02095119 0.05473468 0.02790523 0.009007538 0.03441334 0.005855319 0.008061884 0.1078084 0.009019514 0.05018693 0.07948 0.09447839 0.09258897 0.01390669 0.05367769 0.01230413;\n\ -model CF4 = POISSON+FMIX{Fclass1,Fclass2,Fclass3,Fclass4}+F+G;\n\ -model JTTCF4G = JTT+FMIX{empirical,Fclass1,Fclass2,Fclass3,Fclass4}+G;\n\ -\n\ -[ ---------------------------------------------------------\n\ - CAT-C10 profile mixture model of Le, Gascuel & Lartillot (2008)\n\ - --------------------------------------------------------- ]\n\ -\n\ -frequency 
C10pi1 = 0.4082573125 0.0081783015 0.0096285438 0.0069870889 0.0349388179 0.0075279735 0.0097846653 0.1221613215 0.0039151830 0.0125784287 0.0158338663 0.0059670150 0.0081313216 0.0061604332 0.0394155867 0.1682450664 0.0658132542 0.0018751587 0.0041579747 0.0604426865;\n\ -frequency C10pi2 = 0.1027763487 0.0418664491 0.0213272051 0.0155943616 0.0149663448 0.0440685478 0.0419667447 0.0138805792 0.0158864807 0.1066076641 0.1131944125 0.0436343681 0.0437800327 0.0180729309 0.0223250701 0.0529608087 0.1081741005 0.0045147205 0.0137373857 0.1606654446;\n\ -frequency C10pi3 = 0.0351766018 0.0019678632 0.0016591476 0.0006768741 0.0078706538 0.0016559557 0.0019686768 0.0022420602 0.0012878339 0.3515819591 0.1278183107 0.0018856550 0.0242631753 0.0126221329 0.0029771559 0.0049998099 0.0255378034 0.0011907778 0.0037539283 0.3888636245;\n\ -frequency C10pi4 = 0.0408513927 0.0269887074 0.2185648186 0.2333814790 0.0037602852 0.0380451418 0.0901238869 0.1158332065 0.0373197176 0.0025523644 0.0052164616 0.0485017266 0.0022571778 0.0025108218 0.0108333610 0.0804527209 0.0302879995 0.0010815260 0.0069890931 0.0044481118;\n\ -frequency C10pi5 = 0.0185492661 0.0062362395 0.0024895723 0.0009775062 0.0070416514 0.0083539447 0.0024891617 0.0028952913 0.0040103982 0.1632422345 0.4443079409 0.0043570878 0.1202815687 0.0733329781 0.0048827648 0.0051642443 0.0131806647 0.0068759784 0.0144734420 0.0968580644;\n\ -frequency C10pi6 = 0.1106750119 0.0352190043 0.0405186210 0.1636437899 0.0014834855 0.0877962201 0.2638456592 0.0325228293 0.0163803600 0.0068334902 0.0140679579 0.0677158208 0.0048988133 0.0023256777 0.0298982139 0.0562887953 0.0426922497 0.0010338979 0.0040522304 0.0181078719;\n\ -frequency C10pi7 = 0.0522657662 0.0668294648 0.0714836849 0.0297745257 0.0143324928 0.0736540298 0.0388386669 0.0228101108 0.1551638111 0.0187406149 0.0653779932 0.0439469345 0.0207189121 0.0624033021 0.0145475497 0.0549017631 0.0370140058 0.0193756900 0.1110694548 0.0267512268;\n\ -frequency C10pi8 = 
0.0116587342 0.0050990142 0.0064011054 0.0021742457 0.0105340743 0.0040203734 0.0024251112 0.0034709143 0.0366787049 0.0187185330 0.0676489746 0.0026694717 0.0143534813 0.3650985596 0.0031159927 0.0094848536 0.0073713920 0.0509564551 0.3574858593 0.0206341497;\n\ -frequency C10pi9 = 0.0627195947 0.2038782162 0.0428629162 0.0236193294 0.0052662886 0.1098111767 0.0686284994 0.0256174957 0.0332612124 0.0128968249 0.0305627740 0.2270839355 0.0124036991 0.0039181841 0.0140440613 0.0483152469 0.0463378087 0.0025143473 0.0065521118 0.0197062770;\n\ -frequency C10pi10 = 0.1145518598 0.0324008908 0.0750614981 0.0416192189 0.0098549497 0.0339624663 0.0364907910 0.0503817581 0.0165233329 0.0092949460 0.0139153707 0.0423026886 0.0082240805 0.0046605982 0.0379221548 0.2610647896 0.1845829279 0.0017548981 0.0058538316 0.0195769483;\n\ -model C10 = POISSON+G+FMIX{C10pi1:1:0.1191344178,C10pi2:1:0.0874372456,C10pi3:1:0.1037105070,C10pi4:1:0.0922584809,C10pi5:1:0.1070492801,C10pi6:1:0.1329945166,C10pi7:1:0.0538028458,C10pi8:1:0.0691986212,C10pi9:1:0.1319937434,C10pi10:1:0.1024203429};\n\ -model C10Opt = POISSON+G+FMIX{C10pi1,C10pi2,C10pi3,C10pi4,C10pi5,C10pi6,C10pi7,C10pi8,C10pi9,C10pi10};\n\ -\n\ -[ ---------------------------------------------------------\n\ - CAT-C20 profile mixture model of Le, Gascuel & Lartillot (2008)\n\ - --------------------------------------------------------- ]\n\ -frequency C20pi1 = 0.0862412505 0.0171943793 0.0791293376 0.0329908619 0.0130504558 0.0169046938 0.0184526503 0.0366905299 0.0108013340 0.0097907148 0.0112826424 0.0220195221 0.0087821483 0.0044155335 0.0189273201 0.3178152357 0.2711700523 0.0015317305 0.0048342853 0.0179753220 ;\n\ -frequency C20pi2 = 0.2035582865 0.0050980810 0.0077052407 0.0031656079 0.0348667285 0.0064044073 0.0070859400 0.0195235515 0.0024392035 0.1152573291 0.0789777393 0.0042380850 0.0309187017 0.0112429356 0.0164189221 0.0496777139 0.1118946615 0.0017762569 0.0048448213 0.2849057867 ;\n\ -frequency C20pi3 = 0.0211547413 
0.0014946177 0.0012755030 0.0005492865 0.0048188557 0.0012328812 0.0014539632 0.0011430874 0.0011346394 0.3928460626 0.1250644210 0.0013579946 0.0209788805 0.0128251737 0.0020247248 0.0026240726 0.0171914121 0.0011591071 0.0036027969 0.3860677787 ;\n\ -frequency C20pi4 = 0.0376903543 0.2885196153 0.0365411474 0.0109469400 0.0064073829 0.0893564381 0.0358365464 0.0191106776 0.0329513951 0.0101711878 0.0237495504 0.2897626974 0.0096528870 0.0036349802 0.0105337370 0.0356313768 0.0355926500 0.0027925238 0.0066557222 0.0144621902 ;\n\ -frequency C20pi5 = 0.0084597802 0.0053589922 0.0072525884 0.0024487852 0.0084909000 0.0042781483 0.0025055486 0.0024277107 0.0433214027 0.0097713028 0.0380507037 0.0026741007 0.0080724771 0.3420463838 0.0021418673 0.0080418935 0.0055322116 0.0494840193 0.4375001561 0.0121410277 ;\n\ -frequency C20pi6 = 0.1759898886 0.0290429175 0.0332845569 0.1301263816 0.0017558693 0.0707183953 0.2182166681 0.0409535143 0.0130708195 0.0085622087 0.0159530702 0.0542946169 0.0054045759 0.0025276980 0.0371020404 0.0793480500 0.0540083424 0.0010592104 0.0036259116 0.0249552645 ;\n\ -frequency C20pi7 = 0.1634397322 0.0195541184 0.0438701833 0.0374272612 0.0088659891 0.0137554758 0.0220611924 0.5296717726 0.0090006141 0.0017569353 0.0061156267 0.0167117975 0.0029390787 0.0030641349 0.0126457766 0.0829342776 0.0142835614 0.0028640685 0.0032398299 0.0057985736 ;\n\ -frequency C20pi8 = 0.0917468761 0.0265853306 0.0290699087 0.0133818895 0.0284015012 0.0255084506 0.0196875685 0.0249898794 0.0449766405 0.0583555688 0.1155009222 0.0164915955 0.0395994595 0.0998479096 0.0209916159 0.0736482742 0.0661518462 0.0246463919 0.0972327226 0.0831856483 ;\n\ -frequency C20pi9 = 0.0646700714 0.0988015996 0.0228907308 0.0168733856 0.0077117603 0.0996414875 0.0544977962 0.0148893975 0.0313851988 0.0505983315 0.1844282999 0.0907931290 0.0774839960 0.0219148172 0.0105004469 0.0321196170 0.0411766062 0.0084303030 0.0206106035 0.0505824221 ;\n\ -frequency C20pi10 = 0.0135993865 
0.0043408375 0.0018469375 0.0007951703 0.0100090240 0.0046420778 0.0018011758 0.0026794645 0.0072401918 0.0814026713 0.3661422246 0.0025158135 0.0734965132 0.2640965246 0.0038994134 0.0043668760 0.0075248451 0.0261564898 0.0660970801 0.0573472826 ;\n\ -frequency C20pi11 = 0.1478036236 0.0842845089 0.0726630217 0.0534743238 0.0048825808 0.0757166156 0.0727246460 0.0907725939 0.0262288856 0.0035781075 0.0126777221 0.1051660098 0.0059621792 0.0029903868 0.0156558198 0.1459903343 0.0634877444 0.0015928454 0.0050760739 0.0092719768 ;\n\ -frequency C20pi12 = 0.0186377412 0.0042055165 0.0019865236 0.0008329696 0.0054968852 0.0065890091 0.0020248504 0.0021713483 0.0023665991 0.2020809776 0.4370381920 0.0029120653 0.1241860384 0.0385383157 0.0040672279 0.0046177381 0.0149904396 0.0026871667 0.0056324117 0.1189379840 ;\n\ -frequency C20pi13 = 0.0477624336 0.0505742667 0.0209574273 0.0141349161 0.0075791708 0.0429296799 0.0462688073 0.0052327914 0.0165351815 0.1741496627 0.1121253570 0.0577575020 0.0330288046 0.0130691347 0.0124374733 0.0264988925 0.0951754678 0.0031660482 0.0112465746 0.2093704079 ;\n\ -frequency C20pi14 = 0.4164189845 0.0056100821 0.0091701381 0.0045131748 0.0406937949 0.0061320495 0.0063229801 0.0946185184 0.0031057404 0.0076443223 0.0099885414 0.0038941773 0.0069323155 0.0048438356 0.0187840756 0.2360774301 0.0746274607 0.0012172579 0.0034825786 0.0459225422 ;\n\ -frequency C20pi15 = 0.0402295888 0.0735203003 0.1036647193 0.0365523994 0.0124782975 0.0826558132 0.0372197283 0.0233618081 0.2108307125 0.0093478727 0.0360561493 0.0482410586 0.0100289536 0.0459094917 0.0098503973 0.0533383445 0.0310209005 0.0140076639 0.1064377821 0.0152480184 ;\n\ -frequency C20pi16 = 0.0323453034 0.0236282995 0.2520448083 0.2431495959 0.0035976296 0.0330831153 0.0710274499 0.1016074562 0.0366225082 0.0031410809 0.0051980542 0.0470129351 0.0024028744 0.0024429276 0.0094837826 0.0848355278 0.0359083275 0.0008730928 0.0067247672 0.0048704638 ;\n\ -frequency C20pi17 = 
0.1476256642 0.0334506604 0.0211972524 0.0403051550 0.0032327194 0.0371554480 0.0576893391 0.0330850942 0.0146392559 0.0108267008 0.0256200793 0.0451350877 0.0058651400 0.0047177179 0.3473710507 0.0892065279 0.0485899446 0.0016358749 0.0044177191 0.0282335685 ;\n\ -frequency C20pi18 = 0.1031448143 0.0717747663 0.0435172139 0.0386401502 0.0061762467 0.0786603123 0.0923369140 0.0202338419 0.0246761899 0.0376904275 0.0376283678 0.0921698920 0.0161883318 0.0067666433 0.0128302120 0.0951450188 0.1378566702 0.0022144738 0.0083041573 0.0740453560 ;\n\ -frequency C20pi19 = 0.0837542823 0.0899383244 0.0518811417 0.0804870571 0.0020735078 0.1456497470 0.1947759184 0.0229030361 0.0268458796 0.0074079756 0.0190249576 0.1459287407 0.0067395241 0.0023063393 0.0085616014 0.0455739585 0.0451080843 0.0010771349 0.0049325333 0.0150302559 ;\n\ -frequency C20pi20 = 0.0578735570 0.0138313604 0.0491421636 0.2946738942 0.0011130839 0.0598250358 0.3402102668 0.0293911435 0.0139817004 0.0030525663 0.0062611922 0.0363365043 0.0027295976 0.0017034884 0.0156106390 0.0358044639 0.0249941878 0.0008664342 0.0038312977 0.0087674229 ;\n\ -\n\ -[ C20 with fixed weights ]\n\ -model C20 = POISSON+G+FMIX{C20pi1:1:0.0559910600,C20pi2:1:0.0514824870,C20pi3:1:0.0812922124,C20pi4:1:0.0721976867,C20pi5:1:0.0556718858,C20pi6:1:0.0331003080,C20pi7:1:0.0589501763,C20pi8:1:0.0263756889,C20pi9:1:0.0307584220,C20pi10:1:0.0376701125,C20pi11:1:0.0303058290,C20pi12:1:0.0808775576,C20pi13:1:0.0263349134,C20pi14:1:0.0579101455,C20pi15:1:0.0371248064,C20pi16:1:0.0586867766,C20pi17:1:0.0561479138,C20pi18:1:0.0349810886,C20pi19:1:0.0544937394,C20pi20:1:0.0596471901};\n\ -[ C20 to weights to be optimized ]\n\ -model C20Opt = POISSON+G+FMIX{C20pi1,C20pi2,C20pi3,C20pi4,C20pi5,C20pi6,C20pi7,C20pi8,C20pi9,C20pi10,C20pi11,C20pi12,C20pi13,C20pi14,C20pi15,C20pi16,C20pi17,C20pi18,C20pi19,C20pi20};\n\ -\n\ -model C20Test = 
POISSON+G+FMIX{C20pi1:1:0.089485,C20pi2:1:0.021281,C20pi3:1:0.119676,C20pi4:1:0.080933,C20pi5:1:0.064054,C20pi6:1:0.021848,C20pi7:1:0.063392,C20pi8:1:0.003629,C20pi9:1:0.007174,C20pi10:1:0.006256,C20pi11:1:0.023424,C20pi12:1:0.086825,C20pi13:1:0.038495,C20pi14:1:0.090028,C20pi15:1:0.020025,C20pi16:1:0.043484,C20pi17:1:0.076864,C20pi18:1:0.031347,C20pi19:1:0.047749,C20pi20:1:0.064031};\n\ -\n\ -[ ---------------------------------------------------------\n\ - CAT-C30 profile mixture model of Le, Gascuel & Lartillot (2008)\n\ - --------------------------------------------------------- ]\n\ -frequency C30pi1 = 0.1100453954 0.0171294861 0.0640338464 0.1595411459 0.0019047235 0.0310187088 0.1098958823 0.0684301540 0.0137950707 0.0026283074 0.0073396531 0.0358553674 0.0024706414 0.0016629473 0.1669356820 0.1381790473 0.0568342547 0.0004661120 0.0035970152 0.0082365591;\n\ -frequency C30pi2 = 0.0874125465 0.0806320385 0.0382152368 0.0326119879 0.0049826376 0.0798168854 0.0951700809 0.0144042708 0.0210626652 0.0399884450 0.0301585074 0.1147200015 0.0126488911 0.0048996596 0.0137397028 0.0873769666 0.1558616621 0.0015122843 0.0053974463 0.0793880836;\n\ -frequency C30pi3 = 0.0225477414 0.0014900535 0.0013034594 0.0005959279 0.0050018158 0.0011436556 0.0015030529 0.0011570953 0.0009374322 0.3944689167 0.0889573138 0.0013600872 0.0189102669 0.0089216031 0.0018312028 0.0028336408 0.0189813395 0.0006693746 0.0023303726 0.4250556480;\n\ -frequency C30pi4 = 0.0602158209 0.0136833299 0.0414987935 0.2900084105 0.0009525462 0.0621611083 0.3610869026 0.0281925621 0.0130500799 0.0030516237 0.0060401889 0.0352704692 0.0027460635 0.0014625624 0.0127175499 0.0318109377 0.0225279521 0.0007948027 0.0034024563 0.0093258397;\n\ -frequency C30pi5 = 0.0101223637 0.0028344920 0.0012928910 0.0006379191 0.0085989355 0.0035028551 0.0011249625 0.0024085229 0.0047753376 0.0701153131 0.4135913903 0.0016748492 0.0744862631 0.2785384406 0.0040466582 0.0037087155 0.0052379329 0.0200222636 0.0523938808 
0.0408860135;\n\ -frequency C30pi6 = 0.1335831781 0.0284789590 0.0213891629 0.1125775537 0.0010514541 0.0565844323 0.2099572968 0.0207551870 0.0121330488 0.0073526522 0.0133278240 0.0771772013 0.0030571689 0.0016793592 0.1890195131 0.0484054108 0.0373318180 0.0009266995 0.0026946425 0.0225174379;\n\ -frequency C30pi7 = 0.0408277374 0.0124491768 0.0080464869 0.0030634898 0.0153918410 0.0102922098 0.0066010880 0.0058113137 0.0245211764 0.1487514547 0.1637802160 0.0075923232 0.0385527359 0.1575049888 0.0058352224 0.0151578617 0.0332220362 0.0264937109 0.1213342989 0.1547706314;\n\ -frequency C30pi8 = 0.2469059247 0.0106278945 0.0168929681 0.0027418266 0.1039406309 0.0103988197 0.0054944756 0.0373263209 0.0085752319 0.0292403793 0.0535091180 0.0056123053 0.0302246485 0.0251775640 0.0078098946 0.1642352274 0.1239889705 0.0053155877 0.0163953993 0.0955868125;\n\ -frequency C30pi9 = 0.0549428629 0.1305426495 0.0202957532 0.0092915274 0.0099280995 0.0906036344 0.0417085054 0.0105563869 0.0363512470 0.0569584863 0.1681833183 0.1152521806 0.0592328363 0.0243860149 0.0083055411 0.0283778833 0.0412594019 0.0096355359 0.0249780472 0.0592100878;\n\ -frequency C30pi10 = 0.0462773303 0.0362984274 0.0412365193 0.0182504174 0.0172727117 0.0348990852 0.0224266258 0.0160971397 0.1357852215 0.0164966886 0.0598936127 0.0239396241 0.0164507129 0.1336320854 0.0117413009 0.0454156401 0.0304387749 0.0330338410 0.2350163763 0.0253978649;\n\ -frequency C30pi11 = 0.0474379955 0.0410179935 0.0222453982 0.0112116958 0.0082332447 0.0374051414 0.0388100853 0.0055998598 0.0149156570 0.1832173840 0.1100691114 0.0467850545 0.0356443791 0.0116643783 0.0100244663 0.0317171100 0.1114352326 0.0026685586 0.0099660086 0.2199312452;\n\ -frequency C30pi12 = 0.0213607696 0.0069976154 0.0039878996 0.0012941246 0.0061024858 0.0139566033 0.0036297282 0.0030017014 0.0038425894 0.1309465785 0.4566988203 0.0054567760 0.1947837355 0.0371808169 0.0040747282 0.0076991487 0.0198018718 0.0034086391 0.0064545692 
0.0693207986;\n\ -frequency C30pi13 = 0.0919632044 0.0160004872 0.0764682386 0.0306717360 0.0117031014 0.0160060006 0.0171907654 0.0370684649 0.0100792697 0.0093123713 0.0097240970 0.0205385908 0.0075767282 0.0041589440 0.0179686194 0.3254471625 0.2744377258 0.0013887442 0.0044739725 0.0178217761;\n\ -frequency C30pi14 = 0.4649246103 0.0043013249 0.0075304815 0.0050731691 0.0233328752 0.0043571322 0.0057994247 0.1495242047 0.0023298425 0.0043361190 0.0055995530 0.0028525398 0.0039313170 0.0025588185 0.0186467246 0.2150194771 0.0477030158 0.0009038096 0.0020087184 0.0292668421;\n\ -frequency C30pi15 = 0.2051329382 0.0439661329 0.0339418395 0.1070980865 0.0020915940 0.0822742346 0.1989733497 0.0487574293 0.0127143076 0.0058124693 0.0133471767 0.0667787412 0.0043783406 0.0018235059 0.0110997761 0.0873961609 0.0519781961 0.0007361603 0.0023821404 0.0193174204;\n\ -frequency C30pi16 = 0.0263689890 0.0133613622 0.2727158135 0.3117715371 0.0039462429 0.0218978778 0.0694354212 0.0799842408 0.0309615130 0.0027521242 0.0038579661 0.0288630708 0.0018363656 0.0023351927 0.0062457560 0.0798729385 0.0324143174 0.0007229656 0.0063857732 0.0042705326;\n\ -frequency C30pi17 = 0.1526502637 0.0332784464 0.0168229991 0.0237392180 0.0040215287 0.0341733672 0.0377949108 0.0306214335 0.0141929803 0.0123317972 0.0290062362 0.0375543022 0.0064473224 0.0058584416 0.3864504800 0.0880336410 0.0489543188 0.0018252558 0.0048877798 0.0313552773;\n\ -frequency C30pi18 = 0.0080247558 0.0017408595 0.0006327403 0.0003385965 0.0023412143 0.0015507896 0.0007818945 0.0005403825 0.0010026402 0.3177056649 0.3737894172 0.0012598254 0.0488212345 0.0311968471 0.0020687549 0.0012095129 0.0065696791 0.0016309208 0.0043343553 0.1944599147;\n\ -frequency C30pi19 = 0.0599950319 0.1000540567 0.1334918892 0.0889730776 0.0016884984 0.0864856169 0.0962700957 0.0588796388 0.0327277145 0.0021467269 0.0070876372 0.1825860579 0.0033979446 0.0011800742 0.0141408084 0.0779002375 0.0448817374 0.0006249028 0.0032641120 
0.0042241415;\n\ -frequency C30pi20 = 0.0393520657 0.0838170642 0.1425481600 0.0431197671 0.0099071945 0.1019786610 0.0394639510 0.0282866471 0.2095718357 0.0076101442 0.0258339558 0.0596434088 0.0084586675 0.0188680789 0.0096840517 0.0624998643 0.0347087967 0.0054645779 0.0564145251 0.0127685828;\n\ -frequency C30pi21 = 0.0072715487 0.0140998918 0.0019756795 0.0027603830 0.0067852535 0.0043339290 0.0025069369 0.0080834718 0.0113217919 0.0056609640 0.0394199644 0.0017735096 0.0079866080 0.1271475634 0.0041098092 0.0052244365 0.0043022271 0.6273570153 0.1084563767 0.0094226397;\n\ -frequency C30pi22 = 0.0907070068 0.0290062335 0.0860677696 0.0745872716 0.0063699858 0.0259377035 0.0386802115 0.4750046194 0.0168090013 0.0014721054 0.0055149849 0.0343855535 0.0024692074 0.0028859215 0.0112150781 0.0731110371 0.0153705714 0.0022914775 0.0041860660 0.0039281943;\n\ -frequency C30pi23 = 0.0055291882 0.0024626303 0.0046086594 0.0011413426 0.0072105915 0.0022692184 0.0009683043 0.0016070950 0.0325831191 0.0082918400 0.0353677882 0.0013849437 0.0074486804 0.3744093753 0.0013374573 0.0057402692 0.0037279636 0.0330334445 0.4609978298 0.0098802591;\n\ -frequency C30pi24 = 0.2443263138 0.0045386562 0.0062422652 0.0031590902 0.0273880205 0.0053593950 0.0076715636 0.0196089609 0.0020189401 0.1017435067 0.0468424225 0.0045492259 0.0201286022 0.0060619450 0.0185219126 0.0497753825 0.1170795523 0.0009577255 0.0035333687 0.3104931504;\n\ -frequency C30pi25 = 0.0863111274 0.0984811895 0.0313963115 0.0600902926 0.0024419845 0.1672351286 0.2036096150 0.0175221435 0.0245245046 0.0105994220 0.0271209781 0.1485789590 0.0095824358 0.0029393105 0.0068276769 0.0347800318 0.0408210979 0.0014001253 0.0055105388 0.0202271268;\n\ -frequency C30pi26 = 0.0643926114 0.0369048739 0.1031213278 0.1628208462 0.0023165895 0.0752534859 0.1762701353 0.0297139006 0.0303503732 0.0088163033 0.0148016812 0.0727140107 0.0056748403 0.0043066715 0.0099270322 0.0926433867 0.0833129915 0.0011237109 0.0093801464 
0.0161550816;\n\ -frequency C30pi27 = 0.1736682858 0.0943628709 0.0520404980 0.0285984935 0.0083596568 0.0722446698 0.0483894060 0.0781901497 0.0266134684 0.0068641911 0.0219499324 0.0964011794 0.0112303313 0.0058273974 0.0169661076 0.1547802460 0.0751701930 0.0028774511 0.0082130397 0.0172524320;\n\ -frequency C30pi28 = 0.0347856579 0.3075984538 0.0314157384 0.0092355245 0.0062754891 0.0861073155 0.0323568406 0.0170288127 0.0306438905 0.0091932292 0.0224428556 0.3020845818 0.0093720833 0.0034303536 0.0104447169 0.0326882932 0.0328713449 0.0025244855 0.0064171317 0.0130832013;\n\ -frequency C30pi29 = 0.1087737102 0.0051781020 0.0032679768 0.0015823203 0.0247877480 0.0057932006 0.0041769888 0.0134703172 0.0024765788 0.1643462917 0.2337152707 0.0027000391 0.0539213396 0.0316523420 0.0154886946 0.0188187787 0.0474912345 0.0037656478 0.0073106362 0.2512827825;\n\ -frequency C30pi30 = 0.1101008748 0.0324324597 0.0435098681 0.0579268520 0.0072699765 0.0615196630 0.0828181488 0.0314463068 0.0308557019 0.0530865813 0.1096787834 0.0293860426 0.0458728977 0.0269153699 0.0296430687 0.0715887866 0.0685882454 0.0062324120 0.0257237601 0.0754042006;\n\ -model C30 = POISSON+G+FMIX{C30pi1:1:0.0095783264,C30pi2:1:0.0248476365,C30pi3:1:0.0636309366,C30pi4:1:0.0537939225,C30pi5:1:0.0295885587,C30pi6:1:0.0117587936,C30pi7:1:0.0132013428,C30pi8:1:0.0236868805,C30pi9:1:0.0261687659,C30pi10:1:0.0239821974,C30pi11:1:0.0257100906,C30pi12:1:0.0465072425,C30pi13:1:0.0546794546,C30pi14:1:0.0536085131,C30pi15:1:0.0270622670,C30pi16:1:0.0403913593,C30pi17:1:0.0474212700,C30pi18:1:0.0458816478,C30pi19:1:0.0214036510,C30pi20:1:0.0290385981,C30pi21:1:0.0123391793,C30pi22:1:0.0569350229,C30pi23:1:0.0419687568,C30pi24:1:0.0339027062,C30pi25:1:0.0388777376,C30pi26:1:0.0196343766,C30pi27:1:0.0233086174,C30pi28:1:0.0622722654,C30pi29:1:0.0184803385,C30pi30:1:0.0203395454};\n\ -\n\ -[ ---------------------------------------------------------\n\ - CAT-C40 profile mixture model of Le, Gascuel & Lartillot 
(2008)\n\ - --------------------------------------------------------- ]\n\ -frequency C40pi1 = 0.0660259814 0.0231861755 0.1599815873 0.1054473175 0.0056586745 0.0273928499 0.0440360794 0.0711238664 0.0168194755 0.0039088727 0.0055316013 0.0366689617 0.0037412416 0.0013104807 0.0176359169 0.2497687201 0.1507079582 0.0006723214 0.0038290224 0.0065528958;\n\ -frequency C40pi2 = 0.0232377444 0.0122683027 0.2759650991 0.3532087982 0.0037987468 0.0197339134 0.0739378219 0.0576668030 0.0315866952 0.0031092806 0.0038711609 0.0259363304 0.0017355634 0.0024032103 0.0063116881 0.0657067704 0.0270483653 0.0007602894 0.0069602476 0.0047531689;\n\ -frequency C40pi3 = 0.0166486809 0.0012594763 0.0012622242 0.0005651446 0.0036665719 0.0010669784 0.0013356251 0.0008894749 0.0008231853 0.4129367561 0.0884689295 0.0011904105 0.0186054583 0.0082775676 0.0014029981 0.0021339439 0.0162167380 0.0006082049 0.0019553200 0.4206863114;\n\ -frequency C40pi4 = 0.2394741986 0.0072901253 0.0120536943 0.0044741726 0.0283811727 0.0086558850 0.0105529632 0.0135109628 0.0038929844 0.0765957115 0.0358494908 0.0071093014 0.0199496319 0.0055991131 0.0114265585 0.0847798773 0.1797284519 0.0009838000 0.0042240671 0.2454678377;\n\ -frequency C40pi5 = 0.1194613086 0.0233255669 0.0294552140 0.0134272792 0.0150526644 0.0301537796 0.0192173037 0.0337675998 0.0214746045 0.0579001821 0.1446308373 0.0147261337 0.0561242940 0.0550467421 0.0631355418 0.0925266727 0.0831230185 0.0131636136 0.0331118002 0.0811758434;\n\ -frequency C40pi6 = 0.0567043710 0.0117359330 0.0364734454 0.2955500969 0.0008924801 0.0609516515 0.3795154126 0.0230469606 0.0118360971 0.0031182036 0.0060137466 0.0314205689 0.0028584065 0.0012972333 0.0124745819 0.0300334889 0.0227051137 0.0007738758 0.0031343761 0.0094639563;\n\ -frequency C40pi7 = 0.0179027412 0.0040967133 0.0035697688 0.0008870412 0.0160760340 0.0045395474 0.0023182113 0.0039829808 0.0127292680 0.0404650518 0.1676143477 0.0027994718 0.0424172255 0.3344862590 0.0020115128 
0.0075841581 0.0068227293 0.0518381385 0.2452542553 0.0326045442;\n\ -frequency C40pi8 = 0.2712170094 0.0056480837 0.0141045260 0.0021017036 0.2003830179 0.0048264059 0.0023229984 0.0502501222 0.0053727960 0.0150684657 0.0330003443 0.0020646283 0.0154811217 0.0202990358 0.0045351023 0.1764198412 0.0839578061 0.0046265242 0.0141271048 0.0741933626;\n\ -frequency C40pi9 = 0.0894736584 0.1040026384 0.0190192153 0.0272183085 0.0045538316 0.1168091917 0.1275076663 0.0115685734 0.0215746293 0.0469424171 0.0512035100 0.1382047308 0.0147656854 0.0056590176 0.0095546504 0.0383953611 0.0836652641 0.0017079427 0.0062181292 0.0819555787;\n\ -frequency C40pi10 = 0.0495441385 0.0375345822 0.0315863530 0.0143641284 0.0182505609 0.0316504100 0.0215379122 0.0140199913 0.1108543799 0.0247065801 0.0700287927 0.0258142032 0.0188271760 0.1418048822 0.0112101202 0.0456094427 0.0361427973 0.0371985427 0.2223972375 0.0369177689;\n\ -frequency C40pi11 = 0.1704314254 0.0415784004 0.0271109259 0.1098556600 0.0009747331 0.0917299929 0.2536458944 0.0249846466 0.0101389736 0.0058749399 0.0116526350 0.0903324267 0.0036512738 0.0013321301 0.0293613681 0.0561765645 0.0479045729 0.0006696817 0.0022637316 0.0203300232;\n\ -frequency C40pi12 = 0.0162725399 0.0054826071 0.0021876158 0.0010182101 0.0050614097 0.0104414465 0.0025141347 0.0021935389 0.0029914328 0.1328173512 0.4904441779 0.0040120394 0.1929931280 0.0376245580 0.0034333187 0.0040122105 0.0127074428 0.0032107554 0.0058100621 0.0647720205;\n\ -frequency C40pi13 = 0.0823765743 0.0734226431 0.0598389731 0.0311745159 0.0065694304 0.0686451074 0.0675530778 0.0178961594 0.0251143622 0.0291161743 0.0287904106 0.0982301674 0.0168022878 0.0064717899 0.0114044922 0.1302995288 0.1820374273 0.0022724618 0.0079573279 0.0540270885;\n\ -frequency C40pi14 = 0.3594965940 0.0072407229 0.0033421456 0.0031484357 0.0251417178 0.0049014279 0.0064962700 0.1194682267 0.0022970448 0.0458766662 0.0468053893 0.0050168849 0.0215568816 0.0092020461 0.0443915884 
0.0465270945 0.0477755293 0.0024540215 0.0046450361 0.1942162766;\n\ -frequency C40pi15 = 0.2015583874 0.0430161610 0.0425386444 0.0954149893 0.0032365302 0.0772010857 0.1534908791 0.0667291678 0.0155218808 0.0067740832 0.0165114429 0.0547322644 0.0060162992 0.0025643300 0.0091970560 0.1185981804 0.0625472744 0.0009565508 0.0031150007 0.0202797924;\n\ -frequency C40pi16 = 0.1042731047 0.0147062345 0.0621645800 0.2424069523 0.0022450116 0.0356498946 0.1774821588 0.1697819523 0.0132648834 0.0018929517 0.0042542620 0.0220651981 0.0016441234 0.0012570256 0.0317041583 0.0778636230 0.0288515782 0.0006930898 0.0017741945 0.0060250231;\n\ -frequency C40pi17 = 0.0781183281 0.0111498472 0.0159270309 0.0041541669 0.0194448667 0.0240151620 0.0116633921 0.0111524105 0.0063589385 0.1354530457 0.2457574952 0.0093729846 0.1087781166 0.0262793949 0.0055294038 0.0408518858 0.0860514305 0.0031547586 0.0085108496 0.1482764918;\n\ -frequency C40pi18 = 0.0856592432 0.0101233167 0.0441923073 0.0135061568 0.0136072878 0.0092590642 0.0078602552 0.0245400880 0.0055379075 0.0100591561 0.0103343559 0.0127318506 0.0080675803 0.0047153035 0.0175273997 0.3406479487 0.3573294650 0.0014243098 0.0035099810 0.0193670227;\n\ -frequency C40pi19 = 0.0674594695 0.1161734658 0.1163107783 0.0662588409 0.0021634231 0.0939360452 0.0865501280 0.0368556575 0.0381149118 0.0033238825 0.0093839985 0.1899736999 0.0039487389 0.0018212730 0.0151207830 0.0842204423 0.0565953680 0.0007187305 0.0046189437 0.0064514195;\n\ -frequency C40pi20 = 0.0572262322 0.0494723554 0.1083882793 0.1793932771 0.0015301521 0.0903668522 0.1992261265 0.0316472274 0.0291392067 0.0045804559 0.0100739563 0.1015624916 0.0040204606 0.0013701849 0.0063674130 0.0621142922 0.0496102162 0.0006669285 0.0046497641 0.0085941279;\n\ -frequency C40pi21 = 0.0036020163 0.0102712927 0.0013455508 0.0020871647 0.0045484804 0.0032718114 0.0017857730 0.0056391633 0.0064968790 0.0029292916 0.0232635081 0.0010419846 0.0044592278 0.0855714596 0.0024991984 
0.0030671803 0.0025900250 0.7617821954 0.0678809532 0.0058668443;\n\ -frequency C40pi22 = 0.2032018418 0.0083895722 0.0143743754 0.0135011707 0.0098131618 0.0044514580 0.0083818173 0.6184886075 0.0027747899 0.0011828492 0.0039826789 0.0044598895 0.0020631785 0.0019619615 0.0085870399 0.0739919851 0.0108922273 0.0018606145 0.0015638674 0.0060769136;\n\ -frequency C40pi23 = 0.0050898779 0.0028740788 0.0057092962 0.0016126151 0.0061776450 0.0024693148 0.0012040415 0.0016334183 0.0393460780 0.0059088776 0.0249343597 0.0013713662 0.0049795162 0.3563126947 0.0014136424 0.0059527667 0.0036536770 0.0357987380 0.4853645852 0.0081934106;\n\ -frequency C40pi24 = 0.0403335679 0.0540186397 0.0216052457 0.0098218598 0.0081549541 0.0383639077 0.0375406578 0.0047934404 0.0176735565 0.1893424159 0.1051859862 0.0607377395 0.0305599836 0.0119140782 0.0077550551 0.0257110173 0.1009913165 0.0028780020 0.0115276935 0.2210908828;\n\ -frequency C40pi25 = 0.0790086293 0.1065441152 0.0309384274 0.0546012394 0.0024947877 0.1843375981 0.1997882784 0.0192655847 0.0270700474 0.0075667489 0.0254542392 0.1553108816 0.0098024439 0.0023773444 0.0056640684 0.0332370813 0.0359574739 0.0011682801 0.0048820809 0.0145306498;\n\ -frequency C40pi26 = 0.0722240672 0.0489728405 0.0678929607 0.1194883992 0.0064755348 0.0708969573 0.1345886574 0.0287815397 0.0699011334 0.0173588702 0.0519870084 0.0490341790 0.0154411043 0.0348233029 0.0145597486 0.0589579876 0.0425972780 0.0087913770 0.0554386705 0.0317883834;\n\ -frequency C40pi27 = 0.1085842431 0.0206450023 0.0441956285 0.1529666596 0.0012502570 0.0405398136 0.1664851192 0.0336098469 0.0134902179 0.0038821795 0.0089861440 0.0576227094 0.0024339036 0.0014553522 0.1990095021 0.0846749753 0.0454715217 0.0005902831 0.0027650162 0.0113416246;\n\ -frequency C40pi28 = 0.0309526387 0.3195887318 0.0301336637 0.0082352132 0.0065593963 0.0832608108 0.0291974083 0.0154206187 0.0310385092 0.0098251607 0.0237900204 0.3062634996 0.0097071728 0.0036891639 0.0095029109 
0.0295285439 0.0303052301 0.0028125285 0.0068850639 0.0133037148;\n\ -frequency C40pi29 = 0.0098953741 0.0019604525 0.0007307935 0.0003748228 0.0028276741 0.0017337004 0.0009182100 0.0006997068 0.0010419482 0.3115040359 0.3750387796 0.0013960508 0.0474451070 0.0298607430 0.0025296256 0.0014628019 0.0075738968 0.0016799771 0.0040259930 0.1973003069;\n\ -frequency C40pi30 = 0.1163213921 0.0273321006 0.0250163656 0.0731917718 0.0034792282 0.0586677248 0.1380880502 0.0193193469 0.0160240740 0.0712243431 0.0771473538 0.0355120487 0.0242841072 0.0094117688 0.0508926833 0.0475560280 0.0726552233 0.0026892716 0.0076166020 0.1235705162;\n\ -frequency C40pi31 = 0.1285218235 0.0373073487 0.1179844215 0.0402749992 0.0172928883 0.0439706110 0.0250692272 0.1127033137 0.0606981059 0.0109350265 0.0258415767 0.0288749652 0.0167592956 0.0199118302 0.0180674983 0.1741489481 0.0648967655 0.0063574951 0.0321771650 0.0182066946;\n\ -frequency C40pi32 = 0.0372286941 0.0094528028 0.0053377315 0.0023703173 0.0144940088 0.0079097138 0.0048585146 0.0046433943 0.0186795102 0.1820459527 0.1780099317 0.0058198481 0.0371334296 0.1463772419 0.0048538601 0.0103570678 0.0284161577 0.0211293603 0.0958905187 0.1849919442;\n\ -frequency C40pi33 = 0.0535643726 0.1159797757 0.0239172676 0.0113537364 0.0096256227 0.0928585070 0.0391699080 0.0120279334 0.0384887950 0.0522748270 0.1892392595 0.0996037748 0.0712219098 0.0264213736 0.0083720574 0.0299114019 0.0389484845 0.0104232046 0.0265030050 0.0500947835;\n\ -frequency C40pi34 = 0.1332424803 0.0033147683 0.0022704992 0.0012739239 0.0246514263 0.0030843469 0.0040461524 0.0089139209 0.0015864680 0.1971284995 0.1251288442 0.0023713225 0.0286947200 0.0156995251 0.0118845743 0.0171461828 0.0563298009 0.0017341820 0.0048778410 0.3566205216;\n\ -frequency C40pi35 = 0.1498658185 0.0326607222 0.0176452820 0.0280354786 0.0035437399 0.0348151308 0.0435380704 0.0311112643 0.0140625707 0.0101953314 0.0251433928 0.0393124980 0.0051548319 0.0047533945 0.3923800449 
0.0874496981 0.0473306717 0.0015215239 0.0043208299 0.0271597054;\n\ -frequency C40pi36 = 0.4214366359 0.0061425967 0.0121590498 0.0073305074 0.0187609694 0.0072748556 0.0086837775 0.0902333103 0.0030262044 0.0039362777 0.0047193320 0.0051508681 0.0038306586 0.0027156136 0.0208940236 0.2901188793 0.0651922314 0.0008108235 0.0023622848 0.0252211004;\n\ -frequency C40pi37 = 0.1770713890 0.1332782050 0.0311656783 0.0226500225 0.0078348946 0.0752471493 0.0509767242 0.0897389513 0.0220667143 0.0059519850 0.0205369728 0.1257689326 0.0092982479 0.0040514178 0.0264087912 0.1169591448 0.0565566955 0.0029947127 0.0049346701 0.0165087010;\n\ -frequency C40pi38 = 0.0293984032 0.0370901720 0.1483622633 0.1099709900 0.0031729093 0.0388688450 0.0464270335 0.4222420155 0.0272494642 0.0007997326 0.0037634298 0.0622314461 0.0016657052 0.0015039626 0.0056481827 0.0472252404 0.0086568982 0.0009176022 0.0027693124 0.0020363920;\n\ -frequency C40pi39 = 0.0265779317 0.0791104753 0.1318603134 0.0280314140 0.0101369144 0.0989710810 0.0269057233 0.0173376629 0.2815133703 0.0064646977 0.0268210053 0.0474749135 0.0072375268 0.0276960902 0.0083014995 0.0426276702 0.0259042511 0.0078528946 0.0891598394 0.0100147256;\n\ -frequency C40pi40 = 0.0096096503 0.0027136180 0.0013104432 0.0006331856 0.0077301682 0.0033899420 0.0010471898 0.0020227436 0.0039001415 0.0733098005 0.4451691588 0.0014931484 0.0732575295 0.2630171690 0.0042768091 0.0036117358 0.0057928403 0.0181275729 0.0370698053 0.0425173480;\n\ -model C40 = 
POISSON+G+FMIX{C40pi1:1:0.0223853788,C40pi2:1:0.0338891820,C40pi3:1:0.0577169375,C40pi4:1:0.0252416233,C40pi5:1:0.0108607921,C40pi6:1:0.0462373793,C40pi7:1:0.0102293175,C40pi8:1:0.0147523625,C40pi9:1:0.0143161352,C40pi10:1:0.0182302541,C40pi11:1:0.0204025079,C40pi12:1:0.0425505156,C40pi13:1:0.0248627269,C40pi14:1:0.0105892988,C40pi15:1:0.0188238725,C40pi16:1:0.0086663445,C40pi17:1:0.0148496147,C40pi18:1:0.0343037402,C40pi19:1:0.0225335203,C40pi20:1:0.0174068578,C40pi21:1:0.0112207827,C40pi22:1:0.0443532245,C40pi23:1:0.0392573370,C40pi24:1:0.0196756555,C40pi25:1:0.0287690328,C40pi26:1:0.0114441177,C40pi27:1:0.0112338740,C40pi28:1:0.0582694099,C40pi29:1:0.0444272279,C40pi30:1:0.0112010942,C40pi31:1:0.0145176111,C40pi32:1:0.0114629026,C40pi33:1:0.0239628061,C40pi34:1:0.0266266492,C40pi35:1:0.0481201159,C40pi36:1:0.0371147423,C40pi37:1:0.0160476688,C40pi38:1:0.0237249267,C40pi39:1:0.0235226203,C40pi40:1:0.0261998398};\n\ -\n\ -[ ---------------------------------------------------------\n\ - CAT-C50 profile mixture model of Le, Gascuel & Lartillot (2008)\n\ - --------------------------------------------------------- ]\n\ -frequency C50pi1 = 0.1357566757 0.0328511938 0.0937692919 0.0757182069 0.0041887049 0.0448010470 0.0572805366 0.1210866186 0.0167465028 0.0049719235 0.0113823284 0.0458096069 0.0064563157 0.0029292810 0.0228705187 0.2060115780 0.1011347978 0.0012443033 0.0056104605 0.0093801079;\n\ -frequency C50pi2 = 0.0530862751 0.1905936010 0.0595772279 0.0320970468 0.0026608079 0.1152605895 0.0840617877 0.0196495178 0.0274729775 0.0064919200 0.0158709120 0.2635539775 0.0078171228 0.0017231166 0.0121639300 0.0449347664 0.0472425608 0.0008407188 0.0037608716 0.0111402722;\n\ -frequency C50pi3 = 0.0083279799 0.0007172026 0.0006359642 0.0003134388 0.0020547407 0.0007351595 0.0005373710 0.0005576905 0.0004858721 0.4370910601 0.1208722220 0.0006394909 0.0195499664 0.0090175268 0.0007265254 0.0007876194 0.0057076665 0.0006453449 0.0016797264 0.3889174318;\n\ -frequency 
C50pi4 = 0.2072868350 0.0166858699 0.0129177658 0.0020625574 0.0849982226 0.0151757635 0.0065903656 0.0472047575 0.0130289256 0.0345690755 0.1042722764 0.0075861385 0.0498042308 0.0572909747 0.0064928361 0.1183618036 0.0780339514 0.0128352368 0.0323576924 0.0924447209;\n\ -frequency C50pi5 = 0.0364181183 0.0076427099 0.0052725527 0.0020389950 0.0171009943 0.0064088232 0.0042399368 0.0053824238 0.0198596156 0.1361523026 0.1651892915 0.0045481616 0.0387479055 0.2025922657 0.0055053348 0.0121111950 0.0254621828 0.0327580458 0.1368025306 0.1357666147;\n\ -frequency C50pi6 = 0.0535489196 0.0099543365 0.0269073208 0.3076150732 0.0007101021 0.0574988641 0.4066173371 0.0204537673 0.0096286483 0.0025879708 0.0049721459 0.0280989086 0.0025143457 0.0010618006 0.0124317994 0.0247246015 0.0191107367 0.0006385967 0.0024132214 0.0085115039;\n\ -frequency C50pi7 = 0.0074733729 0.0025226602 0.0033967505 0.0005574007 0.0081158286 0.0037658904 0.0013610444 0.0022017759 0.0115142679 0.0195730439 0.1268878488 0.0018497296 0.0269141680 0.3821985941 0.0019970421 0.0057127939 0.0039692337 0.0553575998 0.3184099394 0.0162210153;\n\ -frequency C50pi8 = 0.2615592974 0.0027098854 0.0124908261 0.0020153852 0.2740228527 0.0017043893 0.0007667803 0.0463498030 0.0019474361 0.0082858275 0.0147048711 0.0010787235 0.0063051368 0.0062080862 0.0039442437 0.1940042648 0.0963699489 0.0016185483 0.0048431386 0.0590705550;\n\ -frequency C50pi9 = 0.1190557043 0.0956320251 0.0215995297 0.0378323341 0.0041536088 0.1151348174 0.1337084452 0.0179375220 0.0216767047 0.0336228770 0.0557402194 0.1132452331 0.0178407325 0.0063405927 0.0147606946 0.0478666925 0.0712091035 0.0022867238 0.0075728630 0.0627835766;\n\ -frequency C50pi10 = 0.0505010344 0.0281381134 0.0341872191 0.0178157543 0.0183140005 0.0271729546 0.0212018661 0.0176052654 0.1190104107 0.0161645217 0.0561232531 0.0203908848 0.0146521042 0.1553484132 0.0135251600 0.0478959652 0.0292963208 0.0376058633 0.2477283800 0.0273225153;\n\ -frequency C50pi11 = 
0.1239446910 0.0355525870 0.0409769096 0.1479953346 0.0011563976 0.0908869312 0.2700270273 0.0283589709 0.0126760201 0.0064825033 0.0122101302 0.0787433823 0.0042467440 0.0016540857 0.0205717500 0.0552940245 0.0474239965 0.0008596621 0.0027823209 0.0181565313;\n\ -frequency C50pi12 = 0.0160542063 0.0027359185 0.0014708079 0.0007004900 0.0034820152 0.0061470051 0.0016359686 0.0022137927 0.0013207229 0.1640035117 0.4616043506 0.0021342205 0.2174099502 0.0143751693 0.0013694259 0.0037614383 0.0172651408 0.0011454338 0.0019438536 0.0792265779;\n\ -frequency C50pi13 = 0.1548192401 0.0131324559 0.0280584102 0.0095301620 0.0166267416 0.0175228950 0.0170969133 0.0179616718 0.0078385586 0.0865181208 0.0523369910 0.0132802182 0.0326348210 0.0083511229 0.0145594414 0.1096327081 0.2218108602 0.0015829972 0.0062173360 0.1704883347;\n\ -frequency C50pi14 = 0.2950313592 0.0027580697 0.0021616268 0.0015364190 0.0375439186 0.0028808733 0.0042976283 0.0261726702 0.0008294969 0.0834938143 0.0553606311 0.0022642314 0.0181259911 0.0074433078 0.0126794048 0.0382913338 0.0783205173 0.0010015148 0.0034016419 0.3264055498;\n\ -frequency C50pi15 = 0.1683177099 0.0820396152 0.0526048706 0.0822517150 0.0023029997 0.0969341246 0.1488943001 0.0535291188 0.0179803231 0.0032503636 0.0114941086 0.1156402642 0.0039439899 0.0015002945 0.0066854154 0.0924511658 0.0480769504 0.0006152103 0.0025022919 0.0089851683;\n\ -frequency C50pi16 = 0.0334088176 0.0134485791 0.1590918150 0.3657542471 0.0025127086 0.0327665151 0.1820739351 0.0740807194 0.0202010901 0.0016650025 0.0036700956 0.0295517886 0.0017087810 0.0011422805 0.0073155123 0.0426788071 0.0211162106 0.0005931485 0.0034724580 0.0037474882;\n\ -frequency C50pi17 = 0.0777586977 0.0174438357 0.0053423343 0.0043431532 0.0062523949 0.0220851281 0.0161769285 0.0053903202 0.0080675581 0.1052945216 0.1617365895 0.0148319919 0.0288253912 0.0168985297 0.2565426868 0.0202089662 0.0542929694 0.0060146095 0.0078109966 0.1646823969;\n\ -frequency C50pi18 = 
0.0727013979 0.0048977192 0.0026095383 0.0011420120 0.0198747408 0.0066949336 0.0030401434 0.0079074845 0.0026492900 0.1685788878 0.3185489163 0.0026024909 0.0735597038 0.0490419983 0.0051699104 0.0128630830 0.0305356924 0.0050857840 0.0095279173 0.2029683559;\n\ -frequency C50pi19 = 0.0658153836 0.0833432992 0.0224582275 0.0107735824 0.0092974677 0.0745951987 0.0299754097 0.0146336557 0.0148026634 0.0671888719 0.2198675990 0.0868172087 0.1084156835 0.0155812696 0.0071132147 0.0381451947 0.0562948237 0.0056421684 0.0102813038 0.0589577740;\n\ -frequency C50pi20 = 0.0525278351 0.0364897390 0.0903013988 0.1854660991 0.0037795400 0.0776857292 0.1789287290 0.0232011648 0.0687702011 0.0135825419 0.0337350646 0.0458143770 0.0108457797 0.0191020037 0.0088729983 0.0495289201 0.0389358438 0.0046292762 0.0354195947 0.0223831639;\n\ -frequency C50pi21 = 0.0026515970 0.0080885204 0.0010572021 0.0016052142 0.0036540307 0.0022979498 0.0014681767 0.0046230912 0.0043887616 0.0020669456 0.0172444871 0.0006593575 0.0034691503 0.0658351447 0.0019185467 0.0022498420 0.0021278866 0.8183345006 0.0515918357 0.0046677595;\n\ -frequency C50pi22 = 0.0548133174 0.0692044159 0.0211265710 0.0207779125 0.0072646572 0.0567865657 0.0738456579 0.0051797705 0.0168408457 0.1386104888 0.0713795154 0.0896393340 0.0201205491 0.0082150393 0.0104049016 0.0282344422 0.0995597110 0.0019722093 0.0074054035 0.1986186919;\n\ -frequency C50pi23 = 0.0047955268 0.0028033787 0.0050506238 0.0014080516 0.0061671241 0.0019350126 0.0009861551 0.0014396818 0.0389623239 0.0048950388 0.0151748150 0.0012306644 0.0032520404 0.3601993060 0.0011266316 0.0054509935 0.0034763921 0.0362899931 0.4980200998 0.0073361467;\n\ -frequency C50pi24 = 0.0365462996 0.0280070630 0.0183606115 0.0070525803 0.0093251684 0.0300239431 0.0221812842 0.0047778642 0.0178840316 0.2025947306 0.1973012130 0.0250209750 0.0557862640 0.0258067541 0.0042772210 0.0209374223 0.0731398943 0.0049738166 0.0200601168 0.1959427463;\n\ -frequency C50pi25 = 
0.0684197684 0.0111619750 0.0544764241 0.0224313301 0.0106958312 0.0091799953 0.0097436799 0.0255871619 0.0055558006 0.0059416697 0.0076746853 0.0144198991 0.0056892166 0.0037356845 0.0172554137 0.3527301149 0.3586913194 0.0012501907 0.0028636710 0.0124961682;\n\ -frequency C50pi26 = 0.0495330775 0.1060064564 0.1511923969 0.0483471288 0.0080946362 0.0886108407 0.0449556763 0.0331436148 0.1447288287 0.0061850770 0.0190407203 0.0948075276 0.0063418871 0.0126162987 0.0100869563 0.0799801169 0.0445418973 0.0044765096 0.0363930724 0.0109172804;\n\ -frequency C50pi27 = 0.0702411901 0.0642050323 0.0779553908 0.0510328304 0.0042438849 0.0723300485 0.0883747710 0.0177347101 0.0233800891 0.0198779320 0.0183537117 0.1051267065 0.0107865869 0.0037987118 0.0112811107 0.1345081583 0.1805543234 0.0014252764 0.0055089381 0.0392805971;\n\ -frequency C50pi28 = 0.1207399152 0.1741788075 0.0385528120 0.0162689581 0.0118494185 0.0760068404 0.0337935391 0.0653431008 0.0342783806 0.0085426053 0.0256788075 0.1434443984 0.0112347894 0.0061270793 0.0294493558 0.1091415488 0.0634181251 0.0046156419 0.0085374279 0.0187984481;\n\ -frequency C50pi29 = 0.0064521696 0.0021817337 0.0005939658 0.0003904032 0.0021538307 0.0019099968 0.0008007758 0.0005208471 0.0011374294 0.2850758996 0.4278536740 0.0013920239 0.0561988528 0.0449501501 0.0026289702 0.0011053664 0.0055157148 0.0022753671 0.0059612583 0.1509015707;\n\ -frequency C50pi30 = 0.0969092741 0.0359723370 0.0633194168 0.0411020773 0.0145578946 0.0466661704 0.0469223767 0.0374614202 0.0537149580 0.0394603009 0.0856256544 0.0283577862 0.0346435320 0.0507298072 0.0167177549 0.0990945318 0.0806503833 0.0128373826 0.0598972198 0.0553597218;\n\ -frequency C50pi31 = 0.0840212010 0.0214242172 0.2240668646 0.0354684798 0.0265031681 0.0235675678 0.0076026464 0.1173325117 0.0516019781 0.0048917455 0.0067211727 0.0173653354 0.0079342101 0.0087501486 0.0093276105 0.2637097946 0.0630157977 0.0022314593 0.0170994247 0.0073646661;\n\ -frequency C50pi32 = 
0.0055061507 0.0012508737 0.0004824961 0.0004530173 0.0054435931 0.0011315076 0.0004150379 0.0012285001 0.0019884532 0.0617431901 0.4342418135 0.0008161868 0.0554628445 0.3289659386 0.0025814794 0.0021197505 0.0029510440 0.0172981374 0.0412097497 0.0347102358;\n\ -frequency C50pi33 = 0.0442014612 0.1295816316 0.0258622052 0.0148900471 0.0076165815 0.1301765579 0.0636708052 0.0105339122 0.0662542863 0.0423977240 0.1434197528 0.1040381429 0.0403363621 0.0260540342 0.0089335090 0.0242573966 0.0317938092 0.0077831996 0.0309973779 0.0472012033;\n\ -frequency C50pi34 = 0.0571984155 0.0034929878 0.0031324721 0.0012472712 0.0113230439 0.0025279922 0.0040737817 0.0030647398 0.0020494153 0.3131200932 0.0901750144 0.0034699557 0.0242565205 0.0112345295 0.0048197020 0.0095675953 0.0529842025 0.0010645104 0.0041851135 0.3970126433;\n\ -frequency C50pi35 = 0.1141963934 0.0102229903 0.0178644126 0.0172307307 0.0056978908 0.0039055039 0.0085974326 0.7425714921 0.0026414175 0.0005602022 0.0019872568 0.0055400059 0.0004739977 0.0010663175 0.0054302447 0.0508318204 0.0055408544 0.0018890811 0.0012409205 0.0025110348;\n\ -frequency C50pi36 = 0.3531758625 0.0043402857 0.0031812423 0.0030024877 0.0165711581 0.0029126214 0.0042077690 0.4520896100 0.0021366362 0.0063692579 0.0120143269 0.0022586970 0.0080260130 0.0043865828 0.0111462027 0.0658344033 0.0182952730 0.0010872878 0.0023330172 0.0266312657;\n\ -frequency C50pi37 = 0.0310798708 0.0234519814 0.1273669012 0.1197925100 0.0031216960 0.0295858842 0.0470763446 0.4883046368 0.0193412101 0.0008855622 0.0032808220 0.0408430573 0.0014984226 0.0016298596 0.0063229464 0.0423452622 0.0082797260 0.0007718998 0.0024996877 0.0025217188;\n\ -frequency C50pi38 = 0.0370340667 0.0689410214 0.1704407181 0.1041817082 0.0018108784 0.0715495095 0.0659866718 0.2159298358 0.0443591808 0.0008668888 0.0064679416 0.1275300877 0.0027248464 0.0014178323 0.0060253154 0.0534574556 0.0147073432 0.0007999410 0.0037708147 0.0019979426;\n\ -frequency C50pi39 = 
0.0160398536 0.0526622999 0.1051167149 0.0187352256 0.0085330116 0.0922616498 0.0154450839 0.0076235155 0.3848449137 0.0057129406 0.0277195224 0.0219347380 0.0071078308 0.0376358992 0.0072201969 0.0209969653 0.0142198783 0.0096946226 0.1384243143 0.0080708232;\n\ -frequency C50pi40 = 0.0165549167 0.0085856833 0.0049441851 0.0016567380 0.0086529073 0.0184087838 0.0033759867 0.0033844413 0.0084695063 0.0483923758 0.4963073963 0.0056997331 0.1949377866 0.0999527140 0.0060271256 0.0084289585 0.0122619536 0.0114013282 0.0192314834 0.0233259964;\n\ -frequency C50pi41 = 0.0227379959 0.0137060298 0.3162561805 0.2932103363 0.0037073869 0.0169119273 0.0380984220 0.0550224760 0.0319886436 0.0039219190 0.0041582288 0.0312539900 0.0019467591 0.0022276545 0.0059660826 0.0998736999 0.0462336456 0.0007310446 0.0069012376 0.0051463400;\n\ -frequency C50pi42 = 0.2406936002 0.0197081082 0.0462578641 0.0206379264 0.0186726798 0.0189843646 0.0129785315 0.1749109142 0.0118714342 0.0049349532 0.0126237761 0.0127876711 0.0095642661 0.0083606873 0.0326283314 0.2101300187 0.1130042042 0.0041951500 0.0069210515 0.0201344675;\n\ -frequency C50pi43 = 0.0214325714 0.3730744306 0.0220674626 0.0037495290 0.0069038342 0.0670391950 0.0159298773 0.0126211348 0.0284477629 0.0102051798 0.0242954287 0.3272456489 0.0093147452 0.0036403029 0.0070138928 0.0216860624 0.0232259733 0.0030422478 0.0065368590 0.0125278613;\n\ -frequency C50pi44 = 0.1567707052 0.0258059606 0.0161658338 0.0223946414 0.0074382689 0.0274455582 0.0410010574 0.0360501033 0.0159972680 0.0640941463 0.0944756654 0.0192586366 0.0312789234 0.0227728534 0.1653169011 0.0640177954 0.0549103568 0.0050980224 0.0138248643 0.1158824381;\n\ -frequency C50pi45 = 0.4345912387 0.0061142999 0.0097660767 0.0060102195 0.0197377879 0.0069062805 0.0082800652 0.0829075516 0.0029125126 0.0047747098 0.0054182241 0.0049974525 0.0039676868 0.0029052002 0.0193588692 0.2795854727 0.0677816788 0.0008196092 0.0025196339 0.0306454302;\n\ -frequency C50pi46 = 
0.0296734965 0.1443250343 0.0128668160 0.0059561454 0.0129805897 0.0492311054 0.0262726056 0.0069437743 0.0676183913 0.0452364160 0.1374511139 0.0907089722 0.0308070846 0.0816441785 0.0060701025 0.0197130339 0.0299715868 0.0461468661 0.1119414237 0.0444412635;\n\ -frequency C50pi47 = 0.1089911217 0.0159187676 0.0643054232 0.2086425054 0.0016540963 0.0375565797 0.1791004993 0.0610564917 0.0144660242 0.0038322948 0.0067778708 0.0372270242 0.0022817918 0.0012634818 0.0851792013 0.1065821239 0.0524401536 0.0005901255 0.0027836060 0.0093508169;\n\ -frequency C50pi48 = 0.1429463629 0.0304191716 0.0191145368 0.0351867799 0.0031493079 0.0341248336 0.0508492526 0.0305914291 0.0134276644 0.0070227247 0.0197257013 0.0421442438 0.0038904796 0.0040697467 0.4052202085 0.0874406009 0.0445304918 0.0012842531 0.0039485525 0.0209136585;\n\ -frequency C50pi49 = 0.0580116857 0.0903213669 0.0369245281 0.0613603988 0.0022829951 0.2073851382 0.2225853236 0.0159476910 0.0311816018 0.0068543753 0.0217092509 0.1504781849 0.0084841006 0.0020581132 0.0046206107 0.0276754451 0.0321477211 0.0011651089 0.0051889637 0.0136173964;\n\ -frequency C50pi50 = 0.2153540940 0.0359173007 0.0219927944 0.0735128474 0.0037017294 0.0566408566 0.1350375818 0.0662986417 0.0157121780 0.0138456188 0.0266922211 0.0474338339 0.0088042600 0.0035035311 0.0739583083 0.0921989198 0.0575687235 0.0019306896 0.0044520833 0.0454437865;\n\ -model C50 = 
POISSON+G+FMIX{C50pi1:1:0.0164297003,C50pi2:1:0.0273175755,C50pi3:1:0.0460247610,C50pi4:1:0.0084864734,C50pi5:1:0.0125389252,C50pi6:1:0.0343549036,C50pi7:1:0.0130241102,C50pi8:1:0.0094755681,C50pi9:1:0.0190040551,C50pi10:1:0.0151902354,C50pi11:1:0.0320534760,C50pi12:1:0.0210059850,C50pi13:1:0.0237408547,C50pi14:1:0.0239841203,C50pi15:1:0.0213748021,C50pi16:1:0.0210717705,C50pi17:1:0.0050241805,C50pi18:1:0.0166262276,C50pi19:1:0.0143945956,C50pi20:1:0.0104391130,C50pi21:1:0.0107628277,C50pi22:1:0.0148818171,C50pi23:1:0.0321480239,C50pi24:1:0.0145477978,C50pi25:1:0.0332355807,C50pi26:1:0.0143190281,C50pi27:1:0.0234478734,C50pi28:1:0.0183044983,C50pi29:1:0.0403269452,C50pi30:1:0.0135629530,C50pi31:1:0.0091880799,C50pi32:1:0.0158270022,C50pi33:1:0.0121019379,C50pi34:1:0.0353560982,C50pi35:1:0.0404495617,C50pi36:1:0.0104569232,C50pi37:1:0.0146187792,C50pi38:1:0.0093984095,C50pi39:1:0.0146773809,C50pi40:1:0.0201635562,C50pi41:1:0.0255640273,C50pi42:1:0.0039486842,C50pi43:1:0.0393652608,C50pi44:1:0.0056415419,C50pi45:1:0.0382833580,C50pi46:1:0.0039735086,C50pi47:1:0.0140269355,C50pi48:1:0.0476703673,C50pi49:1:0.0204062788,C50pi50:1:0.0117835304};\n\ -\n\ -[ ---------------------------------------------------------\n\ - CAT-C60 profile mixture model of Le, Gascuel & Lartillot (2008)\n\ - --------------------------------------------------------- ]\n\ -frequency C60pi1 = 0.1534363248 0.0444389067 0.0796726990 0.0546757288 0.0047306596 0.0514333025 0.0529324359 0.1103775749 0.0174480218 0.0050343887 0.0130294160 0.0603928711 0.0075550589 0.0035554315 0.0249523704 0.2029625968 0.0957668473 0.0014444483 0.0059800307 0.0101808864;\n\ -frequency C60pi2 = 0.0281984692 0.3031055487 0.0312954609 0.0091549350 0.0019503463 0.0939884393 0.0388530140 0.0084028325 0.0155384715 0.0107872879 0.0217786594 0.3476042929 0.0109904917 0.0015919288 0.0071539896 0.0197479052 0.0328352333 0.0009209994 0.0025714024 0.0135302919;\n\ -frequency C60pi3 = 0.0083680740 0.0007319768 0.0006123446 
0.0002228366 0.0020433870 0.0009498685 0.0004731544 0.0004825748 0.0005189995 0.3768453098 0.2608334606 0.0006296168 0.0315700586 0.0123984358 0.0009595916 0.0009746383 0.0049990761 0.0008657759 0.0017132332 0.2938075872;\n\ -frequency C60pi4 = 0.2227229348 0.0064846074 0.0061206496 0.0007997588 0.1640285908 0.0051051888 0.0027280806 0.0202702520 0.0037183875 0.0455406072 0.0883350071 0.0022832871 0.0348094559 0.0228667054 0.0035471579 0.0850040072 0.1012848285 0.0048424833 0.0096500033 0.1698580069;\n\ -frequency C60pi5 = 0.0412139519 0.0067627055 0.0051067690 0.0017434391 0.0204715649 0.0057538477 0.0037263409 0.0069107492 0.0180293946 0.1154281623 0.1693562458 0.0042900270 0.0414066566 0.2239001858 0.0058416410 0.0149106129 0.0239548406 0.0332237129 0.1379349474 0.1200342049;\n\ -frequency C60pi6 = 0.0480550249 0.0308438053 0.0940628721 0.2084606133 0.0037801787 0.0747676701 0.1855184661 0.0191402239 0.0872162350 0.0094685435 0.0277340828 0.0375741243 0.0088308358 0.0196000958 0.0081267777 0.0439680761 0.0324588883 0.0034665720 0.0387499964 0.0181769181;\n\ -frequency C60pi7 = 0.0062848745 0.0026246919 0.0030342510 0.0005324147 0.0073027627 0.0034409089 0.0009741492 0.0019578159 0.0102225186 0.0180592309 0.1179064681 0.0016205916 0.0234721825 0.3974552519 0.0020165583 0.0056903327 0.0037091821 0.0598639097 0.3185565304 0.0152753744;\n\ -frequency C60pi8 = 0.1815005560 0.0026845411 0.0148484537 0.0025145485 0.4205633920 0.0014097001 0.0007088144 0.0461854175 0.0014374605 0.0041745536 0.0098310464 0.0006474254 0.0041611385 0.0068976432 0.0038767247 0.1864537050 0.0687189855 0.0027083549 0.0061033012 0.0345742379;\n\ -frequency C60pi9 = 0.0600740822 0.0367642654 0.0134869242 0.0170572285 0.0070719770 0.0142469806 0.0127486975 0.0343564471 0.0305859029 0.0204571345 0.0994551128 0.0212367087 0.0318165939 0.1140907926 0.0297628218 0.0505792699 0.0339368402 0.2312808862 0.1192491702 0.0217421638;\n\ -frequency C60pi10 = 0.0708394513 0.0474098489 0.0416822304 
0.0324482918 0.0131641265 0.0494874703 0.0508264389 0.0183309196 0.0567272697 0.0650369079 0.1282255556 0.0343618389 0.0390362930 0.0594359563 0.0135608209 0.0551343199 0.0642260358 0.0137118382 0.0673934289 0.0789609573;\n\ -frequency C60pi11 = 0.0617689371 0.0076332888 0.0303081645 0.3430234188 0.0007199837 0.0307856241 0.3792509407 0.0284658686 0.0079592120 0.0016999627 0.0039945339 0.0216076877 0.0019734329 0.0009814186 0.0174791407 0.0337831940 0.0203426591 0.0006130268 0.0017102752 0.0058992300;\n\ -frequency C60pi12 = 0.0421559537 0.1042068314 0.0286980872 0.0164385240 0.0044450330 0.1393690851 0.0531949072 0.0134711207 0.0177764997 0.0267727728 0.1967237776 0.1323735242 0.1182827521 0.0086728324 0.0051837880 0.0255852718 0.0333292020 0.0045852327 0.0070281498 0.0217066546;\n\ -frequency C60pi13 = 0.2814809927 0.0100367066 0.0172867775 0.0064385734 0.0258337508 0.0133101925 0.0115046410 0.0270054934 0.0054629657 0.0188216093 0.0190993462 0.0098712843 0.0158719589 0.0050481705 0.0129510033 0.1886808600 0.2427104979 0.0012274627 0.0036052922 0.0837524211;\n\ -frequency C60pi14 = 0.2769188320 0.0017226995 0.0021315271 0.0011672545 0.0318292645 0.0018216251 0.0024752467 0.0199646887 0.0005170863 0.0983109006 0.0489264326 0.0016232163 0.0173414948 0.0070843906 0.0070179705 0.0336348952 0.0814141404 0.0007118144 0.0032942319 0.3620922883;\n\ -frequency C60pi15 = 0.1577797792 0.1112140270 0.0570403237 0.0648290471 0.0053318076 0.1065373681 0.0913586945 0.0906209718 0.0533809635 0.0029171632 0.0156225571 0.0782148712 0.0045758969 0.0025047816 0.0067077844 0.0929310045 0.0393122597 0.0028575821 0.0077590269 0.0085040899;\n\ -frequency C60pi16 = 0.0593735135 0.0354740772 0.1151175314 0.2189482708 0.0015332173 0.0688752402 0.1819422913 0.0813707101 0.0220478285 0.0020993577 0.0056191259 0.0750172075 0.0021871739 0.0010838321 0.0109737422 0.0726449461 0.0380238271 0.0007346460 0.0026664883 0.0042669729;\n\ -frequency C60pi17 = 0.0978066326 0.0265576438 0.0101843505 
0.0120781428 0.0064138404 0.0307876446 0.0291282947 0.0128912798 0.0128036716 0.0723904209 0.1279438950 0.0245630658 0.0303267312 0.0198963719 0.2723524069 0.0350549441 0.0484557340 0.0046842467 0.0104773833 0.1152032995;\n\ -frequency C60pi18 = 0.0124023388 0.0030680354 0.0009239105 0.0006037316 0.0041885695 0.0032957441 0.0012524000 0.0011306791 0.0013542104 0.2344167852 0.4550557697 0.0016718177 0.0667307666 0.0610615367 0.0037076169 0.0019420934 0.0067612939 0.0038937184 0.0074911765 0.1290478057;\n\ -frequency C60pi19 = 0.0794230623 0.1294739355 0.0662792725 0.0587236242 0.0019919499 0.1143880588 0.1246900644 0.0325432311 0.0238605372 0.0036277150 0.0097987961 0.2147597316 0.0041846209 0.0012869951 0.0142410239 0.0615807386 0.0477333594 0.0006525371 0.0029420233 0.0078187231;\n\ -frequency C60pi20 = 0.0248148778 0.0083552910 0.1888915388 0.4278832998 0.0027839717 0.0210777725 0.1432386297 0.0643968435 0.0185736870 0.0022506941 0.0034558626 0.0179274104 0.0015714503 0.0014680353 0.0073768035 0.0377003132 0.0187767966 0.0005891859 0.0042602708 0.0046072655;\n\ -frequency C60pi21 = 0.0017003427 0.0060674330 0.0004222900 0.0010711490 0.0029059420 0.0016424179 0.0011731741 0.0035579609 0.0027630465 0.0012291190 0.0127420810 0.0004273804 0.0025671348 0.0513377024 0.0013536738 0.0011871674 0.0014033068 0.8640436936 0.0390912582 0.0033137266;\n\ -frequency C60pi22 = 0.0468360682 0.0639796924 0.0205603686 0.0185615516 0.0059954138 0.0557030821 0.0705436036 0.0045435329 0.0152062773 0.1550613356 0.0824253382 0.0866248354 0.0245854443 0.0080177192 0.0081485616 0.0237025617 0.0962054496 0.0018368673 0.0067131723 0.2047491243;\n\ -frequency C60pi23 = 0.0258764792 0.0201097124 0.0298384107 0.0107037437 0.0142503909 0.0158529432 0.0105649532 0.0073064999 0.1411078834 0.0114777629 0.0407992414 0.0119179202 0.0098798997 0.1876429961 0.0051228805 0.0275699644 0.0170764901 0.0405124999 0.3536390834 0.0187502449;\n\ -frequency C60pi24 = 0.0296285022 0.0046400334 0.0034944393 
0.0008851024 0.0090046468 0.0055481111 0.0033046518 0.0027969482 0.0050701500 0.2583397750 0.2668085481 0.0046690936 0.0770825277 0.0408798247 0.0026918193 0.0068538089 0.0322265673 0.0035506055 0.0153353414 0.2271895033;\n\ -frequency C60pi25 = 0.0555725806 0.0098447861 0.0409064430 0.0140389597 0.0097418602 0.0068727710 0.0069443190 0.0157956555 0.0041631258 0.0069826497 0.0075271247 0.0139224817 0.0058762687 0.0034496730 0.0119733364 0.3482466393 0.4213655981 0.0010061491 0.0026576772 0.0131119012;\n\ -frequency C60pi26 = 0.0682671212 0.0615207091 0.0530661192 0.0360278709 0.0141433148 0.0612274332 0.0497415394 0.0268696520 0.1127674983 0.0132646615 0.0544493838 0.0482609047 0.0170033964 0.0803375967 0.0191949850 0.0671839752 0.0443995774 0.0199957919 0.1255070748 0.0267713947;\n\ -frequency C60pi27 = 0.0792618808 0.0638377192 0.0635289371 0.0436646174 0.0049503302 0.0666365188 0.0829639117 0.0183428565 0.0233169239 0.0249427251 0.0221483402 0.0932577596 0.0120893380 0.0049131149 0.0126360122 0.1334848656 0.1916745928 0.0018040086 0.0062353115 0.0503102360;\n\ -frequency C60pi28 = 0.0731759112 0.2105335985 0.0324200854 0.0110007149 0.0123458504 0.0858951989 0.0349942684 0.0224509173 0.0386903280 0.0246226304 0.0508307349 0.1783344831 0.0185740720 0.0093148787 0.0148722772 0.0603181436 0.0649574934 0.0051046395 0.0130597421 0.0385040321;\n\ -frequency C60pi29 = 0.0878402710 0.0110331750 0.0060801213 0.0032803903 0.0171147088 0.0109831614 0.0101465790 0.0087090941 0.0054902234 0.1987761871 0.1756460821 0.0082096925 0.0417232903 0.0191954435 0.0111283542 0.0209862621 0.0697718709 0.0031744014 0.0081905473 0.2825201446;\n\ -frequency C60pi30 = 0.0990215820 0.0349351987 0.0211149501 0.0118797946 0.0108995677 0.0557710676 0.0278999992 0.0240250097 0.0123445071 0.0776564721 0.2354511299 0.0322817789 0.1207665429 0.0214442058 0.0075655541 0.0524170141 0.0649785115 0.0047075806 0.0077328724 0.0771066610;\n\ -frequency C60pi31 = 0.0601641168 0.0161995226 0.2783522747 
0.0337188808 0.0315066987 0.0210645987 0.0059839451 0.0543080710 0.0531523512 0.0070650825 0.0070698142 0.0139598368 0.0088298653 0.0069525877 0.0075834331 0.2829802556 0.0860317092 0.0014966551 0.0134849454 0.0100953553;\n\ -frequency C60pi32 = 0.0049781737 0.0018412331 0.0007012207 0.0005315368 0.0052978737 0.0024089907 0.0007630546 0.0015051317 0.0041575221 0.0443828633 0.4417417476 0.0011615060 0.0602807417 0.3351117140 0.0027847686 0.0025795769 0.0030288544 0.0171302592 0.0458455751 0.0237676560;\n\ -frequency C60pi33 = 0.0251996593 0.1114468110 0.0142031925 0.0041012288 0.0097099500 0.0620070749 0.0262571641 0.0038067269 0.0431938935 0.0974043253 0.2447197423 0.0824312856 0.0539323021 0.0429091639 0.0052658505 0.0096093107 0.0251183002 0.0146571900 0.0456965140 0.0783303143;\n\ -frequency C60pi34 = 0.0230361648 0.0014748749 0.0013534390 0.0006264439 0.0048580122 0.0009870046 0.0015762583 0.0011565336 0.0008899238 0.3952895890 0.0576537208 0.0014663528 0.0140986541 0.0072127040 0.0020177885 0.0028770237 0.0205580852 0.0005477695 0.0019539080 0.4603657493;\n\ -frequency C60pi35 = 0.1408776963 0.0297808449 0.0171297613 0.0285076933 0.0032213718 0.0320632225 0.0423838922 0.0299558472 0.0131321477 0.0066914481 0.0195120028 0.0383781635 0.0036276863 0.0041231064 0.4383466229 0.0851400095 0.0422765692 0.0013236871 0.0037087638 0.0198194632;\n\ -frequency C60pi36 = 0.4442491220 0.0050216551 0.0102305117 0.0057193038 0.0235405374 0.0055997640 0.0064889886 0.0822687710 0.0025505743 0.0033615104 0.0040990063 0.0038097073 0.0028683069 0.0024413211 0.0162890960 0.2999969708 0.0559664935 0.0007735426 0.0020639824 0.0226608347;\n\ -frequency C60pi37 = 0.0898717958 0.0070958305 0.0130067619 0.0129166888 0.0044131479 0.0023806547 0.0058957027 0.8087563021 0.0016517855 0.0004339282 0.0015564455 0.0033939025 0.0004253422 0.0008073572 0.0034128140 0.0362876891 0.0032887534 0.0015223902 0.0008537454 0.0020289624;\n\ -frequency C60pi38 = 0.0550840246 0.0472254260 0.1877829604 
0.1273796123 0.0035824944 0.0527969268 0.0655884730 0.0637607521 0.0404883483 0.0075574152 0.0136304510 0.0867682792 0.0081684229 0.0040375032 0.0110681809 0.1263380956 0.0752544318 0.0013563681 0.0118590434 0.0102727908;\n\ -frequency C60pi39 = 0.0117681394 0.0442558806 0.0844144627 0.0144712108 0.0070388254 0.1038342049 0.0110901161 0.0049626578 0.4337194047 0.0061337038 0.0298794939 0.0137928558 0.0076237551 0.0338266335 0.0081346096 0.0140571089 0.0108276801 0.0080683065 0.1437251732 0.0083757773;\n\ -frequency C60pi40 = 0.0159285638 0.0048098656 0.0032692643 0.0010966937 0.0080519916 0.0134552459 0.0021324215 0.0025086365 0.0049192147 0.0501543893 0.5307634291 0.0035599431 0.2160085187 0.0743650717 0.0045247350 0.0066922196 0.0119092283 0.0070928134 0.0106565111 0.0281012433;\n\ -frequency C60pi41 = 0.0195973253 0.0105142992 0.3289103336 0.3099848991 0.0034539049 0.0116196758 0.0250777800 0.0627528956 0.0295961112 0.0032650434 0.0028246884 0.0240963907 0.0008425062 0.0019706550 0.0049062781 0.1064984500 0.0438053705 0.0006333959 0.0056197958 0.0040302013;\n\ -frequency C60pi42 = 0.0833804360 0.0125871438 0.0969824220 0.0686820704 0.0081981143 0.0121520930 0.0227415415 0.0982291876 0.0073954898 0.0017471177 0.0039653113 0.0129342146 0.0019557975 0.0024132583 0.0355924232 0.3115606483 0.2113368612 0.0016329034 0.0017991083 0.0047138579;\n\ -frequency C60pi43 = 0.0181409133 0.4129662563 0.0233205154 0.0033333547 0.0085143598 0.0526694251 0.0096531879 0.0224552642 0.0375238929 0.0035090482 0.0149146621 0.3208065790 0.0046098856 0.0035426859 0.0087197469 0.0262309419 0.0131791136 0.0034766995 0.0079588201 0.0044746474;\n\ -frequency C60pi44 = 0.2494227404 0.0185481724 0.0164119567 0.0169234299 0.0122862654 0.0228501981 0.0370491083 0.0347467705 0.0087069587 0.0595718359 0.0451065029 0.0177064733 0.0204556127 0.0077360919 0.0686403544 0.0889295672 0.0986017356 0.0028603862 0.0061938477 0.1672519917;\n\ -frequency C60pi45 = 0.1419737638 0.0373945961 0.0576296888 
0.0537452477 0.0068856658 0.0286239972 0.0407540287 0.3988107872 0.0152895617 0.0016627616 0.0092348297 0.0314273807 0.0055425500 0.0040286132 0.0180328866 0.1123731997 0.0242478202 0.0025909098 0.0049054208 0.0048462908;\n\ -frequency C60pi46 = 0.0178903305 0.1958843646 0.0155853897 0.0031054277 0.0290304227 0.1051819261 0.0040503389 0.0100480293 0.1252696215 0.0016708003 0.0722356645 0.0233340169 0.0116142354 0.0238913260 0.0009938415 0.0181675536 0.0186260222 0.2260554691 0.0859787232 0.0113864962;\n\ -frequency C60pi47 = 0.1454758367 0.0420979067 0.0400419720 0.1294249748 0.0014186329 0.0906469055 0.2471353458 0.0319650773 0.0130426183 0.0058525371 0.0123593139 0.0818154090 0.0044178939 0.0017552077 0.0151135525 0.0656688174 0.0511289472 0.0007731441 0.0029258438 0.0169400635;\n\ -frequency C60pi48 = 0.0169799462 0.0242346701 0.1318047919 0.1043655101 0.0022087215 0.0269349684 0.0376379591 0.5404470183 0.0181137053 0.0007459679 0.0021146994 0.0508617611 0.0009473769 0.0006780593 0.0038754401 0.0297030159 0.0045836180 0.0006031889 0.0015704090 0.0015891728;\n\ -frequency C60pi49 = 0.0402646249 0.1152022601 0.0323829165 0.0293968352 0.0039388655 0.2497008043 0.1603524245 0.0129260411 0.0617967839 0.0098491259 0.0354918823 0.1448804422 0.0124818865 0.0041153375 0.0043374229 0.0243246958 0.0305645368 0.0026676598 0.0097227847 0.0156026694;\n\ -frequency C60pi50 = 0.2256914610 0.0523417493 0.0244308734 0.0637125217 0.0043390149 0.0578159236 0.1154830640 0.0867335173 0.0131066949 0.0085086217 0.0193314218 0.0660468804 0.0064877206 0.0027440054 0.0611149102 0.1070877179 0.0507677144 0.0013695913 0.0028982948 0.0299883012;\n\ -frequency C60pi51 = 0.0033164209 0.0015310773 0.0030830171 0.0008266472 0.0051890730 0.0011024889 0.0005134130 0.0010432830 0.0278451262 0.0041895268 0.0111212494 0.0007149922 0.0023621780 0.3801761447 0.0008365077 0.0035876698 0.0023608948 0.0333346985 0.5107889643 0.0060766272;\n\ -frequency C60pi52 = 0.1995014012 0.0236078675 0.0392254543 
0.0094955104 0.0584590451 0.0254265363 0.0125535371 0.0939787338 0.0341857201 0.0140209879 0.0449387571 0.0118723304 0.0246990633 0.0634433944 0.0145385320 0.1663920640 0.0533159207 0.0129802666 0.0606346163 0.0367302614;\n\ -frequency C60pi53 = 0.0319448994 0.1011667268 0.2084709220 0.0378074649 0.0066040348 0.0766372935 0.0279488190 0.0365541130 0.2088643258 0.0047542347 0.0156545731 0.0868664783 0.0043253317 0.0108915768 0.0060899575 0.0577656939 0.0302051160 0.0026001883 0.0387897304 0.0060585202;\n\ -frequency C60pi54 = 0.0776799515 0.0142518583 0.0403216692 0.0080651725 0.0140092962 0.0179995517 0.0112622427 0.0136868237 0.0133729897 0.1239635380 0.0724670993 0.0129144967 0.0420745442 0.0173584908 0.0117084432 0.0922723571 0.2316899445 0.0028153633 0.0141726542 0.1679135132;\n\ -frequency C60pi55 = 0.1183662657 0.0805192606 0.0259524932 0.0495595439 0.0035624835 0.1204924917 0.1537589210 0.0194993426 0.0229373171 0.0302661211 0.0571250629 0.0982304112 0.0171727472 0.0068665705 0.0175153030 0.0486588400 0.0635796210 0.0023008307 0.0083027431 0.0553336300;\n\ -frequency C60pi56 = 0.0528559899 0.0193569043 0.0264743774 0.2092761515 0.0008625883 0.1212409715 0.4024189781 0.0155838458 0.0124148798 0.0054864832 0.0090256472 0.0497017031 0.0042357114 0.0012650715 0.0063185636 0.0197262901 0.0235463735 0.0008381610 0.0033948741 0.0159764347;\n\ -frequency C60pi57 = 0.0344366215 0.0426221820 0.1636716191 0.1139007491 0.0020985982 0.0605413987 0.0541780220 0.3361639671 0.0461776737 0.0003463416 0.0048355678 0.0667552967 0.0019704509 0.0031557619 0.0040369775 0.0481173332 0.0089148085 0.0006510101 0.0054145649 0.0020110555;\n\ -frequency C60pi58 = 0.1153088951 0.0151278638 0.0458476603 0.1755516676 0.0014962362 0.0366731222 0.1749410045 0.0394181311 0.0132401530 0.0056912974 0.0101409559 0.0433118387 0.0030332064 0.0015700232 0.1665802563 0.0871536033 0.0468260603 0.0007515702 0.0031432715 0.0141931831;\n\ -frequency C60pi59 = 0.3865149348 0.0037579334 0.0030420497 
0.0022366810 0.0218928357 0.0021464743 0.0031387843 0.3694353983 0.0014672902 0.0085376076 0.0127257242 0.0018840458 0.0080581695 0.0039281367 0.0158688291 0.0808877279 0.0305195935 0.0009922880 0.0019020345 0.0410634615;\n\ -frequency C60pi60 = 0.0146570745 0.0028841333 0.0012998335 0.0005210575 0.0024317913 0.0049362750 0.0014874369 0.0020953252 0.0010181940 0.1913901476 0.4432797758 0.0022898369 0.2217427062 0.0091637503 0.0007685153 0.0027251487 0.0170997497 0.0008779380 0.0014756028 0.0778557075;\n\ -model C60 = POISSON+G+FMIX{C60pi1:1:0.0169698865,C60pi2:1:0.0211683374,C60pi3:1:0.0276589079,C60pi4:1:0.0065675964,C60pi5:1:0.0141221416,C60pi6:1:0.0068774834,C60pi7:1:0.0146909701,C60pi8:1:0.0067225777,C60pi9:1:0.0018396660,C60pi10:1:0.0102547197,C60pi11:1:0.0230896163,C60pi12:1:0.0057941033,C60pi13:1:0.0125394534,C60pi14:1:0.0204526478,C60pi15:1:0.0070629602,C60pi16:1:0.0117982741,C60pi17:1:0.0068334668,C60pi18:1:0.0433775839,C60pi19:1:0.0318278731,C60pi20:1:0.0222546108,C60pi21:1:0.0102264969,C60pi22:1:0.0150545891,C60pi23:1:0.0134159878,C60pi24:1:0.0148552065,C60pi25:1:0.0239111516,C60pi26:1:0.0128776278,C60pi27:1:0.0222318842,C60pi28:1:0.0247444742,C60pi29:1:0.0214274810,C60pi30:1:0.0115001882,C60pi31:1:0.0076017389,C60pi32:1:0.0130258568,C60pi33:1:0.0093701965,C60pi34:1:0.0467194264,C60pi35:1:0.0441940314,C60pi36:1:0.0322263154,C60pi37:1:0.0402999891,C60pi38:1:0.0150234227,C60pi39:1:0.0104589903,C60pi40:1:0.0214742395,C60pi41:1:0.0154957836,C60pi42:1:0.0101789953,C60pi43:1:0.0227980379,C60pi44:1:0.0123204539,C60pi45:1:0.0066777583,C60pi46:1:0.0004150083,C60pi47:1:0.0344385130,C60pi48:1:0.0113663379,C60pi49:1:0.0127143049,C60pi50:1:0.0124323741,C60pi51:1:0.0262124415,C60pi52:1:0.0064994957,C60pi53:1:0.0103203293,C60pi54:1:0.0142463512,C60pi55:1:0.0215600067,C60pi56:1:0.0199150700,C60pi57:1:0.0038964200,C60pi58:1:0.0113448855,C60pi59:1:0.0128595846,C60pi60:1:0.0117656776};\n\ -\n\ -end;\n"; +const char* builtin_mixmodels_definition = R"( +#nexus + +begin 
models; + +[ --------------------------------------------------------- + EX2 mixture model of Le, Lartillot & Gascuel (2008) + --------------------------------------------------------- ] + +[ Exposed component ] +model ExpEX2 = +0.526738 +0.483150 0.505837 +0.658902 0.051052 3.902456 +2.051872 2.214326 0.961103 0.129989 +1.280002 2.039552 1.301786 0.399061 0.456521 +1.306565 0.137928 0.285806 3.100403 0.033946 2.514377 +1.370782 0.363365 1.820100 0.885317 0.886564 0.320746 0.303966 +0.540809 2.288922 4.949307 0.700890 2.172284 3.755421 0.270957 0.401311 +0.171986 0.237023 0.337226 0.018315 1.037046 0.212032 0.084442 0.012279 0.317239 +0.430511 0.670514 0.158937 0.021949 1.702066 1.261113 0.110508 0.052946 0.869247 8.675343 +0.697731 3.881079 1.677194 0.105450 0.146263 2.570254 0.730337 0.279865 0.598289 0.338782 0.313102 +1.043937 0.656943 0.539827 0.066925 1.846562 1.973592 0.188160 0.158136 0.519993 9.483497 14.176858 1.013268 +0.265209 0.097443 0.182522 0.026918 3.002586 0.080193 0.023999 0.084663 2.047163 2.193062 4.802817 0.044792 3.261401 +1.270693 0.166534 0.068692 0.228829 0.156216 0.362501 0.214847 0.148900 0.323141 0.071992 0.343919 0.195470 0.099252 0.087020 +4.826665 0.751947 4.412265 0.975564 5.294149 1.033459 0.382235 1.970857 0.993310 0.190509 0.389101 0.592156 0.557254 0.668834 1.223981 +2.131819 0.584329 2.133604 0.368887 2.067387 1.013613 0.511390 0.174527 0.580960 2.563630 0.522334 1.147459 2.960091 0.244420 0.413148 7.384701 +0.143081 0.475590 0.061094 0.042618 1.603125 0.210329 0.048276 0.186382 0.961546 0.208313 1.130724 0.052858 1.328785 5.210001 0.045945 0.316078 0.144393 +0.208643 0.196271 0.599369 0.121313 3.842632 0.158470 0.064648 0.039280 8.230282 0.517123 0.713426 0.084962 0.812142 23.228875 0.043249 0.405310 0.234217 4.903887 +2.544463 0.313443 0.172264 0.073705 4.207648 0.497398 0.484620 0.132496 0.329895 23.711178 3.466991 0.348362 4.136445 1.199764 0.368231 0.266531 3.184874 0.252132 0.459187 + +0.088367 0.078147 0.047163 0.087976 
0.004517 0.058526 0.128039 0.056993 0.024856 0.025277 0.045202 0.094639 0.012338 0.016158 0.060124 0.055346 0.051290 0.006771 0.021554 0.036718; + +[ Buried component ] +model BurEX2 = +0.338649 +0.201335 0.981635 +0.283859 0.247537 6.505182 +2.640244 0.904730 1.353325 0.312005 +0.543136 4.570308 2.439639 0.682052 0.216787 +0.748479 0.917979 0.804756 10.030310 0.024055 8.670112 +2.700465 0.539246 0.810739 0.810727 0.701320 0.330139 0.636675 +0.237686 3.175221 6.308043 1.540002 0.469875 8.675492 0.750683 0.183743 +0.044209 0.099241 0.162644 0.020816 0.166986 0.082745 0.030581 0.005017 0.075820 +0.124047 0.314159 0.088243 0.017526 0.449241 0.641784 0.073392 0.017752 0.277023 2.383760 +0.433721 17.781822 2.851914 0.459939 0.117548 6.815411 3.482941 0.484653 1.247888 0.161658 0.219757 +0.497479 0.448773 0.380964 0.057176 0.815999 2.089412 0.291379 0.054491 0.307450 2.817174 4.759683 1.082403 +0.093991 0.055530 0.098936 0.026160 0.662517 0.091948 0.022760 0.034431 0.675645 0.521416 1.672365 0.077917 1.296869 +0.986621 0.356417 0.214521 0.246129 0.164228 0.654039 0.295079 0.179095 0.428213 0.037671 0.170780 0.347219 0.074086 0.057233 +5.925588 0.979993 4.725421 1.158990 5.111992 1.120931 0.737456 2.279470 0.886126 0.051057 0.089611 0.925355 0.275366 0.274582 1.151114 +1.958501 0.630713 2.007592 0.289641 2.284140 0.787821 0.539892 0.097432 0.467489 0.644041 0.202812 1.401676 1.340732 0.103118 0.601281 8.190534 +0.068357 0.784449 0.109073 0.085810 0.457880 0.297731 0.155877 0.157418 0.708743 0.054134 0.374568 0.115777 0.477495 2.362999 0.047127 0.209085 0.097054 +0.084768 0.312038 0.615093 0.202611 0.788164 0.293543 0.137306 0.035497 4.938330 0.101803 0.180086 0.280737 0.264540 8.142914 0.059308 0.264401 0.133054 2.905674 +1.387752 0.140091 0.112176 0.058637 1.575057 0.203946 0.239406 0.044011 0.085226 6.427279 1.035942 0.244336 1.033583 0.278010 0.213475 0.079878 1.592560 0.081135 0.108383 + +0.123119 0.019475 0.019852 0.018583 0.018711 0.017275 0.018723 0.050388 0.016402 
0.119697 0.161399 0.012776 0.035838 0.057019 0.030913 0.043472 0.049935 0.012600 0.039929 0.133894; + +[ main definition of EX2 with fixed component rates ] +model EX2 =MIX{BurEX2:0.672020808818762,ExpEX2:1.6413466609931}; + + +[ --------------------------------------------------------- + EX3 mixture model of Le, Lartillot & Gascuel (2008) + --------------------------------------------------------- ] + +[ Buried component ] +model BurEX3 = +0.352598 +0.216996 1.087422 +0.292440 0.323465 7.797086 +2.610812 0.913640 1.460331 0.344397 +0.510610 5.128748 2.811070 0.773241 0.220223 +0.753729 1.090823 0.956820 12.012282 0.021022 10.123412 +2.838061 0.595013 0.884971 0.922298 0.707214 0.351856 0.713974 +0.239679 3.625577 7.108377 1.826237 0.481109 10.246488 0.839852 0.219310 +0.051496 0.102940 0.168735 0.024207 0.162795 0.087881 0.036973 0.004515 0.079975 +0.119849 0.316151 0.091984 0.018800 0.422679 0.648064 0.075035 0.016317 0.282195 2.225363 +0.443183 20.766910 3.194817 0.568138 0.132784 7.478955 4.176123 0.551523 1.415394 0.163276 0.207613 +0.460570 0.458210 0.398615 0.059146 0.765112 2.134261 0.313124 0.053192 0.340474 2.609469 4.476961 1.014674 +0.089411 0.056698 0.104720 0.027913 0.630095 0.094857 0.023275 0.034031 0.691151 0.491179 1.606618 0.077868 1.226530 +0.993370 0.419898 0.217106 0.273526 0.181230 0.729534 0.311152 0.192454 0.483200 0.040002 0.170402 0.376998 0.075002 0.057218 +6.108406 1.066008 5.182562 1.216396 5.236005 1.159086 0.763810 2.404073 0.924395 0.048875 0.084247 0.923997 0.260340 0.260617 1.208454 +1.992855 0.687262 2.181095 0.312299 2.276505 0.829879 0.551397 0.101409 0.480998 0.610331 0.198919 1.407257 1.292634 0.096955 0.648250 8.527249 +0.063159 0.855332 0.134012 0.099769 0.468450 0.329372 0.136731 0.169991 0.745868 0.056715 0.377293 0.137955 0.463394 2.343596 0.058650 0.211406 0.085948 +0.078057 0.341493 0.655744 0.241264 0.762740 0.302096 0.142491 0.040257 5.226086 0.092084 0.180292 0.311130 0.249838 8.141649 0.062812 0.267992 0.128044 
3.047417 +1.339724 0.144916 0.125078 0.062854 1.481083 0.194081 0.225389 0.043663 0.090575 5.973306 0.993888 0.222252 0.964622 0.262045 0.207448 0.083450 1.544911 0.078358 0.105286 + +0.123992 0.016529 0.017595 0.015784 0.019325 0.015552 0.015939 0.049573 0.014540 0.126555 0.167605 0.011083 0.037438 0.058363 0.028849 0.042324 0.049207 0.011962 0.037833 0.139953; + +[ Intermediate component ] +model IntEX3 = +0.489239 +0.466919 0.536794 +0.601908 0.069474 4.603441 +2.430552 1.807414 0.997223 0.166431 +1.101971 2.081359 1.299123 0.508086 0.393348 +1.227777 0.215899 0.345545 3.579383 0.046861 3.113235 +1.873072 0.390054 1.528288 0.941969 0.867139 0.349219 0.406414 +0.519003 1.930915 5.003737 0.781887 1.630085 3.567804 0.324903 0.315383 +0.158722 0.180317 0.295816 0.013254 0.642786 0.179498 0.090830 0.013181 0.209208 +0.345026 0.503290 0.138767 0.024393 1.107569 1.027755 0.123806 0.048549 0.592981 5.439892 +0.610178 4.322929 1.524318 0.121994 0.181609 2.674484 0.792405 0.276766 0.591509 0.301836 0.294950 +0.949957 0.472702 0.502710 0.091008 1.283305 1.905885 0.242081 0.140301 0.378459 6.259505 9.391081 1.074513 +0.247271 0.069820 0.161809 0.028611 2.065479 0.077874 0.025753 0.065388 1.541097 1.306479 3.015722 0.048689 2.243101 +1.334722 0.170174 0.099375 0.211869 0.163190 0.349495 0.155436 0.186099 0.300496 0.065625 0.265961 0.162529 0.088677 0.083754 +5.316955 0.699036 4.526191 1.143652 5.249370 0.970695 0.438792 2.366185 0.939629 0.138819 0.275119 0.532771 0.521510 0.547761 1.187779 +1.963809 0.535034 2.034583 0.383040 2.012437 0.891145 0.531018 0.180104 0.467342 1.861944 0.395319 1.071879 2.340268 0.183984 0.400373 7.243848 +0.145693 0.378596 0.046601 0.048388 1.074147 0.174525 0.063777 0.168836 0.822524 0.110645 0.677913 0.062047 0.796395 3.502387 0.046950 0.290501 0.107097 +0.195764 0.149382 0.534652 0.105996 2.446201 0.150150 0.071967 0.031908 6.198893 0.299207 0.413150 0.090874 0.492692 15.039152 0.044765 0.328289 0.175204 3.125850 +2.227504 0.220361 0.150316 
0.066496 3.112801 0.393451 0.444469 0.108811 0.224352 15.532696 2.152640 0.302279 2.658339 0.738053 0.322254 0.197018 2.507055 0.175763 0.276642 + +0.086346 0.080808 0.041727 0.064440 0.006654 0.052795 0.092110 0.048527 0.028831 0.040497 0.071679 0.079687 0.018007 0.025901 0.052632 0.052778 0.056138 0.010733 0.034744 0.054964; + +[ Highly exposed component ] +model HExEX3 = +0.557500 +0.467024 0.508965 +0.660464 0.044039 3.386724 +1.332582 3.667491 1.440486 0.185886 +1.402485 2.156104 1.297398 0.333117 0.789370 +1.259192 0.111162 0.245837 2.707953 0.058650 2.098300 +0.934526 0.393780 2.196372 0.868249 1.336358 0.322363 0.252359 +0.518929 3.157422 5.392488 0.748008 3.827563 4.517669 0.284167 0.634601 +0.279723 0.407537 0.535113 0.054030 3.345087 0.427624 0.148200 0.015686 0.658979 +0.715094 1.182387 0.270883 0.035162 3.520931 2.366650 0.172395 0.100089 1.779380 18.830270 +0.694526 3.728628 1.747648 0.083685 0.100399 2.477205 0.623294 0.280977 0.694965 0.569776 0.493141 +1.338414 1.261833 0.818216 0.054313 3.918703 2.383718 0.219943 0.228757 0.867786 19.605444 31.431195 1.089056 +0.295523 0.190129 0.263800 0.044853 5.266468 0.120909 0.042178 0.194665 3.494314 5.825792 11.527190 0.044361 6.237844 +1.085021 0.168461 0.041147 0.203765 0.185173 0.353420 0.218194 0.120292 0.375260 0.116875 0.705493 0.190747 0.139085 0.108823 +4.090024 0.852803 4.335615 0.829194 6.499129 1.095446 0.336922 1.733724 1.144100 0.413986 0.878828 0.631498 0.730416 1.167593 1.195720 +2.318400 0.650016 2.351068 0.385247 1.883085 1.167877 0.532167 0.187062 0.796107 4.825759 0.838744 1.268311 4.445757 0.381760 0.419944 7.677284 +0.134371 1.021826 0.151293 0.065183 3.716538 0.530580 0.077516 0.396559 1.324147 0.443432 3.290145 0.064651 4.411035 13.056874 0.056705 0.534908 0.408415 +0.212989 0.424870 1.115762 0.268883 8.874037 0.255572 0.125866 0.107717 14.436023 1.292209 1.491799 0.104026 2.063744 49.760746 0.057618 0.756357 0.396791 12.032322 +3.112666 0.544010 0.214411 0.125541 5.301703 0.868794 
0.839508 0.215758 0.533676 46.074660 7.301056 0.557248 9.151909 2.634769 0.523205 0.564572 4.519860 0.456880 0.670812 + +0.094155 0.070537 0.052200 0.112406 0.002213 0.062733 0.165272 0.062302 0.019853 0.011154 0.019829 0.108860 0.006503 0.006873 0.070091 0.057931 0.046183 0.002449 0.008629 0.019827; + +[ main definition of EX3 with fixed component rates ] +model EX3 = MIX{BurEX3:0.427672756793791,IntEX3:0.837595938019774,HExEX3:1.51863631431518}; + +[ --------------------------------------------------------- + EHO mixture model of Le, Lartillot & Gascuel (2008) + --------------------------------------------------------- ] + +[ extended component ] +model ExtEHO = +0.221750 +0.256487 0.595368 +0.447755 0.112310 7.769815 +4.893140 0.929131 1.061884 0.164472 +0.542660 2.886791 1.927072 0.497273 0.133291 +0.549459 0.290798 0.518264 5.393249 0.003776 4.326528 +5.411319 0.302948 0.907713 0.961651 1.249183 0.173873 0.316780 +0.283752 2.760038 5.159285 0.978418 0.737799 5.086066 0.421812 0.209276 +0.026683 0.053027 0.166715 0.016491 0.151942 0.055934 0.026726 0.001780 0.098605 +0.226816 0.251641 0.062256 0.015837 0.763554 0.537705 0.042909 0.032938 0.321607 3.217159 +0.235513 6.017300 2.543177 0.223507 0.023575 3.432847 1.211039 0.160545 0.671045 0.082221 0.106179 +0.992834 0.351969 0.415447 0.041511 1.271632 1.700679 0.111984 0.117596 0.326393 3.329162 7.496635 0.519821 +0.191967 0.041219 0.090517 0.014810 1.004694 0.042779 0.011177 0.040989 0.641267 0.813011 2.233318 0.023173 1.863238 +1.876507 0.395175 0.362650 0.550534 0.174031 0.731229 0.412907 0.205341 0.381717 0.011597 0.315127 0.393303 0.135360 0.043846 +6.066032 1.083228 5.612711 1.035540 4.263932 1.429211 0.766802 2.266299 1.074108 0.047896 0.147065 0.683291 0.352118 0.382422 1.462674 +1.827471 0.645132 1.883173 0.287521 1.395928 1.013709 0.781080 0.055140 0.512000 0.588357 0.142327 1.256445 1.435179 0.079647 0.417388 6.092548 +0.101419 0.452274 0.065206 0.034173 0.592031 0.164037 0.049674 0.183473 0.741383 
0.069289 0.429275 0.050856 0.545447 2.178510 0.022770 0.304839 0.111242 +0.091914 0.112094 0.451176 0.108762 1.183567 0.132194 0.042952 0.030418 4.373360 0.122828 0.186938 0.096667 0.344096 8.276255 0.053251 0.325231 0.135310 2.597897 +1.970427 0.119016 0.091863 0.041044 1.750822 0.222903 0.225961 0.053387 0.123318 6.815243 1.427658 0.124284 1.427074 0.341263 0.127045 0.076658 1.052442 0.073165 0.101733 + +0.062087 0.053435 0.023743 0.032063 0.013132 0.034151 0.061042 0.030664 0.022696 0.104732 0.099541 0.054991 0.022312 0.045996 0.025392 0.045673 0.072789 0.012691 0.043790 0.139079; + +[ Helix component ] +model HelEHO = +0.346476 +0.374362 0.664870 +0.557349 0.079157 3.710526 +3.192474 1.027228 0.891196 0.006722 +0.776545 1.902860 1.561002 0.517360 0.112028 +0.841893 0.158406 0.443065 3.792847 0.000006 2.320685 +4.037113 0.661209 1.866962 1.144918 1.465540 0.511489 0.573208 +0.394225 2.123760 5.845902 0.737868 1.084909 3.960964 0.270146 0.380762 +0.111350 0.099645 0.233216 0.005627 0.839533 0.089484 0.019520 0.021251 0.132153 +0.193017 0.307622 0.115495 0.009651 1.136538 0.584189 0.039838 0.048105 0.485901 4.915707 +0.481682 3.827872 1.926308 0.163314 0.021755 2.487895 0.768919 0.327002 0.534206 0.147053 0.136159 +0.610432 0.344033 0.452639 0.035659 1.624032 1.146169 0.103241 0.171164 0.364836 6.260678 7.738615 0.549401 +0.147278 0.035167 0.106276 0.018468 1.864906 0.047207 0.010268 0.086543 1.244539 0.927331 3.243633 0.016265 2.326533 +1.090575 0.181605 0.093658 0.386490 0.097655 0.462559 0.290152 0.568098 0.458437 0.043237 0.207460 0.198291 0.061027 0.067592 +6.243684 0.836138 5.633664 0.952131 6.398291 1.267404 0.430602 5.463144 1.088326 0.102127 0.193860 0.707365 0.438507 0.470620 1.534272 +2.847158 0.566364 2.984732 0.347047 3.711971 1.083181 0.495700 0.500029 0.642773 1.698955 0.402699 1.111399 2.483456 0.231119 0.685164 8.832473 +0.090983 0.369015 0.085583 0.046821 0.950521 0.183299 0.040785 0.391093 0.950288 0.075780 0.624335 0.041505 0.980672 3.915972 
0.053806 0.299723 0.100663 +0.152848 0.170981 0.594708 0.106099 2.051641 0.121416 0.047614 0.064377 8.167042 0.195540 0.352598 0.069186 0.465779 15.178886 0.058255 0.405459 0.201603 4.035822 +2.140511 0.136453 0.145376 0.046174 4.011687 0.191618 0.192292 0.202844 0.174981 14.460840 2.175028 0.136317 2.393838 0.659302 0.418505 0.180248 3.585329 0.175143 0.281722 + +0.121953 0.076798 0.032215 0.066765 0.006842 0.061304 0.131841 0.026596 0.020392 0.047287 0.087919 0.084679 0.020970 0.024145 0.025871 0.042103 0.038715 0.008346 0.023841 0.051421; + +[ Other component ] +model OthEHO = +0.529263 +0.379476 0.612335 +0.516691 0.067732 4.012914 +3.774890 1.615176 0.888663 0.165810 +1.312262 2.913667 1.533683 0.442262 0.337571 +1.403437 0.154460 0.333334 3.815893 0.015567 3.743866 +1.272402 0.389317 1.243222 0.661976 0.554904 0.332656 0.319770 +0.558733 2.816641 4.803000 0.761339 1.223662 4.889028 0.323617 0.300981 +0.124057 0.155080 0.219635 0.019097 0.560959 0.100743 0.038076 0.005599 0.184752 +0.340362 0.580087 0.119838 0.015948 1.192857 1.156516 0.083154 0.031031 0.646292 7.873544 +0.706732 5.734632 1.847806 0.128114 0.050896 3.616626 1.131071 0.283950 0.643558 0.179831 0.224320 +1.056749 0.665355 0.399943 0.053900 1.893946 2.299714 0.168079 0.085094 0.556024 8.136055 14.213193 0.931689 +0.233961 0.079465 0.130295 0.016768 1.902244 0.077611 0.012655 0.048906 1.403178 1.581816 4.275863 0.036062 2.888633 +1.518830 0.252482 0.049484 0.171011 0.108909 0.501196 0.346600 0.058913 0.299924 0.073007 0.297573 0.249478 0.091619 0.068920 +5.595735 0.861017 3.749627 0.987083 4.952776 1.045071 0.463265 1.190738 0.897478 0.131753 0.265701 0.607097 0.399537 0.408758 0.993614 +2.157458 0.613623 1.733380 0.361861 2.145775 1.011592 0.523086 0.091023 0.450662 1.492403 0.408418 1.143233 2.378569 0.131777 0.381007 7.574340 +0.151895 0.544292 0.060182 0.043433 1.259614 0.228038 0.045082 0.134804 0.748147 0.134416 0.979277 0.038787 0.908253 4.850762 0.052415 0.249753 0.114232 +0.219509 
0.243507 0.580103 0.130214 2.325021 0.196580 0.079660 0.037482 6.907609 0.299245 0.552917 0.067894 0.685250 19.404995 0.047839 0.323207 0.183044 4.704884 +3.049976 0.278740 0.134120 0.055382 4.149385 0.500946 0.435957 0.067170 0.214393 22.435652 2.883298 0.323886 3.369448 0.722571 0.315978 0.152899 2.423398 0.186495 0.303833 + +0.076458 0.052393 0.055429 0.088634 0.007473 0.040671 0.080952 0.100192 0.025439 0.031730 0.053100 0.070835 0.014039 0.023159 0.087111 0.063636 0.055346 0.007033 0.023779 0.042590; + +[ main definition of EHO with fixed component rates ] +model EHO = MIX{ExtEHO:0.720274356,HelEHO:0.976798797,OthEHO:0.783109376}; + + +[ --------------------------------------------------------- + UL2 mixture model of Le, Lartillot & Gascuel (2008) + --------------------------------------------------------- ] + +model M1_UL2 = +0.267149 +0.211944 0.816250 +0.156648 0.336150 3.110967 +2.402535 1.001114 1.287205 0.467161 +0.301870 3.168646 1.844180 0.571540 0.394361 +0.503678 1.529332 0.788530 3.920399 0.234553 8.502278 +3.124853 0.171548 0.220006 0.250690 0.766651 0.174653 0.399019 +0.139279 1.597241 5.622886 2.146897 0.349557 8.097306 1.211287 0.044878 +0.037158 0.139068 0.189483 0.049336 0.147864 0.122799 0.153664 0.006928 0.085276 +0.108752 0.387538 0.092568 0.035815 0.399254 0.617370 0.225586 0.018972 0.202328 2.343778 +0.255267 15.176345 1.030178 0.196011 0.396427 3.731061 2.642525 0.142626 0.878376 0.319044 0.422741 +0.430988 0.522887 0.351960 0.102916 0.683070 2.247889 0.621957 0.070803 0.228871 2.780325 4.767336 1.450453 +0.088392 0.116382 0.114044 0.066251 0.668683 0.133418 0.075116 0.039034 0.780377 0.488538 1.586897 0.143427 1.211385 +1.303487 0.178064 0.192016 0.065259 0.315140 0.406966 0.144065 0.135536 0.273070 0.087171 0.298010 0.087701 0.165232 0.104423 +7.472990 0.579607 3.004054 0.854304 5.789930 0.930019 0.709540 2.018826 0.527351 0.051443 0.070322 0.432286 0.281917 0.286341 0.473611 +2.276542 0.392852 1.332166 0.193248 2.577504 0.541748 
0.690939 0.052900 0.272814 0.634227 0.224553 0.795413 1.360016 0.120449 0.745729 6.088861 +0.048841 0.673695 0.076107 0.073261 0.377566 0.284556 0.284138 0.130136 0.649073 0.047797 0.324911 0.148403 0.390301 2.189403 0.122493 0.131225 0.080727 +0.073190 0.425791 0.503951 0.250485 0.577049 0.306036 0.198368 0.024991 3.987606 0.083215 0.127898 0.372637 0.179514 7.784255 0.089874 0.175724 0.117177 2.629196 +1.351002 0.175990 0.120675 0.105544 1.491339 0.203270 0.463186 0.055506 0.065132 6.411609 1.020423 0.337618 1.047308 0.272790 0.407545 0.079844 1.634833 0.077263 0.083195 + +0.122413 0.017757 0.020209 0.012086 0.018894 0.014525 0.009897 0.045663 0.020120 0.124002 0.168915 0.011684 0.037631 0.063612 0.023347 0.039268 0.046707 0.015603 0.050968 0.136701; + + +model M2_UL2 = +0.557363 +0.539068 0.465628 +0.696831 0.032997 3.879799 +1.480953 4.566841 1.777582 0.310752 +1.402193 1.920868 1.276554 0.327085 0.972350 +1.335667 0.096752 0.255510 2.685052 0.088385 2.281328 +1.056193 0.423348 2.171283 0.933450 1.398738 0.369406 0.334900 +0.729300 2.712485 5.461073 0.679965 5.202985 4.012284 0.282038 0.585359 +0.267035 0.493033 0.523699 0.023230 2.563394 0.459103 0.176281 0.010013 0.551901 +0.700687 0.932999 0.206875 0.025161 3.939537 1.918986 0.154733 0.085684 1.446302 8.189198 +0.736759 3.603558 1.676442 0.070721 0.292188 2.403019 0.611829 0.307607 0.675279 0.627044 0.410941 +1.505101 0.819561 0.736222 0.089302 4.462071 2.539203 0.250970 0.204790 0.654198 11.105816 15.171688 1.258549 +0.541573 0.185468 0.343735 0.042217 5.958046 0.156533 0.064557 0.188906 3.891682 3.152154 5.098336 0.088022 4.518197 +1.155460 0.142408 0.044854 0.175385 0.123605 0.316005 0.157783 0.157894 0.347393 0.047328 0.344717 0.153954 0.054635 0.108793 +3.823040 0.733964 4.846938 0.890611 7.416660 0.987912 0.343107 2.296896 1.193558 0.368432 0.667347 0.535051 0.754875 1.469714 1.242760 +1.897039 0.590040 2.371940 0.347041 1.619173 1.025240 0.479587 0.210934 0.728868 5.106169 0.726618 1.152768 3.985684 
0.433442 0.358997 9.007029 +0.296375 0.833840 0.091310 0.080326 5.217767 0.363445 0.078944 0.378088 1.571919 0.351013 2.139511 0.098671 2.796573 6.102504 0.023698 0.665667 0.292919 +0.297444 0.206563 0.871576 0.173621 11.803422 0.181973 0.110832 0.073892 12.757344 1.161331 1.646025 0.101481 1.732368 29.335598 0.037045 0.706902 0.346859 5.666524 +2.765737 0.415803 0.194725 0.093474 5.264577 0.734884 0.683342 0.156374 0.517626 26.038986 3.741256 0.457775 5.253478 1.999427 0.297563 0.344932 4.012753 0.385172 0.870088 + +0.087622 0.083588 0.048847 0.098882 0.002815 0.062809 0.143166 0.055391 0.023310 0.015495 0.032465 0.102135 0.009511 0.008409 0.069323 0.057733 0.051876 0.003945 0.014462 0.028216; + +model UL2 = MIX{M1_UL2:0.581348617,M2_UL2:1.465482789}; + + +[ --------------------------------------------------------- + UL3 mixture model of Le, Lartillot & Gascuel (2008) + --------------------------------------------------------- ] + +model Q1_UL3 = +0.514865 +0.774348 0.583403 +0.854291 0.046141 2.011233 +1.019817 5.652322 2.260587 0.057603 +1.095968 1.696154 1.296536 0.417322 0.967032 +1.054599 0.084924 0.368384 3.592374 0.063073 1.885301 +3.510012 0.797055 1.759631 1.421695 2.627911 0.743770 0.772359 +0.694799 2.596186 4.214186 0.654590 6.673533 3.664595 0.294967 0.608220 +0.344837 0.543739 0.965435 0.062495 2.500862 0.452448 0.155720 0.083334 0.905291 +0.593987 0.857922 0.351903 0.045358 3.290242 1.421539 0.109100 0.230693 1.595696 5.042430 +0.708843 2.012940 1.662582 0.106190 0.329149 2.268825 0.579185 0.365374 0.696286 0.701896 0.398546 +0.990080 0.754111 0.910436 0.143464 3.570847 1.708803 0.181804 0.706982 0.789517 8.138995 13.390024 1.137779 +0.085639 0.012721 0.098898 0.018361 2.148695 0.012425 0.009316 0.135782 0.921964 1.006572 2.479349 0.014715 1.418875 +0.655013 0.150052 0.120388 0.698261 0.254951 0.353826 0.250818 0.715043 0.329691 0.170251 0.827093 0.187804 0.178490 0.048299 +2.863328 0.657706 3.761619 0.619692 9.817007 0.810603 0.344050 6.758412 
0.997214 0.414623 0.625678 0.555290 0.647617 0.392859 0.929152 +1.373936 0.392433 2.711122 0.237865 2.460302 0.701472 0.319136 0.607889 0.728133 3.705396 0.412346 0.953939 2.446017 0.054119 0.279699 9.934970 +0.247598 0.514750 0.144529 0.157484 5.383077 0.199950 0.045688 0.790171 1.116595 0.243053 1.738186 0.070214 3.427855 3.275850 0.007577 0.583988 0.205721 +0.090644 0.046952 0.326197 0.089450 7.475195 0.018555 0.020706 0.016617 3.728614 0.404819 0.617948 0.029889 0.956437 47.933104 0.050416 0.181180 0.070113 5.242459 +2.093798 0.323334 0.307076 0.101486 8.553531 0.473023 0.410909 0.459941 0.568017 13.906640 1.778101 0.426825 2.763369 0.570421 0.311278 0.389524 2.915452 0.252168 0.268516 + +0.104307 0.092553 0.043722 0.085643 0.003218 0.074342 0.163928 0.024243 0.022216 0.016012 0.038591 0.105577 0.011434 0.016126 0.018057 0.061232 0.061373 0.004086 0.020876 0.032465; + +model Q2_UL3 = +1.709484 +0.184309 0.860448 +0.660851 0.182073 4.471383 +4.554487 2.843438 1.801073 1.068728 +3.425703 6.092362 2.868388 0.790473 0.794773 +4.278840 0.359055 0.585031 4.176143 0.121031 6.860012 +0.625715 1.054231 1.222442 0.492366 1.418419 0.796035 0.643251 +1.089116 6.396197 8.965630 1.915247 2.033352 11.058341 0.768162 0.523196 +0.024545 0.023433 0.014686 0.002204 0.628823 0.008720 0.008363 0.002485 0.046726 +0.150945 0.140520 0.002514 0.000212 1.903535 0.384413 0.015127 0.010251 0.210723 5.066207 +1.751314 12.981698 3.641808 0.278298 0.036599 7.677610 2.744099 0.612733 1.686490 0.042380 0.023858 +0.475876 0.364580 0.063143 0.001486 3.890832 0.754732 0.041044 0.024222 0.236955 5.752463 12.019762 0.229898 +0.142125 0.051255 0.006503 0.000593 5.397699 0.064190 0.006871 0.015588 0.424840 1.005341 5.458275 0.021422 1.779060 +5.433246 1.051312 0.012611 0.027267 0.635181 1.765792 0.849429 0.023324 0.610884 0.000184 0.037705 0.604166 0.001415 0.003197 +6.267113 1.750009 5.986041 1.411952 5.482009 1.923966 0.595886 0.943724 1.786620 0.043381 0.066093 0.813893 0.053557 0.199095 1.723045 
+6.389458 1.828974 2.044599 1.561907 2.083626 2.070125 1.210529 0.217976 1.192222 0.515450 0.199809 2.020941 1.238100 0.150760 1.727569 9.882473 +0.281689 1.180712 0.000006 0.017218 3.696424 0.146508 0.068518 0.222418 0.497727 0.199828 1.849405 0.001429 1.394852 2.473491 0.016401 0.288550 0.190290 +0.302638 0.475135 0.196905 0.067615 6.355457 0.576342 0.232832 0.059485 4.525509 0.571811 1.194578 0.006674 0.467694 8.107893 0.024556 0.394389 0.441794 4.067825 +11.333027 0.298555 0.053673 0.009846 5.743238 0.296166 0.413471 0.120393 0.105418 9.130937 2.674960 0.165290 4.417978 1.811161 0.492985 0.042803 3.284174 0.844277 2.327679 + +0.044015 0.021591 0.056258 0.102405 0.003260 0.018409 0.041364 0.168549 0.015843 0.064277 0.118096 0.036061 0.027358 0.036695 0.124914 0.047807 0.022696 0.005866 0.018644 0.025892; + + +model Q3_UL3 = +0.063622 +0.118948 0.528684 +0.065502 0.142677 12.092355 +2.010382 0.302352 1.127688 0.014546 +0.169022 2.026184 1.256016 0.417582 0.170493 +0.172876 0.453837 0.454428 1.882165 0.045799 4.705997 +5.254550 0.174422 0.364886 0.192790 0.891120 0.148450 0.195211 +0.090586 1.258840 5.523808 0.313487 0.211550 4.734918 0.466811 0.096529 +0.034911 0.065167 0.222440 0.023060 0.132230 0.122571 0.075521 0.003942 0.065261 +0.146425 0.219004 0.136129 0.028165 0.448432 0.795591 0.146014 0.016718 0.240024 2.387089 +0.041117 11.082325 0.783756 0.049843 0.039616 1.828161 0.649991 0.069199 0.271006 0.094864 0.140698 +0.599734 0.230551 0.595641 0.059404 0.699534 2.765355 0.391569 0.089210 0.206188 3.020110 2.806927 0.516762 +0.148889 0.145244 0.408811 0.089283 0.724422 0.260910 0.101509 0.101882 1.508086 0.693424 0.709933 0.061880 1.887041 +0.378131 0.225548 0.181924 0.038283 0.146379 0.511097 0.151769 0.166424 0.386101 0.186116 0.753595 0.182723 0.420131 0.341199 +8.340091 0.495564 3.010756 0.463573 5.601734 0.985082 0.415256 2.532014 0.720035 0.067860 0.121784 0.279698 0.626080 0.788724 0.890779 +1.932342 0.358779 1.069003 0.093122 2.028674 0.637861 0.597230 
0.019551 0.211054 0.870341 0.381670 0.461083 2.131453 0.255120 0.604567 3.450887 +0.031419 0.378744 0.085588 0.008303 0.277628 0.277906 0.122038 0.055246 0.420382 0.021973 0.127345 0.027776 0.139098 3.077241 0.163299 0.177114 0.062650 +0.106283 0.366786 1.453278 0.404793 0.753053 0.475024 0.273992 0.055936 7.367304 0.094629 0.126746 0.148716 0.369726 5.251757 0.232549 0.676796 0.247828 2.910495 +1.104848 0.069287 0.110149 0.084224 1.280832 0.175361 0.342585 0.047507 0.038768 8.415916 1.577573 0.054532 1.537983 0.409619 0.309028 0.094413 1.483411 0.060147 0.076595 + +0.134055 0.044392 0.020730 0.020801 0.021008 0.024509 0.028587 0.029069 0.028138 0.105826 0.117458 0.049732 0.026120 0.038984 0.016306 0.038369 0.055334 0.017276 0.039905 0.143398; + +model UL3 = MIX{Q1_UL3:0.484340397,Q2_UL3:0.492780514,Q3_UL3:1.15597274}; + + +[ --------------------------------------------------------- + EX_EHO mixture model of Le & Gascuel (2010) + --------------------------------------------------------- ] + + +model BUR_EXT = +0.228492 +0.165543 0.916344 +0.238509 0.258514 8.498064 +3.374029 1.037434 1.667702 0.332072 +0.344742 4.971495 2.471912 0.654950 0.130301 +0.417921 1.039226 0.875808 13.073209 0.040759 9.834742 +4.248714 0.411876 0.585570 0.748848 0.908311 0.221633 0.593504 +0.182762 3.872065 6.999812 1.719470 0.493863 8.695395 0.749303 0.137367 +0.011705 0.090751 0.149898 0.021996 0.077693 0.043664 0.013820 0.001527 0.073342 +0.133793 0.286232 0.065118 0.015540 0.456304 0.546974 0.052641 0.024196 0.226460 2.160734 +0.249141 17.756919 3.385483 0.343780 0.093875 6.677050 2.745017 0.295602 1.481997 0.100576 0.167406 +0.641194 0.342577 0.427146 0.059345 0.867233 2.306480 0.218260 0.058613 0.358032 2.187901 5.151337 0.750049 +0.118366 0.068606 0.102572 0.009357 0.633943 0.033356 0.012944 0.024474 0.497973 0.534407 1.581972 0.063281 1.329239 +1.561052 0.483968 0.385170 0.261437 0.310131 0.913924 0.355871 0.175520 0.512823 0.019789 0.295416 0.348527 0.104569 0.059641 +5.891807 
1.320618 5.737159 1.074011 4.702782 1.389531 0.878480 2.178078 1.111068 0.033343 0.094349 1.035903 0.327901 0.292022 1.344678 +2.059884 0.976165 2.166428 0.369522 1.951862 0.815145 0.575774 0.060834 0.558388 0.422299 0.153549 1.793263 1.268126 0.085468 0.780914 9.031309 +0.081683 0.814216 0.057557 0.055146 0.450959 0.191881 0.109420 0.144367 0.651978 0.068649 0.345622 0.169527 0.387902 1.883741 0.023466 0.309129 0.111568 +0.052650 0.248907 0.570101 0.180267 0.701260 0.253975 0.061388 0.025465 4.206114 0.083799 0.147600 0.226848 0.254720 6.549427 0.027521 0.283138 0.141408 2.561108 +1.355342 0.137437 0.104597 0.051387 1.203830 0.218892 0.194527 0.031054 0.088935 4.577473 1.003647 0.153722 0.883283 0.242657 0.191295 0.068785 0.990922 0.056276 0.078264 + +0.087158 0.015906 0.012970 0.012566 0.020325 0.013301 0.013777 0.039603 0.014597 0.161107 0.147775 0.011033 0.031334 0.064281 0.013322 0.035417 0.048583 0.012672 0.045210 0.199064; + + +model BUR_HEL = +0.317211 +0.209784 1.120865 +0.315205 0.301050 7.439896 +2.214446 0.884449 1.356293 0.110768 +0.465495 4.319791 2.843187 1.082540 0.215988 +0.668735 0.901135 0.986572 11.245156 0.009874 7.561773 +3.614157 0.568883 0.972660 1.036117 0.894733 0.409083 0.780808 +0.249929 3.138701 7.344935 1.747672 0.379845 9.559763 0.842239 0.146008 +0.059633 0.103290 0.206475 0.017492 0.286194 0.123433 0.037593 0.010910 0.071273 +0.096230 0.285199 0.113728 0.015874 0.439724 0.547078 0.063675 0.021607 0.303531 2.097349 +0.380075 15.783354 2.780107 0.569108 0.093004 6.179905 3.209588 0.413960 1.002075 0.185911 0.185249 +0.371379 0.411553 0.398602 0.076761 0.727245 1.665645 0.249045 0.068128 0.256194 2.940308 3.649539 0.972247 +0.075616 0.043519 0.096446 0.041118 0.636688 0.102460 0.039991 0.041269 0.839126 0.376556 1.551814 0.064774 1.173962 +1.100574 0.385197 0.319458 0.353000 0.112549 0.805706 0.369483 0.482895 0.520098 0.058167 0.144341 0.361488 0.074069 0.057968 +6.832958 0.955160 5.296628 1.265211 6.144756 1.315182 0.902504 3.903795 
0.862633 0.072343 0.080478 0.979654 0.330305 0.328917 1.924898 +2.223205 0.445571 2.461831 0.299635 2.943208 0.830637 0.621903 0.184055 0.468356 0.911139 0.208091 1.343261 1.515339 0.158763 0.915879 9.298787 +0.062541 0.806724 0.110928 0.132125 0.414525 0.388313 0.191952 0.271274 0.909529 0.025790 0.343842 0.099137 0.543577 2.467147 0.044938 0.215329 0.087955 +0.082948 0.329591 0.693402 0.286594 0.866329 0.259566 0.167425 0.049038 6.332054 0.093136 0.177755 0.275998 0.261754 8.344684 0.088981 0.335859 0.137177 3.125017 +1.390479 0.142986 0.175068 0.106294 1.687293 0.159520 0.297915 0.080925 0.085103 6.414688 0.953785 0.240157 1.097345 0.264988 0.373870 0.144230 2.572837 0.089110 0.115941 + +0.158060 0.021566 0.016487 0.014079 0.016937 0.020232 0.023096 0.032822 0.014618 0.114447 0.198900 0.014668 0.042840 0.053434 0.015640 0.037275 0.043095 0.012211 0.036330 0.113263; + +model BUR_OTH = +0.406682 +0.246649 0.848592 +0.364260 0.198690 4.535840 +3.292044 0.837291 1.295138 0.420726 +0.735862 4.205085 2.062501 0.427451 0.259335 +0.954795 0.673046 0.671062 8.395674 0.048284 8.922739 +1.958847 0.573207 0.632317 0.572264 0.486274 0.345345 0.650009 +0.312042 2.699661 4.969855 1.181781 0.551188 7.620453 0.701108 0.195346 +0.071000 0.127041 0.184028 0.030240 0.180591 0.065984 0.039235 0.005033 0.098525 +0.142298 0.338853 0.086876 0.026095 0.484427 0.867777 0.087780 0.017129 0.309774 3.477136 +0.624622 18.390649 2.748646 0.442886 0.238266 6.993941 3.906971 0.652336 1.365814 0.219252 0.288480 +0.610604 0.581287 0.382156 0.048508 0.963147 2.672887 0.384585 0.051334 0.386066 3.752286 6.858529 1.524446 +0.124670 0.047666 0.102656 0.031532 0.699124 0.129867 0.004923 0.039185 0.701690 0.643782 2.019473 0.104308 1.568249 +1.126387 0.321347 0.107738 0.137858 0.150346 0.601413 0.310374 0.073794 0.332910 0.056230 0.208204 0.368816 0.078902 0.062410 +5.908551 0.834735 3.611589 0.969189 4.765870 0.881934 0.528944 1.439305 0.746876 0.060111 0.114374 0.784754 0.235963 0.219009 0.710100 
+1.856381 0.574277 1.573584 0.223054 2.038789 0.763848 0.461329 0.076195 0.396095 0.701247 0.249302 1.091322 1.282643 0.070553 0.419070 6.616977 +0.069294 0.654056 0.127255 0.078896 0.517561 0.188732 0.125541 0.104279 0.547504 0.066927 0.454998 0.056498 0.425274 2.668838 0.050943 0.151483 0.062698 +0.128158 0.354167 0.640140 0.182565 0.793990 0.368725 0.157796 0.037084 4.307140 0.140691 0.241076 0.323966 0.293629 9.711414 0.060323 0.207489 0.111492 2.857446 +1.982761 0.158227 0.115545 0.051117 2.065903 0.338262 0.258245 0.045770 0.089942 10.113118 1.382024 0.431385 1.456614 0.295718 0.273919 0.066465 1.668063 0.113899 0.144981 + +0.102123 0.021199 0.032404 0.032350 0.018985 0.017469 0.017625 0.089270 0.021090 0.083642 0.123866 0.012720 0.029789 0.055399 0.072705 0.061298 0.061705 0.013496 0.039682 0.093184; + +model EXP_EXT= +0.464716 +0.597009 0.420578 +1.010693 0.048553 5.944290 +3.915828 2.088244 0.878468 0.236108 +1.156023 1.882317 1.435926 0.338823 0.482742 +1.131098 0.127150 0.346338 3.317186 0.061060 2.724696 +4.638659 0.351041 1.379174 1.216518 1.396050 0.199361 0.353970 +0.657615 2.215990 4.150252 0.717363 1.853969 3.768864 0.347165 0.313421 +0.078558 0.127092 0.347281 0.032361 0.605448 0.171553 0.104678 0.010608 0.309418 +0.516672 0.510585 0.105529 0.039188 1.808273 1.017577 0.112010 0.044661 0.772131 5.693102 +0.519389 3.571104 1.844049 0.109305 0.103105 2.232749 0.653339 0.195325 0.547017 0.219311 0.253086 +1.658261 0.640712 0.558751 0.063591 1.694880 2.088441 0.194697 0.291701 0.321392 6.220456 12.392618 0.862547 +0.426071 0.064894 0.132019 0.034872 2.076573 0.085745 0.026972 0.099963 1.388250 1.765294 3.859637 0.032198 3.134107 +3.082729 0.250470 0.232578 0.376163 0.290522 0.502379 0.240501 0.302007 0.283950 0.013574 0.606936 0.248475 0.226716 0.058246 +7.012884 0.866957 5.008997 0.814153 4.758346 1.192080 0.595351 2.514269 0.993487 0.135167 0.349525 0.542021 0.512591 0.744682 1.258172 +2.037755 0.446367 1.618299 0.203392 1.177421 0.840646 0.583757 
0.071515 0.466886 1.503883 0.260405 0.934230 2.245607 0.123552 0.258896 4.504833 +0.171334 0.385971 0.087717 0.019596 1.015512 0.127027 0.037725 0.217844 0.822780 0.095756 0.777332 0.039952 0.977419 3.217291 0.015240 0.301259 0.102153 +0.194998 0.091803 0.433021 0.086495 3.074882 0.111578 0.041481 0.048438 4.904785 0.336528 0.411742 0.087476 0.640594 14.126821 0.061656 0.338111 0.129249 2.902137 +2.811391 0.216605 0.127240 0.061503 2.320268 0.390874 0.450783 0.132513 0.234279 12.181354 2.539512 0.233848 3.363159 0.717467 0.138035 0.159602 1.615372 0.132268 0.186175 + +0.043140 0.090761 0.034408 0.052848 0.006370 0.053817 0.107749 0.024812 0.029498 0.049134 0.050167 0.098127 0.013722 0.025841 0.037395 0.056505 0.094326 0.012045 0.039238 0.080099; + +model EXP_HEL = +0.434227 +0.551823 0.569806 +0.698268 0.056291 3.064314 +2.026002 2.379205 1.077282 0.016649 +0.986617 1.606282 1.331570 0.426399 0.409724 +1.005936 0.120122 0.390888 2.999742 0.021217 1.881156 +3.221202 0.736168 2.269617 1.272893 1.771711 0.622430 0.656603 +0.515574 2.032567 5.484997 0.666491 2.985549 3.380526 0.265244 0.557878 +0.200810 0.241566 0.441585 0.009830 1.541200 0.198621 0.069562 0.043838 0.339616 +0.328669 0.583849 0.178015 0.022077 2.045404 1.046125 0.089148 0.104708 0.875298 8.628242 +0.598864 3.090263 1.682415 0.113637 0.207957 2.085253 0.582536 0.376534 0.554395 0.371883 0.290692 +0.799278 0.528354 0.704087 0.062290 2.303849 1.507620 0.173293 0.356580 0.492228 10.028453 12.162732 0.867109 +0.256227 0.083117 0.192262 0.030759 4.328951 0.078062 0.022890 0.181917 2.406824 2.014776 4.856941 0.041675 3.521229 +1.118844 0.147481 0.061969 0.323498 0.171678 0.387521 0.237715 0.641036 0.433529 0.069102 0.359935 0.164055 0.063832 0.126592 +5.069051 0.749554 5.245486 0.840686 7.114530 1.177802 0.382956 6.139836 1.086779 0.194824 0.424579 0.655759 0.682174 0.753148 1.355810 +2.949741 0.623328 3.248881 0.406219 3.345739 1.214278 0.538553 0.867954 0.747654 3.316346 0.754081 1.193593 3.516479 0.366653 
0.622665 7.975653 +0.115446 0.394156 0.090971 0.055309 1.947845 0.185912 0.046886 0.451084 1.173014 0.277029 1.078778 0.054622 1.516237 5.813526 0.071865 0.359167 0.106921 +0.205680 0.197878 0.678775 0.118188 4.183184 0.139485 0.059999 0.051336 10.200670 0.507328 0.721921 0.086974 0.741023 24.191458 0.046460 0.489820 0.247367 4.904042 +2.494211 0.280293 0.235248 0.083648 5.509932 0.429196 0.409105 0.447130 0.351675 23.404006 3.840750 0.300727 4.126659 1.483049 0.675560 0.336101 4.426709 0.309940 0.588217 + +0.115826 0.094038 0.037357 0.085821 0.003363 0.073078 0.167709 0.025416 0.021634 0.024147 0.050238 0.106612 0.013318 0.013330 0.029895 0.044902 0.037901 0.006460 0.018548 0.030407; + +model EXP_OTH = +0.603175 +0.478745 0.562615 +0.608325 0.056553 3.755571 +2.371839 2.480665 0.889513 0.170707 +1.551117 2.685995 1.462350 0.424139 0.669728 +1.624084 0.129505 0.314826 3.404205 0.049823 3.375473 +0.987777 0.356744 1.294077 0.640234 0.583980 0.331879 0.304731 +0.667236 2.788429 4.719171 0.731257 1.872668 4.612209 0.316233 0.320454 +0.186911 0.269245 0.318538 0.028464 0.987958 0.242926 0.090427 0.007312 0.327205 +0.527992 0.844027 0.167295 0.021423 1.623589 1.636879 0.135662 0.044560 0.939347 10.338048 +0.842575 5.076266 1.736167 0.106076 0.132985 3.365869 0.969736 0.270931 0.669196 0.356829 0.352830 +1.296147 0.863599 0.469732 0.075018 1.832599 2.642602 0.217378 0.107935 0.624941 10.670411 17.593544 1.247987 +0.325034 0.135328 0.192352 0.021631 2.731423 0.103263 0.027708 0.060740 2.148472 2.344767 5.497995 0.057563 3.278627 +1.670091 0.235642 0.042844 0.164518 0.112539 0.479958 0.326780 0.057540 0.291899 0.110067 0.380466 0.240061 0.109541 0.083760 +5.098150 0.831455 3.661924 0.978777 4.500240 1.064732 0.455496 1.095629 0.915898 0.226713 0.405000 0.608323 0.525496 0.593321 1.035726 +2.174502 0.630453 1.791747 0.396219 1.681712 1.083797 0.556968 0.100584 0.457070 2.361119 0.543612 1.211816 2.987220 0.198957 0.368383 7.505908 +0.203719 0.615713 0.044203 0.046952 
1.745090 0.303876 0.050920 0.155176 0.920001 0.165182 1.385828 0.055323 1.274920 5.896599 0.059081 0.303111 0.156402 +0.271220 0.253084 0.643377 0.142691 3.763228 0.209729 0.093004 0.035856 8.167503 0.490579 0.894778 0.077103 1.029700 26.210400 0.045876 0.373529 0.218567 5.726440 +3.470639 0.410713 0.180011 0.081584 4.323431 0.751254 0.686467 0.086874 0.318032 29.800262 3.856040 0.482930 4.862267 1.182403 0.390522 0.268937 2.836818 0.229423 0.453335 + +0.071716 0.058979 0.060316 0.101089 0.005039 0.044673 0.093349 0.105394 0.026228 0.020220 0.037831 0.081647 0.010677 0.015875 0.090566 0.065046 0.054453 0.005546 0.019924 0.031432; + + +model EX_EHO = MIX{BUR_EXT:0.761816796788931,BUR_HEL:0.744425646802117,BUR_OTH:0.532457759429489,EXP_EXT:1.5639387472863,EXP_HEL:2.06403411829438,EXP_OTH:1.43336795177594}; + + +[ --------------------------------------------------------- + LG4M mixture model of Le, Dang & Gascuel (2012) + --------------------------------------------------------- ] + +model LG4M1 = + 0.269343 + 0.254612 0.150988 + 0.236821 0.031863 0.659648 + 2.506547 0.938594 0.975736 0.175533 + 0.359080 0.348288 0.697708 0.086573 0.095967 + 0.304674 0.156000 0.377704 0.449140 0.064706 4.342595 + 1.692015 0.286638 0.565095 0.380358 0.617945 0.202058 0.264342 + 0.251974 0.921633 1.267609 0.309692 0.390429 2.344059 0.217750 0.104842 + 1.085220 0.325624 0.818658 0.037814 1.144150 0.534567 0.222793 0.062682 0.567431 + 0.676353 0.602366 0.217027 0.007533 1.595775 0.671143 0.158424 0.070463 0.764255 8.226528 + 0.179155 0.971338 1.343718 0.133744 0.122468 0.983857 0.994128 0.220916 0.410581 0.387487 0.181110 + 1.636817 0.515217 0.670461 0.071252 1.534848 5.288642 0.255628 0.094198 0.257229 25.667158 6.819689 1.591212 + 0.235498 0.123932 0.099793 0.030425 0.897279 0.112229 0.022529 0.047488 0.762914 1.344259 0.865691 0.038921 2.030833 + 1.265605 0.040163 0.173354 0.027579 0.259961 0.580374 0.088041 0.145595 0.143676 0.298859 1.020117 0.000714 0.190019 0.093964 + 5.368405 
0.470952 5.267140 0.780505 4.986071 0.890554 0.377949 1.755515 0.786352 0.527246 0.667783 0.659948 0.731921 0.837669 1.355630 + 1.539394 0.326789 1.688169 0.283738 1.389282 0.329821 0.231770 0.117017 0.449977 3.531600 0.721586 0.497588 2.691697 0.152088 0.698040 16.321298 + 0.140944 0.375611 0.025163 0.002757 0.801456 0.257253 0.103678 0.132995 0.345834 0.377156 0.839647 0.176970 0.505682 1.670170 0.091298 0.210096 0.013165 + 0.199836 0.146857 0.806275 0.234246 1.436970 0.319669 0.010076 0.036859 3.503317 0.598632 0.738969 0.154436 0.579000 4.245524 0.074524 0.454195 0.232913 1.178490 + 9.435529 0.285934 0.395670 0.130890 6.097263 0.516259 0.503665 0.222960 0.149143 13.666175 2.988174 0.162725 5.973826 0.843416 0.597394 0.701149 4.680002 0.300085 0.416262 + +0.082276 0.055172 0.043853 0.053484 0.018957 0.028152 0.046679 0.157817 0.033297 0.028284 0.054284 0.025275 0.023665 0.041874 0.063071 0.066501 0.065424 0.023837 0.038633 0.049465; + +model LG4M2 = +0.133720 + 0.337212 0.749052 + 0.110918 0.105087 4.773487 + 3.993460 0.188305 1.590332 0.304942 + 0.412075 2.585774 1.906884 0.438367 0.242076 + 0.435295 0.198278 0.296366 7.470333 0.008443 3.295515 + 7.837540 0.164607 0.431724 0.153850 1.799716 0.269744 0.242866 + 0.203872 2.130334 9.374479 1.080878 0.152458 12.299133 0.279589 0.089714 + 0.039718 0.024553 0.135254 0.014979 0.147498 0.033964 0.005585 0.007248 0.022746 + 0.075784 0.080091 0.084971 0.014128 0.308347 0.500836 0.022833 0.022999 0.161270 1.511682 + 0.177662 10.373708 1.036721 0.038303 0.043030 2.181033 0.321165 0.103050 0.459502 0.021215 0.078395 + 0.420784 0.192765 0.329545 0.008331 0.883142 1.403324 0.168673 0.160728 0.612573 1.520889 7.763266 0.307903 + 0.071268 0.019652 0.088753 0.013547 0.566609 0.071878 0.020050 0.041022 0.625361 0.382806 1.763059 0.044644 1.551911 + 0.959127 1.496585 0.377794 0.332010 0.318192 1.386970 0.915904 0.224255 2.611479 0.029351 0.068250 1.542356 0.047525 0.182715 + 11.721512 0.359408 2.399158 0.219464 9.104192 0.767563 
0.235229 3.621219 0.971955 0.033780 0.043035 0.236929 0.319964 0.124977 0.840651 + 2.847068 0.218463 1.855386 0.109808 4.347048 0.765848 0.164569 0.312024 0.231569 0.356327 0.159597 0.403210 1.135162 0.106903 0.269190 9.816481 + 0.030203 0.387292 0.118878 0.067287 0.190240 0.122113 0.007023 0.137411 0.585141 0.020634 0.228824 0.000122 0.474862 3.135128 0.030313 0.093830 0.119152 + 0.067183 0.130101 0.348730 0.061798 0.301198 0.095382 0.095764 0.044628 2.107384 0.046105 0.100117 0.017073 0.192383 8.367641 0.000937 0.137416 0.044722 4.179782 + 0.679398 0.041567 0.092408 0.023701 1.271187 0.115566 0.055277 0.086988 0.060779 8.235167 0.609420 0.061764 0.581962 0.184187 0.080246 0.098033 1.438350 0.023439 0.039124 + +0.120900 0.036460 0.026510 0.040410 0.015980 0.021132 0.025191 0.036369 0.015884 0.111029 0.162852 0.024820 0.028023 0.074058 0.012065 0.041963 0.039072 0.012666 0.040478 0.114138; + +model LG4M3 = +0.421017 + 0.316236 0.693340 + 0.285984 0.059926 6.158219 + 4.034031 1.357707 0.708088 0.063669 + 0.886972 2.791622 1.701830 0.484347 0.414286 + 0.760525 0.233051 0.378723 4.032667 0.081977 4.940411 + 0.754103 0.402894 2.227443 1.102689 0.416576 0.459376 0.508409 + 0.571422 2.319453 5.579973 0.885376 1.439275 4.101979 0.576745 0.428799 + 0.162152 0.085229 0.095692 0.006129 0.490937 0.104843 0.045514 0.004705 0.098934 + 0.308006 0.287051 0.056994 0.007102 0.958988 0.578990 0.067119 0.024403 0.342983 3.805528 + 0.390161 7.663209 1.663641 0.105129 0.135029 3.364474 0.652618 0.457702 0.823674 0.129858 0.145630 + 1.042298 0.364551 0.293222 0.037983 1.486520 1.681752 0.192414 0.070498 0.222626 4.529623 4.781730 0.665308 + 0.362476 0.073439 0.129245 0.020078 1.992483 0.114549 0.023272 0.064490 1.491794 1.113437 2.132006 0.041677 1.928654 + 1.755491 0.087050 0.099325 0.163817 0.242851 0.322939 0.062943 0.198698 0.192904 0.062948 0.180283 0.059655 0.129323 0.065778 + 3.975060 0.893398 5.496314 1.397313 3.575120 1.385297 0.576191 1.733288 1.021255 0.065131 0.129115 
0.600308 0.387276 0.446001 1.298493 + 2.565079 0.534056 2.143993 0.411388 2.279084 0.893006 0.528209 0.135731 0.518741 0.972662 0.280700 0.890086 1.828755 0.189028 0.563778 7.788147 + 0.283631 0.497926 0.075454 0.043794 1.335322 0.308605 0.140137 0.150797 1.409726 0.119868 0.818331 0.080591 1.066017 3.754687 0.073415 0.435046 0.197272 + 0.242513 0.199157 0.472207 0.085937 2.039787 0.262751 0.084578 0.032247 7.762326 0.153966 0.299828 0.117255 0.438215 14.506235 0.089180 0.352766 0.215417 5.054245 + 2.795818 0.107130 0.060909 0.029724 2.986426 0.197267 0.196977 0.044327 0.116751 7.144311 1.848622 0.118020 1.999696 0.705747 0.272763 0.096935 1.820982 0.217007 0.172975 + +0.072639 0.051691 0.038642 0.055580 0.009829 0.031374 0.048731 0.065283 0.023791 0.086640 0.120847 0.052177 0.026728 0.032589 0.039238 0.046748 0.053361 0.008024 0.037426 0.098662; + +model LG4M4 = +0.576160 + 0.567606 0.498643 + 0.824359 0.050698 3.301401 + 0.822724 4.529235 1.291808 0.101930 + 1.254238 2.169809 1.427980 0.449474 0.868679 + 1.218615 0.154502 0.411471 3.172277 0.050239 2.138661 + 1.803443 0.604673 2.125496 1.276384 1.598679 0.502653 0.479490 + 0.516862 2.874265 4.845769 0.719673 3.825677 4.040275 0.292773 0.596643 + 0.180898 0.444586 0.550969 0.023542 2.349573 0.370160 0.142187 0.016618 0.500788 + 0.452099 0.866322 0.201033 0.026731 2.813990 1.645178 0.135556 0.072152 1.168817 5.696116 + 0.664186 2.902886 2.101971 0.127988 0.200218 2.505933 0.759509 0.333569 0.623100 0.547454 0.363656 + 0.864415 0.835049 0.632649 0.079201 2.105931 1.633544 0.216462 0.252419 0.665406 7.994105 11.751178 1.096842 + 0.324478 0.208947 0.280339 0.041683 4.788477 0.107022 0.067711 0.171320 3.324779 2.965328 5.133843 0.084856 4.042591 + 1.073043 0.173826 0.041985 0.270336 0.121299 0.351384 0.228565 0.225318 0.376089 0.058027 0.390354 0.214230 0.058954 0.126299 + 3.837562 0.884342 4.571911 0.942751 6.592827 1.080063 0.465397 3.137614 1.119667 0.362516 0.602355 0.716940 0.506796 1.444484 1.432558 + 2.106026 
0.750016 2.323325 0.335915 1.654673 1.194017 0.617231 0.318671 0.801030 4.455842 0.580191 1.384210 3.522468 0.473128 0.432718 5.716300 + 0.163720 0.818102 0.072322 0.068275 3.305436 0.373790 0.054323 0.476587 1.100360 0.392946 1.703323 0.085720 1.725516 5.436253 0.053108 0.498594 0.231832 + 0.241167 0.302440 1.055095 0.246940 9.741942 0.249895 0.129973 0.052363 11.542498 1.047449 1.319667 0.139770 1.330225 26.562270 0.046986 0.737653 0.313460 5.165098 + 1.824586 0.435795 0.179086 0.091739 3.609570 0.649507 0.656681 0.225234 0.473437 19.897252 3.001995 0.452926 3.929598 1.692159 0.370204 0.373501 3.329822 0.326593 0.860743 + +0.104843 0.078835 0.043513 0.090498 0.002924 0.066163 0.151640 0.038843 0.022556 0.018383 0.038687 0.104462 0.010166 0.009089 0.066950 0.053667 0.049486 0.004409 0.012924 0.031962; + +model LG4M = MIX{LG4M1,LG4M2,LG4M3,LG4M4}*G4; +model LG4 = MIX{LG4M1,LG4M2,LG4M3,LG4M4}*G4; + + +[ --------------------------------------------------------- + LG4X mixture model of Le, Dang & Gascuel (2012) + --------------------------------------------------------- ] + +model LG4X1 = +0.295719 +0.067388 0.448317 +0.253712 0.457483 2.358429 +1.029289 0.576016 0.251987 0.189008 +0.107964 1.741924 0.216561 0.599450 0.029955 +0.514644 0.736017 0.503084 109.901504 0.084794 4.117654 +10.868848 0.704334 0.435271 1.070052 1.862626 0.246260 1.202023 +0.380498 5.658311 4.873453 5.229858 0.553477 6.508329 1.634845 0.404968 +0.084223 0.123387 0.090748 0.052764 0.151733 0.054187 0.060194 0.048984 0.204296 +0.086976 0.221777 0.033310 0.021407 0.230320 0.195703 0.069359 0.069963 0.504221 1.495537 +0.188789 93.433377 0.746537 0.621146 0.096955 1.669092 2.448827 0.256662 1.991533 0.091940 0.122332 +0.286389 0.382175 0.128905 0.081091 0.352526 0.810168 0.232297 0.228519 0.655465 1.994320 3.256485 0.457430 +0.155567 0.235965 0.127321 0.205164 0.590018 0.066081 0.064822 0.241077 6.799829 0.754940 2.261319 0.163849 1.559944 +1.671061 6.535048 0.904011 5.164456 0.386853 2.437439 
3.537387 4.320442 11.291065 0.170343 0.848067 5.260446 0.426508 0.438856 +2.132922 0.525521 0.939733 0.747330 1.559564 0.165666 0.435384 3.656545 0.961142 0.050315 0.064441 0.360946 0.132547 0.306683 4.586081 +0.529591 0.303537 0.435450 0.308078 0.606648 0.106333 0.290413 0.290216 0.448965 0.372166 0.102493 0.389413 0.498634 0.109129 2.099355 3.634276 +0.115551 0.641259 0.046646 0.260889 0.587531 0.093417 0.280695 0.307466 6.227274 0.206332 0.459041 0.033291 0.559069 18.392863 0.411347 0.101797 0.034710 +0.102453 0.289466 0.262076 0.185083 0.592318 0.035149 0.105999 0.096556 20.304886 0.097050 0.133091 0.115301 0.264728 66.647302 0.476350 0.148995 0.063603 20.561407 +0.916683 0.102065 0.043986 0.080708 0.885230 0.072549 0.206603 0.306067 0.205944 5.381403 0.561215 0.112593 0.693307 0.400021 0.584622 0.089177 0.755865 0.133790 0.154902 + +0.147383 0.017579 0.058208 0.017707 0.026331 0.041582 0.017494 0.027859 0.011849 0.076971 0.147823 0.019535 0.037132 0.029940 0.008059 0.088179 0.089653 0.006477 0.032308 0.097931; + +model LG4X2 = + 0.066142 + 0.590377 0.468325 + 0.069930 0.013688 2.851667 + 9.850951 0.302287 3.932151 0.146882 + 1.101363 1.353957 8.159169 0.249672 0.582670 + 0.150375 0.028386 0.219934 0.560142 0.005035 3.054085 + 0.568586 0.037750 0.421974 0.046719 0.275844 0.129551 0.037250 + 0.051668 0.262130 2.468752 0.106259 0.098208 4.210126 0.029788 0.013513 + 0.127170 0.016923 0.344765 0.003656 0.445038 0.165753 0.008541 0.002533 0.031779 + 0.292429 0.064289 0.210724 0.004200 1.217010 1.088704 0.014768 0.005848 0.064558 7.278994 + 0.071458 0.855973 1.172204 0.014189 0.033969 1.889645 0.125869 0.031390 0.065585 0.029917 0.042762 + 1.218562 0.079621 0.763553 0.009876 1.988516 3.344809 0.056702 0.021612 0.079927 7.918203 14.799537 0.259400 + 0.075144 0.011169 0.082464 0.002656 0.681161 0.111063 0.004186 0.004854 0.095591 0.450964 1.506485 0.009457 1.375871 + 7.169085 0.161937 0.726566 0.040244 0.825960 2.067758 0.110993 0.129497 0.196886 0.169797 0.637893 
0.090576 0.457399 0.143327 + 30.139501 0.276530 11.149790 0.267322 18.762977 3.547017 0.201148 0.976631 0.408834 0.104288 0.123793 0.292108 0.598048 0.328689 3.478333 + 13.461692 0.161053 4.782635 0.053740 11.949233 2.466507 0.139705 0.053397 0.126088 1.578530 0.641351 0.297913 4.418398 0.125011 2.984862 13.974326 + 0.021372 0.081472 0.058046 0.006597 0.286794 0.188236 0.009201 0.019475 0.037226 0.015909 0.154810 0.017172 0.239749 0.562720 0.061299 0.154326 0.060703 + 0.045779 0.036742 0.498072 0.027639 0.534219 0.203493 0.012095 0.004964 0.452302 0.094365 0.140750 0.021976 0.168432 1.414883 0.077470 0.224675 0.123480 0.447011 + 4.270235 0.030342 0.258487 0.012745 4.336817 0.281953 0.043812 0.015539 0.016212 16.179952 3.416059 0.032578 2.950318 0.227807 1.050562 0.112000 5.294490 0.033381 0.045528 + +0.063139 0.066357 0.011586 0.066571 0.010800 0.009276 0.053984 0.146986 0.034214 0.088822 0.098196 0.032390 0.021263 0.072697 0.016761 0.020711 0.020797 0.025463 0.045615 0.094372; + +model LG4X3 = + 0.733336 + 0.558955 0.597671 + 0.503360 0.058964 5.581680 + 4.149599 2.863355 1.279881 0.225860 + 1.415369 2.872594 1.335650 0.434096 1.043232 + 1.367574 0.258365 0.397108 2.292917 0.209978 4.534772 + 1.263002 0.366868 1.840061 1.024707 0.823594 0.377181 0.496780 + 0.994098 2.578946 5.739035 0.821921 3.039380 4.877840 0.532488 0.398817 + 0.517204 0.358350 0.284730 0.027824 1.463390 0.370939 0.232460 0.008940 0.349195 + 0.775054 0.672023 0.109781 0.021443 1.983693 1.298542 0.169219 0.043707 0.838324 5.102837 + 0.763094 5.349861 1.612642 0.088850 0.397640 3.509873 0.755219 0.436013 0.888693 0.561690 0.401070 + 1.890137 0.691594 0.466979 0.060820 2.831098 2.646440 0.379926 0.087640 0.488389 7.010411 8.929538 1.357738 + 0.540460 0.063347 0.141582 0.018288 4.102068 0.087872 0.020447 0.064863 1.385133 3.054968 5.525874 0.043394 3.135353 + 0.200122 0.032875 0.019509 0.042687 0.059723 0.072299 0.023282 0.036426 0.050226 0.039318 0.067505 0.023126 0.012695 0.015631 + 4.972745 
0.821562 4.670980 1.199607 5.901348 1.139018 0.503875 1.673207 0.962470 0.204155 0.273372 0.567639 0.570771 0.458799 0.233109 + 1.825593 0.580847 1.967383 0.420710 2.034980 0.864479 0.577513 0.124068 0.502294 2.653232 0.437116 1.048288 2.319555 0.151684 0.077004 8.113282 + 0.450842 0.661866 0.088064 0.037642 2.600668 0.390688 0.109318 0.218118 1.065585 0.564368 1.927515 0.120994 1.856122 4.154750 0.011074 0.377578 0.222293 + 0.526135 0.265730 0.581928 0.141233 5.413080 0.322761 0.153776 0.039217 8.351808 0.854294 0.940458 0.180650 0.975427 11.429924 0.026268 0.429221 0.273138 4.731579 + 3.839269 0.395134 0.145401 0.090101 4.193725 0.625409 0.696533 0.104335 0.377304 15.559906 2.508169 0.449074 3.404087 1.457957 0.052132 0.260296 2.903836 0.564762 0.681215 + + 0.062457 0.066826 0.049332 0.065270 0.006513 0.041231 0.058965 0.080852 0.028024 0.037024 0.075925 0.064131 0.019620 0.028710 0.104579 0.056388 0.062027 0.008241 0.033124 0.050761; + +model LG4X4 = + 0.658412 + 0.566269 0.540749 + 0.854111 0.058015 3.060574 + 0.884454 5.851132 1.279257 0.160296 + 1.309554 2.294145 1.438430 0.482619 0.992259 + 1.272639 0.182966 0.431464 2.992763 0.086318 2.130054 + 1.874713 0.684164 2.075952 1.296206 2.149634 0.571406 0.507160 + 0.552007 3.192521 4.840271 0.841829 5.103188 4.137385 0.351381 0.679853 + 0.227683 0.528161 0.644656 0.031467 3.775817 0.437589 0.189152 0.025780 0.665865 + 0.581512 1.128882 0.266076 0.048542 3.954021 2.071689 0.217780 0.082005 1.266791 8.904999 + 0.695190 3.010922 2.084975 0.132774 0.190734 2.498630 0.767361 0.326441 0.680174 0.652629 0.440178 + 0.967985 1.012866 0.720060 0.133055 1.776095 1.763546 0.278392 0.343977 0.717301 10.091413 14.013035 1.082703 + 0.344015 0.227296 0.291854 0.056045 4.495841 0.116381 0.092075 0.195877 4.001286 2.671718 5.069337 0.091278 4.643214 + 0.978992 0.156635 0.028961 0.209188 0.264277 0.296578 0.177263 0.217424 0.362942 0.086367 0.539010 0.172734 0.121821 0.161015 + 3.427163 0.878405 4.071574 0.925172 7.063879 1.033710 
0.451893 3.057583 1.189259 0.359932 0.742569 0.693405 0.584083 1.531223 1.287474 + 2.333253 0.802754 2.258357 0.360522 2.221150 1.283423 0.653836 0.377558 0.964545 4.797423 0.780580 1.422571 4.216178 0.599244 0.444362 5.231362 + 0.154701 0.830884 0.073037 0.094591 3.017954 0.312579 0.074620 0.401252 1.350568 0.336801 1.331875 0.068958 1.677263 5.832025 0.076328 0.548763 0.208791 + 0.221089 0.431617 1.238426 0.313945 8.558815 0.305772 0.181992 0.072258 12.869737 1.021885 1.531589 0.163829 1.575754 33.873091 0.079916 0.831890 0.307846 5.910440 + 2.088785 0.456530 0.199728 0.118104 4.310199 0.681277 0.752277 0.241015 0.531100 23.029406 4.414850 0.481711 5.046403 1.914768 0.466823 0.382271 3.717971 0.282540 0.964421 + +0.106471 0.074171 0.044513 0.096390 0.002148 0.066733 0.158908 0.037625 0.020691 0.014608 0.028797 0.105352 0.007864 0.007477 0.083595 0.055726 0.047711 0.003975 0.010087 0.027158; + +model LG4X = MIX{LG4X1,LG4X2,LG4X3,LG4X4}*R4; + +[ --------------------------------------------------------- + +cF class frequency mixture model of Wang et al. 
(2008) + --------------------------------------------------------- ] + +frequency Fclass1 = 0.02549352 0.01296012 0.005545202 0.006005566 0.01002193 0.01112289 0.008811948 0.001796161 0.004312188 0.2108274 0.2730413 0.01335451 0.07862202 0.03859909 0.005058205 0.008209453 0.03210019 0.002668138 0.01379098 0.2376598; +frequency Fclass2 = 0.09596966 0.008786096 0.02805857 0.01880183 0.005026264 0.006454635 0.01582725 0.7215719 0.003379354 0.002257725 0.003013483 0.01343441 0.001511657 0.002107865 0.006751404 0.04798539 0.01141559 0.000523736 0.002188483 0.004934972; +frequency Fclass3 = 0.01726065 0.005467988 0.01092937 0.3627871 0.001046402 0.01984758 0.5149206 0.004145081 0.002563289 0.002955213 0.005286931 0.01558693 0.002693098 0.002075771 0.003006167 0.01263069 0.01082144 0.000253451 0.001144787 0.004573568; +frequency Fclass4 = 0.1263139 0.09564027 0.07050061 0.03316681 0.02095119 0.05473468 0.02790523 0.009007538 0.03441334 0.005855319 0.008061884 0.1078084 0.009019514 0.05018693 0.07948 0.09447839 0.09258897 0.01390669 0.05367769 0.01230413; +model CF4 = POISSON+FMIX{Fclass1,Fclass2,Fclass3,Fclass4}+F+G; +model JTTCF4G = JTT+FMIX{empirical,Fclass1,Fclass2,Fclass3,Fclass4}+G; + +[ --------------------------------------------------------- + CAT-C10 profile mixture model of Le, Gascuel & Lartillot (2008) + --------------------------------------------------------- ] + +frequency C10pi1 = 0.4082573125 0.0081783015 0.0096285438 0.0069870889 0.0349388179 0.0075279735 0.0097846653 0.1221613215 0.0039151830 0.0125784287 0.0158338663 0.0059670150 0.0081313216 0.0061604332 0.0394155867 0.1682450664 0.0658132542 0.0018751587 0.0041579747 0.0604426865; +frequency C10pi2 = 0.1027763487 0.0418664491 0.0213272051 0.0155943616 0.0149663448 0.0440685478 0.0419667447 0.0138805792 0.0158864807 0.1066076641 0.1131944125 0.0436343681 0.0437800327 0.0180729309 0.0223250701 0.0529608087 0.1081741005 0.0045147205 0.0137373857 0.1606654446; +frequency C10pi3 = 0.0351766018 
0.0019678632 0.0016591476 0.0006768741 0.0078706538 0.0016559557 0.0019686768 0.0022420602 0.0012878339 0.3515819591 0.1278183107 0.0018856550 0.0242631753 0.0126221329 0.0029771559 0.0049998099 0.0255378034 0.0011907778 0.0037539283 0.3888636245; +frequency C10pi4 = 0.0408513927 0.0269887074 0.2185648186 0.2333814790 0.0037602852 0.0380451418 0.0901238869 0.1158332065 0.0373197176 0.0025523644 0.0052164616 0.0485017266 0.0022571778 0.0025108218 0.0108333610 0.0804527209 0.0302879995 0.0010815260 0.0069890931 0.0044481118; +frequency C10pi5 = 0.0185492661 0.0062362395 0.0024895723 0.0009775062 0.0070416514 0.0083539447 0.0024891617 0.0028952913 0.0040103982 0.1632422345 0.4443079409 0.0043570878 0.1202815687 0.0733329781 0.0048827648 0.0051642443 0.0131806647 0.0068759784 0.0144734420 0.0968580644; +frequency C10pi6 = 0.1106750119 0.0352190043 0.0405186210 0.1636437899 0.0014834855 0.0877962201 0.2638456592 0.0325228293 0.0163803600 0.0068334902 0.0140679579 0.0677158208 0.0048988133 0.0023256777 0.0298982139 0.0562887953 0.0426922497 0.0010338979 0.0040522304 0.0181078719; +frequency C10pi7 = 0.0522657662 0.0668294648 0.0714836849 0.0297745257 0.0143324928 0.0736540298 0.0388386669 0.0228101108 0.1551638111 0.0187406149 0.0653779932 0.0439469345 0.0207189121 0.0624033021 0.0145475497 0.0549017631 0.0370140058 0.0193756900 0.1110694548 0.0267512268; +frequency C10pi8 = 0.0116587342 0.0050990142 0.0064011054 0.0021742457 0.0105340743 0.0040203734 0.0024251112 0.0034709143 0.0366787049 0.0187185330 0.0676489746 0.0026694717 0.0143534813 0.3650985596 0.0031159927 0.0094848536 0.0073713920 0.0509564551 0.3574858593 0.0206341497; +frequency C10pi9 = 0.0627195947 0.2038782162 0.0428629162 0.0236193294 0.0052662886 0.1098111767 0.0686284994 0.0256174957 0.0332612124 0.0128968249 0.0305627740 0.2270839355 0.0124036991 0.0039181841 0.0140440613 0.0483152469 0.0463378087 0.0025143473 0.0065521118 0.0197062770; +frequency C10pi10 = 0.1145518598 0.0324008908 0.0750614981 
0.0416192189 0.0098549497 0.0339624663 0.0364907910 0.0503817581 0.0165233329 0.0092949460 0.0139153707 0.0423026886 0.0082240805 0.0046605982 0.0379221548 0.2610647896 0.1845829279 0.0017548981 0.0058538316 0.0195769483; +model C10 = POISSON+G+FMIX{C10pi1:1:0.1191344178,C10pi2:1:0.0874372456,C10pi3:1:0.1037105070,C10pi4:1:0.0922584809,C10pi5:1:0.1070492801,C10pi6:1:0.1329945166,C10pi7:1:0.0538028458,C10pi8:1:0.0691986212,C10pi9:1:0.1319937434,C10pi10:1:0.1024203429}; +model C10Opt = POISSON+G+FMIX{C10pi1,C10pi2,C10pi3,C10pi4,C10pi5,C10pi6,C10pi7,C10pi8,C10pi9,C10pi10}; + +[ --------------------------------------------------------- + CAT-C20 profile mixture model of Le, Gascuel & Lartillot (2008) + --------------------------------------------------------- ] +frequency C20pi1 = 0.0862412505 0.0171943793 0.0791293376 0.0329908619 0.0130504558 0.0169046938 0.0184526503 0.0366905299 0.0108013340 0.0097907148 0.0112826424 0.0220195221 0.0087821483 0.0044155335 0.0189273201 0.3178152357 0.2711700523 0.0015317305 0.0048342853 0.0179753220 ; +frequency C20pi2 = 0.2035582865 0.0050980810 0.0077052407 0.0031656079 0.0348667285 0.0064044073 0.0070859400 0.0195235515 0.0024392035 0.1152573291 0.0789777393 0.0042380850 0.0309187017 0.0112429356 0.0164189221 0.0496777139 0.1118946615 0.0017762569 0.0048448213 0.2849057867 ; +frequency C20pi3 = 0.0211547413 0.0014946177 0.0012755030 0.0005492865 0.0048188557 0.0012328812 0.0014539632 0.0011430874 0.0011346394 0.3928460626 0.1250644210 0.0013579946 0.0209788805 0.0128251737 0.0020247248 0.0026240726 0.0171914121 0.0011591071 0.0036027969 0.3860677787 ; +frequency C20pi4 = 0.0376903543 0.2885196153 0.0365411474 0.0109469400 0.0064073829 0.0893564381 0.0358365464 0.0191106776 0.0329513951 0.0101711878 0.0237495504 0.2897626974 0.0096528870 0.0036349802 0.0105337370 0.0356313768 0.0355926500 0.0027925238 0.0066557222 0.0144621902 ; +frequency C20pi5 = 0.0084597802 0.0053589922 0.0072525884 0.0024487852 0.0084909000 0.0042781483 
0.0025055486 0.0024277107 0.0433214027 0.0097713028 0.0380507037 0.0026741007 0.0080724771 0.3420463838 0.0021418673 0.0080418935 0.0055322116 0.0494840193 0.4375001561 0.0121410277 ; +frequency C20pi6 = 0.1759898886 0.0290429175 0.0332845569 0.1301263816 0.0017558693 0.0707183953 0.2182166681 0.0409535143 0.0130708195 0.0085622087 0.0159530702 0.0542946169 0.0054045759 0.0025276980 0.0371020404 0.0793480500 0.0540083424 0.0010592104 0.0036259116 0.0249552645 ; +frequency C20pi7 = 0.1634397322 0.0195541184 0.0438701833 0.0374272612 0.0088659891 0.0137554758 0.0220611924 0.5296717726 0.0090006141 0.0017569353 0.0061156267 0.0167117975 0.0029390787 0.0030641349 0.0126457766 0.0829342776 0.0142835614 0.0028640685 0.0032398299 0.0057985736 ; +frequency C20pi8 = 0.0917468761 0.0265853306 0.0290699087 0.0133818895 0.0284015012 0.0255084506 0.0196875685 0.0249898794 0.0449766405 0.0583555688 0.1155009222 0.0164915955 0.0395994595 0.0998479096 0.0209916159 0.0736482742 0.0661518462 0.0246463919 0.0972327226 0.0831856483 ; +frequency C20pi9 = 0.0646700714 0.0988015996 0.0228907308 0.0168733856 0.0077117603 0.0996414875 0.0544977962 0.0148893975 0.0313851988 0.0505983315 0.1844282999 0.0907931290 0.0774839960 0.0219148172 0.0105004469 0.0321196170 0.0411766062 0.0084303030 0.0206106035 0.0505824221 ; +frequency C20pi10 = 0.0135993865 0.0043408375 0.0018469375 0.0007951703 0.0100090240 0.0046420778 0.0018011758 0.0026794645 0.0072401918 0.0814026713 0.3661422246 0.0025158135 0.0734965132 0.2640965246 0.0038994134 0.0043668760 0.0075248451 0.0261564898 0.0660970801 0.0573472826 ; +frequency C20pi11 = 0.1478036236 0.0842845089 0.0726630217 0.0534743238 0.0048825808 0.0757166156 0.0727246460 0.0907725939 0.0262288856 0.0035781075 0.0126777221 0.1051660098 0.0059621792 0.0029903868 0.0156558198 0.1459903343 0.0634877444 0.0015928454 0.0050760739 0.0092719768 ; +frequency C20pi12 = 0.0186377412 0.0042055165 0.0019865236 0.0008329696 0.0054968852 0.0065890091 0.0020248504 
0.0021713483 0.0023665991 0.2020809776 0.4370381920 0.0029120653 0.1241860384 0.0385383157 0.0040672279 0.0046177381 0.0149904396 0.0026871667 0.0056324117 0.1189379840 ; +frequency C20pi13 = 0.0477624336 0.0505742667 0.0209574273 0.0141349161 0.0075791708 0.0429296799 0.0462688073 0.0052327914 0.0165351815 0.1741496627 0.1121253570 0.0577575020 0.0330288046 0.0130691347 0.0124374733 0.0264988925 0.0951754678 0.0031660482 0.0112465746 0.2093704079 ; +frequency C20pi14 = 0.4164189845 0.0056100821 0.0091701381 0.0045131748 0.0406937949 0.0061320495 0.0063229801 0.0946185184 0.0031057404 0.0076443223 0.0099885414 0.0038941773 0.0069323155 0.0048438356 0.0187840756 0.2360774301 0.0746274607 0.0012172579 0.0034825786 0.0459225422 ; +frequency C20pi15 = 0.0402295888 0.0735203003 0.1036647193 0.0365523994 0.0124782975 0.0826558132 0.0372197283 0.0233618081 0.2108307125 0.0093478727 0.0360561493 0.0482410586 0.0100289536 0.0459094917 0.0098503973 0.0533383445 0.0310209005 0.0140076639 0.1064377821 0.0152480184 ; +frequency C20pi16 = 0.0323453034 0.0236282995 0.2520448083 0.2431495959 0.0035976296 0.0330831153 0.0710274499 0.1016074562 0.0366225082 0.0031410809 0.0051980542 0.0470129351 0.0024028744 0.0024429276 0.0094837826 0.0848355278 0.0359083275 0.0008730928 0.0067247672 0.0048704638 ; +frequency C20pi17 = 0.1476256642 0.0334506604 0.0211972524 0.0403051550 0.0032327194 0.0371554480 0.0576893391 0.0330850942 0.0146392559 0.0108267008 0.0256200793 0.0451350877 0.0058651400 0.0047177179 0.3473710507 0.0892065279 0.0485899446 0.0016358749 0.0044177191 0.0282335685 ; +frequency C20pi18 = 0.1031448143 0.0717747663 0.0435172139 0.0386401502 0.0061762467 0.0786603123 0.0923369140 0.0202338419 0.0246761899 0.0376904275 0.0376283678 0.0921698920 0.0161883318 0.0067666433 0.0128302120 0.0951450188 0.1378566702 0.0022144738 0.0083041573 0.0740453560 ; +frequency C20pi19 = 0.0837542823 0.0899383244 0.0518811417 0.0804870571 0.0020735078 0.1456497470 0.1947759184 0.0229030361 
0.0268458796 0.0074079756 0.0190249576 0.1459287407 0.0067395241 0.0023063393 0.0085616014 0.0455739585 0.0451080843 0.0010771349 0.0049325333 0.0150302559 ; +frequency C20pi20 = 0.0578735570 0.0138313604 0.0491421636 0.2946738942 0.0011130839 0.0598250358 0.3402102668 0.0293911435 0.0139817004 0.0030525663 0.0062611922 0.0363365043 0.0027295976 0.0017034884 0.0156106390 0.0358044639 0.0249941878 0.0008664342 0.0038312977 0.0087674229 ; + +[ C20 with fixed weights ] +model C20 = POISSON+G+FMIX{C20pi1:1:0.0559910600,C20pi2:1:0.0514824870,C20pi3:1:0.0812922124,C20pi4:1:0.0721976867,C20pi5:1:0.0556718858,C20pi6:1:0.0331003080,C20pi7:1:0.0589501763,C20pi8:1:0.0263756889,C20pi9:1:0.0307584220,C20pi10:1:0.0376701125,C20pi11:1:0.0303058290,C20pi12:1:0.0808775576,C20pi13:1:0.0263349134,C20pi14:1:0.0579101455,C20pi15:1:0.0371248064,C20pi16:1:0.0586867766,C20pi17:1:0.0561479138,C20pi18:1:0.0349810886,C20pi19:1:0.0544937394,C20pi20:1:0.0596471901}; +[ C20 to weights to be optimized ] +model C20Opt = POISSON+G+FMIX{C20pi1,C20pi2,C20pi3,C20pi4,C20pi5,C20pi6,C20pi7,C20pi8,C20pi9,C20pi10,C20pi11,C20pi12,C20pi13,C20pi14,C20pi15,C20pi16,C20pi17,C20pi18,C20pi19,C20pi20}; + +model C20Test = POISSON+G+FMIX{C20pi1:1:0.089485,C20pi2:1:0.021281,C20pi3:1:0.119676,C20pi4:1:0.080933,C20pi5:1:0.064054,C20pi6:1:0.021848,C20pi7:1:0.063392,C20pi8:1:0.003629,C20pi9:1:0.007174,C20pi10:1:0.006256,C20pi11:1:0.023424,C20pi12:1:0.086825,C20pi13:1:0.038495,C20pi14:1:0.090028,C20pi15:1:0.020025,C20pi16:1:0.043484,C20pi17:1:0.076864,C20pi18:1:0.031347,C20pi19:1:0.047749,C20pi20:1:0.064031}; + +[ --------------------------------------------------------- + CAT-C30 profile mixture model of Le, Gascuel & Lartillot (2008) + --------------------------------------------------------- ] +frequency C30pi1 = 0.1100453954 0.0171294861 0.0640338464 0.1595411459 0.0019047235 0.0310187088 0.1098958823 0.0684301540 0.0137950707 0.0026283074 0.0073396531 0.0358553674 0.0024706414 0.0016629473 0.1669356820 0.1381790473 
0.0568342547 0.0004661120 0.0035970152 0.0082365591; +frequency C30pi2 = 0.0874125465 0.0806320385 0.0382152368 0.0326119879 0.0049826376 0.0798168854 0.0951700809 0.0144042708 0.0210626652 0.0399884450 0.0301585074 0.1147200015 0.0126488911 0.0048996596 0.0137397028 0.0873769666 0.1558616621 0.0015122843 0.0053974463 0.0793880836; +frequency C30pi3 = 0.0225477414 0.0014900535 0.0013034594 0.0005959279 0.0050018158 0.0011436556 0.0015030529 0.0011570953 0.0009374322 0.3944689167 0.0889573138 0.0013600872 0.0189102669 0.0089216031 0.0018312028 0.0028336408 0.0189813395 0.0006693746 0.0023303726 0.4250556480; +frequency C30pi4 = 0.0602158209 0.0136833299 0.0414987935 0.2900084105 0.0009525462 0.0621611083 0.3610869026 0.0281925621 0.0130500799 0.0030516237 0.0060401889 0.0352704692 0.0027460635 0.0014625624 0.0127175499 0.0318109377 0.0225279521 0.0007948027 0.0034024563 0.0093258397; +frequency C30pi5 = 0.0101223637 0.0028344920 0.0012928910 0.0006379191 0.0085989355 0.0035028551 0.0011249625 0.0024085229 0.0047753376 0.0701153131 0.4135913903 0.0016748492 0.0744862631 0.2785384406 0.0040466582 0.0037087155 0.0052379329 0.0200222636 0.0523938808 0.0408860135; +frequency C30pi6 = 0.1335831781 0.0284789590 0.0213891629 0.1125775537 0.0010514541 0.0565844323 0.2099572968 0.0207551870 0.0121330488 0.0073526522 0.0133278240 0.0771772013 0.0030571689 0.0016793592 0.1890195131 0.0484054108 0.0373318180 0.0009266995 0.0026946425 0.0225174379; +frequency C30pi7 = 0.0408277374 0.0124491768 0.0080464869 0.0030634898 0.0153918410 0.0102922098 0.0066010880 0.0058113137 0.0245211764 0.1487514547 0.1637802160 0.0075923232 0.0385527359 0.1575049888 0.0058352224 0.0151578617 0.0332220362 0.0264937109 0.1213342989 0.1547706314; +frequency C30pi8 = 0.2469059247 0.0106278945 0.0168929681 0.0027418266 0.1039406309 0.0103988197 0.0054944756 0.0373263209 0.0085752319 0.0292403793 0.0535091180 0.0056123053 0.0302246485 0.0251775640 0.0078098946 0.1642352274 0.1239889705 0.0053155877 
0.0163953993 0.0955868125; +frequency C30pi9 = 0.0549428629 0.1305426495 0.0202957532 0.0092915274 0.0099280995 0.0906036344 0.0417085054 0.0105563869 0.0363512470 0.0569584863 0.1681833183 0.1152521806 0.0592328363 0.0243860149 0.0083055411 0.0283778833 0.0412594019 0.0096355359 0.0249780472 0.0592100878; +frequency C30pi10 = 0.0462773303 0.0362984274 0.0412365193 0.0182504174 0.0172727117 0.0348990852 0.0224266258 0.0160971397 0.1357852215 0.0164966886 0.0598936127 0.0239396241 0.0164507129 0.1336320854 0.0117413009 0.0454156401 0.0304387749 0.0330338410 0.2350163763 0.0253978649; +frequency C30pi11 = 0.0474379955 0.0410179935 0.0222453982 0.0112116958 0.0082332447 0.0374051414 0.0388100853 0.0055998598 0.0149156570 0.1832173840 0.1100691114 0.0467850545 0.0356443791 0.0116643783 0.0100244663 0.0317171100 0.1114352326 0.0026685586 0.0099660086 0.2199312452; +frequency C30pi12 = 0.0213607696 0.0069976154 0.0039878996 0.0012941246 0.0061024858 0.0139566033 0.0036297282 0.0030017014 0.0038425894 0.1309465785 0.4566988203 0.0054567760 0.1947837355 0.0371808169 0.0040747282 0.0076991487 0.0198018718 0.0034086391 0.0064545692 0.0693207986; +frequency C30pi13 = 0.0919632044 0.0160004872 0.0764682386 0.0306717360 0.0117031014 0.0160060006 0.0171907654 0.0370684649 0.0100792697 0.0093123713 0.0097240970 0.0205385908 0.0075767282 0.0041589440 0.0179686194 0.3254471625 0.2744377258 0.0013887442 0.0044739725 0.0178217761; +frequency C30pi14 = 0.4649246103 0.0043013249 0.0075304815 0.0050731691 0.0233328752 0.0043571322 0.0057994247 0.1495242047 0.0023298425 0.0043361190 0.0055995530 0.0028525398 0.0039313170 0.0025588185 0.0186467246 0.2150194771 0.0477030158 0.0009038096 0.0020087184 0.0292668421; +frequency C30pi15 = 0.2051329382 0.0439661329 0.0339418395 0.1070980865 0.0020915940 0.0822742346 0.1989733497 0.0487574293 0.0127143076 0.0058124693 0.0133471767 0.0667787412 0.0043783406 0.0018235059 0.0110997761 0.0873961609 0.0519781961 0.0007361603 0.0023821404 0.0193174204; 
+frequency C30pi16 = 0.0263689890 0.0133613622 0.2727158135 0.3117715371 0.0039462429 0.0218978778 0.0694354212 0.0799842408 0.0309615130 0.0027521242 0.0038579661 0.0288630708 0.0018363656 0.0023351927 0.0062457560 0.0798729385 0.0324143174 0.0007229656 0.0063857732 0.0042705326; +frequency C30pi17 = 0.1526502637 0.0332784464 0.0168229991 0.0237392180 0.0040215287 0.0341733672 0.0377949108 0.0306214335 0.0141929803 0.0123317972 0.0290062362 0.0375543022 0.0064473224 0.0058584416 0.3864504800 0.0880336410 0.0489543188 0.0018252558 0.0048877798 0.0313552773; +frequency C30pi18 = 0.0080247558 0.0017408595 0.0006327403 0.0003385965 0.0023412143 0.0015507896 0.0007818945 0.0005403825 0.0010026402 0.3177056649 0.3737894172 0.0012598254 0.0488212345 0.0311968471 0.0020687549 0.0012095129 0.0065696791 0.0016309208 0.0043343553 0.1944599147; +frequency C30pi19 = 0.0599950319 0.1000540567 0.1334918892 0.0889730776 0.0016884984 0.0864856169 0.0962700957 0.0588796388 0.0327277145 0.0021467269 0.0070876372 0.1825860579 0.0033979446 0.0011800742 0.0141408084 0.0779002375 0.0448817374 0.0006249028 0.0032641120 0.0042241415; +frequency C30pi20 = 0.0393520657 0.0838170642 0.1425481600 0.0431197671 0.0099071945 0.1019786610 0.0394639510 0.0282866471 0.2095718357 0.0076101442 0.0258339558 0.0596434088 0.0084586675 0.0188680789 0.0096840517 0.0624998643 0.0347087967 0.0054645779 0.0564145251 0.0127685828; +frequency C30pi21 = 0.0072715487 0.0140998918 0.0019756795 0.0027603830 0.0067852535 0.0043339290 0.0025069369 0.0080834718 0.0113217919 0.0056609640 0.0394199644 0.0017735096 0.0079866080 0.1271475634 0.0041098092 0.0052244365 0.0043022271 0.6273570153 0.1084563767 0.0094226397; +frequency C30pi22 = 0.0907070068 0.0290062335 0.0860677696 0.0745872716 0.0063699858 0.0259377035 0.0386802115 0.4750046194 0.0168090013 0.0014721054 0.0055149849 0.0343855535 0.0024692074 0.0028859215 0.0112150781 0.0731110371 0.0153705714 0.0022914775 0.0041860660 0.0039281943; +frequency C30pi23 = 
0.0055291882 0.0024626303 0.0046086594 0.0011413426 0.0072105915 0.0022692184 0.0009683043 0.0016070950 0.0325831191 0.0082918400 0.0353677882 0.0013849437 0.0074486804 0.3744093753 0.0013374573 0.0057402692 0.0037279636 0.0330334445 0.4609978298 0.0098802591; +frequency C30pi24 = 0.2443263138 0.0045386562 0.0062422652 0.0031590902 0.0273880205 0.0053593950 0.0076715636 0.0196089609 0.0020189401 0.1017435067 0.0468424225 0.0045492259 0.0201286022 0.0060619450 0.0185219126 0.0497753825 0.1170795523 0.0009577255 0.0035333687 0.3104931504; +frequency C30pi25 = 0.0863111274 0.0984811895 0.0313963115 0.0600902926 0.0024419845 0.1672351286 0.2036096150 0.0175221435 0.0245245046 0.0105994220 0.0271209781 0.1485789590 0.0095824358 0.0029393105 0.0068276769 0.0347800318 0.0408210979 0.0014001253 0.0055105388 0.0202271268; +frequency C30pi26 = 0.0643926114 0.0369048739 0.1031213278 0.1628208462 0.0023165895 0.0752534859 0.1762701353 0.0297139006 0.0303503732 0.0088163033 0.0148016812 0.0727140107 0.0056748403 0.0043066715 0.0099270322 0.0926433867 0.0833129915 0.0011237109 0.0093801464 0.0161550816; +frequency C30pi27 = 0.1736682858 0.0943628709 0.0520404980 0.0285984935 0.0083596568 0.0722446698 0.0483894060 0.0781901497 0.0266134684 0.0068641911 0.0219499324 0.0964011794 0.0112303313 0.0058273974 0.0169661076 0.1547802460 0.0751701930 0.0028774511 0.0082130397 0.0172524320; +frequency C30pi28 = 0.0347856579 0.3075984538 0.0314157384 0.0092355245 0.0062754891 0.0861073155 0.0323568406 0.0170288127 0.0306438905 0.0091932292 0.0224428556 0.3020845818 0.0093720833 0.0034303536 0.0104447169 0.0326882932 0.0328713449 0.0025244855 0.0064171317 0.0130832013; +frequency C30pi29 = 0.1087737102 0.0051781020 0.0032679768 0.0015823203 0.0247877480 0.0057932006 0.0041769888 0.0134703172 0.0024765788 0.1643462917 0.2337152707 0.0027000391 0.0539213396 0.0316523420 0.0154886946 0.0188187787 0.0474912345 0.0037656478 0.0073106362 0.2512827825; +frequency C30pi30 = 0.1101008748 0.0324324597 
0.0435098681 0.0579268520 0.0072699765 0.0615196630 0.0828181488 0.0314463068 0.0308557019 0.0530865813 0.1096787834 0.0293860426 0.0458728977 0.0269153699 0.0296430687 0.0715887866 0.0685882454 0.0062324120 0.0257237601 0.0754042006; +model C30 = POISSON+G+FMIX{C30pi1:1:0.0095783264,C30pi2:1:0.0248476365,C30pi3:1:0.0636309366,C30pi4:1:0.0537939225,C30pi5:1:0.0295885587,C30pi6:1:0.0117587936,C30pi7:1:0.0132013428,C30pi8:1:0.0236868805,C30pi9:1:0.0261687659,C30pi10:1:0.0239821974,C30pi11:1:0.0257100906,C30pi12:1:0.0465072425,C30pi13:1:0.0546794546,C30pi14:1:0.0536085131,C30pi15:1:0.0270622670,C30pi16:1:0.0403913593,C30pi17:1:0.0474212700,C30pi18:1:0.0458816478,C30pi19:1:0.0214036510,C30pi20:1:0.0290385981,C30pi21:1:0.0123391793,C30pi22:1:0.0569350229,C30pi23:1:0.0419687568,C30pi24:1:0.0339027062,C30pi25:1:0.0388777376,C30pi26:1:0.0196343766,C30pi27:1:0.0233086174,C30pi28:1:0.0622722654,C30pi29:1:0.0184803385,C30pi30:1:0.0203395454}; + +[ --------------------------------------------------------- + CAT-C40 profile mixture model of Le, Gascuel & Lartillot (2008) + --------------------------------------------------------- ] +frequency C40pi1 = 0.0660259814 0.0231861755 0.1599815873 0.1054473175 0.0056586745 0.0273928499 0.0440360794 0.0711238664 0.0168194755 0.0039088727 0.0055316013 0.0366689617 0.0037412416 0.0013104807 0.0176359169 0.2497687201 0.1507079582 0.0006723214 0.0038290224 0.0065528958; +frequency C40pi2 = 0.0232377444 0.0122683027 0.2759650991 0.3532087982 0.0037987468 0.0197339134 0.0739378219 0.0576668030 0.0315866952 0.0031092806 0.0038711609 0.0259363304 0.0017355634 0.0024032103 0.0063116881 0.0657067704 0.0270483653 0.0007602894 0.0069602476 0.0047531689; +frequency C40pi3 = 0.0166486809 0.0012594763 0.0012622242 0.0005651446 0.0036665719 0.0010669784 0.0013356251 0.0008894749 0.0008231853 0.4129367561 0.0884689295 0.0011904105 0.0186054583 0.0082775676 0.0014029981 0.0021339439 0.0162167380 0.0006082049 0.0019553200 0.4206863114; +frequency C40pi4 = 
0.2394741986 0.0072901253 0.0120536943 0.0044741726 0.0283811727 0.0086558850 0.0105529632 0.0135109628 0.0038929844 0.0765957115 0.0358494908 0.0071093014 0.0199496319 0.0055991131 0.0114265585 0.0847798773 0.1797284519 0.0009838000 0.0042240671 0.2454678377; +frequency C40pi5 = 0.1194613086 0.0233255669 0.0294552140 0.0134272792 0.0150526644 0.0301537796 0.0192173037 0.0337675998 0.0214746045 0.0579001821 0.1446308373 0.0147261337 0.0561242940 0.0550467421 0.0631355418 0.0925266727 0.0831230185 0.0131636136 0.0331118002 0.0811758434; +frequency C40pi6 = 0.0567043710 0.0117359330 0.0364734454 0.2955500969 0.0008924801 0.0609516515 0.3795154126 0.0230469606 0.0118360971 0.0031182036 0.0060137466 0.0314205689 0.0028584065 0.0012972333 0.0124745819 0.0300334889 0.0227051137 0.0007738758 0.0031343761 0.0094639563; +frequency C40pi7 = 0.0179027412 0.0040967133 0.0035697688 0.0008870412 0.0160760340 0.0045395474 0.0023182113 0.0039829808 0.0127292680 0.0404650518 0.1676143477 0.0027994718 0.0424172255 0.3344862590 0.0020115128 0.0075841581 0.0068227293 0.0518381385 0.2452542553 0.0326045442; +frequency C40pi8 = 0.2712170094 0.0056480837 0.0141045260 0.0021017036 0.2003830179 0.0048264059 0.0023229984 0.0502501222 0.0053727960 0.0150684657 0.0330003443 0.0020646283 0.0154811217 0.0202990358 0.0045351023 0.1764198412 0.0839578061 0.0046265242 0.0141271048 0.0741933626; +frequency C40pi9 = 0.0894736584 0.1040026384 0.0190192153 0.0272183085 0.0045538316 0.1168091917 0.1275076663 0.0115685734 0.0215746293 0.0469424171 0.0512035100 0.1382047308 0.0147656854 0.0056590176 0.0095546504 0.0383953611 0.0836652641 0.0017079427 0.0062181292 0.0819555787; +frequency C40pi10 = 0.0495441385 0.0375345822 0.0315863530 0.0143641284 0.0182505609 0.0316504100 0.0215379122 0.0140199913 0.1108543799 0.0247065801 0.0700287927 0.0258142032 0.0188271760 0.1418048822 0.0112101202 0.0456094427 0.0361427973 0.0371985427 0.2223972375 0.0369177689; +frequency C40pi11 = 0.1704314254 0.0415784004 
0.0271109259 0.1098556600 0.0009747331 0.0917299929 0.2536458944 0.0249846466 0.0101389736 0.0058749399 0.0116526350 0.0903324267 0.0036512738 0.0013321301 0.0293613681 0.0561765645 0.0479045729 0.0006696817 0.0022637316 0.0203300232; +frequency C40pi12 = 0.0162725399 0.0054826071 0.0021876158 0.0010182101 0.0050614097 0.0104414465 0.0025141347 0.0021935389 0.0029914328 0.1328173512 0.4904441779 0.0040120394 0.1929931280 0.0376245580 0.0034333187 0.0040122105 0.0127074428 0.0032107554 0.0058100621 0.0647720205; +frequency C40pi13 = 0.0823765743 0.0734226431 0.0598389731 0.0311745159 0.0065694304 0.0686451074 0.0675530778 0.0178961594 0.0251143622 0.0291161743 0.0287904106 0.0982301674 0.0168022878 0.0064717899 0.0114044922 0.1302995288 0.1820374273 0.0022724618 0.0079573279 0.0540270885; +frequency C40pi14 = 0.3594965940 0.0072407229 0.0033421456 0.0031484357 0.0251417178 0.0049014279 0.0064962700 0.1194682267 0.0022970448 0.0458766662 0.0468053893 0.0050168849 0.0215568816 0.0092020461 0.0443915884 0.0465270945 0.0477755293 0.0024540215 0.0046450361 0.1942162766; +frequency C40pi15 = 0.2015583874 0.0430161610 0.0425386444 0.0954149893 0.0032365302 0.0772010857 0.1534908791 0.0667291678 0.0155218808 0.0067740832 0.0165114429 0.0547322644 0.0060162992 0.0025643300 0.0091970560 0.1185981804 0.0625472744 0.0009565508 0.0031150007 0.0202797924; +frequency C40pi16 = 0.1042731047 0.0147062345 0.0621645800 0.2424069523 0.0022450116 0.0356498946 0.1774821588 0.1697819523 0.0132648834 0.0018929517 0.0042542620 0.0220651981 0.0016441234 0.0012570256 0.0317041583 0.0778636230 0.0288515782 0.0006930898 0.0017741945 0.0060250231; +frequency C40pi17 = 0.0781183281 0.0111498472 0.0159270309 0.0041541669 0.0194448667 0.0240151620 0.0116633921 0.0111524105 0.0063589385 0.1354530457 0.2457574952 0.0093729846 0.1087781166 0.0262793949 0.0055294038 0.0408518858 0.0860514305 0.0031547586 0.0085108496 0.1482764918; +frequency C40pi18 = 0.0856592432 0.0101233167 0.0441923073 0.0135061568 
0.0136072878 0.0092590642 0.0078602552 0.0245400880 0.0055379075 0.0100591561 0.0103343559 0.0127318506 0.0080675803 0.0047153035 0.0175273997 0.3406479487 0.3573294650 0.0014243098 0.0035099810 0.0193670227; +frequency C40pi19 = 0.0674594695 0.1161734658 0.1163107783 0.0662588409 0.0021634231 0.0939360452 0.0865501280 0.0368556575 0.0381149118 0.0033238825 0.0093839985 0.1899736999 0.0039487389 0.0018212730 0.0151207830 0.0842204423 0.0565953680 0.0007187305 0.0046189437 0.0064514195; +frequency C40pi20 = 0.0572262322 0.0494723554 0.1083882793 0.1793932771 0.0015301521 0.0903668522 0.1992261265 0.0316472274 0.0291392067 0.0045804559 0.0100739563 0.1015624916 0.0040204606 0.0013701849 0.0063674130 0.0621142922 0.0496102162 0.0006669285 0.0046497641 0.0085941279; +frequency C40pi21 = 0.0036020163 0.0102712927 0.0013455508 0.0020871647 0.0045484804 0.0032718114 0.0017857730 0.0056391633 0.0064968790 0.0029292916 0.0232635081 0.0010419846 0.0044592278 0.0855714596 0.0024991984 0.0030671803 0.0025900250 0.7617821954 0.0678809532 0.0058668443; +frequency C40pi22 = 0.2032018418 0.0083895722 0.0143743754 0.0135011707 0.0098131618 0.0044514580 0.0083818173 0.6184886075 0.0027747899 0.0011828492 0.0039826789 0.0044598895 0.0020631785 0.0019619615 0.0085870399 0.0739919851 0.0108922273 0.0018606145 0.0015638674 0.0060769136; +frequency C40pi23 = 0.0050898779 0.0028740788 0.0057092962 0.0016126151 0.0061776450 0.0024693148 0.0012040415 0.0016334183 0.0393460780 0.0059088776 0.0249343597 0.0013713662 0.0049795162 0.3563126947 0.0014136424 0.0059527667 0.0036536770 0.0357987380 0.4853645852 0.0081934106; +frequency C40pi24 = 0.0403335679 0.0540186397 0.0216052457 0.0098218598 0.0081549541 0.0383639077 0.0375406578 0.0047934404 0.0176735565 0.1893424159 0.1051859862 0.0607377395 0.0305599836 0.0119140782 0.0077550551 0.0257110173 0.1009913165 0.0028780020 0.0115276935 0.2210908828; +frequency C40pi25 = 0.0790086293 0.1065441152 0.0309384274 0.0546012394 0.0024947877 0.1843375981 
0.1997882784 0.0192655847 0.0270700474 0.0075667489 0.0254542392 0.1553108816 0.0098024439 0.0023773444 0.0056640684 0.0332370813 0.0359574739 0.0011682801 0.0048820809 0.0145306498; +frequency C40pi26 = 0.0722240672 0.0489728405 0.0678929607 0.1194883992 0.0064755348 0.0708969573 0.1345886574 0.0287815397 0.0699011334 0.0173588702 0.0519870084 0.0490341790 0.0154411043 0.0348233029 0.0145597486 0.0589579876 0.0425972780 0.0087913770 0.0554386705 0.0317883834; +frequency C40pi27 = 0.1085842431 0.0206450023 0.0441956285 0.1529666596 0.0012502570 0.0405398136 0.1664851192 0.0336098469 0.0134902179 0.0038821795 0.0089861440 0.0576227094 0.0024339036 0.0014553522 0.1990095021 0.0846749753 0.0454715217 0.0005902831 0.0027650162 0.0113416246; +frequency C40pi28 = 0.0309526387 0.3195887318 0.0301336637 0.0082352132 0.0065593963 0.0832608108 0.0291974083 0.0154206187 0.0310385092 0.0098251607 0.0237900204 0.3062634996 0.0097071728 0.0036891639 0.0095029109 0.0295285439 0.0303052301 0.0028125285 0.0068850639 0.0133037148; +frequency C40pi29 = 0.0098953741 0.0019604525 0.0007307935 0.0003748228 0.0028276741 0.0017337004 0.0009182100 0.0006997068 0.0010419482 0.3115040359 0.3750387796 0.0013960508 0.0474451070 0.0298607430 0.0025296256 0.0014628019 0.0075738968 0.0016799771 0.0040259930 0.1973003069; +frequency C40pi30 = 0.1163213921 0.0273321006 0.0250163656 0.0731917718 0.0034792282 0.0586677248 0.1380880502 0.0193193469 0.0160240740 0.0712243431 0.0771473538 0.0355120487 0.0242841072 0.0094117688 0.0508926833 0.0475560280 0.0726552233 0.0026892716 0.0076166020 0.1235705162; +frequency C40pi31 = 0.1285218235 0.0373073487 0.1179844215 0.0402749992 0.0172928883 0.0439706110 0.0250692272 0.1127033137 0.0606981059 0.0109350265 0.0258415767 0.0288749652 0.0167592956 0.0199118302 0.0180674983 0.1741489481 0.0648967655 0.0063574951 0.0321771650 0.0182066946; +frequency C40pi32 = 0.0372286941 0.0094528028 0.0053377315 0.0023703173 0.0144940088 0.0079097138 0.0048585146 0.0046433943 
0.0186795102 0.1820459527 0.1780099317 0.0058198481 0.0371334296 0.1463772419 0.0048538601 0.0103570678 0.0284161577 0.0211293603 0.0958905187 0.1849919442; +frequency C40pi33 = 0.0535643726 0.1159797757 0.0239172676 0.0113537364 0.0096256227 0.0928585070 0.0391699080 0.0120279334 0.0384887950 0.0522748270 0.1892392595 0.0996037748 0.0712219098 0.0264213736 0.0083720574 0.0299114019 0.0389484845 0.0104232046 0.0265030050 0.0500947835; +frequency C40pi34 = 0.1332424803 0.0033147683 0.0022704992 0.0012739239 0.0246514263 0.0030843469 0.0040461524 0.0089139209 0.0015864680 0.1971284995 0.1251288442 0.0023713225 0.0286947200 0.0156995251 0.0118845743 0.0171461828 0.0563298009 0.0017341820 0.0048778410 0.3566205216; +frequency C40pi35 = 0.1498658185 0.0326607222 0.0176452820 0.0280354786 0.0035437399 0.0348151308 0.0435380704 0.0311112643 0.0140625707 0.0101953314 0.0251433928 0.0393124980 0.0051548319 0.0047533945 0.3923800449 0.0874496981 0.0473306717 0.0015215239 0.0043208299 0.0271597054; +frequency C40pi36 = 0.4214366359 0.0061425967 0.0121590498 0.0073305074 0.0187609694 0.0072748556 0.0086837775 0.0902333103 0.0030262044 0.0039362777 0.0047193320 0.0051508681 0.0038306586 0.0027156136 0.0208940236 0.2901188793 0.0651922314 0.0008108235 0.0023622848 0.0252211004; +frequency C40pi37 = 0.1770713890 0.1332782050 0.0311656783 0.0226500225 0.0078348946 0.0752471493 0.0509767242 0.0897389513 0.0220667143 0.0059519850 0.0205369728 0.1257689326 0.0092982479 0.0040514178 0.0264087912 0.1169591448 0.0565566955 0.0029947127 0.0049346701 0.0165087010; +frequency C40pi38 = 0.0293984032 0.0370901720 0.1483622633 0.1099709900 0.0031729093 0.0388688450 0.0464270335 0.4222420155 0.0272494642 0.0007997326 0.0037634298 0.0622314461 0.0016657052 0.0015039626 0.0056481827 0.0472252404 0.0086568982 0.0009176022 0.0027693124 0.0020363920; +frequency C40pi39 = 0.0265779317 0.0791104753 0.1318603134 0.0280314140 0.0101369144 0.0989710810 0.0269057233 0.0173376629 0.2815133703 0.0064646977 
0.0268210053 0.0474749135 0.0072375268 0.0276960902 0.0083014995 0.0426276702 0.0259042511 0.0078528946 0.0891598394 0.0100147256; +frequency C40pi40 = 0.0096096503 0.0027136180 0.0013104432 0.0006331856 0.0077301682 0.0033899420 0.0010471898 0.0020227436 0.0039001415 0.0733098005 0.4451691588 0.0014931484 0.0732575295 0.2630171690 0.0042768091 0.0036117358 0.0057928403 0.0181275729 0.0370698053 0.0425173480; +model C40 = POISSON+G+FMIX{C40pi1:1:0.0223853788,C40pi2:1:0.0338891820,C40pi3:1:0.0577169375,C40pi4:1:0.0252416233,C40pi5:1:0.0108607921,C40pi6:1:0.0462373793,C40pi7:1:0.0102293175,C40pi8:1:0.0147523625,C40pi9:1:0.0143161352,C40pi10:1:0.0182302541,C40pi11:1:0.0204025079,C40pi12:1:0.0425505156,C40pi13:1:0.0248627269,C40pi14:1:0.0105892988,C40pi15:1:0.0188238725,C40pi16:1:0.0086663445,C40pi17:1:0.0148496147,C40pi18:1:0.0343037402,C40pi19:1:0.0225335203,C40pi20:1:0.0174068578,C40pi21:1:0.0112207827,C40pi22:1:0.0443532245,C40pi23:1:0.0392573370,C40pi24:1:0.0196756555,C40pi25:1:0.0287690328,C40pi26:1:0.0114441177,C40pi27:1:0.0112338740,C40pi28:1:0.0582694099,C40pi29:1:0.0444272279,C40pi30:1:0.0112010942,C40pi31:1:0.0145176111,C40pi32:1:0.0114629026,C40pi33:1:0.0239628061,C40pi34:1:0.0266266492,C40pi35:1:0.0481201159,C40pi36:1:0.0371147423,C40pi37:1:0.0160476688,C40pi38:1:0.0237249267,C40pi39:1:0.0235226203,C40pi40:1:0.0261998398}; + +[ --------------------------------------------------------- + CAT-C50 profile mixture model of Le, Gascuel & Lartillot (2008) + --------------------------------------------------------- ] +frequency C50pi1 = 0.1357566757 0.0328511938 0.0937692919 0.0757182069 0.0041887049 0.0448010470 0.0572805366 0.1210866186 0.0167465028 0.0049719235 0.0113823284 0.0458096069 0.0064563157 0.0029292810 0.0228705187 0.2060115780 0.1011347978 0.0012443033 0.0056104605 0.0093801079; +frequency C50pi2 = 0.0530862751 0.1905936010 0.0595772279 0.0320970468 0.0026608079 0.1152605895 0.0840617877 0.0196495178 0.0274729775 0.0064919200 0.0158709120 
0.2635539775 0.0078171228 0.0017231166 0.0121639300 0.0449347664 0.0472425608 0.0008407188 0.0037608716 0.0111402722; +frequency C50pi3 = 0.0083279799 0.0007172026 0.0006359642 0.0003134388 0.0020547407 0.0007351595 0.0005373710 0.0005576905 0.0004858721 0.4370910601 0.1208722220 0.0006394909 0.0195499664 0.0090175268 0.0007265254 0.0007876194 0.0057076665 0.0006453449 0.0016797264 0.3889174318; +frequency C50pi4 = 0.2072868350 0.0166858699 0.0129177658 0.0020625574 0.0849982226 0.0151757635 0.0065903656 0.0472047575 0.0130289256 0.0345690755 0.1042722764 0.0075861385 0.0498042308 0.0572909747 0.0064928361 0.1183618036 0.0780339514 0.0128352368 0.0323576924 0.0924447209; +frequency C50pi5 = 0.0364181183 0.0076427099 0.0052725527 0.0020389950 0.0171009943 0.0064088232 0.0042399368 0.0053824238 0.0198596156 0.1361523026 0.1651892915 0.0045481616 0.0387479055 0.2025922657 0.0055053348 0.0121111950 0.0254621828 0.0327580458 0.1368025306 0.1357666147; +frequency C50pi6 = 0.0535489196 0.0099543365 0.0269073208 0.3076150732 0.0007101021 0.0574988641 0.4066173371 0.0204537673 0.0096286483 0.0025879708 0.0049721459 0.0280989086 0.0025143457 0.0010618006 0.0124317994 0.0247246015 0.0191107367 0.0006385967 0.0024132214 0.0085115039; +frequency C50pi7 = 0.0074733729 0.0025226602 0.0033967505 0.0005574007 0.0081158286 0.0037658904 0.0013610444 0.0022017759 0.0115142679 0.0195730439 0.1268878488 0.0018497296 0.0269141680 0.3821985941 0.0019970421 0.0057127939 0.0039692337 0.0553575998 0.3184099394 0.0162210153; +frequency C50pi8 = 0.2615592974 0.0027098854 0.0124908261 0.0020153852 0.2740228527 0.0017043893 0.0007667803 0.0463498030 0.0019474361 0.0082858275 0.0147048711 0.0010787235 0.0063051368 0.0062080862 0.0039442437 0.1940042648 0.0963699489 0.0016185483 0.0048431386 0.0590705550; +frequency C50pi9 = 0.1190557043 0.0956320251 0.0215995297 0.0378323341 0.0041536088 0.1151348174 0.1337084452 0.0179375220 0.0216767047 0.0336228770 0.0557402194 0.1132452331 0.0178407325 
0.0063405927 0.0147606946 0.0478666925 0.0712091035 0.0022867238 0.0075728630 0.0627835766; +frequency C50pi10 = 0.0505010344 0.0281381134 0.0341872191 0.0178157543 0.0183140005 0.0271729546 0.0212018661 0.0176052654 0.1190104107 0.0161645217 0.0561232531 0.0203908848 0.0146521042 0.1553484132 0.0135251600 0.0478959652 0.0292963208 0.0376058633 0.2477283800 0.0273225153; +frequency C50pi11 = 0.1239446910 0.0355525870 0.0409769096 0.1479953346 0.0011563976 0.0908869312 0.2700270273 0.0283589709 0.0126760201 0.0064825033 0.0122101302 0.0787433823 0.0042467440 0.0016540857 0.0205717500 0.0552940245 0.0474239965 0.0008596621 0.0027823209 0.0181565313; +frequency C50pi12 = 0.0160542063 0.0027359185 0.0014708079 0.0007004900 0.0034820152 0.0061470051 0.0016359686 0.0022137927 0.0013207229 0.1640035117 0.4616043506 0.0021342205 0.2174099502 0.0143751693 0.0013694259 0.0037614383 0.0172651408 0.0011454338 0.0019438536 0.0792265779; +frequency C50pi13 = 0.1548192401 0.0131324559 0.0280584102 0.0095301620 0.0166267416 0.0175228950 0.0170969133 0.0179616718 0.0078385586 0.0865181208 0.0523369910 0.0132802182 0.0326348210 0.0083511229 0.0145594414 0.1096327081 0.2218108602 0.0015829972 0.0062173360 0.1704883347; +frequency C50pi14 = 0.2950313592 0.0027580697 0.0021616268 0.0015364190 0.0375439186 0.0028808733 0.0042976283 0.0261726702 0.0008294969 0.0834938143 0.0553606311 0.0022642314 0.0181259911 0.0074433078 0.0126794048 0.0382913338 0.0783205173 0.0010015148 0.0034016419 0.3264055498; +frequency C50pi15 = 0.1683177099 0.0820396152 0.0526048706 0.0822517150 0.0023029997 0.0969341246 0.1488943001 0.0535291188 0.0179803231 0.0032503636 0.0114941086 0.1156402642 0.0039439899 0.0015002945 0.0066854154 0.0924511658 0.0480769504 0.0006152103 0.0025022919 0.0089851683; +frequency C50pi16 = 0.0334088176 0.0134485791 0.1590918150 0.3657542471 0.0025127086 0.0327665151 0.1820739351 0.0740807194 0.0202010901 0.0016650025 0.0036700956 0.0295517886 0.0017087810 0.0011422805 0.0073155123 
0.0426788071 0.0211162106 0.0005931485 0.0034724580 0.0037474882; +frequency C50pi17 = 0.0777586977 0.0174438357 0.0053423343 0.0043431532 0.0062523949 0.0220851281 0.0161769285 0.0053903202 0.0080675581 0.1052945216 0.1617365895 0.0148319919 0.0288253912 0.0168985297 0.2565426868 0.0202089662 0.0542929694 0.0060146095 0.0078109966 0.1646823969; +frequency C50pi18 = 0.0727013979 0.0048977192 0.0026095383 0.0011420120 0.0198747408 0.0066949336 0.0030401434 0.0079074845 0.0026492900 0.1685788878 0.3185489163 0.0026024909 0.0735597038 0.0490419983 0.0051699104 0.0128630830 0.0305356924 0.0050857840 0.0095279173 0.2029683559; +frequency C50pi19 = 0.0658153836 0.0833432992 0.0224582275 0.0107735824 0.0092974677 0.0745951987 0.0299754097 0.0146336557 0.0148026634 0.0671888719 0.2198675990 0.0868172087 0.1084156835 0.0155812696 0.0071132147 0.0381451947 0.0562948237 0.0056421684 0.0102813038 0.0589577740; +frequency C50pi20 = 0.0525278351 0.0364897390 0.0903013988 0.1854660991 0.0037795400 0.0776857292 0.1789287290 0.0232011648 0.0687702011 0.0135825419 0.0337350646 0.0458143770 0.0108457797 0.0191020037 0.0088729983 0.0495289201 0.0389358438 0.0046292762 0.0354195947 0.0223831639; +frequency C50pi21 = 0.0026515970 0.0080885204 0.0010572021 0.0016052142 0.0036540307 0.0022979498 0.0014681767 0.0046230912 0.0043887616 0.0020669456 0.0172444871 0.0006593575 0.0034691503 0.0658351447 0.0019185467 0.0022498420 0.0021278866 0.8183345006 0.0515918357 0.0046677595; +frequency C50pi22 = 0.0548133174 0.0692044159 0.0211265710 0.0207779125 0.0072646572 0.0567865657 0.0738456579 0.0051797705 0.0168408457 0.1386104888 0.0713795154 0.0896393340 0.0201205491 0.0082150393 0.0104049016 0.0282344422 0.0995597110 0.0019722093 0.0074054035 0.1986186919; +frequency C50pi23 = 0.0047955268 0.0028033787 0.0050506238 0.0014080516 0.0061671241 0.0019350126 0.0009861551 0.0014396818 0.0389623239 0.0048950388 0.0151748150 0.0012306644 0.0032520404 0.3601993060 0.0011266316 0.0054509935 0.0034763921 
0.0362899931 0.4980200998 0.0073361467; +frequency C50pi24 = 0.0365462996 0.0280070630 0.0183606115 0.0070525803 0.0093251684 0.0300239431 0.0221812842 0.0047778642 0.0178840316 0.2025947306 0.1973012130 0.0250209750 0.0557862640 0.0258067541 0.0042772210 0.0209374223 0.0731398943 0.0049738166 0.0200601168 0.1959427463; +frequency C50pi25 = 0.0684197684 0.0111619750 0.0544764241 0.0224313301 0.0106958312 0.0091799953 0.0097436799 0.0255871619 0.0055558006 0.0059416697 0.0076746853 0.0144198991 0.0056892166 0.0037356845 0.0172554137 0.3527301149 0.3586913194 0.0012501907 0.0028636710 0.0124961682; +frequency C50pi26 = 0.0495330775 0.1060064564 0.1511923969 0.0483471288 0.0080946362 0.0886108407 0.0449556763 0.0331436148 0.1447288287 0.0061850770 0.0190407203 0.0948075276 0.0063418871 0.0126162987 0.0100869563 0.0799801169 0.0445418973 0.0044765096 0.0363930724 0.0109172804; +frequency C50pi27 = 0.0702411901 0.0642050323 0.0779553908 0.0510328304 0.0042438849 0.0723300485 0.0883747710 0.0177347101 0.0233800891 0.0198779320 0.0183537117 0.1051267065 0.0107865869 0.0037987118 0.0112811107 0.1345081583 0.1805543234 0.0014252764 0.0055089381 0.0392805971; +frequency C50pi28 = 0.1207399152 0.1741788075 0.0385528120 0.0162689581 0.0118494185 0.0760068404 0.0337935391 0.0653431008 0.0342783806 0.0085426053 0.0256788075 0.1434443984 0.0112347894 0.0061270793 0.0294493558 0.1091415488 0.0634181251 0.0046156419 0.0085374279 0.0187984481; +frequency C50pi29 = 0.0064521696 0.0021817337 0.0005939658 0.0003904032 0.0021538307 0.0019099968 0.0008007758 0.0005208471 0.0011374294 0.2850758996 0.4278536740 0.0013920239 0.0561988528 0.0449501501 0.0026289702 0.0011053664 0.0055157148 0.0022753671 0.0059612583 0.1509015707; +frequency C50pi30 = 0.0969092741 0.0359723370 0.0633194168 0.0411020773 0.0145578946 0.0466661704 0.0469223767 0.0374614202 0.0537149580 0.0394603009 0.0856256544 0.0283577862 0.0346435320 0.0507298072 0.0167177549 0.0990945318 0.0806503833 0.0128373826 0.0598972198 
0.0553597218; +frequency C50pi31 = 0.0840212010 0.0214242172 0.2240668646 0.0354684798 0.0265031681 0.0235675678 0.0076026464 0.1173325117 0.0516019781 0.0048917455 0.0067211727 0.0173653354 0.0079342101 0.0087501486 0.0093276105 0.2637097946 0.0630157977 0.0022314593 0.0170994247 0.0073646661; +frequency C50pi32 = 0.0055061507 0.0012508737 0.0004824961 0.0004530173 0.0054435931 0.0011315076 0.0004150379 0.0012285001 0.0019884532 0.0617431901 0.4342418135 0.0008161868 0.0554628445 0.3289659386 0.0025814794 0.0021197505 0.0029510440 0.0172981374 0.0412097497 0.0347102358; +frequency C50pi33 = 0.0442014612 0.1295816316 0.0258622052 0.0148900471 0.0076165815 0.1301765579 0.0636708052 0.0105339122 0.0662542863 0.0423977240 0.1434197528 0.1040381429 0.0403363621 0.0260540342 0.0089335090 0.0242573966 0.0317938092 0.0077831996 0.0309973779 0.0472012033; +frequency C50pi34 = 0.0571984155 0.0034929878 0.0031324721 0.0012472712 0.0113230439 0.0025279922 0.0040737817 0.0030647398 0.0020494153 0.3131200932 0.0901750144 0.0034699557 0.0242565205 0.0112345295 0.0048197020 0.0095675953 0.0529842025 0.0010645104 0.0041851135 0.3970126433; +frequency C50pi35 = 0.1141963934 0.0102229903 0.0178644126 0.0172307307 0.0056978908 0.0039055039 0.0085974326 0.7425714921 0.0026414175 0.0005602022 0.0019872568 0.0055400059 0.0004739977 0.0010663175 0.0054302447 0.0508318204 0.0055408544 0.0018890811 0.0012409205 0.0025110348; +frequency C50pi36 = 0.3531758625 0.0043402857 0.0031812423 0.0030024877 0.0165711581 0.0029126214 0.0042077690 0.4520896100 0.0021366362 0.0063692579 0.0120143269 0.0022586970 0.0080260130 0.0043865828 0.0111462027 0.0658344033 0.0182952730 0.0010872878 0.0023330172 0.0266312657; +frequency C50pi37 = 0.0310798708 0.0234519814 0.1273669012 0.1197925100 0.0031216960 0.0295858842 0.0470763446 0.4883046368 0.0193412101 0.0008855622 0.0032808220 0.0408430573 0.0014984226 0.0016298596 0.0063229464 0.0423452622 0.0082797260 0.0007718998 0.0024996877 0.0025217188; +frequency 
C50pi38 = 0.0370340667 0.0689410214 0.1704407181 0.1041817082 0.0018108784 0.0715495095 0.0659866718 0.2159298358 0.0443591808 0.0008668888 0.0064679416 0.1275300877 0.0027248464 0.0014178323 0.0060253154 0.0534574556 0.0147073432 0.0007999410 0.0037708147 0.0019979426; +frequency C50pi39 = 0.0160398536 0.0526622999 0.1051167149 0.0187352256 0.0085330116 0.0922616498 0.0154450839 0.0076235155 0.3848449137 0.0057129406 0.0277195224 0.0219347380 0.0071078308 0.0376358992 0.0072201969 0.0209969653 0.0142198783 0.0096946226 0.1384243143 0.0080708232; +frequency C50pi40 = 0.0165549167 0.0085856833 0.0049441851 0.0016567380 0.0086529073 0.0184087838 0.0033759867 0.0033844413 0.0084695063 0.0483923758 0.4963073963 0.0056997331 0.1949377866 0.0999527140 0.0060271256 0.0084289585 0.0122619536 0.0114013282 0.0192314834 0.0233259964; +frequency C50pi41 = 0.0227379959 0.0137060298 0.3162561805 0.2932103363 0.0037073869 0.0169119273 0.0380984220 0.0550224760 0.0319886436 0.0039219190 0.0041582288 0.0312539900 0.0019467591 0.0022276545 0.0059660826 0.0998736999 0.0462336456 0.0007310446 0.0069012376 0.0051463400; +frequency C50pi42 = 0.2406936002 0.0197081082 0.0462578641 0.0206379264 0.0186726798 0.0189843646 0.0129785315 0.1749109142 0.0118714342 0.0049349532 0.0126237761 0.0127876711 0.0095642661 0.0083606873 0.0326283314 0.2101300187 0.1130042042 0.0041951500 0.0069210515 0.0201344675; +frequency C50pi43 = 0.0214325714 0.3730744306 0.0220674626 0.0037495290 0.0069038342 0.0670391950 0.0159298773 0.0126211348 0.0284477629 0.0102051798 0.0242954287 0.3272456489 0.0093147452 0.0036403029 0.0070138928 0.0216860624 0.0232259733 0.0030422478 0.0065368590 0.0125278613; +frequency C50pi44 = 0.1567707052 0.0258059606 0.0161658338 0.0223946414 0.0074382689 0.0274455582 0.0410010574 0.0360501033 0.0159972680 0.0640941463 0.0944756654 0.0192586366 0.0312789234 0.0227728534 0.1653169011 0.0640177954 0.0549103568 0.0050980224 0.0138248643 0.1158824381; +frequency C50pi45 = 0.4345912387 
0.0061142999 0.0097660767 0.0060102195 0.0197377879 0.0069062805 0.0082800652 0.0829075516 0.0029125126 0.0047747098 0.0054182241 0.0049974525 0.0039676868 0.0029052002 0.0193588692 0.2795854727 0.0677816788 0.0008196092 0.0025196339 0.0306454302; +frequency C50pi46 = 0.0296734965 0.1443250343 0.0128668160 0.0059561454 0.0129805897 0.0492311054 0.0262726056 0.0069437743 0.0676183913 0.0452364160 0.1374511139 0.0907089722 0.0308070846 0.0816441785 0.0060701025 0.0197130339 0.0299715868 0.0461468661 0.1119414237 0.0444412635; +frequency C50pi47 = 0.1089911217 0.0159187676 0.0643054232 0.2086425054 0.0016540963 0.0375565797 0.1791004993 0.0610564917 0.0144660242 0.0038322948 0.0067778708 0.0372270242 0.0022817918 0.0012634818 0.0851792013 0.1065821239 0.0524401536 0.0005901255 0.0027836060 0.0093508169; +frequency C50pi48 = 0.1429463629 0.0304191716 0.0191145368 0.0351867799 0.0031493079 0.0341248336 0.0508492526 0.0305914291 0.0134276644 0.0070227247 0.0197257013 0.0421442438 0.0038904796 0.0040697467 0.4052202085 0.0874406009 0.0445304918 0.0012842531 0.0039485525 0.0209136585; +frequency C50pi49 = 0.0580116857 0.0903213669 0.0369245281 0.0613603988 0.0022829951 0.2073851382 0.2225853236 0.0159476910 0.0311816018 0.0068543753 0.0217092509 0.1504781849 0.0084841006 0.0020581132 0.0046206107 0.0276754451 0.0321477211 0.0011651089 0.0051889637 0.0136173964; +frequency C50pi50 = 0.2153540940 0.0359173007 0.0219927944 0.0735128474 0.0037017294 0.0566408566 0.1350375818 0.0662986417 0.0157121780 0.0138456188 0.0266922211 0.0474338339 0.0088042600 0.0035035311 0.0739583083 0.0921989198 0.0575687235 0.0019306896 0.0044520833 0.0454437865; +model C50 = 
POISSON+G+FMIX{C50pi1:1:0.0164297003,C50pi2:1:0.0273175755,C50pi3:1:0.0460247610,C50pi4:1:0.0084864734,C50pi5:1:0.0125389252,C50pi6:1:0.0343549036,C50pi7:1:0.0130241102,C50pi8:1:0.0094755681,C50pi9:1:0.0190040551,C50pi10:1:0.0151902354,C50pi11:1:0.0320534760,C50pi12:1:0.0210059850,C50pi13:1:0.0237408547,C50pi14:1:0.0239841203,C50pi15:1:0.0213748021,C50pi16:1:0.0210717705,C50pi17:1:0.0050241805,C50pi18:1:0.0166262276,C50pi19:1:0.0143945956,C50pi20:1:0.0104391130,C50pi21:1:0.0107628277,C50pi22:1:0.0148818171,C50pi23:1:0.0321480239,C50pi24:1:0.0145477978,C50pi25:1:0.0332355807,C50pi26:1:0.0143190281,C50pi27:1:0.0234478734,C50pi28:1:0.0183044983,C50pi29:1:0.0403269452,C50pi30:1:0.0135629530,C50pi31:1:0.0091880799,C50pi32:1:0.0158270022,C50pi33:1:0.0121019379,C50pi34:1:0.0353560982,C50pi35:1:0.0404495617,C50pi36:1:0.0104569232,C50pi37:1:0.0146187792,C50pi38:1:0.0093984095,C50pi39:1:0.0146773809,C50pi40:1:0.0201635562,C50pi41:1:0.0255640273,C50pi42:1:0.0039486842,C50pi43:1:0.0393652608,C50pi44:1:0.0056415419,C50pi45:1:0.0382833580,C50pi46:1:0.0039735086,C50pi47:1:0.0140269355,C50pi48:1:0.0476703673,C50pi49:1:0.0204062788,C50pi50:1:0.0117835304}; + +[ --------------------------------------------------------- + CAT-C60 profile mixture model of Le, Gascuel & Lartillot (2008) + --------------------------------------------------------- ] +frequency C60pi1 = 0.1534363248 0.0444389067 0.0796726990 0.0546757288 0.0047306596 0.0514333025 0.0529324359 0.1103775749 0.0174480218 0.0050343887 0.0130294160 0.0603928711 0.0075550589 0.0035554315 0.0249523704 0.2029625968 0.0957668473 0.0014444483 0.0059800307 0.0101808864; +frequency C60pi2 = 0.0281984692 0.3031055487 0.0312954609 0.0091549350 0.0019503463 0.0939884393 0.0388530140 0.0084028325 0.0155384715 0.0107872879 0.0217786594 0.3476042929 0.0109904917 0.0015919288 0.0071539896 0.0197479052 0.0328352333 0.0009209994 0.0025714024 0.0135302919; +frequency C60pi3 = 0.0083680740 0.0007319768 0.0006123446 0.0002228366 0.0020433870 
0.0009498685 0.0004731544 0.0004825748 0.0005189995 0.3768453098 0.2608334606 0.0006296168 0.0315700586 0.0123984358 0.0009595916 0.0009746383 0.0049990761 0.0008657759 0.0017132332 0.2938075872; +frequency C60pi4 = 0.2227229348 0.0064846074 0.0061206496 0.0007997588 0.1640285908 0.0051051888 0.0027280806 0.0202702520 0.0037183875 0.0455406072 0.0883350071 0.0022832871 0.0348094559 0.0228667054 0.0035471579 0.0850040072 0.1012848285 0.0048424833 0.0096500033 0.1698580069; +frequency C60pi5 = 0.0412139519 0.0067627055 0.0051067690 0.0017434391 0.0204715649 0.0057538477 0.0037263409 0.0069107492 0.0180293946 0.1154281623 0.1693562458 0.0042900270 0.0414066566 0.2239001858 0.0058416410 0.0149106129 0.0239548406 0.0332237129 0.1379349474 0.1200342049; +frequency C60pi6 = 0.0480550249 0.0308438053 0.0940628721 0.2084606133 0.0037801787 0.0747676701 0.1855184661 0.0191402239 0.0872162350 0.0094685435 0.0277340828 0.0375741243 0.0088308358 0.0196000958 0.0081267777 0.0439680761 0.0324588883 0.0034665720 0.0387499964 0.0181769181; +frequency C60pi7 = 0.0062848745 0.0026246919 0.0030342510 0.0005324147 0.0073027627 0.0034409089 0.0009741492 0.0019578159 0.0102225186 0.0180592309 0.1179064681 0.0016205916 0.0234721825 0.3974552519 0.0020165583 0.0056903327 0.0037091821 0.0598639097 0.3185565304 0.0152753744; +frequency C60pi8 = 0.1815005560 0.0026845411 0.0148484537 0.0025145485 0.4205633920 0.0014097001 0.0007088144 0.0461854175 0.0014374605 0.0041745536 0.0098310464 0.0006474254 0.0041611385 0.0068976432 0.0038767247 0.1864537050 0.0687189855 0.0027083549 0.0061033012 0.0345742379; +frequency C60pi9 = 0.0600740822 0.0367642654 0.0134869242 0.0170572285 0.0070719770 0.0142469806 0.0127486975 0.0343564471 0.0305859029 0.0204571345 0.0994551128 0.0212367087 0.0318165939 0.1140907926 0.0297628218 0.0505792699 0.0339368402 0.2312808862 0.1192491702 0.0217421638; +frequency C60pi10 = 0.0708394513 0.0474098489 0.0416822304 0.0324482918 0.0131641265 0.0494874703 0.0508264389 
0.0183309196 0.0567272697 0.0650369079 0.1282255556 0.0343618389 0.0390362930 0.0594359563 0.0135608209 0.0551343199 0.0642260358 0.0137118382 0.0673934289 0.0789609573; +frequency C60pi11 = 0.0617689371 0.0076332888 0.0303081645 0.3430234188 0.0007199837 0.0307856241 0.3792509407 0.0284658686 0.0079592120 0.0016999627 0.0039945339 0.0216076877 0.0019734329 0.0009814186 0.0174791407 0.0337831940 0.0203426591 0.0006130268 0.0017102752 0.0058992300; +frequency C60pi12 = 0.0421559537 0.1042068314 0.0286980872 0.0164385240 0.0044450330 0.1393690851 0.0531949072 0.0134711207 0.0177764997 0.0267727728 0.1967237776 0.1323735242 0.1182827521 0.0086728324 0.0051837880 0.0255852718 0.0333292020 0.0045852327 0.0070281498 0.0217066546; +frequency C60pi13 = 0.2814809927 0.0100367066 0.0172867775 0.0064385734 0.0258337508 0.0133101925 0.0115046410 0.0270054934 0.0054629657 0.0188216093 0.0190993462 0.0098712843 0.0158719589 0.0050481705 0.0129510033 0.1886808600 0.2427104979 0.0012274627 0.0036052922 0.0837524211; +frequency C60pi14 = 0.2769188320 0.0017226995 0.0021315271 0.0011672545 0.0318292645 0.0018216251 0.0024752467 0.0199646887 0.0005170863 0.0983109006 0.0489264326 0.0016232163 0.0173414948 0.0070843906 0.0070179705 0.0336348952 0.0814141404 0.0007118144 0.0032942319 0.3620922883; +frequency C60pi15 = 0.1577797792 0.1112140270 0.0570403237 0.0648290471 0.0053318076 0.1065373681 0.0913586945 0.0906209718 0.0533809635 0.0029171632 0.0156225571 0.0782148712 0.0045758969 0.0025047816 0.0067077844 0.0929310045 0.0393122597 0.0028575821 0.0077590269 0.0085040899; +frequency C60pi16 = 0.0593735135 0.0354740772 0.1151175314 0.2189482708 0.0015332173 0.0688752402 0.1819422913 0.0813707101 0.0220478285 0.0020993577 0.0056191259 0.0750172075 0.0021871739 0.0010838321 0.0109737422 0.0726449461 0.0380238271 0.0007346460 0.0026664883 0.0042669729; +frequency C60pi17 = 0.0978066326 0.0265576438 0.0101843505 0.0120781428 0.0064138404 0.0307876446 0.0291282947 0.0128912798 0.0128036716 
0.0723904209 0.1279438950 0.0245630658 0.0303267312 0.0198963719 0.2723524069 0.0350549441 0.0484557340 0.0046842467 0.0104773833 0.1152032995; +frequency C60pi18 = 0.0124023388 0.0030680354 0.0009239105 0.0006037316 0.0041885695 0.0032957441 0.0012524000 0.0011306791 0.0013542104 0.2344167852 0.4550557697 0.0016718177 0.0667307666 0.0610615367 0.0037076169 0.0019420934 0.0067612939 0.0038937184 0.0074911765 0.1290478057; +frequency C60pi19 = 0.0794230623 0.1294739355 0.0662792725 0.0587236242 0.0019919499 0.1143880588 0.1246900644 0.0325432311 0.0238605372 0.0036277150 0.0097987961 0.2147597316 0.0041846209 0.0012869951 0.0142410239 0.0615807386 0.0477333594 0.0006525371 0.0029420233 0.0078187231; +frequency C60pi20 = 0.0248148778 0.0083552910 0.1888915388 0.4278832998 0.0027839717 0.0210777725 0.1432386297 0.0643968435 0.0185736870 0.0022506941 0.0034558626 0.0179274104 0.0015714503 0.0014680353 0.0073768035 0.0377003132 0.0187767966 0.0005891859 0.0042602708 0.0046072655; +frequency C60pi21 = 0.0017003427 0.0060674330 0.0004222900 0.0010711490 0.0029059420 0.0016424179 0.0011731741 0.0035579609 0.0027630465 0.0012291190 0.0127420810 0.0004273804 0.0025671348 0.0513377024 0.0013536738 0.0011871674 0.0014033068 0.8640436936 0.0390912582 0.0033137266; +frequency C60pi22 = 0.0468360682 0.0639796924 0.0205603686 0.0185615516 0.0059954138 0.0557030821 0.0705436036 0.0045435329 0.0152062773 0.1550613356 0.0824253382 0.0866248354 0.0245854443 0.0080177192 0.0081485616 0.0237025617 0.0962054496 0.0018368673 0.0067131723 0.2047491243; +frequency C60pi23 = 0.0258764792 0.0201097124 0.0298384107 0.0107037437 0.0142503909 0.0158529432 0.0105649532 0.0073064999 0.1411078834 0.0114777629 0.0407992414 0.0119179202 0.0098798997 0.1876429961 0.0051228805 0.0275699644 0.0170764901 0.0405124999 0.3536390834 0.0187502449; +frequency C60pi24 = 0.0296285022 0.0046400334 0.0034944393 0.0008851024 0.0090046468 0.0055481111 0.0033046518 0.0027969482 0.0050701500 0.2583397750 0.2668085481 
0.0046690936 0.0770825277 0.0408798247 0.0026918193 0.0068538089 0.0322265673 0.0035506055 0.0153353414 0.2271895033; +frequency C60pi25 = 0.0555725806 0.0098447861 0.0409064430 0.0140389597 0.0097418602 0.0068727710 0.0069443190 0.0157956555 0.0041631258 0.0069826497 0.0075271247 0.0139224817 0.0058762687 0.0034496730 0.0119733364 0.3482466393 0.4213655981 0.0010061491 0.0026576772 0.0131119012; +frequency C60pi26 = 0.0682671212 0.0615207091 0.0530661192 0.0360278709 0.0141433148 0.0612274332 0.0497415394 0.0268696520 0.1127674983 0.0132646615 0.0544493838 0.0482609047 0.0170033964 0.0803375967 0.0191949850 0.0671839752 0.0443995774 0.0199957919 0.1255070748 0.0267713947; +frequency C60pi27 = 0.0792618808 0.0638377192 0.0635289371 0.0436646174 0.0049503302 0.0666365188 0.0829639117 0.0183428565 0.0233169239 0.0249427251 0.0221483402 0.0932577596 0.0120893380 0.0049131149 0.0126360122 0.1334848656 0.1916745928 0.0018040086 0.0062353115 0.0503102360; +frequency C60pi28 = 0.0731759112 0.2105335985 0.0324200854 0.0110007149 0.0123458504 0.0858951989 0.0349942684 0.0224509173 0.0386903280 0.0246226304 0.0508307349 0.1783344831 0.0185740720 0.0093148787 0.0148722772 0.0603181436 0.0649574934 0.0051046395 0.0130597421 0.0385040321; +frequency C60pi29 = 0.0878402710 0.0110331750 0.0060801213 0.0032803903 0.0171147088 0.0109831614 0.0101465790 0.0087090941 0.0054902234 0.1987761871 0.1756460821 0.0082096925 0.0417232903 0.0191954435 0.0111283542 0.0209862621 0.0697718709 0.0031744014 0.0081905473 0.2825201446; +frequency C60pi30 = 0.0990215820 0.0349351987 0.0211149501 0.0118797946 0.0108995677 0.0557710676 0.0278999992 0.0240250097 0.0123445071 0.0776564721 0.2354511299 0.0322817789 0.1207665429 0.0214442058 0.0075655541 0.0524170141 0.0649785115 0.0047075806 0.0077328724 0.0771066610; +frequency C60pi31 = 0.0601641168 0.0161995226 0.2783522747 0.0337188808 0.0315066987 0.0210645987 0.0059839451 0.0543080710 0.0531523512 0.0070650825 0.0070698142 0.0139598368 0.0088298653 
0.0069525877 0.0075834331 0.2829802556 0.0860317092 0.0014966551 0.0134849454 0.0100953553; +frequency C60pi32 = 0.0049781737 0.0018412331 0.0007012207 0.0005315368 0.0052978737 0.0024089907 0.0007630546 0.0015051317 0.0041575221 0.0443828633 0.4417417476 0.0011615060 0.0602807417 0.3351117140 0.0027847686 0.0025795769 0.0030288544 0.0171302592 0.0458455751 0.0237676560; +frequency C60pi33 = 0.0251996593 0.1114468110 0.0142031925 0.0041012288 0.0097099500 0.0620070749 0.0262571641 0.0038067269 0.0431938935 0.0974043253 0.2447197423 0.0824312856 0.0539323021 0.0429091639 0.0052658505 0.0096093107 0.0251183002 0.0146571900 0.0456965140 0.0783303143; +frequency C60pi34 = 0.0230361648 0.0014748749 0.0013534390 0.0006264439 0.0048580122 0.0009870046 0.0015762583 0.0011565336 0.0008899238 0.3952895890 0.0576537208 0.0014663528 0.0140986541 0.0072127040 0.0020177885 0.0028770237 0.0205580852 0.0005477695 0.0019539080 0.4603657493; +frequency C60pi35 = 0.1408776963 0.0297808449 0.0171297613 0.0285076933 0.0032213718 0.0320632225 0.0423838922 0.0299558472 0.0131321477 0.0066914481 0.0195120028 0.0383781635 0.0036276863 0.0041231064 0.4383466229 0.0851400095 0.0422765692 0.0013236871 0.0037087638 0.0198194632; +frequency C60pi36 = 0.4442491220 0.0050216551 0.0102305117 0.0057193038 0.0235405374 0.0055997640 0.0064889886 0.0822687710 0.0025505743 0.0033615104 0.0040990063 0.0038097073 0.0028683069 0.0024413211 0.0162890960 0.2999969708 0.0559664935 0.0007735426 0.0020639824 0.0226608347; +frequency C60pi37 = 0.0898717958 0.0070958305 0.0130067619 0.0129166888 0.0044131479 0.0023806547 0.0058957027 0.8087563021 0.0016517855 0.0004339282 0.0015564455 0.0033939025 0.0004253422 0.0008073572 0.0034128140 0.0362876891 0.0032887534 0.0015223902 0.0008537454 0.0020289624; +frequency C60pi38 = 0.0550840246 0.0472254260 0.1877829604 0.1273796123 0.0035824944 0.0527969268 0.0655884730 0.0637607521 0.0404883483 0.0075574152 0.0136304510 0.0867682792 0.0081684229 0.0040375032 0.0110681809 
0.1263380956 0.0752544318 0.0013563681 0.0118590434 0.0102727908; +frequency C60pi39 = 0.0117681394 0.0442558806 0.0844144627 0.0144712108 0.0070388254 0.1038342049 0.0110901161 0.0049626578 0.4337194047 0.0061337038 0.0298794939 0.0137928558 0.0076237551 0.0338266335 0.0081346096 0.0140571089 0.0108276801 0.0080683065 0.1437251732 0.0083757773; +frequency C60pi40 = 0.0159285638 0.0048098656 0.0032692643 0.0010966937 0.0080519916 0.0134552459 0.0021324215 0.0025086365 0.0049192147 0.0501543893 0.5307634291 0.0035599431 0.2160085187 0.0743650717 0.0045247350 0.0066922196 0.0119092283 0.0070928134 0.0106565111 0.0281012433; +frequency C60pi41 = 0.0195973253 0.0105142992 0.3289103336 0.3099848991 0.0034539049 0.0116196758 0.0250777800 0.0627528956 0.0295961112 0.0032650434 0.0028246884 0.0240963907 0.0008425062 0.0019706550 0.0049062781 0.1064984500 0.0438053705 0.0006333959 0.0056197958 0.0040302013; +frequency C60pi42 = 0.0833804360 0.0125871438 0.0969824220 0.0686820704 0.0081981143 0.0121520930 0.0227415415 0.0982291876 0.0073954898 0.0017471177 0.0039653113 0.0129342146 0.0019557975 0.0024132583 0.0355924232 0.3115606483 0.2113368612 0.0016329034 0.0017991083 0.0047138579; +frequency C60pi43 = 0.0181409133 0.4129662563 0.0233205154 0.0033333547 0.0085143598 0.0526694251 0.0096531879 0.0224552642 0.0375238929 0.0035090482 0.0149146621 0.3208065790 0.0046098856 0.0035426859 0.0087197469 0.0262309419 0.0131791136 0.0034766995 0.0079588201 0.0044746474; +frequency C60pi44 = 0.2494227404 0.0185481724 0.0164119567 0.0169234299 0.0122862654 0.0228501981 0.0370491083 0.0347467705 0.0087069587 0.0595718359 0.0451065029 0.0177064733 0.0204556127 0.0077360919 0.0686403544 0.0889295672 0.0986017356 0.0028603862 0.0061938477 0.1672519917; +frequency C60pi45 = 0.1419737638 0.0373945961 0.0576296888 0.0537452477 0.0068856658 0.0286239972 0.0407540287 0.3988107872 0.0152895617 0.0016627616 0.0092348297 0.0314273807 0.0055425500 0.0040286132 0.0180328866 0.1123731997 0.0242478202 
0.0025909098 0.0049054208 0.0048462908; +frequency C60pi46 = 0.0178903305 0.1958843646 0.0155853897 0.0031054277 0.0290304227 0.1051819261 0.0040503389 0.0100480293 0.1252696215 0.0016708003 0.0722356645 0.0233340169 0.0116142354 0.0238913260 0.0009938415 0.0181675536 0.0186260222 0.2260554691 0.0859787232 0.0113864962; +frequency C60pi47 = 0.1454758367 0.0420979067 0.0400419720 0.1294249748 0.0014186329 0.0906469055 0.2471353458 0.0319650773 0.0130426183 0.0058525371 0.0123593139 0.0818154090 0.0044178939 0.0017552077 0.0151135525 0.0656688174 0.0511289472 0.0007731441 0.0029258438 0.0169400635; +frequency C60pi48 = 0.0169799462 0.0242346701 0.1318047919 0.1043655101 0.0022087215 0.0269349684 0.0376379591 0.5404470183 0.0181137053 0.0007459679 0.0021146994 0.0508617611 0.0009473769 0.0006780593 0.0038754401 0.0297030159 0.0045836180 0.0006031889 0.0015704090 0.0015891728; +frequency C60pi49 = 0.0402646249 0.1152022601 0.0323829165 0.0293968352 0.0039388655 0.2497008043 0.1603524245 0.0129260411 0.0617967839 0.0098491259 0.0354918823 0.1448804422 0.0124818865 0.0041153375 0.0043374229 0.0243246958 0.0305645368 0.0026676598 0.0097227847 0.0156026694; +frequency C60pi50 = 0.2256914610 0.0523417493 0.0244308734 0.0637125217 0.0043390149 0.0578159236 0.1154830640 0.0867335173 0.0131066949 0.0085086217 0.0193314218 0.0660468804 0.0064877206 0.0027440054 0.0611149102 0.1070877179 0.0507677144 0.0013695913 0.0028982948 0.0299883012; +frequency C60pi51 = 0.0033164209 0.0015310773 0.0030830171 0.0008266472 0.0051890730 0.0011024889 0.0005134130 0.0010432830 0.0278451262 0.0041895268 0.0111212494 0.0007149922 0.0023621780 0.3801761447 0.0008365077 0.0035876698 0.0023608948 0.0333346985 0.5107889643 0.0060766272; +frequency C60pi52 = 0.1995014012 0.0236078675 0.0392254543 0.0094955104 0.0584590451 0.0254265363 0.0125535371 0.0939787338 0.0341857201 0.0140209879 0.0449387571 0.0118723304 0.0246990633 0.0634433944 0.0145385320 0.1663920640 0.0533159207 0.0129802666 0.0606346163 
0.0367302614; +frequency C60pi53 = 0.0319448994 0.1011667268 0.2084709220 0.0378074649 0.0066040348 0.0766372935 0.0279488190 0.0365541130 0.2088643258 0.0047542347 0.0156545731 0.0868664783 0.0043253317 0.0108915768 0.0060899575 0.0577656939 0.0302051160 0.0026001883 0.0387897304 0.0060585202; +frequency C60pi54 = 0.0776799515 0.0142518583 0.0403216692 0.0080651725 0.0140092962 0.0179995517 0.0112622427 0.0136868237 0.0133729897 0.1239635380 0.0724670993 0.0129144967 0.0420745442 0.0173584908 0.0117084432 0.0922723571 0.2316899445 0.0028153633 0.0141726542 0.1679135132; +frequency C60pi55 = 0.1183662657 0.0805192606 0.0259524932 0.0495595439 0.0035624835 0.1204924917 0.1537589210 0.0194993426 0.0229373171 0.0302661211 0.0571250629 0.0982304112 0.0171727472 0.0068665705 0.0175153030 0.0486588400 0.0635796210 0.0023008307 0.0083027431 0.0553336300; +frequency C60pi56 = 0.0528559899 0.0193569043 0.0264743774 0.2092761515 0.0008625883 0.1212409715 0.4024189781 0.0155838458 0.0124148798 0.0054864832 0.0090256472 0.0497017031 0.0042357114 0.0012650715 0.0063185636 0.0197262901 0.0235463735 0.0008381610 0.0033948741 0.0159764347; +frequency C60pi57 = 0.0344366215 0.0426221820 0.1636716191 0.1139007491 0.0020985982 0.0605413987 0.0541780220 0.3361639671 0.0461776737 0.0003463416 0.0048355678 0.0667552967 0.0019704509 0.0031557619 0.0040369775 0.0481173332 0.0089148085 0.0006510101 0.0054145649 0.0020110555; +frequency C60pi58 = 0.1153088951 0.0151278638 0.0458476603 0.1755516676 0.0014962362 0.0366731222 0.1749410045 0.0394181311 0.0132401530 0.0056912974 0.0101409559 0.0433118387 0.0030332064 0.0015700232 0.1665802563 0.0871536033 0.0468260603 0.0007515702 0.0031432715 0.0141931831; +frequency C60pi59 = 0.3865149348 0.0037579334 0.0030420497 0.0022366810 0.0218928357 0.0021464743 0.0031387843 0.3694353983 0.0014672902 0.0085376076 0.0127257242 0.0018840458 0.0080581695 0.0039281367 0.0158688291 0.0808877279 0.0305195935 0.0009922880 0.0019020345 0.0410634615; +frequency 
C60pi60 = 0.0146570745 0.0028841333 0.0012998335 0.0005210575 0.0024317913 0.0049362750 0.0014874369 0.0020953252 0.0010181940 0.1913901476 0.4432797758 0.0022898369 0.2217427062 0.0091637503 0.0007685153 0.0027251487 0.0170997497 0.0008779380 0.0014756028 0.0778557075; +model C60 = POISSON+G+FMIX{C60pi1:1:0.0169698865,C60pi2:1:0.0211683374,C60pi3:1:0.0276589079,C60pi4:1:0.0065675964,C60pi5:1:0.0141221416,C60pi6:1:0.0068774834,C60pi7:1:0.0146909701,C60pi8:1:0.0067225777,C60pi9:1:0.0018396660,C60pi10:1:0.0102547197,C60pi11:1:0.0230896163,C60pi12:1:0.0057941033,C60pi13:1:0.0125394534,C60pi14:1:0.0204526478,C60pi15:1:0.0070629602,C60pi16:1:0.0117982741,C60pi17:1:0.0068334668,C60pi18:1:0.0433775839,C60pi19:1:0.0318278731,C60pi20:1:0.0222546108,C60pi21:1:0.0102264969,C60pi22:1:0.0150545891,C60pi23:1:0.0134159878,C60pi24:1:0.0148552065,C60pi25:1:0.0239111516,C60pi26:1:0.0128776278,C60pi27:1:0.0222318842,C60pi28:1:0.0247444742,C60pi29:1:0.0214274810,C60pi30:1:0.0115001882,C60pi31:1:0.0076017389,C60pi32:1:0.0130258568,C60pi33:1:0.0093701965,C60pi34:1:0.0467194264,C60pi35:1:0.0441940314,C60pi36:1:0.0322263154,C60pi37:1:0.0402999891,C60pi38:1:0.0150234227,C60pi39:1:0.0104589903,C60pi40:1:0.0214742395,C60pi41:1:0.0154957836,C60pi42:1:0.0101789953,C60pi43:1:0.0227980379,C60pi44:1:0.0123204539,C60pi45:1:0.0066777583,C60pi46:1:0.0004150083,C60pi47:1:0.0344385130,C60pi48:1:0.0113663379,C60pi49:1:0.0127143049,C60pi50:1:0.0124323741,C60pi51:1:0.0262124415,C60pi52:1:0.0064994957,C60pi53:1:0.0103203293,C60pi54:1:0.0142463512,C60pi55:1:0.0215600067,C60pi56:1:0.0199150700,C60pi57:1:0.0038964200,C60pi58:1:0.0113448855,C60pi59:1:0.0128595846,C60pi60:1:0.0117656776}; + +end; +)"; const double MIN_MIXTURE_PROP = 0.001; //const double MAX_MIXTURE_PROP = 1000.0; //const double MIN_MIXTURE_RATE = 0.01; //const double MAX_MIXTURE_RATE = 100.0; -ModelSubst* createModel(string model_str, ModelsBlock *models_block, StateFreqType freq_type, string freq_params, +ModelSubst* createModel(string 
model_str, ModelsBlock *models_block, + StateFreqType freq_type, string freq_params, PhyloTree* tree) { ModelSubst *model = NULL; @@ -1027,6 +1030,23 @@ ModelSubst* createModel(string model_str, ModelsBlock *models_block, StateFreqTy } } + // sequencing error model + string seqerr = ""; + string::size_type spec_pos; + while ((spec_pos = model_str.find("+E")) != string::npos) { + string::size_type end_pos = model_str.find_first_of("+*", spec_pos+1); + if (end_pos == string::npos) { + seqerr = model_str.substr(spec_pos); + model_str = model_str.substr(0, spec_pos); + } else { + seqerr = model_str.substr(spec_pos, end_pos - spec_pos); + model_str = model_str.substr(0, spec_pos) + model_str.substr(end_pos); + } + } + + if (!seqerr.empty() && tree->aln->seq_type != SEQ_DNA) { + outError("Sequencing error model " + seqerr + " is only supported for DNA"); + } // Now that PoMo stuff has been removed, check for model parameters. size_t pos = model_str.find(OPEN_BRACKET); if (pos != string::npos) { @@ -1068,9 +1088,12 @@ ModelSubst* createModel(string model_str, ModelsBlock *models_block, StateFreqTy } else if (tree->aln->seq_type == SEQ_BINARY) { model = new ModelBIN(model_str.c_str(), model_params, freq_type, freq_params, tree); } else if (tree->aln->seq_type == SEQ_DNA) { - model = new ModelDNA(model_str.c_str(), model_params, freq_type, freq_params, tree); + if (seqerr.empty()) + model = new ModelDNA(model_str.c_str(), model_params, freq_type, freq_params, tree); + else + model = new ModelDNAError(model_str.c_str(), model_params, freq_type, freq_params, seqerr, tree); } else if (tree->aln->seq_type == SEQ_PROTEIN) { - model = new ModelProtein(model_str.c_str(), model_params, freq_type, freq_params, tree); + model = new ModelProtein(model_str.c_str(), model_params, freq_type, freq_params, tree, models_block); } else if (tree->aln->seq_type == SEQ_CODON) { model = new ModelCodon(model_str.c_str(), model_params, freq_type, freq_params, tree); } else if (tree->aln->seq_type 
== SEQ_MORPH) { @@ -1168,23 +1191,30 @@ void ModelMixture::initMixture(string orig_model_name, string model_name, string outError("Defining both empirical and optimize frequencies not allowed"); } double sum_weights = 0.0; - for (m = 0; m < freq_weights.size(); m++) - if (freq_vec[m] != nxs_freq_empirical && freq_vec[m] != nxs_freq_optimize) + for (m = 0; m < freq_weights.size(); m++) { + if (freq_vec[m] != nxs_freq_empirical && freq_vec[m] != nxs_freq_optimize) { sum_weights += freq_weights[m]; - for (m = 0; m < freq_weights.size(); m++) - if (freq_vec[m] == nxs_freq_empirical || freq_vec[m] == nxs_freq_optimize) - freq_weights[m] = sum_weights/freq_weights.size(); + } + } + for (m = 0; m < freq_weights.size(); m++) { + if (freq_vec[m] == nxs_freq_empirical || freq_vec[m] == nxs_freq_optimize) { + freq_weights[m] = sum_weights / freq_weights.size(); + } + } ModelMarkov::init(FREQ_USER_DEFINED); } else { if (freq_params != "") readStateFreq(freq_params); + if (freq == FREQ_UNKNOWN) + freq = FREQ_USER_DEFINED; ModelMarkov::init(freq); } DoubleVector weights; name = orig_model_name.substr(0, orig_model_name.find_first_of("+*")); - if (!models_block->findMixModel(name)) + if (!models_block->findMixModel(name)) { name = ""; + } full_name = (string)"MIX" + OPEN_BRACKET; if (model_list == "") model_list = model_name; for (m = 0, cur_pos = 0; cur_pos < model_list.length(); m++) { @@ -1257,8 +1287,7 @@ void ModelMixture::initMixture(string orig_model_name, string model_name, string full_name += CLOSE_BRACKET; int nmixtures = size(); - if (prop) - aligned_free(prop); + aligned_free(prop); prop = aligned_alloc(nmixtures); double sum = 0.0; @@ -1283,17 +1312,19 @@ void ModelMixture::initMixture(string orig_model_name, string model_name, string for (i = 0; i < nmixtures; i++) prop[i] *= sum; } - // rescale total_num_subst such that the global rate is 1 - for (i = 0, sum = 0.0; i < nmixtures; i++) - sum += prop[i]*at(i)->total_num_subst; - for (i = 0; i < nmixtures; i++) - 
at(i)->total_num_subst /= sum; - - if (optimize_steps == 0) - optimize_steps = (getNDim()+1)*100; - - if (optimize_weights) fix_prop = false; + for (i = 0, sum = 0.0; i < nmixtures; i++) { + sum += prop[i] * at(i)->total_num_subst; + } + for (i = 0; i < nmixtures; i++) { + at(i)->total_num_subst /= sum; + } + if (optimize_steps == 0) { + optimize_steps = (getNDim() + 1) * 100; + } + if (optimize_weights) { + fix_prop = false; + } fix_prop |= (nmixtures == 1); // use central eigen etc. stufffs @@ -1306,92 +1337,90 @@ void ModelMixture::initMixture(string orig_model_name, string model_name, string err = true; } } - - if (err) + if (err) { outError("Model reversibility is not consistent"); - if (rev != isReversible()) + } + if (rev != isReversible()) { setReversible(rev); - + } // forgot to call this after refactoring - if (isReversible()) + if (isReversible()) { initMem(); - + } decomposeRateMatrix(); delete nxs_freq_optimize; delete nxs_freq_empirical; - } void ModelMixture::initMem() { - - int nmixtures = size(); - - // Calculate the total number of states and take into account that each of the - // models may be a mixture model itself (PoMo rate heterogeneity). 
- - int num_states_total = 0; - for (iterator it = begin(); it != end(); it++) - num_states_total += (*it)->get_num_states_total(); - - if (eigenvalues) aligned_free(eigenvalues); - if (eigenvectors) aligned_free(eigenvectors); - if (inv_eigenvectors) aligned_free(inv_eigenvectors); - // if (eigen_coeff) aligned_free(eigen_coeff); - - eigenvalues = aligned_alloc(num_states_total*nmixtures); - eigenvectors = aligned_alloc(num_states_total*num_states_total*nmixtures); - inv_eigenvectors = aligned_alloc(num_states_total*num_states_total*nmixtures); - // int ncoeff = num_states_total*num_states_total*num_states_total; - // eigen_coeff = aligned_alloc(ncoeff*nmixtures); - - // assigning memory for individual models - int m = 0; - int count_num_states = 0; - int count_num_states_2 = 0; - for (iterator it = begin(); it != end(); it++, m++) { - int num_states_this_model = (*it)->get_num_states_total(); - int num_states_this_model_2 = num_states_this_model * num_states_this_model; - // first copy memory for eigen stuffs - memcpy(&eigenvalues[count_num_states], (*it)->eigenvalues, - num_states_this_model*sizeof(double)); - memcpy(&eigenvectors[count_num_states_2], (*it)->eigenvectors, - num_states_this_model_2*sizeof(double)); - memcpy(&inv_eigenvectors[count_num_states_2], (*it)->inv_eigenvectors, - num_states_this_model_2*sizeof(double)); - // memcpy(&eigen_coeff[m*ncoeff], (*it)->eigen_coeff, ncoeff*sizeof(double)); - - // then delete - if ((*it)->eigenvalues) aligned_free((*it)->eigenvalues); - if ((*it)->eigenvectors) aligned_free((*it)->eigenvectors); - if ((*it)->inv_eigenvectors) aligned_free((*it)->inv_eigenvectors); - // if ((*it)->eigen_coeff) aligned_free((*it)->eigen_coeff); - - // And assign new memory. Also, recursively, update respective pointers for - // the mixture components of the current model. This is relevant if the - // current model is a mixture model itself. 
- (*it)->update_eigen_pointers(&eigenvalues[count_num_states], - &eigenvectors[count_num_states_2], - &inv_eigenvectors[count_num_states_2]); - // (*it)->eigen_coeff = &eigen_coeff[m*ncoeff]; - - // Update the state counters, so that the pointers are assigned correctly - // for the next mixture component. - count_num_states += num_states_this_model; - count_num_states_2 += num_states_this_model_2; - } + + int nmixtures = size(); + + // Calculate the total number of states and take into account that each of the + // models may be a mixture model itself (PoMo rate heterogeneity). + + int num_states_total = 0; + for (iterator it = begin(); it != end(); it++) { + num_states_total += (*it)->get_num_states_total(); + } + + aligned_free(eigenvalues); + aligned_free(eigenvectors); + aligned_free(inv_eigenvectors); + aligned_free(inv_eigenvectors_transposed); + ensure_aligned_allocated(eigenvalues, num_states_total*nmixtures); + ensure_aligned_allocated(eigenvectors, num_states_total*num_states_total*nmixtures); + ensure_aligned_allocated(inv_eigenvectors, num_states_total*num_states_total*nmixtures); + ensure_aligned_allocated(inv_eigenvectors_transposed, num_states_total*num_states_total*nmixtures); + + // assigning memory for individual models + int m = 0; + int count_num_states = 0; + int count_num_states_2 = 0; + for (iterator it = begin(); it != end(); it++, m++) { + int num_states_this_model = (*it)->get_num_states_total(); + int num_states_this_model_2 = num_states_this_model * num_states_this_model; + // first copy memory for eigen stuffs + memcpy(&eigenvalues[count_num_states], (*it)->eigenvalues, + num_states_this_model*sizeof(double)); + memcpy(&eigenvectors[count_num_states_2], (*it)->eigenvectors, + num_states_this_model_2*sizeof(double)); + memcpy(&inv_eigenvectors[count_num_states_2], (*it)->inv_eigenvectors, + num_states_this_model_2*sizeof(double)); + memcpy(&inv_eigenvectors_transposed[count_num_states_2], (*it)->inv_eigenvectors_transposed, + 
num_states_this_model_2*sizeof(double)); + + // then delete + aligned_free((*it)->eigenvalues); + aligned_free((*it)->eigenvectors); + aligned_free((*it)->inv_eigenvectors); + aligned_free((*it)->inv_eigenvectors_transposed); + + // And assign new memory. Also, recursively, update respective pointers for + // the mixture components of the current model. This is relevant if the + // current model is a mixture model itself. + (*it)->update_eigen_pointers(&eigenvalues[count_num_states], + &eigenvectors[count_num_states_2], + &inv_eigenvectors[count_num_states_2], + &inv_eigenvectors_transposed[count_num_states_2]); + + // Update the state counters, so that the pointers are assigned correctly + // for the next mixture component. + count_num_states += num_states_this_model; + count_num_states_2 += num_states_this_model_2; + } } ModelMixture::~ModelMixture() { - if (prop) - aligned_free(prop); - for (reverse_iterator rit = rbegin(); rit != rend(); rit++) { -// (*rit)->eigen_coeff = NULL; - (*rit)->eigenvalues = NULL; - (*rit)->eigenvectors = NULL; - (*rit)->inv_eigenvectors = NULL; - delete (*rit); - } + aligned_free(prop); + for (reverse_iterator rit = rbegin(); rit != rend(); rit++) { + (*rit)->eigenvalues = nullptr; + (*rit)->eigenvectors = nullptr; + (*rit)->inv_eigenvectors = nullptr; + (*rit)->inv_eigenvectors_transposed = nullptr; + delete (*rit); + } } void ModelMixture::setCheckpoint(Checkpoint *checkpoint) { @@ -1401,14 +1430,16 @@ void ModelMixture::setCheckpoint(Checkpoint *checkpoint) { } void ModelMixture::startCheckpoint() { - checkpoint->startStruct("ModelMixture"); + checkpoint->startStruct("ModelMixture" + convertIntToString(getNMixtures())); } void ModelMixture::saveCheckpoint() { startCheckpoint(); // CKP_SAVE(fix_prop); - int nmix = getNMixtures(); - CKP_ARRAY_SAVE(nmix, prop); + if (!fix_prop) { + int nmix = getNMixtures(); + CKP_ARRAY_SAVE(nmix, prop); + } int part = 1; for (iterator it = begin(); it != end(); it++, part++) { 
checkpoint->startStruct("Component" + convertIntToString(part)); @@ -1425,8 +1456,10 @@ void ModelMixture::restoreCheckpoint() { startCheckpoint(); // CKP_RESTORE(fix_prop); - int nmix = getNMixtures(); - CKP_ARRAY_RESTORE(nmix, prop); + if (!fix_prop) { + int nmix = getNMixtures(); + CKP_ARRAY_RESTORE(nmix, prop); + } int part = 1; for (iterator it = begin(); it != end(); it++, part++) { checkpoint->startStruct("Component" + convertIntToString(part)); @@ -1483,10 +1516,12 @@ void ModelMixture::computeTransDerv(double time, double *trans_matrix, int ModelMixture::getNDim() { // int dim = (fix_prop) ? 0: (size()-1); int dim = 0; - if (!optimizing_submodels && !fix_prop) - dim = size()-1; - for (iterator it = begin(); it != end(); it++) - dim += (*it)->getNDim(); + if (!optimizing_submodels && !fix_prop) { + dim = size() - 1; + } + for (iterator it = begin(); it != end(); it++) { + dim += (*it)->getNDim(); + } return dim; } @@ -1623,10 +1658,12 @@ double ModelMixture::optimizeWithEM(double gradient_epsilon) { tree->central_scale_num = phylo_tree->central_scale_num; tree->central_partial_pars = phylo_tree->central_partial_pars; - tree->copyPhyloTree(phylo_tree); + tree->copyPhyloTree(phylo_tree, true); tree->optimize_by_newton = phylo_tree->optimize_by_newton; tree->setParams(phylo_tree->params); - tree->setLikelihoodKernel(phylo_tree->sse, phylo_tree->num_threads); + tree->setLikelihoodKernel(phylo_tree->sse); + tree->setNumThreads(phylo_tree->num_threads); + // initialize model ModelFactory *model_fac = new ModelFactory(); model_fac->joint_optimize = phylo_tree->params->optimize_model_rate_joint; @@ -1721,7 +1758,7 @@ double ModelMixture::optimizeWithEM(double gradient_epsilon) { // now optimize model one by one for (c = 0; c < nmix; c++) if (at(c)->getNDim() > 0) { - tree->copyPhyloTreeMixlen(phylo_tree, c); + tree->copyPhyloTreeMixlen(phylo_tree, c, true); ModelMarkov *subst_model; subst_model = at(c); tree->setModel(subst_model); @@ -1772,17 +1809,20 @@ double 
ModelMixture::optimizeParameters(double gradient_epsilon) { int dim = getNDim(); double score = 0.0; - if (!phylo_tree->getModelFactory()->unobserved_ptns.empty()) + if (!phylo_tree->getModelFactory()->unobserved_ptns.empty()) { outError("Mixture model +ASC is not supported yet. Contact author if needed."); - - if (dim > 0) + } + if (dim > 0) { score = optimizeWithEM(gradient_epsilon); - else if (!fix_prop) + } + else if (!fix_prop) { score = optimizeWeights(); - + } // double score = ModelGTR::optimizeParameters(gradient_epsilon); optimizing_submodels = false; - if (getNDim() == 0) return score; + if (getNDim() == 0) { + return score; + } // now rescale Q matrices to have proper interpretation of branch lengths double sum; @@ -1805,6 +1845,7 @@ bool ModelMixture::isUnstableParameters() { for (c = 0; c < ncategory; c++) if (prop[c] < MIN_MIXTURE_PROP*0.1) { outWarning("The mixture model might be overfitting because some mixture weights are estimated close to zero"); + //Todo: Which is it? Break, or return true? James B. 
23-Jul-2020 break; return true; } diff --git a/model/modelmixture.h b/model/modelmixture.h index 3e730bb41..862c91069 100644 --- a/model/modelmixture.h +++ b/model/modelmixture.h @@ -14,21 +14,20 @@ #include "nclextra/modelsblock.h" -const char OPEN_BRACKET = '{'; -const char CLOSE_BRACKET = '}'; - -extern const string builtin_mixmodels_definition; +extern const char* builtin_mixmodels_definition; /** * create a substitution model * @param model_str model nme * @param freq_type state frequency type * @param freq_params frequency parameters + * @param seqerr sequencing error model * @param tree associated phylo tree * @param count_rates TRUE to assign rates counted from alignment, FALSE to not initialize rates * @return substitution model created */ -ModelSubst *createModel(string model_str, ModelsBlock *models_block, StateFreqType freq_type, string freq_params, +ModelSubst *createModel(string model_str, ModelsBlock *models_block, + StateFreqType freq_type, string freq_params, PhyloTree *tree); diff --git a/model/modelmorphology.cpp b/model/modelmorphology.cpp index b7ca50d37..de1009997 100644 --- a/model/modelmorphology.cpp +++ b/model/modelmorphology.cpp @@ -17,21 +17,27 @@ void ModelMorphology::init(const char *model_name, string model_params, StateFre { name = model_name; full_name = model_name; - freq = FREQ_EQUAL; if (name == "MK") { // all were initialized + num_params = 0; } else if (name == "ORDERED") { - int k = 0; + int i, j, k = 0; // only allow for substitution from state i to state i+1 and back. 
- for (int i = 0; i < num_states-1; i++) { + for (i = 0; i < num_states-1; i++) { rates[k++] = 1.0; - for (int j = i+2; j < num_states; j++, k++) + for (j = i+2; j < num_states; j++, k++) rates[k] = 0.0; } + num_params = 0; + } else if (name == "GTR" || name == "GTRX") { + outWarning("GTRX multistate model will estimate " + convertIntToString(getNumRateEntries()-1) + " substitution rates that might be overfitting!"); + outWarning("Please only use GTRX with very large data and always test for model fit!"); + name = "GTRX"; } else { // if name does not match, read the user-defined model readParameters(model_name); num_params = 0; + freq = FREQ_USER_DEFINED; } ModelMarkov::init(freq); } @@ -57,6 +63,12 @@ void ModelMorphology::readRates(istream &in) throw(const char*, string) { } } +int ModelMorphology::getNDim() { + int ndim = num_params; + if (freq_type == FREQ_ESTIMATE) + ndim += num_states-1; + return ndim; +} ModelMorphology::~ModelMorphology() { } @@ -64,3 +76,64 @@ ModelMorphology::~ModelMorphology() { void ModelMorphology::startCheckpoint() { checkpoint->startStruct("ModelMorph"); } + +void ModelMorphology::saveCheckpoint() { + startCheckpoint(); + if (num_params > 0) + CKP_ARRAY_SAVE(getNumRateEntries(), rates); + endCheckpoint(); + ModelMarkov::saveCheckpoint(); +} + +void ModelMorphology::restoreCheckpoint() { + ModelMarkov::restoreCheckpoint(); + startCheckpoint(); + if (num_params > 0) + CKP_ARRAY_RESTORE(getNumRateEntries(), rates); + endCheckpoint(); + decomposeRateMatrix(); + if (phylo_tree) + phylo_tree->clearAllPartialLH(); +} + +string ModelMorphology::getNameParams() { + if (num_params == 0) return name; + ostringstream retname; + retname << name << '{'; + int nrates = getNumRateEntries(); + for (int i = 0; i < nrates; i++) { + if (i>0) retname << ','; + retname << rates[i]; + } + retname << '}'; + getNameParamsFreq(retname); + return retname.str(); +} + +void ModelMorphology::writeParameters(ostream &out) { + int i; + if (freq_type == 
FREQ_ESTIMATE) { + for (i = 0; i < num_states; i++) + out << "\t" << state_freq[i]; + } + if (num_params == 0) return; + int nrateout = getNumRateEntries() - 1; + for (i = 0; i < nrateout; i++) + out << "\t" << rates[i]; +} + +void ModelMorphology::writeInfo(ostream &out) { + if (num_params > 0) { + out << "Rate parameters:"; + int nrate = getNumRateEntries(); + for (int i = 0; i < nrate; i++) + out << " " << rates[i]; + out << endl; + } + if (freq_type != FREQ_EQUAL) { + out << "State frequencies:"; + for (int i = 0; i < num_states; i++) + out << " " << state_freq[i]; + out << endl; + } +} diff --git a/model/modelmorphology.h b/model/modelmorphology.h index 6bb4e9e93..97349fac0 100644 --- a/model/modelmorphology.h +++ b/model/modelmorphology.h @@ -34,15 +34,42 @@ class ModelMorphology: public ModelMarkov { */ virtual void init(const char *model_name, string model_params, StateFreqType freq, string freq_params); + /** + return the number of dimensions + */ + virtual int getNDim(); + /** start structure for checkpointing */ virtual void startCheckpoint(); + + /** + save object into the checkpoint + */ + virtual void saveCheckpoint(); + + /** + restore object from the checkpoint + */ + virtual void restoreCheckpoint(); - /** - return the number of dimensions - */ - virtual int getNDim() { return 0; } + /** + * @return model name with parameters in form of e.g. GTR{a,b,c,d,e,f} + */ + virtual string getNameParams(); + + /** + write information + @param out output stream + */ + virtual void writeInfo(ostream &out); + + /** + write parameters, used with modeltest + @param out output stream + */ + virtual void writeParameters(ostream &out); /** read the rates from an input stream. 
it will throw error messages if failed diff --git a/model/modelpomo.cpp b/model/modelpomo.cpp index d815b2650..c03809036 100644 --- a/model/modelpomo.cpp +++ b/model/modelpomo.cpp @@ -5,10 +5,8 @@ #include #include -#ifdef USE_EIGEN3 #include #include -#endif ModelPoMo::ModelPoMo(PhyloTree *tree) : ModelMarkov(tree) { } @@ -31,8 +29,7 @@ ModelPoMo::ModelPoMo(const char *model_name, void ModelPoMo::init_mutation_model(const char *model_name, string model_params, StateFreqType freq_type, - string freq_params, - string pomo_heterozygosity) + string freq_params) { // Trick ModelDNA constructor by setting the number of states to 4 (DNA). phylo_tree->aln->num_states = n_alleles; @@ -183,8 +180,7 @@ void ModelPoMo::init(const char *model_name, init_mutation_model(model_name, model_params, freq_type, - freq_params, - pomo_heterozygosity); + freq_params); init_sampling_method(); init_boundary_frequencies(); // Initialize heterozygosity and the scale factor of the mutation rates @@ -194,7 +190,12 @@ void ModelPoMo::init(const char *model_name, init_fixed_parameters(model_params, pomo_heterozygosity); set_heterozygosity_boundaries(); setInitialMutCoeff(); - rate_matrix = new double[num_states*num_states]; + //rate_matrix = new double[num_states*num_states]; + ignore_state_freq = true; + normalize_matrix = false; + half_matrix = false; + delete [] rates; + rates = new double[num_states*num_states]; updatePoMoStatesAndRateMatrix(); decomposeRateMatrix(); @@ -227,7 +228,7 @@ ModelPoMo::~ModelPoMo() { double ModelPoMo::computeSumFreqBoundaryStates() { int i; double norm_boundary = 0.0; - for (i = 0; i < 4; i++) + for (i = 0; i < n_alleles; i++) norm_boundary += freq_boundary_states[i]; // Should be 1.0! 
if ((norm_boundary > 1.0 + eps) || (norm_boundary < 1.0 - eps)) @@ -260,7 +261,7 @@ void ModelPoMo::setInitialMutCoeff() { double ModelPoMo::computeSumFreqPolyStatesNoMut() { double norm_polymorphic = 0.0; int i, j; - for (i = 0; i < 4; i++) { + for (i = 0; i < n_alleles; i++) { for (j = 0; j < i; j++) norm_polymorphic += 2 * freq_boundary_states[i] * freq_boundary_states[j]; @@ -331,11 +332,13 @@ void ModelPoMo::updatePoMoStatesAndRateMatrix () { for (j = 0; j < num_states; j++) if (i != j) { row_sum += - (rate_matrix[i*num_states+j] = + (rates[i*num_states+j] = computeProbBoundaryMutation(i, j)); } tot_sum += state_freq[i]*row_sum; - rate_matrix[i*num_states+i] = -(row_sum); + // diagonal will be handled later, should not assign now + //rates[i*num_states+i] = -(row_sum); + rates[i*num_states+i] = 0.0; } // Thu Aug 17 16:11:19 BST 2017; Dom. Normalization is preferred. Then, // branch lengths can be interpreted in an easy way (the length equals the @@ -346,7 +349,7 @@ void ModelPoMo::updatePoMoStatesAndRateMatrix () { // Normalize rate matrix such that one event happens per unit time. for (int i = 0; i < num_states; i++) { for (int j = 0; j < num_states; j++) { - rate_matrix[i*num_states+j] /= tot_sum; + rates[i*num_states+j] /= tot_sum; } } @@ -534,7 +537,31 @@ void ModelPoMo::normalizeMutationRates() { } for (int i = 0; i < n_alleles; i++) - for (int j = 0; j < n_alleles; j++) m[i*n_alleles+j] *= m_norm; + for (int j = 0; j < n_alleles; j++) { + m[i*n_alleles+j] *= m_norm; + // DEBUG. + // cout << setprecision(15); + // cout << m[i*n_alleles+j] << endl; + } + + // DEBUG. + if (verbose_mode >= VB_MED) { + cout << "theta_bm before normalization is " << theta_bm << endl; + cout << "heterozygosity is " << heterozygosity << endl; + for (int i = 0; i < n_alleles; i++) { + for (int j = i+1; j < n_alleles; j++) { + // The mutation rate matrix entry is the exchangeability times the + // target allele frequency. 
+ double ex = m[i*n_alleles+j] / freq_boundary_states[j]; + cout << setprecision(15); + cout << "Exchangeability " << i << " to " << j << " is " << ex << endl; + } + } + computeStateFreq(); + double normc = computeNormConst(); + theta_bm = (1.0 - normc) / harmonic(N-1); + cout << "theta_bm after normalization is " << theta_bm << endl; + } } void ModelPoMo::setScale(double new_scale) { @@ -586,10 +613,11 @@ void ModelPoMo::writeInfo(ostream &out) { report(out); } +// TODO: s_freqs is not used. void ModelPoMo::computeRateMatrix(double **r_matrix, double *s_freqs, int n_states) { for (int i = 0; i < n_states; i++) { for (int j = 0; j < n_states; j++) { - r_matrix[i][j] = rate_matrix[i*n_states+j]; + r_matrix[i][j] = rates[i*n_states+j]; } } } @@ -786,6 +814,18 @@ void ModelPoMo::report_model_params(ostream &out, bool reset_scale) { else out << "The reported rates are scaled by a factor of " << scale << "." << endl;; + // TODO: If verbose, output rate matrix. + if (verbose_mode >= VB_MED) { + out << "Rate matrix: " << endl; + for (int i = 0; i < num_states; i++) { + for (int j = 0; j < num_states; j++) { + out << setprecision(8); + out << setw(8) << rates[i*num_states+j] << " "; + } + out << endl; + } + } + // Report rates. // Mutation rates. double *rs = NULL; @@ -987,47 +1027,53 @@ void ModelPoMo::restoreCheckpoint() { } // Declaration of helper function; needed by decomposeRateMatrix(). -int computeStateFreqFromQMatrix (double Q[], double pi[], int n, double space[]); +//int computeStateFreqFromQMatrix (double Q[], double pi[], int n, double space[]); + +// void ModelPoMo::decomposeRateMatrix() { +// updatePoMoStatesAndRateMatrix(); +// // Non-reversible. 
+// if (!is_reversible) { +// if (phylo_tree->params->matrix_exp_technique == MET_EIGEN_DECOMPOSITION) { +// eigensystem_nonrev(rate_matrix, state_freq, eigenvalues, eigenvalues_imag, eigenvectors, inv_eigenvectors, num_states); +// return; +// } +// else if (phylo_tree->params->matrix_exp_technique == MET_SCALING_SQUARING) { +// return; +// } +// else if (phylo_tree->params->matrix_exp_technique == MET_EIGEN3LIB_DECOMPOSITION) { +// // Not (yet?) implemented. +// // decomposeRateMatrixEigen3lib(); +// outError("MET_EIGEN3LIB_DECOMPOSITION does not work with PoMo."); +// } +// else if (phylo_tree->params->matrix_exp_technique == MET_LIE_MARKOV_DECOMPOSITION) +// // Not possible? +// // decomposeRateMatrixClosedForm(); +// outError("Matrix decomposition in closed form not available for PoMo."); +// else +// outError("Matrix decomposition method unknown."); +// } +// // Reversible. Alogrithms for symmetric matrizes can be used. +// else { +// // TODO DS: This leaves room for speed improvements. +// // EigenDecomposition::eigensystem_sym() expects a matrix[][] +// // object with two indices. However, it is not used, because +// // ModelPoMo::computeRateMatrix() is called anyways from +// // within eigensystem_sym(). +// double **temp_matrix = new double*[num_states]; +// for (int i = 0; i < num_states; i++) +// temp_matrix[i] = new double[num_states]; +// eigensystem_sym(temp_matrix, state_freq, eigenvalues, eigenvectors, inv_eigenvectors, num_states); +// for (int i = num_states-1; i >= 0; i--) +// delete [] temp_matrix[i]; +// delete [] temp_matrix; +// return; +// } +// } void ModelPoMo::decomposeRateMatrix() { updatePoMoStatesAndRateMatrix(); - // Non-reversible. 
- if (!is_reversible) { - if (phylo_tree->params->matrix_exp_technique == MET_EIGEN_DECOMPOSITION) { - eigensystem_nonrev(rate_matrix, state_freq, eigenvalues, eigenvalues_imag, eigenvectors, inv_eigenvectors, num_states); - return; - } - else if (phylo_tree->params->matrix_exp_technique == MET_SCALING_SQUARING) { - return; - } - else if (phylo_tree->params->matrix_exp_technique == MET_EIGEN3LIB_DECOMPOSITION) { - // Not (yet?) implemented. - // decomposeRateMatrixEigen3lib(); - outError("MET_EIGEN3LIB_DECOMPOSITION does not work with PoMo."); - } - else if (phylo_tree->params->matrix_exp_technique == MET_LIE_MARKOV_DECOMPOSITION) - // Not possible? - // decomposeRateMatrixClosedForm(); - outError("Matrix decomposition in closed form not available for PoMo."); - else - outError("Matrix decomposition method unknown."); - } - // Reversible. Alogrithms for symmetric matrizes can be used. - else { - // TODO DS: This leaves room for speed improvements. - // EigenDecomposition::eigensystem_sym() expects a matrix[][] - // object with two indices. However, it is not used, because - // ModelPoMo::computeRateMatrix() is called anyways from - // within eigensystem_sym(). - double **temp_matrix = new double*[num_states]; - for (int i = 0; i < num_states; i++) - temp_matrix[i] = new double[num_states]; - eigensystem_sym(temp_matrix, state_freq, eigenvalues, eigenvectors, inv_eigenvectors, num_states); - for (int i = num_states-1; i >= 0; i--) - delete [] temp_matrix[i]; - delete [] temp_matrix; + ModelMarkov::decomposeRateMatrix(); return; - } } void ModelPoMo::set_heterozygosity_boundaries() { @@ -1047,7 +1093,6 @@ void ModelPoMo::set_heterozygosity_boundaries() { // TODO DS: The parameter mixture is unused at the moment. 
void ModelPoMo::computeTransMatrix(double time, double *trans_matrix, int mixture) { -#ifdef USE_EIGEN3 MatrixExpTechnique technique = phylo_tree->params->matrix_exp_technique; if (technique == MET_SCALING_SQUARING || !is_reversible) { // Do not change the object rate_matrix, but only trans_matrix. @@ -1098,8 +1143,117 @@ void ModelPoMo::computeTransMatrix(double time, double *trans_matrix, int mixtur } else ModelMarkov::computeTransMatrix(time, trans_matrix); +} -#else - ModelMarkov::computeTransMatrix(time, trans_matrix); -#endif +void ModelPoMo::computeTipLikelihood(PML::StateType state, double *lh) { + Alignment *aln = phylo_tree->aln; + if (state < num_states || state >= num_states+aln->pomo_sampled_states.size()) { + ModelSubst::computeTipLikelihood(state, lh); + return; + } + state = state - num_states; + + bool hypergeometric = (aln->pomo_sampling_method == SAMPLING_WEIGHTED_HYPER); + int nstates = aln->num_states; + int N = aln->virtual_pop_size; + + memset(lh, 0, sizeof(double)*nstates); + + // decode the id and value + int id1 = aln->pomo_sampled_states[state] & 3; + int id2 = (aln->pomo_sampled_states[state] >> 16) & 3; + int j = (aln->pomo_sampled_states[state] >> 2) & 16383; + int M = j + (aln->pomo_sampled_states[state] >> 18); + + // Number of alleles is hard coded here, change if generalization is needed. + int nnuc = 4; + + // TODO DS: Implement down sampling or a better approach. + if (hypergeometric && M > N) + outError("Down sampling not yet supported."); + + // Check if observed state is a fixed one. If so, many + // PoMo states can lead to this data. E.g., even (2A,8T) + // can lead to a sampled data of 7A. + if (j == M) { + lh[id1] = 1.0; + // Second: Polymorphic states. + for (int s_id1 = 0; s_id1 < nnuc-1; s_id1++) { + for (int s_id2 = s_id1+1; s_id2 < nnuc; s_id2++) { + if (s_id1 == id1) { + // States are in the order {FIXED, + // 1A(N-1)C, ..., (N-1)A1C, ...}. 
+ int k; + if (s_id1 == 0) k = s_id2 - 1; + else k = s_id1 + s_id2; + // Start one earlier because increment + // happens after execution of for loop + // body. + int real_state = nnuc - 1 + k*(N-1) + 1; + for (int i = 1; i < N; i++, real_state++) { + ASSERT(real_state < nstates); + if (!hypergeometric) + lh[real_state] = std::pow((double)i/(double)N,j); + else { + lh[real_state] = 1.0; + for (int l = 0; lsetBounds(lower_bound, upper_bound, bound_check); - lower_bound[1] = POMO_GAMMA_MIN; + lower_bound[1] = max(POMO_GAMMA_MIN, Params::getInstance().min_gamma_shape); upper_bound[1] = POMO_GAMMA_MAX; // Boundary checking is the preferred solution to warn the user if the // shape parameter hits the boundary, but it seems to be too verbose. @@ -156,6 +156,8 @@ void ModelPoMoMixture::decomposeRateMatrix() { memcpy(eigenvalues+m*num_states, eigenvalues, sizeof(double)*num_states); memcpy(eigenvectors+m*num_states_2, eigenvectors, sizeof(double)*num_states_2); memcpy(inv_eigenvectors+m*num_states_2, inv_eigenvectors, sizeof(double)*num_states_2); + memcpy(inv_eigenvectors_transposed+m*num_states_2 + , inv_eigenvectors_transposed, sizeof(double)*num_states_2); } // restore mutation_rate matrix memcpy(mutation_rate_matrix, saved_mutation_rate_matrix, sizeof(double)*n_alleles*n_alleles); @@ -227,18 +229,26 @@ int ModelPoMoMixture::get_num_states_total() { return num_states * getNMixtures(); } -void ModelPoMoMixture::update_eigen_pointers(double *eval, double *evec, double *inv_evec) { - eigenvalues = eval; - eigenvectors = evec; - inv_eigenvectors = inv_evec; - // We assume that all mixture model components have the same number of states. 
- int m = 0; - for (iterator it = begin(); it != end(); it++, m++) { - (*it)->update_eigen_pointers(eval + m*num_states, - evec + m*num_states*num_states, - inv_evec + m*num_states*num_states); - } - return; +void ModelPoMoMixture::update_eigen_pointers(double *eval, double *evec + , double *inv_evec, double* inv_evec_transposed) { + eigenvalues = eval; + eigenvectors = evec; + inv_eigenvectors = inv_evec; + inv_eigenvectors_transposed = inv_evec_transposed; + + // We assume that all mixture model components have the same number of states. + size_t rowOffset = 0; + size_t matrixOffset = 0; //into matrices + size_t num_states_squared = num_states * num_states; + + for (iterator it = begin(); it != end(); + it++, rowOffset+=num_states, matrixOffset+=num_states_squared) { + (*it)->update_eigen_pointers(eval + rowOffset, + evec + matrixOffset, + inv_evec + matrixOffset, + inv_evec_transposed + matrixOffset); + } + return; } bool ModelPoMoMixture::isUnstableParameters() { diff --git a/model/modelpomomixture.h b/model/modelpomomixture.h index 51cf5754c..2ec5253fd 100644 --- a/model/modelpomomixture.h +++ b/model/modelpomomixture.h @@ -146,7 +146,8 @@ class ModelPoMoMixture : public ModelPoMo, public ModelMixture { // need to be updated recursively, if the model is a mixture model. For a // normal Markov model, only the standard pointers are set. This was done in // `ModelMixture::initMem()` before. - virtual void update_eigen_pointers(double *eval, double *evec, double *inv_evec); + virtual void update_eigen_pointers(double *eval, double *evec + , double *inv_evec, double *inv_evec_transposed); /** diff --git a/model/modelprotein.cpp b/model/modelprotein.cpp index de50c3143..495a1630f 100644 --- a/model/modelprotein.cpp +++ b/model/modelprotein.cpp @@ -18,6 +18,7 @@ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* ***************************************************************************/ #include "modelprotein.h" +#include "nclextra/myreader.h" #include /* @@ -27,3847 +28,834 @@ A R N D C Q E G H I L K M F P S T W Y V Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val */ - -/* this function is taken from RAxML source code */ -static bool initProtMat(double f[20], double daa[400], string prot_model) -{ - double max, temp; - int i, j; - double scaler; - - if (prot_model == "POISSON") { - for (i = 0; i < 400; i++) daa[i] = 1.0; - for (i = 0; i < 20; i++) f[i] = 0.05; - } else if (prot_model == "DAYHOFF") - { - daa[ 1*20+ 0] = 27.00; daa[ 2*20+ 0] = 98.00; daa[ 2*20+ 1] = 32.00; daa[ 3*20+ 0] = 120.00; - daa[ 3*20+ 1] = 0.00; daa[ 3*20+ 2] = 905.00; daa[ 4*20+ 0] = 36.00; daa[ 4*20+ 1] = 23.00; - daa[ 4*20+ 2] = 0.00; daa[ 4*20+ 3] = 0.00; daa[ 5*20+ 0] = 89.00; daa[ 5*20+ 1] = 246.00; - daa[ 5*20+ 2] = 103.00; daa[ 5*20+ 3] = 134.00; daa[ 5*20+ 4] = 0.00; daa[ 6*20+ 0] = 198.00; - daa[ 6*20+ 1] = 1.00; daa[ 6*20+ 2] = 148.00; daa[ 6*20+ 3] = 1153.00; daa[ 6*20+ 4] = 0.00; - daa[ 6*20+ 5] = 716.00; daa[ 7*20+ 0] = 240.00; daa[ 7*20+ 1] = 9.00; daa[ 7*20+ 2] = 139.00; - daa[ 7*20+ 3] = 125.00; daa[ 7*20+ 4] = 11.00; daa[ 7*20+ 5] = 28.00; daa[ 7*20+ 6] = 81.00; - daa[ 8*20+ 0] = 23.00; daa[ 8*20+ 1] = 240.00; daa[ 8*20+ 2] = 535.00; daa[ 8*20+ 3] = 86.00; - daa[ 8*20+ 4] = 28.00; daa[ 8*20+ 5] = 606.00; daa[ 8*20+ 6] = 43.00; daa[ 8*20+ 7] = 10.00; - daa[ 9*20+ 0] = 65.00; daa[ 9*20+ 1] = 64.00; daa[ 9*20+ 2] = 77.00; daa[ 9*20+ 3] = 24.00; - daa[ 9*20+ 4] = 44.00; daa[ 9*20+ 5] = 18.00; daa[ 9*20+ 6] = 61.00; daa[ 9*20+ 7] = 0.00; - daa[ 9*20+ 8] = 7.00; daa[10*20+ 0] = 41.00; daa[10*20+ 1] = 15.00; daa[10*20+ 2] = 34.00; - daa[10*20+ 3] = 0.00; daa[10*20+ 4] = 0.00; daa[10*20+ 5] = 73.00; daa[10*20+ 6] = 11.00; - daa[10*20+ 7] = 7.00; daa[10*20+ 8] = 44.00; daa[10*20+ 9] = 257.00; daa[11*20+ 0] = 26.00; - daa[11*20+ 1] = 464.00; daa[11*20+ 2] = 
318.00; daa[11*20+ 3] = 71.00; daa[11*20+ 4] = 0.00; - daa[11*20+ 5] = 153.00; daa[11*20+ 6] = 83.00; daa[11*20+ 7] = 27.00; daa[11*20+ 8] = 26.00; - daa[11*20+ 9] = 46.00; daa[11*20+10] = 18.00; daa[12*20+ 0] = 72.00; daa[12*20+ 1] = 90.00; - daa[12*20+ 2] = 1.00; daa[12*20+ 3] = 0.00; daa[12*20+ 4] = 0.00; daa[12*20+ 5] = 114.00; - daa[12*20+ 6] = 30.00; daa[12*20+ 7] = 17.00; daa[12*20+ 8] = 0.00; daa[12*20+ 9] = 336.00; - daa[12*20+10] = 527.00; daa[12*20+11] = 243.00; daa[13*20+ 0] = 18.00; daa[13*20+ 1] = 14.00; - daa[13*20+ 2] = 14.00; daa[13*20+ 3] = 0.00; daa[13*20+ 4] = 0.00; daa[13*20+ 5] = 0.00; - daa[13*20+ 6] = 0.00; daa[13*20+ 7] = 15.00; daa[13*20+ 8] = 48.00; daa[13*20+ 9] = 196.00; - daa[13*20+10] = 157.00; daa[13*20+11] = 0.00; daa[13*20+12] = 92.00; daa[14*20+ 0] = 250.00; - daa[14*20+ 1] = 103.00; daa[14*20+ 2] = 42.00; daa[14*20+ 3] = 13.00; daa[14*20+ 4] = 19.00; - daa[14*20+ 5] = 153.00; daa[14*20+ 6] = 51.00; daa[14*20+ 7] = 34.00; daa[14*20+ 8] = 94.00; - daa[14*20+ 9] = 12.00; daa[14*20+10] = 32.00; daa[14*20+11] = 33.00; daa[14*20+12] = 17.00; - daa[14*20+13] = 11.00; daa[15*20+ 0] = 409.00; daa[15*20+ 1] = 154.00; daa[15*20+ 2] = 495.00; - daa[15*20+ 3] = 95.00; daa[15*20+ 4] = 161.00; daa[15*20+ 5] = 56.00; daa[15*20+ 6] = 79.00; - daa[15*20+ 7] = 234.00; daa[15*20+ 8] = 35.00; daa[15*20+ 9] = 24.00; daa[15*20+10] = 17.00; - daa[15*20+11] = 96.00; daa[15*20+12] = 62.00; daa[15*20+13] = 46.00; daa[15*20+14] = 245.00; - daa[16*20+ 0] = 371.00; daa[16*20+ 1] = 26.00; daa[16*20+ 2] = 229.00; daa[16*20+ 3] = 66.00; - daa[16*20+ 4] = 16.00; daa[16*20+ 5] = 53.00; daa[16*20+ 6] = 34.00; daa[16*20+ 7] = 30.00; - daa[16*20+ 8] = 22.00; daa[16*20+ 9] = 192.00; daa[16*20+10] = 33.00; daa[16*20+11] = 136.00; - daa[16*20+12] = 104.00; daa[16*20+13] = 13.00; daa[16*20+14] = 78.00; daa[16*20+15] = 550.00; - daa[17*20+ 0] = 0.00; daa[17*20+ 1] = 201.00; daa[17*20+ 2] = 23.00; daa[17*20+ 3] = 0.00; - daa[17*20+ 4] = 0.00; daa[17*20+ 5] = 0.00; 
daa[17*20+ 6] = 0.00; daa[17*20+ 7] = 0.00; - daa[17*20+ 8] = 27.00; daa[17*20+ 9] = 0.00; daa[17*20+10] = 46.00; daa[17*20+11] = 0.00; - daa[17*20+12] = 0.00; daa[17*20+13] = 76.00; daa[17*20+14] = 0.00; daa[17*20+15] = 75.00; - daa[17*20+16] = 0.00; daa[18*20+ 0] = 24.00; daa[18*20+ 1] = 8.00; daa[18*20+ 2] = 95.00; - daa[18*20+ 3] = 0.00; daa[18*20+ 4] = 96.00; daa[18*20+ 5] = 0.00; daa[18*20+ 6] = 22.00; - daa[18*20+ 7] = 0.00; daa[18*20+ 8] = 127.00; daa[18*20+ 9] = 37.00; daa[18*20+10] = 28.00; - daa[18*20+11] = 13.00; daa[18*20+12] = 0.00; daa[18*20+13] = 698.00; daa[18*20+14] = 0.00; - daa[18*20+15] = 34.00; daa[18*20+16] = 42.00; daa[18*20+17] = 61.00; daa[19*20+ 0] = 208.00; - daa[19*20+ 1] = 24.00; daa[19*20+ 2] = 15.00; daa[19*20+ 3] = 18.00; daa[19*20+ 4] = 49.00; - daa[19*20+ 5] = 35.00; daa[19*20+ 6] = 37.00; daa[19*20+ 7] = 54.00; daa[19*20+ 8] = 44.00; - daa[19*20+ 9] = 889.00; daa[19*20+10] = 175.00; daa[19*20+11] = 10.00; daa[19*20+12] = 258.00; - daa[19*20+13] = 12.00; daa[19*20+14] = 48.00; daa[19*20+15] = 30.00; daa[19*20+16] = 157.00; - daa[19*20+17] = 0.00; daa[19*20+18] = 28.00; - -/* - * ROUNDING ERROR again: - f[ 0] = 0.087000; f[ 1] = 0.041000; f[ 2] = 0.040000; f[ 3] = 0.047000; - f[ 4] = 0.034000; f[ 5] = 0.038000; f[ 6] = 0.050000; f[ 7] = 0.089000; - f[ 8] = 0.034000; f[ 9] = 0.037000; f[10] = 0.085000; f[11] = 0.080000; - f[12] = 0.014000; f[13] = 0.040000; f[14] = 0.051000; f[15] = 0.070000; - f[16] = 0.058000; f[17] = 0.011000; f[18] = 0.030000; f[19] = 0.064000; -*/ - //NOTE: Originally, f[19]=0.064718 but frequencies do not sum up to 1 - f[ 0] = 0.087127; f[ 1] = 0.040904; f[ 2] = 0.040432; f[ 3] = 0.046872; - f[ 4] = 0.033474; f[ 5] = 0.038255; f[ 6] = 0.049530; f[ 7] = 0.088612; - f[ 8] = 0.033618; f[ 9] = 0.036886; f[10] = 0.085357; f[11] = 0.080482; - f[12] = 0.014753; f[13] = 0.039772; f[14] = 0.050680; f[15] = 0.069577; - f[16] = 0.058542; f[17] = 0.010494; f[18] = 0.029916; f[19] = 0.064717; - - } - else if (prot_model == 
"DCMUT") - { - daa[ 1*20+ 0] = 26.78280; daa[ 2*20+ 0] = 98.44740; daa[ 2*20+ 1] = 32.70590; daa[ 3*20+ 0] = 119.98050; - daa[ 3*20+ 1] = 0.00000; daa[ 3*20+ 2] = 893.15150; daa[ 4*20+ 0] = 36.00160; daa[ 4*20+ 1] = 23.23740; - daa[ 4*20+ 2] = 0.00000; daa[ 4*20+ 3] = 0.00000; daa[ 5*20+ 0] = 88.77530; daa[ 5*20+ 1] = 243.99390; - daa[ 5*20+ 2] = 102.85090; daa[ 5*20+ 3] = 134.85510; daa[ 5*20+ 4] = 0.00000; daa[ 6*20+ 0] = 196.11670; - daa[ 6*20+ 1] = 0.00000; daa[ 6*20+ 2] = 149.34090; daa[ 6*20+ 3] = 1138.86590; daa[ 6*20+ 4] = 0.00000; - daa[ 6*20+ 5] = 708.60220; daa[ 7*20+ 0] = 238.61110; daa[ 7*20+ 1] = 8.77910; daa[ 7*20+ 2] = 138.53520; - daa[ 7*20+ 3] = 124.09810; daa[ 7*20+ 4] = 10.72780; daa[ 7*20+ 5] = 28.15810; daa[ 7*20+ 6] = 81.19070; - daa[ 8*20+ 0] = 22.81160; daa[ 8*20+ 1] = 238.31480; daa[ 8*20+ 2] = 529.00240; daa[ 8*20+ 3] = 86.82410; - daa[ 8*20+ 4] = 28.27290; daa[ 8*20+ 5] = 601.16130; daa[ 8*20+ 6] = 43.94690; daa[ 8*20+ 7] = 10.68020; - daa[ 9*20+ 0] = 65.34160; daa[ 9*20+ 1] = 63.26290; daa[ 9*20+ 2] = 76.80240; daa[ 9*20+ 3] = 23.92480; - daa[ 9*20+ 4] = 43.80740; daa[ 9*20+ 5] = 18.03930; daa[ 9*20+ 6] = 60.95260; daa[ 9*20+ 7] = 0.00000; - daa[ 9*20+ 8] = 7.69810; daa[10*20+ 0] = 40.64310; daa[10*20+ 1] = 15.49240; daa[10*20+ 2] = 34.11130; - daa[10*20+ 3] = 0.00000; daa[10*20+ 4] = 0.00000; daa[10*20+ 5] = 73.07720; daa[10*20+ 6] = 11.28800; - daa[10*20+ 7] = 7.15140; daa[10*20+ 8] = 44.35040; daa[10*20+ 9] = 255.66850; daa[11*20+ 0] = 25.86350; - daa[11*20+ 1] = 461.01240; daa[11*20+ 2] = 314.83710; daa[11*20+ 3] = 71.69130; daa[11*20+ 4] = 0.00000; - daa[11*20+ 5] = 151.90780; daa[11*20+ 6] = 83.00780; daa[11*20+ 7] = 26.76830; daa[11*20+ 8] = 27.04750; - daa[11*20+ 9] = 46.08570; daa[11*20+10] = 18.06290; daa[12*20+ 0] = 71.78400; daa[12*20+ 1] = 89.63210; - daa[12*20+ 2] = 0.00000; daa[12*20+ 3] = 0.00000; daa[12*20+ 4] = 0.00000; daa[12*20+ 5] = 112.74990; - daa[12*20+ 6] = 30.48030; daa[12*20+ 7] = 17.03720; daa[12*20+ 8] = 
0.00000; daa[12*20+ 9] = 333.27320; - daa[12*20+10] = 523.01150; daa[12*20+11] = 241.17390; daa[13*20+ 0] = 18.36410; daa[13*20+ 1] = 13.69060; - daa[13*20+ 2] = 13.85030; daa[13*20+ 3] = 0.00000; daa[13*20+ 4] = 0.00000; daa[13*20+ 5] = 0.00000; - daa[13*20+ 6] = 0.00000; daa[13*20+ 7] = 15.34780; daa[13*20+ 8] = 47.59270; daa[13*20+ 9] = 195.19510; - daa[13*20+10] = 156.51600; daa[13*20+11] = 0.00000; daa[13*20+12] = 92.18600; daa[14*20+ 0] = 248.59200; - daa[14*20+ 1] = 102.83130; daa[14*20+ 2] = 41.92440; daa[14*20+ 3] = 13.39400; daa[14*20+ 4] = 18.75500; - daa[14*20+ 5] = 152.61880; daa[14*20+ 6] = 50.70030; daa[14*20+ 7] = 34.71530; daa[14*20+ 8] = 93.37090; - daa[14*20+ 9] = 11.91520; daa[14*20+10] = 31.62580; daa[14*20+11] = 33.54190; daa[14*20+12] = 17.02050; - daa[14*20+13] = 11.05060; daa[15*20+ 0] = 405.18700; daa[15*20+ 1] = 153.15900; daa[15*20+ 2] = 488.58920; - daa[15*20+ 3] = 95.60970; daa[15*20+ 4] = 159.83560; daa[15*20+ 5] = 56.18280; daa[15*20+ 6] = 79.39990; - daa[15*20+ 7] = 232.22430; daa[15*20+ 8] = 35.36430; daa[15*20+ 9] = 24.79550; daa[15*20+10] = 17.14320; - daa[15*20+11] = 95.45570; daa[15*20+12] = 61.99510; daa[15*20+13] = 45.99010; daa[15*20+14] = 242.72020; - daa[16*20+ 0] = 368.03650; daa[16*20+ 1] = 26.57450; daa[16*20+ 2] = 227.16970; daa[16*20+ 3] = 66.09300; - daa[16*20+ 4] = 16.23660; daa[16*20+ 5] = 52.56510; daa[16*20+ 6] = 34.01560; daa[16*20+ 7] = 30.66620; - daa[16*20+ 8] = 22.63330; daa[16*20+ 9] = 190.07390; daa[16*20+10] = 33.10900; daa[16*20+11] = 135.05990; - daa[16*20+12] = 103.15340; daa[16*20+13] = 13.66550; daa[16*20+14] = 78.28570; daa[16*20+15] = 543.66740; - daa[17*20+ 0] = 0.00000; daa[17*20+ 1] = 200.13750; daa[17*20+ 2] = 22.49680; daa[17*20+ 3] = 0.00000; - daa[17*20+ 4] = 0.00000; daa[17*20+ 5] = 0.00000; daa[17*20+ 6] = 0.00000; daa[17*20+ 7] = 0.00000; - daa[17*20+ 8] = 27.05640; daa[17*20+ 9] = 0.00000; daa[17*20+10] = 46.17760; daa[17*20+11] = 0.00000; - daa[17*20+12] = 0.00000; daa[17*20+13] = 
76.23540; daa[17*20+14] = 0.00000; daa[17*20+15] = 74.08190; - daa[17*20+16] = 0.00000; daa[18*20+ 0] = 24.41390; daa[18*20+ 1] = 7.80120; daa[18*20+ 2] = 94.69400; - daa[18*20+ 3] = 0.00000; daa[18*20+ 4] = 95.31640; daa[18*20+ 5] = 0.00000; daa[18*20+ 6] = 21.47170; - daa[18*20+ 7] = 0.00000; daa[18*20+ 8] = 126.54000; daa[18*20+ 9] = 37.48340; daa[18*20+10] = 28.65720; - daa[18*20+11] = 13.21420; daa[18*20+12] = 0.00000; daa[18*20+13] = 695.26290; daa[18*20+14] = 0.00000; - daa[18*20+15] = 33.62890; daa[18*20+16] = 41.78390; daa[18*20+17] = 60.80700; daa[19*20+ 0] = 205.95640; - daa[19*20+ 1] = 24.03680; daa[19*20+ 2] = 15.80670; daa[19*20+ 3] = 17.83160; daa[19*20+ 4] = 48.46780; - daa[19*20+ 5] = 34.69830; daa[19*20+ 6] = 36.72500; daa[19*20+ 7] = 53.81650; daa[19*20+ 8] = 43.87150; - daa[19*20+ 9] = 881.00380; daa[19*20+10] = 174.51560; daa[19*20+11] = 10.38500; daa[19*20+12] = 256.59550; - daa[19*20+13] = 12.36060; daa[19*20+14] = 48.50260; daa[19*20+15] = 30.38360; daa[19*20+16] = 156.19970; - daa[19*20+17] = 0.00000; daa[19*20+18] = 27.93790; - - /* ROUNDING ERROR: - f[ 0] = 0.08700; f[ 1] = 0.04100; f[ 2] = 0.04000; f[ 3] = 0.04700; - f[ 4] = 0.03300; f[ 5] = 0.03800; f[ 6] = 0.04900; f[ 7] = 0.08900; - f[ 8] = 0.03400; f[ 9] = 0.03700; f[10] = 0.08500; f[11] = 0.08000; - f[12] = 0.01500; f[13] = 0.04000; f[14] = 0.05200; f[15] = 0.06900; - f[16] = 0.05900; f[17] = 0.01000; f[18] = 0.03000; f[19] = 0.06500; -*/ - // NOTE: originally f[19]=0.064718 but frequencies do not sum up to 1 - f[ 0] = 0.087127; f[ 1] = 0.040904; f[ 2] = 0.040432; f[ 3] = 0.046872; - f[ 4] = 0.033474; f[ 5] = 0.038255; f[ 6] = 0.049530; f[ 7] = 0.088612; - f[ 8] = 0.033619; f[ 9] = 0.036886; f[10] = 0.085357; f[11] = 0.080481; - f[12] = 0.014753; f[13] = 0.039772; f[14] = 0.050680; f[15] = 0.069577; - f[16] = 0.058542; f[17] = 0.010494; f[18] = 0.029916; f[19] = 0.064717; - - } - else if (prot_model == "JTT") - { - daa[ 1*20+ 0] = 58.00; daa[ 2*20+ 0] = 54.00; daa[ 2*20+ 1] = 45.00; 
daa[ 3*20+ 0] = 81.00; - daa[ 3*20+ 1] = 16.00; daa[ 3*20+ 2] = 528.00; daa[ 4*20+ 0] = 56.00; daa[ 4*20+ 1] = 113.00; - daa[ 4*20+ 2] = 34.00; daa[ 4*20+ 3] = 10.00; daa[ 5*20+ 0] = 57.00; daa[ 5*20+ 1] = 310.00; - daa[ 5*20+ 2] = 86.00; daa[ 5*20+ 3] = 49.00; daa[ 5*20+ 4] = 9.00; daa[ 6*20+ 0] = 105.00; - daa[ 6*20+ 1] = 29.00; daa[ 6*20+ 2] = 58.00; daa[ 6*20+ 3] = 767.00; daa[ 6*20+ 4] = 5.00; - daa[ 6*20+ 5] = 323.00; daa[ 7*20+ 0] = 179.00; daa[ 7*20+ 1] = 137.00; daa[ 7*20+ 2] = 81.00; - daa[ 7*20+ 3] = 130.00; daa[ 7*20+ 4] = 59.00; daa[ 7*20+ 5] = 26.00; daa[ 7*20+ 6] = 119.00; - daa[ 8*20+ 0] = 27.00; daa[ 8*20+ 1] = 328.00; daa[ 8*20+ 2] = 391.00; daa[ 8*20+ 3] = 112.00; - daa[ 8*20+ 4] = 69.00; daa[ 8*20+ 5] = 597.00; daa[ 8*20+ 6] = 26.00; daa[ 8*20+ 7] = 23.00; - daa[ 9*20+ 0] = 36.00; daa[ 9*20+ 1] = 22.00; daa[ 9*20+ 2] = 47.00; daa[ 9*20+ 3] = 11.00; - daa[ 9*20+ 4] = 17.00; daa[ 9*20+ 5] = 9.00; daa[ 9*20+ 6] = 12.00; daa[ 9*20+ 7] = 6.00; - daa[ 9*20+ 8] = 16.00; daa[10*20+ 0] = 30.00; daa[10*20+ 1] = 38.00; daa[10*20+ 2] = 12.00; - daa[10*20+ 3] = 7.00; daa[10*20+ 4] = 23.00; daa[10*20+ 5] = 72.00; daa[10*20+ 6] = 9.00; - daa[10*20+ 7] = 6.00; daa[10*20+ 8] = 56.00; daa[10*20+ 9] = 229.00; daa[11*20+ 0] = 35.00; - daa[11*20+ 1] = 646.00; daa[11*20+ 2] = 263.00; daa[11*20+ 3] = 26.00; daa[11*20+ 4] = 7.00; - daa[11*20+ 5] = 292.00; daa[11*20+ 6] = 181.00; daa[11*20+ 7] = 27.00; daa[11*20+ 8] = 45.00; - daa[11*20+ 9] = 21.00; daa[11*20+10] = 14.00; daa[12*20+ 0] = 54.00; daa[12*20+ 1] = 44.00; - daa[12*20+ 2] = 30.00; daa[12*20+ 3] = 15.00; daa[12*20+ 4] = 31.00; daa[12*20+ 5] = 43.00; - daa[12*20+ 6] = 18.00; daa[12*20+ 7] = 14.00; daa[12*20+ 8] = 33.00; daa[12*20+ 9] = 479.00; - daa[12*20+10] = 388.00; daa[12*20+11] = 65.00; daa[13*20+ 0] = 15.00; daa[13*20+ 1] = 5.00; - daa[13*20+ 2] = 10.00; daa[13*20+ 3] = 4.00; daa[13*20+ 4] = 78.00; daa[13*20+ 5] = 4.00; - daa[13*20+ 6] = 5.00; daa[13*20+ 7] = 5.00; daa[13*20+ 8] = 40.00; daa[13*20+ 9] = 
89.00; - daa[13*20+10] = 248.00; daa[13*20+11] = 4.00; daa[13*20+12] = 43.00; daa[14*20+ 0] = 194.00; - daa[14*20+ 1] = 74.00; daa[14*20+ 2] = 15.00; daa[14*20+ 3] = 15.00; daa[14*20+ 4] = 14.00; - daa[14*20+ 5] = 164.00; daa[14*20+ 6] = 18.00; daa[14*20+ 7] = 24.00; daa[14*20+ 8] = 115.00; - daa[14*20+ 9] = 10.00; daa[14*20+10] = 102.00; daa[14*20+11] = 21.00; daa[14*20+12] = 16.00; - daa[14*20+13] = 17.00; daa[15*20+ 0] = 378.00; daa[15*20+ 1] = 101.00; daa[15*20+ 2] = 503.00; - daa[15*20+ 3] = 59.00; daa[15*20+ 4] = 223.00; daa[15*20+ 5] = 53.00; daa[15*20+ 6] = 30.00; - daa[15*20+ 7] = 201.00; daa[15*20+ 8] = 73.00; daa[15*20+ 9] = 40.00; daa[15*20+10] = 59.00; - daa[15*20+11] = 47.00; daa[15*20+12] = 29.00; daa[15*20+13] = 92.00; daa[15*20+14] = 285.00; - daa[16*20+ 0] = 475.00; daa[16*20+ 1] = 64.00; daa[16*20+ 2] = 232.00; daa[16*20+ 3] = 38.00; - daa[16*20+ 4] = 42.00; daa[16*20+ 5] = 51.00; daa[16*20+ 6] = 32.00; daa[16*20+ 7] = 33.00; - daa[16*20+ 8] = 46.00; daa[16*20+ 9] = 245.00; daa[16*20+10] = 25.00; daa[16*20+11] = 103.00; - daa[16*20+12] = 226.00; daa[16*20+13] = 12.00; daa[16*20+14] = 118.00; daa[16*20+15] = 477.00; - daa[17*20+ 0] = 9.00; daa[17*20+ 1] = 126.00; daa[17*20+ 2] = 8.00; daa[17*20+ 3] = 4.00; - daa[17*20+ 4] = 115.00; daa[17*20+ 5] = 18.00; daa[17*20+ 6] = 10.00; daa[17*20+ 7] = 55.00; - daa[17*20+ 8] = 8.00; daa[17*20+ 9] = 9.00; daa[17*20+10] = 52.00; daa[17*20+11] = 10.00; - daa[17*20+12] = 24.00; daa[17*20+13] = 53.00; daa[17*20+14] = 6.00; daa[17*20+15] = 35.00; - daa[17*20+16] = 12.00; daa[18*20+ 0] = 11.00; daa[18*20+ 1] = 20.00; daa[18*20+ 2] = 70.00; - daa[18*20+ 3] = 46.00; daa[18*20+ 4] = 209.00; daa[18*20+ 5] = 24.00; daa[18*20+ 6] = 7.00; - daa[18*20+ 7] = 8.00; daa[18*20+ 8] = 573.00; daa[18*20+ 9] = 32.00; daa[18*20+10] = 24.00; - daa[18*20+11] = 8.00; daa[18*20+12] = 18.00; daa[18*20+13] = 536.00; daa[18*20+14] = 10.00; - daa[18*20+15] = 63.00; daa[18*20+16] = 21.00; daa[18*20+17] = 71.00; daa[19*20+ 0] = 298.00; - 
daa[19*20+ 1] = 17.00; daa[19*20+ 2] = 16.00; daa[19*20+ 3] = 31.00; daa[19*20+ 4] = 62.00; - daa[19*20+ 5] = 20.00; daa[19*20+ 6] = 45.00; daa[19*20+ 7] = 47.00; daa[19*20+ 8] = 11.00; - daa[19*20+ 9] = 961.00; daa[19*20+10] = 180.00; daa[19*20+11] = 14.00; daa[19*20+12] = 323.00; - daa[19*20+13] = 62.00; daa[19*20+14] = 23.00; daa[19*20+15] = 38.00; daa[19*20+16] = 112.00; - daa[19*20+17] = 25.00; daa[19*20+18] = 16.00; - - /* ROUNDING ERROR: - f[ 0] = 0.07700; f[ 1] = 0.05200; f[ 2] = 0.04200; f[ 3] = 0.05100; - f[ 4] = 0.02000; f[ 5] = 0.04100; f[ 6] = 0.06200; f[ 7] = 0.07300; - f[ 8] = 0.02300; f[ 9] = 0.05400; f[10] = 0.09200; f[11] = 0.05900; - f[12] = 0.02400; f[13] = 0.04000; f[14] = 0.05100; f[15] = 0.06900; - f[16] = 0.05800; f[17] = 0.01400; f[18] = 0.03200; f[19] = 0.06600; - */ - // NOTE: originally, f[19]=0.066005 but frequencies do not sum up to 1 - f[ 0] = 0.076748; f[ 1] = 0.051691; f[ 2] = 0.042645; f[ 3] = 0.051544; - f[ 4] = 0.019803; f[ 5] = 0.040752; f[ 6] = 0.061830; f[ 7] = 0.073152; - f[ 8] = 0.022944; f[ 9] = 0.053761; f[10] = 0.091904; f[11] = 0.058676; - f[12] = 0.023826; f[13] = 0.040126; f[14] = 0.050901; f[15] = 0.068765; - f[16] = 0.058565; f[17] = 0.014261; f[18] = 0.032102; f[19] = 0.066004; - - } - else if (prot_model == "MTREV") - { - daa[ 1*20+ 0] = 23.18; daa[ 2*20+ 0] = 26.95; daa[ 2*20+ 1] = 13.24; daa[ 3*20+ 0] = 17.67; - daa[ 3*20+ 1] = 1.90; daa[ 3*20+ 2] = 794.38; daa[ 4*20+ 0] = 59.93; daa[ 4*20+ 1] = 103.33; - daa[ 4*20+ 2] = 58.94; daa[ 4*20+ 3] = 1.90; daa[ 5*20+ 0] = 1.90; daa[ 5*20+ 1] = 220.99; - daa[ 5*20+ 2] = 173.56; daa[ 5*20+ 3] = 55.28; daa[ 5*20+ 4] = 75.24; daa[ 6*20+ 0] = 9.77; - daa[ 6*20+ 1] = 1.90; daa[ 6*20+ 2] = 63.05; daa[ 6*20+ 3] = 583.55; daa[ 6*20+ 4] = 1.90; - daa[ 6*20+ 5] = 313.56; daa[ 7*20+ 0] = 120.71; daa[ 7*20+ 1] = 23.03; daa[ 7*20+ 2] = 53.30; - daa[ 7*20+ 3] = 56.77; daa[ 7*20+ 4] = 30.71; daa[ 7*20+ 5] = 6.75; daa[ 7*20+ 6] = 28.28; - daa[ 8*20+ 0] = 13.90; daa[ 8*20+ 1] = 165.23; 
daa[ 8*20+ 2] = 496.13; daa[ 8*20+ 3] = 113.99; - daa[ 8*20+ 4] = 141.49; daa[ 8*20+ 5] = 582.40; daa[ 8*20+ 6] = 49.12; daa[ 8*20+ 7] = 1.90; - daa[ 9*20+ 0] = 96.49; daa[ 9*20+ 1] = 1.90; daa[ 9*20+ 2] = 27.10; daa[ 9*20+ 3] = 4.34; - daa[ 9*20+ 4] = 62.73; daa[ 9*20+ 5] = 8.34; daa[ 9*20+ 6] = 3.31; daa[ 9*20+ 7] = 5.98; - daa[ 9*20+ 8] = 12.26; daa[10*20+ 0] = 25.46; daa[10*20+ 1] = 15.58; daa[10*20+ 2] = 15.16; - daa[10*20+ 3] = 1.90; daa[10*20+ 4] = 25.65; daa[10*20+ 5] = 39.70; daa[10*20+ 6] = 1.90; - daa[10*20+ 7] = 2.41; daa[10*20+ 8] = 11.49; daa[10*20+ 9] = 329.09; daa[11*20+ 0] = 8.36; - daa[11*20+ 1] = 141.40; daa[11*20+ 2] = 608.70; daa[11*20+ 3] = 2.31; daa[11*20+ 4] = 1.90; - daa[11*20+ 5] = 465.58; daa[11*20+ 6] = 313.86; daa[11*20+ 7] = 22.73; daa[11*20+ 8] = 127.67; - daa[11*20+ 9] = 19.57; daa[11*20+10] = 14.88; daa[12*20+ 0] = 141.88; daa[12*20+ 1] = 1.90; - daa[12*20+ 2] = 65.41; daa[12*20+ 3] = 1.90; daa[12*20+ 4] = 6.18; daa[12*20+ 5] = 47.37; - daa[12*20+ 6] = 1.90; daa[12*20+ 7] = 1.90; daa[12*20+ 8] = 11.97; daa[12*20+ 9] = 517.98; - daa[12*20+10] = 537.53; daa[12*20+11] = 91.37; daa[13*20+ 0] = 6.37; daa[13*20+ 1] = 4.69; - daa[13*20+ 2] = 15.20; daa[13*20+ 3] = 4.98; daa[13*20+ 4] = 70.80; daa[13*20+ 5] = 19.11; - daa[13*20+ 6] = 2.67; daa[13*20+ 7] = 1.90; daa[13*20+ 8] = 48.16; daa[13*20+ 9] = 84.67; - daa[13*20+10] = 216.06; daa[13*20+11] = 6.44; daa[13*20+12] = 90.82; daa[14*20+ 0] = 54.31; - daa[14*20+ 1] = 23.64; daa[14*20+ 2] = 73.31; daa[14*20+ 3] = 13.43; daa[14*20+ 4] = 31.26; - daa[14*20+ 5] = 137.29; daa[14*20+ 6] = 12.83; daa[14*20+ 7] = 1.90; daa[14*20+ 8] = 60.97; - daa[14*20+ 9] = 20.63; daa[14*20+10] = 40.10; daa[14*20+11] = 50.10; daa[14*20+12] = 18.84; - daa[14*20+13] = 17.31; daa[15*20+ 0] = 387.86; daa[15*20+ 1] = 6.04; daa[15*20+ 2] = 494.39; - daa[15*20+ 3] = 69.02; daa[15*20+ 4] = 277.05; daa[15*20+ 5] = 54.11; daa[15*20+ 6] = 54.71; - daa[15*20+ 7] = 125.93; daa[15*20+ 8] = 77.46; daa[15*20+ 9] = 47.70; 
daa[15*20+10] = 73.61; - daa[15*20+11] = 105.79; daa[15*20+12] = 111.16; daa[15*20+13] = 64.29; daa[15*20+14] = 169.90; - daa[16*20+ 0] = 480.72; daa[16*20+ 1] = 2.08; daa[16*20+ 2] = 238.46; daa[16*20+ 3] = 28.01; - daa[16*20+ 4] = 179.97; daa[16*20+ 5] = 94.93; daa[16*20+ 6] = 14.82; daa[16*20+ 7] = 11.17; - daa[16*20+ 8] = 44.78; daa[16*20+ 9] = 368.43; daa[16*20+10] = 126.40; daa[16*20+11] = 136.33; - daa[16*20+12] = 528.17; daa[16*20+13] = 33.85; daa[16*20+14] = 128.22; daa[16*20+15] = 597.21; - daa[17*20+ 0] = 1.90; daa[17*20+ 1] = 21.95; daa[17*20+ 2] = 10.68; daa[17*20+ 3] = 19.86; - daa[17*20+ 4] = 33.60; daa[17*20+ 5] = 1.90; daa[17*20+ 6] = 1.90; daa[17*20+ 7] = 10.92; - daa[17*20+ 8] = 7.08; daa[17*20+ 9] = 1.90; daa[17*20+10] = 32.44; daa[17*20+11] = 24.00; - daa[17*20+12] = 21.71; daa[17*20+13] = 7.84; daa[17*20+14] = 4.21; daa[17*20+15] = 38.58; - daa[17*20+16] = 9.99; daa[18*20+ 0] = 6.48; daa[18*20+ 1] = 1.90; daa[18*20+ 2] = 191.36; - daa[18*20+ 3] = 21.21; daa[18*20+ 4] = 254.77; daa[18*20+ 5] = 38.82; daa[18*20+ 6] = 13.12; - daa[18*20+ 7] = 3.21; daa[18*20+ 8] = 670.14; daa[18*20+ 9] = 25.01; daa[18*20+10] = 44.15; - daa[18*20+11] = 51.17; daa[18*20+12] = 39.96; daa[18*20+13] = 465.58; daa[18*20+14] = 16.21; - daa[18*20+15] = 64.92; daa[18*20+16] = 38.73; daa[18*20+17] = 26.25; daa[19*20+ 0] = 195.06; - daa[19*20+ 1] = 7.64; daa[19*20+ 2] = 1.90; daa[19*20+ 3] = 1.90; daa[19*20+ 4] = 1.90; - daa[19*20+ 5] = 19.00; daa[19*20+ 6] = 21.14; daa[19*20+ 7] = 2.53; daa[19*20+ 8] = 1.90; - daa[19*20+ 9] = 1222.94; daa[19*20+10] = 91.67; daa[19*20+11] = 1.90; daa[19*20+12] = 387.54; - daa[19*20+13] = 6.35; daa[19*20+14] = 8.23; daa[19*20+15] = 1.90; daa[19*20+16] = 204.54; - daa[19*20+17] = 5.37; daa[19*20+18] = 1.90; - - - f[ 0] = 0.072000; f[ 1] = 0.019000; f[ 2] = 0.039000; f[ 3] = 0.019000; - f[ 4] = 0.006000; f[ 5] = 0.025000; f[ 6] = 0.024000; f[ 7] = 0.056000; - f[ 8] = 0.028000; f[ 9] = 0.088000; f[10] = 0.169000; f[11] = 0.023000; - f[12] = 
0.054000; f[13] = 0.061000; f[14] = 0.054000; f[15] = 0.072000; - f[16] = 0.086000; f[17] = 0.029000; f[18] = 0.033000; f[19] = 0.043000; -/* - The original matrix from Adachi & Hasegawa (1996) is: - f[ 0] = 0.072000; f[ 1] = 0.019000; f[ 2] = 0.039000; f[ 3] = 0.019000; - f[ 4] = 0.006000; f[ 5] = 0.025000; f[ 6] = 0.024000; f[ 7] = 0.056000; - f[ 8] = 0.028000; f[ 9] = 0.087000; f[10] = 0.168000; f[11] = 0.023000; - f[12] = 0.053000; f[13] = 0.061000; f[14] = 0.055000; f[15] = 0.072000; - f[16] = 0.088000; f[17] = 0.029000; f[18] = 0.033000; f[19] = 0.044000; - but they sum up to 1.001 -*/ - } - else if (prot_model == "WAG") - { - daa[ 1*20+ 0] = 55.15710; daa[ 2*20+ 0] = 50.98480; daa[ 2*20+ 1] = 63.53460; - daa[ 3*20+ 0] = 73.89980; daa[ 3*20+ 1] = 14.73040; daa[ 3*20+ 2] = 542.94200; - daa[ 4*20+ 0] = 102.70400; daa[ 4*20+ 1] = 52.81910; daa[ 4*20+ 2] = 26.52560; - daa[ 4*20+ 3] = 3.02949; daa[ 5*20+ 0] = 90.85980; daa[ 5*20+ 1] = 303.55000; - daa[ 5*20+ 2] = 154.36400; daa[ 5*20+ 3] = 61.67830; daa[ 5*20+ 4] = 9.88179; - daa[ 6*20+ 0] = 158.28500; daa[ 6*20+ 1] = 43.91570; daa[ 6*20+ 2] = 94.71980; - daa[ 6*20+ 3] = 617.41600; daa[ 6*20+ 4] = 2.13520; daa[ 6*20+ 5] = 546.94700; - daa[ 7*20+ 0] = 141.67200; daa[ 7*20+ 1] = 58.46650; daa[ 7*20+ 2] = 112.55600; - daa[ 7*20+ 3] = 86.55840; daa[ 7*20+ 4] = 30.66740; daa[ 7*20+ 5] = 33.00520; - daa[ 7*20+ 6] = 56.77170; daa[ 8*20+ 0] = 31.69540; daa[ 8*20+ 1] = 213.71500; - daa[ 8*20+ 2] = 395.62900; daa[ 8*20+ 3] = 93.06760; daa[ 8*20+ 4] = 24.89720; - daa[ 8*20+ 5] = 429.41100; daa[ 8*20+ 6] = 57.00250; daa[ 8*20+ 7] = 24.94100; - daa[ 9*20+ 0] = 19.33350; daa[ 9*20+ 1] = 18.69790; daa[ 9*20+ 2] = 55.42360; - daa[ 9*20+ 3] = 3.94370; daa[ 9*20+ 4] = 17.01350; daa[ 9*20+ 5] = 11.39170; - daa[ 9*20+ 6] = 12.73950; daa[ 9*20+ 7] = 3.04501; daa[ 9*20+ 8] = 13.81900; - daa[10*20+ 0] = 39.79150; daa[10*20+ 1] = 49.76710; daa[10*20+ 2] = 13.15280; - daa[10*20+ 3] = 8.48047; daa[10*20+ 4] = 38.42870; daa[10*20+ 5] = 
86.94890; - daa[10*20+ 6] = 15.42630; daa[10*20+ 7] = 6.13037; daa[10*20+ 8] = 49.94620; - daa[10*20+ 9] = 317.09700; daa[11*20+ 0] = 90.62650; daa[11*20+ 1] = 535.14200; - daa[11*20+ 2] = 301.20100; daa[11*20+ 3] = 47.98550; daa[11*20+ 4] = 7.40339; - daa[11*20+ 5] = 389.49000; daa[11*20+ 6] = 258.44300; daa[11*20+ 7] = 37.35580; - daa[11*20+ 8] = 89.04320; daa[11*20+ 9] = 32.38320; daa[11*20+10] = 25.75550; - daa[12*20+ 0] = 89.34960; daa[12*20+ 1] = 68.31620; daa[12*20+ 2] = 19.82210; - daa[12*20+ 3] = 10.37540; daa[12*20+ 4] = 39.04820; daa[12*20+ 5] = 154.52600; - daa[12*20+ 6] = 31.51240; daa[12*20+ 7] = 17.41000; daa[12*20+ 8] = 40.41410; - daa[12*20+ 9] = 425.74600; daa[12*20+10] = 485.40200; daa[12*20+11] = 93.42760; - daa[13*20+ 0] = 21.04940; daa[13*20+ 1] = 10.27110; daa[13*20+ 2] = 9.61621; - daa[13*20+ 3] = 4.67304; daa[13*20+ 4] = 39.80200; daa[13*20+ 5] = 9.99208; - daa[13*20+ 6] = 8.11339; daa[13*20+ 7] = 4.99310; daa[13*20+ 8] = 67.93710; - daa[13*20+ 9] = 105.94700; daa[13*20+10] = 211.51700; daa[13*20+11] = 8.88360; - daa[13*20+12] = 119.06300; daa[14*20+ 0] = 143.85500; daa[14*20+ 1] = 67.94890; - daa[14*20+ 2] = 19.50810; daa[14*20+ 3] = 42.39840; daa[14*20+ 4] = 10.94040; - daa[14*20+ 5] = 93.33720; daa[14*20+ 6] = 68.23550; daa[14*20+ 7] = 24.35700; - daa[14*20+ 8] = 69.61980; daa[14*20+ 9] = 9.99288; daa[14*20+10] = 41.58440; - daa[14*20+11] = 55.68960; daa[14*20+12] = 17.13290; daa[14*20+13] = 16.14440; - daa[15*20+ 0] = 337.07900; daa[15*20+ 1] = 122.41900; daa[15*20+ 2] = 397.42300; - daa[15*20+ 3] = 107.17600; daa[15*20+ 4] = 140.76600; daa[15*20+ 5] = 102.88700; - daa[15*20+ 6] = 70.49390; daa[15*20+ 7] = 134.18200; daa[15*20+ 8] = 74.01690; - daa[15*20+ 9] = 31.94400; daa[15*20+10] = 34.47390; daa[15*20+11] = 96.71300; - daa[15*20+12] = 49.39050; daa[15*20+13] = 54.59310; daa[15*20+14] = 161.32800; - daa[16*20+ 0] = 212.11100; daa[16*20+ 1] = 55.44130; daa[16*20+ 2] = 203.00600; - daa[16*20+ 3] = 37.48660; daa[16*20+ 4] = 51.29840; 
daa[16*20+ 5] = 85.79280; - daa[16*20+ 6] = 82.27650; daa[16*20+ 7] = 22.58330; daa[16*20+ 8] = 47.33070; - daa[16*20+ 9] = 145.81600; daa[16*20+10] = 32.66220; daa[16*20+11] = 138.69800; - daa[16*20+12] = 151.61200; daa[16*20+13] = 17.19030; daa[16*20+14] = 79.53840; - daa[16*20+15] = 437.80200; daa[17*20+ 0] = 11.31330; daa[17*20+ 1] = 116.39200; - daa[17*20+ 2] = 7.19167; daa[17*20+ 3] = 12.97670; daa[17*20+ 4] = 71.70700; - daa[17*20+ 5] = 21.57370; daa[17*20+ 6] = 15.65570; daa[17*20+ 7] = 33.69830; - daa[17*20+ 8] = 26.25690; daa[17*20+ 9] = 21.24830; daa[17*20+10] = 66.53090; - daa[17*20+11] = 13.75050; daa[17*20+12] = 51.57060; daa[17*20+13] = 152.96400; - daa[17*20+14] = 13.94050; daa[17*20+15] = 52.37420; daa[17*20+16] = 11.08640; - daa[18*20+ 0] = 24.07350; daa[18*20+ 1] = 38.15330; daa[18*20+ 2] = 108.60000; - daa[18*20+ 3] = 32.57110; daa[18*20+ 4] = 54.38330; daa[18*20+ 5] = 22.77100; - daa[18*20+ 6] = 19.63030; daa[18*20+ 7] = 10.36040; daa[18*20+ 8] = 387.34400; - daa[18*20+ 9] = 42.01700; daa[18*20+10] = 39.86180; daa[18*20+11] = 13.32640; - daa[18*20+12] = 42.84370; daa[18*20+13] = 645.42800; daa[18*20+14] = 21.60460; - daa[18*20+15] = 78.69930; daa[18*20+16] = 29.11480; daa[18*20+17] = 248.53900; - daa[19*20+ 0] = 200.60100; daa[19*20+ 1] = 25.18490; daa[19*20+ 2] = 19.62460; - daa[19*20+ 3] = 15.23350; daa[19*20+ 4] = 100.21400; daa[19*20+ 5] = 30.12810; - daa[19*20+ 6] = 58.87310; daa[19*20+ 7] = 18.72470; daa[19*20+ 8] = 11.83580; - daa[19*20+ 9] = 782.13000; daa[19*20+10] = 180.03400; daa[19*20+11] = 30.54340; - daa[19*20+12] = 205.84500; daa[19*20+13] = 64.98920; daa[19*20+14] = 31.48870; - daa[19*20+15] = 23.27390; daa[19*20+16] = 138.82300; daa[19*20+17] = 36.53690; - daa[19*20+18] = 31.47300; - -/* THIS WRONG FREQUENCIES ARE ROUNDED to 3 digits, same for RAxML - f[0] = 0.08700; f[1] = 0.04400; f[2] = 0.03900; f[3] = 0.05700; - f[4] = 0.01900; f[5] = 0.03700; f[6] = 0.05800; f[7] = 0.08300; - f[8] = 0.02400; f[9] = 0.04900; f[10] = 
0.08600; f[11] = 0.06200; - f[12] = 0.02000; f[13] = 0.03800; f[14] = 0.04600; f[15] = 0.07000; - f[16] = 0.06100; f[17] = 0.01400; f[18] = 0.03500; f[19] = 0.07100; -*/ - // NOTE: originally, f[19]= 0.0708956 but frequencies do not sum up to 1 - f[0] = 0.0866279; f[1] = 0.043972; f[2] = 0.0390894; f[3] = 0.0570451; - f[4] = 0.0193078; f[5] = 0.0367281; f[6] = 0.0580589; f[7] = 0.0832518; - f[8] = 0.0244313; f[9] = 0.048466; f[10] = 0.086209; f[11] = 0.0620286; - f[12] = 0.0195027; f[13] = 0.0384319; f[14] = 0.0457631; f[15] = 0.0695179; - f[16] = 0.0610127; f[17] = 0.0143859; f[18] = 0.0352742; f[19] = 0.0708957; - } - else if (prot_model == "RTREV") - { - daa[1*20+0]= 34; daa[2*20+0]= 51; daa[2*20+1]= 35; daa[3*20+0]= 10; - daa[3*20+1]= 30; daa[3*20+2]= 384; daa[4*20+0]= 439; daa[4*20+1]= 92; - daa[4*20+2]= 128; daa[4*20+3]= 1; daa[5*20+0]= 32; daa[5*20+1]= 221; - daa[5*20+2]= 236; daa[5*20+3]= 78; daa[5*20+4]= 70; daa[6*20+0]= 81; - daa[6*20+1]= 10; daa[6*20+2]= 79; daa[6*20+3]= 542; daa[6*20+4]= 1; - daa[6*20+5]= 372; daa[7*20+0]= 135; daa[7*20+1]= 41; daa[7*20+2]= 94; - daa[7*20+3]= 61; daa[7*20+4]= 48; daa[7*20+5]= 18; daa[7*20+6]= 70; - daa[8*20+0]= 30; daa[8*20+1]= 90; daa[8*20+2]= 320; daa[8*20+3]= 91; - daa[8*20+4]= 124; daa[8*20+5]= 387; daa[8*20+6]= 34; daa[8*20+7]= 68; - daa[9*20+0]= 1; daa[9*20+1]= 24; daa[9*20+2]= 35; daa[9*20+3]= 1; - daa[9*20+4]= 104; daa[9*20+5]= 33; daa[9*20+6]= 1; daa[9*20+7]= 1; - daa[9*20+8]= 34; daa[10*20+0]= 45; daa[10*20+1]= 18; daa[10*20+2]= 15; - daa[10*20+3]= 5; daa[10*20+4]= 110; daa[10*20+5]= 54; daa[10*20+6]= 21; - daa[10*20+7]= 3; daa[10*20+8]= 51; daa[10*20+9]= 385; daa[11*20+0]= 38; - daa[11*20+1]= 593; daa[11*20+2]= 123; daa[11*20+3]= 20; daa[11*20+4]= 16; - daa[11*20+5]= 309; daa[11*20+6]= 141; daa[11*20+7]= 30; daa[11*20+8]= 76; - daa[11*20+9]= 34; daa[11*20+10]= 23; daa[12*20+0]= 235; daa[12*20+1]= 57; - daa[12*20+2]= 1; daa[12*20+3]= 1; daa[12*20+4]= 156; daa[12*20+5]= 158; - daa[12*20+6]= 1; daa[12*20+7]= 37; 
daa[12*20+8]= 116; daa[12*20+9]= 375; - daa[12*20+10]= 581; daa[12*20+11]= 134; daa[13*20+0]= 1; daa[13*20+1]= 7; - daa[13*20+2]= 49; daa[13*20+3]= 1; daa[13*20+4]= 70; daa[13*20+5]= 1; - daa[13*20+6]= 1; daa[13*20+7]= 7; daa[13*20+8]= 141; daa[13*20+9]= 64; - daa[13*20+10]= 179; daa[13*20+11]= 14; daa[13*20+12]= 247; daa[14*20+0]= 97; - daa[14*20+1]= 24; daa[14*20+2]= 33; daa[14*20+3]= 55; daa[14*20+4]= 1; - daa[14*20+5]= 68; daa[14*20+6]= 52; daa[14*20+7]= 17; daa[14*20+8]= 44; - daa[14*20+9]= 10; daa[14*20+10]= 22; daa[14*20+11]= 43; daa[14*20+12]= 1; - daa[14*20+13]= 11; daa[15*20+0]= 460; daa[15*20+1]= 102; daa[15*20+2]= 294; - daa[15*20+3]= 136; daa[15*20+4]= 75; daa[15*20+5]= 225; daa[15*20+6]= 95; - daa[15*20+7]= 152; daa[15*20+8]= 183; daa[15*20+9]= 4; daa[15*20+10]= 24; - daa[15*20+11]= 77; daa[15*20+12]= 1; daa[15*20+13]= 20; daa[15*20+14]= 134; - daa[16*20+0]= 258; daa[16*20+1]= 64; daa[16*20+2]= 148; daa[16*20+3]= 55; - daa[16*20+4]= 117; daa[16*20+5]= 146; daa[16*20+6]= 82; daa[16*20+7]= 7; - daa[16*20+8]= 49; daa[16*20+9]= 72; daa[16*20+10]= 25; daa[16*20+11]= 110; - daa[16*20+12]= 131; daa[16*20+13]= 69; daa[16*20+14]= 62; daa[16*20+15]= 671; - daa[17*20+0]= 5; daa[17*20+1]= 13; daa[17*20+2]= 16; daa[17*20+3]= 1; - daa[17*20+4]= 55; daa[17*20+5]= 10; daa[17*20+6]= 17; daa[17*20+7]= 23; - daa[17*20+8]= 48; daa[17*20+9]= 39; daa[17*20+10]= 47; daa[17*20+11]= 6; - daa[17*20+12]= 111; daa[17*20+13]= 182; daa[17*20+14]= 9; daa[17*20+15]= 14; - daa[17*20+16]= 1; daa[18*20+0]= 55; daa[18*20+1]= 47; daa[18*20+2]= 28; - daa[18*20+3]= 1; daa[18*20+4]= 131; daa[18*20+5]= 45; daa[18*20+6]= 1; - daa[18*20+7]= 21; daa[18*20+8]= 307; daa[18*20+9]= 26; daa[18*20+10]= 64; - daa[18*20+11]= 1; daa[18*20+12]= 74; daa[18*20+13]= 1017; daa[18*20+14]= 14; - daa[18*20+15]= 31; daa[18*20+16]= 34; daa[18*20+17]= 176; daa[19*20+0]= 197; - daa[19*20+1]= 29; daa[19*20+2]= 21; daa[19*20+3]= 6; daa[19*20+4]= 295; - daa[19*20+5]= 36; daa[19*20+6]= 35; daa[19*20+7]= 3; 
daa[19*20+8]= 1; - daa[19*20+9]= 1048; daa[19*20+10]= 112; daa[19*20+11]= 19; daa[19*20+12]= 236; - daa[19*20+13]= 92; daa[19*20+14]= 25; daa[19*20+15]= 39; daa[19*20+16]= 196; - daa[19*20+17]= 26; daa[19*20+18]= 59; - - f[0]= 0.0646; f[1]= 0.0453; f[2]= 0.0376; f[3]= 0.0422; - f[4]= 0.0114; f[5]= 0.0606; f[6]= 0.0607; f[7]= 0.0639; - f[8]= 0.0273; f[9]= 0.0679; f[10]= 0.1018; f[11]= 0.0751; - f[12]= 0.015; f[13]= 0.0287; f[14]= 0.0681; f[15]= 0.0488; - f[16]= 0.0622; f[17]= 0.0251; f[18]= 0.0318; f[19]= 0.0619; - } - else if (prot_model == "CPREV") - { - daa[1*20+0]= 105; daa[2*20+0]= 227; daa[2*20+1]= 357; daa[3*20+0]= 175; - daa[3*20+1]= 43; daa[3*20+2]= 4435; daa[4*20+0]= 669; daa[4*20+1]= 823; - daa[4*20+2]= 538; daa[4*20+3]= 10; daa[5*20+0]= 157; daa[5*20+1]= 1745; - daa[5*20+2]= 768; daa[5*20+3]= 400; daa[5*20+4]= 10; daa[6*20+0]= 499; - daa[6*20+1]= 152; daa[6*20+2]= 1055; daa[6*20+3]= 3691; daa[6*20+4]= 10; - daa[6*20+5]= 3122; daa[7*20+0]= 665; daa[7*20+1]= 243; daa[7*20+2]= 653; - daa[7*20+3]= 431; daa[7*20+4]= 303; daa[7*20+5]= 133; daa[7*20+6]= 379; - daa[8*20+0]= 66; daa[8*20+1]= 715; daa[8*20+2]= 1405; daa[8*20+3]= 331; - daa[8*20+4]= 441; daa[8*20+5]= 1269; daa[8*20+6]= 162; daa[8*20+7]= 19; - daa[9*20+0]= 145; daa[9*20+1]= 136; daa[9*20+2]= 168; daa[9*20+3]= 10; - daa[9*20+4]= 280; daa[9*20+5]= 92; daa[9*20+6]= 148; daa[9*20+7]= 40; - daa[9*20+8]= 29; daa[10*20+0]= 197; daa[10*20+1]= 203; daa[10*20+2]= 113; - daa[10*20+3]= 10; daa[10*20+4]= 396; daa[10*20+5]= 286; daa[10*20+6]= 82; - daa[10*20+7]= 20; daa[10*20+8]= 66; daa[10*20+9]= 1745; daa[11*20+0]= 236; - daa[11*20+1]= 4482; daa[11*20+2]= 2430; daa[11*20+3]= 412; daa[11*20+4]= 48; - daa[11*20+5]= 3313; daa[11*20+6]= 2629; daa[11*20+7]= 263; daa[11*20+8]= 305; - daa[11*20+9]= 345; daa[11*20+10]= 218; daa[12*20+0]= 185; daa[12*20+1]= 125; - daa[12*20+2]= 61; daa[12*20+3]= 47; daa[12*20+4]= 159; daa[12*20+5]= 202; - daa[12*20+6]= 113; daa[12*20+7]= 21; daa[12*20+8]= 10; daa[12*20+9]= 1772; - 
daa[12*20+10]= 1351; daa[12*20+11]= 193; daa[13*20+0]= 68; daa[13*20+1]= 53; - daa[13*20+2]= 97; daa[13*20+3]= 22; daa[13*20+4]= 726; daa[13*20+5]= 10; - daa[13*20+6]= 145; daa[13*20+7]= 25; daa[13*20+8]= 127; daa[13*20+9]= 454; - daa[13*20+10]= 1268; daa[13*20+11]= 72; daa[13*20+12]= 327; daa[14*20+0]= 490; - daa[14*20+1]= 87; daa[14*20+2]= 173; daa[14*20+3]= 170; daa[14*20+4]= 285; - daa[14*20+5]= 323; daa[14*20+6]= 185; daa[14*20+7]= 28; daa[14*20+8]= 152; - daa[14*20+9]= 117; daa[14*20+10]= 219; daa[14*20+11]= 302; daa[14*20+12]= 100; - daa[14*20+13]= 43; daa[15*20+0]= 2440; daa[15*20+1]= 385; daa[15*20+2]= 2085; - daa[15*20+3]= 590; daa[15*20+4]= 2331; daa[15*20+5]= 396; daa[15*20+6]= 568; - daa[15*20+7]= 691; daa[15*20+8]= 303; daa[15*20+9]= 216; daa[15*20+10]= 516; - daa[15*20+11]= 868; daa[15*20+12]= 93; daa[15*20+13]= 487; daa[15*20+14]= 1202; - daa[16*20+0]= 1340; daa[16*20+1]= 314; daa[16*20+2]= 1393; daa[16*20+3]= 266; - daa[16*20+4]= 576; daa[16*20+5]= 241; daa[16*20+6]= 369; daa[16*20+7]= 92; - daa[16*20+8]= 32; daa[16*20+9]= 1040; daa[16*20+10]= 156; daa[16*20+11]= 918; - daa[16*20+12]= 645; daa[16*20+13]= 148; daa[16*20+14]= 260; daa[16*20+15]= 2151; - daa[17*20+0]= 14; daa[17*20+1]= 230; daa[17*20+2]= 40; daa[17*20+3]= 18; - daa[17*20+4]= 435; daa[17*20+5]= 53; daa[17*20+6]= 63; daa[17*20+7]= 82; - daa[17*20+8]= 69; daa[17*20+9]= 42; daa[17*20+10]= 159; daa[17*20+11]= 10; - daa[17*20+12]= 86; daa[17*20+13]= 468; daa[17*20+14]= 49; daa[17*20+15]= 73; - daa[17*20+16]= 29; daa[18*20+0]= 56; daa[18*20+1]= 323; daa[18*20+2]= 754; - daa[18*20+3]= 281; daa[18*20+4]= 1466; daa[18*20+5]= 391; daa[18*20+6]= 142; - daa[18*20+7]= 10; daa[18*20+8]= 1971; daa[18*20+9]= 89; daa[18*20+10]= 189; - daa[18*20+11]= 247; daa[18*20+12]= 215; daa[18*20+13]= 2370; daa[18*20+14]= 97; - daa[18*20+15]= 522; daa[18*20+16]= 71; daa[18*20+17]= 346; daa[19*20+0]= 968; - daa[19*20+1]= 92; daa[19*20+2]= 83; daa[19*20+3]= 75; daa[19*20+4]= 592; - daa[19*20+5]= 54; daa[19*20+6]= 
200; daa[19*20+7]= 91; daa[19*20+8]= 25; - daa[19*20+9]= 4797; daa[19*20+10]= 865; daa[19*20+11]= 249; daa[19*20+12]= 475; - daa[19*20+13]= 317; daa[19*20+14]= 122; daa[19*20+15]= 167; daa[19*20+16]= 760; - daa[19*20+17]= 10; daa[19*20+18]= 119; - - f[0]= 0.076; f[1]= 0.062; f[2]= 0.041; f[3]= 0.037; - f[4]= 0.009; f[5]= 0.038; f[6]= 0.049; f[7]= 0.084; - f[8]= 0.025; f[9]= 0.081; f[10]= 0.101; f[11]= 0.05; - f[12]= 0.022; f[13]= 0.051; f[14]= 0.043; f[15]= 0.062; - f[16]= 0.054; f[17]= 0.018; f[18]= 0.031; f[19]= 0.066; - } - else if (prot_model == "VT") - { - - daa[1*20+0]= 1.2412691067876198; - daa[2*20+0]= 1.2184237953498958; - daa[2*20+1]= 1.5720770753326880; - daa[3*20+0]= 1.3759368509441177; - daa[3*20+1]= 0.7550654439001206; - daa[3*20+2]= 7.8584219153689405; - daa[4*20+0]= 2.4731223087544874; - daa[4*20+1]= 1.4414262567428417; - daa[4*20+2]= 0.9784679122774127; - daa[4*20+3]= 0.2272488448121475; - daa[5*20+0]= 2.2155167805137470; - daa[5*20+1]= 5.5120819705248678; - daa[5*20+2]= 3.0143201670924822; - daa[5*20+3]= 1.6562495638176040; - daa[5*20+4]= 0.4587469126746136; - daa[6*20+0]= 2.3379911207495061; - daa[6*20+1]= 1.3542404860613146; - daa[6*20+2]= 2.0093434778398112; - daa[6*20+3]= 9.6883451875685065; - daa[6*20+4]= 0.4519167943192672; - daa[6*20+5]= 6.8124601839937675; - daa[7*20+0]= 3.3386555146457697; - daa[7*20+1]= 1.3121700301622004; - daa[7*20+2]= 2.4117632898861809; - daa[7*20+3]= 1.9142079025990228; - daa[7*20+4]= 1.1034605684472507; - daa[7*20+5]= 0.8776110594765502; - daa[7*20+6]= 1.3860121390169038; - daa[8*20+0]= 0.9615841926910841; - daa[8*20+1]= 4.9238668283945266; - daa[8*20+2]= 6.1974384977884114; - daa[8*20+3]= 2.1459640610133781; - daa[8*20+4]= 1.5196756759380692; - daa[8*20+5]= 7.9943228564946525; - daa[8*20+6]= 1.6360079688522375; - daa[8*20+7]= 0.8561248973045037; - daa[9*20+0]= 0.8908203061925510; - daa[9*20+1]= 0.4323005487925516; - daa[9*20+2]= 0.9179291175331520; - daa[9*20+3]= 0.2161660372725585; - daa[9*20+4]= 
0.9126668032539315; - daa[9*20+5]= 0.4882733432879921; - daa[9*20+6]= 0.4035497929633328; - daa[9*20+7]= 0.2888075033037488; - daa[9*20+8]= 0.5787937115407940; - daa[10*20+0]= 1.0778497408764076; - daa[10*20+1]= 0.8386701149158265; - daa[10*20+2]= 0.4098311270816011; - daa[10*20+3]= 0.3574207468998517; - daa[10*20+4]= 1.4081315998413697; - daa[10*20+5]= 1.3318097154194044; - daa[10*20+6]= 0.5610717242294755; - daa[10*20+7]= 0.3578662395745526; - daa[10*20+8]= 1.0765007949562073; - daa[10*20+9]= 6.0019110258426362; - daa[11*20+0]= 1.4932055816372476; - daa[11*20+1]= 10.017330817366002; - daa[11*20+2]= 4.4034547578962568; - daa[11*20+3]= 1.4521790561663968; - daa[11*20+4]= 0.3371091785647479; - daa[11*20+5]= 6.0519085243118811; - daa[11*20+6]= 4.3290086529582830; - daa[11*20+7]= 0.8945563662345198; - daa[11*20+8]= 1.8085136096039203; - daa[11*20+9]= 0.6244297525127139; - daa[11*20+10]= 0.5642322882556321; - daa[12*20+0]= 1.9006455961717605; - daa[12*20+1]= 1.2488638689609959; - daa[12*20+2]= 0.9378803706165143; - daa[12*20+3]= 0.4075239926000898; - daa[12*20+4]= 1.2213054800811556; - daa[12*20+5]= 1.9106190827629084; - daa[12*20+6]= 0.7471936218068498; - daa[12*20+7]= 0.5954812791740037; - daa[12*20+8]= 1.3808291710019667; - daa[12*20+9]= 6.7597899772045418; - daa[12*20+10]= 8.0327792947421148; - daa[12*20+11]= 1.7129670976916258; - daa[13*20+0]= 0.6883439026872615; - daa[13*20+1]= 0.4224945197276290; - daa[13*20+2]= 0.5044944273324311; - daa[13*20+3]= 0.1675129724559251; - daa[13*20+4]= 1.6953951980808002; - daa[13*20+5]= 0.3573432522499545; - daa[13*20+6]= 0.2317194387691585; - daa[13*20+7]= 0.3693722640980460; - daa[13*20+8]= 1.3629765501081097; - daa[13*20+9]= 2.2864286949316077; - daa[13*20+10]= 4.3611548063555778; - daa[13*20+11]= 0.3910559903834828; - daa[13*20+12]= 2.3201373546296349; - daa[14*20+0]= 2.7355620089953550; - daa[14*20+1]= 1.3091837782420783; - daa[14*20+2]= 0.7103720531974738; - daa[14*20+3]= 1.0714605979577547; - daa[14*20+4]= 
0.4326227078645523; - daa[14*20+5]= 2.3019177728300728; - daa[14*20+6]= 1.5132807416252063; - daa[14*20+7]= 0.7744933618134962; - daa[14*20+8]= 1.8370555852070649; - daa[14*20+9]= 0.4811402387911145; - daa[14*20+10]= 1.0084320519837335; - daa[14*20+11]= 1.3918935593582853; - daa[14*20+12]= 0.4953193808676289; - daa[14*20+13]= 0.3746821107962129; - daa[15*20+0]= 6.4208961859142883; - daa[15*20+1]= 1.9202994262316166; - daa[15*20+2]= 6.1234512396801764; - daa[15*20+3]= 2.2161944596741829; - daa[15*20+4]= 3.6366815408744255; - daa[15*20+5]= 2.3193703643237220; - daa[15*20+6]= 1.8273535587773553; - daa[15*20+7]= 3.0637776193717610; - daa[15*20+8]= 1.9699895187387506; - daa[15*20+9]= 0.6047491507504744; - daa[15*20+10]= 0.8953754669269811; - daa[15*20+11]= 1.9776630140912268; - daa[15*20+12]= 1.0657482318076852; - daa[15*20+13]= 1.1079144700606407; - daa[15*20+14]= 3.5465914843628927; - daa[16*20+0]= 5.2892514169776437; - daa[16*20+1]= 1.3363401740560601; - daa[16*20+2]= 3.8852506105922231; - daa[16*20+3]= 1.5066839872944762; - daa[16*20+4]= 1.7557065205837685; - daa[16*20+5]= 2.1576510103471440; - daa[16*20+6]= 1.5839981708584689; - daa[16*20+7]= 0.7147489676267383; - daa[16*20+8]= 1.6136654573285647; - daa[16*20+9]= 2.6344778384442731; - daa[16*20+10]= 1.0192004372506540; - daa[16*20+11]= 2.5513781312660280; - daa[16*20+12]= 3.3628488360462363; - daa[16*20+13]= 0.6882725908872254; - daa[16*20+14]= 1.9485376673137556; - daa[16*20+15]= 8.8479984061248178; - daa[17*20+0]= 0.5488578478106930; - daa[17*20+1]= 1.5170142153962840; - daa[17*20+2]= 0.1808525752605976; - daa[17*20+3]= 0.2496584188151770; - daa[17*20+4]= 1.6275179891253113; - daa[17*20+5]= 0.8959082681546182; - daa[17*20+6]= 0.4198391148111098; - daa[17*20+7]= 0.9349753595598769; - daa[17*20+8]= 0.6301954684360302; - daa[17*20+9]= 0.5604648274060783; - daa[17*20+10]= 1.5183114434679339; - daa[17*20+11]= 0.5851920879490173; - daa[17*20+12]= 1.4680478689711018; - daa[17*20+13]= 3.3448437239772266; - daa[17*20+14]= 
0.4326058001438786; - daa[17*20+15]= 0.6791126595939816; - daa[17*20+16]= 0.4514203099376473; - daa[18*20+0]= 0.5411769916657778; - daa[18*20+1]= 0.8912614404565405; - daa[18*20+2]= 1.0894926581511342; - daa[18*20+3]= 0.7447620891784513; - daa[18*20+4]= 2.1579775140421025; - daa[18*20+5]= 0.9183596801412757; - daa[18*20+6]= 0.5818111331782764; - daa[18*20+7]= 0.3374467649724478; - daa[18*20+8]= 7.7587442309146040; - daa[18*20+9]= 0.8626796044156272; - daa[18*20+10]= 1.2452243224541324; - daa[18*20+11]= 0.7835447533710449; - daa[18*20+12]= 1.0899165770956820; - daa[18*20+13]= 10.384852333133459; - daa[18*20+14]= 0.4819109019647465; - daa[18*20+15]= 0.9547229305958682; - daa[18*20+16]= 0.8564314184691215; - daa[18*20+17]= 4.5377235790405388; - daa[19*20+0]= 4.6501894691803214; - daa[19*20+1]= 0.7807017855806767; - daa[19*20+2]= 0.4586061981719967; - daa[19*20+3]= 0.4594535241660911; - daa[19*20+4]= 2.2627456996290891; - daa[19*20+5]= 0.6366932501396869; - daa[19*20+6]= 0.8940572875547330; - daa[19*20+7]= 0.6193321034173915; - daa[19*20+8]= 0.5333220944030346; - daa[19*20+9]= 14.872933461519061; - daa[19*20+10]= 3.5458093276667237; - daa[19*20+11]= 0.7801080335991272; - daa[19*20+12]= 4.0584577156753401; - daa[19*20+13]= 1.7039730522675411; - daa[19*20+14]= 0.5985498912985666; - daa[19*20+15]= 0.9305232113028208; - daa[19*20+16]= 3.4242218450865543; - daa[19*20+17]= 0.5658969249032649; - daa[19*20+18]= 1.0000000000000000; - - f[0]= 0.0770764620135024; - f[1]= 0.0500819370772208; - f[2]= 0.0462377395993731; - f[3]= 0.0537929860758246; - f[4]= 0.0144533387583345; - f[5]= 0.0408923608974345; - f[6]= 0.0633579339160905; - f[7]= 0.0655672355884439; - f[8]= 0.0218802687005936; - f[9]= 0.0591969699027449; - f[10]= 0.0976461276528445; - f[11]= 0.0592079410822730; - f[12]= 0.0220695876653368; - f[13]= 0.0413508521834260; - f[14]= 0.0476871596856874; - f[15]= 0.0707295165111524; - f[16]= 0.0567759161524817; - f[17]= 0.0127019797647213; - f[18]= 0.0323746050281867; - f[19]= 
0.0669190817443274; - } - else if (prot_model == "BLOSUM62") - { - daa[1*20+0]= 0.735790389698; daa[2*20+0]= 0.485391055466; daa[2*20+1]= 1.297446705134; - daa[3*20+0]= 0.543161820899; - daa[3*20+1]= 0.500964408555; daa[3*20+2]= 3.180100048216; daa[4*20+0]= 1.45999531047; - daa[4*20+1]= 0.227826574209; - daa[4*20+2]= 0.397358949897; daa[4*20+3]= 0.240836614802; daa[5*20+0]= 1.199705704602; - daa[5*20+1]= 3.020833610064; - daa[5*20+2]= 1.839216146992; daa[5*20+3]= 1.190945703396; daa[5*20+4]= 0.32980150463; - daa[6*20+0]= 1.1709490428; - daa[6*20+1]= 1.36057419042; daa[6*20+2]= 1.24048850864; daa[6*20+3]= 3.761625208368; - daa[6*20+4]= 0.140748891814; - daa[6*20+5]= 5.528919177928; daa[7*20+0]= 1.95588357496; daa[7*20+1]= 0.418763308518; - daa[7*20+2]= 1.355872344485; - daa[7*20+3]= 0.798473248968; daa[7*20+4]= 0.418203192284; daa[7*20+5]= 0.609846305383; - daa[7*20+6]= 0.423579992176; - daa[8*20+0]= 0.716241444998; daa[8*20+1]= 1.456141166336; daa[8*20+2]= 2.414501434208; - daa[8*20+3]= 0.778142664022; - daa[8*20+4]= 0.354058109831; daa[8*20+5]= 2.43534113114; daa[8*20+6]= 1.626891056982; - daa[8*20+7]= 0.539859124954; - daa[9*20+0]= 0.605899003687; daa[9*20+1]= 0.232036445142; daa[9*20+2]= 0.283017326278; - daa[9*20+3]= 0.418555732462; - daa[9*20+4]= 0.774894022794; daa[9*20+5]= 0.236202451204; daa[9*20+6]= 0.186848046932; - daa[9*20+7]= 0.189296292376; - daa[9*20+8]= 0.252718447885; daa[10*20+0]= 0.800016530518; daa[10*20+1]= 0.622711669692; - daa[10*20+2]= 0.211888159615; - daa[10*20+3]= 0.218131577594; daa[10*20+4]= 0.831842640142; daa[10*20+5]= 0.580737093181; - daa[10*20+6]= 0.372625175087; - daa[10*20+7]= 0.217721159236; daa[10*20+8]= 0.348072209797; daa[10*20+9]= 3.890963773304; - daa[11*20+0]= 1.295201266783; - daa[11*20+1]= 5.411115141489; daa[11*20+2]= 1.593137043457; daa[11*20+3]= 1.032447924952; - daa[11*20+4]= 0.285078800906; - daa[11*20+5]= 3.945277674515; daa[11*20+6]= 2.802427151679; daa[11*20+7]= 0.752042440303; - daa[11*20+8]= 1.022507035889; - 
daa[11*20+9]= 0.406193586642; daa[11*20+10]= 0.445570274261;daa[12*20+0]= 1.253758266664; - daa[12*20+1]= 0.983692987457; - daa[12*20+2]= 0.648441278787; daa[12*20+3]= 0.222621897958; daa[12*20+4]= 0.76768882348; - daa[12*20+5]= 2.494896077113; - daa[12*20+6]= 0.55541539747; daa[12*20+7]= 0.459436173579; daa[12*20+8]= 0.984311525359; - daa[12*20+9]= 3.364797763104; - daa[12*20+10]= 6.030559379572;daa[12*20+11]= 1.073061184332;daa[13*20+0]= 0.492964679748; - daa[13*20+1]= 0.371644693209; - daa[13*20+2]= 0.354861249223; daa[13*20+3]= 0.281730694207; daa[13*20+4]= 0.441337471187; - daa[13*20+5]= 0.14435695975; - daa[13*20+6]= 0.291409084165; daa[13*20+7]= 0.368166464453; daa[13*20+8]= 0.714533703928; - daa[13*20+9]= 1.517359325954; - daa[13*20+10]= 2.064839703237;daa[13*20+11]= 0.266924750511;daa[13*20+12]= 1.77385516883; - daa[14*20+0]= 1.173275900924; - daa[14*20+1]= 0.448133661718; daa[14*20+2]= 0.494887043702; daa[14*20+3]= 0.730628272998; - daa[14*20+4]= 0.356008498769; - daa[14*20+5]= 0.858570575674; daa[14*20+6]= 0.926563934846; daa[14*20+7]= 0.504086599527; daa[14*20+8]= 0.527007339151; - daa[14*20+9]= 0.388355409206; daa[14*20+10]= 0.374555687471;daa[14*20+11]= 1.047383450722;daa[14*20+12]= 0.454123625103; - daa[14*20+13]= 0.233597909629;daa[15*20+0]= 4.325092687057; daa[15*20+1]= 1.12278310421; daa[15*20+2]= 2.904101656456; - daa[15*20+3]= 1.582754142065; daa[15*20+4]= 1.197188415094; daa[15*20+5]= 1.934870924596; daa[15*20+6]= 1.769893238937; - daa[15*20+7]= 1.509326253224; daa[15*20+8]= 1.11702976291; daa[15*20+9]= 0.35754441246; daa[15*20+10]= 0.352969184527; - daa[15*20+11]= 1.752165917819;daa[15*20+12]= 0.918723415746;daa[15*20+13]= 0.540027644824;daa[15*20+14]= 1.169129577716; - daa[16*20+0]= 1.729178019485; daa[16*20+1]= 0.914665954563; daa[16*20+2]= 1.898173634533; daa[16*20+3]= 0.934187509431; - daa[16*20+4]= 1.119831358516; daa[16*20+5]= 1.277480294596; daa[16*20+6]= 1.071097236007; daa[16*20+7]= 0.641436011405; - daa[16*20+8]= 0.585407090225; 
daa[16*20+9]= 1.17909119726; daa[16*20+10]= 0.915259857694;daa[16*20+11]= 1.303875200799; - daa[16*20+12]= 1.488548053722;daa[16*20+13]= 0.488206118793;daa[16*20+14]= 1.005451683149;daa[16*20+15]= 5.15155629227; - daa[17*20+0]= 0.465839367725; daa[17*20+1]= 0.426382310122; daa[17*20+2]= 0.191482046247; daa[17*20+3]= 0.145345046279; - daa[17*20+4]= 0.527664418872; daa[17*20+5]= 0.758653808642; daa[17*20+6]= 0.407635648938; daa[17*20+7]= 0.508358924638; - daa[17*20+8]= 0.30124860078; daa[17*20+9]= 0.34198578754; daa[17*20+10]= 0.6914746346; daa[17*20+11]= 0.332243040634; - daa[17*20+12]= 0.888101098152;daa[17*20+13]= 2.074324893497;daa[17*20+14]= 0.252214830027;daa[17*20+15]= 0.387925622098; - daa[17*20+16]= 0.513128126891;daa[18*20+0]= 0.718206697586; daa[18*20+1]= 0.720517441216; daa[18*20+2]= 0.538222519037; - daa[18*20+3]= 0.261422208965; daa[18*20+4]= 0.470237733696; daa[18*20+5]= 0.95898974285; daa[18*20+6]= 0.596719300346; - daa[18*20+7]= 0.308055737035; daa[18*20+8]= 4.218953969389; daa[18*20+9]= 0.674617093228; daa[18*20+10]= 0.811245856323; - daa[18*20+11]= 0.7179934869; daa[18*20+12]= 0.951682162246;daa[18*20+13]= 6.747260430801;daa[18*20+14]= 0.369405319355; - daa[18*20+15]= 0.796751520761;daa[18*20+16]= 0.801010243199;daa[18*20+17]= 4.054419006558;daa[19*20+0]= 2.187774522005; - daa[19*20+1]= 0.438388343772; daa[19*20+2]= 0.312858797993; daa[19*20+3]= 0.258129289418; daa[19*20+4]= 1.116352478606; - daa[19*20+5]= 0.530785790125; daa[19*20+6]= 0.524253846338; daa[19*20+7]= 0.25334079019; daa[19*20+8]= 0.20155597175; - daa[19*20+9]= 8.311839405458; daa[19*20+10]= 2.231405688913;daa[19*20+11]= 0.498138475304;daa[19*20+12]= 2.575850755315; - daa[19*20+13]= 0.838119610178;daa[19*20+14]= 0.496908410676;daa[19*20+15]= 0.561925457442;daa[19*20+16]= 2.253074051176; - daa[19*20+17]= 0.266508731426;daa[19*20+18]= 1; - - f[0]= 0.074; f[1]= 0.052; f[2]= 0.045; f[3]= 0.054; - f[4]= 0.025; f[5]= 0.034; f[6]= 0.054; f[7]= 0.074; - f[8]= 0.026; f[9]= 0.068; f[10]= 0.099; 
f[11]= 0.058; - f[12]= 0.025; f[13]= 0.047; f[14]= 0.039; f[15]= 0.057; - f[16]= 0.051; f[17]= 0.013; f[18]= 0.032; f[19]= 0.073; - } - else if (prot_model == "MTMAM") - { - daa[1*20+0]= 32; daa[2*20+0]= 2; daa[2*20+1]= 4; daa[3*20+0]= 11; - daa[3*20+1]= 1e-6; daa[3*20+2]= 864; daa[4*20+0]= 1e-6; daa[4*20+1]= 186; - daa[4*20+2]= 1e-6; daa[4*20+3]= 1e-6; daa[5*20+0]= 1e-6; daa[5*20+1]= 246; - daa[5*20+2]= 8; daa[5*20+3]= 49; daa[5*20+4]= 1e-6; daa[6*20+0]= 1e-6; - daa[6*20+1]= 1e-6; daa[6*20+2]= 1e-6; daa[6*20+3]= 569; daa[6*20+4]= 1e-6; - daa[6*20+5]= 274; daa[7*20+0]= 78; daa[7*20+1]= 18; daa[7*20+2]= 47; - daa[7*20+3]= 79; daa[7*20+4]= 1e-6; daa[7*20+5]= 1e-6; daa[7*20+6]= 22; - daa[8*20+0]= 8; daa[8*20+1]= 232; daa[8*20+2]= 458; daa[8*20+3]= 11; - daa[8*20+4]= 305; daa[8*20+5]= 550; daa[8*20+6]= 22; daa[8*20+7]= 1e-6; - daa[9*20+0]= 75; daa[9*20+1]= 1e-6; daa[9*20+2]= 19; daa[9*20+3]= 1e-6; - daa[9*20+4]= 41; daa[9*20+5]= 1e-6; daa[9*20+6]= 1e-6; daa[9*20+7]= 1e-6; - daa[9*20+8]= 1e-6; daa[10*20+0]= 21; daa[10*20+1]= 6; daa[10*20+2]= 1e-6; - daa[10*20+3]= 1e-6; daa[10*20+4]= 27; daa[10*20+5]= 20; daa[10*20+6]= 1e-6; - daa[10*20+7]= 1e-6; daa[10*20+8]= 26; daa[10*20+9]= 232; daa[11*20+0]= 1e-6; - daa[11*20+1]= 50; daa[11*20+2]= 408; daa[11*20+3]= 1e-6; daa[11*20+4]= 1e-6; - daa[11*20+5]= 242; daa[11*20+6]= 215; daa[11*20+7]= 1e-6; daa[11*20+8]= 1e-6; - daa[11*20+9]= 6; daa[11*20+10]= 4; daa[12*20+0]= 76; daa[12*20+1]= 1e-6; - daa[12*20+2]= 21; daa[12*20+3]= 1e-6;daa[12*20+4]= 1e-6; daa[12*20+5]= 22; - daa[12*20+6]= 1e-6; daa[12*20+7]= 1e-6;daa[12*20+8]= 1e-6; daa[12*20+9]= 378; - daa[12*20+10]= 609; daa[12*20+11]= 59; daa[13*20+0]= 1e-6; daa[13*20+1]= 1e-6; - daa[13*20+2]= 6; daa[13*20+3]= 5; daa[13*20+4]= 7; daa[13*20+5]= 1e-6; - daa[13*20+6]= 1e-6; daa[13*20+7]= 1e-6;daa[13*20+8]= 1e-6; daa[13*20+9]= 57; - daa[13*20+10]= 246; daa[13*20+11]= 1e-6; daa[13*20+12]= 11; daa[14*20+0]= 53; - daa[14*20+1]= 9; daa[14*20+2]= 33; daa[14*20+3]= 2; daa[14*20+4]= 1e-6; - 
daa[14*20+5]= 51; daa[14*20+6]= 1e-6;daa[14*20+7]= 1e-6; daa[14*20+8]= 53; - daa[14*20+9]= 5; daa[14*20+10]= 43; daa[14*20+11]= 18; daa[14*20+12]= 1e-6; - daa[14*20+13]= 17; daa[15*20+0]= 342; daa[15*20+1]= 3; daa[15*20+2]= 446; - daa[15*20+3]= 16; daa[15*20+4]= 347; daa[15*20+5]= 30; daa[15*20+6]= 21; - daa[15*20+7]= 112; daa[15*20+8]= 20; daa[15*20+9]= 1e-6; daa[15*20+10]= 74; - daa[15*20+11]= 65; daa[15*20+12]= 47; daa[15*20+13]= 90; daa[15*20+14]= 202; - daa[16*20+0]= 681; daa[16*20+1]= 1e-6;daa[16*20+2]= 110; daa[16*20+3]= 1e-6; - daa[16*20+4]= 114; daa[16*20+5]= 1e-6;daa[16*20+6]= 4; daa[16*20+7]= 1e-6; - daa[16*20+8]= 1; daa[16*20+9]= 360; daa[16*20+10]= 34; daa[16*20+11]= 50; - daa[16*20+12]= 691; daa[16*20+13]= 8; daa[16*20+14]= 78; daa[16*20+15]= 614; - daa[17*20+0]= 5; daa[17*20+1]= 16; daa[17*20+2]= 6; daa[17*20+3]= 1e-6; - daa[17*20+4]= 65; daa[17*20+5]= 1e-6;daa[17*20+6]= 1e-6; daa[17*20+7]= 1e-6; - daa[17*20+8]= 1e-6; daa[17*20+9]= 1e-6;daa[17*20+10]= 12; daa[17*20+11]= 1e-6; - daa[17*20+12]= 13; daa[17*20+13]= 1e-6; daa[17*20+14]= 7; daa[17*20+15]= 17; - daa[17*20+16]= 1e-6; daa[18*20+0]= 1e-6;daa[18*20+1]= 1e-6; daa[18*20+2]= 156; - daa[18*20+3]= 1e-6; daa[18*20+4]= 530; daa[18*20+5]= 54; daa[18*20+6]= 1e-6; - daa[18*20+7]= 1; daa[18*20+8]= 1525;daa[18*20+9]= 16; daa[18*20+10]= 25; - daa[18*20+11]= 67; daa[18*20+12]= 1e-6; daa[18*20+13]= 682; daa[18*20+14]= 8; - daa[18*20+15]= 107; daa[18*20+16]= 1e-6; daa[18*20+17]= 14; daa[19*20+0]= 398; - daa[19*20+1]= 1e-6; daa[19*20+2]= 1e-6;daa[19*20+3]= 10; daa[19*20+4]= 1e-6; - daa[19*20+5]= 33; daa[19*20+6]= 20; daa[19*20+7]= 5; daa[19*20+8]= 1e-6; - daa[19*20+9]= 2220; daa[19*20+10]= 100;daa[19*20+11]= 1e-6; daa[19*20+12]= 832; - daa[19*20+13]= 6; daa[19*20+14]= 1e-6; daa[19*20+15]= 1e-6; daa[19*20+16]= 237; - daa[19*20+17]= 1e-6; daa[19*20+18]= 1e-6; - - f[0]= 0.06920; f[1]= 0.01840; f[2]= 0.04000; f[3]= 0.018600; - f[4]= 0.00650; f[5]= 0.02380; f[6]= 0.02360; f[7]= 0.055700; - f[8]= 0.02770; f[9]= 
0.09050; f[10]=0.16750; f[11]= 0.02210; - f[12]=0.05610; f[13]= 0.06110; f[14]=0.05360; f[15]= 0.07250; - f[16]=0.08700; f[17]= 0.02930; f[18]=0.03400; f[19]= 0.04280; - } - else if (prot_model == "LG") - { - daa[1*20+0] = 0.425093; - - daa[2*20+0] = 0.276818; daa[2*20+1] = 0.751878; - - daa[3*20+0] = 0.395144; daa[3*20+1] = 0.123954; daa[3*20+2] = 5.076149; - - daa[4*20+0] = 2.489084; daa[4*20+1] = 0.534551; daa[4*20+2] = 0.528768; daa[4*20+3] = 0.062556; - - daa[5*20+0] = 0.969894; daa[5*20+1] = 2.807908; daa[5*20+2] = 1.695752; daa[5*20+3] = 0.523386; daa[5*20+4] = 0.084808; - - daa[6*20+0] = 1.038545; daa[6*20+1] = 0.363970; daa[6*20+2] = 0.541712; daa[6*20+3] = 5.243870; daa[6*20+4] = 0.003499; daa[6*20+5] = 4.128591; - - daa[7*20+0] = 2.066040; daa[7*20+1] = 0.390192; daa[7*20+2] = 1.437645; daa[7*20+3] = 0.844926; daa[7*20+4] = 0.569265; daa[7*20+5] = 0.267959; daa[7*20+6] = 0.348847; +const char* builtin_prot_models = R"( +#nexus; + +begin models; + +model POISSON= +1 +1 1 +1 1 1 +1 1 1 1 +1 1 1 1 1 +1 1 1 1 1 1 +1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05; + +model DAYHOFF= +27 +98 32 +120 0 905 +36 23 0 0 +89 246 103 134 0 +198 1 148 1153 0 716 +240 9 139 125 11 28 81 +23 240 535 86 28 606 43 10 +65 64 77 24 44 18 61 0 7 +41 15 34 0 0 73 11 7 44 257 +26 464 318 71 0 153 83 27 26 46 18 +72 90 1 0 0 114 30 17 0 336 527 243 +18 14 14 0 0 0 0 15 48 196 157 0 92 +250 103 42 13 19 153 51 34 94 12 32 33 17 11 +409 154 495 95 161 56 79 234 35 24 17 96 62 46 245 +371 26 229 66 16 53 34 30 22 192 33 136 104 13 78 550 +0 201 23 0 0 0 0 0 27 0 46 0 0 76 0 75 0 +24 8 
95 0 96 0 22 0 127 37 28 13 0 698 0 34 42 61 +208 24 15 18 49 35 37 54 44 889 175 10 258 12 48 30 157 0 28 +0.08712691 0.04090396 0.04043196 0.04687195 0.03347397 0.03825496 0.04952995 0.08861191 0.03361897 0.03688596 0.08535691 0.08048092 0.01475299 0.03977196 0.05067995 0.06957693 0.05854194 0.01049399 0.02991597 0.06471794; +[ NOTE 2019-06-04: normalised from original Dayhoff freqs, which do not sum to 1.0 + https://www.ebi.ac.uk/goldman-srv/dayhoff/dayhoff-dcmut.dat ] + +model DCMUT= +26.7828 +98.4474 32.7059 +119.9805 0.0000 893.1515 +36.0016 23.2374 0.0000 0.0000 +88.7753 243.9939 102.8509 134.8551 0.0000 +196.1167 0.0000 149.3409 1138.8659 0.0000 708.6022 +238.6111 8.7791 138.5352 124.0981 10.7278 28.1581 81.1907 +22.8116 238.3148 529.0024 86.8241 28.2729 601.1613 43.9469 10.6802 +65.3416 63.2629 76.8024 23.9248 43.8074 18.0393 60.9526 0.0000 7.6981 +40.6431 15.4924 34.1113 0.0000 0.0000 73.0772 11.2880 7.1514 44.3504 255.6685 +25.8635 461.0124 314.8371 71.6913 0.0000 151.9078 83.0078 26.7683 27.0475 46.0857 18.0629 +71.7840 89.6321 0.0000 0.0000 0.0000 112.7499 30.4803 17.0372 0.0000 333.2732 523.0115 241.1739 +18.3641 13.6906 13.8503 0.0000 0.0000 0.0000 0.0000 15.3478 47.5927 195.1951 156.5160 0.0000 92.1860 +248.5920 102.8313 41.9244 13.3940 18.7550 152.6188 50.7003 34.7153 93.3709 11.9152 31.6258 33.5419 17.0205 11.0506 +405.1870 153.1590 488.5892 95.6097 159.8356 56.1828 79.3999 232.2243 35.3643 24.7955 17.1432 95.4557 61.9951 45.9901 242.7202 +368.0365 26.5745 227.1697 66.0930 16.2366 52.5651 34.0156 30.6662 22.6333 190.0739 33.1090 135.0599 103.1534 13.6655 78.2857 543.6674 +0.0000 200.1375 22.4968 0.0000 0.0000 0.0000 0.0000 0.0000 27.0564 0.0000 46.1776 0.0000 0.0000 76.2354 0.0000 74.0819 0.0000 +24.4139 7.8012 94.6940 0.0000 95.3164 0.0000 21.4717 0.0000 126.5400 37.4834 28.6572 13.2142 0.0000 695.2629 0.0000 33.6289 41.7839 60.8070 +205.9564 24.0368 15.8067 17.8316 48.4678 34.6983 36.7250 53.8165 43.8715 881.0038 174.5156 10.3850 256.5955 
12.3606 48.5026 30.3836 156.1997 0.0000 27.9379 + 0.08712691 0.04090396 0.04043196 0.04687195 0.03347397 0.03825496 0.04952995 0.08861191 0.03361897 0.03688596 0.08535691 0.08048092 0.01475299 0.03977196 0.05067995 0.06957693 0.05854194 0.01049399 0.02991597 0.06471794; +[ NOTE 2019-06-04: normalised from original Dayhoff-DCMUT freqs, which do not sum to 1.0 + https://www.ebi.ac.uk/goldman-srv/dayhoff/dayhoff-dcmut.dat ] + +model JTT= +58 +54 45 +81 16 528 +56 113 34 10 +57 310 86 49 9 +105 29 58 767 5 323 +179 137 81 130 59 26 119 +27 328 391 112 69 597 26 23 +36 22 47 11 17 9 12 6 16 +30 38 12 7 23 72 9 6 56 229 +35 646 263 26 7 292 181 27 45 21 14 +54 44 30 15 31 43 18 14 33 479 388 65 +15 5 10 4 78 4 5 5 40 89 248 4 43 +194 74 15 15 14 164 18 24 115 10 102 21 16 17 +378 101 503 59 223 53 30 201 73 40 59 47 29 92 285 +475 64 232 38 42 51 32 33 46 245 25 103 226 12 118 477 +9 126 8 4 115 18 10 55 8 9 52 10 24 53 6 35 12 +11 20 70 46 209 24 7 8 573 32 24 8 18 536 10 63 21 71 +298 17 16 31 62 20 45 47 11 961 180 14 323 62 23 38 112 25 16 +0.07674792 0.05169095 0.04264496 0.05154395 0.01980298 0.04075196 0.06182994 0.07315193 0.02294398 0.05376095 0.09190391 0.05867594 0.02382598 0.04012596 0.05090095 0.06876493 0.05856494 0.01426099 0.03210197 0.06600493; +[ NOTE 2019-06-04: original JTT freqs do not sum to 1.0, taken from PAML package ] + +model MTREV= +23.18 +26.95 13.24 +17.67 1.90 794.38 +59.93 103.33 58.94 1.90 +1.90 220.99 173.56 55.28 75.24 +9.77 1.90 63.05 583.55 1.90 313.56 +120.71 23.03 53.30 56.77 30.71 6.75 28.28 +13.90 165.23 496.13 113.99 141.49 582.40 49.12 1.90 +96.49 1.90 27.10 4.34 62.73 8.34 3.31 5.98 12.26 +25.46 15.58 15.16 1.90 25.65 39.70 1.90 2.41 11.49 329.09 +8.36 141.40 608.70 2.31 1.90 465.58 313.86 22.73 127.67 19.57 14.88 +141.88 1.90 65.41 1.90 6.18 47.37 1.90 1.90 11.97 517.98 537.53 91.37 +6.37 4.69 15.20 4.98 70.80 19.11 2.67 1.90 48.16 84.67 216.06 6.44 90.82 +54.31 23.64 73.31 13.43 31.26 137.29 12.83 1.90 60.97 20.63 40.10 50.10 
18.84 17.31 +387.86 6.04 494.39 69.02 277.05 54.11 54.71 125.93 77.46 47.70 73.61 105.79 111.16 64.29 169.90 +480.72 2.08 238.46 28.01 179.97 94.93 14.82 11.17 44.78 368.43 126.40 136.33 528.17 33.85 128.22 597.21 +1.90 21.95 10.68 19.86 33.60 1.90 1.90 10.92 7.08 1.90 32.44 24.00 21.71 7.84 4.21 38.58 9.99 +6.48 1.90 191.36 21.21 254.77 38.82 13.12 3.21 670.14 25.01 44.15 51.17 39.96 465.58 16.21 64.92 38.73 26.25 +195.06 7.64 1.90 1.90 1.90 19.00 21.14 2.53 1.90 1222.94 91.67 1.90 387.54 6.35 8.23 1.90 204.54 5.37 1.90 +0.072 0.019 0.039 0.019 0.006 0.025 0.024 0.056 0.028 0.088 0.169 0.023 0.054 0.061 0.054 0.072 0.086 0.029 0.033 0.043; +[ NOTE 2019-06-04: The PI's used to sum to 0.999 and I (Z. Yang) changed one of the freq from 0.168 + into 0.169 so that the sum is 1. Prepared by Z. Yang ] - daa[8*20+0] = 0.358858; daa[8*20+1] = 2.426601; daa[8*20+2] = 4.509238; daa[8*20+3] = 0.927114; daa[8*20+4] = 0.640543; daa[8*20+5] = 4.813505; daa[8*20+6] = 0.423881; - daa[8*20+7] = 0.311484; - - daa[9*20+0] = 0.149830; daa[9*20+1] = 0.126991; daa[9*20+2] = 0.191503; daa[9*20+3] = 0.010690; daa[9*20+4] = 0.320627; daa[9*20+5] = 0.072854; daa[9*20+6] = 0.044265; - daa[9*20+7] = 0.008705; daa[9*20+8] = 0.108882; - - daa[10*20+0] = 0.395337; daa[10*20+1] = 0.301848; daa[10*20+2] = 0.068427; daa[10*20+3] = 0.015076; daa[10*20+4] = 0.594007; daa[10*20+5] = 0.582457; daa[10*20+6] = 0.069673; - daa[10*20+7] = 0.044261; daa[10*20+8] = 0.366317; daa[10*20+9] = 4.145067 ; - - daa[11*20+0] = 0.536518; daa[11*20+1] = 6.326067; daa[11*20+2] = 2.145078; daa[11*20+3] = 0.282959; daa[11*20+4] = 0.013266; daa[11*20+5] = 3.234294; daa[11*20+6] = 1.807177; - daa[11*20+7] = 0.296636; daa[11*20+8] = 0.697264; daa[11*20+9] = 0.159069; daa[11*20+10] = 0.137500; - - - daa[12*20+0] = 1.124035; daa[12*20+1] = 0.484133; daa[12*20+2] = 0.371004; daa[12*20+3] = 0.025548; daa[12*20+4] = 0.893680; daa[12*20+5] = 1.672569; daa[12*20+6] = 0.173735; - daa[12*20+7] = 0.139538; daa[12*20+8] = 0.442472; 
daa[12*20+9] = 4.273607; daa[12*20+10] = 6.312358; daa[12*20+11] = 0.656604; - - daa[13*20+0] = 0.253701; daa[13*20+1] = 0.052722;daa[13*20+2] = 0.089525; daa[13*20+3] = 0.017416; daa[13*20+4] = 1.105251; daa[13*20+5] = 0.035855; daa[13*20+6] = 0.018811; - daa[13*20+7] = 0.089586; daa[13*20+8] = 0.682139; daa[13*20+9] = 1.112727; daa[13*20+10] = 2.592692; daa[13*20+11] = 0.023918; daa[13*20+12] = 1.798853; - - daa[14*20+0] = 1.177651; daa[14*20+1] = 0.332533;daa[14*20+2] = 0.161787; daa[14*20+3] = 0.394456; daa[14*20+4] = 0.075382; daa[14*20+5] = 0.624294; daa[14*20+6] = 0.419409; - daa[14*20+7] = 0.196961; daa[14*20+8] = 0.508851; daa[14*20+9] = 0.078281; daa[14*20+10] = 0.249060; daa[14*20+11] = 0.390322; daa[14*20+12] = 0.099849; - daa[14*20+13] = 0.094464; - - daa[15*20+0] = 4.727182; daa[15*20+1] = 0.858151;daa[15*20+2] = 4.008358; daa[15*20+3] = 1.240275; daa[15*20+4] = 2.784478; daa[15*20+5] = 1.223828; daa[15*20+6] = 0.611973; - daa[15*20+7] = 1.739990; daa[15*20+8] = 0.990012; daa[15*20+9] = 0.064105; daa[15*20+10] = 0.182287; daa[15*20+11] = 0.748683; daa[15*20+12] = 0.346960; - daa[15*20+13] = 0.361819; daa[15*20+14] = 1.338132; - - daa[16*20+0] = 2.139501; daa[16*20+1] = 0.578987;daa[16*20+2] = 2.000679; daa[16*20+3] = 0.425860; daa[16*20+4] = 1.143480; daa[16*20+5] = 1.080136; daa[16*20+6] = 0.604545; - daa[16*20+7] = 0.129836; daa[16*20+8] = 0.584262; daa[16*20+9] = 1.033739; daa[16*20+10] = 0.302936; daa[16*20+11] = 1.136863; daa[16*20+12] = 2.020366; - daa[16*20+13] = 0.165001; daa[16*20+14] = 0.571468; daa[16*20+15] = 6.472279; - - daa[17*20+0] = 0.180717; daa[17*20+1] = 0.593607;daa[17*20+2] = 0.045376; daa[17*20+3] = 0.029890; daa[17*20+4] = 0.670128; daa[17*20+5] = 0.236199; daa[17*20+6] = 0.077852; - daa[17*20+7] = 0.268491; daa[17*20+8] = 0.597054; daa[17*20+9] = 0.111660; daa[17*20+10] = 0.619632; daa[17*20+11] = 0.049906; daa[17*20+12] = 0.696175; - daa[17*20+13] = 2.457121; daa[17*20+14] = 0.095131; daa[17*20+15] = 0.248862; daa[17*20+16] = 
0.140825; - - daa[18*20+0] = 0.218959; daa[18*20+1] = 0.314440;daa[18*20+2] = 0.612025; daa[18*20+3] = 0.135107; daa[18*20+4] = 1.165532; daa[18*20+5] = 0.257336; daa[18*20+6] = 0.120037; - daa[18*20+7] = 0.054679; daa[18*20+8] = 5.306834; daa[18*20+9] = 0.232523; daa[18*20+10] = 0.299648; daa[18*20+11] = 0.131932; daa[18*20+12] = 0.481306; - daa[18*20+13] = 7.803902; daa[18*20+14] = 0.089613; daa[18*20+15] = 0.400547; daa[18*20+16] = 0.245841; daa[18*20+17] = 3.151815; - - daa[19*20+0] = 2.547870; daa[19*20+1] = 0.170887;daa[19*20+2] = 0.083688; daa[19*20+3] = 0.037967; daa[19*20+4] = 1.959291; daa[19*20+5] = 0.210332; daa[19*20+6] = 0.245034; - daa[19*20+7] = 0.076701; daa[19*20+8] = 0.119013; daa[19*20+9] = 10.649107; daa[19*20+10] = 1.702745; daa[19*20+11] = 0.185202; daa[19*20+12] = 1.898718; - daa[19*20+13] = 0.654683; daa[19*20+14] = 0.296501; daa[19*20+15] = 0.098369; daa[19*20+16] = 2.188158; daa[19*20+17] = 0.189510; daa[19*20+18] = 0.249313; - -/* same problem here - * f[0] = 0.07906; - f[1] = 0.05594; - f[2] = 0.04198; - f[3] = 0.05305; - f[4] = 0.01294; - f[5] = 0.04077; - f[6] = 0.07158; - f[7] = 0.05734; - f[8] = 0.02235; - f[9] = 0.06216; - f[10] = 0.09908; - f[11] = 0.06460; - f[12] = 0.02295; - f[13] = 0.04230; - f[14] = 0.04404; - f[15] = 0.06120; - f[16] = 0.05329; - f[17] = 0.01207; - f[18] = 0.03415; - f[19] = 0.06915;*/ - // NOTE: originally f[19]=0.069147 but frequencies do not sum up to 1 - f[0] = 0.079066; f[1] = 0.055941; f[2] = 0.041977; f[3] = 0.053052; - f[4] = 0.012937; f[5] = 0.040767; f[6] = 0.071586; f[7] = 0.057337; - f[8] = 0.022355; f[9] = 0.062157; f[10] = 0.099081; f[11] = 0.064600; - f[12] = 0.022951; f[13] = 0.042302; f[14] = 0.044040; f[15] = 0.061197; - f[16] = 0.053287; f[17] = 0.012066; f[18] = 0.034155; f[19] = 0.069146; - - } - else if (prot_model == "MTART") - { - - - daa[1*20+0]= 0.2; - daa[2*20+0]= 0.2; - daa[2*20+1]= 0.2; - daa[3*20+0]= 1; - daa[3*20+1]= 4; - daa[3*20+2]= 500; - daa[4*20+0]= 254; - daa[4*20+1]= 36; 
- daa[4*20+2]= 98; - daa[4*20+3]= 11; - daa[5*20+0]= 0.2; - daa[5*20+1]= 154; - daa[5*20+2]= 262; - daa[5*20+3]= 0.2; - daa[5*20+4]= 0.2; - daa[6*20+0]= 0.2; - daa[6*20+1]= 0.2; - daa[6*20+2]= 183; - daa[6*20+3]= 862; - daa[6*20+4]= 0.2; - daa[6*20+5]= 262; - daa[7*20+0]= 200; - daa[7*20+1]= 0.2; - daa[7*20+2]= 121; - daa[7*20+3]= 12; - daa[7*20+4]= 81; - daa[7*20+5]= 3; - daa[7*20+6]= 44; - daa[8*20+0]= 0.2; - daa[8*20+1]= 41; - daa[8*20+2]= 180; - daa[8*20+3]= 0.2; - daa[8*20+4]= 12; - daa[8*20+5]= 314; - daa[8*20+6]= 15; - daa[8*20+7]= 0.2; - daa[9*20+0]= 26; - daa[9*20+1]= 2; - daa[9*20+2]= 21; - daa[9*20+3]= 7; - daa[9*20+4]= 63; - daa[9*20+5]= 11; - daa[9*20+6]= 7; - daa[9*20+7]= 3; - daa[9*20+8]= 0.2; - daa[10*20+0]= 4; - daa[10*20+1]= 2; - daa[10*20+2]= 13; - daa[10*20+3]= 1; - daa[10*20+4]= 79; - daa[10*20+5]= 16; - daa[10*20+6]= 2; - daa[10*20+7]= 1; - daa[10*20+8]= 6; - daa[10*20+9]= 515; - daa[11*20+0]= 0.2; - daa[11*20+1]= 209; - daa[11*20+2]= 467; - daa[11*20+3]= 2; - daa[11*20+4]= 0.2; - daa[11*20+5]= 349; - daa[11*20+6]= 106; - daa[11*20+7]= 0.2; - daa[11*20+8]= 0.2; - daa[11*20+9]= 3; - daa[11*20+10]= 4; - daa[12*20+0]= 121; - daa[12*20+1]= 5; - daa[12*20+2]= 79; - daa[12*20+3]= 0.2; - daa[12*20+4]= 312; - daa[12*20+5]= 67; - daa[12*20+6]= 0.2; - daa[12*20+7]= 56; - daa[12*20+8]= 0.2; - daa[12*20+9]= 515; - daa[12*20+10]= 885; - daa[12*20+11]= 106; - daa[13*20+0]= 13; - daa[13*20+1]= 5; - daa[13*20+2]= 20; - daa[13*20+3]= 0.2; - daa[13*20+4]= 184; - daa[13*20+5]= 0.2; - daa[13*20+6]= 0.2; - daa[13*20+7]= 1; - daa[13*20+8]= 14; - daa[13*20+9]= 118; - daa[13*20+10]= 263; - daa[13*20+11]= 11; - daa[13*20+12]= 322; - daa[14*20+0]= 49; - daa[14*20+1]= 0.2; - daa[14*20+2]= 17; - daa[14*20+3]= 0.2; - daa[14*20+4]= 0.2; - daa[14*20+5]= 39; - daa[14*20+6]= 8; - daa[14*20+7]= 0.2; - daa[14*20+8]= 1; - daa[14*20+9]= 0.2; - daa[14*20+10]= 12; - daa[14*20+11]= 17; - daa[14*20+12]= 5; - daa[14*20+13]= 15; - daa[15*20+0]= 673; - daa[15*20+1]= 3; - daa[15*20+2]= 
398; - daa[15*20+3]= 44; - daa[15*20+4]= 664; - daa[15*20+5]= 52; - daa[15*20+6]= 31; - daa[15*20+7]= 226; - daa[15*20+8]= 11; - daa[15*20+9]= 7; - daa[15*20+10]= 8; - daa[15*20+11]= 144; - daa[15*20+12]= 112; - daa[15*20+13]= 36; - daa[15*20+14]= 87; - daa[16*20+0]= 244; - daa[16*20+1]= 0.2; - daa[16*20+2]= 166; - daa[16*20+3]= 0.2; - daa[16*20+4]= 183; - daa[16*20+5]= 44; - daa[16*20+6]= 43; - daa[16*20+7]= 0.2; - daa[16*20+8]= 19; - daa[16*20+9]= 204; - daa[16*20+10]= 48; - daa[16*20+11]= 70; - daa[16*20+12]= 289; - daa[16*20+13]= 14; - daa[16*20+14]= 47; - daa[16*20+15]= 660; - daa[17*20+0]= 0.2; - daa[17*20+1]= 0.2; - daa[17*20+2]= 8; - daa[17*20+3]= 0.2; - daa[17*20+4]= 22; - daa[17*20+5]= 7; - daa[17*20+6]= 11; - daa[17*20+7]= 2; - daa[17*20+8]= 0.2; - daa[17*20+9]= 0.2; - daa[17*20+10]= 21; - daa[17*20+11]= 16; - daa[17*20+12]= 71; - daa[17*20+13]= 54; - daa[17*20+14]= 0.2; - daa[17*20+15]= 2; - daa[17*20+16]= 0.2; - daa[18*20+0]= 1; - daa[18*20+1]= 4; - daa[18*20+2]= 251; - daa[18*20+3]= 0.2; - daa[18*20+4]= 72; - daa[18*20+5]= 87; - daa[18*20+6]= 8; - daa[18*20+7]= 9; - daa[18*20+8]= 191; - daa[18*20+9]= 12; - daa[18*20+10]= 20; - daa[18*20+11]= 117; - daa[18*20+12]= 71; - daa[18*20+13]= 792; - daa[18*20+14]= 18; - daa[18*20+15]= 30; - daa[18*20+16]= 46; - daa[18*20+17]= 38; - daa[19*20+0]= 340; - daa[19*20+1]= 0.2; - daa[19*20+2]= 23; - daa[19*20+3]= 0.2; - daa[19*20+4]= 350; - daa[19*20+5]= 0.2; - daa[19*20+6]= 14; - daa[19*20+7]= 3; - daa[19*20+8]= 0.2; - daa[19*20+9]= 1855; - daa[19*20+10]= 85; - daa[19*20+11]= 26; - daa[19*20+12]= 281; - daa[19*20+13]= 52; - daa[19*20+14]= 32; - daa[19*20+15]= 61; - daa[19*20+16]= 544; - daa[19*20+17]= 0.2; - daa[19*20+18]= 2; - - f[0]= 0.054116; - f[1]= 0.018227; - f[2]= 0.039903; - f[3]= 0.020160; - f[4]= 0.009709; - f[5]= 0.018781; - f[6]= 0.024289; - f[7]= 0.068183; - f[8]= 0.024518; - f[9]= 0.092638; - f[10]= 0.148658; - f[11]= 0.021718; - f[12]= 0.061453; - f[13]= 0.088668; - f[14]= 0.041826; - f[15]= 0.091030; 
- f[16]= 0.049194; - f[17]= 0.029786; - f[18]= 0.039443; - f[19]= 0.057700; - } - else if (prot_model == "MTZOA") - { - daa[1*20+0]= 3.3; - daa[2*20+0]= 1.7; - daa[2*20+1]= 33.6; - daa[3*20+0]= 16.1; - daa[3*20+1]= 3.2; - daa[3*20+2]= 617.0; - daa[4*20+0]= 272.5; - daa[4*20+1]= 61.1; - daa[4*20+2]= 94.6; - daa[4*20+3]= 9.5; - daa[5*20+0]= 7.3; - daa[5*20+1]= 231.0; - daa[5*20+2]= 190.3; - daa[5*20+3]= 19.3; - daa[5*20+4]= 49.1; - daa[6*20+0]= 17.1; - daa[6*20+1]= 6.4; - daa[6*20+2]= 174.0; - daa[6*20+3]= 883.6; - daa[6*20+4]= 3.4; - daa[6*20+5]= 349.4; - daa[7*20+0]= 289.3; - daa[7*20+1]= 7.2; - daa[7*20+2]= 99.3; - daa[7*20+3]= 26.0; - daa[7*20+4]= 82.4; - daa[7*20+5]= 8.9; - daa[7*20+6]= 43.1; - daa[8*20+0]= 2.3; - daa[8*20+1]= 61.7; - daa[8*20+2]= 228.9; - daa[8*20+3]= 55.6; - daa[8*20+4]= 37.5; - daa[8*20+5]= 421.8; - daa[8*20+6]= 14.9; - daa[8*20+7]= 7.4; - daa[9*20+0]= 33.2; - daa[9*20+1]= 0.2; - daa[9*20+2]= 24.3; - daa[9*20+3]= 1.5; - daa[9*20+4]= 48.8; - daa[9*20+5]= 0.2; - daa[9*20+6]= 7.3; - daa[9*20+7]= 3.4; - daa[9*20+8]= 1.6; - daa[10*20+0]= 15.6; - daa[10*20+1]= 4.1; - daa[10*20+2]= 7.9; - daa[10*20+3]= 0.5; - daa[10*20+4]= 59.7; - daa[10*20+5]= 23.0; - daa[10*20+6]= 1.0; - daa[10*20+7]= 3.5; - daa[10*20+8]= 6.6; - daa[10*20+9]= 425.2; - daa[11*20+0]= 0.2; - daa[11*20+1]= 292.3; - daa[11*20+2]= 413.4; - daa[11*20+3]= 0.2; - daa[11*20+4]= 0.2; - daa[11*20+5]= 334.0; - daa[11*20+6]= 163.2; - daa[11*20+7]= 10.1; - daa[11*20+8]= 23.9; - daa[11*20+9]= 8.4; - daa[11*20+10]= 6.7; - daa[12*20+0]= 136.5; - daa[12*20+1]= 3.8; - daa[12*20+2]= 73.7; - daa[12*20+3]= 0.2; - daa[12*20+4]= 264.8; - daa[12*20+5]= 83.9; - daa[12*20+6]= 0.2; - daa[12*20+7]= 52.2; - daa[12*20+8]= 7.1; - daa[12*20+9]= 449.7; - daa[12*20+10]= 636.3; - daa[12*20+11]= 83.0; - daa[13*20+0]= 26.5; - daa[13*20+1]= 0.2; - daa[13*20+2]= 12.9; - daa[13*20+3]= 2.0; - daa[13*20+4]= 167.8; - daa[13*20+5]= 9.5; - daa[13*20+6]= 0.2; - daa[13*20+7]= 5.8; - daa[13*20+8]= 13.1; - daa[13*20+9]= 90.3; - 
daa[13*20+10]= 234.2; - daa[13*20+11]= 16.3; - daa[13*20+12]= 215.6; - daa[14*20+0]= 61.8; - daa[14*20+1]= 7.5; - daa[14*20+2]= 22.6; - daa[14*20+3]= 0.2; - daa[14*20+4]= 8.1; - daa[14*20+5]= 52.2; - daa[14*20+6]= 20.6; - daa[14*20+7]= 1.3; - daa[14*20+8]= 15.6; - daa[14*20+9]= 2.6; - daa[14*20+10]= 11.4; - daa[14*20+11]= 24.3; - daa[14*20+12]= 5.4; - daa[14*20+13]= 10.5; - daa[15*20+0]= 644.9; - daa[15*20+1]= 11.8; - daa[15*20+2]= 420.2; - daa[15*20+3]= 51.4; - daa[15*20+4]= 656.3; - daa[15*20+5]= 96.4; - daa[15*20+6]= 38.4; - daa[15*20+7]= 257.1; - daa[15*20+8]= 23.1; - daa[15*20+9]= 7.2; - daa[15*20+10]= 15.2; - daa[15*20+11]= 144.9; - daa[15*20+12]= 95.3; - daa[15*20+13]= 32.2; - daa[15*20+14]= 79.7; - daa[16*20+0]= 378.1; - daa[16*20+1]= 3.2; - daa[16*20+2]= 184.6; - daa[16*20+3]= 2.3; - daa[16*20+4]= 199.0; - daa[16*20+5]= 39.4; - daa[16*20+6]= 34.5; - daa[16*20+7]= 5.2; - daa[16*20+8]= 19.4; - daa[16*20+9]= 222.3; - daa[16*20+10]= 50.0; - daa[16*20+11]= 75.5; - daa[16*20+12]= 305.1; - daa[16*20+13]= 19.3; - daa[16*20+14]= 56.9; - daa[16*20+15]= 666.3; - daa[17*20+0]= 3.1; - daa[17*20+1]= 16.9; - daa[17*20+2]= 6.4; - daa[17*20+3]= 0.2; - daa[17*20+4]= 36.1; - daa[17*20+5]= 6.1; - daa[17*20+6]= 3.5; - daa[17*20+7]= 12.3; - daa[17*20+8]= 4.5; - daa[17*20+9]= 9.7; - daa[17*20+10]= 27.2; - daa[17*20+11]= 6.6; - daa[17*20+12]= 48.7; - daa[17*20+13]= 58.2; - daa[17*20+14]= 1.3; - daa[17*20+15]= 10.3; - daa[17*20+16]= 3.6; - daa[18*20+0]= 2.1; - daa[18*20+1]= 13.8; - daa[18*20+2]= 141.6; - daa[18*20+3]= 13.9; - daa[18*20+4]= 76.7; - daa[18*20+5]= 52.3; - daa[18*20+6]= 10.0; - daa[18*20+7]= 4.3; - daa[18*20+8]= 266.5; - daa[18*20+9]= 13.1; - daa[18*20+10]= 5.7; - daa[18*20+11]= 45.0; - daa[18*20+12]= 41.4; - daa[18*20+13]= 590.5; - daa[18*20+14]= 4.2; - daa[18*20+15]= 29.7; - daa[18*20+16]= 29.0; - daa[18*20+17]= 79.8; - daa[19*20+0]= 321.9; - daa[19*20+1]= 5.1; - daa[19*20+2]= 7.1; - daa[19*20+3]= 3.7; - daa[19*20+4]= 243.8; - daa[19*20+5]= 9.0; - daa[19*20+6]= 
16.3; - daa[19*20+7]= 23.7; - daa[19*20+8]= 0.3; - daa[19*20+9]= 1710.6; - daa[19*20+10]= 126.1; - daa[19*20+11]= 11.1; - daa[19*20+12]= 279.6; - daa[19*20+13]= 59.6; - daa[19*20+14]= 17.9; - daa[19*20+15]= 49.5; - daa[19*20+16]= 396.4; - daa[19*20+17]= 13.7; - daa[19*20+18]= 15.6; - - f[0]= 0.069; - f[1]= 0.021; - f[2]= 0.030; - f[3]= 0.020; - f[4]= 0.010; - f[5]= 0.019; - f[6]= 0.025; - f[7]= 0.072; - f[8]= 0.027; - f[9]= 0.085; - f[10]= 0.157; - f[11]= 0.019; - f[12]= 0.051; - f[13]= 0.082; - f[14]= 0.045; - f[15]= 0.081; - f[16]= 0.056; - f[17]= 0.028; - f[18]= 0.037; - f[19]= 0.066; - } - else if (prot_model == "PMB") - { - daa[1*20+0]= 0.674995699; - daa[2*20+0]= 0.589645178; - daa[2*20+1]= 1.189067034; - daa[3*20+0]= 0.462499504; - daa[3*20+1]= 0.605460903; - daa[3*20+2]= 3.573373315; - daa[4*20+0]= 1.065445546; - daa[4*20+1]= 0.31444833; - daa[4*20+2]= 0.589852457; - daa[4*20+3]= 0.246951424; - daa[5*20+0]= 1.111766964; - daa[5*20+1]= 2.967840934; - daa[5*20+2]= 2.299755865; - daa[5*20+3]= 1.686058219; - daa[5*20+4]= 0.245163782; - daa[6*20+0]= 1.046334652; - daa[6*20+1]= 1.201770702; - daa[6*20+2]= 1.277836748; - daa[6*20+3]= 4.399995525; - daa[6*20+4]= 0.091071867; - daa[6*20+5]= 4.15967899; - daa[7*20+0]= 1.587964372; - daa[7*20+1]= 0.523770553; - daa[7*20+2]= 1.374854049; - daa[7*20+3]= 0.734992057; - daa[7*20+4]= 0.31706632; - daa[7*20+5]= 0.596789898; - daa[7*20+6]= 0.463812837; - daa[8*20+0]= 0.580830874; - daa[8*20+1]= 1.457127446; - daa[8*20+2]= 2.283037894; - daa[8*20+3]= 0.839348444; - daa[8*20+4]= 0.411543728; - daa[8*20+5]= 1.812173605; - daa[8*20+6]= 0.877842609; - daa[8*20+7]= 0.476331437; - daa[9*20+0]= 0.464590585; - daa[9*20+1]= 0.35964586; - daa[9*20+2]= 0.426069419; - daa[9*20+3]= 0.266775558; - daa[9*20+4]= 0.417547309; - daa[9*20+5]= 0.315256838; - daa[9*20+6]= 0.30421529; - daa[9*20+7]= 0.180198883; - daa[9*20+8]= 0.285186418; - daa[10*20+0]= 0.804404505; - daa[10*20+1]= 0.520701585; - daa[10*20+2]= 0.41009447; - daa[10*20+3]= 
0.269124919; - daa[10*20+4]= 0.450795211; - daa[10*20+5]= 0.625792937; - daa[10*20+6]= 0.32078471; - daa[10*20+7]= 0.259854426; - daa[10*20+8]= 0.363981358; - daa[10*20+9]= 4.162454693; - daa[11*20+0]= 0.831998835; - daa[11*20+1]= 4.956476453; - daa[11*20+2]= 2.037575629; - daa[11*20+3]= 1.114178954; - daa[11*20+4]= 0.274163536; - daa[11*20+5]= 3.521346591; - daa[11*20+6]= 2.415974716; - daa[11*20+7]= 0.581001076; - daa[11*20+8]= 0.985885486; - daa[11*20+9]= 0.374784947; - daa[11*20+10]= 0.498011337; - daa[12*20+0]= 1.546725076; - daa[12*20+1]= 0.81346254; - daa[12*20+2]= 0.737846301; - daa[12*20+3]= 0.341932741; - daa[12*20+4]= 0.618614612; - daa[12*20+5]= 2.067388546; - daa[12*20+6]= 0.531773639; - daa[12*20+7]= 0.465349326; - daa[12*20+8]= 0.380925433; - daa[12*20+9]= 3.65807012; - daa[12*20+10]= 5.002338375; - daa[12*20+11]= 0.661095832; - daa[13*20+0]= 0.546169219; - daa[13*20+1]= 0.303437244; - daa[13*20+2]= 0.425193716; - daa[13*20+3]= 0.219005213; - daa[13*20+4]= 0.669206193; - daa[13*20+5]= 0.406042546; - daa[13*20+6]= 0.224154698; - daa[13*20+7]= 0.35402891; - daa[13*20+8]= 0.576231691; - daa[13*20+9]= 1.495264661; - daa[13*20+10]= 2.392638293; - daa[13*20+11]= 0.269496317; - daa[13*20+12]= 2.306919847; - daa[14*20+0]= 1.241586045; - daa[14*20+1]= 0.65577338; - daa[14*20+2]= 0.711495595; - daa[14*20+3]= 0.775624818; - daa[14*20+4]= 0.198679914; - daa[14*20+5]= 0.850116543; - daa[14*20+6]= 0.794584081; - daa[14*20+7]= 0.588254139; - daa[14*20+8]= 0.456058589; - daa[14*20+9]= 0.366232942; - daa[14*20+10]= 0.430073179; - daa[14*20+11]= 1.036079005; - daa[14*20+12]= 0.337502282; - daa[14*20+13]= 0.481144863; - daa[15*20+0]= 3.452308792; - daa[15*20+1]= 0.910144334; - daa[15*20+2]= 2.572577221; - daa[15*20+3]= 1.440896785; - daa[15*20+4]= 0.99870098; - daa[15*20+5]= 1.348272505; - daa[15*20+6]= 1.205509425; - daa[15*20+7]= 1.402122097; - daa[15*20+8]= 0.799966711; - daa[15*20+9]= 0.530641901; - daa[15*20+10]= 0.402471997; - daa[15*20+11]= 1.234648153; - 
daa[15*20+12]= 0.945453716; - daa[15*20+13]= 0.613230817; - daa[15*20+14]= 1.217683028; - daa[16*20+0]= 1.751412803; - daa[16*20+1]= 0.89517149; - daa[16*20+2]= 1.823161023; - daa[16*20+3]= 0.994227284; - daa[16*20+4]= 0.847312432; - daa[16*20+5]= 1.320626678; - daa[16*20+6]= 0.949599791; - daa[16*20+7]= 0.542185658; - daa[16*20+8]= 0.83039281; - daa[16*20+9]= 1.114132523; - daa[16*20+10]= 0.779827336; - daa[16*20+11]= 1.290709079; - daa[16*20+12]= 1.551488041; - daa[16*20+13]= 0.718895136; - daa[16*20+14]= 0.780913179; - daa[16*20+15]= 4.448982584; - daa[17*20+0]= 0.35011051; - daa[17*20+1]= 0.618778365; - daa[17*20+2]= 0.422407388; - daa[17*20+3]= 0.362495245; - daa[17*20+4]= 0.445669347; - daa[17*20+5]= 0.72038474; - daa[17*20+6]= 0.261258229; - daa[17*20+7]= 0.37874827; - daa[17*20+8]= 0.72436751; - daa[17*20+9]= 0.516260502; - daa[17*20+10]= 0.794797115; - daa[17*20+11]= 0.43340962; - daa[17*20+12]= 0.768395107; - daa[17*20+13]= 3.29519344; - daa[17*20+14]= 0.499869138; - daa[17*20+15]= 0.496334956; - daa[17*20+16]= 0.38372361; - daa[18*20+0]= 0.573154753; - daa[18*20+1]= 0.628599063; - daa[18*20+2]= 0.720013799; - daa[18*20+3]= 0.436220437; - daa[18*20+4]= 0.55626163; - daa[18*20+5]= 0.728970584; - daa[18*20+6]= 0.50720003; - daa[18*20+7]= 0.284727562; - daa[18*20+8]= 2.210952064; - daa[18*20+9]= 0.570562395; - daa[18*20+10]= 0.811019594; - daa[18*20+11]= 0.664884513; - daa[18*20+12]= 0.93253606; - daa[18*20+13]= 5.894735673; - daa[18*20+14]= 0.433748126; - daa[18*20+15]= 0.593795813; - daa[18*20+16]= 0.523549536; - daa[18*20+17]= 2.996248013; - daa[19*20+0]= 2.063050067; - daa[19*20+1]= 0.388680158; - daa[19*20+2]= 0.474418852; - daa[19*20+3]= 0.275658381; - daa[19*20+4]= 0.998911631; - daa[19*20+5]= 0.634408285; - daa[19*20+6]= 0.527640634; - daa[19*20+7]= 0.314700907; - daa[19*20+8]= 0.305792277; - daa[19*20+9]= 8.002789424; - daa[19*20+10]= 2.113077156; - daa[19*20+11]= 0.526184203; - daa[19*20+12]= 1.737356217; - daa[19*20+13]= 0.983844803; - 
daa[19*20+14]= 0.551333603; - daa[19*20+15]= 0.507506011; - daa[19*20+16]= 1.89965079; - daa[19*20+17]= 0.429570747; - daa[19*20+18]= 0.716795463; - - f[0]= 0.076; - f[1]= 0.054; - f[2]= 0.038; - f[3]= 0.045; - f[4]= 0.028; - f[5]= 0.034; - f[6]= 0.053; - f[7]= 0.078; - f[8]= 0.030; - f[9]= 0.060; - f[10]= 0.096; - f[11]= 0.052; - f[12]= 0.022; - f[13]= 0.045; - f[14]= 0.042; - f[15]= 0.068; - f[16]= 0.056; - f[17]= 0.016; - f[18]= 0.036; - f[19]= 0.071; - } - else if (prot_model == "HIVB") - { - daa[1*20+0]= 0.30750700; - daa[2*20+0]= 0.00500000; - daa[2*20+1]= 0.29554300; - daa[3*20+0]= 1.45504000; - daa[3*20+1]= 0.00500000; - daa[3*20+2]= 17.66120000; - daa[4*20+0]= 0.12375800; - daa[4*20+1]= 0.35172100; - daa[4*20+2]= 0.08606420; - daa[4*20+3]= 0.00500000; - daa[5*20+0]= 0.05511280; - daa[5*20+1]= 3.42150000; - daa[5*20+2]= 0.67205200; - daa[5*20+3]= 0.00500000; - daa[5*20+4]= 0.00500000; - daa[6*20+0]= 1.48135000; - daa[6*20+1]= 0.07492180; - daa[6*20+2]= 0.07926330; - daa[6*20+3]= 10.58720000; - daa[6*20+4]= 0.00500000; - daa[6*20+5]= 2.56020000; - daa[7*20+0]= 2.13536000; - daa[7*20+1]= 3.65345000; - daa[7*20+2]= 0.32340100; - daa[7*20+3]= 2.83806000; - daa[7*20+4]= 0.89787100; - daa[7*20+5]= 0.06191370; - daa[7*20+6]= 3.92775000; - daa[8*20+0]= 0.08476130; - daa[8*20+1]= 9.04044000; - daa[8*20+2]= 7.64585000; - daa[8*20+3]= 1.91690000; - daa[8*20+4]= 0.24007300; - daa[8*20+5]= 7.05545000; - daa[8*20+6]= 0.11974000; - daa[8*20+7]= 0.00500000; - daa[9*20+0]= 0.00500000; - daa[9*20+1]= 0.67728900; - daa[9*20+2]= 0.68056500; - daa[9*20+3]= 0.01767920; - daa[9*20+4]= 0.00500000; - daa[9*20+5]= 0.00500000; - daa[9*20+6]= 0.00609079; - daa[9*20+7]= 0.00500000; - daa[9*20+8]= 0.10311100; - daa[10*20+0]= 0.21525600; - daa[10*20+1]= 0.70142700; - daa[10*20+2]= 0.00500000; - daa[10*20+3]= 0.00876048; - daa[10*20+4]= 0.12977700; - daa[10*20+5]= 1.49456000; - daa[10*20+6]= 0.00500000; - daa[10*20+7]= 0.00500000; - daa[10*20+8]= 1.74171000; - daa[10*20+9]= 5.95879000; - 
daa[11*20+0]= 0.00500000; - daa[11*20+1]= 20.45000000; - daa[11*20+2]= 7.90443000; - daa[11*20+3]= 0.00500000; - daa[11*20+4]= 0.00500000; - daa[11*20+5]= 6.54737000; - daa[11*20+6]= 4.61482000; - daa[11*20+7]= 0.52170500; - daa[11*20+8]= 0.00500000; - daa[11*20+9]= 0.32231900; - daa[11*20+10]= 0.08149950; - daa[12*20+0]= 0.01866430; - daa[12*20+1]= 2.51394000; - daa[12*20+2]= 0.00500000; - daa[12*20+3]= 0.00500000; - daa[12*20+4]= 0.00500000; - daa[12*20+5]= 0.30367600; - daa[12*20+6]= 0.17578900; - daa[12*20+7]= 0.00500000; - daa[12*20+8]= 0.00500000; - daa[12*20+9]= 11.20650000; - daa[12*20+10]= 5.31961000; - daa[12*20+11]= 1.28246000; - daa[13*20+0]= 0.01412690; - daa[13*20+1]= 0.00500000; - daa[13*20+2]= 0.00500000; - daa[13*20+3]= 0.00500000; - daa[13*20+4]= 9.29815000; - daa[13*20+5]= 0.00500000; - daa[13*20+6]= 0.00500000; - daa[13*20+7]= 0.29156100; - daa[13*20+8]= 0.14555800; - daa[13*20+9]= 3.39836000; - daa[13*20+10]= 8.52484000; - daa[13*20+11]= 0.03426580; - daa[13*20+12]= 0.18802500; - daa[14*20+0]= 2.12217000; - daa[14*20+1]= 1.28355000; - daa[14*20+2]= 0.00739578; - daa[14*20+3]= 0.03426580; - daa[14*20+4]= 0.00500000; - daa[14*20+5]= 4.47211000; - daa[14*20+6]= 0.01202260; - daa[14*20+7]= 0.00500000; - daa[14*20+8]= 2.45318000; - daa[14*20+9]= 0.04105930; - daa[14*20+10]= 2.07757000; - daa[14*20+11]= 0.03138620; - daa[14*20+12]= 0.00500000; - daa[14*20+13]= 0.00500000; - daa[15*20+0]= 2.46633000; - daa[15*20+1]= 3.47910000; - daa[15*20+2]= 13.14470000; - daa[15*20+3]= 0.52823000; - daa[15*20+4]= 4.69314000; - daa[15*20+5]= 0.11631100; - daa[15*20+6]= 0.00500000; - daa[15*20+7]= 4.38041000; - daa[15*20+8]= 0.38274700; - daa[15*20+9]= 1.21803000; - daa[15*20+10]= 0.92765600; - daa[15*20+11]= 0.50411100; - daa[15*20+12]= 0.00500000; - daa[15*20+13]= 0.95647200; - daa[15*20+14]= 5.37762000; - daa[16*20+0]= 15.91830000; - daa[16*20+1]= 2.86868000; - daa[16*20+2]= 6.88667000; - daa[16*20+3]= 0.27472400; - daa[16*20+4]= 0.73996900; - daa[16*20+5]= 
0.24358900; - daa[16*20+6]= 0.28977400; - daa[16*20+7]= 0.36961500; - daa[16*20+8]= 0.71159400; - daa[16*20+9]= 8.61217000; - daa[16*20+10]= 0.04376730; - daa[16*20+11]= 4.67142000; - daa[16*20+12]= 4.94026000; - daa[16*20+13]= 0.01412690; - daa[16*20+14]= 2.01417000; - daa[16*20+15]= 8.93107000; - daa[17*20+0]= 0.00500000; - daa[17*20+1]= 0.99133800; - daa[17*20+2]= 0.00500000; - daa[17*20+3]= 0.00500000; - daa[17*20+4]= 2.63277000; - daa[17*20+5]= 0.02665600; - daa[17*20+6]= 0.00500000; - daa[17*20+7]= 1.21674000; - daa[17*20+8]= 0.06951790; - daa[17*20+9]= 0.00500000; - daa[17*20+10]= 0.74884300; - daa[17*20+11]= 0.00500000; - daa[17*20+12]= 0.08907800; - daa[17*20+13]= 0.82934300; - daa[17*20+14]= 0.04445060; - daa[17*20+15]= 0.02487280; - daa[17*20+16]= 0.00500000; - daa[18*20+0]= 0.00500000; - daa[18*20+1]= 0.00991826; - daa[18*20+2]= 1.76417000; - daa[18*20+3]= 0.67465300; - daa[18*20+4]= 7.57932000; - daa[18*20+5]= 0.11303300; - daa[18*20+6]= 0.07926330; - daa[18*20+7]= 0.00500000; - daa[18*20+8]= 18.69430000; - daa[18*20+9]= 0.14816800; - daa[18*20+10]= 0.11198600; - daa[18*20+11]= 0.00500000; - daa[18*20+12]= 0.00500000; - daa[18*20+13]= 15.34000000; - daa[18*20+14]= 0.03043810; - daa[18*20+15]= 0.64802400; - daa[18*20+16]= 0.10565200; - daa[18*20+17]= 1.28022000; - daa[19*20+0]= 7.61428000; - daa[19*20+1]= 0.08124540; - daa[19*20+2]= 0.02665600; - daa[19*20+3]= 1.04793000; - daa[19*20+4]= 0.42002700; - daa[19*20+5]= 0.02091530; - daa[19*20+6]= 1.02847000; - daa[19*20+7]= 0.95315500; - daa[19*20+8]= 0.00500000; - daa[19*20+9]= 17.73890000; - daa[19*20+10]= 1.41036000; - daa[19*20+11]= 0.26582900; - daa[19*20+12]= 6.85320000; - daa[19*20+13]= 0.72327400; - daa[19*20+14]= 0.00500000; - daa[19*20+15]= 0.07492180; - daa[19*20+16]= 0.70922600; - daa[19*20+17]= 0.00500000; - daa[19*20+18]= 0.04105930; - /* ROUNDING ERROR: - f[0]= 0.060; - f[1]= 0.066; - f[2]= 0.044; - f[3]= 0.042; - f[4]= 0.020; - f[5]= 0.054; - f[6]= 0.071; - f[7]= 0.072; - f[8]= 0.022; - 
f[9]= 0.070; - f[10]= 0.099; - f[11]= 0.057; - f[12]= 0.020; - f[13]= 0.029; - f[14]= 0.046; - f[15]= 0.051; - f[16]= 0.054; - f[17]= 0.033; - f[18]= 0.028; - f[19]= 0.062; - */ - f[0]= 0.060490222; f[1]= 0.066039665; f[2]= 0.044127815; f[3]= 0.042109048; - f[4]= 0.020075899; f[5]= 0.053606488; f[6]= 0.071567447; f[7]= 0.072308239; - f[8]= 0.022293943; f[9]= 0.069730629; f[10]= 0.098851122; f[11]= 0.056968211; - f[12]= 0.019768318; f[13]= 0.028809447; f[14]= 0.046025282; f[15]= 0.05060433; - f[16]= 0.053636813; f[17]= 0.033011601; f[18]= 0.028350243; f[19]= 0.061625237; - - } - else if (prot_model == "HIVW") - { - daa[1*20+0]= 0.0744808; - daa[2*20+0]= 0.6175090; - daa[2*20+1]= 0.1602400; - daa[3*20+0]= 4.4352100; - daa[3*20+1]= 0.0674539; - daa[3*20+2]= 29.4087000; - daa[4*20+0]= 0.1676530; - daa[4*20+1]= 2.8636400; - daa[4*20+2]= 0.0604932; - daa[4*20+3]= 0.0050000; - daa[5*20+0]= 0.0050000; - daa[5*20+1]= 10.6746000; - daa[5*20+2]= 0.3420680; - daa[5*20+3]= 0.0050000; - daa[5*20+4]= 0.0050000; - daa[6*20+0]= 5.5632500; - daa[6*20+1]= 0.0251632; - daa[6*20+2]= 0.2015260; - daa[6*20+3]= 12.1233000; - daa[6*20+4]= 0.0050000; - daa[6*20+5]= 3.2065600; - daa[7*20+0]= 1.8685000; - daa[7*20+1]= 13.4379000; - daa[7*20+2]= 0.0604932; - daa[7*20+3]= 10.3969000; - daa[7*20+4]= 0.0489798; - daa[7*20+5]= 0.0604932; - daa[7*20+6]= 14.7801000; - daa[8*20+0]= 0.0050000; - daa[8*20+1]= 6.8440500; - daa[8*20+2]= 8.5987600; - daa[8*20+3]= 2.3177900; - daa[8*20+4]= 0.0050000; - daa[8*20+5]= 18.5465000; - daa[8*20+6]= 0.0050000; - daa[8*20+7]= 0.0050000; - daa[9*20+0]= 0.0050000; - daa[9*20+1]= 1.3406900; - daa[9*20+2]= 0.9870280; - daa[9*20+3]= 0.1451240; - daa[9*20+4]= 0.0050000; - daa[9*20+5]= 0.0342252; - daa[9*20+6]= 0.0390512; - daa[9*20+7]= 0.0050000; - daa[9*20+8]= 0.0050000; - daa[10*20+0]= 0.1602400; - daa[10*20+1]= 0.5867570; - daa[10*20+2]= 0.0050000; - daa[10*20+3]= 0.0050000; - daa[10*20+4]= 0.0050000; - daa[10*20+5]= 2.8904800; - daa[10*20+6]= 0.1298390; - 
daa[10*20+7]= 0.0489798; - daa[10*20+8]= 1.7638200; - daa[10*20+9]= 9.1024600; - daa[11*20+0]= 0.5927840; - daa[11*20+1]= 39.8897000; - daa[11*20+2]= 10.6655000; - daa[11*20+3]= 0.8943130; - daa[11*20+4]= 0.0050000; - daa[11*20+5]= 13.0705000; - daa[11*20+6]= 23.9626000; - daa[11*20+7]= 0.2794250; - daa[11*20+8]= 0.2240600; - daa[11*20+9]= 0.8174810; - daa[11*20+10]= 0.0050000; - daa[12*20+0]= 0.0050000; - daa[12*20+1]= 3.2865200; - daa[12*20+2]= 0.2015260; - daa[12*20+3]= 0.0050000; - daa[12*20+4]= 0.0050000; - daa[12*20+5]= 0.0050000; - daa[12*20+6]= 0.0050000; - daa[12*20+7]= 0.0489798; - daa[12*20+8]= 0.0050000; - daa[12*20+9]= 17.3064000; - daa[12*20+10]= 11.3839000; - daa[12*20+11]= 4.0956400; - daa[13*20+0]= 0.5979230; - daa[13*20+1]= 0.0050000; - daa[13*20+2]= 0.0050000; - daa[13*20+3]= 0.0050000; - daa[13*20+4]= 0.3629590; - daa[13*20+5]= 0.0050000; - daa[13*20+6]= 0.0050000; - daa[13*20+7]= 0.0050000; - daa[13*20+8]= 0.0050000; - daa[13*20+9]= 1.4828800; - daa[13*20+10]= 7.4878100; - daa[13*20+11]= 0.0050000; - daa[13*20+12]= 0.0050000; - daa[14*20+0]= 1.0098100; - daa[14*20+1]= 0.4047230; - daa[14*20+2]= 0.3448480; - daa[14*20+3]= 0.0050000; - daa[14*20+4]= 0.0050000; - daa[14*20+5]= 3.0450200; - daa[14*20+6]= 0.0050000; - daa[14*20+7]= 0.0050000; - daa[14*20+8]= 13.9444000; - daa[14*20+9]= 0.0050000; - daa[14*20+10]= 9.8309500; - daa[14*20+11]= 0.1119280; - daa[14*20+12]= 0.0050000; - daa[14*20+13]= 0.0342252; - daa[15*20+0]= 8.5942000; - daa[15*20+1]= 8.3502400; - daa[15*20+2]= 14.5699000; - daa[15*20+3]= 0.4278810; - daa[15*20+4]= 1.1219500; - daa[15*20+5]= 0.1602400; - daa[15*20+6]= 0.0050000; - daa[15*20+7]= 6.2796600; - daa[15*20+8]= 0.7251570; - daa[15*20+9]= 0.7400910; - daa[15*20+10]= 6.1439600; - daa[15*20+11]= 0.0050000; - daa[15*20+12]= 0.3925750; - daa[15*20+13]= 4.2793900; - daa[15*20+14]= 14.2490000; - daa[16*20+0]= 24.1422000; - daa[16*20+1]= 0.9282030; - daa[16*20+2]= 4.5420600; - daa[16*20+3]= 0.6303950; - daa[16*20+4]= 0.0050000; - 
daa[16*20+5]= 0.2030910; - daa[16*20+6]= 0.4587430; - daa[16*20+7]= 0.0489798; - daa[16*20+8]= 0.9595600; - daa[16*20+9]= 9.3634500; - daa[16*20+10]= 0.0050000; - daa[16*20+11]= 4.0480200; - daa[16*20+12]= 7.4131300; - daa[16*20+13]= 0.1145120; - daa[16*20+14]= 4.3370100; - daa[16*20+15]= 6.3407900; - daa[17*20+0]= 0.0050000; - daa[17*20+1]= 5.9656400; - daa[17*20+2]= 0.0050000; - daa[17*20+3]= 0.0050000; - daa[17*20+4]= 5.4989400; - daa[17*20+5]= 0.0443298; - daa[17*20+6]= 0.0050000; - daa[17*20+7]= 2.8258000; - daa[17*20+8]= 0.0050000; - daa[17*20+9]= 0.0050000; - daa[17*20+10]= 1.3703100; - daa[17*20+11]= 0.0050000; - daa[17*20+12]= 0.0050000; - daa[17*20+13]= 0.0050000; - daa[17*20+14]= 0.0050000; - daa[17*20+15]= 1.1015600; - daa[17*20+16]= 0.0050000; - daa[18*20+0]= 0.0050000; - daa[18*20+1]= 0.0050000; - daa[18*20+2]= 5.0647500; - daa[18*20+3]= 2.2815400; - daa[18*20+4]= 8.3483500; - daa[18*20+5]= 0.0050000; - daa[18*20+6]= 0.0050000; - daa[18*20+7]= 0.0050000; - daa[18*20+8]= 47.4889000; - daa[18*20+9]= 0.1145120; - daa[18*20+10]= 0.0050000; - daa[18*20+11]= 0.0050000; - daa[18*20+12]= 0.5791980; - daa[18*20+13]= 4.1272800; - daa[18*20+14]= 0.0050000; - daa[18*20+15]= 0.9331420; - daa[18*20+16]= 0.4906080; - daa[18*20+17]= 0.0050000; - daa[19*20+0]= 24.8094000; - daa[19*20+1]= 0.2794250; - daa[19*20+2]= 0.0744808; - daa[19*20+3]= 2.9178600; - daa[19*20+4]= 0.0050000; - daa[19*20+5]= 0.0050000; - daa[19*20+6]= 2.1995200; - daa[19*20+7]= 2.7962200; - daa[19*20+8]= 0.8274790; - daa[19*20+9]= 24.8231000; - daa[19*20+10]= 2.9534400; - daa[19*20+11]= 0.1280650; - daa[19*20+12]= 14.7683000; - daa[19*20+13]= 2.2800000; - daa[19*20+14]= 0.0050000; - daa[19*20+15]= 0.8626370; - daa[19*20+16]= 0.0050000; - daa[19*20+17]= 0.0050000; - daa[19*20+18]= 1.3548200; - /* - f[0]= 0.038; - f[1]= 0.057; - f[2]= 0.089; - f[3]= 0.034; - f[4]= 0.024; - f[5]= 0.044; - f[6]= 0.062; - f[7]= 0.084; - f[8]= 0.016; - f[9]= 0.098; - f[10]= 0.058; - f[11]= 0.064; - f[12]= 0.016; - f[13]= 
0.042; - f[14]= 0.046; - f[15]= 0.055; - f[16]= 0.081; - f[17]= 0.020; - f[18]= 0.021; - f[19]= 0.051; - */ - // NOTE: originally f[19]=0.0515639 but frequencies do not sum up to 1 - f[0]= 0.0377494; f[1]= 0.057321; f[2]= 0.0891129; f[3]= 0.0342034; - f[4]= 0.0240105; f[5]= 0.0437824; f[6]= 0.0618606; f[7]= 0.0838496; - f[8]= 0.0156076; f[9]= 0.0983641; f[10]= 0.0577867; f[11]= 0.0641682; - f[12]= 0.0158419; f[13]= 0.0422741; f[14]= 0.0458601; f[15]= 0.0550846; - f[16]= 0.0813774; f[17]= 0.019597; f[18]= 0.0205847; f[19]= 0.0515638; - - } - else if (prot_model == "JTTDCMUT") - { - daa[1*20+0]= 0.531678; - daa[2*20+0]= 0.557967; - daa[2*20+1]= 0.451095; - daa[3*20+0]= 0.827445; - daa[3*20+1]= 0.154899; - daa[3*20+2]= 5.549530; - daa[4*20+0]= 0.574478; - daa[4*20+1]= 1.019843; - daa[4*20+2]= 0.313311; - daa[4*20+3]= 0.105625; - daa[5*20+0]= 0.556725; - daa[5*20+1]= 3.021995; - daa[5*20+2]= 0.768834; - daa[5*20+3]= 0.521646; - daa[5*20+4]= 0.091304; - daa[6*20+0]= 1.066681; - daa[6*20+1]= 0.318483; - daa[6*20+2]= 0.578115; - daa[6*20+3]= 7.766557; - daa[6*20+4]= 0.053907; - daa[6*20+5]= 3.417706; - daa[7*20+0]= 1.740159; - daa[7*20+1]= 1.359652; - daa[7*20+2]= 0.773313; - daa[7*20+3]= 1.272434; - daa[7*20+4]= 0.546389; - daa[7*20+5]= 0.231294; - daa[7*20+6]= 1.115632; - daa[8*20+0]= 0.219970; - daa[8*20+1]= 3.210671; - daa[8*20+2]= 4.025778; - daa[8*20+3]= 1.032342; - daa[8*20+4]= 0.724998; - daa[8*20+5]= 5.684080; - daa[8*20+6]= 0.243768; - daa[8*20+7]= 0.201696; - daa[9*20+0]= 0.361684; - daa[9*20+1]= 0.239195; - daa[9*20+2]= 0.491003; - daa[9*20+3]= 0.115968; - daa[9*20+4]= 0.150559; - daa[9*20+5]= 0.078270; - daa[9*20+6]= 0.111773; - daa[9*20+7]= 0.053769; - daa[9*20+8]= 0.181788; - daa[10*20+0]= 0.310007; - daa[10*20+1]= 0.372261; - daa[10*20+2]= 0.137289; - daa[10*20+3]= 0.061486; - daa[10*20+4]= 0.164593; - daa[10*20+5]= 0.709004; - daa[10*20+6]= 0.097485; - daa[10*20+7]= 0.069492; - daa[10*20+8]= 0.540571; - daa[10*20+9]= 2.335139; - daa[11*20+0]= 0.369437; - 
daa[11*20+1]= 6.529255; - daa[11*20+2]= 2.529517; - daa[11*20+3]= 0.282466; - daa[11*20+4]= 0.049009; - daa[11*20+5]= 2.966732; - daa[11*20+6]= 1.731684; - daa[11*20+7]= 0.269840; - daa[11*20+8]= 0.525096; - daa[11*20+9]= 0.202562; - daa[11*20+10]= 0.146481; - daa[12*20+0]= 0.469395; - daa[12*20+1]= 0.431045; - daa[12*20+2]= 0.330720; - daa[12*20+3]= 0.190001; - daa[12*20+4]= 0.409202; - daa[12*20+5]= 0.456901; - daa[12*20+6]= 0.175084; - daa[12*20+7]= 0.130379; - daa[12*20+8]= 0.329660; - daa[12*20+9]= 4.831666; - daa[12*20+10]= 3.856906; - daa[12*20+11]= 0.624581; - daa[13*20+0]= 0.138293; - daa[13*20+1]= 0.065314; - daa[13*20+2]= 0.073481; - daa[13*20+3]= 0.032522; - daa[13*20+4]= 0.678335; - daa[13*20+5]= 0.045683; - daa[13*20+6]= 0.043829; - daa[13*20+7]= 0.050212; - daa[13*20+8]= 0.453428; - daa[13*20+9]= 0.777090; - daa[13*20+10]= 2.500294; - daa[13*20+11]= 0.024521; - daa[13*20+12]= 0.436181; - daa[14*20+0]= 1.959599; - daa[14*20+1]= 0.710489; - daa[14*20+2]= 0.121804; - daa[14*20+3]= 0.127164; - daa[14*20+4]= 0.123653; - daa[14*20+5]= 1.608126; - daa[14*20+6]= 0.191994; - daa[14*20+7]= 0.208081; - daa[14*20+8]= 1.141961; - daa[14*20+9]= 0.098580; - daa[14*20+10]= 1.060504; - daa[14*20+11]= 0.216345; - daa[14*20+12]= 0.164215; - daa[14*20+13]= 0.148483; - daa[15*20+0]= 3.887095; - daa[15*20+1]= 1.001551; - daa[15*20+2]= 5.057964; - daa[15*20+3]= 0.589268; - daa[15*20+4]= 2.155331; - daa[15*20+5]= 0.548807; - daa[15*20+6]= 0.312449; - daa[15*20+7]= 1.874296; - daa[15*20+8]= 0.743458; - daa[15*20+9]= 0.405119; - daa[15*20+10]= 0.592511; - daa[15*20+11]= 0.474478; - daa[15*20+12]= 0.285564; - daa[15*20+13]= 0.943971; - daa[15*20+14]= 2.788406; - daa[16*20+0]= 4.582565; - daa[16*20+1]= 0.650282; - daa[16*20+2]= 2.351311; - daa[16*20+3]= 0.425159; - daa[16*20+4]= 0.469823; - daa[16*20+5]= 0.523825; - daa[16*20+6]= 0.331584; - daa[16*20+7]= 0.316862; - daa[16*20+8]= 0.477355; - daa[16*20+9]= 2.553806; - daa[16*20+10]= 0.272514; - daa[16*20+11]= 0.965641; - 
daa[16*20+12]= 2.114728; - daa[16*20+13]= 0.138904; - daa[16*20+14]= 1.176961; - daa[16*20+15]= 4.777647; - daa[17*20+0]= 0.084329; - daa[17*20+1]= 1.257961; - daa[17*20+2]= 0.027700; - daa[17*20+3]= 0.057466; - daa[17*20+4]= 1.104181; - daa[17*20+5]= 0.172206; - daa[17*20+6]= 0.114381; - daa[17*20+7]= 0.544180; - daa[17*20+8]= 0.128193; - daa[17*20+9]= 0.134510; - daa[17*20+10]= 0.530324; - daa[17*20+11]= 0.089134; - daa[17*20+12]= 0.201334; - daa[17*20+13]= 0.537922; - daa[17*20+14]= 0.069965; - daa[17*20+15]= 0.310927; - daa[17*20+16]= 0.080556; - daa[18*20+0]= 0.139492; - daa[18*20+1]= 0.235601; - daa[18*20+2]= 0.700693; - daa[18*20+3]= 0.453952; - daa[18*20+4]= 2.114852; - daa[18*20+5]= 0.254745; - daa[18*20+6]= 0.063452; - daa[18*20+7]= 0.052500; - daa[18*20+8]= 5.848400; - daa[18*20+9]= 0.303445; - daa[18*20+10]= 0.241094; - daa[18*20+11]= 0.087904; - daa[18*20+12]= 0.189870; - daa[18*20+13]= 5.484236; - daa[18*20+14]= 0.113850; - daa[18*20+15]= 0.628608; - daa[18*20+16]= 0.201094; - daa[18*20+17]= 0.747889; - daa[19*20+0]= 2.924161; - daa[19*20+1]= 0.171995; - daa[19*20+2]= 0.164525; - daa[19*20+3]= 0.315261; - daa[19*20+4]= 0.621323; - daa[19*20+5]= 0.179771; - daa[19*20+6]= 0.465271; - daa[19*20+7]= 0.470140; - daa[19*20+8]= 0.121827; - daa[19*20+9]= 9.533943; - daa[19*20+10]= 1.761439; - daa[19*20+11]= 0.124066; - daa[19*20+12]= 3.038533; - daa[19*20+13]= 0.593478; - daa[19*20+14]= 0.211561; - daa[19*20+15]= 0.408532; - daa[19*20+16]= 1.143980; - daa[19*20+17]= 0.239697; - daa[19*20+18]= 0.165473; - - f[0]= 0.077; - f[1]= 0.051; - f[2]= 0.043; - f[3]= 0.051; - f[4]= 0.020; - f[5]= 0.041; - f[6]= 0.062; - f[7]= 0.075; - f[8]= 0.023; - f[9]= 0.053; - f[10]= 0.091; - f[11]= 0.059; - f[12]= 0.024; - f[13]= 0.040; - f[14]= 0.051; - f[15]= 0.068; - f[16]= 0.059; - f[17]= 0.014; - f[18]= 0.032; - f[19]= 0.066; - } - else if (prot_model == "FLU") - { - daa[ 1*20+ 0] = 0.138658765 ; - daa[ 2*20+ 0] = 0.053366579 ; - daa[ 2*20+ 1] = 0.161000889 ; - daa[ 3*20+ 0] = 
0.584852306 ; - daa[ 3*20+ 1] = 0.006771843 ; - daa[ 3*20+ 2] = 7.737392871 ; - daa[ 4*20+ 0] = 0.026447095 ; - daa[ 4*20+ 1] = 0.167207008 ; - daa[ 4*20+ 2] = 1.30E-05 ; - daa[ 4*20+ 3] = 1.41E-02 ; - daa[ 5*20+ 0] = 0.353753982 ; - daa[ 5*20+ 1] = 3.292716942 ; - daa[ 5*20+ 2] = 0.530642655 ; - daa[ 5*20+ 3] = 0.145469388 ; - daa[ 5*20+ 4] = 0.002547334 ; - daa[ 6*20+ 0] = 1.484234503 ; - daa[ 6*20+ 1] = 0.124897617 ; - daa[ 6*20+ 2] = 0.061652192 ; - daa[ 6*20+ 3] = 5.370511279 ; - daa[ 6*20+ 4] = 3.91E-11 ; - daa[ 6*20+ 5] = 1.195629122 ; - daa[ 7*20+ 0] = 1.132313122 ; - daa[ 7*20+ 1] = 1.190624465 ; - daa[ 7*20+ 2] = 0.322524648 ; - daa[ 7*20+ 3] = 1.934832784 ; - daa[ 7*20+ 4] = 0.116941459 ; - daa[ 7*20+ 5] = 0.108051341 ; - daa[ 7*20+ 6] = 1.593098825 ; - daa[ 8*20+ 0] = 0.214757862 ; - daa[ 8*20+ 1] = 1.879569938 ; - daa[ 8*20+ 2] = 1.387096032 ; - daa[ 8*20+ 3] = 0.887570549 ; - daa[ 8*20+ 4] = 2.18E-02 ; - daa[ 8*20+ 5] = 5.330313412 ; - daa[ 8*20+ 6] = 0.256491863 ; - daa[ 8*20+ 7] = 0.058774527 ; - daa[ 9*20+ 0] = 0.149926734 ; - daa[ 9*20+ 1] = 0.246117172 ; - daa[ 9*20+ 2] = 0.218571975 ; - daa[ 9*20+ 3] = 0.014085917 ; - daa[ 9*20+ 4] = 0.001112158 ; - daa[ 9*20+ 5] = 0.02883995 ; - daa[ 9*20+ 6] = 1.42E-02 ; - daa[ 9*20+ 7] = 1.63E-05 ; - daa[ 9*20+ 8] = 0.243190142 ; - daa[10*20+ 0] = 0.023116952 ; - daa[10*20+ 1] = 0.296045557 ; - daa[10*20+ 2] = 8.36E-04 ; - daa[10*20+ 3] = 0.005730682 ; - daa[10*20+ 4] = 0.005613627 ; - daa[10*20+ 5] = 1.020366955 ; - daa[10*20+ 6] = 0.016499536 ; - daa[10*20+ 7] = 0.006516229 ; - daa[10*20+ 8] = 0.321611694 ; - daa[10*20+ 9] = 3.512072282 ; - daa[11*20+ 0] = 0.47433361 ; - daa[11*20+ 1] = 15.30009662 ; - daa[11*20+ 2] = 2.646847965 ; - daa[11*20+ 3] = 0.29004298 ; - daa[11*20+ 4] = 3.83E-06 ; - daa[11*20+ 5] = 2.559587177 ; - daa[11*20+ 6] = 3.881488809 ; - daa[11*20+ 7] = 0.264148929 ; - daa[11*20+ 8] = 0.347302791 ; - daa[11*20+ 9] = 0.227707997 ; - daa[11*20+10] = 0.129223639 ; - daa[12*20+ 0] = 
0.058745423 ; - daa[12*20+ 1] = 0.890162346 ; - daa[12*20+ 2] = 0.005251688 ; - daa[12*20+ 3] = 0.041762964 ; - daa[12*20+ 4] = 0.11145731 ; - daa[12*20+ 5] = 0.190259181 ; - daa[12*20+ 6] = 0.313974351 ; - daa[12*20+ 7] = 0.001500467 ; - daa[12*20+ 8] = 0.001273509 ; - daa[12*20+ 9] = 9.017954203 ; - daa[12*20+10] = 6.746936485 ; - daa[12*20+11] = 1.331291619 ; - daa[13*20+ 0] = 0.080490909 ; - daa[13*20+ 1] = 1.61E-02 ; - daa[13*20+ 2] = 8.36E-04 ; - daa[13*20+ 3] = 1.06E-06 ; - daa[13*20+ 4] = 0.104053666 ; - daa[13*20+ 5] = 0.032680657 ; - daa[13*20+ 6] = 0.001003501 ; - daa[13*20+ 7] = 0.001236645 ; - daa[13*20+ 8] = 0.119028506 ; - daa[13*20+ 9] = 1.463357278 ; - daa[13*20+10] = 2.986800036 ; - daa[13*20+11] = 3.20E-01 ; - daa[13*20+12] = 0.279910509 ; - daa[14*20+ 0] = 0.659311478 ; - daa[14*20+ 1] = 0.15402718 ; - daa[14*20+ 2] = 3.64E-02 ; - daa[14*20+ 3] = 0.188539456 ; - daa[14*20+ 4] = 1.59E-13 ; - daa[14*20+ 5] = 0.712769599 ; - daa[14*20+ 6] = 0.319558828 ; - daa[14*20+ 7] = 0.038631761 ; - daa[14*20+ 8] = 0.924466914 ; - daa[14*20+ 9] = 0.080543327 ; - daa[14*20+10] = 0.634308521 ; - daa[14*20+11] = 0.195750632 ; - daa[14*20+12] = 5.69E-02 ; - daa[14*20+13] = 0.00713243 ; - daa[15*20+ 0] = 3.011344519 ; - daa[15*20+ 1] = 0.95013841 ; - daa[15*20+ 2] = 3.881310531 ; - daa[15*20+ 3] = 0.338372183 ; - daa[15*20+ 4] = 0.336263345 ; - daa[15*20+ 5] = 0.487822499 ; - daa[15*20+ 6] = 0.307140298 ; - daa[15*20+ 7] = 1.585646577 ; - daa[15*20+ 8] = 0.58070425 ; - daa[15*20+ 9] = 0.290381075 ; - daa[15*20+10] = 0.570766693 ; - daa[15*20+11] = 0.283807672 ; - daa[15*20+12] = 0.007026588 ; - daa[15*20+13] = 0.99668567 ; - daa[15*20+14] = 2.087385344 ; - daa[16*20+ 0] = 5.418298175 ; - daa[16*20+ 1] = 0.183076905 ; - daa[16*20+ 2] = 2.140332316 ; - daa[16*20+ 3] = 0.135481233 ; - daa[16*20+ 4] = 0.011975266 ; - daa[16*20+ 5] = 0.602340963 ; - daa[16*20+ 6] = 0.280124895 ; - daa[16*20+ 7] = 0.01880803 ; - daa[16*20+ 8] = 0.368713573 ; - daa[16*20+ 9] = 2.904052286 
; - daa[16*20+10] = 0.044926357 ; - daa[16*20+11] = 1.5269642 ; - daa[16*20+12] = 2.031511321 ; - daa[16*20+13] = 0.000134906 ; - daa[16*20+14] = 0.542251094 ; - daa[16*20+15] = 2.206859934 ; - daa[17*20+ 0] = 1.96E-01 ; - daa[17*20+ 1] = 1.369429408 ; - daa[17*20+ 2] = 5.36E-04 ; - daa[17*20+ 3] = 1.49E-05 ; - daa[17*20+ 4] = 0.09410668 ; - daa[17*20+ 5] = 4.40E-02 ; - daa[17*20+ 6] = 0.155245492 ; - daa[17*20+ 7] = 0.196486447 ; - daa[17*20+ 8] = 2.24E-02 ; - daa[17*20+ 9] = 0.03213215 ; - daa[17*20+10] = 0.431277663 ; - daa[17*20+11] = 4.98E-05 ; - daa[17*20+12] = 0.070460039 ; - daa[17*20+13] = 0.814753094 ; - daa[17*20+14] = 0.000431021 ; - daa[17*20+15] = 0.099835753 ; - daa[17*20+16] = 0.207066206 ; - daa[18*20+ 0] = 0.018289288 ; - daa[18*20+ 1] = 0.099855497 ; - daa[18*20+ 2] = 0.373101927 ; - daa[18*20+ 3] = 0.525398543 ; - daa[18*20+ 4] = 0.601692431 ; - daa[18*20+ 5] = 0.072205935 ; - daa[18*20+ 6] = 0.10409287 ; - daa[18*20+ 7] = 0.074814997 ; - daa[18*20+ 8] = 6.448954446 ; - daa[18*20+ 9] = 0.273934263 ; - daa[18*20+10] = 0.340058468 ; - daa[18*20+11] = 0.012416222 ; - daa[18*20+12] = 0.874272175 ; - daa[18*20+13] = 5.393924245 ; - daa[18*20+14] = 1.82E-04 ; - daa[18*20+15] = 0.39255224 ; - daa[18*20+16] = 0.12489802 ; - daa[18*20+17] = 0.42775543 ; - daa[19*20+ 0] = 3.53200527 ; - daa[19*20+ 1] = 0.103964386 ; - daa[19*20+ 2] = 0.010257517 ; - daa[19*20+ 3] = 0.297123975 ; - daa[19*20+ 4] = 0.054904564 ; - daa[19*20+ 5] = 0.406697814 ; - daa[19*20+ 6] = 0.285047948 ; - daa[19*20+ 7] = 0.337229619 ; - daa[19*20+ 8] = 0.098631355 ; - daa[19*20+ 9] = 14.39405219 ; - daa[19*20+10] = 0.890598579 ; - daa[19*20+11] = 0.07312793 ; - daa[19*20+12] = 4.904842235 ; - daa[19*20+13] = 0.592587985 ; - daa[19*20+14] = 0.058971975 ; - daa[19*20+15] = 0.088256423 ; - daa[19*20+16] = 0.654109108 ; - daa[19*20+17] = 0.256900461 ; - daa[19*20+18] = 0.167581647 ; - +model WAG= +55.15710 +50.98480 63.53460 +73.89980 14.73040 542.94200 +102.70400 52.81910 26.52560 3.02949 
+90.85980 303.55000 154.36400 61.67830 9.88179 +158.28500 43.91570 94.71980 617.41600 2.13520 546.94700 +141.67200 58.46650 112.55600 86.55840 30.66740 33.00520 56.77170 +31.69540 213.71500 395.62900 93.06760 24.89720 429.41100 57.00250 24.94100 +19.33350 18.69790 55.42360 3.94370 17.01350 11.39170 12.73950 3.04501 13.81900 +39.79150 49.76710 13.15280 8.48047 38.42870 86.94890 15.42630 6.13037 49.94620 317.09700 +90.62650 535.14200 301.20100 47.98550 7.40339 389.49000 258.44300 37.35580 89.04320 32.38320 25.75550 +89.34960 68.31620 19.82210 10.37540 39.04820 154.52600 31.51240 17.41000 40.41410 425.74600 485.40200 93.42760 +21.04940 10.27110 9.61621 4.67304 39.80200 9.99208 8.11339 4.99310 67.93710 105.94700 211.51700 8.88360 119.06300 +143.85500 67.94890 19.50810 42.39840 10.94040 93.33720 68.23550 24.35700 69.61980 9.99288 41.58440 55.68960 17.13290 16.14440 +337.07900 122.41900 397.42300 107.17600 140.76600 102.88700 70.49390 134.18200 74.01690 31.94400 34.47390 96.71300 49.39050 54.59310 161.32800 +212.11100 55.44130 203.00600 37.48660 51.29840 85.79280 82.27650 22.58330 47.33070 145.81600 32.66220 138.69800 151.61200 17.19030 79.53840 437.80200 +11.31330 116.39200 7.19167 12.97670 71.70700 21.57370 15.65570 33.69830 26.25690 21.24830 66.53090 13.75050 51.57060 152.96400 13.94050 52.37420 11.08640 +24.07350 38.15330 108.60000 32.57110 54.38330 22.77100 19.63030 10.36040 387.34400 42.01700 39.86180 13.32640 42.84370 645.42800 21.60460 78.69930 29.11480 248.53900 +200.60100 25.18490 19.62460 15.23350 100.21400 30.12810 58.87310 18.72470 11.83580 782.13000 180.03400 30.54340 205.84500 64.98920 31.48870 23.27390 138.82300 36.53690 31.47300 +0.08662791 0.043972 0.0390894 0.05704511 0.0193078 0.0367281 0.05805891 0.08325181 0.0244313 0.048466 0.08620901 0.06202861 0.0195027 0.0384319 0.0457631 0.06951791 0.06101271 0.0143859 0.0352742 0.07089561; +[ NOTE 2019-06-04: normalised from original WAG freqs, which do not sum to 1.0 ] + +model RTREV= +34 +51 35 +10 30 384 
+439 92 128 1 +32 221 236 78 70 +81 10 79 542 1 372 +135 41 94 61 48 18 70 +30 90 320 91 124 387 34 68 +1 24 35 1 104 33 1 1 34 +45 18 15 5 110 54 21 3 51 385 +38 593 123 20 16 309 141 30 76 34 23 +235 57 1 1 156 158 1 37 116 375 581 134 +1 7 49 1 70 1 1 7 141 64 179 14 247 +97 24 33 55 1 68 52 17 44 10 22 43 1 11 +460 102 294 136 75 225 95 152 183 4 24 77 1 20 134 +258 64 148 55 117 146 82 7 49 72 25 110 131 69 62 671 +5 13 16 1 55 10 17 23 48 39 47 6 111 182 9 14 1 +55 47 28 1 131 45 1 21 307 26 64 1 74 1017 14 31 34 176 +197 29 21 6 295 36 35 3 1 1048 112 19 236 92 25 39 196 26 59 +0.0646 0.0453 0.0376 0.0422 0.0114 0.0606 0.0607 0.0639 0.0273 0.0679 0.1018 0.0751 0.0150 0.0287 0.0681 0.0488 0.0622 0.0251 0.0318 0.0619; + +model CPREV= +105 +227 357 +175 43 4435 +669 823 538 10 +157 1745 768 400 10 +499 152 1055 3691 10 3122 +665 243 653 431 303 133 379 +66 715 1405 331 441 1269 162 19 +145 136 168 10 280 92 148 40 29 +197 203 113 10 396 286 82 20 66 1745 +236 4482 2430 412 48 3313 2629 263 305 345 218 +185 125 61 47 159 202 113 21 10 1772 1351 193 +68 53 97 22 726 10 145 25 127 454 1268 72 327 +490 87 173 170 285 323 185 28 152 117 219 302 100 43 +2440 385 2085 590 2331 396 568 691 303 216 516 868 93 487 1202 +1340 314 1393 266 576 241 369 92 32 1040 156 918 645 148 260 2151 +14 230 40 18 435 53 63 82 69 42 159 10 86 468 49 73 29 +56 323 754 281 1466 391 142 10 1971 89 189 247 215 2370 97 522 71 346 +968 92 83 75 592 54 200 91 25 4797 865 249 475 317 122 167 760 10 119 +0.0755 0.0621 0.0410 0.0371 0.0091 0.0382 0.0495 0.0838 0.0246 0.0806 0.1011 0.0504 0.0220 0.0506 0.0431 0.0622 0.0543 0.0181 0.0307 0.0660; +[ NOTE 2019-06-04: CPREV freqs taken from PAML package with higher precision ] + +model VT= +1.2412691067876198 +1.2184237953498958 1.5720770753326880 +1.3759368509441177 0.7550654439001206 7.8584219153689405 +2.4731223087544874 1.4414262567428417 0.9784679122774127 0.2272488448121475 +2.2155167805137470 5.5120819705248678 3.0143201670924822 
1.6562495638176040 0.4587469126746136 +2.3379911207495061 1.3542404860613146 2.0093434778398112 9.6883451875685065 0.4519167943192672 6.8124601839937675 +3.3386555146457697 1.3121700301622004 2.4117632898861809 1.9142079025990228 1.1034605684472507 0.8776110594765502 1.3860121390169038 +0.9615841926910841 4.9238668283945266 6.1974384977884114 2.1459640610133781 1.5196756759380692 7.9943228564946525 1.6360079688522375 0.8561248973045037 +0.8908203061925510 0.4323005487925516 0.9179291175331520 0.2161660372725585 0.9126668032539315 0.4882733432879921 0.4035497929633328 0.2888075033037488 0.5787937115407940 +1.0778497408764076 0.8386701149158265 0.4098311270816011 0.3574207468998517 1.4081315998413697 1.3318097154194044 0.5610717242294755 0.3578662395745526 1.0765007949562073 6.0019110258426362 +1.4932055816372476 10.0173308173660018 4.4034547578962568 1.4521790561663968 0.3371091785647479 6.0519085243118811 4.3290086529582830 0.8945563662345198 1.8085136096039203 0.6244297525127139 0.5642322882556321 +1.9006455961717605 1.2488638689609959 0.9378803706165143 0.4075239926000898 1.2213054800811556 1.9106190827629084 0.7471936218068498 0.5954812791740037 1.3808291710019667 6.7597899772045418 8.0327792947421148 1.7129670976916258 +0.6883439026872615 0.4224945197276290 0.5044944273324311 0.1675129724559251 1.6953951980808002 0.3573432522499545 0.2317194387691585 0.3693722640980460 1.3629765501081097 2.2864286949316077 4.3611548063555778 0.3910559903834828 2.3201373546296349 +2.7355620089953550 1.3091837782420783 0.7103720531974738 1.0714605979577547 0.4326227078645523 2.3019177728300728 1.5132807416252063 0.7744933618134962 1.8370555852070649 0.4811402387911145 1.0084320519837335 1.3918935593582853 0.4953193808676289 0.3746821107962129 +6.4208961859142883 1.9202994262316166 6.1234512396801764 2.2161944596741829 3.6366815408744255 2.3193703643237220 1.8273535587773553 3.0637776193717610 1.9699895187387506 0.6047491507504744 0.8953754669269811 1.9776630140912268 
1.0657482318076852 1.1079144700606407 3.5465914843628927 +5.2892514169776437 1.3363401740560601 3.8852506105922231 1.5066839872944762 1.7557065205837685 2.1576510103471440 1.5839981708584689 0.7147489676267383 1.6136654573285647 2.6344778384442731 1.0192004372506540 2.5513781312660280 3.3628488360462363 0.6882725908872254 1.9485376673137556 8.8479984061248178 +0.5488578478106930 1.5170142153962840 0.1808525752605976 0.2496584188151770 1.6275179891253113 0.8959082681546182 0.4198391148111098 0.9349753595598769 0.6301954684360302 0.5604648274060783 1.5183114434679339 0.5851920879490173 1.4680478689711018 3.3448437239772266 0.4326058001438786 0.6791126595939816 0.4514203099376473 +0.5411769916657778 0.8912614404565405 1.0894926581511342 0.7447620891784513 2.1579775140421025 0.9183596801412757 0.5818111331782764 0.3374467649724478 7.7587442309146040 0.8626796044156272 1.2452243224541324 0.7835447533710449 1.0899165770956820 10.3848523331334590 0.4819109019647465 0.9547229305958682 0.8564314184691215 4.5377235790405388 +4.6501894691803214 0.7807017855806767 0.4586061981719967 0.4594535241660911 2.2627456996290891 0.6366932501396869 0.8940572875547330 0.6193321034173915 0.5333220944030346 14.8729334615190609 3.5458093276667237 0.7801080335991272 4.0584577156753401 1.7039730522675411 0.5985498912985666 0.9305232113028208 3.4242218450865543 0.5658969249032649 1.0000000000000000 +0.0770764620135024 0.0500819370772208 0.0462377395993731 0.0537929860758246 0.0144533387583345 0.0408923608974345 0.0633579339160905 0.0655672355884439 0.0218802687005936 0.0591969699027449 0.0976461276528445 0.0592079410822730 0.0220695876653368 0.0413508521834260 0.0476871596856874 0.0707295165111524 0.0567759161524817 0.0127019797647213 0.0323746050281867 0.0669190817443274; + +model BLOSUM62= +0.735790389698 +0.485391055466 1.297446705134 +0.543161820899 0.500964408555 3.180100048216 +1.459995310470 0.227826574209 0.397358949897 0.240836614802 +1.199705704602 3.020833610064 1.839216146992 
1.190945703396 0.329801504630 +1.170949042800 1.360574190420 1.240488508640 3.761625208368 0.140748891814 5.528919177928 +1.955883574960 0.418763308518 1.355872344485 0.798473248968 0.418203192284 0.609846305383 0.423579992176 +0.716241444998 1.456141166336 2.414501434208 0.778142664022 0.354058109831 2.435341131140 1.626891056982 0.539859124954 +0.605899003687 0.232036445142 0.283017326278 0.418555732462 0.774894022794 0.236202451204 0.186848046932 0.189296292376 0.252718447885 +0.800016530518 0.622711669692 0.211888159615 0.218131577594 0.831842640142 0.580737093181 0.372625175087 0.217721159236 0.348072209797 3.890963773304 +1.295201266783 5.411115141489 1.593137043457 1.032447924952 0.285078800906 3.945277674515 2.802427151679 0.752042440303 1.022507035889 0.406193586642 0.445570274261 +1.253758266664 0.983692987457 0.648441278787 0.222621897958 0.767688823480 2.494896077113 0.555415397470 0.459436173579 0.984311525359 3.364797763104 6.030559379572 1.073061184332 +0.492964679748 0.371644693209 0.354861249223 0.281730694207 0.441337471187 0.144356959750 0.291409084165 0.368166464453 0.714533703928 1.517359325954 2.064839703237 0.266924750511 1.773855168830 +1.173275900924 0.448133661718 0.494887043702 0.730628272998 0.356008498769 0.858570575674 0.926563934846 0.504086599527 0.527007339151 0.388355409206 0.374555687471 1.047383450722 0.454123625103 0.233597909629 +4.325092687057 1.122783104210 2.904101656456 1.582754142065 1.197188415094 1.934870924596 1.769893238937 1.509326253224 1.117029762910 0.357544412460 0.352969184527 1.752165917819 0.918723415746 0.540027644824 1.169129577716 +1.729178019485 0.914665954563 1.898173634533 0.934187509431 1.119831358516 1.277480294596 1.071097236007 0.641436011405 0.585407090225 1.179091197260 0.915259857694 1.303875200799 1.488548053722 0.488206118793 1.005451683149 5.151556292270 +0.465839367725 0.426382310122 0.191482046247 0.145345046279 0.527664418872 0.758653808642 0.407635648938 0.508358924638 0.301248600780 
0.341985787540 0.691474634600 0.332243040634 0.888101098152 2.074324893497 0.252214830027 0.387925622098 0.513128126891 +0.718206697586 0.720517441216 0.538222519037 0.261422208965 0.470237733696 0.958989742850 0.596719300346 0.308055737035 4.218953969389 0.674617093228 0.811245856323 0.717993486900 0.951682162246 6.747260430801 0.369405319355 0.796751520761 0.801010243199 4.054419006558 +2.187774522005 0.438388343772 0.312858797993 0.258129289418 1.116352478606 0.530785790125 0.524253846338 0.253340790190 0.201555971750 8.311839405458 2.231405688913 0.498138475304 2.575850755315 0.838119610178 0.496908410676 0.561925457442 2.253074051176 0.266508731426 1.000000000000 +0.074 0.052 0.045 0.054 0.025 0.034 0.054 0.074 0.026 0.068 0.099 0.058 0.025 0.047 0.039 0.057 0.051 0.013 0.032 0.073; + +model MTMAM= +32 +2 4 +11 0.000001 864 +0.000001 186 0.000001 0.000001 +0.000001 246 8 49 0.000001 +0.000001 0.000001 0.000001 569 0.000001 274 +78 18 47 79 0.000001 0.000001 22 +8 232 458 11 305 550 22 0.000001 +75 0.000001 19 0.000001 41 0.000001 0.000001 0.000001 0.000001 +21 6 0.000001 0.000001 27 20 0.000001 0.000001 26 232 +0.000001 50 408 0.000001 0.000001 242 215 0.000001 0.000001 6 4 +76 0.000001 21 0.000001 0.000001 22 0.000001 0.000001 0.000001 378 609 59 +0.000001 0.000001 6 5 7 0.000001 0.000001 0.000001 0.000001 57 246 0.000001 11 +53 9 33 2 0.000001 51 0.000001 0.000001 53 5 43 18 0.000001 17 +342 3 446 16 347 30 21 112 20 0.000001 74 65 47 90 202 +681 0.000001 110 0.000001 114 0.000001 4 0.000001 1 360 34 50 691 8 78 614 +5 16 6 0.000001 65 0.000001 0.000001 0.000001 0.000001 0.000001 12 0.000001 13 0.000001 7 17 0.000001 +0.000001 0.000001 156 0.000001 530 54 0.000001 1 1525 16 25 67 0.000001 682 8 107 0.000001 14 +398 0.000001 0.000001 10 0.000001 33 20 5 0.000001 2220 100 0.000001 832 6 0.000001 0.000001 237 0.000001 0.000001 +0.0692 0.0184 0.0400 0.0186 0.0065 0.0238 0.0236 0.0557 0.0277 0.0905 0.1675 0.0221 0.0561 0.0611 0.0536 0.0725 0.0870 0.0293 0.0340 
0.0428; + +model LG= +0.425093 +0.276818 0.751878 +0.395144 0.123954 5.076149 +2.489084 0.534551 0.528768 0.062556 +0.969894 2.807908 1.695752 0.523386 0.084808 +1.038545 0.363970 0.541712 5.243870 0.003499 4.128591 +2.066040 0.390192 1.437645 0.844926 0.569265 0.267959 0.348847 +0.358858 2.426601 4.509238 0.927114 0.640543 4.813505 0.423881 0.311484 +0.149830 0.126991 0.191503 0.010690 0.320627 0.072854 0.044265 0.008705 0.108882 +0.395337 0.301848 0.068427 0.015076 0.594007 0.582457 0.069673 0.044261 0.366317 4.145067 +0.536518 6.326067 2.145078 0.282959 0.013266 3.234294 1.807177 0.296636 0.697264 0.159069 0.137500 +1.124035 0.484133 0.371004 0.025548 0.893680 1.672569 0.173735 0.139538 0.442472 4.273607 6.312358 0.656604 +0.253701 0.052722 0.089525 0.017416 1.105251 0.035855 0.018811 0.089586 0.682139 1.112727 2.592692 0.023918 1.798853 +1.177651 0.332533 0.161787 0.394456 0.075382 0.624294 0.419409 0.196961 0.508851 0.078281 0.249060 0.390322 0.099849 0.094464 +4.727182 0.858151 4.008358 1.240275 2.784478 1.223828 0.611973 1.739990 0.990012 0.064105 0.182287 0.748683 0.346960 0.361819 1.338132 +2.139501 0.578987 2.000679 0.425860 1.143480 1.080136 0.604545 0.129836 0.584262 1.033739 0.302936 1.136863 2.020366 0.165001 0.571468 6.472279 +0.180717 0.593607 0.045376 0.029890 0.670128 0.236199 0.077852 0.268491 0.597054 0.111660 0.619632 0.049906 0.696175 2.457121 0.095131 0.248862 0.140825 +0.218959 0.314440 0.612025 0.135107 1.165532 0.257336 0.120037 0.054679 5.306834 0.232523 0.299648 0.131932 0.481306 7.803902 0.089613 0.400547 0.245841 3.151815 +2.547870 0.170887 0.083688 0.037967 1.959291 0.210332 0.245034 0.076701 0.119013 10.649107 1.702745 0.185202 1.898718 0.654683 0.296501 0.098369 2.188158 0.189510 0.249313 +0.07906592 0.05594094 0.04197696 0.05305195 0.01293699 0.04076696 0.07158593 0.05733694 0.02235498 0.06215694 0.0990809 0.06459994 0.02295098 0.04230196 0.04403996 0.06119694 0.05328695 0.01206599 0.03415497 0.06914693; +[ NOTE 2019-06-04: 
normalised from original LG freqs, which do not sum to 1.0 + http://www.atgc-montpellier.fr/download/datasets/models/lg_LG.PAML.txt ] + +model MTART= +0.2 +0.2 0.2 +1.0 4.0 500.0 +254.0 36.0 98.0 11.0 +0.2 154.0 262.0 0.2 0.2 +0.2 0.2 183.0 862.0 0.2 262.0 +200.0 0.2 121.0 12.0 81.0 3.0 44.0 +0.2 41.0 180.0 0.2 12.0 314.0 15.0 0.2 +26.0 2.0 21.0 7.0 63.0 11.0 7.0 3.0 0.2 +4.0 2.0 13.0 1.0 79.0 16.0 2.0 1.0 6.0 515.0 +0.2 209.0 467.0 2.0 0.2 349.0 106.0 0.2 0.2 3.0 4.0 +121.0 5.0 79.0 0.2 312.0 67.0 0.2 56.0 0.2 515.0 885.0 106.0 +13.0 5.0 20.0 0.2 184.0 0.2 0.2 1.0 14.0 118.0 263.0 11.0 322.0 +49.0 0.2 17.0 0.2 0.2 39.0 8.0 0.2 1.0 0.2 12.0 17.0 5.0 15.0 +673.0 3.0 398.0 44.0 664.0 52.0 31.0 226.0 11.0 7.0 8.0 144.0 112.0 36.0 87.0 +244.0 0.2 166.0 0.2 183.0 44.0 43.0 0.2 19.0 204.0 48.0 70.0 289.0 14.0 47.0 660.0 +0.2 0.2 8.0 0.2 22.0 7.0 11.0 2.0 0.2 0.2 21.0 16.0 71.0 54.0 0.2 2.0 0.2 +1.0 4.0 251.0 0.2 72.0 87.0 8.0 9.0 191.0 12.0 20.0 117.0 71.0 792.0 18.0 30.0 46.0 38.0 +340.0 0.2 23.0 0.2 350.0 0.2 14.0 3.0 0.2 1855.0 85.0 26.0 281.0 52.0 32.0 61.0 544.0 0.2 2.0 +0.054116 0.018227 0.039903 0.020160 0.009709 0.018781 0.024289 0.068183 0.024518 0.092638 0.148658 0.021718 0.061453 0.088668 0.041826 0.091030 0.049194 0.029786 0.039443 0.057700; + +model MTZOA= +3.3 +1.7 33.6 +16.1 3.2 617.0 +272.5 61.1 94.6 9.5 +7.3 231.0 190.3 19.3 49.1 +17.1 6.4 174.0 883.6 3.4 349.4 +289.3 7.2 99.3 26.0 82.4 8.9 43.1 +2.3 61.7 228.9 55.6 37.5 421.8 14.9 7.4 +33.2 0.2 24.3 1.5 48.8 0.2 7.3 3.4 1.6 +15.6 4.1 7.9 0.5 59.7 23.0 1.0 3.5 6.6 425.2 +0.2 292.3 413.4 0.2 0.2 334.0 163.2 10.1 23.9 8.4 6.7 +136.5 3.8 73.7 0.2 264.8 83.9 0.2 52.2 7.1 449.7 636.3 83.0 +26.5 0.2 12.9 2.0 167.8 9.5 0.2 5.8 13.1 90.3 234.2 16.3 215.6 +61.8 7.5 22.6 0.2 8.1 52.2 20.6 1.3 15.6 2.6 11.4 24.3 5.4 10.5 +644.9 11.8 420.2 51.4 656.3 96.4 38.4 257.1 23.1 7.2 15.2 144.9 95.3 32.2 79.7 +378.1 3.2 184.6 2.3 199.0 39.4 34.5 5.2 19.4 222.3 50.0 75.5 305.1 19.3 56.9 666.3 +3.1 16.9 6.4 0.2 36.1 6.1 3.5 
12.3 4.5 9.7 27.2 6.6 48.7 58.2 1.3 10.3 3.6 +2.1 13.8 141.6 13.9 76.7 52.3 10.0 4.3 266.5 13.1 5.7 45.0 41.4 590.5 4.2 29.7 29.0 79.8 +321.9 5.1 7.1 3.7 243.8 9.0 16.3 23.7 0.3 1710.6 126.1 11.1 279.6 59.6 17.9 49.5 396.4 13.7 15.6 +0.06887993 0.02103698 0.03038997 0.02069598 0.00996599 0.01862298 0.02498898 0.07196793 0.02681397 0.08507191 0.15671684 0.01927598 0.05065195 0.08171192 0.04480296 0.08053492 0.05638594 0.02799797 0.03740396 0.06608293; +[ NOTE 2019-06-04: original mtzoa freqs do not sum to 1.0, modified from PAML package ] + +model PMB= +0.674995699 +0.589645178 1.189067034 +0.462499504 0.605460903 3.573373315 +1.065445546 0.314448330 0.589852457 0.246951424 +1.111766964 2.967840934 2.299755865 1.686058219 0.245163782 +1.046334652 1.201770702 1.277836748 4.399995525 0.091071867 4.159678990 +1.587964372 0.523770553 1.374854049 0.734992057 0.317066320 0.596789898 0.463812837 +0.580830874 1.457127446 2.283037894 0.839348444 0.411543728 1.812173605 0.877842609 0.476331437 +0.464590585 0.359645860 0.426069419 0.266775558 0.417547309 0.315256838 0.304215290 0.180198883 0.285186418 +0.804404505 0.520701585 0.410094470 0.269124919 0.450795211 0.625792937 0.320784710 0.259854426 0.363981358 4.162454693 +0.831998835 4.956476453 2.037575629 1.114178954 0.274163536 3.521346591 2.415974716 0.581001076 0.985885486 0.374784947 0.498011337 +1.546725076 0.813462540 0.737846301 0.341932741 0.618614612 2.067388546 0.531773639 0.465349326 0.380925433 3.658070120 5.002338375 0.661095832 +0.546169219 0.303437244 0.425193716 0.219005213 0.669206193 0.406042546 0.224154698 0.354028910 0.576231691 1.495264661 2.392638293 0.269496317 2.306919847 +1.241586045 0.655773380 0.711495595 0.775624818 0.198679914 0.850116543 0.794584081 0.588254139 0.456058589 0.366232942 0.430073179 1.036079005 0.337502282 0.481144863 +3.452308792 0.910144334 2.572577221 1.440896785 0.998700980 1.348272505 1.205509425 1.402122097 0.799966711 0.530641901 0.402471997 1.234648153 0.945453716 
0.613230817 1.217683028 +1.751412803 0.895171490 1.823161023 0.994227284 0.847312432 1.320626678 0.949599791 0.542185658 0.830392810 1.114132523 0.779827336 1.290709079 1.551488041 0.718895136 0.780913179 4.448982584 +0.350110510 0.618778365 0.422407388 0.362495245 0.445669347 0.720384740 0.261258229 0.378748270 0.724367510 0.516260502 0.794797115 0.433409620 0.768395107 3.295193440 0.499869138 0.496334956 0.383723610 +0.573154753 0.628599063 0.720013799 0.436220437 0.556261630 0.728970584 0.507200030 0.284727562 2.210952064 0.570562395 0.811019594 0.664884513 0.932536060 5.894735673 0.433748126 0.593795813 0.523549536 2.996248013 +2.063050067 0.388680158 0.474418852 0.275658381 0.998911631 0.634408285 0.527640634 0.314700907 0.305792277 8.002789424 2.113077156 0.526184203 1.737356217 0.983844803 0.551333603 0.507506011 1.899650790 0.429570747 0.716795463 - - f[0] = 0.0471 ; - f[1] = 0.0509 ; - f[2] = 0.0742 ; - f[3] = 0.0479 ; - f[4] = 0.0250 ; - f[5] = 0.0333 ; - f[6] = 0.0546 ; - f[7] = 0.0764 ; - f[8] = 0.0200 ; - f[9] = 0.0671 ; - f[10] = 0.0715 ; - f[11] = 0.0568 ; - f[12] = 0.0181 ; - f[13] = 0.0305 ; - f[14] = 0.0507 ; - f[15] = 0.0884 ; - f[16] = 0.0743 ; - f[17] = 0.0185 ; - f[18] = 0.0315 ; - f[19] = 0.0632 ; - } - else if (prot_model == "MTMET") - { - daa[ 1*20+ 0] = 0.058078195 ; - daa[ 2*20+ 0] = 0.03289392 ; - daa[ 2*20+ 1] = 0.141364275 ; - daa[ 3*20+ 0] = 0.119156855 ; - daa[ 3*20+ 1] = 0.049700412 ; - daa[ 3*20+ 2] = 4.658420071 ; - daa[ 4*20+ 0] = 0.633255848 ; - daa[ 4*20+ 1] = 0.739813857 ; - daa[ 4*20+ 2] = 2.93E-01 ; - daa[ 4*20+ 3] = 7.74E-02 ; - daa[ 5*20+ 0] = 0.052454947 ; - daa[ 5*20+ 1] = 2.673108089 ; - daa[ 5*20+ 2] = 0.832791533 ; - daa[ 5*20+ 3] = 0.131355702 ; - daa[ 5*20+ 4] = 0.152595208 ; - daa[ 6*20+ 0] = 0.179163888 ; - daa[ 6*20+ 1] = 0.080835481 ; - daa[ 6*20+ 2] = 0.812241124 ; - daa[ 6*20+ 3] = 6.033788982 ; - daa[ 6*20+ 4] = 5.06E-02 ; - daa[ 6*20+ 5] = 2.236617623 ; - daa[ 7*20+ 0] = 1.46586228 ; - daa[ 7*20+ 1] = 
0.219967124 ; - daa[ 7*20+ 2] = 0.543750757 ; - daa[ 7*20+ 3] = 0.630753299 ; - daa[ 7*20+ 4] = 0.91412559 ; - daa[ 7*20+ 5] = 0.072395536 ; - daa[ 7*20+ 6] = 0.768853295 ; - daa[ 8*20+ 0] = 0.03019213 ; - daa[ 8*20+ 1] = 1.522256865 ; - daa[ 8*20+ 2] = 1.738679644 ; - daa[ 8*20+ 3] = 0.479791112 ; - daa[ 8*20+ 4] = 6.04E-01 ; - daa[ 8*20+ 5] = 4.518450891 ; - daa[ 8*20+ 6] = 0.105414735 ; - daa[ 8*20+ 7] = 0.025252656 ; - daa[ 9*20+ 0] = 0.367600449 ; - daa[ 9*20+ 1] = 0.012428576 ; - daa[ 9*20+ 2] = 0.244934765 ; - daa[ 9*20+ 3] = 0.010668856 ; - daa[ 9*20+ 4] = 0.235804245 ; - daa[ 9*20+ 5] = 0.008875686 ; - daa[ 9*20+ 6] = 1.40E-02 ; - daa[ 9*20+ 7] = 1.38E-02 ; - daa[ 9*20+ 8] = 0.017140139 ; - daa[10*20+ 0] = 0.109872766 ; - daa[10*20+ 1] = 0.058180015 ; - daa[10*20+ 2] = 4.63E-02 ; - daa[10*20+ 3] = 0.005529144 ; - daa[10*20+ 4] = 0.299518997 ; - daa[10*20+ 5] = 0.254452467 ; - daa[10*20+ 6] = 0.019157619 ; - daa[10*20+ 7] = 0.027264554 ; - daa[10*20+ 8] = 0.111638937 ; - daa[10*20+ 9] = 1.897974368 ; - daa[11*20+ 0] = 0.020509508 ; - daa[11*20+ 1] = 1.057185633 ; - daa[11*20+ 2] = 2.53039843 ; - daa[11*20+ 3] = 0.049007456 ; - daa[11*20+ 4] = 1.58E-02 ; - daa[11*20+ 5] = 1.827218186 ; - daa[11*20+ 6] = 1.379217783 ; - daa[11*20+ 7] = 0.134187175 ; - daa[11*20+ 8] = 0.135153663 ; - daa[11*20+ 9] = 0.064936611 ; - daa[11*20+10] = 0.06132452 ; - daa[12*20+ 0] = 0.653363993 ; - daa[12*20+ 1] = 0.013494034 ; - daa[12*20+ 2] = 0.399827723 ; - daa[12*20+ 3] = 0.026109947 ; - daa[12*20+ 4] = 0.492340144 ; - daa[12*20+ 5] = 0.237094366 ; - daa[12*20+ 6] = 0.128410054 ; - daa[12*20+ 7] = 0.145331466 ; - daa[12*20+ 8] = 0.032834314 ; - daa[12*20+ 9] = 2.918353208 ; - daa[12*20+10] = 3.425553709 ; - daa[12*20+11] = 0.65931076 ; - daa[13*20+ 0] = 0.062762255 ; - daa[13*20+ 1] = 8.04E-03 ; - daa[13*20+ 2] = 1.39E-01 ; - daa[13*20+ 3] = 1.26E-02 ; - daa[13*20+ 4] = 0.925810864 ; - daa[13*20+ 5] = 0.026306325 ; - daa[13*20+ 6] = 0.017716308 ; - daa[13*20+ 7] = 0.068139281 
; - daa[13*20+ 8] = 0.090353067 ; - daa[13*20+ 9] = 0.750900541 ; - daa[13*20+10] = 1.811101233 ; - daa[13*20+11] = 9.71E-02 ; - daa[13*20+12] = 0.748424997 ; - daa[14*20+ 0] = 0.408077053 ; - daa[14*20+ 1] = 0.155008566 ; - daa[14*20+ 2] = 8.03E-02 ; - daa[14*20+ 3] = 0.044609563 ; - daa[14*20+ 4] = 2.94E-02 ; - daa[14*20+ 5] = 0.849512435 ; - daa[14*20+ 6] = 0.048786299 ; - daa[14*20+ 7] = 0.005914206 ; - daa[14*20+ 8] = 0.519954375 ; - daa[14*20+ 9] = 0.024850021 ; - daa[14*20+10] = 0.270260781 ; - daa[14*20+11] = 0.121234921 ; - daa[14*20+12] = 3.27E-02 ; - daa[14*20+13] = 0.054271889 ; - daa[15*20+ 0] = 2.771686015 ; - daa[15*20+ 1] = 0.197379185 ; - daa[15*20+ 2] = 2.634378514 ; - daa[15*20+ 3] = 0.360804781 ; - daa[15*20+ 4] = 3.283014871 ; - daa[15*20+ 5] = 0.384800284 ; - daa[15*20+ 6] = 0.363104466 ; - daa[15*20+ 7] = 1.746570145 ; - daa[15*20+ 8] = 0.297586084 ; - daa[15*20+ 9] = 0.096272864 ; - daa[15*20+10] = 0.311525131 ; - daa[15*20+11] = 0.695088128 ; - daa[15*20+12] = 0.458734096 ; - daa[15*20+13] = 0.499349901 ; - daa[15*20+14] = 1.231180819 ; - daa[16*20+ 0] = 6.73088516 ; - daa[16*20+ 1] = 0.056079813 ; - daa[16*20+ 2] = 0.961285093 ; - daa[16*20+ 3] = 0.102136221 ; - daa[16*20+ 4] = 0.338668196 ; - daa[16*20+ 5] = 0.274195947 ; - daa[16*20+ 6] = 0.134802671 ; - daa[16*20+ 7] = 0.02455829 ; - daa[16*20+ 8] = 0.221010609 ; - daa[16*20+ 9] = 2.453458143 ; - daa[16*20+10] = 0.253366704 ; - daa[16*20+11] = 0.393851704 ; - daa[16*20+12] = 3.035215726 ; - daa[16*20+13] = 0.053947743 ; - daa[16*20+14] = 0.73460491 ; - daa[16*20+15] = 3.114742907 ; - daa[17*20+ 0] = 1.36E-02 ; - daa[17*20+ 1] = 0.370819892 ; - daa[17*20+ 2] = 4.90E-02 ; - daa[17*20+ 3] = 4.09E-02 ; - daa[17*20+ 4] = 1.018410485 ; - daa[17*20+ 5] = 1.23E-01 ; - daa[17*20+ 6] = 0.086028795 ; - daa[17*20+ 7] = 0.233963371 ; - daa[17*20+ 8] = 3.75E-02 ; - daa[17*20+ 9] = 0.028656797 ; - daa[17*20+10] = 0.253243013 ; - daa[17*20+11] = 7.35E-02 ; - daa[17*20+12] = 0.167575318 ; - 
daa[17*20+13] = 0.330781928 ; - daa[17*20+14] = 0.029433866 ; - daa[17*20+15] = 0.169212029 ; - daa[17*20+16] = 0.014378616 ; - daa[18*20+ 0] = 0.014501407 ; - daa[18*20+ 1] = 0.127519332 ; - daa[18*20+ 2] = 1.020785491 ; - daa[18*20+ 3] = 0.160289958 ; - daa[18*20+ 4] = 1.967371255 ; - daa[18*20+ 5] = 0.319105788 ; - daa[18*20+ 6] = 0.093214721 ; - daa[18*20+ 7] = 0.046746341 ; - daa[18*20+ 8] = 3.907918551 ; - daa[18*20+ 9] = 0.135319461 ; - daa[18*20+10] = 0.123555332 ; - daa[18*20+11] = 0.281699174 ; - daa[18*20+12] = 0.316599031 ; - daa[18*20+13] = 3.209083303 ; - daa[18*20+14] = 5.40E-02 ; - daa[18*20+15] = 0.374184286 ; - daa[18*20+16] = 0.091031787 ; - daa[18*20+17] = 0.481044316 ; - daa[19*20+ 0] = 2.815163085 ; - daa[19*20+ 1] = 0.041063684 ; - daa[19*20+ 2] = 0.051741627 ; - daa[19*20+ 3] = 0.084589029 ; - daa[19*20+ 4] = 1.394528044 ; - daa[19*20+ 5] = 0.027669233 ; - daa[19*20+ 6] = 0.227827051 ; - daa[19*20+ 7] = 0.417148954 ; - daa[19*20+ 8] = 0.003511008 ; - daa[19*20+ 9] = 10.95342584 ; - daa[19*20+10] = 0.958273743 ; - daa[19*20+11] = 0.055461435 ; - daa[19*20+12] = 2.562484895 ; - daa[19*20+13] = 0.466243442 ; - daa[19*20+14] = 0.054078533 ; - daa[19*20+15] = 0.267109465 ; - daa[19*20+16] = 1.514059674 ; - daa[19*20+17] = 0.093136256 ; - daa[19*20+18] = 0.06996454 ; - - f[0] = 0.0437932 ; - f[1] = 0.0129578 ; - f[2] = 0.0570013 ; - f[3] = 0.016899 ; - f[4] = 0.0113305 ; - f[5] = 0.0180181 ; - f[6] = 0.0225385 ; - f[7] = 0.0470501 ; - f[8] = 0.0171837 ; - f[9] = 0.0897794 ; - f[10] = 0.155226 ; - f[11] = 0.0399135 ; - f[12] = 0.0674443 ; - f[13] = 0.088448 ; - f[14] = 0.0375282 ; - f[15] = 0.0937522 ; - f[16] = 0.063579 ; - f[17] = 0.0226713 ; - f[18] = 0.0415682 ; - f[19] = 0.0533174 ; - } - else if (prot_model == "MTVER") - { - daa[ 1*20+ 0] = 0.064684561 ; - daa[ 2*20+ 0] = 0.032311357 ; - daa[ 2*20+ 1] = 0.153152691 ; - daa[ 3*20+ 0] = 0.159447221 ; - daa[ 3*20+ 1] = 0.060872106 ; - daa[ 3*20+ 2] = 8.760492724 ; - daa[ 4*20+ 0] = 0.246553601 ; 
- daa[ 4*20+ 1] = 1.223667072 ; - daa[ 4*20+ 2] = 3.36E-01 ; - daa[ 4*20+ 3] = 1.21E-01 ; - daa[ 5*20+ 0] = 0.030870527 ; - daa[ 5*20+ 1] = 3.170385554 ; - daa[ 5*20+ 2] = 0.392967027 ; - daa[ 5*20+ 3] = 0.083338178 ; - daa[ 5*20+ 4] = 0.116173285 ; - daa[ 6*20+ 0] = 0.191894132 ; - daa[ 6*20+ 1] = 0.094355832 ; - daa[ 6*20+ 2] = 0.335733987 ; - daa[ 6*20+ 3] = 6.603635331 ; - daa[ 6*20+ 4] = 3.64E-02 ; - daa[ 6*20+ 5] = 2.018142888 ; - daa[ 7*20+ 0] = 1.094200392 ; - daa[ 7*20+ 1] = 0.292471121 ; - daa[ 7*20+ 2] = 0.566906806 ; - daa[ 7*20+ 3] = 0.99925644 ; - daa[ 7*20+ 4] = 0.916864782 ; - daa[ 7*20+ 5] = 0.042512974 ; - daa[ 7*20+ 6] = 1.162715397 ; - daa[ 8*20+ 0] = 0.023294168 ; - daa[ 8*20+ 1] = 2.204734553 ; - daa[ 8*20+ 2] = 2.462270432 ; - daa[ 8*20+ 3] = 0.818111764 ; - daa[ 8*20+ 4] = 1.40E+00 ; - daa[ 8*20+ 5] = 4.282731418 ; - daa[ 8*20+ 6] = 0.081141371 ; - daa[ 8*20+ 7] = 0.01333325 ; - daa[ 9*20+ 0] = 0.412484838 ; - daa[ 9*20+ 1] = 0.001173176 ; - daa[ 9*20+ 2] = 0.126395546 ; - daa[ 9*20+ 3] = 0.002450023 ; - daa[ 9*20+ 4] = 0.067512547 ; - daa[ 9*20+ 5] = 0.00417909 ; - daa[ 9*20+ 6] = 1.25E-03 ; - daa[ 9*20+ 7] = 3.12E-03 ; - daa[ 9*20+ 8] = 0.022239982 ; - daa[10*20+ 0] = 0.078175467 ; - daa[10*20+ 1] = 0.078821913 ; - daa[10*20+ 2] = 4.52E-03 ; - daa[10*20+ 3] = 0.002303981 ; - daa[10*20+ 4] = 0.170992927 ; - daa[10*20+ 5] = 0.251583659 ; - daa[10*20+ 6] = 0.007547825 ; - daa[10*20+ 7] = 0.007030628 ; - daa[10*20+ 8] = 0.148659568 ; - daa[10*20+ 9] = 1.378981232 ; - daa[11*20+ 0] = 0.020785848 ; - daa[11*20+ 1] = 0.418858497 ; - daa[11*20+ 2] = 2.458222646 ; - daa[11*20+ 3] = 0.01952784 ; - daa[11*20+ 4] = 2.48E-02 ; - daa[11*20+ 5] = 2.001704573 ; - daa[11*20+ 6] = 2.154178607 ; - daa[11*20+ 7] = 0.106373644 ; - daa[11*20+ 8] = 0.176407082 ; - daa[11*20+ 9] = 0.005826058 ; - daa[11*20+10] = 0.022236492 ; - daa[12*20+ 0] = 0.751880464 ; - daa[12*20+ 1] = 0.003339426 ; - daa[12*20+ 2] = 0.036342261 ; - daa[12*20+ 3] = 0.005454653 ; - 
daa[12*20+ 4] = 0.084565906 ; - daa[12*20+ 5] = 0.118300058 ; - daa[12*20+ 6] = 0.064939149 ; - daa[12*20+ 7] = 0.034441682 ; - daa[12*20+ 8] = 0.024341794 ; - daa[12*20+ 9] = 2.709083916 ; - daa[12*20+10] = 3.102547734 ; - daa[12*20+11] = 0.440831666 ; - daa[13*20+ 0] = 0.064446493 ; - daa[13*20+ 1] = 2.48E-03 ; - daa[13*20+ 2] = 1.03E-02 ; - daa[13*20+ 3] = 5.85E-03 ; - daa[13*20+ 4] = 1.071949752 ; - daa[13*20+ 5] = 0.012628422 ; - daa[13*20+ 6] = 0.00033294 ; - daa[13*20+ 7] = 0.015567812 ; - daa[13*20+ 8] = 0.173363873 ; - daa[13*20+ 9] = 0.516016404 ; - daa[13*20+10] = 2.403418258 ; - daa[13*20+11] = 1.04E-02 ; - daa[13*20+12] = 0.123894544 ; - daa[14*20+ 0] = 0.285569251 ; - daa[14*20+ 1] = 0.223106214 ; - daa[14*20+ 2] = 3.45E-02 ; - daa[14*20+ 3] = 0.027518471 ; - daa[14*20+ 4] = 1.89E-02 ; - daa[14*20+ 5] = 0.918408255 ; - daa[14*20+ 6] = 0.027495627 ; - daa[14*20+ 7] = 0.001858863 ; - daa[14*20+ 8] = 0.679484957 ; - daa[14*20+ 9] = 0.018484688 ; - daa[14*20+10] = 0.366808078 ; - daa[14*20+11] = 0.141827239 ; - daa[14*20+12] = 3.09E-02 ; - daa[14*20+13] = 0.070221272 ; - daa[15*20+ 0] = 2.481816654 ; - daa[15*20+ 1] = 0.135104164 ; - daa[15*20+ 2] = 3.854775171 ; - daa[15*20+ 3] = 0.371726478 ; - daa[15*20+ 4] = 3.359147075 ; - daa[15*20+ 5] = 0.243906624 ; - daa[15*20+ 6] = 0.09774973 ; - daa[15*20+ 7] = 1.247283002 ; - daa[15*20+ 8] = 0.445289092 ; - daa[15*20+ 9] = 0.044144989 ; - daa[15*20+10] = 0.440666821 ; - daa[15*20+11] = 0.187506391 ; - daa[15*20+12] = 0.13717283 ; - daa[15*20+13] = 0.886724071 ; - daa[15*20+14] = 1.961394689 ; - daa[16*20+ 0] = 5.562698831 ; - daa[16*20+ 1] = 0.028643193 ; - daa[16*20+ 2] = 0.865443188 ; - daa[16*20+ 3] = 0.090462156 ; - daa[16*20+ 4] = 0.152312817 ; - daa[16*20+ 5] = 0.113601661 ; - daa[16*20+ 6] = 0.095055175 ; - daa[16*20+ 7] = 0.008518897 ; - daa[16*20+ 8] = 0.150502662 ; - daa[16*20+ 9] = 2.439177342 ; - daa[16*20+10] = 0.173310893 ; - daa[16*20+11] = 0.399752385 ; - daa[16*20+12] = 3.736804641 ; - 
daa[16*20+13] = 0.061992977 ; - daa[16*20+14] = 0.57380811 ; - daa[16*20+15] = 2.845372128 ; - daa[17*20+ 0] = 8.42E-03 ; - daa[17*20+ 1] = 0.55854063 ; - daa[17*20+ 2] = 6.68E-03 ; - daa[17*20+ 3] = 2.69E-02 ; - daa[17*20+ 4] = 1.778858854 ; - daa[17*20+ 5] = 1.24E-01 ; - daa[17*20+ 6] = 0.06234136 ; - daa[17*20+ 7] = 0.258646022 ; - daa[17*20+ 8] = 3.06E-02 ; - daa[17*20+ 9] = 0.001901139 ; - daa[17*20+10] = 0.186367593 ; - daa[17*20+11] = 2.94E-02 ; - daa[17*20+12] = 0.056840232 ; - daa[17*20+13] = 0.091906911 ; - daa[17*20+14] = 0.020008899 ; - daa[17*20+15] = 0.144081245 ; - daa[17*20+16] = 0.003186141 ; - daa[18*20+ 0] = 0.012484937 ; - daa[18*20+ 1] = 0.146342679 ; - daa[18*20+ 2] = 0.856969381 ; - daa[18*20+ 3] = 0.186327964 ; - daa[18*20+ 4] = 5.248216959 ; - daa[18*20+ 5] = 0.228833107 ; - daa[18*20+ 6] = 0.037356107 ; - daa[18*20+ 7] = 0.021047197 ; - daa[18*20+ 8] = 9.175807849 ; - daa[18*20+ 9] = 0.045429828 ; - daa[18*20+10] = 0.103344553 ; - daa[18*20+11] = 0.062036726 ; - daa[18*20+12] = 0.079617041 ; - daa[18*20+13] = 3.987258869 ; - daa[18*20+14] = 7.19E-02 ; - daa[18*20+15] = 0.543166729 ; - daa[18*20+16] = 0.084368257 ; - daa[18*20+17] = 0.288348205 ; - daa[19*20+ 0] = 3.196936472 ; - daa[19*20+ 1] = 0.037629734 ; - daa[19*20+ 2] = 0.013796204 ; - daa[19*20+ 3] = 0.128207953 ; - daa[19*20+ 4] = 0.38185994 ; - daa[19*20+ 5] = 0.010633046 ; - daa[19*20+ 6] = 0.21626084 ; - daa[19*20+ 7] = 0.442992767 ; - daa[19*20+ 8] = 0.00275996 ; - daa[19*20+ 9] = 13.50513748 ; - daa[19*20+10] = 0.856259068 ; - daa[19*20+11] = 0.025253714 ; - daa[19*20+12] = 4.499323771 ; - daa[19*20+13] = 0.336159338 ; - daa[19*20+14] = 0.02576807 ; - daa[19*20+15] = 0.058280963 ; - daa[19*20+16] = 1.317930337 ; - daa[19*20+17] = 0.047174761 ; - daa[19*20+18] = 0.028686374 ; - - f[0] = 0.0708203 ; - f[1] = 0.0140499 ; - f[2] = 0.0452099 ; - f[3] = 0.0147937 ; - f[4] = 0.0068142 ; - f[5] = 0.0263409 ; - f[6] = 0.0214952 ; - f[7] = 0.04424 ; - f[8] = 0.024231 ; - f[9] = 
0.0907351 ; - f[10] = 0.17231 ; - f[11] = 0.0273812 ; - f[12] = 0.056194 ; - f[13] = 0.0497758 ; - f[14] = 0.0543863 ; - f[15] = 0.0744219 ; - f[16] = 0.10881 ; - f[17] = 0.0256527 ; - f[18] = 0.0264847 ; - f[19] = 0.0458537 ; - } - else if (prot_model == "MTINV") - { - daa[ 1*20+ 0] = 0.074334248 ; - daa[ 2*20+ 0] = 0.023989062 ; - daa[ 2*20+ 1] = 0.147604282 ; - daa[ 3*20+ 0] = 0.100963383 ; - daa[ 3*20+ 1] = 0.070670691 ; - daa[ 3*20+ 2] = 3.471724219 ; - daa[ 4*20+ 0] = 1.303664196 ; - daa[ 4*20+ 1] = 0.680712248 ; - daa[ 4*20+ 2] = 2.80E-01 ; - daa[ 4*20+ 3] = 5.83E-02 ; - daa[ 5*20+ 0] = 0.090980805 ; - daa[ 5*20+ 1] = 2.507738035 ; - daa[ 5*20+ 2] = 1.287189506 ; - daa[ 5*20+ 3] = 0.204531494 ; - daa[ 5*20+ 4] = 0.163006168 ; - daa[ 6*20+ 0] = 0.145240578 ; - daa[ 6*20+ 1] = 0.121635308 ; - daa[ 6*20+ 2] = 1.159082937 ; - daa[ 6*20+ 3] = 6.962512075 ; - daa[ 6*20+ 4] = 4.71E-02 ; - daa[ 6*20+ 5] = 2.55996613 ; - daa[ 7*20+ 0] = 1.931555054 ; - daa[ 7*20+ 1] = 0.217695194 ; - daa[ 7*20+ 2] = 0.55141996 ; - daa[ 7*20+ 3] = 0.591216902 ; - daa[ 7*20+ 4] = 0.883545442 ; - daa[ 7*20+ 5] = 0.114080073 ; - daa[ 7*20+ 6] = 0.599017534 ; - daa[ 8*20+ 0] = 0.045194583 ; - daa[ 8*20+ 1] = 1.283367123 ; - daa[ 8*20+ 2] = 1.728635643 ; - daa[ 8*20+ 3] = 0.358935427 ; - daa[ 8*20+ 4] = 3.41E-01 ; - daa[ 8*20+ 5] = 4.708032587 ; - daa[ 8*20+ 6] = 0.21738768 ; - daa[ 8*20+ 7] = 0.06435237 ; - daa[ 9*20+ 0] = 0.209390309 ; - daa[ 9*20+ 1] = 0.028420835 ; - daa[ 9*20+ 2] = 0.310892314 ; - daa[ 9*20+ 3] = 0.017124811 ; - daa[ 9*20+ 4] = 0.287547623 ; - daa[ 9*20+ 5] = 0.025100279 ; - daa[ 9*20+ 6] = 2.08E-02 ; - daa[ 9*20+ 7] = 2.48E-02 ; - daa[ 9*20+ 8] = 0.025529471 ; - daa[10*20+ 0] = 0.153999868 ; - daa[10*20+ 1] = 0.052763492 ; - daa[10*20+ 2] = 7.51E-02 ; - daa[10*20+ 3] = 0.010524113 ; - daa[10*20+ 4] = 0.381393652 ; - daa[10*20+ 5] = 0.211473563 ; - daa[10*20+ 6] = 0.033099609 ; - daa[10*20+ 7] = 0.050781964 ; - daa[10*20+ 8] = 0.08084231 ; - daa[10*20+ 9] = 
2.159574141 ; - daa[11*20+ 0] = 0.014824603 ; - daa[11*20+ 1] = 1.631886026 ; - daa[11*20+ 2] = 2.642320784 ; - daa[11*20+ 3] = 0.096074109 ; - daa[11*20+ 4] = 4.39E-03 ; - daa[11*20+ 5] = 1.933672947 ; - daa[11*20+ 6] = 1.172487361 ; - daa[11*20+ 7] = 0.156144417 ; - daa[11*20+ 8] = 0.250219038 ; - daa[11*20+ 9] = 0.100258581 ; - daa[11*20+10] = 0.081982103 ; - daa[12*20+ 0] = 0.592865041 ; - daa[12*20+ 1] = 0.022385994 ; - daa[12*20+ 2] = 0.566990153 ; - daa[12*20+ 3] = 0.049089614 ; - daa[12*20+ 4] = 0.461586134 ; - daa[12*20+ 5] = 0.397060932 ; - daa[12*20+ 6] = 0.189458658 ; - daa[12*20+ 7] = 0.207637181 ; - daa[12*20+ 8] = 0.070807607 ; - daa[12*20+ 9] = 2.830889408 ; - daa[12*20+10] = 3.49491657 ; - daa[12*20+11] = 0.694473511 ; - daa[13*20+ 0] = 0.111082058 ; - daa[13*20+ 1] = 1.54E-02 ; - daa[13*20+ 2] = 1.94E-01 ; - daa[13*20+ 3] = 1.72E-02 ; - daa[13*20+ 4] = 0.887097368 ; - daa[13*20+ 5] = 0.05254553 ; - daa[13*20+ 6] = 0.027169135 ; - daa[13*20+ 7] = 0.103612519 ; - daa[13*20+ 8] = 0.151813293 ; - daa[13*20+ 9] = 0.879509936 ; - daa[13*20+10] = 1.735139585 ; - daa[13*20+11] = 9.88E-02 ; - daa[13*20+12] = 0.916540366 ; - daa[14*20+ 0] = 0.629532202 ; - daa[14*20+ 1] = 0.130198061 ; - daa[14*20+ 2] = 2.24E-01 ; - daa[14*20+ 3] = 0.104690635 ; - daa[14*20+ 4] = 6.93E-02 ; - daa[14*20+ 5] = 0.581011465 ; - daa[14*20+ 6] = 0.120987665 ; - daa[14*20+ 7] = 0.028323528 ; - daa[14*20+ 8] = 0.312042811 ; - daa[14*20+ 9] = 0.070886418 ; - daa[14*20+10] = 0.154372954 ; - daa[14*20+11] = 0.183146672 ; - daa[14*20+12] = 9.25E-02 ; - daa[14*20+13] = 0.105108176 ; - daa[15*20+ 0] = 3.368753997 ; - daa[15*20+ 1] = 0.309426918 ; - daa[15*20+ 2] = 2.084470081 ; - daa[15*20+ 3] = 0.505643623 ; - daa[15*20+ 4] = 3.042785241 ; - daa[15*20+ 5] = 0.538789683 ; - daa[15*20+ 6] = 0.580233246 ; - daa[15*20+ 7] = 2.020141073 ; - daa[15*20+ 8] = 0.368422409 ; - daa[15*20+ 9] = 0.126690083 ; - daa[15*20+10] = 0.225488096 ; - daa[15*20+11] = 0.879841219 ; - daa[15*20+12] = 
0.633585504 ; - daa[15*20+13] = 0.398961745 ; - daa[15*20+14] = 1.042510137 ; - daa[16*20+ 0] = 3.327043094 ; - daa[16*20+ 1] = 0.108593944 ; - daa[16*20+ 2] = 1.186265817 ; - daa[16*20+ 3] = 0.141991554 ; - daa[16*20+ 4] = 0.736861924 ; - daa[16*20+ 5] = 0.554297635 ; - daa[16*20+ 6] = 0.241928717 ; - daa[16*20+ 7] = 0.043791212 ; - daa[16*20+ 8] = 0.270595612 ; - daa[16*20+ 9] = 1.958518823 ; - daa[16*20+10] = 0.298279866 ; - daa[16*20+11] = 0.511284083 ; - daa[16*20+12] = 2.192059537 ; - daa[16*20+13] = 0.121269565 ; - daa[16*20+14] = 0.746917102 ; - daa[16*20+15] = 3.833300295 ; - daa[17*20+ 0] = 2.65E-02 ; - daa[17*20+ 1] = 0.321740806 ; - daa[17*20+ 2] = 7.54E-02 ; - daa[17*20+ 3] = 7.16E-02 ; - daa[17*20+ 4] = 0.758572155 ; - daa[17*20+ 5] = 1.18E-01 ; - daa[17*20+ 6] = 0.107622629 ; - daa[17*20+ 7] = 0.24469826 ; - daa[17*20+ 8] = 8.12E-02 ; - daa[17*20+ 9] = 0.062889052 ; - daa[17*20+10] = 0.298463258 ; - daa[17*20+11] = 1.06E-01 ; - daa[17*20+12] = 0.254195761 ; - daa[17*20+13] = 0.478877081 ; - daa[17*20+14] = 0.059737849 ; - daa[17*20+15] = 0.204318523 ; - daa[17*20+16] = 0.049859223 ; - daa[18*20+ 0] = 0.021939778 ; - daa[18*20+ 1] = 0.182762575 ; - daa[18*20+ 2] = 1.139165497 ; - daa[18*20+ 3] = 0.192760013 ; - daa[18*20+ 4] = 1.342952521 ; - daa[18*20+ 5] = 0.476209531 ; - daa[18*20+ 6] = 0.137649701 ; - daa[18*20+ 7] = 0.072268144 ; - daa[18*20+ 8] = 2.321702829 ; - daa[18*20+ 9] = 0.177138386 ; - daa[18*20+10] = 0.161052695 ; - daa[18*20+11] = 0.348477791 ; - daa[18*20+12] = 0.393193334 ; - daa[18*20+13] = 3.234682804 ; - daa[18*20+14] = 8.14E-02 ; - daa[18*20+15] = 0.345719707 ; - daa[18*20+16] = 0.174898703 ; - daa[18*20+17] = 0.636664895 ; - daa[19*20+ 0] = 2.751363323 ; - daa[19*20+ 1] = 0.072153256 ; - daa[19*20+ 2] = 0.065420954 ; - daa[19*20+ 3] = 0.088077428 ; - daa[19*20+ 4] = 1.555397528 ; - daa[19*20+ 5] = 0.065221967 ; - daa[19*20+ 6] = 0.231186579 ; - daa[19*20+ 7] = 0.396428236 ; - daa[19*20+ 8] = 0.008043438 ; - daa[19*20+ 9] = 
8.542592791 ; - daa[19*20+10] = 1.075873286 ; - daa[19*20+11] = 0.056252096 ; - daa[19*20+12] = 1.509852775 ; - daa[19*20+13] = 0.534676938 ; - daa[19*20+14] = 0.162975938 ; - daa[19*20+15] = 0.377288817 ; - daa[19*20+16] = 1.885339324 ; - daa[19*20+17] = 0.128598618 ; - daa[19*20+18] = 0.080346116 ; - - f[0] = 0.0317423 ; - f[1] = 0.0109007 ; - f[2] = 0.0615792 ; - f[3] = 0.0161492 ; - f[4] = 0.0135701 ; - f[5] = 0.0146441 ; - f[6] = 0.0223112 ; - f[7] = 0.0478475 ; - f[8] = 0.0116418 ; - f[9] = 0.0943223 ; - f[10] = 0.149407 ; - f[11] = 0.0444387 ; - f[12] = 0.0772625 ; - f[13] = 0.102287 ; - f[14] = 0.0262902 ; - f[15] = 0.105939 ; - f[16] = 0.0428691 ; - f[17] = 0.020701 ; - f[18] = 0.0465567 ; - f[19] = 0.05954 ; - } - else return false; - - for (i=0; i<20; i++) - daa[i*20+i] = 0.0; - for (i=0; i<20; i++) - for (j=0; j max) - max = temp; - } + 0.07559244 0.05379462 0.03769623 0.04469553 0.02849715 0.03389661 0.05349465 0.0779922 0.029997 0.05989401 0.09579042 0.0519948 0.02189781 0.0449955 0.0419958 0.06819318 0.05639436 0.01569843 0.0359964 0.07149285; + [ NOTE 2019-06-04: normalised from original PMB freqs, which do not sum to 1.0: + 0.0756 0.0538 0.0377 0.0447 0.0285 0.0339 0.0535 0.0780 0.0300 0.0599 0.0958 0.0520 0.0219 0.0450 0.0420 0.0682 0.0564 0.0157 0.0360 0.0715 ] + +model HIVB= +0.30750700 +0.00500000 0.29554300 +1.45504000 0.00500000 17.66120000 +0.12375800 0.35172100 0.08606420 0.00500000 +0.05511280 3.42150000 0.67205200 0.00500000 0.00500000 +1.48135000 0.07492180 0.07926330 10.58720000 0.00500000 2.56020000 +2.13536000 3.65345000 0.32340100 2.83806000 0.89787100 0.06191370 3.92775000 +0.08476130 9.04044000 7.64585000 1.91690000 0.24007300 7.05545000 0.11974000 0.00500000 +0.00500000 0.67728900 0.68056500 0.01767920 0.00500000 0.00500000 0.00609079 0.00500000 0.10311100 +0.21525600 0.70142700 0.00500000 0.00876048 0.12977700 1.49456000 0.00500000 0.00500000 1.74171000 5.95879000 +0.00500000 20.45000000 7.90443000 0.00500000 0.00500000 
6.54737000 4.61482000 0.52170500 0.00500000 0.32231900 0.08149950 +0.01866430 2.51394000 0.00500000 0.00500000 0.00500000 0.30367600 0.17578900 0.00500000 0.00500000 11.20650000 5.31961000 1.28246000 +0.01412690 0.00500000 0.00500000 0.00500000 9.29815000 0.00500000 0.00500000 0.29156100 0.14555800 3.39836000 8.52484000 0.03426580 0.18802500 +2.12217000 1.28355000 0.00739578 0.03426580 0.00500000 4.47211000 0.01202260 0.00500000 2.45318000 0.04105930 2.07757000 0.03138620 0.00500000 0.00500000 +2.46633000 3.47910000 13.14470000 0.52823000 4.69314000 0.11631100 0.00500000 4.38041000 0.38274700 1.21803000 0.92765600 0.50411100 0.00500000 0.95647200 5.37762000 +15.91830000 2.86868000 6.88667000 0.27472400 0.73996900 0.24358900 0.28977400 0.36961500 0.71159400 8.61217000 0.04376730 4.67142000 4.94026000 0.01412690 2.01417000 8.93107000 +0.00500000 0.99133800 0.00500000 0.00500000 2.63277000 0.02665600 0.00500000 1.21674000 0.06951790 0.00500000 0.74884300 0.00500000 0.08907800 0.82934300 0.04445060 0.02487280 0.00500000 +0.00500000 0.00991826 1.76417000 0.67465300 7.57932000 0.11303300 0.07926330 0.00500000 18.69430000 0.14816800 0.11198600 0.00500000 0.00500000 15.34000000 0.03043810 0.64802400 0.10565200 1.28022000 +7.61428000 0.08124540 0.02665600 1.04793000 0.42002700 0.02091530 1.02847000 0.95315500 0.00500000 17.73890000 1.41036000 0.26582900 6.85320000 0.72327400 0.00500000 0.07492180 0.70922600 0.00500000 0.04105930 +0.060490222 0.066039665 0.044127815 0.042109048 0.020075899 0.053606488 0.071567447 0.072308239 0.022293943 0.069730629 0.098851122 0.056968211 0.019768318 0.028809447 0.046025282 0.050604330 0.053636813 0.033011601 0.028350243 0.061625237; + +model HIVW= +0.0744808 +0.6175090 0.1602400 +4.4352100 0.0674539 29.4087000 +0.1676530 2.8636400 0.0604932 0.0050000 +0.0050000 10.6746000 0.3420680 0.0050000 0.0050000 +5.5632500 0.0251632 0.2015260 12.1233000 0.0050000 3.2065600 +1.8685000 13.4379000 0.0604932 10.3969000 0.0489798 0.0604932 14.7801000 
+0.0050000 6.8440500 8.5987600 2.3177900 0.0050000 18.5465000 0.0050000 0.0050000 +0.0050000 1.3406900 0.9870280 0.1451240 0.0050000 0.0342252 0.0390512 0.0050000 0.0050000 +0.1602400 0.5867570 0.0050000 0.0050000 0.0050000 2.8904800 0.1298390 0.0489798 1.7638200 9.1024600 +0.5927840 39.8897000 10.6655000 0.8943130 0.0050000 13.0705000 23.9626000 0.2794250 0.2240600 0.8174810 0.0050000 +0.0050000 3.2865200 0.2015260 0.0050000 0.0050000 0.0050000 0.0050000 0.0489798 0.0050000 17.3064000 11.3839000 4.0956400 +0.5979230 0.0050000 0.0050000 0.0050000 0.3629590 0.0050000 0.0050000 0.0050000 0.0050000 1.4828800 7.4878100 0.0050000 0.0050000 +1.0098100 0.4047230 0.3448480 0.0050000 0.0050000 3.0450200 0.0050000 0.0050000 13.9444000 0.0050000 9.8309500 0.1119280 0.0050000 0.0342252 +8.5942000 8.3502400 14.5699000 0.4278810 1.1219500 0.1602400 0.0050000 6.2796600 0.7251570 0.7400910 6.1439600 0.0050000 0.3925750 4.2793900 14.2490000 +24.1422000 0.9282030 4.5420600 0.6303950 0.0050000 0.2030910 0.4587430 0.0489798 0.9595600 9.3634500 0.0050000 4.0480200 7.4131300 0.1145120 4.3370100 6.3407900 +0.0050000 5.9656400 0.0050000 0.0050000 5.4989400 0.0443298 0.0050000 2.8258000 0.0050000 0.0050000 1.3703100 0.0050000 0.0050000 0.0050000 0.0050000 1.1015600 0.0050000 +0.0050000 0.0050000 5.0647500 2.2815400 8.3483500 0.0050000 0.0050000 0.0050000 47.4889000 0.1145120 0.0050000 0.0050000 0.5791980 4.1272800 0.0050000 0.9331420 0.4906080 0.0050000 +24.8094000 0.2794250 0.0744808 2.9178600 0.0050000 0.0050000 2.1995200 2.7962200 0.8274790 24.8231000 2.9534400 0.1280650 14.7683000 2.2800000 0.0050000 0.8626370 0.0050000 0.0050000 1.3548200 +0.0377494 0.0573210 0.0891129 0.0342034 0.0240105 0.0437824 0.0618606 0.0838496 0.0156076 0.0983641 0.0577867 0.0641682 0.0158419 0.0422741 0.0458601 0.0550846 0.0813774 0.0195970 0.0205847 0.0515638; + +model JTTDCMUT= +0.531678 +0.557967 0.451095 +0.827445 0.154899 5.549530 +0.574478 1.019843 0.313311 0.105625 +0.556725 3.021995 0.768834 0.521646 
0.091304 +1.066681 0.318483 0.578115 7.766557 0.053907 3.417706 +1.740159 1.359652 0.773313 1.272434 0.546389 0.231294 1.115632 +0.219970 3.210671 4.025778 1.032342 0.724998 5.684080 0.243768 0.201696 +0.361684 0.239195 0.491003 0.115968 0.150559 0.078270 0.111773 0.053769 0.181788 +0.310007 0.372261 0.137289 0.061486 0.164593 0.709004 0.097485 0.069492 0.540571 2.335139 +0.369437 6.529255 2.529517 0.282466 0.049009 2.966732 1.731684 0.269840 0.525096 0.202562 0.146481 +0.469395 0.431045 0.330720 0.190001 0.409202 0.456901 0.175084 0.130379 0.329660 4.831666 3.856906 0.624581 +0.138293 0.065314 0.073481 0.032522 0.678335 0.045683 0.043829 0.050212 0.453428 0.777090 2.500294 0.024521 0.436181 +1.959599 0.710489 0.121804 0.127164 0.123653 1.608126 0.191994 0.208081 1.141961 0.098580 1.060504 0.216345 0.164215 0.148483 +3.887095 1.001551 5.057964 0.589268 2.155331 0.548807 0.312449 1.874296 0.743458 0.405119 0.592511 0.474478 0.285564 0.943971 2.788406 +4.582565 0.650282 2.351311 0.425159 0.469823 0.523825 0.331584 0.316862 0.477355 2.553806 0.272514 0.965641 2.114728 0.138904 1.176961 4.777647 +0.084329 1.257961 0.027700 0.057466 1.104181 0.172206 0.114381 0.544180 0.128193 0.134510 0.530324 0.089134 0.201334 0.537922 0.069965 0.310927 0.080556 +0.139492 0.235601 0.700693 0.453952 2.114852 0.254745 0.063452 0.052500 5.848400 0.303445 0.241094 0.087904 0.189870 5.484236 0.113850 0.628608 0.201094 0.747889 +2.924161 0.171995 0.164525 0.315261 0.621323 0.179771 0.465271 0.470140 0.121827 9.533943 1.761439 0.124066 3.038533 0.593478 0.211561 0.408532 1.143980 0.239697 0.165473 +0.07686192 0.05105695 0.04254596 0.05126895 0.02027898 0.04106096 0.06181994 0.07471393 0.02298298 0.05256895 0.09111091 0.05949794 0.02341398 0.04052996 0.05053195 0.06822493 0.05851794 0.01433599 0.03230297 0.06637393; +[ NOTE 2019-06-04: normalised from original JTTDCMUTT freqs, which do not sum to 1.0 + https://www.ebi.ac.uk/goldman-srv/dayhoff/jtt-dcmut.dat ] + +model FLU= +0.138658765 
+0.053366579 0.161000889 +0.584852306 0.006771843 7.737392871 +0.026447095 0.167207008 0.000013000 0.014100000 +0.353753982 3.292716942 0.530642655 0.145469388 0.002547334 +1.484234503 0.124897617 0.061652192 5.370511279 0.000000000 1.195629122 +1.132313122 1.190624465 0.322524648 1.934832784 0.116941459 0.108051341 1.593098825 +0.214757862 1.879569938 1.387096032 0.887570549 0.021800000 5.330313412 0.256491863 0.058774527 +0.149926734 0.246117172 0.218571975 0.014085917 0.001112158 0.028839950 0.014200000 0.000016300 0.243190142 +0.023116952 0.296045557 0.000836000 0.005730682 0.005613627 1.020366955 0.016499536 0.006516229 0.321611694 3.512072282 +0.474333610 15.300096620 2.646847965 0.290042980 0.000003830 2.559587177 3.881488809 0.264148929 0.347302791 0.227707997 0.129223639 +0.058745423 0.890162346 0.005251688 0.041762964 0.111457310 0.190259181 0.313974351 0.001500467 0.001273509 9.017954203 6.746936485 1.331291619 +0.080490909 0.016100000 0.000836000 0.000001060 0.104053666 0.032680657 0.001003501 0.001236645 0.119028506 1.463357278 2.986800036 0.320000000 0.279910509 +0.659311478 0.154027180 0.036400000 0.188539456 0.000000000 0.712769599 0.319558828 0.038631761 0.924466914 0.080543327 0.634308521 0.195750632 0.056900000 0.007132430 +3.011344519 0.950138410 3.881310531 0.338372183 0.336263345 0.487822499 0.307140298 1.585646577 0.580704250 0.290381075 0.570766693 0.283807672 0.007026588 0.996685670 2.087385344 +5.418298175 0.183076905 2.140332316 0.135481233 0.011975266 0.602340963 0.280124895 0.018808030 0.368713573 2.904052286 0.044926357 1.526964200 2.031511321 0.000134906 0.542251094 2.206859934 +0.196000000 1.369429408 0.000536000 0.000014900 0.094106680 0.044000000 0.155245492 0.196486447 0.022400000 0.032132150 0.431277663 0.000049800 0.070460039 0.814753094 0.000431021 0.099835753 0.207066206 +0.018289288 0.099855497 0.373101927 0.525398543 0.601692431 0.072205935 0.104092870 0.074814997 6.448954446 0.273934263 0.340058468 0.012416222 0.874272175 
5.393924245 0.000182000 0.392552240 0.124898020 0.427755430 +3.532005270 0.103964386 0.010257517 0.297123975 0.054904564 0.406697814 0.285047948 0.337229619 0.098631355 14.394052190 0.890598579 0.073127930 4.904842235 0.592587985 0.058971975 0.088256423 0.654109108 0.256900461 0.167581647 +0.04707195 0.05090995 0.07421393 0.04785995 0.02502197 0.03330397 0.05458695 0.07637292 0.01996398 0.06713393 0.07149793 0.05678494 0.01815098 0.03049597 0.05065595 0.08840891 0.07433893 0.01852398 0.03147397 0.06322894; +[ NOTE 2019-06-04: normalised from FLU freqs in PhyML, which do not sum 1.0 ] - const double AA_SCALE = 10.0; - scaler = AA_SCALE / max; - - /* SCALING HAS BEEN RE-INTRODUCED TO RESOLVE NUMERICAL PROBLEMS */ - - for(i = 0; i < 20; i++) - for(j = 0; j < 20; j++) - { - daa[i*20+j] *= scaler; - } - return true; -} - -string model_WAG = -"0.551571 \ -0.509848 0.635346 \ -0.738998 0.147304 5.429420 \ -1.027040 0.528191 0.265256 0.0302949 \ -0.908598 3.035500 1.543640 0.616783 0.0988179 \ -1.582850 0.439157 0.947198 6.174160 0.021352 5.469470 \ -1.416720 0.584665 1.125560 0.865584 0.306674 0.330052 0.567717 \ -0.316954 2.137150 3.956290 0.930676 0.248972 4.294110 0.570025 0.249410 \ -0.193335 0.186979 0.554236 0.039437 0.170135 0.113917 0.127395 0.0304501 0.138190 \ -0.397915 0.497671 0.131528 0.0848047 0.384287 0.869489 0.154263 0.0613037 0.499462 3.170970 \ -0.906265 5.351420 3.012010 0.479855 0.0740339 3.894900 2.584430 0.373558 0.890432 0.323832 0.257555 \ -0.893496 0.683162 0.198221 0.103754 0.390482 1.545260 0.315124 0.174100 0.404141 4.257460 4.854020 0.934276 \ -0.210494 0.102711 0.0961621 0.0467304 0.398020 0.0999208 0.0811339 0.049931 0.679371 1.059470 2.115170 0.088836 1.190630 \ -1.438550 0.679489 0.195081 0.423984 0.109404 0.933372 0.682355 0.243570 0.696198 0.0999288 0.415844 0.556896 0.171329 0.161444 \ -3.370790 1.224190 3.974230 1.071760 1.407660 1.028870 0.704939 1.341820 0.740169 0.319440 0.344739 0.967130 0.493905 0.545931 1.613280 \ -2.121110 
0.554413 2.030060 0.374866 0.512984 0.857928 0.822765 0.225833 0.473307 1.458160 0.326622 1.386980 1.516120 0.171903 0.795384 4.378020 \ -0.113133 1.163920 0.0719167 0.129767 0.717070 0.215737 0.156557 0.336983 0.262569 0.212483 0.665309 0.137505 0.515706 1.529640 0.139405 0.523742 0.110864 \ -0.240735 0.381533 1.086000 0.325711 0.543833 0.227710 0.196303 0.103604 3.873440 0.420170 0.398618 0.133264 0.428437 6.454280 0.216046 0.786993 0.291148 2.485390 \ -2.006010 0.251849 0.196246 0.152335 1.002140 0.301281 0.588731 0.187247 0.118358 7.821300 1.800340 0.305434 2.058450 0.649892 0.314887 0.232739 1.388230 0.365369 0.314730 \ -\ -0.0866279 0.043972 0.0390894 0.0570451 0.0193078 0.0367281 0.0580589 0.0832518 0.0244313 0.048466 0.086209 0.0620286 0.0195027 0.0384319 0.0457631 0.0695179 0.0610127 0.0143859 0.0352742 0.0708956"; - -string model_cpREV = -" 105\ - 227 357\ - 175 43 4435\ - 669 823 538 10\ - 157 1745 768 400 10\ - 499 152 1055 3691 10 3122\ - 665 243 653 431 303 133 379\ - 66 715 1405 331 441 1269 162 19\ - 145 136 168 10 280 92 148 40 29\ - 197 203 113 10 396 286 82 20 66 1745\ - 236 4482 2430 412 48 3313 2629 263 305 345 218\ - 185 125 61 47 159 202 113 21 10 1772 1351 193\ - 68 53 97 22 726 10 145 25 127 454 1268 72 327\ - 490 87 173 170 285 323 185 28 152 117 219 302 100 43\ - 2440 385 2085 590 2331 396 568 691 303 216 516 868 93 487 1202\ - 1340 314 1393 266 576 241 369 92 32 1040 156 918 645 148 260 2151\ - 14 230 40 18 435 53 63 82 69 42 159 10 86 468 49 73 29\ - 56 323 754 281 1466 391 142 10 1971 89 189 247 215 2370 97 522 71 346\ - 968 92 83 75 592 54 200 91 25 4797 865 249 475 317 122 167 760 10 119\ -\ - 0.0755 0.0621 0.0410 0.0371 0.0091 0.0382 0.0495 0.0838 0.0246 0.0806\ - 0.1011 0.0504 0.0220 0.0506 0.0431 0.0622 0.0543 0.0181 0.0307 0.0660"; - -string model_mtREV = -" 23.18\ - 26.95 13.24\ - 17.67 1.90 794.38\ - 59.93 103.33 58.94 1.90\ - 1.90 220.99 173.56 55.28 75.24\ - 9.77 1.90 63.05 583.55 1.90 313.56\ - 120.71 23.03 53.30 56.77 30.71 
6.75 28.28\ - 13.90 165.23 496.13 113.99 141.49 582.40 49.12 1.90\ - 96.49 1.90 27.10 4.34 62.73 8.34 3.31 5.98 12.26\ - 25.46 15.58 15.16 1.90 25.65 39.70 1.90 2.41 11.49 329.09\ - 8.36 141.40 608.70 2.31 1.90 465.58 313.86 22.73 127.67 19.57 14.88\ - 141.88 1.90 65.41 1.90 6.18 47.37 1.90 1.90 11.97 517.98 537.53 91.37\ - 6.37 4.69 15.20 4.98 70.80 19.11 2.67 1.90 48.16 84.67 216.06 6.44 90.82\ - 54.31 23.64 73.31 13.43 31.26 137.29 12.83 1.90 60.97 20.63 40.10 50.10 18.84 17.31\ - 387.86 6.04 494.39 69.02 277.05 54.11 54.71 125.93 77.46 47.70 73.61 105.79 111.16 64.29 169.90\ - 480.72 2.08 238.46 28.01 179.97 94.93 14.82 11.17 44.78 368.43 126.40 136.33 528.17 33.85 128.22 597.21\ - 1.90 21.95 10.68 19.86 33.60 1.90 1.90 10.92 7.08 1.90 32.44 24.00 21.71 7.84 4.21 38.58 9.99\ - 6.48 1.90 191.36 21.21 254.77 38.82 13.12 3.21 670.14 25.01 44.15 51.17 39.96 465.58 16.21 64.92 38.73 26.25\ - 195.06 7.64 1.90 1.90 1.90 19.00 21.14 2.53 1.90 1222.94 91.67 1.90 387.54 6.35 8.23 1.90 204.54 5.37 1.90\ -\ -0.072 0.019 0.039 0.019 0.006 0.025 0.024 0.056 0.028 0.088 0.169\ -0.023 0.054 0.061 0.054 0.072 0.086 0.029 0.033 0.043"; - -/* -string model_Dayhoff = -" 27 \ - 98 32 \ -120 0 905 \ - 36 23 0 0 \ - 89 246 103 134 0 \ -198 1 148 1153 0 716 \ -240 9 139 125 11 28 81 \ - 23 240 535 86 28 606 43 10 \ - 65 64 77 24 44 18 61 0 7 \ - 41 15 34 0 0 73 11 7 44 257 \ - 26 464 318 71 0 153 83 27 26 46 18 \ - 72 90 1 0 0 114 30 17 0 336 527 243 \ - 18 14 14 0 0 0 0 15 48 196 157 0 92 \ -250 103 42 13 19 153 51 34 94 12 32 33 17 11 \ -409 154 495 95 161 56 79 234 35 24 17 96 62 46 245 \ -371 26 229 66 16 53 34 30 22 192 33 136 104 13 78 550 \ - 0 201 23 0 0 0 0 0 27 0 46 0 0 76 0 75 0 \ - 24 8 95 0 96 0 22 0 127 37 28 13 0 698 0 34 42 61 \ -208 24 15 18 49 35 37 54 44 889 175 10 258 12 48 30 157 0 28 \ -\ -0.087127 0.040904 0.040432 0.046872 0.033474 0.038255 0.049530\ -0.088612 0.033618 0.036886 0.085357 0.080482 0.014753 0.039772\ -0.050680 0.069577 0.058542 0.010494 0.029916 
0.064718"; -*/ -string model_mtMAM = -" 32 \ - 2 4 \ - 11 0 864 \ - 0 186 0 0 \ - 0 246 8 49 0 \ - 0 0 0 569 0 274 \ - 78 18 47 79 0 0 22 \ - 8 232 458 11 305 550 22 0 \ - 75 0 19 0 41 0 0 0 0 \ - 21 6 0 0 27 20 0 0 26 232 \ - 0 50 408 0 0 242 215 0 0 6 4 \ - 76 0 21 0 0 22 0 0 0 378 609 59 \ - 0 0 6 5 7 0 0 0 0 57 246 0 11 \ - 53 9 33 2 0 51 0 0 53 5 43 18 0 17 \ -342 3 446 16 347 30 21 112 20 0 74 65 47 90 202 \ -681 0 110 0 114 0 4 0 1 360 34 50 691 8 78 614 \ - 5 16 6 0 65 0 0 0 0 0 12 0 13 0 7 17 0 \ - 0 0 156 0 530 54 0 1 1525 16 25 67 0 682 8 107 0 14 \ -398 0 0 10 0 33 20 5 0 2220 100 0 832 6 0 0 237 0 0 \ -\ -0.0692 0.0184 0.0400 0.0186 0.0065 0.0238 0.0236 0.0557 0.0277 0.0905\ -0.1675 0.0221 0.0561 0.0611 0.0536 0.0725 0.0870 0.0293 0.0340 0.0428"; - - -string model_JTT= -" 58 \ - 54 45 \ - 81 16 528 \ - 56 113 34 10 \ - 57 310 86 49 9 \ -105 29 58 767 5 323 \ -179 137 81 130 59 26 119 \ - 27 328 391 112 69 597 26 23 \ - 36 22 47 11 17 9 12 6 16 \ - 30 38 12 7 23 72 9 6 56 229 \ - 35 646 263 26 7 292 181 27 45 21 14 \ - 54 44 30 15 31 43 18 14 33 479 388 65 \ - 15 5 10 4 78 4 5 5 40 89 248 4 43 \ -194 74 15 15 14 164 18 24 115 10 102 21 16 17 \ -378 101 503 59 223 53 30 201 73 40 59 47 29 92 285 \ -475 64 232 38 42 51 32 33 46 245 25 103 226 12 118 477 \ - 9 126 8 4 115 18 10 55 8 9 52 10 24 53 6 35 12 \ - 11 20 70 46 209 24 7 8 573 32 24 8 18 536 10 63 21 71 \ -298 17 16 31 62 20 45 47 11 961 180 14 323 62 23 38 112 25 16 \ -\ -0.076748 0.051691 0.042645 0.051544 0.019803 0.040752 0.061830\ -0.073152 0.022944 0.053761 0.091904 0.058676 0.023826 0.040126\ -0.050901 0.068765 0.058565 0.014261 0.032102 0.066005"; - -string model_LG = -"0.425093 \ -0.276818 0.751878 \ -0.395144 0.123954 5.076149 \ -2.489084 0.534551 0.528768 0.062556 \ -0.969894 2.807908 1.695752 0.523386 0.084808 \ -1.038545 0.363970 0.541712 5.243870 0.003499 4.128591 \ -2.066040 0.390192 1.437645 0.844926 0.569265 0.267959 0.348847 \ -0.358858 2.426601 4.509238 0.927114 0.640543 4.813505 
0.423881 0.311484 \ -0.149830 0.126991 0.191503 0.010690 0.320627 0.072854 0.044265 0.008705 0.108882 \ -0.395337 0.301848 0.068427 0.015076 0.594007 0.582457 0.069673 0.044261 0.366317 4.145067 \ -0.536518 6.326067 2.145078 0.282959 0.013266 3.234294 1.807177 0.296636 0.697264 0.159069 0.137500 \ -1.124035 0.484133 0.371004 0.025548 0.893680 1.672569 0.173735 0.139538 0.442472 4.273607 6.312358 0.656604 \ -0.253701 0.052722 0.089525 0.017416 1.105251 0.035855 0.018811 0.089586 0.682139 1.112727 2.592692 0.023918 1.798853 \ -1.177651 0.332533 0.161787 0.394456 0.075382 0.624294 0.419409 0.196961 0.508851 0.078281 0.249060 0.390322 0.099849 0.094464 \ -4.727182 0.858151 4.008358 1.240275 2.784478 1.223828 0.611973 1.739990 0.990012 0.064105 0.182287 0.748683 0.346960 0.361819 1.338132 \ -2.139501 0.578987 2.000679 0.425860 1.143480 1.080136 0.604545 0.129836 0.584262 1.033739 0.302936 1.136863 2.020366 0.165001 0.571468 6.472279 \ -0.180717 0.593607 0.045376 0.029890 0.670128 0.236199 0.077852 0.268491 0.597054 0.111660 0.619632 0.049906 0.696175 2.457121 0.095131 0.248862 0.140825 \ -0.218959 0.314440 0.612025 0.135107 1.165532 0.257336 0.120037 0.054679 5.306834 0.232523 0.299648 0.131932 0.481306 7.803902 0.089613 0.400547 0.245841 3.151815 \ -2.547870 0.170887 0.083688 0.037967 1.959291 0.210332 0.245034 0.076701 0.119013 10.649107 1.702745 0.185202 1.898718 0.654683 0.296501 0.098369 2.188158 0.189510 0.249313 \ -\ -0.079066 0.055941 0.041977 0.053052 0.012937 0.040767 0.071586 0.057337 0.022355 0.062157 0.099081 0.064600 0.022951 0.042302 0.044040 0.061197 0.053287 0.012066 0.034155 0.069147"; - -string model_mtART = -"0.2 \ -0.2 0.2 \ -1 4 500 \ -254 36 98 11 \ -0.2 154 262 0.2 0.2 \ -0.2 0.2 183 862 0.2 262 \ -200 0.2 121 12 81 3 44 \ -0.2 41 180 0.2 12 314 15 0.2 \ -26 2 21 7 63 11 7 3 0.2 \ -4 2 13 1 79 16 2 1 6 515 \ -0.2 209 467 2 0.2 349 106 0.2 0.2 3 4 \ -121 5 79 0.2 312 67 0.2 56 0.2 515 885 106 \ -13 5 20 0.2 184 0.2 0.2 1 14 118 263 11 322 \ -49 
0.2 17 0.2 0.2 39 8 0.2 1 0.2 12 17 5 15 \ -673 3 398 44 664 52 31 226 11 7 8 144 112 36 87 \ -244 0.2 166 0.2 183 44 43 0.2 19 204 48 70 289 14 47 660 \ -0.2 0.2 8 0.2 22 7 11 2 0.2 0.2 21 16 71 54 0.2 2 0.2 \ -1 4 251 0.2 72 87 8 9 191 12 20 117 71 792 18 30 46 38 \ -340 0.2 23 0.2 350 0.2 14 3 0.2 1855 85 26 281 52 32 61 544 0.2 2 \ -\ -0.054116 0.018227 0.039903 0.020160 0.009709 0.018781 0.024289 0.068183 0.024518 0.092638 \ -0.148658 0.021718 0.061453 0.088668 0.041826 0.091030 0.049194 0.029786 0.039443 0.057700"; - - -string model_mtZOA = -" 3.3\ - 1.7 33.6\ - 16.1 3.2 617.0\ - 272.5 61.1 94.6 9.5\ - 7.3 231.0 190.3 19.3 49.1\ - 17.1 6.4 174.0 883.6 3.4 349.4\ - 289.3 7.2 99.3 26.0 82.4 8.9 43.1\ - 2.3 61.7 228.9 55.6 37.5 421.8 14.9 7.4\ - 33.2 0.2 24.3 1.5 48.8 0.2 7.3 3.4 1.6\ - 15.6 4.1 7.9 0.5 59.7 23.0 1.0 3.5 6.6 425.2\ - 0.2 292.3 413.4 0.2 0.2 334.0 163.2 10.1 23.9 8.4 6.7\ - 136.5 3.8 73.7 0.2 264.8 83.9 0.2 52.2 7.1 449.7 636.3 83.0\ - 26.5 0.2 12.9 2.0 167.8 9.5 0.2 5.8 13.1 90.3 234.2 16.3 215.6\ - 61.8 7.5 22.6 0.2 8.1 52.2 20.6 1.3 15.6 2.6 11.4 24.3 5.4 10.5\ - 644.9 11.8 420.2 51.4 656.3 96.4 38.4 257.1 23.1 7.2 15.2 144.9 95.3 32.2 79.7\ - 378.1 3.2 184.6 2.3 199.0 39.4 34.5 5.2 19.4 222.3 50.0 75.5 305.1 19.3 56.9 666.3\ - 3.1 16.9 6.4 0.2 36.1 6.1 3.5 12.3 4.5 9.7 27.2 6.6 48.7 58.2 1.3 10.3 3.6\ - 2.1 13.8 141.6 13.9 76.7 52.3 10.0 4.3 266.5 13.1 5.7 45.0 41.4 590.5 4.2 29.7 29.0 79.8\ - 321.9 5.1 7.1 3.7 243.8 9.0 16.3 23.7 0.3 1710.6 126.1 11.1 279.6 59.6 17.9 49.5 396.4 13.7 15.6 \ -\ - 0.068880 0.021037 0.030390 0.020696 0.009966 0.018623 0.024989 0.071968 0.026814 0.085072 0.156717 0.019276 0.050652 0.081712 0.044803 0.080535 0.056386 0.027998 0.037404 0.066083"; - -void get_rtREV(double **q, double *f) { - /* rtRev */ - q[ 0][ 0] = 0; q[ 1][ 0] = 34; q[ 2][ 0] = 51; q[ 3][ 0] = 10; q[ 4][ 0] = 439; - q[ 5][ 0] = 32; q[ 6][ 0] = 81; q[ 7][ 0] = 135; q[ 8][ 0] = 30; q[ 9][ 0] = 1; - q[10][ 0] = 45; q[11][ 0] = 38; q[12][ 0] = 235; 
q[13][ 0] = 1; q[14][ 0] = 97; - q[15][ 0] = 460; q[16][ 0] = 258; q[17][ 0] = 5; q[18][ 0] = 55; q[19][ 0] = 197; - - q[ 0][ 1] = 34; q[ 1][ 1] = 0; q[ 2][ 1] = 35; q[ 3][ 1] = 30; q[ 4][ 1] = 92; - q[ 5][ 1] = 221; q[ 6][ 1] = 10; q[ 7][ 1] = 41; q[ 8][ 1] = 90; q[ 9][ 1] = 24; - q[10][ 1] = 18; q[11][ 1] = 593; q[12][ 1] = 57; q[13][ 1] = 7; q[14][ 1] = 24; - q[15][ 1] = 102; q[16][ 1] = 64; q[17][ 1] = 13; q[18][ 1] = 47; q[19][ 1] = 29; - - q[ 0][ 2] = 51; q[ 1][ 2] = 35; q[ 2][ 2] = 0; q[ 3][ 2] = 384; q[ 4][ 2] = 128; - q[ 5][ 2] = 236; q[ 6][ 2] = 79; q[ 7][ 2] = 94; q[ 8][ 2] = 320; q[ 9][ 2] = 35; - q[10][ 2] = 15; q[11][ 2] = 123; q[12][ 2] = 1; q[13][ 2] = 49; q[14][ 2] = 33; - q[15][ 2] = 294; q[16][ 2] = 148; q[17][ 2] = 16; q[18][ 2] = 28; q[19][ 2] = 21; - - q[ 0][ 3] = 10; q[ 1][ 3] = 30; q[ 2][ 3] = 384; q[ 3][ 3] = 0; q[ 4][ 3] = 1; - q[ 5][ 3] = 78; q[ 6][ 3] = 542; q[ 7][ 3] = 61; q[ 8][ 3] = 91; q[ 9][ 3] = 1; - q[10][ 3] = 5; q[11][ 3] = 20; q[12][ 3] = 1; q[13][ 3] = 1; q[14][ 3] = 55; - q[15][ 3] = 136; q[16][ 3] = 55; q[17][ 3] = 1; q[18][ 3] = 1; q[19][ 3] = 6; - - q[ 0][ 4] = 439; q[ 1][ 4] = 92; q[ 2][ 4] = 128; q[ 3][ 4] = 1; q[ 4][ 4] = 0; - q[ 5][ 4] = 70; q[ 6][ 4] = 1; q[ 7][ 4] = 48; q[ 8][ 4] = 124; q[ 9][ 4] = 104; - q[10][ 4] = 110; q[11][ 4] = 16; q[12][ 4] = 156; q[13][ 4] = 70; q[14][ 4] = 1; - q[15][ 4] = 75; q[16][ 4] = 117; q[17][ 4] = 55; q[18][ 4] = 131; q[19][ 4] = 295; - - q[ 0][ 5] = 32; q[ 1][ 5] = 221; q[ 2][ 5] = 236; q[ 3][ 5] = 78; q[ 4][ 5] = 70; - q[ 5][ 5] = 0; q[ 6][ 5] = 372; q[ 7][ 5] = 18; q[ 8][ 5] = 387; q[ 9][ 5] = 33; - q[10][ 5] = 54; q[11][ 5] = 309; q[12][ 5] = 158; q[13][ 5] = 1; q[14][ 5] = 68; - q[15][ 5] = 225; q[16][ 5] = 146; q[17][ 5] = 10; q[18][ 5] = 45; q[19][ 5] = 36; - - q[ 0][ 6] = 81; q[ 1][ 6] = 10; q[ 2][ 6] = 79; q[ 3][ 6] = 542; q[ 4][ 6] = 1; - q[ 5][ 6] = 372; q[ 6][ 6] = 0; q[ 7][ 6] = 70; q[ 8][ 6] = 34; q[ 9][ 6] = 1; - q[10][ 6] = 21; q[11][ 6] = 141; q[12][ 6] = 1; q[13][ 
6] = 1; q[14][ 6] = 52; - q[15][ 6] = 95; q[16][ 6] = 82; q[17][ 6] = 17; q[18][ 6] = 1; q[19][ 6] = 35; - - q[ 0][ 7] = 135; q[ 1][ 7] = 41; q[ 2][ 7] = 94; q[ 3][ 7] = 61; q[ 4][ 7] = 48; - q[ 5][ 7] = 18; q[ 6][ 7] = 70; q[ 7][ 7] = 0; q[ 8][ 7] = 68; q[ 9][ 7] = 1; - q[10][ 7] = 3; q[11][ 7] = 30; q[12][ 7] = 37; q[13][ 7] = 7; q[14][ 7] = 17; - q[15][ 7] = 152; q[16][ 7] = 7; q[17][ 7] = 23; q[18][ 7] = 21; q[19][ 7] = 3; - - q[ 0][ 8] = 30; q[ 1][ 8] = 90; q[ 2][ 8] = 320; q[ 3][ 8] = 91; q[ 4][ 8] = 124; - q[ 5][ 8] = 387; q[ 6][ 8] = 34; q[ 7][ 8] = 68; q[ 8][ 8] = 0; q[ 9][ 8] = 34; - q[10][ 8] = 51; q[11][ 8] = 76; q[12][ 8] = 116; q[13][ 8] = 141; q[14][ 8] = 44; - q[15][ 8] = 183; q[16][ 8] = 49; q[17][ 8] = 48; q[18][ 8] = 307; q[19][ 8] = 1; - - q[ 0][ 9] = 1; q[ 1][ 9] = 24; q[ 2][ 9] = 35; q[ 3][ 9] = 1; q[ 4][ 9] = 104; - q[ 5][ 9] = 33; q[ 6][ 9] = 1; q[ 7][ 9] = 1; q[ 8][ 9] = 34; q[ 9][ 9] = 0; - q[10][ 9] = 385; q[11][ 9] = 34; q[12][ 9] = 375; q[13][ 9] = 64; q[14][ 9] = 10; - q[15][ 9] = 4; q[16][ 9] = 72; q[17][ 9] = 39; q[18][ 9] = 26; q[19][ 9] =1048; - - q[ 0][10] = 45; q[ 1][10] = 18; q[ 2][10] = 15; q[ 3][10] = 5; q[ 4][10] = 110; - q[ 5][10] = 54; q[ 6][10] = 21; q[ 7][10] = 3; q[ 8][10] = 51; q[ 9][10] = 385; - q[10][10] = 0; q[11][10] = 23; q[12][10] = 581; q[13][10] = 179; q[14][10] = 22; - q[15][10] = 24; q[16][10] = 25; q[17][10] = 47; q[18][10] = 64; q[19][10] = 112; - - q[ 0][11] = 38; q[ 1][11] = 593; q[ 2][11] = 123; q[ 3][11] = 20; q[ 4][11] = 16; - q[ 5][11] = 309; q[ 6][11] = 141; q[ 7][11] = 30; q[ 8][11] = 76; q[ 9][11] = 34; - q[10][11] = 23; q[11][11] = 0; q[12][11] = 134; q[13][11] = 14; q[14][11] = 43; - q[15][11] = 77; q[16][11] = 110; q[17][11] = 6; q[18][11] = 1; q[19][11] = 19; - - q[ 0][12] = 235; q[ 1][12] = 57; q[ 2][12] = 1; q[ 3][12] = 1; q[ 4][12] = 156; - q[ 5][12] = 158; q[ 6][12] = 1; q[ 7][12] = 37; q[ 8][12] = 116; q[ 9][12] = 375; - q[10][12] = 581; q[11][12] = 134; q[12][12] = 0; q[13][12] = 247; 
q[14][12] = 1; - q[15][12] = 1; q[16][12] = 131; q[17][12] = 111; q[18][12] = 74; q[19][12] = 236; - - q[ 0][13] = 1; q[ 1][13] = 7; q[ 2][13] = 49; q[ 3][13] = 1; q[ 4][13] = 70; - q[ 5][13] = 1; q[ 6][13] = 1; q[ 7][13] = 7; q[ 8][13] = 141; q[ 9][13] = 64; - q[10][13] = 179; q[11][13] = 14; q[12][13] = 247; q[13][13] = 0; q[14][13] = 11; - q[15][13] = 20; q[16][13] = 69; q[17][13] = 182; q[18][13] =1017; q[19][13] = 92; - - q[ 0][14] = 97; q[ 1][14] = 24; q[ 2][14] = 33; q[ 3][14] = 55; q[ 4][14] = 1; - q[ 5][14] = 68; q[ 6][14] = 52; q[ 7][14] = 17; q[ 8][14] = 44; q[ 9][14] = 10; - q[10][14] = 22; q[11][14] = 43; q[12][14] = 1; q[13][14] = 11; q[14][14] = 0; - q[15][14] = 134; q[16][14] = 62; q[17][14] = 9; q[18][14] = 14; q[19][14] = 25; - - q[ 0][15] = 460; q[ 1][15] = 102; q[ 2][15] = 294; q[ 3][15] = 136; q[ 4][15] = 75; - q[ 5][15] = 225; q[ 6][15] = 95; q[ 7][15] = 152; q[ 8][15] = 183; q[ 9][15] = 4; - q[10][15] = 24; q[11][15] = 77; q[12][15] = 1; q[13][15] = 20; q[14][15] = 134; - q[15][15] = 0; q[16][15] = 671; q[17][15] = 14; q[18][15] = 31; q[19][15] = 39; - - q[ 0][16] = 258; q[ 1][16] = 64; q[ 2][16] = 148; q[ 3][16] = 55; q[ 4][16] = 117; - q[ 5][16] = 146; q[ 6][16] = 82; q[ 7][16] = 7; q[ 8][16] = 49; q[ 9][16] = 72; - q[10][16] = 25; q[11][16] = 110; q[12][16] = 131; q[13][16] = 69; q[14][16] = 62; - q[15][16] = 671; q[16][16] = 0; q[17][16] = 1; q[18][16] = 34; q[19][16] = 196; - - q[ 0][17] = 5; q[ 1][17] = 13; q[ 2][17] = 16; q[ 3][17] = 1; q[ 4][17] = 55; - q[ 5][17] = 10; q[ 6][17] = 17; q[ 7][17] = 23; q[ 8][17] = 48; q[ 9][17] = 39; - q[10][17] = 47; q[11][17] = 6; q[12][17] = 111; q[13][17] = 182; q[14][17] = 9; - q[15][17] = 14; q[16][17] = 1; q[17][17] = 0; q[18][17] = 176; q[19][17] = 26; - - q[ 0][18] = 55; q[ 1][18] = 47; q[ 2][18] = 28; q[ 3][18] = 1; q[ 4][18] = 131; - q[ 5][18] = 45; q[ 6][18] = 1; q[ 7][18] = 21; q[ 8][18] = 307; q[ 9][18] = 26; - q[10][18] = 64; q[11][18] = 1; q[12][18] = 74; q[13][18] =1017; q[14][18] = 14; 
- q[15][18] = 31; q[16][18] = 34; q[17][18] = 176; q[18][18] = 0; q[19][18] = 59; - - q[ 0][19] = 197; q[ 1][19] = 29; q[ 2][19] = 21; q[ 3][19] = 6; q[ 4][19] = 295; - q[ 5][19] = 36; q[ 6][19] = 35; q[ 7][19] = 3; q[ 8][19] = 1; q[ 9][19] =1048; - q[10][19] = 112; q[11][19] = 19; q[12][19] = 236; q[13][19] = 92; q[14][19] = 25; - q[15][19] = 39; q[16][19] = 196; q[17][19] = 26; q[18][19] = 59; q[19][19] = 0; - - f[ 0] = 0.0646; - f[ 1] = 0.0453; - f[ 2] = 0.0376; - f[ 3] = 0.0422; - f[ 4] = 0.0114; - f[ 5] = 0.0606; - f[ 6] = 0.0607; - f[ 7] = 0.0639; - f[ 8] = 0.0273; - f[ 9] = 0.0679; - f[10] = 0.1018; - f[11] = 0.0751; - f[12] = 0.0150; - f[13] = 0.0287; - f[14] = 0.0681; - f[15] = 0.0488; - f[16] = 0.0622; - f[17] = 0.0251; - f[18] = 0.0318; - f[19] = 0.0619; +model MTMET= +0.058078177576542 +0.032893910131824 0.141364232590718 +0.119156819252943 0.049700397089876 4.658418673473980 +0.633255658023246 0.739813635055843 0.292999912100000 0.077399976780000 +0.052454931263516 2.673107287067570 0.832791283162540 0.131355662593289 0.152595162221438 +0.179163834250834 0.080835456749356 0.812240880327663 6.033787171863310 0.050599984820000 2.236616952014710 +1.465861840241320 0.219967058009863 0.543750593874773 0.630753109774010 0.914125315762323 0.072395514281339 0.768853064344011 +0.030192120942361 1.522256408322940 1.738679122396110 0.479790968062666 0.603999818800000 4.518449535464730 0.105414703375579 0.025252648424203 +0.367600338719865 0.012428572271427 0.244934691519570 0.010668852799343 0.235804174258726 0.008875683337294 0.013999995800000 0.013799995860000 0.017140133857958 +0.109872733038170 0.058179997545996 0.046299986110000 0.005529142341257 0.299518907144301 0.254452390664260 0.019157613252714 0.027264545820634 0.111638903508319 1.897973798607690 +0.020509501847148 1.057185315844310 2.530397670880470 0.049007441297763 0.015799995260000 1.827217637834540 1.379217369234670 0.134187134743847 0.135153622453901 0.064936591519017 0.061324501602644 
+0.653363796990802 0.013494029951790 0.399827603051683 0.026109939167016 0.492339996297957 0.237094294871690 0.128410015476984 0.145331422400560 0.032834304149706 2.918352332494040 3.425552681333890 0.659310562206772 +0.062762236171324 0.008039997588000 0.138999958300000 0.012599996220000 0.925810586256741 0.026306317108103 0.017716302685108 0.068139260558216 0.090353039894080 0.750900315729838 1.811100689669630 0.097099970870000 0.748424772472501 +0.408076930576884 0.155008519497430 0.080299975910000 0.044609549617131 0.029399991180000 0.849512180146269 0.048786284364110 0.005914204225738 0.519954219013687 0.024850013544994 0.270260699921766 0.121234884629524 0.032699990190000 0.054271872718433 +2.771685183494200 0.197379125786245 2.634377723686450 0.360804672758566 3.283013886095540 0.384800168559915 0.363104357068660 1.746569621028960 0.297585994724175 0.096272835118141 0.311525037542461 0.695087919473562 0.458733958379771 0.499349751195030 1.231180449645750 +6.730883140734450 0.056079796176056 0.961284804614472 0.102136190359134 0.338668094399541 0.274195864741216 0.134802630559199 0.024558282632513 0.221010542696817 2.453457406962560 0.253366627989989 0.393851585844489 3.035214815435280 0.053947726815677 0.734604689618527 3.114741972577130 +0.013599995920000 0.370819780754032 0.048999985300000 0.040899987730000 1.018410179476850 0.122999963100000 0.086028769191362 0.233963300810989 0.037499988750000 0.028656788402961 0.253242937027096 0.073499977950000 0.167575267727405 0.330781828765422 0.029433857169840 0.169211978236391 0.014378611686415 +0.014501402649578 0.127519293744200 1.020785184764350 0.160289909913013 1.967370664788620 0.319105692268264 0.093214693035584 0.046746326976098 3.907917378624430 0.135319420404162 0.123555294933400 0.281699089490248 0.316598936020291 3.209082340275010 0.053999983800000 0.374184173744714 0.091031759690464 0.481044171686705 +2.815162240451070 0.041063671680895 0.051741611477512 0.084589003623291 1.394527625641590 
0.027669224699230 0.227826982651885 0.417148828855314 0.003511006946698 10.953422553972199 0.958273455517877 0.055461418361570 2.562484126254530 0.466243302126967 0.054078516776440 0.267109384867160 1.514059219782100 0.093136228059123 0.069964519010638 +0.043793213137964 0.012957803887341 0.057001317100395 0.016899005069702 0.011330503399151 0.018018105405432 0.022538506761552 0.047050114115034 0.017183705155112 0.089779426933828 0.155226046567814 0.039913511974054 0.067444320233296 0.088448026534408 0.037528211258463 0.093752228125668 0.063579019073706 0.022671306801392 0.041568212470464 0.053317415995225; + +model MTVER= +0.064684593342281 +0.032311373155679 0.153152767576346 +0.159447300723611 0.060872136436053 8.760497104246360 +0.246553724276801 1.223667683833540 0.336000168000000 0.121000060500000 +0.030870542435264 3.170387139192780 0.392967223483514 0.083338219669089 0.116173343086643 +0.191894227947066 0.094355879177916 0.335734154866994 6.603638632817670 0.036400018200000 2.018143897071440 +1.094200939100200 0.292471267235560 0.566907089453403 0.999256939628220 0.916865240432391 0.042512995256487 1.162715978357700 +0.023294179647084 2.204735655367280 2.462271663135220 0.818112173055882 1.400000700000000 4.282733559365710 0.081141411570686 0.013333256666625 +0.412485044242419 0.001173176586588 0.126395609197773 0.002450024225012 0.067512580756274 0.004179092089545 0.001250000625000 0.003120001560000 0.022239993119991 +0.078175506087734 0.078821952410957 0.004520002260000 0.002303982151991 0.170993012496464 0.251583784791830 0.007547828773913 0.007030631515314 0.148659642329784 1.378981921490620 +0.020785858392924 0.418858706429249 2.458223875111320 0.019527849763920 0.024800012400000 2.001705573852290 2.154179684089300 0.106373697186822 0.176407170203541 0.005826060913029 0.022236503118246 +0.751880839940232 0.003339427669713 0.036342279171131 0.005454655727327 0.084565948282953 0.118300117150029 0.064939181469575 0.034441699220841 0.024341806170897 
2.709085270541960 3.102549285273870 0.440831886415833 +0.064446525223247 0.002480001240000 0.010300005150000 0.005850002925000 1.071950287974880 0.012628428314211 0.000332940166470 0.015567819783906 0.173363959681937 0.516016662008202 2.403419459709130 0.010400005200000 0.123894605947272 +0.285569393784626 0.223106325553107 0.034500017250000 0.027518484759236 0.018900009450000 0.918408714204128 0.027495640747814 0.001858863929432 0.679485296742479 0.018484697242344 0.366808261404039 0.141827309913619 0.030900015450000 0.070221307110636 +2.481817894908330 0.135104231552082 3.854777098387590 0.371726663863239 3.359148754573540 0.243906745953312 0.097749778874865 1.247283625641500 0.445289314644546 0.044145011072495 0.440667041333411 0.187506484753196 0.137172898586415 0.886724514362035 1.961395669697340 +5.562701612349420 0.028643207321597 0.865443620721594 0.090462201231078 0.152312893156409 0.113601717800831 0.095055222527588 0.008518901259449 0.150502737251331 2.439178561588670 0.173310979655447 0.399752584876193 3.736806509402320 0.061993007996489 0.573808396904055 2.845373550686060 +0.008420004210000 0.558540909270315 0.006680003340000 0.026900013450000 1.778859743429430 0.124000062000000 0.062341391170680 0.258646151323011 0.030600015300000 0.001901139950570 0.186367686183797 0.029400014700000 0.056840260420116 0.091906956953456 0.020008909004450 0.144081317040623 0.003186142593071 +0.012484943242469 0.146342752171340 0.856969809484690 0.186328057163982 5.248219583108480 0.228833221416554 0.037356125678054 0.021047207523599 9.175812436903930 0.045429850714914 0.103344604672277 0.062036757018363 0.079617080808521 3.987260862629440 0.071900035950000 0.543167000583365 0.084368299184129 0.288348349174103 +3.196938070468240 0.037629752814867 0.013796210898102 0.128208017103977 0.381860130929970 0.010633051316523 0.216260948130420 0.442992988496384 0.002759961379980 13.505144232568700 0.856259496129534 0.025253726626857 4.499326020661890 0.336159506079669 
0.025768082884035 0.058280992140482 1.317930995965170 0.047174784587381 0.028686388343187 +0.070820264589868 0.014049892975054 0.045209877395061 0.014793692603154 0.006814196592902 0.026340886829557 0.021495189252405 0.044239977880011 0.024230987884506 0.090735054632473 0.172309913845043 0.027381186309407 0.056193971903014 0.049775775112112 0.054386272806864 0.074421862789069 0.108809945595027 0.025652687173656 0.026484686757657 0.045853677073162; + +model MTINV= +0.074334218266301 +0.023989052404375 0.147604222958287 +0.100963342614647 0.070670662731724 3.471722830310310 +1.303663674534320 0.680711975715101 0.279969423012186 0.058303793678473 +0.090980768607678 2.507737031904790 1.287188991124200 0.204531412187402 0.163006102797533 +0.145240519903769 0.121635259345877 1.159082473366830 6.962509289995170 0.047068462172608 2.559965106013550 +1.931554281377980 0.217695106921922 0.551419739432016 0.591216665513239 0.883545088581823 0.114080027367971 0.599017294392986 +0.045194564922167 1.283366609653150 1.728634951545740 0.358935283425829 0.341191895523187 4.708030703786970 0.217387593044928 0.064352344259052 +0.209390225243876 0.028420823631666 0.310892189643074 0.017124804150076 0.287547507980951 0.025100268959888 0.020795063681971 0.024809855076054 0.025529460788212 +0.153999806400053 0.052763470894603 0.075114618954140 0.010524108790355 0.381393499442539 0.211473478410575 0.033099595760156 0.050781943687214 0.080842277663076 2.159573277170340 +0.014824597070159 1.631885373245590 2.642319727071690 0.096074070570356 0.004392611242955 1.933672173530820 1.172486892005060 0.156144354542233 0.250218937912385 0.100258540896568 0.081982070207159 +0.592864803853984 0.022385985045602 0.566989926203939 0.049089594364154 0.461585949365546 0.397060773175627 0.189458582216537 0.207637097945128 0.070807578676957 2.830888275644240 3.494915172033370 0.694473233210596 +0.111082013567177 0.015357906856835 0.194205108317926 0.017157066137171 0.887097013161053 0.052545508981788 
0.027169124132346 0.103612477554992 0.151813232274683 0.879509584196026 1.735138890944170 0.098766126493534 0.916539999383854 +0.629531950187119 0.130198008920776 0.224156768337257 0.104690593123746 0.069312726274898 0.581011232595414 0.120987616604934 0.028323516670589 0.312042686182876 0.070886389645433 0.154372892250818 0.183146598741331 0.092516642993328 0.105108133956730 +3.368752649498400 0.309426794229233 2.084469247211970 0.505643420742551 3.042784023885900 0.538789467484127 0.580233013906702 2.020140264943570 0.368422261631036 0.126690032323967 0.225488005804762 0.879840867063512 0.633585250565799 0.398961585415302 1.042509719995950 +3.327041763182760 0.108593900562422 1.186265342493670 0.141991497203378 0.736861629255230 0.554297413280946 0.241928620228513 0.043791194483515 0.270595503761755 1.958518039592470 0.298279746688054 0.511283878486367 2.192058660176190 0.121269516492174 0.746916803233159 3.833298761679880 +0.026501471399407 0.321740677303678 0.075398643840530 0.071630764347683 0.758571851571138 0.118046810781257 0.107622585950948 0.244698162120696 0.081171865531241 0.062889026844379 0.298463138614697 0.106016335593449 0.254195659321696 0.478876889449168 0.059737825104860 0.204318441272591 0.049859203056311 +0.021939769224089 0.182762501894970 1.139165041333800 0.192759935895995 1.342951983818990 0.476209340516188 0.137649645940120 0.072268115092742 2.321701900318870 0.177138315144646 0.161052630578922 0.348477651608884 0.393193176722666 3.234681510126880 0.081395316441860 0.345719568712117 0.174898633040519 0.636664640334042 +2.751362222454670 0.072153227138698 0.065420927831618 0.088077392769029 1.555396905840990 0.065221940911213 0.231186486525368 0.396428077428706 0.008043434782625 8.542589373962890 1.075872855650690 0.056252073499162 1.509852171058890 0.534676724129225 0.162975872809625 0.377288666084473 1.885338569864270 0.128598566560553 0.080346083861554 +0.031742312696925 0.010900704360282 0.061579224631690 0.016149206459683 
0.013570105428042 0.014644105857642 0.022311208924484 0.047847519139008 0.011641804656722 0.094322337728935 0.149407059762824 0.044438717775487 0.077262530905012 0.102287040914816 0.026290210516084 0.105939042375617 0.042869117147647 0.020701008280403 0.046556718622687 0.059540023816010; + +model Q.PFAM= +0.531344742 +0.266631781 0.610524242 +0.479415354 0.145193836 4.395589145 +2.490407258 0.797726764 0.617331366 0.086320436 +1.058818226 2.850794598 1.685541958 0.623180282 0.163023963 +1.178483844 0.358512949 0.572153867 4.775857514 0.004224284 4.045465925 +1.897932882 0.427923043 1.417171473 0.993358642 0.723368327 0.349584077 0.412573692 +0.453464468 2.765967911 4.395995003 0.944775779 1.220289602 4.992256584 0.444851397 0.432227777 +0.176454495 0.103023046 0.192557924 0.012280201 0.599859067 0.090487083 0.045755066 0.025135568 0.108027826 +0.419433650 0.307712278 0.070917051 0.019106538 0.827369996 0.609556427 0.066812844 0.070095729 0.420152907 4.316039810 +0.501174376 5.070603955 2.126974783 0.311739636 0.042113153 3.211891588 1.628511729 0.323774881 0.779002069 0.188076678 0.128912693 +1.175077280 0.565654138 0.405987508 0.044371788 1.330027314 1.704580053 0.217689890 0.196370346 0.488138895 5.052397990 6.674964742 0.790881216 +0.266730243 0.050284344 0.098902029 0.023281590 1.570975979 0.044860498 0.016021778 0.116848629 0.754840320 1.333037626 2.539936322 0.022355802 1.944915128 +1.371519672 0.396818483 0.230865860 0.538351193 0.117103191 0.764761023 0.532587532 0.323334201 0.635697033 0.101194285 0.285369684 0.446883087 0.146079999 0.106999973 +3.745215188 0.711941869 3.559567653 1.188991720 3.084090802 1.178336151 0.587105765 1.645856748 1.047872072 0.078854093 0.182164122 0.756113129 0.449949835 0.319772739 1.499944856 +2.134156546 0.664457704 1.923647217 0.498902533 1.382576296 1.172710577 0.733559201 0.232722239 0.648977470 1.067438502 0.303042511 1.169845557 2.176841262 0.185666747 0.667935113 5.538833003 +0.198118657 0.503943335 0.048383536 
0.030638664 0.964172901 0.185024339 0.044029570 0.199917400 0.592439554 0.143667666 0.614180565 0.040725071 0.765641182 2.174974075 0.133590865 0.217347672 0.125958817 +0.208747809 0.263834003 0.570488409 0.134714779 1.863419357 0.264729772 0.100447410 0.074554161 5.545635324 0.271724216 0.338670344 0.138599247 0.651180870 7.474120415 0.108442089 0.374198514 0.267599595 2.604411280 +2.688525915 0.201107356 0.119873351 0.052396485 2.371412865 0.282178057 0.297627071 0.134209258 0.229732340 11.786948184 2.030164484 0.222793132 2.397008325 0.758096789 0.362295352 0.127446562 2.284500453 0.201953893 0.337688120 +0.085788000 0.057731000 0.042028000 0.056462000 0.010447000 0.039548000 0.067799000 0.064861000 0.021040000 0.055398000 0.100413000 0.059401000 0.019898000 0.042789000 0.039579000 0.069262000 0.055498000 0.014430000 0.033233000 0.064396000; + +model Q.PFAM_GB= +0.365164838 +0.267954861 0.668182618 +0.412395647 0.109946883 5.109280517 +2.729878231 0.826085083 0.726938192 0.087850737 +0.877084208 2.867777221 1.690914729 0.549211875 0.175083471 +1.030653110 0.296644302 0.531939174 5.408576458 0.009322812 4.263791122 +2.118558237 0.406823565 1.377653412 0.873060903 0.782691675 0.293835651 0.404384209 +0.350857800 2.639511515 4.957438759 0.957717365 1.116928692 5.496513866 0.357768891 0.337006729 +0.117950785 0.097118281 0.189788694 0.007265312 0.427089565 0.083014733 0.034316792 0.014707735 0.086519266 +0.265550676 0.256367291 0.059881044 0.010401807 0.642230229 0.589163556 0.053257602 0.037254822 0.365721292 3.923176859 +0.457693311 5.846934542 2.118089709 0.248389767 0.027767460 3.282211527 1.530154625 0.267310479 0.632651172 0.148725361 0.106747583 +0.983716229 0.449899179 0.360441883 0.019663070 1.089667442 1.579564776 0.178533992 0.144384262 0.390780590 5.076767492 6.772375029 0.740162467 +0.184682627 0.034838076 0.079279076 0.013765656 1.373741900 0.033977644 0.013172775 0.066574829 0.649373699 0.993172189 2.231416771 0.015878425 1.671492831 +1.208554025 
0.298720644 0.164723116 0.340542665 0.126175761 0.713343366 0.393866840 0.185899793 0.585878841 0.068750403 0.259147052 0.350619140 0.101213184 0.074341327 +4.632633836 0.692974365 4.084102769 1.010171060 3.782357424 1.053657420 0.483380661 1.759872389 0.936824568 0.067637494 0.172313759 0.617037785 0.391313179 0.312203677 1.419107393 +2.317571767 0.564138173 2.051675662 0.400824407 1.655895774 1.036694976 0.582434600 0.163002252 0.527116986 1.125002936 0.262729464 1.039042636 2.278959095 0.137832417 0.601398020 6.382983692 +0.107582893 0.435536872 0.047263468 0.026999980 0.817007211 0.206727272 0.030420354 0.141119644 0.501645686 0.086205240 0.414640307 0.024983931 0.528052784 1.764940773 0.070162265 0.201322252 0.086102239 +0.157118656 0.205907035 0.544230162 0.119435501 1.653471491 0.208926712 0.080278936 0.047930460 5.372406910 0.169889476 0.258512719 0.100866499 0.460416547 7.845710592 0.075597432 0.317057267 0.177135947 2.300580433 +2.411548529 0.146939179 0.105041792 0.041670032 2.240651656 0.235573727 0.256746635 0.094430216 0.142765142 12.429218156 1.730591322 0.181062532 2.353311726 0.601728375 0.266981380 0.093044732 2.271589526 0.130636191 0.212433758 +0.087660000 0.058154000 0.037239000 0.048117000 0.013233000 0.038080000 0.063213000 0.059035000 0.021871000 0.061155000 0.111580000 0.056999000 0.022763000 0.046732000 0.035355000 0.065285000 0.052818000 0.015550000 0.035618000 0.069541000; + +model Q.LG= +0.424057540 +0.271250376 0.764825201 +0.401980425 0.140794893 5.062142812 +2.327453440 0.519625401 0.507639701 0.071616388 +1.072849735 3.123615724 1.879768813 0.664578644 0.104402693 +1.069798507 0.402468665 0.595330390 5.601033240 0.000110712 4.776313857 +1.793212924 0.339928462 1.257892965 0.741295542 0.488516386 0.272681884 0.332128789 +0.392890955 2.558126695 4.639398318 0.987385467 0.677157762 5.468173089 0.498257950 0.296507145 +0.138014910 0.132223556 0.192515195 0.013803595 0.328676783 0.094404276 0.045491635 0.009271038 0.121398119 
+0.367358512 0.278306063 0.065286972 0.014185552 0.536563257 0.592378803 0.071724886 0.041522603 0.366698667 3.930811583 +0.563978915 6.845986165 2.226004549 0.353825055 0.002829803 3.797608224 2.027714606 0.282308308 0.801983530 0.169487062 0.141910872 +1.128143826 0.489462975 0.408708404 0.023806440 0.904437688 1.839460723 0.196671330 0.125388186 0.497496251 4.359960777 6.201341249 0.706300529 +0.236258684 0.055016203 0.092864269 0.016998748 0.957089079 0.037738461 0.018574182 0.075225605 0.703446369 1.021152682 2.226104190 0.019952549 1.726515479 +1.158823278 0.344518589 0.176377788 0.403179438 0.080797749 0.708201457 0.456455246 0.180072253 0.538060086 0.083244488 0.235273346 0.427986716 0.111914514 0.092858734 +4.667976453 0.835065436 3.955582326 1.241154390 2.628965883 1.346684133 0.670392140 1.526697708 1.067966220 0.054379608 0.183116838 0.828615345 0.354736216 0.335899646 1.341323449 +2.159899413 0.617964388 2.022264246 0.477448489 1.129405944 1.248400502 0.659780378 0.124572237 0.649139830 1.017998492 0.303426657 1.225974643 2.088967227 0.169151604 0.605969830 6.645417766 +0.151862790 0.477052140 0.039141184 0.025562335 0.529678067 0.221132079 0.066497056 0.187213189 0.555296865 0.099536892 0.479069315 0.048841785 0.606743708 1.854514258 0.076842003 0.202206939 0.123868108 +0.192903795 0.274767702 0.526873728 0.124040819 1.020373268 0.276139426 0.112342970 0.044355728 5.074607897 0.213437583 0.276570096 0.143396629 0.480201901 6.641252774 0.081147599 0.358925565 0.235545698 2.373266140 +2.511548974 0.180665670 0.088670492 0.042435978 1.853035143 0.252220059 0.270634816 0.078641075 0.148865811 11.081231885 1.642242475 0.209350089 2.027034834 0.628362476 0.310510022 0.091930966 2.286257438 0.164177306 0.267739693 +0.080009000 0.052947000 0.041171000 0.050146000 0.015018000 0.035929000 0.061392000 0.064793000 0.021709000 0.063895000 0.106292000 0.057047000 0.023440000 0.047712000 0.039604000 0.062980000 0.052863000 0.014987000 0.037434000 0.070634000; + 
+model Q.BIRD= +0.086772353 +0.041489234 0.145522693 +0.684872641 0.038182359 4.588509426 +0.214514127 2.310001798 0.094392928 0.085866464 +0.104071310 4.326376802 0.119230449 0.051178971 0.037945162 +0.768984102 0.089892499 0.080461407 5.114292481 0.028175939 1.988992665 +2.215762795 1.937855330 0.295994975 2.041821920 1.073717504 0.105321639 1.420133178 +0.080617856 6.927918567 3.653639043 1.071049041 2.585629399 6.123585849 0.058085432 0.108840477 +0.046084289 0.203642191 0.465074580 0.025094905 0.142117407 0.031239309 0.016280777 0.029795942 0.047732049 +0.116389136 0.336043365 0.021519291 0.026444908 0.269064319 0.527627637 0.038528060 0.047328310 0.607702969 1.935765980 +0.093630870 5.781174762 2.258646191 0.026415889 0.011551984 1.637753572 1.761938656 0.087005765 0.004280896 0.148647886 0.044628305 +0.235654912 0.732190858 0.048983640 0.016856105 0.129334019 0.168730118 0.077835842 0.072799197 0.035426840 7.398376755 3.334268838 0.708348805 +0.092004559 0.061734224 0.047373731 0.043468817 1.575630749 0.045946765 0.043087788 0.049575370 0.141447452 0.962852023 2.950112395 0.040069839 0.134340383 +2.022526978 0.407828168 0.021912937 0.057096272 0.124231767 1.446258879 0.060414502 0.063511937 1.485608296 0.042135216 1.450911972 0.054326874 0.048270788 0.072731066 +2.352357809 0.961653423 5.657997764 0.161901882 2.848589688 0.086837961 0.056173980 2.558621780 0.248112786 0.287491221 0.653587912 0.105167714 0.055804307 0.936346554 2.818055661 +9.189210510 0.711833578 1.856292472 0.041327738 0.117750493 0.075637987 0.075603922 0.085458940 0.064922501 3.670916550 0.086159175 0.827391264 6.908142924 0.086710531 1.546051817 3.320463486 +0.067881151 2.240359335 0.029075520 0.024018688 2.572562133 0.667514386 0.057387721 0.561105174 0.032498470 0.045679288 0.687156290 0.024744157 0.188378332 0.454841057 0.054773510 0.269153898 0.048484013 +0.062428150 0.069988164 0.632160724 0.452258389 5.266555093 0.049244809 0.032418984 0.035199737 6.764718412 0.083433936 
0.094733583 0.036104364 0.043823350 3.894213263 0.056354401 0.461507798 0.048169854 0.308629484 +5.126996382 0.063299974 0.037743293 0.240637305 0.199254052 0.052218599 0.307081397 0.848692284 0.058399592 13.397205976 2.035426441 0.054281453 7.643787012 0.780383422 0.081010240 0.082620150 0.391832532 0.082144494 0.059610809 +0.066363000 0.054021000 0.037784000 0.047511000 0.022651000 0.048841000 0.071571000 0.058368000 0.025403000 0.045108000 0.100181000 0.061361000 0.021069000 0.038230000 0.053861000 0.089298000 0.053536000 0.012313000 0.027173000 0.065359000; + +model Q.INSECT= +0.245103884 +0.396680459 0.602596590 +0.388851211 0.169418318 4.002207051 +1.687379028 0.888477494 0.707887462 0.137794758 +0.813368876 2.004043618 1.229351544 0.369955633 0.230607800 +0.797312266 0.224720163 0.413998478 7.046040747 0.061906941 2.610915158 +2.835188240 0.639959824 1.963743484 1.231717882 1.043646688 0.393592403 0.553182381 +0.362258381 2.615490291 4.573260980 0.797011018 0.896230099 5.675152800 0.330930746 0.378124450 +0.228513969 0.141228769 0.208490043 0.053173950 0.460428950 0.116150694 0.064283924 0.068021312 0.140874297 +0.286953900 0.240779967 0.098667680 0.058795467 0.493466139 0.643789822 0.079309054 0.089730309 0.500992010 3.381560080 +0.325994754 5.655671758 1.541274942 0.211396072 0.082097991 2.054032646 0.761883191 0.341199781 0.396451596 0.144888450 0.103298167 +0.854941063 0.469236454 0.344916944 0.082042638 0.564344515 1.049351234 0.169182004 0.214023208 0.337275045 4.607657510 5.779760276 0.501772274 +0.202912169 0.083503104 0.104497835 0.062211382 1.265119124 0.061813267 0.037371750 0.130615907 0.693502085 0.880367224 2.142804923 0.038837539 1.068089269 +2.098306482 0.448091631 0.295630473 0.393309252 0.214758763 1.226945820 0.411528885 0.517165713 0.865174971 0.164903860 0.371130819 0.335358981 0.264037782 0.131445137 +4.447741802 0.609758408 3.798086697 0.731000508 3.467889377 0.737657764 0.379154866 2.320367575 0.697254375 0.107585891 0.228053049 
0.417871055 0.387566384 0.273240209 1.842353422 +3.173624632 0.474607818 2.267368199 0.310254294 1.125638230 0.924249331 0.410818877 0.401697351 0.490764490 1.310627756 0.287148699 0.827466845 2.368908910 0.164073566 1.031968282 5.706447436 +0.113618297 0.421211392 0.052343656 0.053998922 1.063048958 0.118187043 0.058461221 0.275756639 0.303820579 0.088741408 0.404916338 0.067974475 0.484518123 1.412153565 0.131745320 0.257205952 0.121214423 +0.159463305 0.209585392 0.485673084 0.153134973 1.990179836 0.174718362 0.083149696 0.137582926 6.378949466 0.188267059 0.280539346 0.067140987 0.264484864 10.165067155 0.155778076 0.294006077 0.191009158 1.616501717 +2.466363084 0.145305726 0.149535968 0.109375281 1.295115095 0.258250207 0.287452076 0.247675666 0.177412560 10.861831869 1.518647299 0.149072919 2.157390423 0.496751123 0.367029379 0.180581516 1.639361417 0.103038434 0.189789265 +0.063003000 0.049585000 0.047550000 0.048622000 0.015291000 0.044058000 0.072012000 0.037810000 0.022358000 0.066563000 0.107325000 0.080621000 0.023976000 0.041578000 0.028532000 0.081767000 0.055167000 0.009698000 0.032219000 0.072265000; + +model Q.MAMMAL= +0.164520503 +0.133786660 0.301753825 +0.667759687 0.102671327 4.669240542 +0.375561674 1.878554130 0.218506732 0.132842206 +0.172392722 3.619009961 0.250359878 0.124852096 0.095864103 +0.781197529 0.208357490 0.202345038 5.379197337 0.056958677 1.746841825 +2.076338500 1.797395963 0.679494500 1.991708503 1.179185026 0.168866582 1.453533780 +0.161623558 4.710380602 3.940278094 0.956160014 1.796411770 5.600293701 0.164776154 0.227985715 +0.203510704 0.217922574 0.520603634 0.071028795 0.233803448 0.064010956 0.054619397 0.086763961 0.094641478 +0.178925750 0.403058144 0.048270492 0.042419644 0.297464704 0.583097899 0.070620812 0.086464596 0.627289885 2.383373587 +0.150019883 5.862144662 2.011938106 0.109850770 0.049580542 1.705745478 1.499396112 0.184898463 0.108290367 0.192452773 0.074328424 +0.426954296 0.737495653 0.124928924 
0.058185276 0.192839562 0.273324822 0.142904022 0.172462927 0.119658259 7.422424301 4.121859707 0.760101625 +0.156046906 0.098468307 0.074017139 0.075986849 1.649041128 0.061484905 0.059915230 0.098183513 0.275397853 1.183422487 2.957377967 0.060573566 0.338235196 +1.899027463 0.525891518 0.068009985 0.123677157 0.200110404 1.590125986 0.137838674 0.136748511 1.445166153 0.107833373 1.381061149 0.129943936 0.137065096 0.146012906 +2.359299936 0.843914331 5.427387027 0.263292147 2.507254736 0.189228647 0.139146074 2.427076788 0.376188219 0.330866332 0.498124825 0.193144225 0.150312624 0.827642694 2.786459548 +7.524532664 0.539886133 1.955763604 0.132705770 0.233037817 0.165733855 0.155868082 0.198123473 0.145627442 3.528654954 0.153102275 0.889109054 4.911310946 0.119728464 1.529988220 3.075670543 +0.101039395 2.131494369 0.058369398 0.043905868 2.144674342 0.521419600 0.088844939 0.579178348 0.134177423 0.088684660 0.683790243 0.064089109 0.276245610 0.603795093 0.100007515 0.264443242 0.079630880 +0.091694198 0.139818982 0.655149301 0.420781565 5.685285320 0.126113134 0.070592995 0.083304052 7.287086510 0.172110578 0.176288430 0.080506454 0.165573576 5.877878503 0.112513257 0.458878972 0.102291161 0.645002709 +4.300445029 0.107177686 0.082093349 0.228482448 0.278600785 0.094238794 0.325642029 0.868890049 0.081513165 12.950251994 1.879650916 0.097391390 7.158497660 0.733119107 0.145626117 0.152049202 0.573328824 0.113423271 0.125003102 +0.067997000 0.055503000 0.036288000 0.046867000 0.021435000 0.050281000 0.068935000 0.055323000 0.026410000 0.041953000 0.101191000 0.060037000 0.019662000 0.036237000 0.055146000 0.096864000 0.057136000 0.011785000 0.024730000 0.066223000; + +model Q.PLANT= +0.061995451 +0.071787018 0.324146307 +0.482723250 0.017012888 5.640569094 +0.523802485 2.773840824 0.412259505 0.072474815 +0.266699470 3.319795598 0.641219533 0.101391710 0.105145707 +0.848857861 0.027378890 0.203417806 6.983296655 0.004325163 3.676487445 +2.099517664 
0.923332845 0.989386407 1.214523767 1.225782488 0.189953993 0.893983216 +0.086540313 4.386107333 5.633764157 1.133417157 3.025390235 8.135368256 0.117555479 0.127760160 +0.047711951 0.148339563 0.470979475 0.007582115 0.267133294 0.022845535 0.010786746 0.011450563 0.074259162 +0.132461376 0.242868565 0.008688476 0.007966889 0.387974066 0.713657288 0.029290769 0.013401682 0.666044528 3.178490964 +0.135231094 6.796751125 3.253593795 0.018324171 0.000109001 2.561068546 1.166641775 0.150166421 0.040648681 0.143807298 0.022265650 +0.487603228 0.622545469 0.164436842 0.009922368 0.133016192 0.383963916 0.121953673 0.072782198 0.060192574 6.137883228 5.109951861 0.997263914 +0.101998846 0.018497482 0.033489490 0.011945428 2.787245776 0.000109001 0.003428084 0.016479873 0.304517393 1.260751123 3.067538770 0.004231422 0.367280212 +1.679096158 0.423800539 0.020342871 0.080963818 0.083580934 1.394642593 0.104298769 0.064165663 1.150907470 0.036382379 0.559761940 0.059246444 0.049078824 0.049489758 +3.724953683 0.724887668 4.517199656 0.281847349 5.612663729 0.304709235 0.112097795 1.591990129 0.447466853 0.164991657 0.549645550 0.209386705 0.171531722 0.574024731 2.359864552 +4.753201153 0.655493224 2.669910479 0.105335369 0.389742063 0.300884387 0.195263436 0.123875362 0.116237656 2.552275429 0.089626134 1.172764365 4.055778486 0.062695238 1.066677979 5.735630021 +0.016624844 0.808384672 0.005560145 0.000347713 2.878143952 0.176461840 0.013106289 0.196264065 0.066883060 0.000109001 0.582218341 0.002231252 0.229612944 0.597613653 0.009791567 0.216845648 0.000109001 +0.033775073 0.054821001 0.753418700 0.255631501 4.340476213 0.036965535 0.020444242 0.012699715 7.284279143 0.053844351 0.084699285 0.016433002 0.066804579 8.185198097 0.032534641 0.405822992 0.051593479 0.524555683 +3.226243245 0.041421498 0.061468976 0.147004300 0.560195764 0.088095759 0.303691165 0.297902118 0.066305354 10.911840366 1.918251057 0.050145945 2.585318015 0.627525728 0.114608090 0.058704709 
0.972725592 0.026833885 0.133140453 +0.074923000 0.050500000 0.038734000 0.053195000 0.011300000 0.037499000 0.068513000 0.059627000 0.021204000 0.058991000 0.102504000 0.067306000 0.022371000 0.043798000 0.037039000 0.084451000 0.047850000 0.012322000 0.030777000 0.077097000; + +model Q.YEAST= +0.289760345 +0.342709634 0.718300668 +0.367886518 0.072562600 4.199522650 +3.691718604 0.710404342 0.490823952 0.052871678 +1.043523521 2.008049414 1.553982959 0.449223552 0.070963821 +1.093168815 0.139582694 0.508273036 5.624270628 0.003118838 3.707570018 +2.882498207 0.540565704 1.987856240 1.230079486 1.182218128 0.398459557 0.542851891 +0.417376590 2.637569081 4.921959964 0.793976563 0.971187101 5.448912766 0.394563504 0.492571299 +0.182603367 0.132608825 0.173586122 0.017141968 0.595831116 0.083860488 0.036837325 0.027627162 0.116377119 +0.334353188 0.205566131 0.068829530 0.021633405 0.796201770 0.465713591 0.062744857 0.043967523 0.378253079 3.366451402 +0.483774690 6.326126715 1.772882594 0.193037560 0.025984034 2.595024049 0.980236499 0.351568331 0.667015573 0.108153720 0.090350953 +0.980143366 0.618539982 0.354791389 0.052464777 0.984440777 1.122348190 0.146156313 0.190256883 0.392801743 4.449605122 5.986085778 0.578286681 +0.224857913 0.047417869 0.073768891 0.020291850 1.625802715 0.031817027 0.015954525 0.094986525 0.713520963 0.952803363 2.116139937 0.012660509 1.303449237 +1.902789461 0.327908181 0.288401051 0.418241394 0.119283083 0.994116603 0.453784839 0.361756388 0.717288404 0.097506652 0.200252021 0.383751236 0.146206205 0.091990755 +4.877234074 0.595474107 3.112134717 0.924744908 3.584581583 1.239592330 0.512666905 1.863057260 0.955917766 0.058056067 0.241269232 0.551359124 0.486660696 0.253426381 1.572179322 +2.789256813 0.577913042 2.172349980 0.379983795 1.746675706 1.004182700 0.576198956 0.295694231 0.636427012 1.138771705 0.291850288 0.980917255 1.905694316 0.158541858 0.735801862 5.167267165 +0.082544434 0.297203203 0.023885222 0.009802221 
1.098149199 0.061054054 0.021273070 0.132048920 0.320030703 0.103698871 0.411524750 0.030961092 0.442192031 1.474552928 0.057108109 0.136000409 0.072755518 +0.176505390 0.179523334 0.380460546 0.103068007 1.987376163 0.160373469 0.057915259 0.060539607 6.419218432 0.171959625 0.282755433 0.046896769 0.361827346 7.660749919 0.085482550 0.275364684 0.182111094 1.812622556 +2.936397672 0.163865947 0.135944973 0.080351380 2.828941339 0.271020707 0.294499028 0.133659895 0.255089465 11.291813609 1.513322757 0.167243117 2.085093473 0.620211936 0.348307577 0.145275987 2.188995240 0.093529663 0.240182683 +0.059954000 0.042032000 0.052518000 0.054641000 0.008189000 0.040467000 0.070691000 0.039935000 0.018393000 0.069555000 0.109563000 0.081967000 0.018694000 0.046979000 0.031382000 0.091102000 0.055887000 0.010241000 0.033496000 0.064313000; + +model FLAVI= +0.077462 +0.078037 0.000020 +0.550515 0.089476 8.801355 +0.114675 0.572845 0.000020 0.000020 +0.090490 3.856678 0.093133 0.183601 0.000020 +0.560685 0.020614 0.000020 7.603314 0.000020 1.058066 +0.478097 1.281339 0.147801 1.359685 0.221287 0.000020 2.331269 +0.168719 2.704348 3.326034 0.543235 0.240631 8.958548 0.025386 0.032838 +0.000020 0.151250 0.538307 0.000020 0.000020 0.000020 0.000020 0.000020 0.037032 +0.002576 0.114188 0.000020 0.000020 0.085459 0.789751 0.023842 0.037784 0.375616 1.571343 +0.000020 19.413475 2.764154 0.000020 0.000020 1.545589 2.992960 0.027884 0.000020 0.253247 0.038560 +0.000020 0.151495 0.000020 0.000020 0.000020 0.000020 0.031428 0.009423 0.000020 8.538171 2.895172 0.340189 +0.000020 0.000020 0.000020 0.000020 0.773082 0.000020 0.000020 0.000020 0.092713 1.368786 6.296781 0.000020 0.000020 +1.256282 0.292376 0.000020 0.000020 0.000020 1.571014 0.000020 0.029602 0.723179 0.000020 1.182805 0.097941 0.000020 0.035125 +2.104417 0.947496 9.494063 0.127754 1.516267 0.140959 0.000020 1.735582 0.079928 0.211717 1.008880 0.131204 0.073511 1.821906 4.417267 +6.315368 0.199572 1.329820 0.000020 
0.000020 0.057328 0.000020 0.044927 0.144097 6.767791 0.033401 0.567475 2.228245 0.000020 1.543179 3.290965 +0.000020 0.791312 0.000020 0.000020 2.305902 0.000020 0.000020 0.323982 0.206837 0.000020 0.176249 0.000020 0.173206 0.148283 0.074193 0.154086 0.000020 +0.043583 0.000020 1.247952 0.771837 4.085694 0.000020 0.029684 0.000020 16.468674 0.000020 0.000020 0.071741 0.228965 5.907642 0.000020 0.431256 0.000020 0.000020 +8.030561 0.000020 0.000020 0.439809 0.149716 0.000020 0.346045 0.583259 0.067473 16.913225 1.074192 0.006629 3.855848 1.176807 0.036750 0.052613 0.134211 0.000020 0.234116 +0.077500 0.053813 0.033950 0.034973 0.014056 0.030139 0.054825 0.086284 0.018210 0.063272 0.103857 0.059646 0.040389 0.033630 0.036649 0.060915 0.076327 0.030152 0.020069 0.071343; + +end; +)"; + + +ModelProtein::ModelProtein(const char *model_name, string model_params, StateFreqType freq, string freq_params, PhyloTree *tree, ModelsBlock* models_block) + : ModelMarkov(tree, true, false) +{ + this->models_block = models_block; + init(model_name, model_params, freq, freq_params); } -void get_VT(double **q, double *f) { - /* - * Mueller, T. and Vingron, M. - * "Modeling Amino Acid Replacement" - * Journal of Comp. 
Biology, 7(6):761-776,2000 - */ - - /* amino acid frequencies */ - f[ 0]=0.0770764620135024 ; f[ 1]=0.0500819370772208 ; - f[ 2]=0.0462377395993731 ; f[ 3]=0.0537929860758246 ; - f[ 4]=0.0144533387583345 ; f[ 5]=0.0408923608974345 ; - f[ 6]=0.0633579339160905 ; f[ 7]=0.0655672355884439 ; - f[ 8]=0.0218802687005936 ; f[ 9]=0.0591969699027449 ; - f[10]=0.0976461276528445 ; f[11]=0.0592079410822730 ; - f[12]=0.0220695876653368 ; f[13]=0.0413508521834260 ; - f[14]=0.0476871596856874 ; f[15]=0.0707295165111524 ; - f[16]=0.0567759161524817 ; f[17]=0.0127019797647213 ; - f[18]=0.0323746050281867 ; f[19]=0.0669190817443274 ; - - /* relative rates */ - - q[ 0][ 1] = 1.2412691067876198; q[ 0][ 2] = 1.2184237953498958; - q[ 0][ 3] = 1.3759368509441177; q[ 0][ 4] = 2.4731223087544874; - q[ 0][ 5] = 2.2155167805137470; q[ 0][ 6] = 2.3379911207495061; - q[ 0][ 7] = 3.3386555146457697; q[ 0][ 8] = 0.9615841926910841; - q[ 0][ 9] = 0.8908203061925510; q[ 0][10] = 1.0778497408764076; - q[ 0][11] = 1.4932055816372476; q[ 0][12] = 1.9006455961717605; - q[ 0][13] = 0.6883439026872615; q[ 0][14] = 2.7355620089953550; - q[ 0][15] = 6.4208961859142883; q[ 0][16] = 5.2892514169776437; - q[ 0][17] = 0.5488578478106930; q[ 0][18] = 0.5411769916657778; - q[ 0][19] = 4.6501894691803214; - - q[ 1][ 2] = 1.5720770753326880; q[ 1][ 3] = 0.7550654439001206; - q[ 1][ 4] = 1.4414262567428417; q[ 1][ 5] = 5.5120819705248678; - q[ 1][ 6] = 1.3542404860613146; q[ 1][ 7] = 1.3121700301622004; - q[ 1][ 8] = 4.9238668283945266; q[ 1][ 9] = 0.4323005487925516; - q[ 1][10] = 0.8386701149158265; q[ 1][11] = 10.0173308173660018; - q[ 1][12] = 1.2488638689609959; q[ 1][13] = 0.4224945197276290; - q[ 1][14] = 1.3091837782420783; q[ 1][15] = 1.9202994262316166; - q[ 1][16] = 1.3363401740560601; q[ 1][17] = 1.5170142153962840; - q[ 1][18] = 0.8912614404565405; q[ 1][19] = 0.7807017855806767; - - q[ 2][ 3] = 7.8584219153689405; q[ 2][ 4] = 0.9784679122774127; - q[ 2][ 5] = 3.0143201670924822; q[ 2][ 6] = 
2.0093434778398112; - q[ 2][ 7] = 2.4117632898861809; q[ 2][ 8] = 6.1974384977884114; - q[ 2][ 9] = 0.9179291175331520; q[ 2][10] = 0.4098311270816011; - q[ 2][11] = 4.4034547578962568; q[ 2][12] = 0.9378803706165143; - q[ 2][13] = 0.5044944273324311; q[ 2][14] = 0.7103720531974738; - q[ 2][15] = 6.1234512396801764; q[ 2][16] = 3.8852506105922231; - q[ 2][17] = 0.1808525752605976; q[ 2][18] = 1.0894926581511342; - q[ 2][19] = 0.4586061981719967; - - q[ 3][ 4] = 0.2272488448121475; q[ 3][ 5] = 1.6562495638176040; - q[ 3][ 6] = 9.6883451875685065; q[ 3][ 7] = 1.9142079025990228; - q[ 3][ 8] = 2.1459640610133781; q[ 3][ 9] = 0.2161660372725585; - q[ 3][10] = 0.3574207468998517; q[ 3][11] = 1.4521790561663968; - q[ 3][12] = 0.4075239926000898; q[ 3][13] = 0.1675129724559251; - q[ 3][14] = 1.0714605979577547; q[ 3][15] = 2.2161944596741829; - q[ 3][16] = 1.5066839872944762; q[ 3][17] = 0.2496584188151770; - q[ 3][18] = 0.7447620891784513; q[ 3][19] = 0.4594535241660911; - - q[ 4][ 5] = 0.4587469126746136; q[ 4][ 6] = 0.4519167943192672; - q[ 4][ 7] = 1.1034605684472507; q[ 4][ 8] = 1.5196756759380692; - q[ 4][ 9] = 0.9126668032539315; q[ 4][10] = 1.4081315998413697; - q[ 4][11] = 0.3371091785647479; q[ 4][12] = 1.2213054800811556; - q[ 4][13] = 1.6953951980808002; q[ 4][14] = 0.4326227078645523; - q[ 4][15] = 3.6366815408744255; q[ 4][16] = 1.7557065205837685; - q[ 4][17] = 1.6275179891253113; q[ 4][18] = 2.1579775140421025; - q[ 4][19] = 2.2627456996290891; - - q[ 5][ 6] = 6.8124601839937675; q[ 5][ 7] = 0.8776110594765502; - q[ 5][ 8] = 7.9943228564946525; q[ 5][ 9] = 0.4882733432879921; - q[ 5][10] = 1.3318097154194044; q[ 5][11] = 6.0519085243118811; - q[ 5][12] = 1.9106190827629084; q[ 5][13] = 0.3573432522499545; - q[ 5][14] = 2.3019177728300728; q[ 5][15] = 2.3193703643237220; - q[ 5][16] = 2.1576510103471440; q[ 5][17] = 0.8959082681546182; - q[ 5][18] = 0.9183596801412757; q[ 5][19] = 0.6366932501396869; - - q[ 6][ 7] = 1.3860121390169038; q[ 6][ 8] = 
1.6360079688522375; - q[ 6][ 9] = 0.4035497929633328; q[ 6][10] = 0.5610717242294755; - q[ 6][11] = 4.3290086529582830; q[ 6][12] = 0.7471936218068498; - q[ 6][13] = 0.2317194387691585; q[ 6][14] = 1.5132807416252063; - q[ 6][15] = 1.8273535587773553; q[ 6][16] = 1.5839981708584689; - q[ 6][17] = 0.4198391148111098; q[ 6][18] = 0.5818111331782764; - q[ 6][19] = 0.8940572875547330; - - q[ 7][ 8] = 0.8561248973045037; q[ 7][ 9] = 0.2888075033037488; - q[ 7][10] = 0.3578662395745526; q[ 7][11] = 0.8945563662345198; - q[ 7][12] = 0.5954812791740037; q[ 7][13] = 0.3693722640980460; - q[ 7][14] = 0.7744933618134962; q[ 7][15] = 3.0637776193717610; - q[ 7][16] = 0.7147489676267383; q[ 7][17] = 0.9349753595598769; - q[ 7][18] = 0.3374467649724478; q[ 7][19] = 0.6193321034173915; - - q[ 8][ 9] = 0.5787937115407940; q[ 8][10] = 1.0765007949562073; - q[ 8][11] = 1.8085136096039203; q[ 8][12] = 1.3808291710019667; - q[ 8][13] = 1.3629765501081097; q[ 8][14] = 1.8370555852070649; - q[ 8][15] = 1.9699895187387506; q[ 8][16] = 1.6136654573285647; - q[ 8][17] = 0.6301954684360302; q[ 8][18] = 7.7587442309146040; - q[ 8][19] = 0.5333220944030346; - - q[ 9][10] = 6.0019110258426362; q[ 9][11] = 0.6244297525127139; - q[ 9][12] = 6.7597899772045418; q[ 9][13] = 2.2864286949316077; - q[ 9][14] = 0.4811402387911145; q[ 9][15] = 0.6047491507504744; - q[ 9][16] = 2.6344778384442731; q[ 9][17] = 0.5604648274060783; - q[ 9][18] = 0.8626796044156272; q[ 9][19] = 14.8729334615190609; - - q[10][11] = 0.5642322882556321; q[10][12] = 8.0327792947421148; - q[10][13] = 4.3611548063555778; q[10][14] = 1.0084320519837335; - q[10][15] = 0.8953754669269811; q[10][16] = 1.0192004372506540; - q[10][17] = 1.5183114434679339; q[10][18] = 1.2452243224541324; - q[10][19] = 3.5458093276667237; - - q[11][12] = 1.7129670976916258; q[11][13] = 0.3910559903834828; - q[11][14] = 1.3918935593582853; q[11][15] = 1.9776630140912268; - q[11][16] = 2.5513781312660280; q[11][17] = 0.5851920879490173; - q[11][18] = 
0.7835447533710449; q[11][19] = 0.7801080335991272; - - q[12][13] = 2.3201373546296349; q[12][14] = 0.4953193808676289; - q[12][15] = 1.0657482318076852; q[12][16] = 3.3628488360462363; - q[12][17] = 1.4680478689711018; q[12][18] = 1.0899165770956820; - q[12][19] = 4.0584577156753401; - - q[13][14] = 0.3746821107962129; q[13][15] = 1.1079144700606407; - q[13][16] = 0.6882725908872254; q[13][17] = 3.3448437239772266; - q[13][18] = 10.3848523331334590; q[13][19] = 1.7039730522675411; - - q[14][15] = 3.5465914843628927; q[14][16] = 1.9485376673137556; - q[14][17] = 0.4326058001438786; q[14][18] = 0.4819109019647465; - q[14][19] = 0.5985498912985666; - - q[15][16] = 8.8479984061248178; q[15][17] = 0.6791126595939816; - q[15][18] = 0.9547229305958682; q[15][19] = 0.9305232113028208; - - q[16][17] = 0.4514203099376473; q[16][18] = 0.8564314184691215; - q[16][19] = 3.4242218450865543; - - q[17][18] = 4.5377235790405388; q[17][19] = 0.5658969249032649; - - q[18][19] = 1.0000000000000000; - - -} /* vt data */ - -//this part are taken from PUZZLE-TREE -void get_Dayhoff(double **q, double *f) { - /* - * Dayhoff model for amino acid evolution - * Dayhoff, M.O., Schwartz, R.M., Orcutt, B.C. (1978) - * "A model of evolutionary change in proteins." - * Dayhoff, M.O. (ed.) Atlas of Protein Sequence Structur., Vol5, Suppl. 3, - * National Biomedical Research Foundation, Washington DC, pp. 345-352. 
- */ - - q[0][1]=9.6472567159749e-01; q[0][2]=3.5927991886410e+00; - q[0][3]=4.3200552414656e+00; q[0][4]=1.3184584178499e+00; - q[0][5]=3.2267534963169e+00; q[0][6]=7.0141987829615e+00; - q[0][7]=8.5773867857875e+00; q[0][8]=8.1434196396611e-01; - q[0][9]=2.3518447453539e+00; q[0][10]=1.4735711728911e+00; - q[0][11]=9.3940162271805e-01; q[0][12]=2.5490196078431e+00; - q[0][13]=6.5922920892495e-01; q[0][14]=8.9189834148670e+00; - q[0][15]=1.4540712836859e+01; q[0][16]=1.3411904595370e+01; - q[0][17]=3.8517964118027e-02; q[0][18]=8.7897227856660e-01; - q[0][19]=7.4036511156187e+00; - - q[1][2]=1.1890243902439e+00; q[1][3]=5.9525626545377e-02; - q[1][4]=8.4778922655537e-01; q[1][5]=8.8348561504191e+00; - q[1][6]=5.5954088952654e-02; q[1][7]=3.1434881434075e-01; - q[1][8]=8.4753987678285e+00; q[1][9]=2.2684090115941e+00; - q[1][10]=5.5954088952654e-01; q[1][11]=1.6681312769010e+01; - q[1][12]=3.1707317073171e+00; q[1][13]=4.8959827833572e-01; - q[1][14]=3.6754156468900e+00; q[1][15]=5.4755072760812e+00; - q[1][16]=9.6472567159749e-01; q[1][17]=7.5538020086083e+00; - q[1][18]=2.7977044476327e-01; q[1][19]=8.6083213773314e-01; - - q[2][3]=3.2459324155194e+01; q[2][4]=7.3852625416383e-02; - q[2][5]=3.7732198142415e+00; q[2][6]=5.3911764705882e+00; - q[2][7]=5.0264375413087e+00; q[2][8]=1.9061418685121e+01; - q[2][9]=2.7901430842607e+00; q[2][10]=1.2482698961938e+00; - q[2][11]=1.1542279411765e+01; q[2][12]=1.9117647058824e-01; - q[2][13]=5.0183823529412e-01; q[2][14]=1.5181660899654e+00; - q[2][15]=1.7697478991597e+01; q[2][16]=8.3557302231237e+00; - q[2][17]=8.6029411764706e-01; q[2][18]=3.4411764705882e+00; - q[2][19]=5.7352941176471e-01; - - q[3][4]=2.5534152404601e-02; q[3][5]=4.8811013767209e+00; - q[3][6]=4.0561952440551e+01; q[3][7]=4.4423506911730e+00; - q[3][8]=3.0865788117500e+00; q[3][9]=8.5749078239692e-01; - q[3][10]=2.5926985518518e-02; q[3][11]=2.5930851063830e+00; - q[3][12]=1.1667143483333e-01; q[3][13]=1.2963492759259e-02; - 
q[3][14]=4.7853935065891e-01; q[3][15]=3.4167709637046e+00; - q[3][16]=2.3984722282163e+00; q[3][17]=3.2408731898147e-02; - q[3][18]=8.1351689612015e-02; q[3][19]=6.3829787234043e-01; - - q[4][5]=2.1864264103535e-02; q[4][6]=1.4770525083277e-02; - q[4][7]=3.9055458751427e-01; q[4][8]=1.0223340673168e+00; - q[4][9]=1.5970515970516e+00; q[4][10]=3.9098448749850e-02; - q[4][11]=8.0776309049169e-03; q[4][12]=1.4155086538140e-01; - q[4][13]=8.6898395721925e-02; q[4][14]=6.8155604487784e-01; - q[4][15]=5.8097784568373e+00; q[4][16]=5.9929928084086e-01; - q[4][17]=3.4759358288770e-01; q[4][18]=3.4759358288770e+00; - q[4][19]=1.7647058823529e+00; - - q[5][6]=2.5476780185759e+01; q[5][7]=1.0174974779977e+00; - q[5][8]=2.1573939173192e+01; q[5][9]=6.5266504894988e-01; - q[5][10]=2.6634492806410e+00; q[5][11]=5.5466331269350e+00; - q[5][12]=4.0247678018576e+00; q[5][13]=1.8038017885416e-02; - q[5][14]=5.5044618466582e+00; q[5][15]=2.0267580716497e+00; - q[5][16]=1.9256432155439e+00; q[5][17]=9.6202762055552e-02; - q[5][18]=1.0061919504644e-01; q[5][19]=1.2538699690402e+00; - - q[6][7]=2.8869795109055e+00; q[6][8]=1.5519031141869e+00; - q[6][9]=2.1701112877583e+00; q[6][10]=4.0484429065744e-01; - q[6][11]=2.9823529411765e+00; q[6][12]=1.0705882352941e+00; - q[6][13]=1.9801735189768e-02; q[6][14]=1.7993079584775e+00; - q[6][15]=2.8184873949580e+00; q[6][16]=1.2261663286004e+00; - q[6][17]=7.3114099162219e-02; q[6][18]=7.6470588235294e-01; - q[6][19]=1.3058823529412e+00; - - q[7][8]=3.7906768788150e-01; q[7][9]=2.3128004846840e-02; - q[7][10]=2.5776602775942e-01; q[7][11]=9.6662260409782e-01; - q[7][12]=6.0145406477198e-01; q[7][13]=5.4775280898876e-01; - q[7][14]=1.2382877804129e+00; q[7][15]=8.2853366065527e+00; - q[7][16]=1.1110604644803e+00; q[7][17]=1.2888301387971e-01; - q[7][18]=1.7114723586662e-02; q[7][19]=1.9233311302049e+00; - - q[8][9]=2.7354343963341e-01; q[8][10]=1.5876246692449e+00; - q[8][11]=9.6993944636678e-01; q[8][12]=1.2544085640577e-01; - 
q[8][13]=1.6868512110727e+00; q[8][14]=3.3075513942601e+00; - q[8][15]=1.2530894710826e+00; q[8][16]=8.1434196396611e-01; - q[8][17]=1.0121107266436e+00; q[8][18]=4.4982698961938e+00; - q[8][19]=1.5570934256055e+00; - - q[9][10]=9.2275320303002e+00; q[9][11]=1.6663354531002e+00; - q[9][12]=1.1780604133545e+01; q[9][13]=6.9753577106518e+00; - q[9][14]=4.2551201720752e-01; q[9][15]=8.8575970928912e-01; - q[9][16]=6.8951811852420e+00; q[9][17]=9.8802836705702e-02; - q[9][18]=1.3434022257552e+00; q[9][19]=3.1526232114467e+01; - - q[10][11]=6.5787197231834e-01; q[10][12]=1.8622837370242e+01; - q[10][13]=5.6340830449827e+00; q[10][14]=1.1377976796255e+00; - q[10][15]=6.1690558576372e-01; q[10][16]=1.2098794893211e+00; - q[10][17]=1.7543252595156e+00; q[10][18]=1.0346020761246e+00; - q[10][19]=6.2906574394464e+00; +void rescaleRates(double *rates, int nrates) { + int i; - q[11][12]=8.6029411764706e+00; q[11][13]=6.6640454965565e-03; - q[11][14]=1.2089100346021e+00; q[11][15]=3.4411764705882e+00; - q[11][16]=4.9442190669371e+00; q[11][17]=3.4272233982290e-02; - q[11][18]=4.7794117647059e-01; q[11][19]=3.7500000000000e-01; + double max_rate = 0.0; - q[12][13]=3.2500000000000e+00; q[12][14]=5.9976931949250e-01; - q[12][15]=2.1848739495798e+00; q[12][16]=3.6916835699797e+00; - q[12][17]=1.6247577591604e-01; q[12][18]=1.1508700794053e-01; - q[12][19]=9.0588235294118e+00; + for (i = 0; i < nrates; i++) + max_rate = max(max_rate, rates[i]); - q[13][14]=3.9359861591695e-01; q[13][15]=1.6386554621849e+00; - q[13][16]=4.9442190669371e-01; q[13][17]=2.8676470588235e+00; - q[13][18]=2.4852941176471e+01; q[13][19]=4.4117647058824e-01; + const double AA_SCALE = 10.0; + double scaler = AA_SCALE / max_rate; - q[14][15]=8.6431043005437e+00; q[14][16]=2.8308077795013e+00; - q[14][17]=3.5840244687362e-02; q[14][18]=4.3804743506776e-02; - q[14][19]=1.7301038062284e+00; + /* SCALING HAS BEEN RE-INTRODUCED TO RESOLVE NUMERICAL PROBLEMS */ - q[15][16]=1.9663865546218e+01; 
q[15][17]=2.7857142857143e+00; - q[15][18]=1.2016806722689e+00; q[15][19]=1.0840336134454e+00; - - q[16][17]=4.2019597219666e-02; q[16][18]=1.5162271805274e+00; - q[16][19]=5.6592292089249e+00; - - q[17][18]=2.2941176470588e+00; q[17][19]=1.2654363316538e-01; - - q[18][19]=1.0000000000000e+00; - - - f[0] = 0.087; f[1] = 0.041; f[2] = 0.040; f[3] = 0.047; - f[4] = 0.033; f[5] = 0.038; f[6] = 0.05; f[7] = 0.089; - f[8] = 0.034; f[9] = 0.037; f[10] = 0.085; f[11] = 0.08; - f[12] = 0.015; f[13] = 0.04; f[14] = 0.051; f[15] = 0.07; - f[16] = 0.058; f[17] = 0.01; f[18] = 0.03; f[19] = 0.065; - -} /* dayhoff data */ - - -ModelProtein::ModelProtein(const char *model_name, string model_params, StateFreqType freq, string freq_params, PhyloTree *tree) - : ModelMarkov(tree) -{ - init(model_name, model_params, freq, freq_params); + for (i = 0; i < nrates; i++) + rates[i] *= scaler; } - void ModelProtein::init(const char *model_name, string model_params, StateFreqType freq, string freq_params) { ASSERT(num_states == 20); + ASSERT(models_block && "models_block uninitialized"); name = model_name; - //string model_str; - //bool user_model = false; - double daa[400]; - double f[20]; + string name_upper = model_name; for (string::iterator it = name_upper.begin(); it != name_upper.end(); it++) (*it) = toupper(*it); - if (initProtMat(state_freq, daa, name_upper)) { - int i, j, k; + NxsModel *nxs_model = models_block->findModel(name_upper); + if (nxs_model) { + if (nxs_model->flag != NM_ATOMIC) + outError("Invalid protein model name ", model_name); + + readParametersString(nxs_model->description); + rescaleRates(rates, getNumRateEntries()); + + int i; double sum = 0.0; for (i = 0; i < num_states; i++) sum += (double) state_freq[i]; - if (round(sum*1e8) != 1e8) { + if (fabs(sum-1.0) > 1e-7) { cout.precision(7); cout << "WARNING: " << name_upper << " state frequencies do not sum up to 1: " << sum << endl; } - if (verbose_mode >= VB_DEBUG) { - cout.precision(6); - cout.unsetf(ios::fixed); 
- cout << name_upper << " rate matrix and state frequencies:" << endl; - for (i=0; i < num_states; i++) { - for (j=0; j < num_states; j++) - cout << ((j>0) ? "\t":"") << daa[i*20+j]; - cout << endl; - } - for (i=0; i < num_states; i++) - cout << ((i>0)? "\t":"") << state_freq[i]; - cout << endl; - - } - for (i = 0, k = 0; i < num_states-1; i++) - for (j = i+1; j < num_states; j++) - rates[k++] = daa[i*20+j]; num_params = 0; + } else if (!model_params.empty()) { - stringstream ss(model_params); - readRates(ss); - readStateFreq(ss); + readParametersString(model_params); + rescaleRates(rates, getNumRateEntries()); num_params = 0; } else if (name_upper == "GTR20") { - outWarning("GTR20 model will estimate 189 substitution rates that might be overfitting!"); - outWarning("Please only use GTR20 with very large data and always test for model fit!"); - if (freq == FREQ_UNKNOWN || freq == FREQ_USER_DEFINED) + if (!Params::getInstance().link_model) { + outWarning("GTR20 model will estimate 189 substitution rates that might be overfitting!"); + outWarning("Please only use GTR20 with very large data and always test for model fit!"); + } + if (freq == FREQ_UNKNOWN) freq = FREQ_EMPIRICAL; - // initialize rate matrix with LG - int i, j, k; - initProtMat(f, daa, "LG"); - for (i = 0, k = 0; i < num_states-1; i++) - for (j = i+1; j < num_states; j++) - rates[k++] = daa[i*20+j]; + if (Params::getInstance().model_name_init) { + nxs_model = models_block->findModel(Params::getInstance().model_name_init); + if (nxs_model) { + readParametersString(nxs_model->description, false); + } else { + // initialize with custom model file + readParameters(Params::getInstance().model_name_init, false); + } + rescaleRates(rates, getNumRateEntries()); + if (!isReversible()) + outError("Cannot initialize from non-reversible model"); + } else { + // initialize rate matrix with LG + nxs_model = models_block->findModel("LG"); + ASSERT(nxs_model); + readParametersString(nxs_model->description, false); + 
rescaleRates(rates, getNumRateEntries()); + } + // 2018-05-08 bug fix: GTR20 rates are not optimized + num_params = getNumRateEntries()-1; + } else if (name_upper == "NONREV") { + if (!Params::getInstance().link_model) { + outWarning("NONREV model will estimate 379 substitution rates that might be overfitting!"); + outWarning("Please only use NONREV with very large data and always test for model fit!"); + } + if (freq == FREQ_UNKNOWN) + freq = FREQ_ESTIMATE; + if (Params::getInstance().model_name_init) { + nxs_model = models_block->findModel(Params::getInstance().model_name_init); + if (nxs_model) { + readParametersString(nxs_model->description, false); + } else { + // initialize with custom model file + readParameters(Params::getInstance().model_name_init, false); + } + rescaleRates(rates, getNumRateEntries()); + if (isReversible()) + setReversible(false); + } else { + // initialize rate matrix with LG + nxs_model = models_block->findModel("LG"); + ASSERT(nxs_model); + readParametersString(nxs_model->description, false); + rescaleRates(rates, getNumRateEntries()); + setReversible(false); + } + num_params = getNumRateEntries()-1; } else { // if name does not match, read the user-defined model readParameters(model_name); + rescaleRates(rates, getNumRateEntries()); num_params = 0; } if (freq_params != "") { // stringstream ss(freq_params); readStateFreq(freq_params); } -/* if (name == "WAG") { model_str = model_WAG;} - else if (name == "cpREV") model_str = model_cpREV; - else if (name == "mtREV") model_str = model_mtREV; - //else if (name == "Dayhoff") model_str = model_Dayhoff; - else if (name == "mtMAM") model_str = model_mtMAM; - else if (name == "JTT") model_str = model_JTT; - else if (name == "LG") model_str = model_LG; - else if (name == "mtART") model_str = model_mtART; - else if (name == "mtZOA") model_str = model_mtZOA; - else if (name == "VT" || name == "rtREV" || name == "Dayhoff" || name == "PAM") { - double *q[num_states]; - int i, j, k; - for (i = 0; i 
< num_states; i++) - q[i] = new double[num_states]; - if (name == "VT") - get_VT(q, state_freq); - else if (name == "rtREV") - get_rtREV(q, state_freq); - else - get_Dayhoff(q, state_freq); - - for (i = 0, k = 0; i < num_states-1; i++) - for (j = i+1; j < num_states; j++) - rates[k++] = q[i][j]; - for (i = num_states-1; i >= 0; i--) - delete q[i]; - } else { - //outError("Invalid model name: " + name); - user_model = true; - } - - if (!model_str.empty()) { - // read rates from internal string - try { - istringstream in(model_str); - readRates(in); - readStateFreq(in); - } - catch (const char *str) { - outError(str); - } - } else if (user_model) { - readParameters(model_name); - }*/ //assert(freq != FREQ_ESTIMATE); if (freq == FREQ_UNKNOWN) freq = FREQ_USER_DEFINED; @@ -3880,7 +868,7 @@ void ModelProtein::startCheckpoint() { void ModelProtein::saveCheckpoint() { - if (num_params > 0) { + if (num_params > 0 && !fixed_parameters) { startCheckpoint(); CKP_ARRAY_SAVE(getNumRateEntries(), rates); endCheckpoint(); @@ -3891,7 +879,7 @@ void ModelProtein::saveCheckpoint() { void ModelProtein::restoreCheckpoint() { ModelMarkov::restoreCheckpoint(); - if (num_params > 0) { + if (num_params > 0 && !fixed_parameters) { startCheckpoint(); CKP_ARRAY_RESTORE(getNumRateEntries(), rates); endCheckpoint(); @@ -3904,22 +892,81 @@ void ModelProtein::restoreCheckpoint() { void ModelProtein::readRates(istream &in) throw(const char*, string) { int nrates = getNumRateEntries(); int row = 1, col = 0; - // since states for protein is stored in lower-triangle, special treatment is needed - for (int i = 0; i < nrates; i++, col++) { - if (col == row) { - row++; col = 0; - } - // switch col and row - int id = col*(2*num_states-col-1)/2 + (row-col-1); - if (id >= nrates) { - cout << row << " " << col << endl; - } - ASSERT(id < nrates && id >= 0); // make sure that the conversion is correct - if (!(in >> rates[id])) - throw name+string(": Rate entries could not be read"); - if (rates[id] < 0.0) - 
throw "Negative rates found"; - } + if (is_reversible) { + // since states for protein is stored in lower-triangle, special treatment is needed + for (int i = 0; i < nrates; i++, col++) { + if (col == row) { + row++; col = 0; + } + // switch col and row + int id = col*(2*num_states-col-1)/2 + (row-col-1); + if (id >= nrates) { + cout << row << " " << col << endl; + } + ASSERT(id < nrates && id >= 0); // make sure that the conversion is correct + if (!(in >> rates[id])) + throw name+string(": Rate entries could not be read"); + if (rates[id] < 0.0) + throw "Negative rates found"; + } + } else { + // non-reversible model, read the whole rate matrix + int i = 0; + for (row = 0; row < num_states; row++) { + double row_sum = 0.0; + for (col = 0; col < num_states; col++) { + if (row != col) { + if (!(in >> rates[i])) + throw name+string(": Rate entries could not be read"); + if (rates[i] < 0.0) + throw "Negative rates found"; + row_sum += rates[i]; + i++; + } else { + double d; + in >> d; + row_sum += d; + } + } + if (fabs(row_sum) > 1e-3) + throw "Row " + convertIntToString(row) + " does not sum to 0"; + } + } } +string ModelProtein::getNameParams() { + ostringstream retname; + retname << name; + retname << freqTypeString(freq_type, phylo_tree->aln->seq_type, true); + + if (fixed_parameters) + return retname.str(); + + if (freq_type == FREQ_ESTIMATE) { + retname << "{" << state_freq[0]; + for (int i = 1; i < num_states; i++) + retname << "," << state_freq[i]; + retname << "}"; + } + return retname.str(); +} + +void ModelProtein::computeTipLikelihood(PML::StateType state, double *state_lk) { + int ambi_aa[] = { + 2, 3, //4+8, // B = N or D + 5, 6, //32+64, // Z = Q or E + 9, 10 //512+1024 // U = I or L + }; + if (state < num_states || state >= num_states + sizeof(ambi_aa)/sizeof(int)/2) { + ModelSubst::computeTipLikelihood(state, state_lk); + return; + } + + // special treatment for ambiguous (polymorphic) state + memset(state_lk, 0, num_states*sizeof(double)); + int 
cstate = state - num_states; + state_lk[ambi_aa[cstate*2]] = 1.0; + state_lk[ambi_aa[cstate*2+1]] = 1.0; +} + diff --git a/model/modelprotein.h b/model/modelprotein.h index 56a45b478..44d17189a 100644 --- a/model/modelprotein.h +++ b/model/modelprotein.h @@ -22,6 +22,8 @@ #include "modelmarkov.h" +extern const char* builtin_prot_models; + /** Substitution models for protein sequences @@ -36,7 +38,7 @@ class ModelProtein : public ModelMarkov @param freq state frequency type @param tree associated phylogenetic tree */ - ModelProtein(const char *model_name, string model_params, StateFreqType freq, string freq_params, PhyloTree *tree); + ModelProtein(const char *model_name, string model_params, StateFreqType freq, string freq_params, PhyloTree *tree, ModelsBlock *models_block); /** initialization, called automatically by the constructor, no need to call it @@ -69,8 +71,16 @@ class ModelProtein : public ModelMarkov /** * @return model name with parameters in form of e.g. GTR{a,b,c,d,e,f} */ - virtual string getNameParams() { return name; } + virtual string getNameParams(); + + /** compute the tip likelihood vector of a state for Felsenstein's pruning algorithm + @param state character state + @param[out] state_lk state likehood vector of size num_states + */ + virtual void computeTipLikelihood(PML::StateType state, double *state_lk); +private: + ModelsBlock *models_block; }; diff --git a/model/modelset.cpp b/model/modelset.cpp index 27e3d7915..2da7050ac 100644 --- a/model/modelset.cpp +++ b/model/modelset.cpp @@ -57,8 +57,9 @@ int ModelSet::getPtnModelID(int ptn) double ModelSet::computeTrans(double time, int model_id, int state1, int state2) { - if (phylo_tree->vector_size == 1) + if (phylo_tree->vector_size == 1) { return at(model_id)->computeTrans(time, state1, state2); + } // temporary fix problem with vectorized eigenvectors int i; int vsize = phylo_tree->vector_size; @@ -78,9 +79,9 @@ double ModelSet::computeTrans(double time, int model_id, int state1, int state2) 
} double ModelSet::computeTrans(double time, int model_id, int state1, int state2, double &derv1, double &derv2) { - if (phylo_tree->vector_size == 1) + if (phylo_tree->vector_size == 1) { return at(model_id)->computeTrans(time, state1, state2, derv1, derv2); - + } // temporary fix problem with vectorized eigenvectors int i; int vsize = phylo_tree->vector_size; @@ -111,8 +112,9 @@ int ModelSet::getNDim() void ModelSet::writeInfo(ostream& out) { - if (empty()) + if (empty()) { return; + } if (verbose_mode >= VB_DEBUG) { int i = 1; for (iterator it = begin(); it != end(); it++, i++) { @@ -126,16 +128,17 @@ void ModelSet::writeInfo(ostream& out) void ModelSet::decomposeRateMatrix() { - if (empty()) + if (empty()) { return; - for (iterator it = begin(); it != end(); it++) - (*it)->decomposeRateMatrix(); + } + for (iterator it = begin(); it != end(); it++) { + (*it)->decomposeRateMatrix(); + } if (phylo_tree->vector_size == 1) return; // rearrange eigen to obey vector_size size_t vsize = phylo_tree->vector_size; size_t states2 = num_states*num_states; - size_t ptn, i, x; size_t max_size = get_safe_upper_limit(size()); @@ -144,35 +147,37 @@ void ModelSet::decomposeRateMatrix() memcpy(&eigenvalues[m*num_states], &eigenvalues[(m-1)*num_states], sizeof(double)*num_states); memcpy(&eigenvectors[m*states2], &eigenvectors[(m-1)*states2], sizeof(double)*states2); memcpy(&inv_eigenvectors[m*states2], &inv_eigenvectors[(m-1)*states2], sizeof(double)*states2); + memcpy(&inv_eigenvectors_transposed[m*states2], &inv_eigenvectors_transposed[(m-1)*states2], sizeof(double)*states2); } double new_eval[num_states*vsize]; double new_evec[states2*vsize]; double new_inv_evec[states2*vsize]; - for (ptn = 0; ptn < size(); ptn += vsize) { - double *eval_ptr = &eigenvalues[ptn*num_states]; - double *evec_ptr = &eigenvectors[ptn*states2]; - double *inv_evec_ptr = &inv_eigenvectors[ptn*states2]; - for (i = 0; i < vsize; i++) { - for (x = 0; x < num_states; x++) - new_eval[x*vsize+i] = 
eval_ptr[x]; - for (x = 0; x < states2; x++) { - new_evec[x*vsize+i] = evec_ptr[x]; - new_inv_evec[x*vsize+i] = inv_evec_ptr[x]; - } - eval_ptr += num_states; - evec_ptr += states2; - inv_evec_ptr += states2; - } - // copy new values + for (size_t ptn = 0; ptn < size(); ptn += vsize) { + double *eval_ptr = &eigenvalues[ptn*num_states]; + double *evec_ptr = &eigenvectors[ptn*states2]; + double *inv_evec_ptr = &inv_eigenvectors[ptn*states2]; + for (size_t i = 0; i < vsize; i++) { + for (size_t x = 0; x < num_states; x++) + new_eval[x*vsize+i] = eval_ptr[x]; + for (size_t x = 0; x < states2; x++) { + new_evec[x*vsize+i] = evec_ptr[x]; + new_inv_evec[x*vsize+i] = inv_evec_ptr[x]; + } + eval_ptr += num_states; + evec_ptr += states2; + inv_evec_ptr += states2; + } + // copy new values memcpy(&eigenvalues[ptn*num_states], new_eval, sizeof(double)*num_states*vsize); memcpy(&eigenvectors[ptn*states2], new_evec, sizeof(double)*states2*vsize); memcpy(&inv_eigenvectors[ptn*states2], new_inv_evec, sizeof(double)*states2*vsize); - } + calculateSquareMatrixTranspose(new_inv_evec, num_states + , &inv_eigenvectors_transposed[ptn*states2]); + } } - bool ModelSet::getVariables(double* variables) { ASSERT(size()); @@ -188,52 +193,57 @@ void ModelSet::setVariables(double* variables) front()->setVariables(variables); } - ModelSet::~ModelSet() { - for (reverse_iterator rit = rbegin(); rit != rend(); rit++) { - (*rit)->eigenvalues = NULL; - (*rit)->eigenvectors = NULL; - (*rit)->inv_eigenvectors = NULL; - delete (*rit); - } + for (reverse_iterator rit = rbegin(); rit != rend(); rit++) { + (*rit)->eigenvalues = nullptr; + (*rit)->eigenvectors = nullptr; + (*rit)->inv_eigenvectors = nullptr; + (*rit)->inv_eigenvectors_transposed = nullptr; + delete (*rit); + } } void ModelSet::joinEigenMemory() { size_t nmixtures = get_safe_upper_limit(size()); - if (eigenvalues) aligned_free(eigenvalues); - if (eigenvectors) aligned_free(eigenvectors); - if (inv_eigenvectors) 
aligned_free(inv_eigenvectors); - + aligned_free(eigenvalues); + aligned_free(eigenvectors); + aligned_free(inv_eigenvectors); + aligned_free(inv_eigenvectors_transposed); + size_t states2 = num_states*num_states; - - eigenvalues = aligned_alloc(num_states*nmixtures); - eigenvectors = aligned_alloc(states2*nmixtures); - inv_eigenvectors = aligned_alloc(states2*nmixtures); - - // assigning memory for individual models - size_t m = 0; - for (iterator it = begin(); it != end(); it++, m++) { + + eigenvalues = aligned_alloc(num_states*nmixtures); + eigenvectors = aligned_alloc(states2*nmixtures); + inv_eigenvectors = aligned_alloc(states2*nmixtures); + inv_eigenvectors_transposed = aligned_alloc(states2*nmixtures); + + // assigning memory for individual models + size_t m = 0; + for (iterator it = begin(); it != end(); it++, m++) { // first copy memory for eigen stuffs memcpy(&eigenvalues[m*num_states], (*it)->eigenvalues, num_states*sizeof(double)); memcpy(&eigenvectors[m*states2], (*it)->eigenvectors, states2*sizeof(double)); memcpy(&inv_eigenvectors[m*states2], (*it)->inv_eigenvectors, states2*sizeof(double)); + memcpy(&inv_eigenvectors_transposed[m*states2], (*it)->inv_eigenvectors_transposed, states2*sizeof(double)); // then delete - if ((*it)->eigenvalues) aligned_free((*it)->eigenvalues); - if ((*it)->eigenvectors) aligned_free((*it)->eigenvectors); - if ((*it)->inv_eigenvectors) aligned_free((*it)->inv_eigenvectors); -// if ((*it)->eigen_coeff) aligned_free((*it)->eigen_coeff); - + aligned_free((*it)->eigenvalues); + aligned_free((*it)->eigenvectors); + aligned_free((*it)->inv_eigenvectors); + aligned_free((*it)->inv_eigenvectors_transposed); + // and assign new memory - (*it)->eigenvalues = &eigenvalues[m*num_states]; - (*it)->eigenvectors = &eigenvectors[m*states2]; - (*it)->inv_eigenvectors = &inv_eigenvectors[m*states2]; - } - + (*it)->eigenvalues = &eigenvalues[m*num_states]; + (*it)->eigenvectors = &eigenvectors[m*states2]; + (*it)->inv_eigenvectors = 
&inv_eigenvectors[m*states2]; + (*it)->inv_eigenvectors_transposed = &inv_eigenvectors_transposed[m*states2]; + } + // copy dummy values for (m = size(); m < nmixtures; m++) { memcpy(&eigenvalues[m*num_states], &eigenvalues[(m-1)*num_states], sizeof(double)*num_states); memcpy(&eigenvectors[m*states2], &eigenvectors[(m-1)*states2], sizeof(double)*states2); memcpy(&inv_eigenvectors[m*states2], &inv_eigenvectors[(m-1)*states2], sizeof(double)*states2); + memcpy(&inv_eigenvectors_transposed[m*states2], &inv_eigenvectors_transposed[(m-1)*states2], sizeof(double)*states2); } } diff --git a/model/modelsubst.cpp b/model/modelsubst.cpp index db32c8d63..5c37c40d1 100644 --- a/model/modelsubst.cpp +++ b/model/modelsubst.cpp @@ -21,6 +21,8 @@ ModelSubst::ModelSubst(int nstates) : Optimization(), CheckpointFactory() for (int i = 0; i < num_states; i++) state_freq[i] = 1.0 / num_states; freq_type = FREQ_EQUAL; + fixed_parameters = false; +// linked_model = NULL; } void ModelSubst::startCheckpoint() { @@ -33,7 +35,7 @@ void ModelSubst::saveCheckpoint() { // CKP_SAVE(name); // CKP_SAVE(full_name); // CKP_SAVE(freq_type); - if (freq_type == FREQ_ESTIMATE) + if (freq_type == FREQ_ESTIMATE && !fixed_parameters) CKP_ARRAY_SAVE(num_states, state_freq); endCheckpoint(); CheckpointFactory::saveCheckpoint(); @@ -48,7 +50,7 @@ void ModelSubst::restoreCheckpoint() { // int freq_type = this->freq_type; // CKP_RESTORE(freq_type); // this->freq_type = (StateFreqType)freq_type; - if (freq_type == FREQ_ESTIMATE) + if (freq_type == FREQ_ESTIMATE && !fixed_parameters) CKP_ARRAY_RESTORE(num_states, state_freq); endCheckpoint(); @@ -123,6 +125,10 @@ void ModelSubst::getStateFrequency(double *state_freq, int mixture) { state_freq[i] = freq; } +void ModelSubst::setStateFrequency(double *state_freq) { + memcpy(this->state_freq, state_freq, sizeof(double)*num_states); +} + void ModelSubst::computeTransDerv(double time, double *trans_matrix, double *trans_derv1, double *trans_derv2, int mixture) { @@ 
-173,13 +179,44 @@ void ModelSubst::computeTransDerv(double time, double *trans_matrix, } +void ModelSubst::multiplyWithInvEigenvector(double *state_lk) { + int nmixtures = getNMixtures(); + double *inv_eigenvectors = getInverseEigenvectors(); + double saved_state_lk[num_states]; + memcpy(saved_state_lk, state_lk, sizeof(double)*num_states); + memset(state_lk, 0, sizeof(double)*num_states*nmixtures); + for (int m = 0; m < nmixtures; m++) { + double *inv_evec = &inv_eigenvectors[m*num_states*num_states]; + double *this_state_lk = &state_lk[m*num_states]; + for (int i = 0; i < num_states; i++) + for (int j = 0; j < num_states; j++, inv_evec++) + this_state_lk[i] += (*inv_evec) * saved_state_lk[j]; + } +} + +void ModelSubst::computeTipLikelihood(PML::StateType state, double *state_lk) { + if (state < num_states) { + // single state + memset(state_lk, 0, num_states*sizeof(double)); + state_lk[state] = 1.0; + } else { + // unknown state + for (int i = 0; i < num_states; i++) + state_lk[i] = 1.0; + } +} + double *ModelSubst::newTransMatrix() { return new double[num_states * num_states]; } ModelSubst::~ModelSubst() { - if (state_freq) delete [] state_freq; + // mem space pointing to target model and thus avoid double free here +// if (linked_model && linked_model != this) +// return; + + if (state_freq) delete [] state_freq; } diff --git a/model/modelsubst.h b/model/modelsubst.h index cef1d73b8..f4c79eea9 100644 --- a/model/modelsubst.h +++ b/model/modelsubst.h @@ -16,9 +16,13 @@ #include "utils/tools.h" #include "utils/optimization.h" #include "utils/checkpoint.h" +#include "phylo-yaml/statespace.h" using namespace std; +const char OPEN_BRACKET = '{'; +const char CLOSE_BRACKET = '}'; + /** Substitution model abstract class @@ -27,6 +31,7 @@ Substitution model abstract class class ModelSubst: public Optimization, public CheckpointFactory { friend class ModelFactory; + friend class PartitionModel; public: /** @@ -60,7 +65,23 @@ class ModelSubst: public Optimization, public 
CheckpointFactory @return TRUE if model is time-reversible, FALSE otherwise */ virtual bool isReversible() { return true; }; - + + /** return true if using reversible likelihood kernel, false for using non-reversible kernel */ + bool useRevKernel() { + return isReversible() && !Params::getInstance().kernel_nonrev; + }; + + /** + fix parameters of the model + @param fix true to fix, false to not fix + @return the current state of fixing parameters + */ + virtual bool fixParameters(bool fix) { + bool current = fixed_parameters; + fixed_parameters = fix; + return current; + } + /** * @return TRUE if this is a site-specific model, FALSE otherwise */ @@ -118,6 +139,18 @@ class ModelSubst: public Optimization, public CheckpointFactory in the upper-diagonal of the rate matrix (since model is reversible) */ virtual int getNumRateEntries() { return num_states*(num_states-1)/2; } + + /** + set num_params variable + */ + virtual void setNParams(int num_params) {} + + /** + get num_params variable + */ + virtual int getNParams() { + return 0; + } /** * get the size of transition matrix, default is num_states*num_states. @@ -209,17 +242,33 @@ class ModelSubst: public Optimization, public CheckpointFactory compute the state frequency vector. One should override this function when defining new model. The default is equal state sequency, valid for all kind of data. @param mixture (optional) class for mixture model - @param state_freq (OUT) state frequency vector. Assume state_freq has size of num_states + @param[out] state_freq state frequency vector. Assume state_freq has size of num_states */ virtual void getStateFrequency(double *state_freq, int mixture = 0); + /** + set the state frequency vector. + @param state_freq state frequency vector. 
Assume state_freq has size of num_states + */ + virtual void setStateFrequency(double *state_freq); + /** get frequency type @return frequency type */ virtual StateFreqType getFreqType() { return FREQ_EQUAL; } + /** for reversible models, multiply likelihood with inverse eigenvectors for fast pruning algorithm + @param[in/out] state_lk state likelihood multiplied with inverse eigenvectors + */ + void multiplyWithInvEigenvector(double *state_lk); + /** compute the tip likelihood vector of a state for Felsenstein's pruning algorithm + @param state character state + @param[out] state_lk state likehood vector of size num_states + */ + virtual void computeTipLikelihood(PML::StateType state, double *state_lk); + /** allocate memory for a transition matrix. One should override this function when defining new model such as Gamma model. The default is to allocate a double vector of size num_states * num_states. This @@ -287,9 +336,14 @@ class ModelSubst: public Optimization, public CheckpointFactory } virtual double *getInverseEigenvectors() const { - return NULL; + return nullptr; } + virtual double *getInverseEigenvectorsTransposed() const { + return nullptr; + } + + /** * compute the memory size for the model, can be large for site-specific models * @return memory size required in bytes @@ -321,7 +375,7 @@ class ModelSubst: public Optimization, public CheckpointFactory restore object from the checkpoint */ virtual void restoreCheckpoint(); - + /** number of states */ @@ -337,7 +391,10 @@ class ModelSubst: public Optimization, public CheckpointFactory full name of the model */ string full_name; - + + /** true to fix parameters, otherwise false */ + bool fixed_parameters; + /** state frequencies */ @@ -349,6 +406,9 @@ class ModelSubst: public Optimization, public CheckpointFactory */ StateFreqType freq_type; + /** state set for each sequence in the alignment */ + //vector > seq_states; + /** destructor */ @@ -371,6 +431,7 @@ class ModelSubst: public Optimization, public 
CheckpointFactory */ virtual bool getVariables(double *variables) { return false; } + }; #endif diff --git a/model/modelunrest.cpp b/model/modelunrest.cpp index 750784252..d33d7b44f 100644 --- a/model/modelunrest.cpp +++ b/model/modelunrest.cpp @@ -10,10 +10,12 @@ ModelUnrest::ModelUnrest(PhyloTree *tree, string model_params) : ModelMarkov(tree, false) { - num_params = getNumRateEntries() - 1; - model_parameters = new double [num_params]; - for (int i=0; i< num_params; i++) model_parameters[i] = 1; - setRates(); + num_params = getNumRateEntries() - 1; + //ModelMarkov::setReversible in the ModelMarkov + //constructor sets all the rates to 0.0. But... + for (int i=0; i <= num_params; i++) { + rates[i] = 1.0; + } if (model_params != "") { cout << "WARNING: Supplying model params to constructor not yet properly implemented -- ignored" << endl; // TODO: parse model_params into model_parameters, then call setRates(). @@ -31,18 +33,44 @@ void ModelUnrest::setBounds(double *lower_bound, double *upper_bound, bool *boun int i, ndim = getNDim(); for (i = 1; i <= ndim; i++) { - lower_bound[i] = 0.01; - upper_bound[i] = 100.0; + lower_bound[i] = MIN_RATE; + upper_bound[i] = MAX_RATE; bound_check[i] = false; } } /* - * Set rates from model_parameters - */ void ModelUnrest::setRates() { // For UNREST, parameters are simply the off-diagonal rate matrix entries // (except [4,3] = rates[11], which is constrained to be 1) memcpy(rates, model_parameters, num_params*sizeof(double)); rates[num_params]=1; } +*/ + +void ModelUnrest::setStateFrequency(double* freq) { + // DOES NOTHING +} + +void ModelUnrest::startCheckpoint() { + checkpoint->startStruct("ModelUnrest"); +} + +void ModelUnrest::saveCheckpoint() { + startCheckpoint(); + if (!fixed_parameters) + CKP_ARRAY_SAVE(getNumRateEntries(), rates); + endCheckpoint(); + ModelMarkov::saveCheckpoint(); +} + +void ModelUnrest::restoreCheckpoint() { + ModelMarkov::restoreCheckpoint(); + startCheckpoint(); + if (!fixed_parameters) + 
CKP_ARRAY_RESTORE(getNumRateEntries(), rates); + endCheckpoint(); + decomposeRateMatrix(); + if (phylo_tree) + phylo_tree->clearAllPartialLH(); +} diff --git a/model/modelunrest.h b/model/modelunrest.h index df54b6fd0..01aeb07eb 100644 --- a/model/modelunrest.h +++ b/model/modelunrest.h @@ -24,22 +24,30 @@ class ModelUnrest: public ModelMarkov { /** * setup the bounds for joint optimization with BFGS */ - void setBounds(double *lower_bound, double *upper_bound, bool *bound_check); + virtual void setBounds(double *lower_bound, double *upper_bound, bool *bound_check); -protected: - - /** - Model parameters - cached so we know when they change, and thus when - recalculations are needed. + /** + set the state frequency vector. + @param state_freq state frequency vector. Assume state_freq has size of num_states + */ + virtual void setStateFrequency(double *state_freq); - */ - double *model_parameters; + /** + start structure for checkpointing + */ + virtual void startCheckpoint(); + + /** + save object into the checkpoint + */ + virtual void saveCheckpoint(); + + /** + restore object from the checkpoint + */ + virtual void restoreCheckpoint(); - /** - * Called from getVariables to update the rate matrix for the new - * model parameters. 
- */ - virtual void setRates(); +protected: }; #endif /* MODELUNREST_H_ */ diff --git a/model/partitionmodel.cpp b/model/partitionmodel.cpp index 8d6b1ddae..0940658da 100644 --- a/model/partitionmodel.cpp +++ b/model/partitionmodel.cpp @@ -20,11 +20,13 @@ #include "partitionmodel.h" #include "alignment/superalignment.h" #include "model/rategamma.h" +#include "model/modelmarkov.h" PartitionModel::PartitionModel() : ModelFactory() { linked_alpha = -1.0; + opt_gamma_invar = false; } PartitionModel::PartitionModel(Params ¶ms, PhyloSuperTree *tree, ModelsBlock *models_block) @@ -35,6 +37,7 @@ PartitionModel::PartitionModel(Params ¶ms, PhyloSuperTree *tree, ModelsBlock joint_optimize = params.optimize_model_rate_joint; fused_mix_rate = false; linked_alpha = -1.0; + opt_gamma_invar = false; // create dummy model model = new ModelSubst(tree->aln->num_states); @@ -48,16 +51,28 @@ PartitionModel::PartitionModel(Params ¶ms, PhyloSuperTree *tree, ModelsBlock params.gamma_shape = fabs(params.gamma_shape); linked_alpha = params.gamma_shape; } + double init_by_divmat = false; + if (params.model_name_init && strcmp(params.model_name_init, "DIVMAT") == 0) { + init_by_divmat = true; + params.model_name_init = NULL; + } for (it = tree->begin(), part = 0; it != tree->end(); it++, part++) { ASSERT(!((*it)->getModelFactory())); - string model_name = tree->part_info[part].model_name; + string model_name = (*it)->aln->model_name; if (model_name == "") // if empty, take model name from command option model_name = params.model_name; (*it)->setModelFactory(new ModelFactory(params, model_name, (*it), models_block)); (*it)->setModel((*it)->getModelFactory()->model); (*it)->setRate((*it)->getModelFactory()->site_rate); -// params.model_name = model_name; - if ((*it)->aln->getNSeq() < tree->aln->getNSeq() && (*it)->getModel()->freq_type == FREQ_EMPIRICAL && (*it)->aln->seq_type != SEQ_CODON) { + + // link models between partitions + if (params.link_model) { + 
(*it)->getModel()->fixParameters(true); + if (linked_models.find((*it)->getModel()->getName()) == linked_models.end()) { + linked_models[(*it)->getModel()->getName()] = (*it)->getModel(); + } + } else if ((*it)->aln->getNSeq() < tree->aln->getNSeq() && params.partition_type != TOPO_UNLINKED && + (*it)->getModel()->freq_type == FREQ_EMPIRICAL && (*it)->aln->seq_type != SEQ_CODON) { // modify state_freq to account for empty sequences (*it)->aln->computeStateFreq((*it)->getModel()->state_freq, (*it)->aln->getNSite() * (tree->aln->getNSeq() - (*it)->aln->getNSeq())); (*it)->getModel()->decomposeRateMatrix(); @@ -67,6 +82,80 @@ PartitionModel::PartitionModel(Params ¶ms, PhyloSuperTree *tree, ModelsBlock //(*it)->copyTree(tree, taxa_set); //(*it)->drawTree(cout); } + if (init_by_divmat) { + ASSERT(0 && "init_by_div_mat not working"); + int nstates = linked_models.begin()->second->num_states; + double *pair_freq = new double[nstates * nstates]; + double *state_freq = new double[nstates]; + tree->aln->computeDivergenceMatrix(pair_freq, state_freq); + /* + MatrixXd divmat = Map > (pair_freq, nstates, nstates); + cout << "DivMat: " << endl << divmat << endl; + auto pi = Map(state_freq, nstates); + MatrixXd Q = (pi.asDiagonal() * divmat).log(); + cout << "Q: " << endl << Q << endl; + cout << "rowsum: " << Q.rowwise().sum() << endl; + Map >(pair_freq, nstates, nstates) = Q; + */ + ((ModelMarkov*)linked_models.begin()->second)->setFullRateMatrix(pair_freq, state_freq); + ((ModelMarkov*)linked_models.begin()->second)->decomposeRateMatrix(); + delete [] state_freq; + delete [] pair_freq; + + } else + for (auto mit = linked_models.begin(); mit != linked_models.end(); mit++) { + PhyloSuperTree *stree = (PhyloSuperTree*)site_rate->phylo_tree; + if (mit->second->freq_type != FREQ_ESTIMATE && mit->second->freq_type != FREQ_EMPIRICAL) + continue; + // count state occurrences + size_t *sum_state_counts = NULL; + int num_parts = 0; + for (it = stree->begin(); it != stree->end(); it++) { 
+ if ((*it)->getModel()->getName() == mit->second->getName()) { + num_parts++; + if ((*it)->aln->seq_type == SEQ_CODON) + outError("Linking codon models not supported"); + if ((*it)->aln->seq_type == SEQ_POMO) + outError("Linking POMO models not supported"); + size_t state_counts[(*it)->aln->STATE_UNKNOWN+1]; + size_t unknown_states = 0; + if( params.partition_type != TOPO_UNLINKED) + unknown_states = (*it)->aln->getNSite() * (tree->aln->getNSeq() - (*it)->aln->getNSeq()); + (*it)->aln->countStates(state_counts, unknown_states); + if (!sum_state_counts) { + sum_state_counts = new size_t[(*it)->aln->STATE_UNKNOWN+1]; + memset(sum_state_counts, 0, sizeof(size_t)*((*it)->aln->STATE_UNKNOWN+1)); + } + for (int state = 0; state <= (*it)->aln->STATE_UNKNOWN; ++state) { + sum_state_counts[state] += state_counts[state]; + } + } + } + cout << "Linking " << mit->first << " model across " << num_parts << " partitions" << endl; + int nstates = mit->second->num_states; + double sum_state_freq[nstates]; + // convert counts to frequencies + for (it = stree->begin(); it != stree->end(); it++) { + if ((*it)->getModel()->getName() == mit->second->getName()) { + (*it)->aln->convertCountToFreq(sum_state_counts, sum_state_freq); + break; + } + } + + cout << "Mean state frequencies:"; + int prec = cout.precision(8); + for (int state = 0; state < mit->second->num_states; state++) + cout << " " << sum_state_freq[state]; + cout << endl; + cout.precision(prec); + + for (it = stree->begin(); it != stree->end(); it++) + if ((*it)->getModel()->getName() == mit->second->getName()) { + ((ModelMarkov*)(*it)->getModel())->adaptStateFrequency(sum_state_freq); + (*it)->getModel()->decomposeRateMatrix(); + } + delete [] sum_state_counts; + } } void PartitionModel::setCheckpoint(Checkpoint *checkpoint) { @@ -83,10 +172,17 @@ void PartitionModel::startCheckpoint() { void PartitionModel::saveCheckpoint() { startCheckpoint(); CKP_SAVE(linked_alpha); + for (auto it = linked_models.begin(); it != 
linked_models.end(); it++) { + checkpoint->startStruct(it->first); + bool fixed = it->second->fixParameters(false); + it->second->saveCheckpoint(); + it->second->fixParameters(fixed); + checkpoint->endStruct(); + } PhyloSuperTree *tree = (PhyloSuperTree*)site_rate->getTree(); int part = 0; for (PhyloSuperTree::iterator it = tree->begin(); it != tree->end(); it++, part++) { - checkpoint->startStruct(tree->part_info[part].name); + checkpoint->startStruct((*it)->aln->name); (*it)->getModelFactory()->saveCheckpoint(); checkpoint->endStruct(); } @@ -103,11 +199,23 @@ void PartitionModel::restoreCheckpoint() { PhyloSuperTree *tree = (PhyloSuperTree*)site_rate->getTree(); int part = 0; for (PhyloSuperTree::iterator it = tree->begin(); it != tree->end(); it++, part++) { - checkpoint->startStruct(tree->part_info[part].name); + checkpoint->startStruct((*it)->aln->name); (*it)->getModelFactory()->restoreCheckpoint(); checkpoint->endStruct(); } + // restore linked models + for (auto it = linked_models.begin(); it != linked_models.end(); it++) { + checkpoint->startStruct(it->first); + for (auto tit = tree->begin(); tit != tree->end(); tit++) + if ((*tit)->getModel()->getName() == it->first) { + bool fixed = (*tit)->getModel()->fixParameters(false); + (*tit)->getModel()->restoreCheckpoint(); + (*tit)->getModel()->fixParameters(fixed); + } + checkpoint->endStruct(); + } + endCheckpoint(); } @@ -119,99 +227,298 @@ int PartitionModel::getNParameters(int brlen_type) { } if (linked_alpha > 0) df ++; + for (auto it = linked_models.begin(); it != linked_models.end(); it++) { + bool fixed = it->second->fixParameters(false); + df += it->second->getNDim() + it->second->getNDimFreq(); + it->second->fixParameters(fixed); + } return df; } double PartitionModel::computeFunction(double shape) { PhyloSuperTree *tree = (PhyloSuperTree*)site_rate->getTree(); double res = 0.0; + int ntrees = tree->size(); linked_alpha = shape; - for (PhyloSuperTree::iterator it = tree->begin(); it != tree->end(); 
it++) - if ((*it)->getRate()->isGammaRate()) { - res += (*it)->getRate()->computeFunction(shape); - } - if (res == 0.0) + if (tree->part_order.empty()) tree->computePartitionOrder(); +#ifdef _OPENMP +#pragma omp parallel for reduction(+: res) schedule(dynamic) if(tree->num_threads > 1) +#endif + for (int j = 0; j < ntrees; j++) { + int i = tree->part_order[j]; + if (tree->at(i)->getRate()->isGammaRate()) + res += tree->at(i)->getRate()->computeFunction(shape); + } + if (res == 0.0) { outError("No partition has Gamma rate heterogeneity!"); + } return res; } double PartitionModel::optimizeLinkedAlpha(bool write_info, double gradient_epsilon) { - if (write_info) + if (write_info) { cout << "Optimizing linked gamma shape..." << endl; + } double negative_lh; double current_shape = linked_alpha; double ferror, optx; optx = minimizeOneDimen(site_rate->getTree()->params->min_gamma_shape, current_shape, MAX_GAMMA_SHAPE, max(gradient_epsilon, TOL_GAMMA_SHAPE), &negative_lh, &ferror); - if (write_info) + double tree_lh = site_rate->getTree()->computeLikelihood(); + if (write_info) { cout << "Linked alpha across partitions: " << linked_alpha << endl; - return site_rate->getTree()->computeLikelihood(); + cout << "Linked alpha log-likelihood: " << tree_lh << endl; + } + return tree_lh; } -double PartitionModel::optimizeParameters(int fixed_len, bool write_info, double logl_epsilon, double gradient_epsilon) { +int PartitionModel::getNDim() { + return model->getNDim(); +} + +double PartitionModel::targetFunk(double x[]) { PhyloSuperTree *tree = (PhyloSuperTree*)site_rate->getTree(); - double tree_lh = 0.0; + + double res = 0; int ntrees = tree->size(); - if (tree->part_order.empty()) tree->computePartitionOrder(); - #ifdef _OPENMP - #pragma omp parallel for reduction(+: tree_lh) schedule(dynamic) if(tree->num_threads > 1) - #endif - for (int i = 0; i < ntrees; i++) { - int part = tree->part_order[i]; - if (write_info) - #ifdef _OPENMP - #pragma omp critical - #endif - { - cout << 
"Optimizing " << tree->at(part)->getModelName() << - " parameters for partition " << tree->part_info[part].name << - " (" << tree->at(part)->getModelFactory()->getNParameters(fixed_len) << " free parameters)" << endl; +#ifdef _OPENMP +#pragma omp parallel for reduction(+: res) schedule(dynamic) if(tree->num_threads > 1) +#endif + for (int j = 0; j < ntrees; j++) { + int i = tree->part_order[j]; + ModelSubst *part_model = tree->at(i)->getModel(); + if (part_model->getName() != model->getName()) + continue; + bool fixed = part_model->fixParameters(false); + res += part_model->targetFunk(x); + part_model->fixParameters(fixed); + } + if (res == 0.0) + outError("No partition has model ", model->getName()); + return res; +} + +void PartitionModel::setVariables(double *variables) { + model->setVariables(variables); +} + +bool PartitionModel::getVariables(double *variables) { + bool changed = false; + PhyloSuperTree *tree = (PhyloSuperTree*)site_rate->getTree(); + for (auto it = tree->begin(); it != tree->end(); it++) + if ((*it)->getModel()->getName() == model->getName()) + changed |= (*it)->getModel()->getVariables(variables); + return changed; +} + +void PartitionModel::scaleStateFreq(bool sum_one) { + PhyloSuperTree *tree = (PhyloSuperTree*)site_rate->getTree(); + for (auto it = tree->begin(); it != tree->end(); it++) + if ((*it)->getModel()->getName() == model->getName()) + ((ModelMarkov*)(*it)->getModel())->scaleStateFreq(sum_one); +} + +double PartitionModel::optimizeLinkedModel(bool write_info, double gradient_epsilon) { + int ndim = getNDim(); + + // return if nothing to be optimized + if (ndim == 0) return 0.0; + + if (write_info) + cout << "Optimizing linked " << model->getName() << " parameters across all partitions (" << ndim << " free parameters)" << endl; + + if (verbose_mode >= VB_MAX) + cout << "Optimizing " << model->name << " model parameters..." 
<< endl; + + //if (freq_type == FREQ_ESTIMATE) scaleStateFreq(false); + + double *variables = new double[ndim+1]; // used for BFGS numerical recipes + double *variables2 = new double[ndim+1]; // used for L-BFGS-B + double *upper_bound = new double[ndim+1]; + double *lower_bound = new double[ndim+1]; + bool *bound_check = new bool[ndim+1]; + double score; + + + // by BFGS algorithm + setVariables(variables); + setVariables(variables2); + ((ModelMarkov*)model)->setBounds(lower_bound, upper_bound, bound_check); + // expand the bound for linked model +// for (int i = 1; i <= ndim; i++) { +// lower_bound[i] = MIN_RATE*0.2; +// upper_bound[i] = MAX_RATE*2.0; +// } + if (Params::getInstance().optimize_alg.find("BFGS-B") == string::npos) + score = -minimizeMultiDimen(variables, ndim, lower_bound, upper_bound, bound_check, max(gradient_epsilon, TOL_RATE)); + else + score = -L_BFGS_B(ndim, variables+1, lower_bound+1, upper_bound+1, max(gradient_epsilon, TOL_RATE)); + + bool changed = getVariables(variables); + + /* 2019-09-05: REMOVED due to numerical issue (NAN) with L-BFGS-B + // 2017-12-06: more robust optimization using 2 different routines + // when estimates are at boundary + score = -minimizeMultiDimen(variables, ndim, lower_bound, upper_bound, bound_check, max(gradient_epsilon, TOL_RATE)); + bool changed = getVariables(variables); + + if (model->isUnstableParameters()) { + // parameters at boundary, restart with L-BFGS-B with parameters2 + double score2 = -L_BFGS_B(ndim, variables2+1, lower_bound+1, upper_bound+1, max(gradient_epsilon, TOL_RATE)); + if (score2 > score+0.1) { + if (verbose_mode >= VB_MED) + cout << "NICE: L-BFGS-B found better parameters with LnL=" << score2 << " than BFGS LnL=" << score << endl; + changed = getVariables(variables2); + score = score2; + } else { + // otherwise, revert what BFGS found + changed = getVariables(variables); } - tree_lh += tree->at(part)->getModelFactory()->optimizeParameters(fixed_len, write_info && verbose_mode >= 
VB_MED, - logl_epsilon/min(ntrees,10), gradient_epsilon/min(ntrees,10)); } - //return ModelFactory::optimizeParameters(fixed_len, write_info); + */ + + // BQM 2015-09-07: normalize state_freq + if (model->isReversible() && model->freq_type == FREQ_ESTIMATE) { + scaleStateFreq(true); + changed = true; + } + if (changed) { + PhyloSuperTree *tree = (PhyloSuperTree*)site_rate->getTree(); + for (auto it = tree->begin(); it != tree->end(); it++) + if ((*it)->getModel()->getName() == model->getName()) + (*it)->getModel()->decomposeRateMatrix(); + site_rate->phylo_tree->clearAllPartialLH(); + score = site_rate->phylo_tree->computeLikelihood(); + } + + delete [] bound_check; + delete [] lower_bound; + delete [] upper_bound; + delete [] variables2; + delete [] variables; + + if (write_info) { + cout << "Linked-model log-likelihood: " << score << endl; + } - if (tree->params->link_alpha) { - tree_lh = optimizeLinkedAlpha(write_info, gradient_epsilon); + return score; +} + +double PartitionModel::optimizeLinkedModels(bool write_info, double gradient_epsilon) { + PhyloSuperTree *tree = (PhyloSuperTree*)site_rate->getTree(); + double tree_lh; + for (auto it = linked_models.begin(); it != linked_models.end(); it++) { + ModelSubst *saved_model = model; + model = it->second; + PhyloSuperTree::iterator part_tree; + // un-fix model parameters + for (part_tree = tree->begin(); part_tree != tree->end(); part_tree++) + if ((*part_tree)->getModel()->getName() == model->getName()) + (*part_tree)->getModel()->fixParameters(false); + + // main call to optimize linked model parameters + tree_lh = optimizeLinkedModel(write_info, gradient_epsilon); + + // fix model parameters again + for (part_tree = tree->begin(); part_tree != tree->end(); part_tree++) + if ((*part_tree)->getModel()->getName() == model->getName()) + (*part_tree)->getModel()->fixParameters(true); + + saveCheckpoint(); + getCheckpoint()->dump(); + model = saved_model; + } + + return site_rate->phylo_tree->computeLikelihood(); 
+} + +void PartitionModel::reportLinkedModel(ostream &out) { + if (linked_alpha > 0.0) + out << "Linked alpha across partitions: " << linked_alpha << endl; + for (auto it = linked_models.begin(); it != linked_models.end(); it++) { + out << "Linked model " << it->first << ": " << endl; + it->second->report(out); } - if (verbose_mode >= VB_MED || write_info) - cout << "Optimal log-likelihood: " << tree_lh << endl; - return tree_lh; } +bool PartitionModel::isLinkedModel() { + return Params::getInstance().link_alpha || (linked_models.size()>0); +} -double PartitionModel::optimizeParametersGammaInvar(int fixed_len, bool write_info, double logl_epsilon, double gradient_epsilon) { +double PartitionModel::optimizeParameters(int fixed_len, bool write_info, double logl_epsilon, double gradient_epsilon) { PhyloSuperTree *tree = (PhyloSuperTree*)site_rate->getTree(); - double tree_lh = 0.0; + double prev_tree_lh = -DBL_MAX, tree_lh = 0.0; int ntrees = tree->size(); - if (tree->part_order.empty()) tree->computePartitionOrder(); - #ifdef _OPENMP - #pragma omp parallel for reduction(+: tree_lh) schedule(dynamic) if(tree->num_threads > 1) - #endif - for (int i = 0; i < ntrees; i++) { - int part = tree->part_order[i]; - if (write_info) + for (int step = 0; step < Params::getInstance().model_opt_steps; step++) { + tree_lh = 0.0; + if (tree->part_order.empty()) tree->computePartitionOrder(); #ifdef _OPENMP - #pragma omp critical + #pragma omp parallel for reduction(+: tree_lh) schedule(dynamic) if(tree->num_threads > 1) #endif - { - cout << "Optimizing " << tree->at(part)->getModelName() << - " parameters for partition " << tree->part_info[part].name << - " (" << tree->at(part)->getModelFactory()->getNParameters(fixed_len) << " free parameters)" << endl; + for (int i = 0; i < ntrees; i++) { + int part = tree->part_order[i]; + double score; + if (opt_gamma_invar) + score = tree->at(part)->getModelFactory()->optimizeParametersGammaInvar(fixed_len, + write_info && verbose_mode >= 
VB_MED, + logl_epsilon/min(ntrees,10), gradient_epsilon/min(ntrees,10)); + else + score = tree->at(part)->getModelFactory()->optimizeParameters(fixed_len, + write_info && verbose_mode >= VB_MED, + logl_epsilon/min(ntrees,10), gradient_epsilon/min(ntrees,10)); + tree_lh += score; + if (write_info) +#ifdef _OPENMP +#pragma omp critical +#endif + { + cout << "Partition " << tree->at(part)->aln->name + << " / Model: " << tree->at(part)->getModelName() + << " / df: " << tree->at(part)->getModelFactory()->getNParameters(fixed_len) + << " / LogL: " << score << endl; + } } - tree_lh += tree->at(part)->getModelFactory()->optimizeParametersGammaInvar(fixed_len, write_info && verbose_mode >= VB_MED, - logl_epsilon/min(ntrees,10), gradient_epsilon/min(ntrees,10)); - } - //return ModelFactory::optimizeParameters(fixed_len, write_info); + //return ModelFactory::optimizeParameters(fixed_len, write_info); + + if (!isLinkedModel()) + break; + + if (verbose_mode >= VB_MED || write_info) + cout << step+1 << ". 
Log-likelihood: " << tree_lh << endl; - if (tree->params->link_alpha) { - tree_lh = optimizeLinkedAlpha(write_info, gradient_epsilon); + if (tree->params->link_alpha) { + tree_lh = optimizeLinkedAlpha(write_info, gradient_epsilon); + } + + // optimize linked models + if (!linked_models.empty()) { + double new_tree_lh = optimizeLinkedModels(write_info, gradient_epsilon); + ASSERT(new_tree_lh > tree_lh - 0.1); + tree_lh = new_tree_lh; + } + + if (tree_lh-logl_epsilon*10 < prev_tree_lh) + break; + prev_tree_lh = tree_lh; } - if (verbose_mode >= VB_MED || write_info) + + if (verbose_mode >= VB_MED || write_info) cout << "Optimal log-likelihood: " << tree_lh << endl; + // write linked_models + if (verbose_mode <= VB_MIN && write_info) { + for (auto it = linked_models.begin(); it != linked_models.end(); it++) + it->second->writeInfo(cout); + } + return tree_lh; +} + + +double PartitionModel::optimizeParametersGammaInvar(int fixed_len, bool write_info, double logl_epsilon, double gradient_epsilon) { + opt_gamma_invar = true; + double tree_lh = optimizeParameters(fixed_len, write_info, logl_epsilon, gradient_epsilon); + opt_gamma_invar = false; return tree_lh; } diff --git a/model/partitionmodel.h b/model/partitionmodel.h index 12b45969b..ce9c3c7f2 100644 --- a/model/partitionmodel.h +++ b/model/partitionmodel.h @@ -105,12 +105,74 @@ class PartitionModel : public ModelFactory */ virtual double computeFunction(double shape); + /** + return the number of dimensions + */ + virtual int getNDim(); + + + /** + the target function which needs to be optimized + @param x the input vector x + @return the function value at x + */ + virtual double targetFunk(double x[]); -protected: + /** + rescale the state frequencies + @param sum_one TRUE to make frequencies sum to 1, FALSE to make last entry equal to 1 + */ + void scaleStateFreq(bool sum_one); + + /** + the approximated derivative function + @param x the input vector x + @param dfx the derivative at x + @return the function value 
at x + */ + + /** optimize linked model parameter of over all partitions */ + double optimizeLinkedModel(bool write_info, double gradient_epsilon); + + /** optimize all linked models parameter of over all partitions */ + double optimizeLinkedModels(bool write_info, double gradient_epsilon); + + void reportLinkedModel(ostream &out); + + /** + @return true if some model is linked between partitions + */ + bool isLinkedModel(); + +//protected: /** linked Gamma shape alpha between partitions */ double linked_alpha; + /** + map of linked models by names. + */ + unordered_map linked_models; + + bool opt_gamma_invar; + +protected: + + /** + this function is served for the multi-dimension optimization. It should pack the model parameters + into a vector that is index from 1 (NOTE: not from 0) + @param variables (OUT) vector of variables, indexed from 1 + */ + virtual void setVariables(double *variables); + + /** + this function is served for the multi-dimension optimization. It should assign the model parameters + from a vector of variables that is index from 1 (NOTE: not from 0) + @param variables vector of variables, indexed from 1 + @return TRUE if parameters are changed, FALSE otherwise (2015-10-20) + */ + virtual bool getVariables(double *variables); + }; #endif diff --git a/model/partitionmodelplen.cpp b/model/partitionmodelplen.cpp index ef1e562aa..967efca07 100644 --- a/model/partitionmodelplen.cpp +++ b/model/partitionmodelplen.cpp @@ -7,8 +7,9 @@ // #include -#include "partitionmodelplen.h" +#include "model/partitionmodelplen.h" #include "utils/timeutil.h" +#include "model/modelmarkov.h" /********************************************************** * class PartitionModelPlen @@ -78,16 +79,20 @@ double PartitionModelPlen::optimizeParameters(int fixed_len, bool write_info, do //tree->initPartitionInfo(); // FOR OLGA: needed here - + + unordered_map fixed_params; + unordered_map::iterator it; + for(int part = 0; part < ntrees; part++){ tree->part_info[part].cur_score 
= 0.0; } - if (fixed_len == BRLEN_OPTIMIZE) { - tree_lh = tree->optimizeAllBranches(1); - } else { - tree_lh = tree->computeLikelihood(); - } +// if (fixed_len == BRLEN_OPTIMIZE) { +// tree_lh = tree->optimizeAllBranches(1); +// } else { +// tree_lh = tree->computeLikelihood(); +// } + tree_lh = tree->computeLikelihood(); cout<<"Initial log-likelihood: "<part_order[partid]; // Subtree model parameters optimization - tree->part_info[part].cur_score = tree->at(part)->getModelFactory()->optimizeParametersOnly(i+1, - gradient_epsilon/min(min(i,ntrees),10), tree->part_info[part].cur_score); + tree->part_info[part].cur_score = tree->at(part)->getModelFactory()-> + optimizeParametersOnly(i+1, gradient_epsilon/min(min(i,ntrees),10), + tree->part_info[part].cur_score); if (tree->part_info[part].cur_score == 0.0) tree->part_info[part].cur_score = tree->at(part)->computeLikelihood(); cur_lh += tree->part_info[part].cur_score; @@ -122,6 +128,14 @@ double PartitionModelPlen::optimizeParameters(int fixed_len, bool write_info, do if (tree->params->link_alpha) { cur_lh = optimizeLinkedAlpha(write_info, gradient_epsilon); } + + // optimize linked models + if (!linked_models.empty()) { + double new_cur_lh = optimizeLinkedModels(write_info, gradient_epsilon); + ASSERT(new_cur_lh > cur_lh - 0.1); + cur_lh = new_cur_lh; + } + if (verbose_mode >= VB_MED) cout << "LnL after optimizing individual models: " << cur_lh << endl; if (cur_lh <= tree_lh - 1.0) { @@ -167,6 +181,13 @@ double PartitionModelPlen::optimizeParameters(int fixed_len, bool write_info, do // cout <<"OPTIMIZE MODEL has finished"<< endl; if (write_info) writeInfo(cout); + + // write linked_models + if (verbose_mode <= VB_MIN && write_info) { + for (auto it = linked_models.begin(); it != linked_models.end(); it++) + it->second->writeInfo(cout); + } + cout << "Parameters optimization took " << i-1 << " rounds (" << getRealTime()-begin_time << " sec)" << endl << endl; return tree_lh; @@ -193,23 +214,24 @@ double 
PartitionModelPlen::optimizeGeneRate(double gradient_epsilon) { PhyloSuperTreePlen *tree = (PhyloSuperTreePlen*)site_rate->getTree(); // BQM 22-05-2015: change to optimize individual rates - int i; double score = 0.0; - double nsites = tree->getAlnNSite(); + size_t nsites = tree->getAlnNSite(); vector brlen; brlen.resize(tree->branchNum); tree->getBranchLengths(brlen); double max_brlen = 0.0; - for (i = 0; i < brlen.size(); i++) - for (int j = 0; j < brlen[i].size(); j++) - if (brlen[i][j] > max_brlen) + for (size_t i = 0; i < brlen.size(); ++i) { + for (size_t j = 0; j < brlen[i].size(); ++j) { + if (brlen[i][j] > max_brlen) { max_brlen = brlen[i][j]; - + } + } + } if (tree->part_order.empty()) tree->computePartitionOrder(); #ifdef _OPENMP -#pragma omp parallel for reduction(+: score) private(i) schedule(dynamic) if(tree->num_threads > 1) +#pragma omp parallel for reduction(+: score) schedule(dynamic) if(tree->num_threads > 1) #endif for (int j = 0; j < tree->size(); j++) { int i = tree->part_order[j]; @@ -225,7 +247,7 @@ double PartitionModelPlen::optimizeGeneRate(double gradient_epsilon) // now normalize the rates double sum = 0.0; size_t nsite = 0; - for (i = 0; i < tree->size(); i++) { + for (size_t i = 0; i < tree->size(); ++i) { sum += tree->part_info[i].part_rate * tree->at(i)->aln->getNSite(); if (tree->at(i)->aln->seq_type == SEQ_CODON && tree->rescale_codon_brlen) nsite += 3*tree->at(i)->aln->getNSite(); @@ -241,7 +263,7 @@ double PartitionModelPlen::optimizeGeneRate(double gradient_epsilon) } tree->scaleLength(sum); sum = 1.0/sum; - for (i = 0; i < tree->size(); i++) + for (size_t i = 0; i < tree->size(); ++i) tree->part_info[i].part_rate *= sum; return score; } @@ -260,11 +282,18 @@ int PartitionModelPlen::getNParameters(int brlen_type) { df += tree->size()-1; if (linked_alpha > 0.0) df ++; + for (auto it = linked_models.begin(); it != linked_models.end(); it++) { + bool fixed = it->second->fixParameters(false); + df += it->second->getNDim() + 
it->second->getNDimFreq(); + it->second->fixParameters(fixed); + } return df; } +/* int PartitionModelPlen::getNDim(){ PhyloSuperTreePlen *tree = (PhyloSuperTreePlen*)site_rate->getTree(); int ndim = tree->size() -1; return ndim; } +*/ diff --git a/model/partitionmodelplen.h b/model/partitionmodelplen.h index 4f83d8c07..48dbd4795 100644 --- a/model/partitionmodelplen.h +++ b/model/partitionmodelplen.h @@ -49,7 +49,7 @@ class PartitionModelPlen : public PartitionModel */ virtual int getNParameters(int brlen_type); - virtual int getNDim(); + //virtual int getNDim(); /** write information diff --git a/model/ratefree.cpp b/model/ratefree.cpp index eca95c01d..61a4fa8ad 100644 --- a/model/ratefree.cpp +++ b/model/ratefree.cpp @@ -11,7 +11,7 @@ #include "model/modelfactory.h" #include "model/modelmixture.h" - +#include "utils/timeutil.h" //temporary : for time log-lining const double MIN_FREE_RATE = 0.001; const double MAX_FREE_RATE = 1000.0; @@ -44,7 +44,7 @@ RateFree::RateFree(int ncat, double start_alpha, string params, bool sorted_rate rates[i] = 1.0; sum_prop += prop[i]; } - fix_params = 1; + fix_params = (Params::getInstance().optimize_from_given_params) ? 0 : 1; } else { if (params_vec.size() != ncategory*2) outError("Number of parameters for FreeRate model must be twice number of categories"); @@ -56,7 +56,7 @@ RateFree::RateFree(int ncat, double start_alpha, string params, bool sorted_rate } for (i = 0; i < ncategory; i++) rates[i] /= sum; - fix_params = 2; + fix_params = (Params::getInstance().optimize_from_given_params) ? 
0 : 2; } if (fabs(sum_prop-1.0) > 1e-5) outError("Sum of category proportions not equal to 1"); @@ -100,9 +100,9 @@ void RateFree::setNCategory(int ncat) { if (prop) delete [] prop; prop = new double[ncategory]; - int i; - for (i = 0; i < ncategory; i++) + for (int i = 0; i < ncategory; i++) { prop[i] = (1.0-getPInvar())/ncategory; + } // double sum_prop = (ncategory)*(ncategory+1)/2.0; // double sum = 0.0; @@ -128,24 +128,31 @@ void RateFree::initFromCatMinusOne() { restoreCheckpoint(); ncategory++; - int first = 0, second = -1, i; + int first = 0; // get the category k with largest proportion - for (i = 1; i < ncategory-1; i++) + for (int i = 1; i < ncategory-1; i++) { if (prop[i] > prop[first]) { first = i; } - second = (first == 0) ? 1 : 0; - for (i = 0; i < ncategory-1; i++) - if (prop[i] > prop[second] && second != first) + } + int second = (first == 0) ? 1 : 0; + for (int i = 0; i < ncategory-1; i++) + if (prop[i] > prop[second] && i != first) second = i; // memmove(rates, input->rates, (k+1)*sizeof(double)); // memmove(prop, input->prop, (k+1)*sizeof(double)); // divide highest category into 2 of the same prop - rates[ncategory-1] = (-rates[second] + 3*rates[first])/2.0; + // 2018-06-12: fix bug negative rates + if (-rates[second] + 3*rates[first] > 0.0) { + rates[ncategory-1] = (-rates[second] + 3*rates[first])/2.0; + rates[first] = (rates[second]+rates[first])/2.0; + } else { + rates[ncategory-1] = (3*rates[first])/2.0; + rates[first] = (rates[first])/2.0; + } prop[ncategory-1] = prop[first]/2; - rates[first] = (rates[second]+rates[first])/2.0; prop[first] = prop[first]/2; // if (k < ncategory-2) { // memcpy(&rates[k+2], &input->rates[k+1], (ncategory-2-k)*sizeof(double)); @@ -158,15 +165,16 @@ void RateFree::initFromCatMinusOne() { // prop[ncategory-1] = prop[k] / 2; // prop[k] = prop[k] / 2; // sort the rates in increasing order - if (sorted_rates) + if (sorted_rates) { quicksort(rates, 0, ncategory-1, prop); + } phylo_tree->clearAllPartialLH(); } 
RateFree::~RateFree() { - if (prop) delete [] prop; - prop = NULL; + delete [] prop; + prop = nullptr; } string RateFree::getNameParams() { @@ -212,14 +220,13 @@ int RateFree::getNDim() { double RateFree::targetFunk(double x[]) { getVariables(x); - if (optimizing_params != 2) + if (optimizing_params != 2) { // only clear partial_lh if optimizing rates phylo_tree->clearAllPartialLH(); + } return -phylo_tree->computeLikelihood(); } - - /** optimize parameters. Default is to optimize gamma shape @return the best likelihood @@ -229,17 +236,18 @@ double RateFree::optimizeParameters(double gradient_epsilon) { int ndim = getNDim(); // return if nothing to be optimized - if (ndim == 0) - return phylo_tree->computeLikelihood(); - - if (verbose_mode >= VB_MED) - cout << "Optimizing " << name << " model parameters by " << optimize_alg << " algorithm..." << endl; - + if (ndim == 0) { + return phylo_tree->computeLikelihood(); + } + if (verbose_mode >= VB_MED) { + cout << "Optimizing " << name << " model parameters by " << optimize_alg << " algorithm..." 
<< endl; + } // TODO: turn off EM algorithm for +ASC model - if ((optimize_alg.find("EM") != string::npos && phylo_tree->getModelFactory()->unobserved_ptns.empty())) - if (fix_params == 0) + if ((optimize_alg.find("EM") != string::npos && phylo_tree->getModelFactory()->unobserved_ptns.empty())) { + if (fix_params == 0) { return optimizeWithEM(); - + } + } //if (freq_type == FREQ_ESTIMATE) scaleStateFreq(false); double *variables = new double[ndim+1]; @@ -320,7 +328,6 @@ void RateFree::setBounds(double *lower_bound, double *upper_bound, bool *bound_c upper_bound[i+ncategory-1] = MAX_FREE_RATE; bound_check[i+ncategory-1] = false; } - } // for (i = ncategory; i <= 2*ncategory-2; i++) { // lower_bound[i] = MIN_FREE_RATE; @@ -352,12 +359,13 @@ void RateFree::setVariables(double *variables) { variables[i+1] = rates[i]; } else { // both rates and weights - for (i = 0; i < ncategory-1; i++) + for (i = 0; i < ncategory-1; i++) { variables[i+1] = prop[i] / prop[ncategory-1]; - for (i = 0; i < ncategory-1; i++) + } + for (i = 0; i < ncategory-1; i++) { variables[i+ncategory] = rates[i] / rates[ncategory-1]; + } } - } bool RateFree::getVariables(double *variables) { @@ -493,10 +501,12 @@ double RateFree::optimizeWithEM() { // tree->central_scale_num = phylo_tree->central_scale_num; // tree->central_partial_pars = phylo_tree->central_partial_pars; - tree->copyPhyloTree(phylo_tree); + tree->copyPhyloTree(phylo_tree, true); tree->optimize_by_newton = phylo_tree->optimize_by_newton; tree->setParams(phylo_tree->params); - tree->setLikelihoodKernel(phylo_tree->sse, phylo_tree->num_threads); + tree->setLikelihoodKernel(phylo_tree->sse); + tree->setNumThreads(phylo_tree->num_threads); + // initialize model ModelFactory *model_fac = new ModelFactory(); model_fac->joint_optimize = phylo_tree->params->optimize_model_rate_joint; @@ -521,15 +531,21 @@ double RateFree::optimizeWithEM() { } ASSERT(score < 0); - if (step > 0) + if (step > 0) { + if (score <= old_score-0.1) { + 
phylo_tree->printTree(cout, WT_BR_LEN+WT_NEWLINE); + writeInfo(cout); + cout << "Partition " << phylo_tree->aln->name << endl; + cout << "score: " << score << " old_score: " << old_score << endl; + } ASSERT(score > old_score-0.1); - + } old_score = score; - memset(new_prop, 0, nmix*sizeof(double)); // E-step // decoupled weights (prop) from _pattern_lh_cat to obtain L_ci and compute pattern likelihood L_i + memset(new_prop, 0, nmix*sizeof(double)); for (ptn = 0; ptn < nptn; ptn++) { double *this_lk_cat = phylo_tree->_pattern_lh_cat + ptn*nmix; double lk_ptn = phylo_tree->ptn_invar[ptn]; @@ -544,12 +560,11 @@ double RateFree::optimizeWithEM() { this_lk_cat[c] *= lk_ptn; new_prop[c] += this_lk_cat[c]; } - - } + } // M-step, update weights according to (*) int maxpropid = 0; - double new_pinvar = 0.0; + double new_pinvar = 0.0; for (c = 0; c < nmix; c++) { new_prop[c] = new_prop[c] / phylo_tree->getAlnNSite(); if (new_prop[c] > new_prop[maxpropid]) @@ -594,7 +609,7 @@ double RateFree::optimizeWithEM() { // now optimize rates one by one double sum = 0.0; for (c = 0; c < nmix; c++) { - tree->copyPhyloTree(phylo_tree); + tree->copyPhyloTree(phylo_tree, true); ModelMarkov *subst_model; if (phylo_tree->getModel()->isMixture() && phylo_tree->getModelFactory()->fused_mix_rate) subst_model = (ModelMarkov*)phylo_tree->getModel()->getMixtureClass(c); @@ -604,16 +619,16 @@ double RateFree::optimizeWithEM() { subst_model->setTree(tree); model_fac->model = subst_model; if (subst_model->isMixture() || subst_model->isSiteSpecificModel() || !subst_model->isReversible()) - tree->setLikelihoodKernel(phylo_tree->sse, phylo_tree->num_threads); + tree->setLikelihoodKernel(phylo_tree->sse); - // initialize likelihood tree->initializeAllPartialLh(); // copy posterior probability into ptn_freq tree->computePtnFreq(); double *this_lk_cat = phylo_tree->_pattern_lh_cat+c; - for (ptn = 0; ptn < nptn; ptn++) + for (ptn = 0; ptn < nptn; ptn++) { tree->ptn_freq[ptn] = this_lk_cat[ptn*nmix]; + } 
double scaling = rates[c]; tree->scaleLength(scaling); tree->optimizeTreeLengthScaling(MIN_PROP, scaling, 1.0/prop[c], 0.001); @@ -623,16 +638,16 @@ double RateFree::optimizeWithEM() { // reset subst model tree->setModel(NULL); subst_model->setTree(phylo_tree); - } phylo_tree->clearAllPartialLH(); if (converged) break; } - // sort the rates in increasing order - if (sorted_rates) + // sort the rates in increasing order + if (sorted_rates) { quicksort(rates, 0, ncategory-1, prop); + } // deattach memory // tree->central_partial_lh = NULL; diff --git a/model/rategamma.cpp b/model/rategamma.cpp index 3bc5e67bc..8b339abc4 100644 --- a/model/rategamma.cpp +++ b/model/rategamma.cpp @@ -72,10 +72,11 @@ void RateGamma::restoreCheckpoint() { void RateGamma::setNCategory(int ncat) { ncategory = ncat; - if (rates) delete [] rates; + delete [] rates; rates = new double[ncategory]; - for (int cat = 0; cat < ncategory; cat++) - rates[cat] = 1.0; + for (int cat = 0; cat < ncategory; cat++) { + rates[cat] = 1.0; + } name = "+G" + convertIntToString(ncategory); full_name = "Gamma with " + convertIntToString(ncategory) + " categories"; computeRates(); @@ -90,23 +91,21 @@ string RateGamma::getNameParams() { RateGamma::~RateGamma() { - if (rates) delete [] rates; - rates = NULL; + delete [] rates; + rates = nullptr; } void RateGamma::computeRates() { int cat; /* category id */ double sum_rates = 0.0; - if (ncategory == 1) { rates[0] = 1.0; return; } - double curScale = 0.0; - for (cat = 0; cat < ncategory; cat++) - curScale += rates[cat]; - + for (cat = 0; cat < ncategory; cat++) { + curScale += rates[cat]; + } if (!cut_median) { computeRatesMean(); } else { diff --git a/model/rategammainvar.cpp b/model/rategammainvar.cpp index 1e4f75dd4..134349dbc 100644 --- a/model/rategammainvar.cpp +++ b/model/rategammainvar.cpp @@ -29,8 +29,9 @@ RateGammaInvar::RateGammaInvar(int ncat, double shape, bool median, this->optimize_alg = optimize_alg; cur_optimize = 0; this->testParamDone = 
testParamDone; - for (int cat = 0; cat < ncategory; cat++) - rates[cat] = 1.0/(1.0-p_invar); + for (int cat = 0; cat < ncategory; cat++) { + rates[cat] = 1.0 / (1.0 - p_invar); + } computeRates(); } @@ -53,15 +54,17 @@ void RateGammaInvar::saveCheckpoint() { void RateGammaInvar::restoreCheckpoint() { // should restore p_invar first before gamma, because RateGamma will call computeRates() RateInvar::restoreCheckpoint(); - for (int cat = 0; cat < ncategory; cat++) - rates[cat] = 1.0/(1.0-p_invar); + for (int cat = 0; cat < ncategory; cat++) { + rates[cat] = 1.0 / (1.0 - p_invar); + } RateGamma::restoreCheckpoint(); } void RateGammaInvar::setNCategory(int ncat) { RateGamma::setNCategory(ncat); - for (int cat = 0; cat < ncategory; cat++) - rates[cat] = 1.0/(1.0-p_invar); + for (int cat = 0; cat < ncategory; cat++) { + rates[cat] = 1.0 / (1.0 - p_invar); + } name = "+I" + name; full_name = "Invar+" + full_name; computeRates(); @@ -130,12 +133,12 @@ double RateGammaInvar::optimizeParameters(double gradient_epsilon) { int ndim = getNDim(); // return if nothing to be optimized - if (ndim == 0) - return phylo_tree->computeLikelihood(); - - if (verbose_mode >= VB_MED) + if (ndim == 0) { + return phylo_tree->computeLikelihood(); + } + if (verbose_mode >= VB_MED) { cout << "Optimizing " << name << " model parameters by " << optimize_alg << " algorithm..." 
<< endl; - + } if (optimize_alg.find("EM_RR") != string::npos) { return randomRestartOptimization(gradient_epsilon); } else if (optimize_alg.find("Brent") != string::npos || phylo_tree->aln->frac_const_sites == 0.0 || isFixPInvar() || isFixGammaShape()) { diff --git a/model/rateheterotachy.cpp b/model/rateheterotachy.cpp index b71aafa20..ac80df151 100644 --- a/model/rateheterotachy.cpp +++ b/model/rateheterotachy.cpp @@ -47,17 +47,17 @@ RateHeterotachy::RateHeterotachy(int ncat, string params, PhyloTree *tree) : Rat destructor */ RateHeterotachy::~RateHeterotachy() { - if (prop) - delete [] prop; - prop = NULL; + delete [] prop; + prop = nullptr; } void RateHeterotachy::setNCategory(int ncat) { ncategory = ncat; - if (optimize_steps == 0) - optimize_steps = ncat*100; + if (optimize_steps == 0) { + optimize_steps = ncat * 100; + } // initialize with gamma rates - if (prop) delete [] prop; + delete [] prop; prop = new double[ncategory]; int i; @@ -138,11 +138,12 @@ void RateHeterotachy::writeParameters(ostream &out) { @return the best likelihood */ double RateHeterotachy::optimizeParameters(double gradient_epsilon) { - if (fix_params) + if (fix_params) { return phylo_tree->computeLikelihood(); - if (verbose_mode >= VB_MED) - cout << "Optimizing " << name << " model parameters by EM algorithm..." << endl; - + } + if (verbose_mode >= VB_MED) { + cout << "Optimizing " << name << " model parameters by EM algorithm..." 
<< endl; + } return optimizeWithEM(); } @@ -150,7 +151,6 @@ double RateHeterotachy::optimizeWithEM() { // first compute _pattern_lh_cat phylo_tree->computePatternLhCat(WSL_RATECAT); - size_t ptn, c; size_t nptn = phylo_tree->aln->getNPattern(); size_t nmix = ncategory; @@ -164,29 +164,29 @@ double RateHeterotachy::optimizeWithEM() { if (step > 0) { // convert _pattern_lh_cat taking into account new weights - for (ptn = 0; ptn < nptn; ptn++) { + for (size_t ptn = 0; ptn < nptn; ptn++) { double *this_lk_cat = phylo_tree->_pattern_lh_cat + ptn*nmix; - for (c = 0; c < nmix; c++) { + for (size_t c = 0; c < nmix; c++) { this_lk_cat[c] *= ratio_prop[c]; } } } memset(new_prop, 0, nmix*sizeof(double)); - for (ptn = 0; ptn < nptn; ptn++) { + for (size_t ptn = 0; ptn < nptn; ptn++) { double *this_lk_cat = phylo_tree->_pattern_lh_cat + ptn*nmix; double lk_ptn = phylo_tree->ptn_invar[ptn]; - for (c = 0; c < nmix; c++) { + for (size_t c = 0; c < nmix; c++) { lk_ptn += this_lk_cat[c]; } ASSERT(lk_ptn != 0.0); lk_ptn = phylo_tree->ptn_freq[ptn] / lk_ptn; - for (c = 0; c < nmix; c++) { + for (size_t c = 0; c < nmix; c++) { new_prop[c] += this_lk_cat[c] * lk_ptn; } } bool converged = true; double new_pinvar = 0.0; - for (c = 0; c < nmix; c++) { + for (size_t c = 0; c < nmix; c++) { new_prop[c] /= phylo_tree->getAlnNSite(); // Make sure that probabilities do not get zero if (new_prop[c] < 1e-10) new_prop[c] = 1e-10; diff --git a/model/ratekategory.cpp b/model/ratekategory.cpp index b91811eb2..60cdc8363 100644 --- a/model/ratekategory.cpp +++ b/model/ratekategory.cpp @@ -40,7 +40,7 @@ RateKategory::RateKategory(int ncat, PhyloTree *tree) RateKategory::~RateKategory() { - if (rates) delete [] rates; + delete [] rates; rates = NULL; } diff --git a/model/ratemeyerdiscrete.cpp b/model/ratemeyerdiscrete.cpp index 802353311..72a8ab83c 100644 --- a/model/ratemeyerdiscrete.cpp +++ b/model/ratemeyerdiscrete.cpp @@ -90,7 +90,7 @@ double mean_sum(int l, int r, double *sumA, double *sumAsquare, 
int *sumW) { // and k-1 inclusive). // The final cost of the clustering is also returned. -double RunKMeans1D(int n, int k, double *points, int *weights, double *centers, int *assignments) { +double RunKMeans1D(int n, int k, double *points_orig, int *weights, double *centers, int *assignments) { double *sumA; double *sumAsquare; int *sumW; @@ -107,6 +107,8 @@ double RunKMeans1D(int n, int k, double *points, int *weights, double *centers, int *index = new int[n+1]; for (int i=0; i=0; i--) delete [] trace[i]; delete [] trace; for (int i=n; i>=0; i--) delete [] Cost[i]; @@ -206,7 +209,7 @@ RateMeyerDiscrete::RateMeyerDiscrete() { RateMeyerDiscrete::~RateMeyerDiscrete() { - if (rates) delete [] rates; + delete [] rates; } bool RateMeyerDiscrete::isSiteSpecificRate() { @@ -279,28 +282,55 @@ double RateMeyerDiscrete::computeFunction(double value) { double lh = 0.0; int nseq = phylo_tree->leafNum; int nstate = phylo_tree->getModel()->num_states; - int i, j, k, state1, state2; ModelSubst *model = phylo_tree->getModel(); int trans_size = nstate * nstate; double *trans_mat = new double[trans_size]; int *pair_freq = new int[trans_size]; - - for (i = 0; i < nseq-1; i++) - for (j = i+1; j < nseq; j++) { - memset(pair_freq, 0, trans_size * sizeof(int)); - for (k = 0; k < size(); k++) { - if (ptn_cat[k] != optimizing_cat) continue; - Pattern *pat = & phylo_tree->aln->at(k); - if ((state1 = pat->at(i)) < nstate && (state2 = pat->at(j)) < nstate) - pair_freq[state1*nstate + state2] += pat->frequency; - } - model->computeTransMatrix(value * dist_mat[i*nseq + j], trans_mat); - for (k = 0; k < trans_size; k++) if (pair_freq[k]) - lh -= pair_freq[k] * log(trans_mat[k]); - } - delete [] pair_freq; - delete [] trans_mat; - return lh; + + auto frequencies = phylo_tree->getConvertedSequenceFrequencies(); + for (size_t i = 0; i < nseq-1; i++) { + auto eyeSequence = phylo_tree->getConvertedSequenceByNumber(i); + for (size_t j = i + 1; j < nseq; j++) { + auto jaySequence = 
phylo_tree->getConvertedSequenceByNumber(j); + memset(pair_freq, 0, trans_size * sizeof(int)); + if (jaySequence!=nullptr) { + for (size_t k = 0; k < size(); k++) { + if (ptn_cat[k] != optimizing_cat) { + continue; + } + int state1 = eyeSequence[k]; + auto pairRow = pair_freq + state1*nstate; + if (nstate<=state1) { + continue; + } + int state2 = jaySequence[k]; + if ( state2 < nstate) { + pairRow[state2] += frequencies[k]; + } + } + } else { + for (size_t k = 0; k < size(); k++) { + if (ptn_cat[k] != optimizing_cat) { + continue; + } + Pattern *pat = & phylo_tree->aln->at(k); + int state1 = pat->at(i); + int state2 = pat->at(j); + if ( state1 < nstate && state2 < nstate) { + pair_freq[state1*nstate + state2] += pat->frequency; + } + } + } + model->computeTransMatrix(value * dist_mat[i*nseq + j], trans_mat); + for (size_t k = 0; k < trans_size; k++) + { + lh -= pair_freq[k] * log(trans_mat[k]); + } + } + } +delete [] pair_freq; +delete [] trans_mat; +return lh; } void RateMeyerDiscrete::computeFuncDerv(double value, double &df, double &ddf) { @@ -312,7 +342,6 @@ void RateMeyerDiscrete::computeFuncDerv(double value, double &df, double &ddf) { // double lh = 0.0; int nseq = phylo_tree->leafNum; int nstate = phylo_tree->getModel()->num_states; - int i, j, k, state1, state2; ModelSubst *model = phylo_tree->getModel(); int trans_size = nstate * nstate; double *trans_mat = new double[trans_size]; @@ -320,49 +349,63 @@ void RateMeyerDiscrete::computeFuncDerv(double value, double &df, double &ddf) { double *trans_derv2 = new double[trans_size]; df = ddf = 0.0; - int *pair_freq = new int[trans_size]; - - for (i = 0; i < nseq-1; i++) - for (j = i+1; j < nseq; j++) { - memset(pair_freq, 0, trans_size * sizeof(int)); - for (k = 0; k < size(); k++) { - if (ptn_cat[k] != optimizing_cat) continue; - Pattern *pat = & phylo_tree->aln->at(k); - if ((state1 = pat->at(i)) < nstate && (state2 = pat->at(j)) < nstate) - pair_freq[state1*nstate + state2] += pat->frequency; - } - double 
dist = dist_mat[i*nseq + j]; - double derv1 = 0.0, derv2 = 0.0; - model->computeTransDerv(value * dist, trans_mat, trans_derv1, trans_derv2); - for (k = 0; k < trans_size; k++) if (pair_freq[k]) { - double t1 = trans_derv1[k] / trans_mat[k]; - double t2 = trans_derv2[k] / trans_mat[k]; - trans_derv1[k] = t1; - trans_derv2[k] = (t2 - t1*t1); -// lh -= log(trans_mat[k]) * pair_freq[k]; - derv1 += trans_derv1[k] * pair_freq[k]; - derv2 += trans_derv2[k] * pair_freq[k]; - } - df -= derv1 * dist; - ddf -= derv2 * dist * dist; - } - delete [] pair_freq; - delete [] trans_derv2; - delete [] trans_derv1; - delete [] trans_mat; -// return lh; - -/* double lh = 0.0, derv1, derv2; - df = 0.0; ddf = 0.0; - for (int i = 0; i < size(); i++) - if (ptn_cat[i] == optimizing_cat) { - optimizing_pattern = i; - int freq = phylo_tree->aln->at(i).frequency; - lh += RateMeyerHaeseler::computeFuncDerv(value, derv1, derv2) * freq; - df += derv1 * freq; - ddf += derv2 * freq; - } - return lh;*/ + int *pair_freq = new int[trans_size]; + auto frequencies = phylo_tree->getConvertedSequenceFrequencies(); + for (size_t i = 0; i + 1 < nseq; ++i) { + auto eyeSequence = phylo_tree->getConvertedSequenceByNumber(i); + for (size_t j = i+1; j < nseq; ++j) { + auto jaySequence = phylo_tree->getConvertedSequenceByNumber(j); + memset(pair_freq, 0, trans_size * sizeof(int)); + if (frequencies!=nullptr && eyeSequence!=nullptr && jaySequence!=nullptr) { + for (size_t k = 0; k < size(); ++k) { + if (ptn_cat[k] != optimizing_cat) { + continue; + } + int state1 = eyeSequence[k]; + if (nstate<=state1) { + continue; + } + auto pairRow = pair_freq + state1*nstate; + int state2 = jaySequence[k]; + if (nstate<=state2) { + continue; + } + pairRow[state2] += frequencies[k]; + } + } else { + for (size_t k = 0; k < size(); ++k) { + if (ptn_cat[k] != optimizing_cat) { + continue; + } + Pattern *pat = & phylo_tree->aln->at(k); + int state1 = pat->at(i); + int state2 = pat->at(j); + if (state1 < nstate && state2 < nstate) 
+ pair_freq[state1*nstate + state2] += pat->frequency; + } + } + double dist = dist_mat[i*nseq + j]; + double derv1 = 0.0, derv2 = 0.0; + model->computeTransDerv(value * dist, trans_mat, trans_derv1, trans_derv2); + for (size_t k = 0; k < trans_size; ++k) { + if (pair_freq[k]) { + double t1 = trans_derv1[k] / trans_mat[k]; + double t2 = trans_derv2[k] / trans_mat[k]; + trans_derv1[k] = t1; + trans_derv2[k] = (t2 - t1*t1); + //lh -= log(trans_mat[k]) * pair_freq[k]; + derv1 += trans_derv1[k] * pair_freq[k]; + derv2 += trans_derv2[k] * pair_freq[k]; + } + } + df -= derv1 * dist; + ddf -= derv2 * dist * dist; + } + } + delete [] pair_freq; + delete [] trans_derv2; + delete [] trans_derv1; + delete [] trans_mat; } @@ -408,25 +451,39 @@ double RateMeyerDiscrete::optimizeCatRate(int cat) { } void RateMeyerDiscrete::normalizeRates() { - double sum = 0.0, ok = 0.0; - int nptn = size(); - int i; - - for (i = 0; i < nptn; i++) { - //at(i) = rates[ptn_cat[i]]; - if (getPtnRate(i) < MAX_SITE_RATE) { - sum += getPtnRate(i) * phylo_tree->aln->at(i).frequency; - ok += phylo_tree->aln->at(i).frequency; - } - } - - if (fabs(sum - ok) > 1e-3) { - //cout << "Normalizing rates " << sum << " / " << ok << endl; - double scale_f = ok / sum; - for (i = 0; i < ncategory; i++) - if (rates[i] > 2*MIN_SITE_RATE && rates[i] < MAX_SITE_RATE) - rates[i] *= scale_f; - } + double sum = 0.0; + double ok = 0.0; + size_t nptn = size(); + + auto frequencies = phylo_tree->getConvertedSequenceFrequencies(); + if (frequencies!=nullptr) { + for (size_t i = 0; i < nptn; i++) { + //at(i) = rates[ptn_cat[i]]; + if (getPtnRate(i) < MAX_SITE_RATE) { + double freq = frequencies[i]; + sum += getPtnRate(i) * freq; + ok += freq; + } + } + } else { + for (size_t i = 0; i < nptn; i++) { + //at(i) = rates[ptn_cat[i]]; + if (getPtnRate(i) < MAX_SITE_RATE) { + double freq = phylo_tree->aln->at(i).frequency; + sum += getPtnRate(i) * freq; + ok += freq; + } + } + } + if (fabs(sum - ok) > 1e-3) { + //cout << "Normalizing 
rates " << sum << " / " << ok << endl; + double scale_f = ok / sum; + for (int i = 0; i < ncategory; i++) { + if (rates[i] > 2*MIN_SITE_RATE && rates[i] < MAX_SITE_RATE) { + rates[i] *= scale_f; + } + } + } } double RateMeyerDiscrete::classifyRatesKMeans() { diff --git a/model/ratemeyerhaeseler.cpp b/model/ratemeyerhaeseler.cpp index 38710bd3c..18d949144 100644 --- a/model/ratemeyerhaeseler.cpp +++ b/model/ratemeyerhaeseler.cpp @@ -55,16 +55,16 @@ void RateMeyerHaeseler::readRateFile(char *rate_file) { in.exceptions(ios::failbit | ios::badbit); in.open(rate_file); char line[256]; - int site, i; + int site; double rate; - int nsites = phylo_tree->aln->getNSite(); + size_t nsites = phylo_tree->aln->getNSite(); resize(phylo_tree->aln->getNPattern(), -1.0); int saturated_sites = 0, saturated_ptn = 0; in.getline(line, sizeof(line)); //if (strncmp(line, "Site", 4) != 0) throw "Wrong header line"; - for (i = 0; i < nsites; i++) { + for (size_t i = 0; i < nsites; ++i) { in.getline(line, sizeof(line)); stringstream ss(line); string tmp; @@ -89,7 +89,7 @@ void RateMeyerHaeseler::readRateFile(char *rate_file) { in.exceptions(ios::failbit | ios::badbit); in.close(); - for (i = 0; i < size(); i++) + for (size_t i = 0; i < size(); ++i) if (at(i) < 0.0) throw "Some site has no rate information"; if (saturated_sites) { @@ -108,14 +108,17 @@ void RateMeyerHaeseler::readRateFile(char *rate_file) { RateMeyerHaeseler::~RateMeyerHaeseler() { - if (dist_mat) delete [] dist_mat; + if (dist_mat) delete [] dist_mat; } int RateMeyerHaeseler::getNDim() { - if (phylo_tree) - return phylo_tree->aln->getNPattern()-1; - if (empty()) return 0; - return size()-1; + if (phylo_tree) { + return phylo_tree->aln->getNPattern()-1; + } + if (empty()) { + return 0; + } + return size()-1; } /* @@ -154,7 +157,7 @@ void RateMeyerHaeseler::setRates(DoubleVector &rates) { void RateMeyerHaeseler::initializeRates() { - int i, j, rate_id = 0, state1, state2; + int rate_id = 0; int nseq = phylo_tree->leafNum; int 
nstate = phylo_tree->getModel()->num_states; @@ -165,13 +168,22 @@ void RateMeyerHaeseler::initializeRates() { for (Alignment::iterator pat = phylo_tree->aln->begin(); pat != phylo_tree->aln->end(); pat++, rate_id++) { int diff = 0, total = 0; - for (i = 0; i < nseq-1; i++) if ((state1 = pat->at(i)) < nstate) - for (j = i+1; j < nseq; j++) if ((state2 = pat->at(j)) < nstate) { - //total += dist_mat[state1 * nstate + state2]; - //if (state1 != state2) diff += dist_mat[state1 * nstate + state2]; - total++; - if (state1 != state2) diff++; - } + for (size_t i = 0; i+1 < nseq; ++i) { + int state1 = pat->at(i); + if (state1 < nstate) { + for (size_t j = i+1; j < nseq; ++j) { + int state2 = pat->at(j); + if (state2 < nstate) { + //total += dist_mat[state1 * nstate + state2]; + //if (state1 != state2) diff += dist_mat[state1 * nstate + state2]; + total++; + if (state1 != state2) { + diff++; + } + } + } + } + } if (diff == 0) diff = 1; if (total == 0) total = 1; double obs_diff = double(diff) / total; @@ -303,7 +315,7 @@ double RateMeyerHaeseler::optimizeRate(int pattern) { void RateMeyerHaeseler::optimizeRates() { if (!dist_mat) { - dist_mat = new double[phylo_tree->leafNum * phylo_tree->leafNum]; + dist_mat = new double[(size_t)phylo_tree->leafNum * (size_t)phylo_tree->leafNum]; } // compute the distance based on the path lengths between taxa of the tree phylo_tree->calcDist(dist_mat); @@ -318,22 +330,22 @@ void RateMeyerHaeseler::optimizeRates() { int nseq = phylo_tree->leafNum; int nstates = phylo_tree->aln->num_states; for (i = 0; i < size(); i++) { - int freq = phylo_tree->aln->at(i).frequency; + int freq = phylo_tree->aln->at(i).frequency; if (phylo_tree->aln->at(i).computeAmbiguousChar(nstates) <= nseq-2) { optimizeRate(i); - if (at(i) == MIN_SITE_RATE) invar_sites += freq; + if (at(i) == MIN_SITE_RATE) invar_sites += freq; if (at(i) == MAX_SITE_RATE) { - saturated_sites += freq; + saturated_sites += freq; saturated_ptn ++; } } else { at(i) = MIN_SITE_RATE; 
ambiguous_sites += freq; } - if (at(i) < MAX_SITE_RATE) + if (at(i) < MAX_SITE_RATE) { if (at(i) > MIN_SITE_RATE) sum += at(i) * freq; ok_ptn[i] = 1; ok_sites += freq; } - } + } // now scale such that the mean of rates is 1 double scale_f = ok_sites / sum; @@ -396,52 +408,76 @@ double RateMeyerHaeseler::optimizeParameters(double epsilon) { double RateMeyerHaeseler::computeFunction(double value) { - if (!rate_mh) { - if (value != cur_scale) { - ptn_tree->scaleLength(value/cur_scale); - cur_scale = value; - ptn_tree->clearAllPartialLH(); - } - return -ptn_tree->computeLikelihood(); - } - int nseq = phylo_tree->leafNum; - int nstate = phylo_tree->getModel()->num_states; - int i, j, state1, state2; - double lh = 0.0; - ModelSubst *model = phylo_tree->getModel(); - Pattern *pat = & phylo_tree->aln->at(optimizing_pattern); - - for (i = 0; i < nseq-1; i++) if ((state1 = pat->at(i)) < nstate) - for (j = i+1; j < nseq; j++) if ((state2 = pat->at(j)) < nstate) - lh -= log(model->computeTrans(value * dist_mat[i*nseq + j], state1, state2)); + if (!rate_mh) { + if (value != cur_scale) { + ptn_tree->scaleLength(value/cur_scale); + cur_scale = value; + ptn_tree->clearAllPartialLH(); + } + return -ptn_tree->computeLikelihood(); + } + int nseq = phylo_tree->leafNum; + int nstate = phylo_tree->getModel()->num_states; + double lh = 0.0; + ModelSubst *model = phylo_tree->getModel(); + Pattern *pat = & phylo_tree->aln->at(optimizing_pattern); + auto nseqLess1 = nseq - 1; + + #ifdef _OPENMP + #pragma omp parallel for reduction(-:lh) + #endif + for (size_t i = 0; i < nseqLess1; ++i) { + int state1 = pat->at(i); + const double* distRow = dist_mat + i * nseq; + if (nstate <= state1 ) { + continue; + } + for (size_t j = i+1; j < nseq; ++j) { + int state2 = pat->at(j); + if (nstate <= state2) { + continue; + } + lh -= log(model->computeTrans(distRow[j], state1, state2)); + } + } return lh; } void RateMeyerHaeseler::computeFuncDerv(double value, double &df, double &ddf) { - int nseq = 
phylo_tree->leafNum; - int nstate = phylo_tree->getModel()->num_states; - int i, j, state1, state2; -// double lh = 0.0; - double trans, derv1, derv2; - ModelSubst *model = phylo_tree->getModel(); - Pattern *pat = & phylo_tree->aln->at(optimizing_pattern); - df = ddf = 0.0; - for (i = 0; i < nseq-1; i++) if ((state1 = pat->at(i)) < nstate) - for (j = i+1; j < nseq; j++) if ((state2 = pat->at(j)) < nstate) { - double dist = dist_mat[i*nseq + j]; - trans = model->computeTrans(value * dist, state1, state2, derv1, derv2); -// lh -= log(trans); - double t1 = derv1 / trans; - double t2 = derv2 / trans; - df -= t1 * dist; - ddf -= dist * dist * (t2 - t1*t1); - } -// return lh; + int nseq = phylo_tree->leafNum; + int nstate = phylo_tree->getModel()->num_states; + // double lh = 0.0; + double trans, derv1, derv2; + ModelSubst *model = phylo_tree->getModel(); + Pattern *pat = & phylo_tree->aln->at(optimizing_pattern); + df = ddf = 0.0; + auto nseqLess1 = nseq - 1; + #ifdef _OPENMP + #pragma omp parallel for reduction(-:df,ddf) + #endif + for (size_t i = 0; i < nseqLess1; ++i) { + int state1 = pat->at(i); + if (nstate<=state1) { + continue; + } + double* distRow = dist_mat + i*nseq; + for (size_t j = i+1; j < nseq; ++j) { + int state2 = pat->at(j); + if (nstate<=state2) { + continue; + } + double dist = distRow[j]; + trans = model->computeTrans(value * dist, state1, state2, derv1, derv2); + // lh -= log(trans); + double t1 = derv1 / trans; + double t2 = derv2 / trans; + df -= t1 * dist; + ddf -= dist * dist * (t2 - t1*t1); + } + } } - void RateMeyerHaeseler::runIterativeProc(Params ¶ms, IQTree &tree) { - int i; if (verbose_mode >= VB_MED) { ofstream out("x"); out.close(); @@ -451,31 +487,45 @@ void RateMeyerHaeseler::runIterativeProc(Params ¶ms, IQTree &tree) { if (backup_rate->getGammaShape() > 0 ) { IntVector pattern_cat; backup_rate->computePatternRates(*this, pattern_cat); - double sum = 0.0; - for (i = 0; i < size(); i++) - sum += at(i) * 
phylo_tree->aln->at(i).frequency; + double sum = 0.0; + size_t seqLen = size(); + auto freq = phylo_tree->getConvertedSequenceFrequencies(); + if (freq!=nullptr && seqLen == phylo_tree->getConvertedSequenceLength()) { + #ifdef _OPENMP + #pragma omp parallel for reduction(+:sum) + #endif + for (size_t i = 0; i < seqLen ; ++i) { + sum += at(i) * freq[i]; + } + } else { + for (size_t i = 0; i < seqLen; ++i) { + sum += at(i) * phylo_tree->aln->at(i).frequency; + } + } sum /= phylo_tree->aln->getNSite(); if (fabs(sum - 1.0) > 0.0001) { - if (verbose_mode >= VB_MED) - cout << "Normalizing Gamma rates (" << sum << ")" << endl; - for (i = 0; i < size(); i++) - at(i) /= sum; - } + if (verbose_mode >= VB_MED) { + cout << "Normalizing Gamma rates (" << sum << ")" << endl; + } + for (size_t i = 0; i < size(); ++i) { + at(i) /= sum; + } + } } tree.getModelFactory()->site_rate = this; tree.setRate(this); - //if (empty()) initializeRates(); - //setRates(prev_rates); - //string rate_file = params.out_prefix; - //rate_file += ".mhrate"; - double prev_lh = tree.getCurScore(); - string dist_file = params.out_prefix; - dist_file += ".tdist"; - tree.getModelFactory()->stopStoringTransMatrix(); + //setRates(prev_rates); + //string rate_file = params.out_prefix; + //rate_file += ".mhrate"; + double prev_lh = tree.getCurScore(); + string dist_file = params.out_prefix; + dist_file += ".tdist"; + tree.getModelFactory()->stopStoringTransMatrix(); + int i=2; for (i = 2; i < 100; i++) { //DoubleVector prev_rates; //getRates(prev_rates); diff --git a/ncl/ncl.h b/ncl/ncl.h index 9c28a4f46..5832c6699 100644 --- a/ncl/ncl.h +++ b/ncl/ncl.h @@ -20,7 +20,7 @@ #ifndef NCL_NCL_H #define NCL_NCL_H -#if defined(_MSC_VER) +#if defined(_MSC_VER) && !defined(CLANG_UNDER_VS) # pragma warning(disable:4786) # pragma warning(disable:4291) # define vsnprintf _vsnprintf diff --git a/ncl/nxsblock.h b/ncl/nxsblock.h index ff05a8592..fdbbeca5c 100644 --- a/ncl/nxsblock.h +++ b/ncl/nxsblock.h @@ -19,6 +19,8 @@ 
#ifndef NCL_NXSBLOCK_H #define NCL_NXSBLOCK_H +#include "nxsstring.h" //for NxsString +#include "nxstoken.h" //for NxsToken class NxsReader; /*---------------------------------------------------------------------------------------------------------------------- diff --git a/ncl/nxscharactersblock.cpp b/ncl/nxscharactersblock.cpp index 316352600..1c9a9342a 100644 --- a/ncl/nxscharactersblock.cpp +++ b/ncl/nxscharactersblock.cpp @@ -1778,7 +1778,8 @@ unsigned NxsCharactersBlock::HandleTokenState( if (respectingCase) cit = find(ci_begin, ci_end, t); else - cit = find_if (ci_begin, ci_end, bind2nd(NxsStringEqual(), t)); + cit = find_if (ci_begin, ci_end, + [=] (const NxsString& s) { return NxsStringEqual()(s, t); } ); if (cit == ci_end) { @@ -2681,12 +2682,6 @@ void NxsCharactersBlock::Reset() missing = '?'; gap = '\0'; matchchar = '\0'; - matrix = NULL; - charPos = NULL; - taxonPos = NULL; - activeTaxon = NULL; - activeChar = NULL; - symbols = NULL; ResetSymbols(); @@ -2757,7 +2752,7 @@ void NxsCharactersBlock::ResetSymbols() break; case NxsCharactersBlock::protein: - strcpy(symbols, "ACDEFGHIKLMNPQRSTVWY*"); + strcpy(symbols, "ACDEFGHIKLMNPQRSTVWY*XU"); break; default: @@ -2786,6 +2781,7 @@ void NxsCharactersBlock::ResetSymbols() { equates[ NxsString("B") ] = NxsString("{DN}"); equates[ NxsString("Z") ] = NxsString("{EQ}"); + equates[ NxsString("J") ] = NxsString("{IL}"); } } @@ -2917,24 +2913,20 @@ void NxsCharactersBlock::WriteStates( else { assert(symbols != NULL); - unsigned symbolListLen = strlen(symbols); - unsigned numStates = matrix->GetNumStates(d); + unsigned symbolListLen = strlen(symbols); unsigned numCharsNeeded = numStates; if (numStates > 1) numCharsNeeded += 2; assert(slen > numCharsNeeded); - if (numStates == 1) - { + if (numStates == 1) { unsigned v = matrix->GetState(d); assert(v < symbolListLen); - s[0] = symbols[v]; + s[0] = (v //for std::ostream + class NxsTaxaBlock; class NxsAssumptionsBlock; @@ -169,6 +171,7 @@ class NxsAssumptionsBlock; | 
IsPolymorphic |> */ +class NxsString; //James B. (Added forward declare needed in VS builds, 23-Jul-2020) class NxsCharactersBlock : public NxsBlock { @@ -215,6 +218,7 @@ class NxsCharactersBlock char GetGapSymbol(); char GetMatchcharSymbol(); char GetMissingSymbol(); + NxsDiscreteMatrix *GetMatrix() { return matrix; } bool IsGapState(unsigned i, unsigned j); bool IsInterleave(); bool IsLabels(); @@ -233,7 +237,7 @@ class NxsCharactersBlock void RestoreTaxon(unsigned i); bool IsActiveTaxon(unsigned i); bool IsDeleted(unsigned i); - void ShowStateLabels(ostream &out, unsigned i, unsigned c, unsigned first_taxon = -1); + void ShowStateLabels(std::ostream &out, unsigned i, unsigned c, unsigned first_taxon = -1); unsigned GetStateSymbolIndex(unsigned i, unsigned j, unsigned k = 0); // added by mth for standard data types char GetState(unsigned i, unsigned j, unsigned k = 0); char *GetSymbols(); @@ -246,8 +250,8 @@ class NxsCharactersBlock virtual unsigned TaxonLabelToNumber(NxsString s); virtual unsigned GetMaxObsNumStates(); virtual unsigned GetObsNumStates(unsigned j); - virtual void DebugShowMatrix(ostream &out, bool use_matchchar, const char *marginText = 0); - virtual void Report(ostream &out); + virtual void DebugShowMatrix(std::ostream &out, bool use_matchchar, const char *marginText = 0); + virtual void Report(std::ostream &out); virtual void Reset(); NxsTaxaBlock *taxa; /* pointer to the TAXA block in which taxon labels are stored */ @@ -507,7 +511,7 @@ inline unsigned NxsCharactersBlock::GetNumEliminated() */ inline unsigned NxsCharactersBlock::GetNumEquates() { - return equates.size(); + return static_cast(equates.size()); } /*---------------------------------------------------------------------------------------------------------------------- diff --git a/ncl/nxsdefs.h b/ncl/nxsdefs.h index 84427d4e4..d7e80462b 100644 --- a/ncl/nxsdefs.h +++ b/ncl/nxsdefs.h @@ -31,7 +31,7 @@ // #define NCL_MAX_STATES 76 -#if defined(__MWERKS__) || defined(__DECCXX) || 
defined(_MSC_VER) +#if defined(__MWERKS__) || defined(__DECCXX) || defined(_MSC_VER) typedef long file_pos; #else typedef streampos file_pos; @@ -39,19 +39,25 @@ #define SUPPORT_OLD_NCL_NAMES -class NxsString; +#include //for std::vector +#include //for std::set +#include //for std::map +#ifdef CLANG_UNDER_VS +#include //for std::less +#endif -typedef vector NxsBoolVector; -typedef vector NxsCharVector; -typedef vector NxsUnsignedVector; -typedef vector NxsStringVector; -typedef vector NxsAllelesVector; +#include "nxsstring.h" -typedef set< unsigned, less > NxsUnsignedSet; +typedef std::vector NxsBoolVector; +typedef std::vector NxsCharVector; +typedef std::vector NxsUnsignedVector; +typedef std::vector NxsAllelesVector; -typedef map< unsigned, NxsStringVector, less > NxsStringVectorMap; -typedef map< NxsString, NxsString, less > NxsStringMap; -typedef map< NxsString, NxsUnsignedSet, less > NxsUnsignedSetMap; +typedef std::set< unsigned, std::less > NxsUnsignedSet; + +typedef std::map< unsigned, NxsStringVector, std::less > NxsStringVectorMap; +typedef std::map< NxsString, NxsString, std::less > NxsStringMap; +typedef std::map< NxsString, NxsUnsignedSet, std::less > NxsUnsignedSetMap; // The following typedefs are simply for maintaining compatibility with existing code. // The names on the right are deprecated and should not be used. diff --git a/ncl/nxsexception.cpp b/ncl/nxsexception.cpp index 67ceced31..d6e0a84fa 100644 --- a/ncl/nxsexception.cpp +++ b/ncl/nxsexception.cpp @@ -24,7 +24,7 @@ | stopped. 
*/ NxsException::NxsException( - NxsString s, /* the message for the user */ + const NxsString &s, /* the message for the user */ file_pos fp, /* the current file position */ long fl, /* the current file line */ long fc) /* the current file column */ diff --git a/ncl/nxsexception.h b/ncl/nxsexception.h index a233e3329..85abc9983 100644 --- a/ncl/nxsexception.h +++ b/ncl/nxsexception.h @@ -20,6 +20,9 @@ #ifndef NCL_NXSEXCEPTION_H #define NCL_NXSEXCEPTION_H +#include "nxsstring.h" //for NxsString +#include "nxsdefs.h" //for file_pos + class NxsToken; /*---------------------------------------------------------------------------------------------------------------------- @@ -33,7 +36,7 @@ class NxsException long line; /* current line in file */ long col; /* column of current line */ - NxsException(NxsString s, file_pos fp = 0, long fl = 0L, long fc = 0L); + explicit NxsException(const NxsString &s, file_pos fp = 0, long fl = 0L, long fc = 0L); NxsException(const NxsString &s, const NxsToken &t); }; diff --git a/ncl/nxsindent.h b/ncl/nxsindent.h index 29f5d0454..3b5df15b3 100644 --- a/ncl/nxsindent.h +++ b/ncl/nxsindent.h @@ -20,6 +20,8 @@ #ifndef NCL_NXSINDENT_H #define NCL_NXSINDENT_H +#include //for std::ostream + /*---------------------------------------------------------------------------------------------------------------------- | Manipulator for use in indenting text `leftMarg' characters. */ @@ -43,8 +45,8 @@ inline Indent::Indent( /*---------------------------------------------------------------------------------------------------------------------- | Output operator for the Indent manipulator. 
*/ -inline ostream &operator <<( - ostream &o, /* the ostream object */ +inline std::ostream &operator <<( + std::ostream &o, /* the ostream object */ const Indent &i) /* the Indent object to be sent to `o' */ { #if defined (HAVE_PRAGMA_UNUSED) diff --git a/ncl/nxsreader.cpp b/ncl/nxsreader.cpp index 7ceb07c73..7813da915 100644 --- a/ncl/nxsreader.cpp +++ b/ncl/nxsreader.cpp @@ -339,6 +339,7 @@ void NxsReader::Execute( for (;;) { + token.SetLabileFlagBit(token.hyphenNotPunctuation); token.GetNextToken(); if (token.Equals("END") || token.Equals("ENDBLOCK")) diff --git a/ncl/nxsstring.cpp b/ncl/nxsstring.cpp index 964de39fe..0c5d6f21d 100644 --- a/ncl/nxsstring.cpp +++ b/ncl/nxsstring.cpp @@ -875,3 +875,22 @@ NxsStringVector BreakPipeSeparatedList( } return retVec; } + +/*-------------------------------------------------------------------------------------------------------------------------- +| Returns true if the Equals comparison function is true for this or any element in the vector `s'. +| (James B. 23-Jul-2020, moved this here, from nxsstring.h, because the references to NxsStringVector +| were a problem when compiling in Visual Studio). +*/ +bool NxsString::IsInVector( + const NxsStringVector& s, /* the vector of NxsString objects to be searched */ + NxsString::CmpEnum mode) /* the argument passed to the Equals function, which is called for every element in the vector `s' */ + const +{ + for (NxsStringVector::const_iterator sIt = s.begin(); sIt != s.end(); sIt++) + { + if (Equals(*sIt, mode)) + return true; + } + return false; +} + diff --git a/ncl/nxsstring.h b/ncl/nxsstring.h index 2b86b6521..64c12d112 100644 --- a/ncl/nxsstring.h +++ b/ncl/nxsstring.h @@ -22,6 +22,8 @@ #include #include +#include +#include //for std::vector #include "nxsindent.h" class IndexSet; @@ -35,8 +37,9 @@ class IndexSet; | numbers to the ends of strings, an ability which is very useful for producing default labels (e.g. taxon1, taxon2, | etc.). 
*/ + class NxsString - : public string + : public std::string { public: @@ -66,12 +69,12 @@ class NxsString bool IsADouble() const; bool IsALong() const; bool IsCapAbbreviation(const NxsString &s) const; - bool IsInVector(const NxsStringVector &s, NxsString::CmpEnum mode = respect_case) const; + bool IsInVector(const std::vector &s, NxsString::CmpEnum mode = respect_case) const; bool IsStdAbbreviation(const NxsString &s, bool respectCase) const; bool IsNexusPunctuation(const char c) const; bool QuotesNeeded() const; NxsString UpperCasePrefix() const; - friend ostream &operator<<(std::ostream &out, const NxsString &s); + friend std::ostream &operator<<(std::ostream &out, const NxsString &s); // Modifiers // @@ -128,6 +131,8 @@ class NxsString static NxsString ToHex(long p, unsigned nFours); }; +typedef std::vector NxsStringVector; + #if defined (NXS_SUPPORT_OLD_NAMES) typedef NxsString nxsstring; #endif @@ -169,7 +174,7 @@ class NStrCaseSensitiveEquals | Binary function class that performs case-Insensitive string compares. */ struct NxsStringEqual - : public binary_function + : public std::binary_function { bool operator()(const NxsString &x, const NxsString &y) const; }; @@ -244,7 +249,7 @@ inline bool NxsStringEqual::operator()( | The default constructor. */ inline NxsString::NxsString() - : string() + : std::string() { } @@ -269,21 +274,6 @@ inline void NxsString::clear() erase(); } -/*-------------------------------------------------------------------------------------------------------------------------- -| Returns true if the Equals comparison function is true for this or any element in the vector `s'. 
-*/ -inline bool NxsString::IsInVector( - const NxsStringVector &s, /* the vector of NxsString objects to be searched */ - NxsString::CmpEnum mode) /* the argument passed to the Equals function, which is called for every element in the vector `s' */ - const - { - for (NxsStringVector::const_iterator sIt = s.begin(); sIt != s.end(); sIt++) - { - if (Equals(*sIt, mode)) - return true; - } - return false; - } /*-------------------------------------------------------------------------------------------------------------------------- | A copy constructor taking a C-string argument. @@ -326,7 +316,7 @@ inline NxsString &NxsString::operator=( inline NxsString &NxsString::operator+=( const char *s) /* the C-string to be appended */ { - append(string(s)); + append(std::string(s)); return *this; } @@ -349,7 +339,7 @@ inline NxsString &NxsString::operator+=( char s[2]; s[0] = c; s[1] = '\0'; - append(string(s)); + append(std::string(s)); return *this; } @@ -595,8 +585,8 @@ inline NxsString &endl( /*-------------------------------------------------------------------------------------------------------------------------- | Writes the string `s' to the ostream `out'. 
*/ -inline ostream &operator<<( - ostream &out, /* the stream to which the string `s' is to be written */ +inline std::ostream &operator<<( + std::ostream &out, /* the stream to which the string `s' is to be written */ const NxsString &s) /* the string to write */ { out << s.c_str(); diff --git a/ncl/nxstaxablock.h b/ncl/nxstaxablock.h index 74080c7e8..51d82b89c 100644 --- a/ncl/nxstaxablock.h +++ b/ncl/nxstaxablock.h @@ -19,6 +19,11 @@ #ifndef NCL_NXSTAXABLOCK_H #define NCL_NXSTAXABLOCK_H +#include //for std::ostream +#include "nxsstring.h" +#include "nxsdefs.h" //for NxsBoolVector +#include "nxstoken.h" //for NxsToken +#include "nxsblock.h" //for NxsBlock /*---------------------------------------------------------------------------------------------------------------------- | This class handles reading and storage for the NxsReader block TAXA. It overrides the member functions Read and | Reset, which are abstract virtual functions in the base class NxsBlock. The taxon names are stored in an vector of @@ -45,7 +50,7 @@ class NxsTaxaBlock unsigned GetNumTaxonLabels(); NxsString GetTaxonLabel(unsigned i); bool NeedsQuotes(unsigned i); - virtual void Report(ostream &out); + virtual void Report(std::ostream &out); virtual void Reset(); class NxsX_NoSuchTaxon {}; /* thrown if FindTaxon cannot locate a supplied taxon label in the taxonLabels vector */ diff --git a/ncl/nxstoken.h b/ncl/nxstoken.h index e1d4cc4a4..dea37484b 100644 --- a/ncl/nxstoken.h +++ b/ncl/nxstoken.h @@ -20,6 +20,10 @@ #ifndef NCL_NXSTOKEN_H #define NCL_NXSTOKEN_H +#include //for std::ostream +#include "nxsstring.h" //for NxsString and NxsStringVector +#include "nxsexception.h" //for NxsException +#include "nxsdefs.h" //for file_pos /*---------------------------------------------------------------------------------------------------------------------- | NxsToken objects are used by NxsReader to extract words (tokens) from a NEXUS data file. 
NxsToken objects know to | correctly skip NEXUS comments and understand NEXUS punctuation, making reading a NEXUS file as simple as repeatedly @@ -53,7 +57,7 @@ class NxsToken NxsString errormsg; - NxsToken(istream &i); + NxsToken(std::istream &i); virtual ~NxsToken(); bool AtEOF(); @@ -80,8 +84,8 @@ class NxsToken bool StoppedOn(char ch); void StripWhitespace(); void ToUpper(); - void Write(ostream &out); - void Writeln(ostream &out); + void Write(std::ostream &out); + void Writeln(std::ostream &out); virtual void OutputComment(const NxsString &msg); void GetNextContiguousToken(char stop_char); // Added by BQM @@ -100,7 +104,7 @@ class NxsToken private: - istream ∈ /* reference to input stream from which tokens will be read */ + std::istream ∈ /* reference to input stream from which tokens will be read */ file_pos filepos; /* current file position (for Metrowerks compiler, type is streampos rather than long) */ long fileline; /* current file line */ long filecol; /* current column in current line (refers to column immediately following token just read) */ @@ -183,7 +187,7 @@ inline char NxsToken::GetNextChar() if (failed) { errormsg = "Unknown error reading data file (check to make sure file exists)"; - throw NxsException(errormsg); + throw NxsException(errormsg, *this); } if (ch == 13 || ch == 10) @@ -469,7 +473,7 @@ inline bool NxsToken::StoppedOn( | output stream afterwards. */ inline void NxsToken::Write( - ostream &out) /* the output stream to which to write token NxsString */ + std::ostream &out) /* the output stream to which to write token NxsString */ { out << token; } @@ -479,9 +483,9 @@ inline void NxsToken::Write( | stream afterwards. 
*/ inline void NxsToken::Writeln( - ostream &out) /* the output stream to which to write `token' */ + std::ostream &out) /* the output stream to which to write `token' */ { - out << token << endl; + out << token << std::endl; } /** diff --git a/nclextra/modelsblock.cpp b/nclextra/modelsblock.cpp index 29265b326..12820c658 100644 --- a/nclextra/modelsblock.cpp +++ b/nclextra/modelsblock.cpp @@ -7,7 +7,7 @@ #include "modelsblock.h" -ModelsBlock::ModelsBlock() : NxsBlock(), vector() +ModelsBlock::ModelsBlock() : NxsBlock(), unordered_map() { id = "MODELS"; } @@ -51,7 +51,7 @@ void ModelsBlock::Read(NxsToken &token) model.flag |= (NM_ATOMIC*(model.description.find_first_of("+*") == string::npos && model.description.find("MIX") == string::npos)); - push_back(model); + insert({model.name, model}); } else if (token.Equals("END") || token.Equals("ENDBLOCK")) { // Get the semicolon following END @@ -73,18 +73,16 @@ void ModelsBlock::Read(NxsToken &token) } } -NxsModel *ModelsBlock::findModel(string &name) { - for (iterator it = begin(); it != end(); it++) - if (it->name == name) return &(*it); - return NULL; +NxsModel *ModelsBlock::findModel(string name) { + iterator it = find(name); + if (it == end()) return NULL; + return &(it->second); } -NxsModel *ModelsBlock::findMixModel(string &name) { - for (iterator it = begin(); it != end(); it++) - if (it->name == name) { - if ((it->flag & NM_ATOMIC) == 0) - return &(*it); - else return NULL; - } - return NULL; +NxsModel *ModelsBlock::findMixModel(string name) { + NxsModel *model = findModel(name); + if (!model) return NULL; + if (model->flag & NM_ATOMIC) + return NULL; + return model; } diff --git a/nclextra/modelsblock.h b/nclextra/modelsblock.h index f62fe21c5..f1263b289 100644 --- a/nclextra/modelsblock.h +++ b/nclextra/modelsblock.h @@ -9,9 +9,11 @@ #define MODELSBLOCK_H_ #include "ncl/ncl.h" +#include "utils/tools.h" -const int NM_ATOMIC = 1; // NxsModel is not mixture or +G etc. 
model -const int NM_FREQ = 2; // NxsModel contains state frequency +const int NM_ATOMIC = 1; // NxsModel is not mixture or +G etc. model +const int NM_FREQ = 2; // NxsModel contains state frequency +const int NM_PROTEIN = 4; // NxsModel contains emprical protein model class NxsModel { public: @@ -39,7 +41,7 @@ class NxsModel { /** * Class to parse MODELS block in NEXUS file */ -class ModelsBlock: public NxsBlock, public vector { +class ModelsBlock: public NxsBlock, public unordered_map { public: /** constructor */ ModelsBlock(); @@ -50,13 +52,13 @@ class ModelsBlock: public NxsBlock, public vector { @param name model name @return pointer to model with the name or NULL if not found */ - NxsModel *findModel(string &name); + NxsModel *findModel(string name); /** @param name model name @return pointer to a mixed model with the name or NULL if not found */ - NxsModel *findMixModel(string &name); + NxsModel *findMixModel(string name); protected: diff --git a/nclextra/msetsblock.cpp b/nclextra/msetsblock.cpp index d4309f1f7..05cefc4bc 100644 --- a/nclextra/msetsblock.cpp +++ b/nclextra/msetsblock.cpp @@ -18,6 +18,7 @@ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* ***************************************************************************/ #include "msetsblock.h" +#include "utils/tools.h" MSetsBlock::MSetsBlock() : NxsBlock() @@ -105,7 +106,9 @@ void MSetsBlock::Read(NxsToken &token) token.SetLabileFlagBit(NxsToken::preserveUnderscores); token.GetNextToken(); do { - myset->taxlist.push_back(token.GetToken()); + string taxname = token.GetToken(); + renameString(taxname); + myset->taxlist.push_back(taxname); token.SetLabileFlagBit(NxsToken::preserveUnderscores); token.GetNextToken(); } while (!token.AtEOF() && !token.Equals(";")); @@ -124,7 +127,7 @@ void MSetsBlock::Read(NxsToken &token) { // This should be the NTAX keyword // - token.SetLabileFlagBit(NxsToken::preserveUnderscores); + token.SetLabileFlagBit(NxsToken::preserveUnderscores + NxsToken::hyphenNotPunctuation); token.GetNextToken(); @@ -158,6 +161,7 @@ void MSetsBlock::Read(NxsToken &token) myset->aln_file = myset->position_spec.substr(0, pos); myset->position_spec = myset->position_spec.substr(pos+1); } + trimString(myset->position_spec); if ((pos=myset->position_spec.find(',')) != string::npos && isalpha(myset->position_spec[0])) { myset->sequence_type = myset->position_spec.substr(0, pos); myset->position_spec = myset->position_spec.substr(pos+1); @@ -171,7 +175,11 @@ void MSetsBlock::Read(NxsToken &token) errormsg += " instead"; throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn()); } - + trimString(myset->aln_file); + trimString(myset->char_partition); + trimString(myset->model_name); + trimString(myset->position_spec); + trimString(myset->sequence_type); } // if (token.Equals("CHARSET")) else if (token.Equals("CHARPARTITION")) { @@ -220,7 +228,20 @@ void MSetsBlock::Read(NxsToken &token) myset->model_name = model_name; myset->char_partition = partition_name; token.GetNextToken(); - if (!token.Equals(",") && !token.Equals(";")) + if (token.Equals("{")) { + token.GetNextToken(); + myset->tree_len = 
convert_double(token.GetToken().c_str()); + token.GetNextToken(); + if (!token.Equals("}")) { + errormsg = "Expecting '}', but found "; + errormsg += token.GetToken(); + errormsg += " instead"; + throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn()); + } + token.GetNextToken(); + } + + if (!token.Equals(",") && !token.Equals(";")) { errormsg = "Expecting ',' or ';', but found "; errormsg += token.GetToken(); @@ -259,6 +280,9 @@ void MSetsBlock::Read(NxsToken &token) else { + errormsg = "Unknown command "; + errormsg += token.GetToken(); + throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn()); SkippingCommand(token.GetToken()); do { @@ -276,6 +300,11 @@ void MSetsBlock::Read(NxsToken &token) } +void MSetsBlock::SkippingCommand(NxsString commandName) { + cout << "WARNING: Skipping unknown command " << commandName << endl; +} + + CharSet *MSetsBlock::findCharSet(string name) { for (vector::iterator it = charsets.begin(); it != charsets.end(); it++) if ((*it)->name == name) return (*it); diff --git a/nclextra/msetsblock.h b/nclextra/msetsblock.h index 03d6c656c..bea2697d2 100644 --- a/nclextra/msetsblock.h +++ b/nclextra/msetsblock.h @@ -45,6 +45,12 @@ typedef vector TaxaSetNameVector; */ class CharSet { public: + + /** constructor */ + CharSet() { + tree_len = 0.0; + } + /** charset name */ string name; @@ -62,6 +68,9 @@ class CharSet { /** name of CharPartition where this charset is included*/ string char_partition; + + /** total tree length for this charset */ + double tree_len; }; @@ -95,10 +104,16 @@ class MSetsBlock : public NxsBlock */ virtual void Reset(); + /** + called when some commands are skipped + @param commandName command name + */ + virtual void SkippingCommand(NxsString commandName); + /** @return the number of sets */ - int getNSets() const { return sets.size(); } + size_t getNSets() const { return sets.size(); } /** @param id set id diff --git 
a/nn/neuralnetwork.cpp b/nn/neuralnetwork.cpp new file mode 100644 index 000000000..40b3a3583 --- /dev/null +++ b/nn/neuralnetwork.cpp @@ -0,0 +1,217 @@ +// +// Created by tamara on 3/8/21. +// + +#include +#include "neuralnetwork.h" +#include +#include +#include + +NeuralNetwork::NeuralNetwork(Alignment *alignment) { + this->alignment = alignment; +} + +NeuralNetwork::~NeuralNetwork() {} + +double NeuralNetwork::doAlphaInference() { + Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "alpha_find"); + Ort::SessionOptions session_options; + session_options.SetIntraOpNumThreads(1); + session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); + + const char *model_path = "../nn_models/lanfear_alpha_lstm.onnx"; + + printf("Using Onnxruntime C++ API\n"); + Ort::Session session(env, model_path, session_options); + + size_t num_input_nodes = session.GetInputCount(); + std::vector input_node_names(num_input_nodes); + + Ort::AllocatorWithDefaultOptions allocator; + //std::vector input_node_dims; + + //printf("Number of inputs = %zu\n", num_input_nodes); + + // iterate over all input nodes + for (int i = 0; i < num_input_nodes; i++) { + // print input node names + char *input_name = session.GetInputName(i, allocator); + //printf("Input %d : name=%s\n", i, input_name); + input_node_names[i] = input_name; + /* + // print input node types + Ort::TypeInfo type_info = session.GetInputTypeInfo(i); + auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); + + ONNXTensorElementDataType type = tensor_info.GetElementType(); + printf("Input %d : type=%d\n", i, type); + + // print input dims + input_node_dims = tensor_info.GetShape(); + printf("Input %d : num_dims=%zu\n", i, input_node_dims.size()); + for (int j = 0; j < input_node_dims.size(); j++) { + printf("Input %d : dim %d=%jd\n", i, j, input_node_dims[j]); + }*/ + } + + std::vector input_tensor_(10000 * 4); + std::vector input_shape_{1, 10000, 4}; + + std::vector output_node_names = {"alpha", "ev_model"}; + + 
size_t num_sites = this->alignment->getNSite(); + size_t num_taxa = this->alignment->getNSeq(); + + // choose 10,000 random positions (with repetition) in (0, num_sites - 1) + mt19937 rng(chrono::steady_clock::now().time_since_epoch().count()); + std::uniform_int_distribution dist(0, num_sites - 1); + + for (size_t i = 0; i < 40000; i = i + 4) { + size_t site_idx = dist(rng); + vector freqs = this->alignment->getPattern(site_idx).freqs; + // in case of gaps, adjust number of taxa + // size_t num_taxa = accumulate(freqs.begin(), freqs.end(), 0); + + input_tensor_[i] = (float) freqs[0] / num_taxa; + input_tensor_[i + 1] = (float) freqs[1] / num_taxa; + input_tensor_[i + 2] = (float) freqs[2] / num_taxa; + input_tensor_[i + 3] = (float) freqs[3] / num_taxa; + } + + // create input tensor object from data values + auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); + Ort::Value input_tensor = Ort::Value::CreateTensor(memory_info, input_tensor_.data(), input_tensor_.size(), + input_shape_.data(), input_shape_.size()); + assert(input_tensor.IsTensor()); + + // do inference + auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, 1, + output_node_names.data(), 2); + assert(output_tensors.size() == 2 && output_tensors.front().IsTensor()); + + // get pointer to output tensor float values + //float *floatarr = output_tensors.front().GetTensorMutableData(); + float *alpha = output_tensors[0].GetTensorMutableData(); + float *check = output_tensors[1].GetTensorMutableData(); + + printf("Check whether heterogeneous (0) or homogeneous (1) = %f\n", check[0]); + + // print alpha value + printf("Alpha value = %f\n", alpha[0] / 1000); + + // TODO: check via ev_model or alpha value whether it is +G? 
+ if (check[0] > 0.5) + return -1; + return alpha[0] / 1000; +} + +string NeuralNetwork::doModelInference() { + Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "model_find"); + Ort::SessionOptions session_options; + session_options.SetIntraOpNumThreads(1); + session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); + + const char *model_path = "../nn_models/resnet_modelfinder.onnx"; + + printf("Using Onnxruntime C++ API\n"); + Ort::Session session(env, model_path, session_options); + + size_t num_input_nodes = session.GetInputCount(); + std::vector input_node_names(num_input_nodes); + + Ort::AllocatorWithDefaultOptions allocator; + //std::vector input_node_dims; + + //printf("Number of inputs = %zu\n", num_input_nodes); + + // iterate over all input nodes + for (int i = 0; i < num_input_nodes; i++) { + // print input node names + char *input_name = session.GetInputName(i, allocator); + //printf("Input %d : name=%s\n", i, input_name); + input_node_names[i] = input_name; + /* + // print input node types + Ort::TypeInfo type_info = session.GetInputTypeInfo(i); + auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); + + ONNXTensorElementDataType type = tensor_info.GetElementType(); + printf("Input %d : type=%d\n", i, type); + + // print input dims + input_node_dims = tensor_info.GetShape(); + printf("Input %d : num_dims=%zu\n", i, input_node_dims.size()); + for (int j = 0; j < input_node_dims.size(); j++) { + printf("Input %d : dim %d=%jd\n", i, j, input_node_dims[j]); + }*/ + } + + std::vector input_tensor_(40 * 250 * 26); + std::vector input_shape_{1, 40, 250, 26}; + + std::vector output_node_names = {"dense"}; + + const size_t num_taxa = this->alignment->getNSeq(); + + // choose 10,000 random sequence pairs (with repetition) in (0, num_sites - 1) + mt19937 rng(chrono::steady_clock::now().time_since_epoch().count()); + std::uniform_int_distribution dist_taxa(0, num_taxa - 1); + + vector summary_stats(26); + + 
this->alignment->ungroupSitePattern(); + + for (size_t i = 0; i < 260000; i = i + 26) { + size_t seq1_idx; + size_t seq2_idx; + while (true) { + seq1_idx = dist_taxa(rng); + seq2_idx = dist_taxa(rng); + if (seq1_idx != seq2_idx) + break; + } + + summary_stats = this->alignment->computeSummaryStats(seq1_idx, seq2_idx); + std::copy(std::begin(summary_stats), std::end(summary_stats), std::begin(input_tensor_) + i); // replace part of the vector with this + } + + // create input tensor object from data values + auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); + Ort::Value input_tensor = Ort::Value::CreateTensor(memory_info, input_tensor_.data(), input_tensor_.size(), + input_shape_.data(), input_shape_.size()); + assert(input_tensor.IsTensor()); + + // do inference + auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, 1, + output_node_names.data(), 1); + assert(output_tensors.size() == 1 && output_tensors.front().IsTensor()); + + // get pointer to output tensor float values + float *floatarr = output_tensors.front().GetTensorMutableData(); + + // print values for JC,K2P,F81,HKY,Tn,GTR + float max_val = 0.0; + size_t chosen_model; + + for (size_t i = 0; i < 6; i++) { + if (floatarr[i] > max_val) { + max_val = floatarr[i]; + chosen_model = i; + } + //printf("Model value [%zu] = %f\n", i, floatarr[i]); + } + + // return chosen model + switch(chosen_model) { + case 0: return "JC"; + case 1: return "K2P"; + case 2: return "F81"; + case 3: return "HKY"; + case 4: return "Tn"; + case 5: return "GTR"; + default: throw "Model not known"; + + } + +} diff --git a/nn/neuralnetwork.h b/nn/neuralnetwork.h new file mode 100644 index 000000000..fa4d435fd --- /dev/null +++ b/nn/neuralnetwork.h @@ -0,0 +1,27 @@ +// +// Created by tamara on 3/8/21. 
+// + +#ifndef IQTREE_NEURALNETWORK_H +#define IQTREE_NEURALNETWORK_H + +#include +#include + +class NeuralNetwork { +public: + /** constructor */ + NeuralNetwork(Alignment *alignment); + + /** destructor */ + virtual ~NeuralNetwork(); + + double doAlphaInference(); + string doModelInference(); + + Alignment *alignment; + +}; + + +#endif //IQTREE_NEURALNETWORK_H diff --git a/obsolete/parsmultistate.cpp b/obsolete/parsmultistate.cpp index ff844195e..3b473e14b 100644 --- a/obsolete/parsmultistate.cpp +++ b/obsolete/parsmultistate.cpp @@ -22,15 +22,27 @@ #include "tree/tinatree.h" #include "parsmultistate.h" #include "alignment/alignment.h" +#include "tree/parstree.h" void doParsMultiState(Params ¶ms) { - cout << "Here\n"; - Alignment alignment(params.aln_file, params.sequence_type, params.intype); - TinaTree tree; - tree.readTree(params.user_file, params.is_rooted); - tree.setAlignment(&alignment); - tree.drawTree(cout); - cout << "Parsimony score is: " << tree.computeParsimonyScore() << endl; - cout << "Parsimony score ver2 is: " << tree.computeParsimony() << endl; + Alignment alignment(params.aln_file, params.sequence_type, params.intype, ""); + alignment.orderPatternByNumChars(PAT_VARIANT); + ParsTree pars_tree; + pars_tree.readTree(params.user_file, params.is_rooted); + if (pars_tree.rooted) + pars_tree.convertToUnrooted(); + pars_tree.setAlignment(&alignment); + pars_tree.initCostMatrix(CM_LINEAR); + pars_tree.setParsimonyKernel(params.SSE); + pars_tree.initializeAllPartialPars(); + int total_length = pars_tree.computeParsimony(); + cout << "total length: " << total_length << endl; + pars_tree.initCostMatrix(CM_UNIFORM); + int pars_score = pars_tree.computeParsimony(); + cout.unsetf(ios::fixed); + cout.precision(6); + cout << "mean length: " << double(total_length)/pars_score << endl; + cout << "Parsimony score is: " << pars_score << endl; + //cout << "Parsimony score ver2 is: " << tree.computeParsimony() << endl; //tree.printParsimonyStates(); } diff --git 
a/obsolete/parsmultistate.h b/obsolete/parsmultistate.h index c1c3eb1a6..8dc5454e7 100644 --- a/obsolete/parsmultistate.h +++ b/obsolete/parsmultistate.h @@ -21,7 +21,7 @@ #ifndef PARSMULTISTATE_H #define PARSMULTISTATE_H -#include "tools.h" +#include "utils/tools.h" void doParsMultiState(Params ¶ms); diff --git a/pda/circularnetwork.cpp b/pda/circularnetwork.cpp index cb1ee7598..420edfe07 100644 --- a/pda/circularnetwork.cpp +++ b/pda/circularnetwork.cpp @@ -34,15 +34,15 @@ CircularNetwork::CircularNetwork(Params ¶ms) : PDNetwork(params) { void CircularNetwork::findPD(Params ¶ms, vector &taxa_set, vector &taxa_order) { - if (!isCircular() || params.run_mode == EXHAUSTIVE || params.run_mode == GREEDY - || params.run_mode == LINEAR_PROGRAMMING || isPDArea()) { + if (!isCircular() || params.run_mode == RunMode::EXHAUSTIVE || params.run_mode == RunMode::GREEDY + || params.run_mode == RunMode::LINEAR_PROGRAMMING || isPDArea()) { // call inherited findPD if condition not met PDNetwork::findPD(params, taxa_set, taxa_order); return; } // call the entering function enterFindPD(params); - params.detected_mode = DYNAMIC_PROGRAMMING; + params.detected_mode = RunMode::DYNAMIC_PROGRAMMING; int root_id = (initialset.size() > 0) ? initialset[0] : -1; diff --git a/pda/ecopd.cpp b/pda/ecopd.cpp index ec4023cac..05610091a 100644 --- a/pda/ecopd.cpp +++ b/pda/ecopd.cpp @@ -913,27 +913,34 @@ void ECOpd::printInfDAG (const char* fileOUT,PDNetwork &splitsys, Params ¶ms } //Constraints---------------------------------------------------------------------- //species present in the set----------------------------------------------- - if(initialTaxa.size()!=0) - for(i=0;iid<<" + "; - else - out<<"x"<id<<" >= 1"<degree() == 0) { + nleaf++; + if (nleaf < nleafDAG) { + out << "x" << taxaDAG[j]->id << " + "; } + else { + out << "x" << taxaDAG[j]->id << " >= 1" << endl; + } + } + } //SURVIVAL CONSTRAINT if(weighted){ //constraint: Weighted food web. 
Sum of weights is greater than a given threshold-------------------------------- diff --git a/pda/hashsplitset.cpp b/pda/hashsplitset.cpp index addb1cf17..d4521eabc 100644 --- a/pda/hashsplitset.cpp +++ b/pda/hashsplitset.cpp @@ -41,7 +41,8 @@ Split *SplitIntMap::findSplit(Split *sp, int &value) { int SplitIntMap::getValue(Split *sp) { int value; - ASSERT(findSplit(sp, value)); + Split* findsp = findSplit(sp, value); + ASSERT(findsp); return value; } @@ -62,11 +63,11 @@ void SplitIntMap::insertSplit(Split *sp, int value) { } void SplitIntMap::buildMap(SplitGraph &sg, bool use_index) { - clear(); - for (int i = 0; i < sg.size(); i++) { - if (use_index) - insertSplit(sg[i], i); - else - insertSplit(sg[i], sg[i]->getWeight()); - } + clear(); + for (int i = 0; i < sg.size(); i++) { + if (use_index) + insertSplit(sg[i], i); + else + insertSplit(sg[i], sg[i]->getWeight()); + } } diff --git a/pda/pdnetwork.cpp b/pda/pdnetwork.cpp index 684ce7850..d100d4f85 100644 --- a/pda/pdnetwork.cpp +++ b/pda/pdnetwork.cpp @@ -455,24 +455,24 @@ void PDNetwork::findPD(Params ¶ms, vector &taxa_set, vector & rem_splits.push_back(i); IntList::iterator rem_it = rem_splits.end(); - params.detected_mode = EXHAUSTIVE; + params.detected_mode = RunMode::EXHAUSTIVE; if (isPDArea()) { - params.detected_mode = LINEAR_PROGRAMMING; + params.detected_mode = RunMode::LINEAR_PROGRAMMING; printLPVersion(params.gurobi_format); cout << "Optimizing PD over " << sets->getNSets() << " areas..." << endl; cout << "Linear programming on general split network..." << endl; findPDArea_LP(params, taxa_set); - } else if (params.run_mode == GREEDY) { + } else if (params.run_mode == RunMode::GREEDY) { // greedy search, not ensure to give the optimal sets! cout << "Start greedy search..." 
<< endl; greedyPD(params.sub_size, curset, taxa_order); localSearchPD(params.sub_size, curset, taxa_order); taxa_set.resize(1); taxa_set[0].push_back(new Split(curset)); - } else if (params.run_mode != EXHAUSTIVE) { - params.detected_mode = LINEAR_PROGRAMMING; + } else if (params.run_mode != RunMode::EXHAUSTIVE) { + params.detected_mode = RunMode::LINEAR_PROGRAMMING; printLPVersion(params.gurobi_format); cout << "Linear programming on general split network..." << endl; findPD_LP(params, taxa_set); @@ -1059,7 +1059,7 @@ void PDNetwork::printOutputSetScore(Params ¶ms, vector &pd_set) { ofstream scoreout; ofstream out; if (params.nr_output == 1) { - if (params.run_mode == PD_USER_SET || !isPDArea()) { + if (params.run_mode == RunMode::PD_USER_SET || !isPDArea()) { sprintf(filename, "%s.pdtaxa", params.out_prefix); cout << "All taxa list(s) printed to " << filename << endl; } else { @@ -1103,7 +1103,7 @@ void PDNetwork::printOutputSetScore(Params ¶ms, vector &pd_set) { //c_old = count; if (params.nr_output > 10) { out.open(filename); - if (params.run_mode == PD_USER_SET || !isPDArea()) { + if (params.run_mode == RunMode::PD_USER_SET || !isPDArea()) { for (i = 0; i < getNTaxa(); i++) if (this_set->containTaxon(i)) out << getTaxa()->GetTaxonLabel(i) << endl; @@ -1120,7 +1120,7 @@ void PDNetwork::printOutputSetScore(Params ¶ms, vector &pd_set) { calcCost(*this_set) << " " << computeBoundary(*this_set) << " " << params.boundary_modifier << endl; - if (params.run_mode == PD_USER_SET || !isPDArea()) { + if (params.run_mode == RunMode::PD_USER_SET || !isPDArea()) { for (i = 0; i < getNTaxa(); i++) if (this_set->containTaxon(i)) out << getTaxa()->GetTaxonLabel(i) << endl; diff --git a/pda/splitgraph.cpp b/pda/splitgraph.cpp index b115a5133..fb030452b 100644 --- a/pda/splitgraph.cpp +++ b/pda/splitgraph.cpp @@ -30,131 +30,131 @@ bool compareSplit(Split* sp1, Split* sp2) { - if (sp1->countTaxa() != sp2->countTaxa()) - return sp1->countTaxa() < sp2->countTaxa(); - else - return 
sp1->firstTaxon() < sp2->firstTaxon(); + if (sp1->countTaxa() != sp2->countTaxa()) + return sp1->countTaxa() < sp2->countTaxa(); + else + return sp1->firstTaxon() < sp2->firstTaxon(); } //#define MY_DEBUG /******************************************************** - Defining SplitGraph methods + Defining SplitGraph methods ********************************************************/ SplitGraph::SplitGraph() - : vector() + : vector() { - pda = NULL; - taxa = NULL; - splits = NULL; - sets = NULL; - trees = NULL; - mtrees = NULL; - areas_boundary = NULL; + pda = NULL; + taxa = NULL; + splits = NULL; + sets = NULL; + trees = NULL; + mtrees = NULL; + areas_boundary = NULL; } SplitGraph::SplitGraph(Params ¶ms) : vector() { - init(params); + init(params); } void SplitGraph::createBlocks() { - taxa = new NxsTaxaBlock(); - splits = new MSplitsBlock(this); - pda = new MPdaBlock(this); - sets = new MSetsBlock(); - trees = new TreesBlock(taxa); - //mtrees = NULL; + taxa = new NxsTaxaBlock(); + splits = new MSplitsBlock(this); + pda = new MPdaBlock(this); + sets = new MSetsBlock(); + trees = new TreesBlock(taxa); + //mtrees = NULL; } void SplitGraph::init(Params ¶ms) { - mtrees = NULL; - if (params.intype == IN_NEWICK) { - // read the input file, can contain more than 1 tree - mtrees = new MTreeSet(params.user_file, params.is_rooted, params.tree_burnin, params.tree_max_count); - //mtree = new MTree(params.user_file, params.is_rooted); - - if (params.is_rooted) { - params.sub_size++; - params.min_size++; - } - if (mtrees->isRooted() && params.root != NULL) - outError(ERR_CONFLICT_ROOT); - //SplitIntMap hash_ss; - mtrees->convertSplits(*this, params.split_threshold, params.split_weight_summary, params.split_weight_threshold); - - if (verbose_mode >= VB_DEBUG) - saveFileStarDot(cout); - } else { - createBlocks(); - if (params.is_rooted) - outError(ERR_ROOT_NET); - - cout << "Reading input file " << params.user_file << "..." 
<< endl; - - MyReader nexus(params.user_file); - - nexus.Add(taxa); - nexus.Add(splits); - nexus.Add(pda); - nexus.Add(sets); - nexus.Add(trees); - - MyToken token(nexus.inf); - nexus.Execute(token); - if (trees->GetNumTrees() > 0) { - if (getNSplits() > 0) - outError("Ambiguous input file, pls only specify either SPLITS block or TREES block"); - convertFromTreesBlock(params.tree_burnin, params.tree_max_count, params.split_threshold, - params.split_weight_summary, params.split_weight_threshold, params.tree_weight_file); - } - - } - - if (verbose_mode >= VB_DEBUG) - taxa->Report(cout); - //splits->Report(cout); - //reportConflict(cout); - if (params.pdtaxa_file != NULL) { - if (sets->getNSets() > 0) - outError("Taxa sets were already specified in the input file"); - cout << "Reading taxa sets in file " << params.pdtaxa_file << "..." << endl; - - bool nexus_formated = (detectInputFile(params.pdtaxa_file) == IN_NEXUS); - if (nexus_formated) { - MyReader nexus(params.pdtaxa_file); - nexus.Add(sets); - MyToken token(nexus.inf); - nexus.Execute(token); - } else { - readTaxaSets(params.pdtaxa_file, sets); - } - if (sets->getNSets() == 0) - outError("No taxa sets found"); - } - - areas_boundary = NULL; - if (params.areas_boundary_file) { - if (sets->getNSets() == 0) outError("No taxon sets defined yet"); - areas_boundary = new double [sets->getNSets() * sets->getNSets()]; - cout << "Reading sets relation file " << params.areas_boundary_file << "..." << endl; - readAreasBoundary(params.areas_boundary_file, sets, areas_boundary); - } - - if (verbose_mode >= VB_DEBUG && sets->getNSets() > 0) - sets->Report(cout); - - if (sets->getNSets() > 0 && taxa->GetNumTaxonLabels() == 0) { - AddTaxaFromSets(); - } - if (taxa->GetNumTaxonLabels() == 0) - outError("No taxa found"); - if (getNSplits() == 0) { - //outError(ERR_NO_SPLITS); - createStarTree(); - } - cout << getNTaxa()-params.is_rooted << - " taxa and " << getNSplits()-params.is_rooted << " splits." 
<< endl; + mtrees = NULL; + if (params.intype == IN_NEWICK) { + // read the input file, can contain more than 1 tree + mtrees = new MTreeSet(params.user_file, params.is_rooted, params.tree_burnin, params.tree_max_count); + //mtree = new MTree(params.user_file, params.is_rooted); + + if (params.is_rooted) { + params.sub_size++; + params.min_size++; + } + if (mtrees->isRooted() && params.root != NULL) + outError(ERR_CONFLICT_ROOT); + //SplitIntMap hash_ss; + mtrees->convertSplits(*this, params.split_threshold, params.split_weight_summary, params.split_weight_threshold); + + if (verbose_mode >= VB_DEBUG) + saveFileStarDot(cout); + } else { + createBlocks(); +// if (params.is_rooted) +// outError(ERR_ROOT_NET); + + cout << "Reading input file " << params.user_file << "..." << endl; + + MyReader nexus(params.user_file); + + nexus.Add(taxa); + nexus.Add(splits); + nexus.Add(pda); + nexus.Add(sets); + nexus.Add(trees); + + MyToken token(nexus.inf); + nexus.Execute(token); + if (trees->GetNumTrees() > 0) { + if (getNSplits() > 0) + outError("Ambiguous input file, pls only specify either SPLITS block or TREES block"); + convertFromTreesBlock(params.tree_burnin, params.tree_max_count, params.split_threshold, + params.split_weight_summary, params.split_weight_threshold, params.tree_weight_file); + } + + } + + if (verbose_mode >= VB_DEBUG) + taxa->Report(cout); + //splits->Report(cout); + //reportConflict(cout); + if (params.pdtaxa_file != NULL) { + if (sets->getNSets() > 0) + outError("Taxa sets were already specified in the input file"); + cout << "Reading taxa sets in file " << params.pdtaxa_file << "..." 
<< endl; + + bool nexus_formated = (detectInputFile(params.pdtaxa_file) == IN_NEXUS); + if (nexus_formated) { + MyReader nexus(params.pdtaxa_file); + nexus.Add(sets); + MyToken token(nexus.inf); + nexus.Execute(token); + } else { + readTaxaSets(params.pdtaxa_file, sets); + } + if (sets->getNSets() == 0) + outError("No taxa sets found"); + } + + areas_boundary = NULL; + if (params.areas_boundary_file) { + if (sets->getNSets() == 0) outError("No taxon sets defined yet"); + areas_boundary = new double [sets->getNSets() * sets->getNSets()]; + cout << "Reading sets relation file " << params.areas_boundary_file << "..." << endl; + readAreasBoundary(params.areas_boundary_file, sets, areas_boundary); + } + + if (verbose_mode >= VB_DEBUG && sets->getNSets() > 0) + sets->Report(cout); + + if (sets->getNSets() > 0 && taxa->GetNumTaxonLabels() == 0) { + AddTaxaFromSets(); + } + if (taxa->GetNumTaxonLabels() == 0) + outError("No taxa found"); + if (getNSplits() == 0) { + //outError(ERR_NO_SPLITS); + createStarTree(); + } + cout << getNTaxa()-params.is_rooted << + " taxa and " << getNSplits()-params.is_rooted << " splits." << endl; } @@ -192,7 +192,8 @@ void SplitGraph::restoreCheckpoint() { for (int split = 0; split < nsplits; split++) { checkpoint->addListElement(); string str; - ASSERT(checkpoint->getString("", str)); + bool found = checkpoint->getString("", str); + ASSERT(found); stringstream ss(str); double weight; ss >> weight; @@ -211,86 +212,86 @@ void SplitGraph::restoreCheckpoint() { } int SplitGraph::getNTrivialSplits() { - int count = 0; - for (iterator it = begin(); it != end(); it++) - if ((*it)->trivial() >= 0) - count++; - return count; + int count = 0; + for (iterator it = begin(); it != end(); it++) + if ((*it)->trivial() >= 0) + count++; + return count; } void SplitGraph::createStarTree() { - cout << "No splits found, creating a star tree with branch length of 1..." 
<< endl; - int ntaxa = taxa->GetNumTaxonLabels(); - for (int i = 0; i < ntaxa; i++) { - Split *sp = new Split(ntaxa, 1.0); - sp->addTaxon(i); - push_back(sp); - } - cout << "NOTE: subsequent PD will correspond to species richness." << endl; + cout << "No splits found, creating a star tree with branch length of 1..." << endl; + int ntaxa = taxa->GetNumTaxonLabels(); + for (int i = 0; i < ntaxa; i++) { + Split *sp = new Split(ntaxa, 1.0); + sp->addTaxon(i); + push_back(sp); + } + cout << "NOTE: subsequent PD will correspond to species richness." << endl; } void SplitGraph::AddTaxaFromSets() { - cout << "Taking taxa from SETS block..." << endl; - for (int i = 0; i < sets->getNSets(); i++) - for(vector::iterator it = sets->getSet(i)->taxlist.begin(); - it != sets->getSet(i)->taxlist.end(); it++) - if (!taxa->IsAlreadyDefined(NxsString(it->c_str()))) { - taxa->AddTaxonLabel(NxsString(it->c_str())); - } + cout << "Taking taxa from SETS block..." << endl; + for (int i = 0; i < sets->getNSets(); i++) + for(vector::iterator it = sets->getSet(i)->taxlist.begin(); + it != sets->getSet(i)->taxlist.end(); it++) + if (!taxa->IsAlreadyDefined(NxsString(it->c_str()))) { + taxa->AddTaxonLabel(NxsString(it->c_str())); + } } void SplitGraph::freeMem() { - for (reverse_iterator it = rbegin(); it != rend(); it++) { - //(*it)->report(cout); - delete *it; - } - clear(); - if (areas_boundary) delete areas_boundary; - if (trees) delete trees; - if (sets) delete sets; - if (pda) delete pda; - if (splits) delete splits; - if (taxa) delete taxa; - if (mtrees) delete mtrees; + for (reverse_iterator it = rbegin(); it != rend(); it++) { + //(*it)->report(cout); + delete *it; + } + clear(); + if (areas_boundary) delete areas_boundary; + if (trees) delete trees; + if (sets) delete sets; + if (pda) delete pda; + if (splits) delete splits; + if (taxa) delete taxa; + if (mtrees) delete mtrees; } SplitGraph::~SplitGraph() { - freeMem(); + freeMem(); } void SplitGraph::convertFromTreesBlock(int burnin, 
int max_count, double split_threshold, - int split_weight_summary, double weight_threshold, const char *tree_weight_file) { - cout << trees->GetNumTrees() << " tree(s) loaded" << endl; - if (burnin >= trees->GetNumTrees()) - outError("Burnin value is too large"); - if (burnin > 0) - cout << burnin << " beginning tree(s) discarded" << endl; - mtrees = new MTreeSet(); - - for (int i = burnin; i < trees->GetNumTrees() && (i < burnin+max_count); i++) { - stringstream strs(trees->GetTranslatedTreeDescription(i), ios::in | ios::out | ios::app); - strs << ";"; - MTree *tree = mtrees->newTree(); - bool myrooted = trees->IsRootedTree(i); - tree->readTree(strs, myrooted); - mtrees->push_back(tree); - mtrees->tree_weights.push_back(1); - } - mtrees->checkConsistency(); - //SplitIntMap hash_ss; - - if (tree_weight_file) - readIntVector(tree_weight_file, burnin, max_count, mtrees->tree_weights); -/* else if (!weights) - tree_weights.resize(size(), 1);*/ - - if (mtrees->size() != mtrees->tree_weights.size()) - outError("Tree file and tree weight file have different number of entries"); - mtrees->convertSplits(*this, split_threshold, split_weight_summary, weight_threshold); + int split_weight_summary, double weight_threshold, const char *tree_weight_file) { + cout << trees->GetNumTrees() << " tree(s) loaded" << endl; + if (burnin >= trees->GetNumTrees()) + outError("Burnin value is too large"); + if (burnin > 0) + cout << burnin << " beginning tree(s) discarded" << endl; + mtrees = new MTreeSet(); + + for (int i = burnin; i < trees->GetNumTrees() && (i < burnin+max_count); i++) { + stringstream strs(trees->GetTranslatedTreeDescription(i), ios::in | ios::out | ios::app); + strs << ";"; + MTree *tree = mtrees->newTree(); + bool myrooted = trees->IsRootedTree(i); + tree->readTree(strs, myrooted); + mtrees->push_back(tree); + mtrees->tree_weights.push_back(1); + } + mtrees->checkConsistency(); + //SplitIntMap hash_ss; + + if (tree_weight_file) + readIntVector(tree_weight_file, 
burnin, max_count, mtrees->tree_weights); +/* else if (!weights) + tree_weights.resize(size(), 1);*/ + + if (mtrees->size() != mtrees->tree_weights.size()) + outError("Tree file and tree weight file have different number of entries"); + mtrees->convertSplits(*this, split_threshold, split_weight_summary, weight_threshold); } @@ -298,321 +299,321 @@ void SplitGraph::convertFromTreesBlock(int burnin, int max_count, double split_t void SplitGraph::report(ostream &out) { - out << endl; - out << "Split network contains "; - - if (size() == 0) - { - out << "no split" << endl; - } - else if (size() == 1) - out << "one split" << endl; - else - out << size() << " splits" << endl; - - if (size() == 0) - return; + out << endl; + out << "Split network contains "; - sort(begin(), end(), compareSplit); - int k = 0; - for (iterator it = begin(); it != end(); it++,k++) - { - out << '\t' << (k+1) << '\t'; - (*it)->report(out); - } + if (size() == 0) + { + out << "no split" << endl; + } + else if (size() == 1) + out << "one split" << endl; + else + out << size() << " splits" << endl; + + if (size() == 0) + return; + + sort(begin(), end(), compareSplit); + int k = 0; + for (iterator it = begin(); it != end(); it++,k++) + { + out << '\t' << (k+1) << '\t'; + (*it)->report(out); + } } void SplitGraph::reportConflict(ostream &out) { - int k = 0; - out << "Compatible splits: " << endl; - for (iterator i = begin(); i != end(); i++, k++) - { - out << (k+1) << '\t'; - int k2 = 1; - for (iterator j = begin(); j != end(); j++, k2++) - if ( j != i && (*i)->compatible(*(*j))) - { - out << k2 << " "; - } - out << endl; - } + int k = 0; + out << "Compatible splits: " << endl; + for (iterator i = begin(); i != end(); i++, k++) + { + out << (k+1) << '\t'; + int k2 = 1; + for (iterator j = begin(); j != end(); j++, k2++) + if ( j != i && (*i)->compatible(*(*j))) + { + out << k2 << " "; + } + out << endl; + } } /** - calculate sum of weights of preserved splits in the taxa_set - @param taxa_set a set 
of taxa + calculate sum of weights of preserved splits in the taxa_set + @param taxa_set a set of taxa */ double SplitGraph::calcWeight(Split &taxa_set) { - double sum = 0.0; - for (iterator it = begin(); it != end(); it++) - if ((*it)->preserved(taxa_set)) - sum += (*it)->getWeight(); - return sum; + double sum = 0.0; + for (iterator it = begin(); it != end(); it++) + if ((*it)->preserved(taxa_set)) + sum += (*it)->getWeight(); + return sum; } int SplitGraph::countSplits(Split &taxa_set) { - int cnt = 0; - for (iterator it = begin(); it != end(); it++) - if ((*it)->preserved(taxa_set)) - cnt++; - return cnt; + int cnt = 0; + for (iterator it = begin(); it != end(); it++) + if ((*it)->preserved(taxa_set)) + cnt++; + return cnt; } int SplitGraph::countInternalSplits(Split &taxa_set) { - int cnt = 0; - for (iterator it = begin(); it != end(); it++) - if ((*it)->trivial() < 0 && (*it)->preserved(taxa_set)) - cnt++; - return cnt; + int cnt = 0; + for (iterator it = begin(); it != end(); it++) + if ((*it)->trivial() < 0 && (*it)->preserved(taxa_set)) + cnt++; + return cnt; } /** - calculate sum of weights of all splits + calculate sum of weights of all splits */ double SplitGraph::calcWeight() { - double sum = 0.0; - for (iterator it = begin(); it != end(); it++) - sum += (*it)->weight; - return sum; + double sum = 0.0; + for (iterator it = begin(); it != end(); it++) + sum += (*it)->weight; + return sum; } double SplitGraph::calcTrivialWeight() { - double sum = 0.0; - for (iterator it = begin(); it != end(); it++) - if ((*it)->trivial() >= 0) - sum += (*it)->weight; - return sum; + double sum = 0.0; + for (iterator it = begin(); it != end(); it++) + if ((*it)->trivial() >= 0) + sum += (*it)->weight; + return sum; } double SplitGraph::maxWeight() { - double m = -1e6; - for (iterator it = begin(); it != end(); it++) - if (m < (*it)->weight) m = (*it)->weight; - return m; + double m = -1e6; + for (iterator it = begin(); it != end(); it++) + if (m < (*it)->weight) m = 
(*it)->weight; + return m; } void SplitGraph::generateTaxaSet(char *filename, int size, int overlap, int times) { - ofstream out(filename); - if (!out.is_open()) - outError(ERR_WRITE_OUTPUT, filename); - ASSERT(overlap <= size); - int total = 2*size - overlap; - int ntaxa = getNTaxa(); - for (int cnt = 0; cnt < times; cnt++) { - // generate random taxon index - IntVector ranvec; - BoolVector occur(ntaxa, false); - int i; - for (i = 0; i < total; i++) { - int rnum; - do { rnum = random_int(ntaxa); } while (occur[rnum]); - ranvec.push_back(rnum); - occur[rnum] = true; - } - // now write the first set - out << size << endl; - for (i = 0; i < size; i++) - out << taxa->GetTaxonLabel(ranvec[i]) << endl; - out << endl; - // now write the second set - out << size << endl; - for (i = size-overlap; i < total; i++) - out << taxa->GetTaxonLabel(ranvec[i]) << endl; - out << endl; - } - out.close(); + ofstream out(filename); + if (!out.is_open()) + outError(ERR_WRITE_OUTPUT, filename); + ASSERT(overlap <= size); + int total = 2*size - overlap; + int ntaxa = getNTaxa(); + for (int cnt = 0; cnt < times; cnt++) { + // generate random taxon index + IntVector ranvec; + BoolVector occur(ntaxa, false); + int i; + for (i = 0; i < total; i++) { + int rnum; + do { rnum = random_int(ntaxa); } while (occur[rnum]); + ranvec.push_back(rnum); + occur[rnum] = true; + } + // now write the first set + out << size << endl; + for (i = 0; i < size; i++) + out << taxa->GetTaxonLabel(ranvec[i]) << endl; + out << endl; + // now write the second set + out << size << endl; + for (i = size-overlap; i < total; i++) + out << taxa->GetTaxonLabel(ranvec[i]) << endl; + out << endl; + } + out.close(); } void SplitGraph::calcDistance(char *filename) { - ofstream out(filename); - if (!out.is_open()) - outError(ERR_WRITE_OUTPUT, filename); - mmatrix(double) dist; - int i, j; - calcDistance(dist); - - int ntaxa = getNTaxa(); - - // now write the distances in phylip .dist format - out << ntaxa << endl; - - for (i = 
0; i < ntaxa; i++) { - out << taxa->GetTaxonLabel(i) << " "; - for (j = 0; j < ntaxa; j++) { - out << dist[i][j] << " "; - } - out << endl; - } - out.close(); + ofstream out(filename); + if (!out.is_open()) + outError(ERR_WRITE_OUTPUT, filename); + mmatrix(double) dist; + int i, j; + calcDistance(dist); + + int ntaxa = getNTaxa(); + + // now write the distances in phylip .dist format + out << ntaxa << endl; + + for (i = 0; i < ntaxa; i++) { + out << taxa->GetTaxonLabel(i) << " "; + for (j = 0; j < ntaxa; j++) { + out << dist[i][j] << " "; + } + out << endl; + } + out.close(); } void SplitGraph::calcDistance(mmatrix(double) &dist) { - int ntaxa = getNTaxa(); - iterator it; - vector vi, vj; - vector::iterator i, j; - - dist.resize(ntaxa); - for (mmatrix(double)::iterator di = dist.begin(); di != dist.end(); di++) - (*di).resize(ntaxa, 0); - - for (it = begin(); it != end(); it++) { - (*it)->getTaxaList(vi, vj); - for (i = vi.begin(); i != vi.end(); i++) - for (j = vj.begin(); j < vj.end(); j++) { - dist[*i][*j] += (*it)->weight; - dist[*j][*i] += (*it)->weight; - } - } + int ntaxa = getNTaxa(); + iterator it; + vector vi, vj; + vector::iterator i, j; + + dist.resize(ntaxa); + for (mmatrix(double)::iterator di = dist.begin(); di != dist.end(); di++) + (*di).resize(ntaxa, 0); + + for (it = begin(); it != end(); it++) { + (*it)->getTaxaList(vi, vj); + for (i = vi.begin(); i != vi.end(); i++) + for (j = vj.begin(); j < vj.end(); j++) { + dist[*i][*j] += (*it)->weight; + dist[*j][*i] += (*it)->weight; + } + } } void SplitGraph::calcDistance(mmatrix(double) &dist, vector &taxa_order) { - int ntaxa = getNTaxa(); - int i, j; - - mmatrix(double) my_dist; - calcDistance(my_dist); - dist.resize(ntaxa); - for (i = 0; i < ntaxa; i++) { - dist[i].resize(ntaxa); - for (j = 0; j < ntaxa; j++) - dist[i][j] = my_dist[taxa_order[i]][taxa_order[j]]; - } + int ntaxa = getNTaxa(); + int i, j; + + mmatrix(double) my_dist; + calcDistance(my_dist); + dist.resize(ntaxa); + for (i = 0; i < 
ntaxa; i++) { + dist[i].resize(ntaxa); + for (j = 0; j < ntaxa; j++) + dist[i][j] = my_dist[taxa_order[i]][taxa_order[j]]; + } } bool SplitGraph::checkCircular(mmatrix(double) &dist) { - return true; - int ntaxa = getNTaxa(); - Split taxa_set(ntaxa, 0.0); - for (int i = 0; i < ntaxa-2; i++) - for (int j = i+1; j < ntaxa-1; j++) - for (int k = j+1; k < ntaxa; k++) { - taxa_set.addTaxon(i); - taxa_set.addTaxon(j); - taxa_set.addTaxon(k); - taxa_set.weight = calcWeight(taxa_set); - if (fabs(2 * taxa_set.weight - (dist[i][j] + dist[i][k] + dist[j][k])) > 0.0001) { - cout << "Taxa " << i << " " << j << " " << k; - cout << " do not satisfy circular equation!" << endl; - cout << "Weight = " << taxa_set.weight << endl; - cout << "Sum dist/2 = " << (dist[i][j] + dist[i][k] + dist[j][k]) / 2.0 << endl; - cout << "dist = " << dist[i][j] << " " << dist[i][k] << " " - << dist[j][k] << endl; - return false; - } - taxa_set.removeTaxon(i); - taxa_set.removeTaxon(j); - taxa_set.removeTaxon(k); - } - return true; + return true; + int ntaxa = getNTaxa(); + Split taxa_set(ntaxa, 0.0); + for (int i = 0; i < ntaxa-2; i++) + for (int j = i+1; j < ntaxa-1; j++) + for (int k = j+1; k < ntaxa; k++) { + taxa_set.addTaxon(i); + taxa_set.addTaxon(j); + taxa_set.addTaxon(k); + taxa_set.weight = calcWeight(taxa_set); + if (fabs(2 * taxa_set.weight - (dist[i][j] + dist[i][k] + dist[j][k])) > 0.0001) { + cout << "Taxa " << i << " " << j << " " << k; + cout << " do not satisfy circular equation!" << endl; + cout << "Weight = " << taxa_set.weight << endl; + cout << "Sum dist/2 = " << (dist[i][j] + dist[i][k] + dist[j][k]) / 2.0 << endl; + cout << "dist = " << dist[i][j] << " " << dist[i][k] << " " + << dist[j][k] << endl; + return false; + } + taxa_set.removeTaxon(i); + taxa_set.removeTaxon(j); + taxa_set.removeTaxon(k); + } + return true; } void SplitGraph::generateCircular(Params ¶ms) { - int i, j; - int ntaxa = params.sub_size; - int num_splits = (params.num_splits > 0) ? 
params.num_splits : 3 * ntaxa; - if (num_splits < ntaxa) - outError(ERR_FEW_SPLITS); - - taxa = new NxsTaxaBlock(); - splits = new MSplitsBlock(this); - - double threshold = (ntaxa > 3) ? (double)(num_splits - ntaxa) / (ntaxa*(ntaxa-3)/2) : 0.0; - - // insert all trivial splits - for (i = 0; i < ntaxa; i++) { - double weight = randomLen(params); - Split *sp = new Split(ntaxa, weight); - sp->addTaxon(i); - push_back(sp); - ostringstream str; - str << "T" << (i+1); - taxa->AddTaxonLabel(NxsString(str.str().c_str())); - splits->cycle.push_back(i); - } - - // randomly insert internal splits - for (i = 0; i < ntaxa-2 && getNSplits() < num_splits; i++) - for (j = i+1; j < ntaxa && j < ntaxa-3+i; j++) { - double choice = random_double(); - if (choice > threshold) continue; - double weight = randomLen(params); - Split *sp = new Split(ntaxa, weight); - for (int k = i; k <= j; k++) - sp->addTaxon(k); - push_back(sp); - if (getNSplits() >= num_splits) break; - } - - ofstream out(params.user_file); - if (!out.is_open()) { - outError(ERR_WRITE_OUTPUT, params.user_file); - } - - saveFileNexus(out); - out.close(); + int i, j; + int ntaxa = params.sub_size; + int num_splits = (params.num_splits > 0) ? params.num_splits : 3 * ntaxa; + if (num_splits < ntaxa) + outError(ERR_FEW_SPLITS); + + taxa = new NxsTaxaBlock(); + splits = new MSplitsBlock(this); + + double threshold = (ntaxa > 3) ? 
(double)(num_splits - ntaxa) / (ntaxa*(ntaxa-3)/2) : 0.0; + + // insert all trivial splits + for (i = 0; i < ntaxa; i++) { + double weight = randomLen(params); + Split *sp = new Split(ntaxa, weight); + sp->addTaxon(i); + push_back(sp); + ostringstream str; + str << "T" << (i+1); + taxa->AddTaxonLabel(NxsString(str.str().c_str())); + splits->cycle.push_back(i); + } + + // randomly insert internal splits + for (i = 0; i < ntaxa-2 && getNSplits() < num_splits; i++) + for (j = i+1; j < ntaxa && j < ntaxa-3+i; j++) { + double choice = random_double(); + if (choice > threshold) continue; + double weight = randomLen(params); + Split *sp = new Split(ntaxa, weight); + for (int k = i; k <= j; k++) + sp->addTaxon(k); + push_back(sp); + if (getNSplits() >= num_splits) break; + } + + ofstream out(params.user_file); + if (!out.is_open()) { + outError(ERR_WRITE_OUTPUT, params.user_file); + } + + saveFileNexus(out); + out.close(); } void SplitGraph::saveFileNexus(ostream &out, bool omit_trivial) { - int ntaxa = getNTaxa(); - int i; - out << "#nexus" << endl << endl; - out << "BEGIN Taxa;" << endl; - out << "DIMENSIONS ntax=" << ntaxa << ";" << endl; - out << "TAXLABELS" << endl; - for (i = 0; i < ntaxa; i++) - out << "[" << i+1 << "] '" << taxa->GetTaxonLabel(i) << "'" << endl; - out << ";" << endl << "END; [Taxa]" << endl << endl; - out << "BEGIN Splits;" << endl; - out << "DIMENSIONS ntax=" << ntaxa << " nsplits=" << ((omit_trivial) ? 
getNSplits() - getNTrivialSplits() : getNSplits()) << ";" << endl; - out << "FORMAT labels=no weights=yes confidences=no intervals=no;" << endl; - if (isCircular()) { - out << "CYCLE"; - for (i = 0; i < ntaxa; i++) - out << " " << splits->cycle[i] + 1; - out << ";" << endl; - } - out << "MATRIX" << endl; - int near_zeros = 0; - int zeros = 0; - for (iterator it = begin(); it != end(); it++) { - if (omit_trivial && (*it)->trivial() >= 0) continue; - if ((*it)->weight == 0.0) zeros ++; - if ((*it)->weight <= 1e-6) near_zeros ++; - out << "\t" << (*it)->weight << "\t"; - for (i = 0; i < ntaxa; i++) - if ((*it)->containTaxon(i)) - out << " " << i+1; - out << "," << endl; - } - out << ";" << endl << "END; [Splits]" << endl << endl; - if (near_zeros) { - //outWarning("Some nearly-zero split weights observed"); - //cout << zeros << " zero-weights and " << near_zeros << " near zero weights!" << endl; - } + int ntaxa = getNTaxa(); + int i; + out << "#nexus" << endl << endl; + out << "BEGIN Taxa;" << endl; + out << "DIMENSIONS ntax=" << ntaxa << ";" << endl; + out << "TAXLABELS" << endl; + for (i = 0; i < ntaxa; i++) + out << "[" << i+1 << "] '" << taxa->GetTaxonLabel(i) << "'" << endl; + out << ";" << endl << "END; [Taxa]" << endl << endl; + out << "BEGIN Splits;" << endl; + out << "DIMENSIONS ntax=" << ntaxa << " nsplits=" << ((omit_trivial) ? 
getNSplits() - getNTrivialSplits() : getNSplits()) << ";" << endl; + out << "FORMAT labels=no weights=yes confidences=no intervals=no;" << endl; + if (isCircular()) { + out << "CYCLE"; + for (i = 0; i < ntaxa; i++) + out << " " << splits->cycle[i] + 1; + out << ";" << endl; + } + out << "MATRIX" << endl; + int near_zeros = 0; + int zeros = 0; + for (iterator it = begin(); it != end(); it++) { + if (omit_trivial && (*it)->trivial() >= 0) continue; + if ((*it)->weight == 0.0) zeros ++; + if ((*it)->weight <= 1e-6) near_zeros ++; + out << "\t" << (*it)->weight << "\t"; + for (i = 0; i < ntaxa; i++) + if ((*it)->containTaxon(i)) + out << " " << i+1; + out << "," << endl; + } + out << ";" << endl << "END; [Splits]" << endl << endl; + if (near_zeros) { + //outWarning("Some nearly-zero split weights observed"); + //cout << zeros << " zero-weights and " << near_zeros << " near zero weights!" << endl; + } } void SplitGraph::saveFileStarDot(ostream &out, bool omit_trivial) { - int ntaxa = getNTaxa(); - int i; - for (iterator it = begin(); it != end(); it++) { - if (omit_trivial && (*it)->trivial() >= 0) continue; - bool swap_code = !(*it)->containTaxon(0); - if (swap_code) { - for (i = 0; i < ntaxa; i++) - out << (((*it)->containTaxon(i)) ? '.' : '*'); - } else { - for (i = 0; i < ntaxa; i++) - out << (((*it)->containTaxon(i)) ? '*' : '.'); - } - out << "\t" << (*it)->weight << endl; - } + int ntaxa = getNTaxa(); + int i; + for (iterator it = begin(); it != end(); it++) { + if (omit_trivial && (*it)->trivial() >= 0) continue; + bool swap_code = !(*it)->containTaxon(0); + if (swap_code) { + for (i = 0; i < ntaxa; i++) + out << (((*it)->containTaxon(i)) ? '.' : '*'); + } else { + for (i = 0; i < ntaxa; i++) + out << (((*it)->containTaxon(i)) ? 
'*' : '.'); + } + out << "\t" << (*it)->weight << endl; + } } void SplitGraph::saveFile(const char* out_file, InputType file_format, bool omit_trivial) { @@ -621,9 +622,9 @@ void SplitGraph::saveFile(const char* out_file, InputType file_format, bool omit out.exceptions(ios::failbit | ios::badbit); out.open(out_file); if (file_format == IN_NEXUS) - saveFileNexus(out, omit_trivial); - else - saveFileStarDot(out, omit_trivial); + saveFileNexus(out, omit_trivial); + else + saveFileStarDot(out, omit_trivial); out.close(); } catch (ios::failure) { outError(ERR_WRITE_OUTPUT, out_file); @@ -631,138 +632,138 @@ void SplitGraph::saveFile(const char* out_file, InputType file_format, bool omit } void SplitGraph::scaleWeight(double norm, bool make_int, int precision) { - for (iterator itg = begin(); itg != end(); itg ++ ) - if (make_int) - (*itg)->setWeight( round((*itg)->getWeight()*norm) ); - else if (precision < 0) - (*itg)->setWeight( (*itg)->getWeight()*norm); - else - (*itg)->setWeight( round((*itg)->getWeight()*norm*pow((double)10.0,precision))/pow((double)10.0,precision)); + for (iterator itg = begin(); itg != end(); itg ++ ) + if (make_int) + (*itg)->setWeight( round((*itg)->getWeight()*norm) ); + else if (precision < 0) + (*itg)->setWeight( (*itg)->getWeight()*norm); + else + (*itg)->setWeight( round((*itg)->getWeight()*norm*pow((double)10.0,precision))/pow((double)10.0,precision)); } // TODO Implement a more efficient function using Hash Table bool SplitGraph::containSplit(Split &sp) { - Split invert_sp(sp); - invert_sp.invert(); - for (iterator it = begin(); it != end(); it++) - if ((*(*it)) == sp || (*(*it)) == invert_sp) - return true; - return false; + Split invert_sp(sp); + invert_sp.invert(); + for (iterator it = begin(); it != end(); it++) + if ((*(*it)) == sp || (*(*it)) == invert_sp) + return true; + return false; } double SplitGraph::computeBoundary(Split &area) { - if (!areas_boundary) return 0.0; - int nareas = sets->getNSets(); - double boundary = 0.0; - 
for (int i = 0; i < nareas; i++) - if (area.containTaxon(i)) { - boundary += areas_boundary[i*nareas+i]; - for (int j = i+1; j < nareas; j++) - if (area.containTaxon(j)) - boundary -= 2.0 * areas_boundary[i*nareas+j]; - } - return boundary; + if (!areas_boundary) return 0.0; + int nareas = sets->getNSets(); + double boundary = 0.0; + for (int i = 0; i < nareas; i++) + if (area.containTaxon(i)) { + boundary += areas_boundary[i*nareas+i]; + for (int j = i+1; j < nareas; j++) + if (area.containTaxon(j)) + boundary -= 2.0 * areas_boundary[i*nareas+j]; + } + return boundary; } bool SplitGraph::compatible(Split *sp) { - for (iterator it = begin(); it != end(); it++) - if (!(*it)->compatible(*sp)) - return false; - return true; + for (iterator it = begin(); it != end(); it++) + if (!(*it)->compatible(*sp)) + return false; + return true; } void SplitGraph::findMaxCompatibleSplits(SplitGraph &maxsg) { - // maximum number of compatible splits = 2n-3! - int max_splits = getNTaxa() * 2 - 3; - - // myset will be sorted by weight in descending order - SplitSet myset; - myset.insert(myset.end(), begin(), end()); - sort(myset.begin(), myset.end(), splitweightcmp); - - // now build the spset - if (!maxsg.taxa) - maxsg.taxa = new NxsTaxaBlock(); - if (!maxsg.splits) - maxsg.splits = new MSplitsBlock(&maxsg); - if (!maxsg.pda) - maxsg.pda = new MPdaBlock(&maxsg); - - for (int i = 0; i < getNTaxa(); i++) - maxsg.taxa->AddTaxonLabel(taxa->GetTaxonLabel(i)); - - // make the cycle - maxsg.splits->cycle = splits->cycle; - // make the splits - - for (SplitSet::iterator it = myset.begin(); it != myset.end(); it++) - if (maxsg.compatible(*it)){ - maxsg.push_back(new Split(*(*it))); - //(*it)->report(cout); - if (maxsg.size() >= max_splits) - break; - } - myset.clear(); + // maximum number of compatible splits = 2n-3! 
+ int max_splits = getNTaxa() * 2 - 3; + + // myset will be sorted by weight in descending order + SplitSet myset; + myset.insert(myset.end(), begin(), end()); + sort(myset.begin(), myset.end(), splitweightcmp); + + // now build the spset + if (!maxsg.taxa) + maxsg.taxa = new NxsTaxaBlock(); + if (!maxsg.splits) + maxsg.splits = new MSplitsBlock(&maxsg); + if (!maxsg.pda) + maxsg.pda = new MPdaBlock(&maxsg); + + for (int i = 0; i < getNTaxa(); i++) + maxsg.taxa->AddTaxonLabel(taxa->GetTaxonLabel(i)); + + // make the cycle + maxsg.splits->cycle = splits->cycle; + // make the splits + + for (SplitSet::iterator it = myset.begin(); it != myset.end(); it++) + if (maxsg.compatible(*it)){ + maxsg.push_back(new Split(*(*it))); + //(*it)->report(cout); + if (maxsg.size() >= max_splits) + break; + } + myset.clear(); } bool SplitGraph::isWeaklyCompatible() { - if (getNSplits() < 3) return true; - for (iterator it1 = begin(); it1+2 != end(); it1++) - for (iterator it2 = it1+1; it2+1 != end(); it2++) - for (iterator it3 = it2+1; it3 != end(); it3++) { - Split sp1(*(*it1)); - Split sp2(*(*it2)); - Split sp3(*(*it3)); - Split sp(sp1); - sp *= sp2; - sp *= sp3; - if (sp.isEmpty()) continue; - sp1.invert(); - sp2.invert(); - sp = sp1; - sp *= sp2; - sp *= sp3; - if (sp.isEmpty()) continue; - sp2.invert(); - sp3.invert(); - sp = sp1; - sp *= sp2; - sp *= sp3; - if (sp.isEmpty()) continue; - sp1.invert(); - sp2.invert(); - sp = sp1; - sp *= sp2; - sp *= sp3; - if (sp.isEmpty()) continue; - return false; - } - return true; + if (getNSplits() < 3) return true; + for (iterator it1 = begin(); it1+2 != end(); it1++) + for (iterator it2 = it1+1; it2+1 != end(); it2++) + for (iterator it3 = it2+1; it3 != end(); it3++) { + Split sp1(*(*it1)); + Split sp2(*(*it2)); + Split sp3(*(*it3)); + Split sp(sp1); + sp *= sp2; + sp *= sp3; + if (sp.isEmpty()) continue; + sp1.invert(); + sp2.invert(); + sp = sp1; + sp *= sp2; + sp *= sp3; + if (sp.isEmpty()) continue; + sp2.invert(); + sp3.invert(); + sp 
= sp1; + sp *= sp2; + sp *= sp3; + if (sp.isEmpty()) continue; + sp1.invert(); + sp2.invert(); + sp = sp1; + sp *= sp2; + sp *= sp3; + if (sp.isEmpty()) continue; + return false; + } + return true; } void SplitGraph::getTaxaName(vector &taxname) { - taxname.clear(); - for (int i = 0; i < getNTaxa(); i++) - taxname.push_back(taxa->GetTaxonLabel(i)); + taxname.clear(); + for (int i = 0; i < getNTaxa(); i++) + taxname.push_back(taxa->GetTaxonLabel(i)); } int SplitGraph::findLeafName(string &name) { - for (int i = 0; i < getNTaxa(); i++) - if (taxa->GetTaxonLabel(i) == name) - return i; - return -1; + for (int i = 0; i < getNTaxa(); i++) + if (taxa->GetTaxonLabel(i) == name) + return i; + return -1; } int SplitGraph::removeTrivialSplits() { int removed = 0; - for (iterator itg = begin(); itg != end(); ) { - if ((*itg)->trivial() >= 0) { - removed++; - delete (*itg); - (*itg) = back(); - pop_back(); - } else itg++; - } + for (iterator itg = begin(); itg != end(); ) { + if ((*itg)->trivial() >= 0) { + removed++; + delete (*itg); + (*itg) = back(); + pop_back(); + } else itg++; + } return removed; } diff --git a/pda/splitgraph.h b/pda/splitgraph.h index be8be141f..089cbed9a 100644 --- a/pda/splitgraph.h +++ b/pda/splitgraph.h @@ -246,7 +246,7 @@ class SplitGraph : public vector, public CheckpointFactory /** @return number of splits */ - int getNSplits() { + size_t getNSplits() { return size(); } diff --git a/phylo-yaml/CMakeLists.txt b/phylo-yaml/CMakeLists.txt new file mode 100644 index 000000000..146a16010 --- /dev/null +++ b/phylo-yaml/CMakeLists.txt @@ -0,0 +1,6 @@ +add_library(phyloYAML +statespace.h +statespace.cpp +) + +target_link_libraries(phyloYAML yaml-cpp) diff --git a/phylo-yaml/statespace.cpp b/phylo-yaml/statespace.cpp new file mode 100644 index 000000000..a0ed19e28 --- /dev/null +++ b/phylo-yaml/statespace.cpp @@ -0,0 +1,497 @@ +// +// C++ Implementation: StateSpace +// +// Description: +// +// +// Author: BUI Quang Minh(C) 2018 +// +// Copyright: See 
COPYING file that comes with this distribution +// +// +#include "statespace.h" + +namespace PML { + +const char* const ERR_NOT_A_LIST = "list '[...]' expected"; +const char* const ERR_NOT_A_MAP = "'key: value' pairs expected"; +const char* const ERR_UNDEFINED_STATE = "undefined state"; +const char* const ERR_STRING_LIST = "string or list [...] expected"; +const char* const ERR_TRANSLATE_LENGTH = "translate length different from #states"; + +const char* const KEY_DATATYPE = "datatype"; +const char* const KEY_STATE = "state"; +const char* const KEY_MISSING = "missing"; +const char* const KEY_GAP = "gap"; +const char* const KEY_EQUATE = "equate"; +const char* const KEY_TRANSLATE = "translate"; + +const char* builtin_state_spaces = R"( +### DNA data definition ### +- datatype: DNA + state: &Nucleotide [ A, C, G, T ] # anchor to Nucleotide + missing: &NTmissing [ N, "?" ] + gap: &NTgap "-" + equate: + U: T # T and U are the same + R: [A, G] # R is interpreted as A or G + Y: [C, T] + W: [A, T] + S: [G, C] + M: [A, C] + K: [G, T] + B: [C, G, T] + H: [A, C, T] + D: [A, G, T] + V: [A, G, C] + +### Amino-acid data definition ### +- datatype: AA + state: [ A, R, N, D, C, Q, E, G, H, I, L, K, M, F, P, S, T, W, Y, V ] + missing: [ X, "?", "*" ] + gap: "-" + equate: + B: [ N, D ] + Z: [ Q, E ] + J: [ I, L ] + +### Binary (0/1) data ### +- datatype: BIN + state: [ 0, 1 ] + missing: "?" + gap: "-" + +### RY data definition ### +- datatype: RY + state: [ R, Y ] # R=AG, Y=CT + missing: [ N, "?", W, S, M, K, B, H, D, V ] + gap: "-" + equate: + A: R + C: Y + G: R + T: Y + U: Y + +### Morphological data ### +- datatype: MORPH + state: [ 0..9, A..Z ] + missing: "?" 
+ gap: "-" + +### Codon data with standard genetic code ### +- datatype: CODON + state: [ *Nucleotide, *Nucleotide, *Nucleotide ] # reference to Nucleotide + missing: [ *NTmissing, *NTmissing, *NTmissing ] + gap: [ *NTgap, *NTgap, *NTgap ] + translate: [ K, N, K, N, T, T, T, T, R, S, R, S, I, I, M, I, + Q, H, Q, H, P, P, P, P, R, R, R, R, L, L, L, L, + E, D, E, D, A, A, A, A, G, G, G, G, V, V, V, V, + X, Y, X, Y, S, S, S, S, X, C, W, C, L, F, L, F ] + +### Codon data with Vertebrate Mitochondrial code ### +- datatype: CODON2 + state: [ *Nucleotide, *Nucleotide, *Nucleotide ] # reference to Nucleotide + missing: [ *NTmissing, *NTmissing, *NTmissing ] + gap: [ *NTgap, *NTgap, *NTgap ] + translate: KNKNTTTT*S*SMIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF + +### Codon data with Vertebrate Mitochondrial code ### +- datatype: CODON2 + state: [ *Nucleotide, *Nucleotide, *Nucleotide ] # reference to Nucleotide + missing: [ *NTmissing, *NTmissing, *NTmissing ] + gap: [ *NTgap, *NTgap, *NTgap ] + translate: KNKNTTTT*S*SMIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF + +### Codon data with Yeast Mitochondrial code ### +- datatype: CODON3 + state: [ *Nucleotide, *Nucleotide, *Nucleotide ] # reference to Nucleotide + missing: [ *NTmissing, *NTmissing, *NTmissing ] + gap: [ *NTgap, *NTgap, *NTgap ] + translate: KNKNTTTTRSRSMIMIQHQHPPPPRRRRTTTTEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF + +### Codon data with Mold, Protozoan code ### +- datatype: CODON4 + state: [ *Nucleotide, *Nucleotide, *Nucleotide ] # reference to Nucleotide + missing: [ *NTmissing, *NTmissing, *NTmissing ] + gap: [ *NTgap, *NTgap, *NTgap ] + translate: KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF + +### Codon data with Invertebrate Mitochondrial code ### +- datatype: CODON5 + state: [ *Nucleotide, *Nucleotide, *Nucleotide ] # reference to Nucleotide + missing: [ *NTmissing, *NTmissing, *NTmissing ] + gap: [ *NTgap, *NTgap, *NTgap ] + translate: 
KNKNTTTTSSSSMIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF + +### Codon data with Ciliate, Dasycladacean and Hexamita Nuclear code ### +- datatype: CODON6 + state: [ *Nucleotide, *Nucleotide, *Nucleotide ] # reference to Nucleotide + missing: [ *NTmissing, *NTmissing, *NTmissing ] + gap: [ *NTgap, *NTgap, *NTgap ] + translate: KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVQYQYSSSS*CWCLFLF + +### Codon data with Echinoderm and Flatworm Mitochondrial code ### +- datatype: CODON9 + state: [ *Nucleotide, *Nucleotide, *Nucleotide ] # reference to Nucleotide + missing: [ *NTmissing, *NTmissing, *NTmissing ] + gap: [ *NTgap, *NTgap, *NTgap ] + translate: NNKNTTTTSSSSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF + +### Codon data with Euplotid Nuclear code ### +- datatype: CODON10 + state: [ *Nucleotide, *Nucleotide, *Nucleotide ] # reference to Nucleotide + missing: [ *NTmissing, *NTmissing, *NTmissing ] + gap: [ *NTgap, *NTgap, *NTgap ] + translate: KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSCCWCLFLF + +### Codon data with Bacterial, Archaeal and Plant Plastid code ### +- datatype: CODON11 + state: [ *Nucleotide, *Nucleotide, *Nucleotide ] # reference to Nucleotide + missing: [ *NTmissing, *NTmissing, *NTmissing ] + gap: [ *NTgap, *NTgap, *NTgap ] + translate: KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSS*CWCLFLF + +### Codon data with Alternative Yeast Nuclear code ### +- datatype: CODON12 + state: [ *Nucleotide, *Nucleotide, *Nucleotide ] # reference to Nucleotide + missing: [ *NTmissing, *NTmissing, *NTmissing ] + gap: [ *NTgap, *NTgap, *NTgap ] + translate: KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLSLEDEDAAAAGGGGVVVV*Y*YSSSS*CWCLFLF + +### Codon data with Ascidian Mitochondrial code ### +- datatype: CODON13 + state: [ *Nucleotide, *Nucleotide, *Nucleotide ] # reference to Nucleotide + missing: [ *NTmissing, *NTmissing, *NTmissing ] + gap: [ *NTgap, *NTgap, *NTgap ] + translate: 
KNKNTTTTGSGSMIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF + +### Codon data with Alternative Flatworm Mitochondrial code ### +- datatype: CODON14 + state: [ *Nucleotide, *Nucleotide, *Nucleotide ] # reference to Nucleotide + missing: [ *NTmissing, *NTmissing, *NTmissing ] + gap: [ *NTgap, *NTgap, *NTgap ] + translate: NNKNTTTTSSSSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVYY*YSSSSWCWCLFLF + +### Codon data with Blepharisma Nuclear code ### +- datatype: CODON15 + state: [ *Nucleotide, *Nucleotide, *Nucleotide ] # reference to Nucleotide + missing: [ *NTmissing, *NTmissing, *NTmissing ] + gap: [ *NTgap, *NTgap, *NTgap ] + translate: KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*YQYSSSS*CWCLFLF + +### Codon data with Chlorophycean Mitochondrial code ### +- datatype: CODON16 + state: [ *Nucleotide, *Nucleotide, *Nucleotide ] # reference to Nucleotide + missing: [ *NTmissing, *NTmissing, *NTmissing ] + gap: [ *NTgap, *NTgap, *NTgap ] + translate: KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*YLYSSSS*CWCLFLF + +### Codon data with Trematode Mitochondrial code ### +- datatype: CODON21 + state: [ *Nucleotide, *Nucleotide, *Nucleotide ] # reference to Nucleotide + missing: [ *NTmissing, *NTmissing, *NTmissing ] + gap: [ *NTgap, *NTgap, *NTgap ] + translate: NNKNTTTTSSSSMIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF + +### Codon data with Scenedesmus obliquus mitochondrial code ### +- datatype: CODON22 + state: [ *Nucleotide, *Nucleotide, *Nucleotide ] # reference to Nucleotide + missing: [ *NTmissing, *NTmissing, *NTmissing ] + gap: [ *NTgap, *NTgap, *NTgap ] + translate: KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*YLY*SSS*CWCLFLF + +### Codon data with Thraustochytrium Mitochondrial code ### +- datatype: CODON23 + state: [ *Nucleotide, *Nucleotide, *Nucleotide ] # reference to Nucleotide + missing: [ *NTmissing, *NTmissing, *NTmissing ] + gap: [ *NTgap, *NTgap, *NTgap ] + translate: KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSS*CWC*FLF + +### 
Codon data with Pterobranchia mitochondrial code ### +- datatype: CODON24 + state: [ *Nucleotide, *Nucleotide, *Nucleotide ] # reference to Nucleotide + missing: [ *NTmissing, *NTmissing, *NTmissing ] + gap: [ *NTgap, *NTgap, *NTgap ] + translate: KNKNTTTTSSKSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF + +### Codon data with Candidate Division SR1 and Gracilibacteria code ### +- datatype: CODON25 + state: [ *Nucleotide, *Nucleotide, *Nucleotide ] # reference to Nucleotide + missing: [ *NTmissing, *NTmissing, *NTmissing ] + gap: [ *NTgap, *NTgap, *NTgap ] + translate: KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSGCWCLFLF + + +)"; + +StateSpace::StateSpace() { + num_states = 0; + num_all_states = 0; +} + +StateSpace::~StateSpace() { + +} + +bool StateSpace::isUnknown(StateType state) { + return (state == num_states); +} + +StateType StateSpace::toState(string str) { + StringStateMap::iterator it; + it = states.find(str); + if (it == states.end()) + throw str + " is not a valid state symbol"; + return it->second; +} + +void StateSpace::toState(string &str, StateVector &str_states) { + size_t pos; + for (pos = 0; pos < str.length();) { + bool found = false; + for (int len = min_state_len; len <= max_state_len; len++) { + auto it = states.find(str.substr(pos, len)); + if (it == states.end()) + continue; + found = true; + str_states.push_back(it->second); + pos += len; + break; + } + if (!found) + throw str.substr(pos, max_state_len) + " is not a valid state symbol"; + } +} + +string StateSpace::toString(StateType state) { + auto it = raw_states.find(state); + ASSERT(it != raw_states.end()); + return it->second; +} + +/** + parse a string with range (e.g. 
1..5) to a vector of string + */ +void parseRange(string str, StrVector &list) { + size_t pos; + if ((pos = str.find("..")) == string::npos) { + list.push_back(str); + return; + } + string first = str.substr(0, pos); + string last = str.substr(pos+2); + trimString(first); + trimString(last); + if (first.length() == 1 && last.length() == 1 && first[0] < last[0]) { + for (char ch = first[0]; ch <= last[0]; ch++) + list.push_back(string(1,ch)); + } else { + list.push_back(str); + } +} + +/** + parse a list into a vector of string + */ +void parseList(YAML::const_iterator first, YAML::const_iterator last, StrVector &list) { + StrVector this_list; + if (first->IsScalar()) + parseRange(first->Scalar(), this_list); + else if (first->IsSequence()) { + for (auto it = first->begin(); it != first->end(); it++) { + parseRange(it->Scalar(), this_list); + } + } else { + throw YAML::Exception(first->Mark(), ERR_STRING_LIST); + } + StrVector last_list; + first++; + if (first != last) + parseList(first, last, last_list); + else + last_list = { "" }; + for (auto sit = this_list.begin(); sit != this_list.end(); sit++) + for (auto sit2 = last_list.begin(); sit2 != last_list.end(); sit2++ ) + list.push_back(*sit + *sit2); +} + +/** + parse a YAML::Node into a list of strings + @param extend_length TRUE to make vector of characters if list has length 1 + */ +void parseStringList(YAML::Node node, StrVector &list, bool extend_length = false) { + if (node.IsScalar()) { + // scalar assumed to be string + parseRange(node.Scalar(), list); + } else if (node.IsSequence()) { + YAML::const_iterator it; + // check if a sequence of scalars + bool all_scalars = true; + for (it = node.begin(); it != node.end(); it++) + if (!it->IsScalar()) { + all_scalars = false; + break; + } + + if (all_scalars) { + for (it = node.begin(); it != node.end(); it++) + parseRange(it->Scalar(), list); + } else { + // now it can be a sequence of sequences, merge them together + parseList(node.begin(), node.end(), list); 
+ } + } else { + throw YAML::Exception(node.Mark(), ERR_STRING_LIST); + } + + if (list.size() == 1 && extend_length) { + // single list, convert to vector of characters + for (auto i = list[0].begin()+1; i != list[0].end(); i++) + list.push_back(string(1,*i)); + list[0] = list[0].substr(0,1); + } +} + +void StateSpace::resetStateSpace() { + space_name = ""; + num_states = 0; + num_all_states = 0; + states.clear(); + raw_states.clear(); + equate.clear(); + translate.clear(); + min_state_len = max_state_len = 0; +} + +void StateSpace::parseStateSpace(YAML::Node datatype) { + if (!datatype.IsMap()) + throw YAML::Exception(datatype.Mark(), ERR_NOT_A_MAP); + if (!datatype[KEY_DATATYPE]) + throw YAML::Exception(datatype.Mark(), "'datatype: XXX' declaration not found"); + resetStateSpace(); + space_name = datatype[KEY_DATATYPE].Scalar(); + // definition found + // parse state: symbols + if (!datatype[KEY_STATE]) + throw YAML::Exception(datatype.Mark(), "datatype does not have 'state: [...]'"); + StrVector allstates; + parseStringList(datatype[KEY_STATE], allstates); + if (allstates.size() < 2) + throw YAML::Exception(datatype[KEY_STATE].Mark(), "state space must have at least 2 states"); + StateType stateID = 0; + for (auto sit = allstates.begin(); sit != allstates.end(); sit++, stateID++) { + states[*sit] = stateID; + raw_states[stateID] = *sit; + } + num_states = stateID; + + if (verbose_mode >= VB_MED) + cout << states.size() << " " << KEY_STATE << endl; + + // parse missing: symbols + if (datatype[KEY_MISSING]) { + StrVector list; + parseStringList(datatype[KEY_MISSING], list); + for (auto i = list.begin(); i != list.end(); i++) { + states[*i] = stateID; + raw_states[stateID] = *i; + } + } + + // parse gap: symbols + if (datatype[KEY_GAP]) { + StrVector list; + parseStringList(datatype[KEY_GAP], list); + for (auto i = list.begin(); i != list.end(); i++) { + states[*i] = stateID; + raw_states[stateID] = *i; + } + } + + stateID++; + + // parse equate: symbols + 
YAML::Node node_equate; + if ((node_equate = datatype[KEY_EQUATE])) { + if (!node_equate.IsMap()) + throw YAML::Exception(node_equate.Mark(), ERR_NOT_A_MAP); + for (auto nit = node_equate.begin(); nit != node_equate.end(); nit++) { + string key = nit->first.Scalar(); + states[key] = stateID; + auto value = nit->second; + StrVector values; + parseStringList(value, values); + for (auto i = values.begin(); i != values.end(); i++) { + if (states.find(*i) == states.end()) + throw YAML::Exception(value.Mark(), ERR_UNDEFINED_STATE); + if (equate.find(stateID) == equate.end()) + equate[stateID] = { states[*i] }; + else + equate[stateID].push_back(states[*i]); + } + if (equate[stateID].size() == 1) { + // map to just one state, so it's not an ambiguous state + states[key] = equate[stateID][0]; + equate.erase(stateID); + } else { + // increase number of states + raw_states[stateID] = key; + stateID++; + } + } // for Node + if (verbose_mode >= VB_MED) + cout << equate.size() << " ambiguous states" << endl; + } // equate + + // parse translate + if (datatype[KEY_TRANSLATE]) { + parseStringList(datatype[KEY_TRANSLATE], translate, true); + if (translate.size() != num_states) + throw YAML::Exception(datatype[KEY_TRANSLATE].Mark(), ERR_TRANSLATE_LENGTH); + } + + num_all_states = stateID; + min_state_len = max_state_len = states.begin()->first.length(); + for (auto i = states.begin(); i != states.end(); i++) { + if (min_state_len > i->first.length()) + min_state_len = i->first.length(); + if (max_state_len < i->first.length()) + max_state_len = i->first.length(); + } +} + +void StateSpace::initStateSpace(SeqType seqtype) { + + string name; + switch (seqtype) { + case SEQ_DNA: name = "DNA"; break; + case SEQ_CODON: name = "CODON"; break; + case SEQ_MORPH: name = "MORPH"; break; + case SEQ_BINARY: name = "BIN"; break; + case SEQ_PROTEIN: name = "AA"; break; + case SEQ_MULTISTATE: name = "MULTI"; break; + case SEQ_POMO: outError("Unhandled POMO state space"); break; + case 
SEQ_UNKNOWN: ASSERT(0); + } + + try { + YAML::Node spaceDef = YAML::Load(builtin_state_spaces); + if (!spaceDef.IsSequence()) + throw YAML::Exception(spaceDef.Mark(), ERR_NOT_A_LIST); + for (auto it = spaceDef.begin(); it != spaceDef.end(); it++) + { + auto datatype = *it; + if (!(datatype[KEY_DATATYPE])) + continue; + if (datatype[KEY_DATATYPE].Scalar() == name) { + parseStateSpace(datatype); + break; + } + } + } catch (YAML::Exception &e) { + outError(e.what()); + } +} + +} // namespace PML diff --git a/phylo-yaml/statespace.h b/phylo-yaml/statespace.h new file mode 100644 index 000000000..f4f861a77 --- /dev/null +++ b/phylo-yaml/statespace.h @@ -0,0 +1,151 @@ + +// +// C++ Interface: StateSpace +// +// Description: +// +// +// Author: BUI Quang Minh (c) 2018 +// +// Copyright: See COPYING file that comes with this distribution +// +// + +#ifndef STATESPACE_H +#define STATESPACE_H + +#include +#include +#include +#include +#include "utils/tools.h" +#include "yaml-cpp/yaml.h" + +namespace PML { + +/** + StateType as 32-bit unsigned int + */ +typedef uint32_t StateType; + +typedef vector StateVector; + +enum SeqType { + SEQ_DNA, SEQ_PROTEIN, SEQ_BINARY, SEQ_MORPH, SEQ_MULTISTATE, SEQ_CODON, SEQ_POMO, SEQ_UNKNOWN +}; + +// IMPORTANT: refactor STATE_UNKNOWN +//const char STATE_UNKNOWN = 126; + +// TODO DS: This seems like a significant restriction. +/* PoMo: STATE_INVALID is not handled in PoMo. Set STATE_INVALID to + 127 to remove warning about comparison to char in alignment.cpp. + This is important if the maximum N will be increased above 21 + because then the state space is larger than 127 and we have to + think about something else. 
*/ +/* const unsigned char STATE_INVALID = 255; */ +const unsigned char STATE_INVALID = 127; + +#ifdef USE_HASH_MAP +typedef unordered_map StringIntMap; +typedef unordered_map StringStateMap; +typedef unordered_map StateStringMap; +typedef unordered_map StringDoubleHashMap; +typedef unordered_map IntIntMap; +#else +typedef map StringIntMap; +typedef map StringStateMap; +typedef map StateStringMap; +typedef map StringDoubleHashMap; +typedef map IntIntMap; +#endif + + +/** + general class defining state space + */ +class StateSpace { +public: + /** constructor */ + StateSpace(); + + /** destructor */ + ~StateSpace(); + + /** convert a raw string to single state ID */ + StateType toState(string str); + + /** + convert the entire string into vector of states + @param[in] str input string + @param[out] str_states output vector of StateType + */ + void toState(string &str, StateVector &str_states); + + /** convert a state back to raw string */ + string toString(StateType state); + + /** + check if a state is unknown (missing or gap) + */ + bool isUnknown(StateType state); + + /** get number of states */ + inline int getNStates() { return num_states; } + + /** get all number of states incl. 
missing/gap/ambiguous states */ + inline size_t getNAllStates() { return states.size(); } + + /** + initialise from a state definition string + @param datatype a YAML::Node structure + */ + void parseStateSpace(YAML::Node datatype); + + /** + initialise state space from a SeqType + @param seqtype sequence type + */ + void initStateSpace(SeqType seqtype); + + /** + reset state space + */ + void resetStateSpace(); + + /** number of state */ + int num_states; + +protected: + + /** state space name */ + string space_name; + + /** number of state */ + int num_all_states; + + /** map from raw state string to state ID */ + StringStateMap states; + + /** map from state ID to raw state string */ + StateStringMap raw_states; + + /** map from ambiguous states to vector of state ID */ + unordered_mapequate; + + /** vector of the same size as states to translate to another state space */ + StrVector translate; + +private: + + /** minimum length of state string */ + int min_state_len; + + /** maximum length of state string */ + int max_state_len; + +}; + +} // namespace PML + +#endif diff --git a/pll/avxLikelihood.c b/pll/avxLikelihood.c index 5202883c8..61b3fce53 100644 --- a/pll/avxLikelihood.c +++ b/pll/avxLikelihood.c @@ -30,10 +30,7 @@ * * AVX versions of the likelihood functions */ -#ifndef WIN32 -#include -#endif - +#include "systypes.h" #include #include #include @@ -4103,8 +4100,9 @@ void newviewGTRGAMMAPROT_AVX_GAPPED_SAVE(int tipCase, } break; default: - assert(0); - } + assert(0); + break; + } if(useFastScaling) *scalerIncrement = addScale; diff --git a/pll/bipartitionList.c b/pll/bipartitionList.c index 44c68884c..ee3a390e1 100644 --- a/pll/bipartitionList.c +++ b/pll/bipartitionList.c @@ -27,13 +27,7 @@ * @file bipartitionList.c */ #include "mem_alloc.h" - -#ifndef WIN32 -#include -#include -#include -#include -#endif +#include "systypes.h" #include #include @@ -417,7 +411,7 @@ double convergenceCriterion(pllHashTable *h, int mxtips) { for (hitem = h->Items[k]; 
hitem; hitem = hitem->next) { - pllBipartitionEntry *e = hitem->data; + pllBipartitionEntry* e = (pllBipartitionEntry*)(hitem->data); unsigned int *vector = e->treeVector; if(((vector[0] & 1) > 0) + ((vector[0] & 2) > 0) == 1) diff --git a/pll/cycle.h b/pll/cycle.h index 889932a70..114323ac8 100644 --- a/pll/cycle.h +++ b/pll/cycle.h @@ -251,7 +251,9 @@ INLINE_ELAPSED(__inline__) /* Visual C++, courtesy of Dirk Michaelis */ #if defined(_MSC_VER) && _MSC_VER >= 1400 && (defined(_M_AMD64) || defined(_M_X64)) && !defined(HAVE_TICK_COUNTER) -#include +#ifndef CLANG_UNDER_VS + #include +#endif #pragma intrinsic(__rdtsc) typedef unsigned __int64 ticks; #define getticks __rdtsc diff --git a/pll/evaluateGenericSpecial.c b/pll/evaluateGenericSpecial.c index 9a0dfc883..b50c29116 100644 --- a/pll/evaluateGenericSpecial.c +++ b/pll/evaluateGenericSpecial.c @@ -29,11 +29,7 @@ * @brief Functions for computing the log likelihood at a given branch of the tree (i.e. a virtual root that is placed at this branch) */ #include "mem_alloc.h" - -#ifndef WIN32 -#include -#endif - +#include "systypes.h" #include #include #include @@ -414,104 +410,95 @@ static double evaluateCatAsc(int *ex1, int *ex2, } -static double evaluateGammaAsc(int *ex1, int *ex2, - double *x1, double *x2, - double *tipVector, - unsigned char *tipX1, int n, double *diagptable, const int numStates) +static double evaluateGammaAsc(int* ex1, int* ex2, + double* x1, double* x2, + double* tipVector, + unsigned char* tipX1, int n, double* diagptable, const int numStates) { - double - exponent, - sum = 0.0, - unobserved, - term, - *left, - *right; - - int - i, - j, - l; - - const int - gammaStates = numStates * 4; - - unsigned char - tip[32]; + double + exponent, + sum = 0.0, + unobserved, + term; + + int + i, + j, + l; - ascertainmentBiasSequence(tip, numStates); - - if(tipX1) - { - for (i = 0; i < n; i++) - { - left = &(tipVector[numStates * tip[i]]); - - for(j = 0, term = 0.0; j < 4; j++) - { - right = &(x2[gammaStates * 
i + numStates * j]); - - for(l = 0; l < numStates; l++) - term += left[l] * right[l] * diagptable[j * numStates + l]; - } - - /* assumes that pow behaves as expected/specified for underflows - from the man page: - If result underflows, and is not representable, - a range error occurs and 0.0 is returned. - */ + const int + gammaStates = numStates * 4; - exponent = pow(PLL_MINLIKELIHOOD, (double)ex2[i]); + unsigned char + tip[32]; + + ascertainmentBiasSequence(tip, numStates); + + if (tipX1) + { + for (i = 0; i < n; i++) { + double* left = tipVector + numStates * tip[i]; + for (j = 0, term = 0.0; j < 4; j++) { + double* right = x2 + gammaStates * i + numStates * j; + for (l = 0; l < numStates; l++) { + term += left[l] * right[l] * diagptable[j * numStates + l]; + } + } + + /* assumes that pow behaves as expected/specified for underflows + from the man page: + If result underflows, and is not representable, + a range error occurs and 0.0 is returned. + */ + + exponent = pow(PLL_MINLIKELIHOOD, (double)ex2[i]); + + unobserved = fabs(term) * exponent; - unobserved = fabs(term) * exponent; - #ifdef _DEBUG_ASC - if(ex2[i] > 0) - { - printf("s %d\n", ex2[i]); - assert(0); - } + if (ex2[i] > 0) + { + printf("s %d\n", ex2[i]); + assert(0); + } #endif - - sum += unobserved; - } - } - else - { - for (i = 0; i < n; i++) - { - - for(j = 0, term = 0.0; j < 4; j++) - { - left = &(x1[gammaStates * i + numStates * j]); - right = &(x2[gammaStates * i + numStates * j]); - - for(l = 0; l < numStates; l++) - term += left[l] * right[l] * diagptable[j * numStates + l]; - } - - /* assumes that pow behaves as expected/specified for underflows - from the man page: - If result underflows, and is not representable, - a range error occurs and 0.0 is returned. 
- */ - exponent = pow(PLL_MINLIKELIHOOD, (double)(ex1[i] + ex2[i])); + sum += unobserved; + } + } + else + { + for (i = 0; i < n; i++) { + for (j = 0, term = 0.0; j < 4; j++) { + double* left = x1 + gammaStates * i + numStates * j; + double* right = x2 + gammaStates * i + numStates * j; + for (l = 0; l < numStates; l++) { + term += left[l] * right[l] * diagptable[j * numStates + l]; + } + } + + /* assumes that pow behaves as expected/specified for underflows + from the man page: + If result underflows, and is not representable, + a range error occurs and 0.0 is returned. + */ + + exponent = pow(PLL_MINLIKELIHOOD, (double)(ex1[i] + ex2[i])); + + unobserved = fabs(term) * exponent; - unobserved = fabs(term) * exponent; - #ifdef _DEBUG_ASC - if(ex2[i] > 0 || ex1[i] > 0) - { - printf("s %d %d\n", ex1[i], ex2[i]); - assert(0); - } + if (ex2[i] > 0 || ex1[i] > 0) + { + printf("s %d %d\n", ex1[i], ex2[i]); + assert(0); + } #endif - sum += unobserved; - } - } - - return sum; + sum += unobserved; + } + } + return sum; } @@ -2053,9 +2040,7 @@ void perSiteLogLikelihoods(pllInstance *tr, partitionList *pr, double *logLikeli for(i = lower, localCount = 0; i < upper; i++, localCount++) { - double - l; - + double l = 0; /* we need to switch of rate heterogeneity implementations here. 
when we have PSR we actually need to provide the per-site rate diff --git a/pll/evaluatePartialGenericSpecial.c b/pll/evaluatePartialGenericSpecial.c index 4d461a5b2..72920e2d9 100644 --- a/pll/evaluatePartialGenericSpecial.c +++ b/pll/evaluatePartialGenericSpecial.c @@ -29,10 +29,7 @@ */ #include "mem_alloc.h" - -#ifndef WIN32 -#include -#endif +#include "systypes.h" #include #include @@ -118,9 +115,9 @@ static __inline void computeVectorCAT_FLEX(double *lVector, int *eVector, double ump_x2, lz1, lz2, - *x1, - *x2, - *x3; + *x1 = NULL, + *x2 = NULL, + *x3 = NULL; int scale, @@ -367,15 +364,11 @@ static double evaluatePartialCAT_FLEX(int i, double ki, int counter, traversalI double evaluatePartialGeneric (pllInstance *tr, partitionList *pr, int i, double ki, int _model) { - double - result; - - - int - branchReference, + double result=0; + int branchReference, - /* number of states of the data type in this partition */ - states = pr->partitionData[_model]->states; + /* number of states of the data type in this partition */ + states = pr->partitionData[_model]->states; /* SOS ATTENTION: note the different indexing used for the parallel and sequential versions ! 
*/ @@ -495,7 +488,7 @@ static __inline void computeVectorGTRCAT_BINARY(double *lVector, int *eVector, d unsigned char **yVector, int mxtips) { double d1, d2, ump_x1, ump_x2, x1px2[2], lz1, lz2; - double *x1, *x2, *x3; + double *x1=NULL, *x2=NULL, *x3=NULL; int j, k, pNumber = ti->pNumber, @@ -520,6 +513,7 @@ static __inline void computeVectorGTRCAT_BINARY(double *lVector, int *eVector, d break; default: assert(0); + return; } lz1 = qz * ki; @@ -1226,7 +1220,7 @@ static __inline void computeVectorGTRCATPROT(double *lVector, int *eVector, doub unsigned char **yVector, int mxtips) { double d1[20], d2[20], ump_x1, ump_x2, x1px2[20], lz1, lz2; - double *x1, *x2, *x3; + double *x1=NULL, *x2=NULL, *x3=NULL; int j, k, scale = 1, pNumber = ti->pNumber, @@ -1235,7 +1229,6 @@ static __inline void computeVectorGTRCATPROT(double *lVector, int *eVector, doub x3 = &lVector[20 * (pNumber - mxtips)]; - switch(ti->tipCase) { case PLL_TIP_TIP: @@ -1252,6 +1245,7 @@ static __inline void computeVectorGTRCATPROT(double *lVector, int *eVector, doub break; default: assert(0); + return; } lz1 = qz * ki; @@ -1347,8 +1341,9 @@ static double evaluatePartialGTRCATPROT(int i, double ki, int counter, traversa assert(0 <= (trav->qNumber - mxtips) && (trav->qNumber - mxtips) < mxtips); - if(qz < PLL_ZMIN) - lz = PLL_ZMIN; + if (qz < PLL_ZMIN) { + qz = PLL_ZMIN; //James B. 23-Jul-2020. There was cross-talk here: lz was being set, not qz. 
+ } lz = log(qz); lz *= ki; diff --git a/pll/fastDNAparsimony.c b/pll/fastDNAparsimony.c index 10764654b..5d0bf9982 100644 --- a/pll/fastDNAparsimony.c +++ b/pll/fastDNAparsimony.c @@ -27,13 +27,7 @@ * @file fastDNAparsimony.c */ #include "mem_alloc.h" - -#ifndef WIN32 -#include -#include -#include -#include -#endif +#include "systypes.h" #include #include @@ -118,8 +112,11 @@ # define __builtin_popcount _mm_popcnt_u32 # define __builtin_popcountl _mm_popcnt_u64 # else +# if defined(CLANG_UNDER_VS) +# define _mm_popcnt_u64 _popcnt64 +# else # include - static __inline uint32_t __builtin_popcount (uint32_t a) { +static __inline uint32_t __builtin_popcount (uint32_t a) { // popcnt instruction not available uint32_t b = a - ((a >> 1) & 0x55555555); uint32_t c = (b & 0x33333333) + ((b >> 2) & 0x33333333); @@ -129,6 +126,7 @@ } //# define __builtin_popcount __popcnt # define __builtin_popcountl __popcnt64 +# endif # endif #endif @@ -158,7 +156,7 @@ extern double masterTime; #if (defined(__SSE3) || defined(__AVX)) -#ifdef _WIN32 +#if defined(_WIN32) &&!defined(WIN64) /* emulate with 32-bit version */ static __inline unsigned int vectorPopcount(INT_TYPE v) { @@ -336,13 +334,13 @@ static void newviewParsimonyIterativeFast(pllInstance *tr, partitionList *pr) parsimonyNumber *left[2], *right[2], - *this[2]; + *here[2]; for(k = 0; k < 2; k++) { left[k] = &(pr->partitionData[model]->parsVect[(width * 2 * qNumber) + width * k]); right[k] = &(pr->partitionData[model]->parsVect[(width * 2 * rNumber) + width * k]); - this[k] = &(pr->partitionData[model]->parsVect[(width * 2 * pNumber) + width * k]); + here[k] = &(pr->partitionData[model]->parsVect[(width * 2 * pNumber) + width * k]); } for(i = 0; i < width; i += INTS_PER_VECTOR) @@ -364,8 +362,8 @@ static void newviewParsimonyIterativeFast(pllInstance *tr, partitionList *pr) v_N = VECTOR_BIT_OR(l_A, l_C); - VECTOR_STORE((CAST)(&this[0][i]), VECTOR_BIT_OR(l_A, VECTOR_AND_NOT(v_N, v_A))); - VECTOR_STORE((CAST)(&this[1][i]), 
VECTOR_BIT_OR(l_C, VECTOR_AND_NOT(v_N, v_C))); + VECTOR_STORE((CAST)(&here[0][i]), VECTOR_BIT_OR(l_A, VECTOR_AND_NOT(v_N, v_A))); + VECTOR_STORE((CAST)(&here[1][i]), VECTOR_BIT_OR(l_C, VECTOR_AND_NOT(v_N, v_C))); v_N = VECTOR_AND_NOT(v_N, allOne); @@ -378,13 +376,13 @@ static void newviewParsimonyIterativeFast(pllInstance *tr, partitionList *pr) parsimonyNumber *left[4], *right[4], - *this[4]; + *here[4]; for(k = 0; k < 4; k++) { left[k] = &(pr->partitionData[model]->parsVect[(width * 4 * qNumber) + width * k]); right[k] = &(pr->partitionData[model]->parsVect[(width * 4 * rNumber) + width * k]); - this[k] = &(pr->partitionData[model]->parsVect[(width * 4 * pNumber) + width * k]); + here[k] = &(pr->partitionData[model]->parsVect[(width * 4 * pNumber) + width * k]); } for(i = 0; i < width; i += INTS_PER_VECTOR) @@ -416,10 +414,10 @@ static void newviewParsimonyIterativeFast(pllInstance *tr, partitionList *pr) v_N = VECTOR_BIT_OR(VECTOR_BIT_OR(l_A, l_C), VECTOR_BIT_OR(l_G, l_T)); - VECTOR_STORE((CAST)(&this[0][i]), VECTOR_BIT_OR(l_A, VECTOR_AND_NOT(v_N, v_A))); - VECTOR_STORE((CAST)(&this[1][i]), VECTOR_BIT_OR(l_C, VECTOR_AND_NOT(v_N, v_C))); - VECTOR_STORE((CAST)(&this[2][i]), VECTOR_BIT_OR(l_G, VECTOR_AND_NOT(v_N, v_G))); - VECTOR_STORE((CAST)(&this[3][i]), VECTOR_BIT_OR(l_T, VECTOR_AND_NOT(v_N, v_T))); + VECTOR_STORE((CAST)(&here[0][i]), VECTOR_BIT_OR(l_A, VECTOR_AND_NOT(v_N, v_A))); + VECTOR_STORE((CAST)(&here[1][i]), VECTOR_BIT_OR(l_C, VECTOR_AND_NOT(v_N, v_C))); + VECTOR_STORE((CAST)(&here[2][i]), VECTOR_BIT_OR(l_G, VECTOR_AND_NOT(v_N, v_G))); + VECTOR_STORE((CAST)(&here[3][i]), VECTOR_BIT_OR(l_T, VECTOR_AND_NOT(v_N, v_T))); v_N = VECTOR_AND_NOT(v_N, allOne); @@ -432,13 +430,13 @@ static void newviewParsimonyIterativeFast(pllInstance *tr, partitionList *pr) parsimonyNumber *left[20], *right[20], - *this[20]; + *here[20]; for(k = 0; k < 20; k++) { left[k] = &(pr->partitionData[model]->parsVect[(width * 20 * qNumber) + width * k]); right[k] = 
&(pr->partitionData[model]->parsVect[(width * 20 * rNumber) + width * k]); - this[k] = &(pr->partitionData[model]->parsVect[(width * 20 * pNumber) + width * k]); + here[k] = &(pr->partitionData[model]->parsVect[(width * 20 * pNumber) + width * k]); } for(i = 0; i < width; i += INTS_PER_VECTOR) @@ -462,7 +460,7 @@ static void newviewParsimonyIterativeFast(pllInstance *tr, partitionList *pr) } for(j = 0; j < 20; j++) - VECTOR_STORE((CAST)(&this[j][i]), VECTOR_BIT_OR(l_A[j], VECTOR_AND_NOT(v_N, v_A[j]))); + VECTOR_STORE((CAST)(&here[j][i]), VECTOR_BIT_OR(l_A[j], VECTOR_AND_NOT(v_N, v_A[j]))); v_N = VECTOR_AND_NOT(v_N, allOne); @@ -475,7 +473,7 @@ static void newviewParsimonyIterativeFast(pllInstance *tr, partitionList *pr) parsimonyNumber *left[32], *right[32], - *this[32]; + *here[32]; assert(states <= 32); @@ -483,7 +481,7 @@ static void newviewParsimonyIterativeFast(pllInstance *tr, partitionList *pr) { left[k] = &(pr->partitionData[model]->parsVect[(width * states * qNumber) + width * k]); right[k] = &(pr->partitionData[model]->parsVect[(width * states * rNumber) + width * k]); - this[k] = &(pr->partitionData[model]->parsVect[(width * states * pNumber) + width * k]); + here[k] = &(pr->partitionData[model]->parsVect[(width * states * pNumber) + width * k]); } for(i = 0; i < width; i += INTS_PER_VECTOR) @@ -507,7 +505,7 @@ static void newviewParsimonyIterativeFast(pllInstance *tr, partitionList *pr) } for(j = 0; j < states; j++) - VECTOR_STORE((CAST)(&this[j][i]), VECTOR_BIT_OR(l_A[j], VECTOR_AND_NOT(v_N, v_A[j]))); + VECTOR_STORE((CAST)(&here[j][i]), VECTOR_BIT_OR(l_A[j], VECTOR_AND_NOT(v_N, v_A[j]))); v_N = VECTOR_AND_NOT(v_N, allOne); @@ -1207,8 +1205,7 @@ static void testInsertParsimony (pllInstance *tr, partitionList *pr, nodeptr p, if(doIt) { - double - *z = rax_malloc(numBranches*sizeof(double)); + double* z = (double*)rax_malloc(numBranches*sizeof(double)); if(saveBranches) { diff --git a/pll/genericParallelization.c b/pll/genericParallelization.c index 
1454b5ecf..ab6328e70 100644 --- a/pll/genericParallelization.c +++ b/pll/genericParallelization.c @@ -43,6 +43,10 @@ #include "pllInternal.h" #include "pll.h" +#ifdef _IQTREE_MPI +#include +#endif + /** @file genericParallelization.c @brief Generic master-worker parallelization with either pthreads or MPI. @@ -74,7 +78,7 @@ static void computeFraction(partitionList *localPr, int tid, int n); static void computeFractionMany(partitionList *localPr, int tid); static void initializePartitionsMaster(pllInstance *tr, pllInstance *localTree, partitionList *pr, partitionList *localPr, int tid, int n); -#ifdef _FINE_GRAIN_MPI +#if defined(_FINE_GRAIN_MPI) || defined(_IQTREE_MPI) static char* addBytes(char *buf, void *toAdd, size_t numBytes); static char* popBytes(char *buf, void *result, size_t numBytes); static void defineTraversalInfoMPI(void); @@ -96,13 +100,13 @@ double timeBuffer[NUM_PAR_JOBS]; double timePerRegion[NUM_PAR_JOBS]; #endif -extern char* getJobName(int tmp); +extern const char* getJobName(int tmp); //extern double *globalResult; extern volatile char *barrierBuffer; -#ifdef _FINE_GRAIN_MPI +#if defined(_FINE_GRAIN_MPI) || ( defined(_IQTREE_MPI) && defined(CLANG_UNDER_VS) ) extern MPI_Datatype TRAVERSAL_MPI; /** @brief Pthreads helper function for adding bytes to communication buffer. @@ -457,6 +461,9 @@ void perSiteLogLikelihoodsPthreads(pllInstance *tr, partitionList *pr, double *l pllBoolean execute = ((tr->manyPartitions && isThisMyPartition(pr, tid, model)) || (!tr->manyPartitions)); + //On Windows, this will result in a compilation error if you don't have an MPI API + //downloaded and installed. + //I've only built with the Microsoft MPI (MS-MPI), v10.1.2 (November 2019). 
/* if the entire partition has been assigned to this thread (-Q) or if -Q is not activated we need to compute some per-site log likelihoods with thread tid for this partition */ @@ -470,30 +477,29 @@ void perSiteLogLikelihoodsPthreads(pllInstance *tr, partitionList *pr, double *l if(tr->manyPartitions || (i % n == (size_t)tid)) { - double - l; + double l = 0; - /* now compute the per-site log likelihood at the current site */ + /* now compute the per-site log likelihood at the current site */ - switch(tr->rateHetModel) - { - case PLL_CAT: - l = evaluatePartialGeneric (tr, pr, localIndex, pr->partitionData[model]->perSiteRates[pr->partitionData[model]->rateCategory[localIndex]], model); - break; - case PLL_GAMMA: - l = evaluatePartialGeneric (tr, pr, localIndex, 1.0, model); - break; - default: - assert(0); - } + switch(tr->rateHetModel) + { + case PLL_CAT: + l = evaluatePartialGeneric (tr, pr, localIndex, pr->partitionData[model]->perSiteRates[pr->partitionData[model]->rateCategory[localIndex]], model); + break; + case PLL_GAMMA: + l = evaluatePartialGeneric (tr, pr, localIndex, 1.0, model); + break; + default: + assert(0); + } - /* store it in an array that is local in memory to the current thread, - see function collectDouble() in axml.c for understanding how we then collect these - values stored in local arrays from the threads */ + /* store it in an array that is local in memory to the current thread, + see function collectDouble() in axml.c for understanding how we then collect these + values stored in local arrays from the threads */ - lhs[i] = l; + lhs[i] = l; - localIndex++; + localIndex++; } } } @@ -975,7 +981,7 @@ static void broadCastAlpha(partitionList *localPr, partitionList *pr) int i, model; -#ifdef _FINE_GRAIN_MPI +#if defined(_FINE_GRAIN_MPI) || defined(_IQTREE_MPI) int bufSize = localPr->numberOfPartitions * 4 * sizeof(double); char bufDbl[bufSize]; char *bufPtrDbl = bufDbl; @@ -1000,7 +1006,7 @@ static void broadCastLg4xWeights(partitionList 
*localPr, partitionList *pr) int i, model; -#ifdef _FINE_GRAIN_MPI +#if defined(_FINE_GRAIN_MPI) || defined(_IQTREE_MPI) int bufSize = localPr->numberOfPartitions * 4 * sizeof(double); char bufDbl[bufSize]; char *bufPtrDbl = bufDbl; @@ -1030,8 +1036,7 @@ static void copyLG4(partitionList *localPr, partitionList *pr) } #endif - char - bufDbl[bufSize]; + char bufDbl[bufSize]; char *bufPtrDbl = bufDbl; RECV_BUF(bufDbl, bufSize, MPI_BYTE); @@ -1309,7 +1314,7 @@ __inline static void broadcastTraversalInfo(pllInstance *localTree, pllInstance @param type type of parallel region */ -char* getJobName(int type) +const char* getJobName(int type) { switch(type) { @@ -1348,6 +1353,7 @@ char* getJobName(int type) case PLL_THREAD_EVALUATE_PER_SITE_LIKES: return "PLL_THREAD_EVALUATE_PER_SITE_LIKES"; default: assert(0); + return "Unrecognized Job Type"; } } @@ -1536,7 +1542,7 @@ static pllBoolean execFunction(pllInstance *tr, pllInstance *localTree, partitio if( localTree->rateHetModel == PLL_CAT) /* TRICKY originally this should only be executed by workers */ { -#ifdef _FINE_GRAIN_MPI +#if defined(_FINE_GRAIN_MPI) || defined(_IQTREE_MPI) int bufSize = 2 * localTree->originalCrunchedLength * sizeof(double); char bufDbl[bufSize], *bufPtrDbl = bufDbl; @@ -1593,7 +1599,7 @@ static pllBoolean execFunction(pllInstance *tr, pllInstance *localTree, partitio /* assertCtr = 0, */ dblBufSize = 0; -#ifdef _FINE_GRAIN_MPI +#if defined(_FINE_GRAIN_MPI) || defined(_IQTREE_MPI) int bufSize = localPr->numberOfPartitions * sizeof(int); char buf[bufSize]; char *bufPtr = buf; @@ -1612,7 +1618,7 @@ static pllBoolean execFunction(pllInstance *tr, pllInstance *localTree, partitio dblBufSize += 2 * localTree->originalCrunchedLength * sizeof(double); -#ifdef _FINE_GRAIN_MPI +#if defined(_FINE_GRAIN_MPI) || defined(_IQTREE_MPI) char bufDbl[dblBufSize], *bufPtrDbl = bufDbl; #endif @@ -2080,10 +2086,11 @@ static void distributeYVectors(pllInstance *localTree, pllInstance *tr, partitio /* distribute the
y-vectors */ for(j = 1 ; j <= (size_t)localTree->mxtips; j++) { -#ifdef _FINE_GRAIN_MPI +#if defined(_FINE_GRAIN_MPI) || defined(_IQTREE_MPI) unsigned char yBuf[tr->originalCrunchedLength]; - if(MASTER_P) - memcpy(yBuf, tr->yVector[j], tr->originalCrunchedLength * sizeof(unsigned char)); + if (MASTER_P) { + memcpy(yBuf, tr->yVector[j], tr->originalCrunchedLength * sizeof(unsigned char)); + } MPI_Bcast( yBuf, tr->originalCrunchedLength, MPI_UNSIGNED_CHAR,0,MPI_COMM_WORLD); #endif @@ -2219,7 +2226,7 @@ static void initializePartitionsMaster(pllInstance *tr, pllInstance *localTree, treeIsInitialized = PLL_TRUE; ASSIGN_INT(localTree->manyPartitions, tr->manyPartitions); - ASSIGN_INT(localTree->numberOfThreads, tr->numberOfThreads); + ASSIGN_INT(localTree->numberOfThreads, tr->numberOfThreads); ASSIGN_INT(localPr->numberOfPartitions, pr->numberOfPartitions); #ifdef _USE_PTHREADS @@ -2228,7 +2235,7 @@ static void initializePartitionsMaster(pllInstance *tr, pllInstance *localTree, else assignAndInitPart1(localTree, tr, localPr, pr, &tid); #else - globalResult = rax_calloc((size_t) tr->numberOfThreads * (size_t)pr->numberOfPartitions* 2 ,sizeof(double)); + globalResult = (double*)rax_calloc((size_t) tr->numberOfThreads * (size_t)pr->numberOfPartitions* 2 ,sizeof(double)); assignAndInitPart1(localTree, tr, localPr, pr, &tid); defineTraversalInfoMPI(); #endif diff --git a/pll/genericParallelization.h b/pll/genericParallelization.h index 576f8e9b4..2cbb4d84d 100644 --- a/pll/genericParallelization.h +++ b/pll/genericParallelization.h @@ -63,7 +63,7 @@ extern double masterTimePerPhase; /******************/ /* MPI SPECIFIC */ /******************/ -#ifdef _FINE_GRAIN_MPI +#if defined(_FINE_GRAIN_MPI) || (defined(_IQTREE_MPI) && defined(CLANG_UNDER_VS)) #include #ifdef DEBUG_MPI_EACH_SEND #define DEBUG_PRINT(text, elem) printf(text, elem) @@ -80,7 +80,7 @@ extern double masterTimePerPhase; #define MASTER_P (processID == 0) #define POP_OR_PUT_BYTES(bufPtr, elem, type) (MASTER_P ? 
(bufPtr = addBytes((bufPtr), &(elem), sizeof(type))) : (bufPtr = popBytes((bufPtr), &(elem), sizeof(type)))) -#define ASSIGN_INT(x,y) (MPI_Bcast(&y,1,MPI_INT,0,MPI_COMM_WORLD),DEBUG_PRINT("\tSEND/RECV %d\n", y)) +#define ASSIGN_INT(x,y) (MPI_Bcast((int*)(&y),1,MPI_INT,0,MPI_COMM_WORLD),DEBUG_PRINT("\tSEND/RECV %d\n", y)) #define ASSIGN_BUF(x,y,type) (POP_OR_PUT_BYTES(bufPtr, y,type)) #define ASSIGN_BUF_DBL(x,y) (POP_OR_PUT_BYTES(bufPtrDbl,y, double)) #define ASSIGN_DBL(x,y) (MPI_Bcast(&y,1,MPI_DOUBLE, 0, MPI_COMM_WORLD), DEBUG_PRINT("\tSEND/RECV %f\n", y)) @@ -101,11 +101,13 @@ extern int processID; /* PTHREAD SPECIFIC */ /*********************/ #ifdef _USE_PTHREADS -#if defined (_MSC_VER) +#ifndef CLANG_UNDER_VS +#if defined (_MSC_VER) #include "pthread.h" #else #include #endif +#endif #define _REPRODUCIBLE_MPI_OR_PTHREADS #define VOLATILE_PAR volatile #define MASTER_P (tid == 0) diff --git a/pll/hardware.c b/pll/hardware.c index 360756870..9eec25128 100644 --- a/pll/hardware.c +++ b/pll/hardware.c @@ -1,9 +1,7 @@ #include #include #include -#ifndef WIN32 -#include -#endif +#include "systypes.h" #include #include "hardware.h" @@ -11,15 +9,21 @@ #define PLL_FEAT_AVAIL(x,y) (((x) & (y)) == (y)) #define PLL_SYS_CPU_DIR_PATH "/sys/devices/system/cpu/" -//#ifdef _MSC_VER -//#define inline __inline -//#endif +#ifdef CLANG_UNDER_VS + //James B. 
Workaround for Windows builds where these macros might not be defined + #ifndef S_ISDIR + #define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR) + #endif + #ifndef S_ISREG + #define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) + #endif +#endif static __inline void cpuid(unsigned int op, int count, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { -#ifdef WIN32 +#if defined(WIN32) || defined(WIN64) __int32 regs[4]; __cpuid((int*)regs, (int)op); *eax = regs[0]; @@ -69,7 +73,7 @@ static int pll_probe_cpu (pllHardwareInfo * hw) char cpu[30]; char cpupath[100]; int i, id, max_physical_id = -1; - char * physical_id_path = "/topology/physical_package_id"; + const char * physical_id_path = "/topology/physical_package_id"; FILE * fd; /* check whether the sys cpu dir exists */ diff --git a/pll/makenewzGenericSpecial.c b/pll/makenewzGenericSpecial.c index b2b114a8e..f35cb975e 100644 --- a/pll/makenewzGenericSpecial.c +++ b/pll/makenewzGenericSpecial.c @@ -27,10 +27,7 @@ * @file bipartitionList.c */ #include "mem_alloc.h" - -#ifndef WIN32 -#include -#endif +#include "systypes.h" #include #include @@ -64,7 +61,7 @@ /* pointers to reduction buffers for storing and gathering the first and second derivative of the likelihood in Pthreads and MPI */ -#if IS_PARALLEL +#ifdef IS_PARALLEL void branchLength_parallelReduce(pllInstance *tr, double *dlnLdlz, double *d2lnLdlz2, int numBranches ) ; //extern double *globalResult; #endif @@ -1338,20 +1335,14 @@ void execCore(pllInstance *tr, partitionList *pr, volatile double *_dlnLdlz, vol if(pr->partitionData[model]->ascBias) #endif { - size_t - i; - - double - correction; - - int - w = 0; + double correction = 0; + int w = 0; volatile double d1 = 0.0, d2 = 0.0; - for(i = (size_t)pr->partitionData[model]->lower; i < (size_t)pr->partitionData[model]->upper; i++) + for(size_t i = (size_t)pr->partitionData[model]->lower; i < (size_t)pr->partitionData[model]->upper; i++) w += tr->aliaswgt[i]; switch(tr->rateHetModel) @@ 
-1366,14 +1357,12 @@ void execCore(pllInstance *tr, partitionList *pr, volatile double *_dlnLdlz, vol break; default: assert(0); - } - - correction = 1.0 - correction; + } + correction = 1.0 - correction; //Never used! /* Lewis correction */ _dlnLdlz[branchIndex] = _dlnLdlz[branchIndex] + dlnLdlz - (double)w * d1; _d2lnLdlz2[branchIndex] = _d2lnLdlz2[branchIndex] + d2lnLdlz2- (double)w * d2; - } else { @@ -1381,18 +1370,12 @@ void execCore(pllInstance *tr, partitionList *pr, volatile double *_dlnLdlz, vol _d2lnLdlz2[branchIndex] = _d2lnLdlz2[branchIndex] + d2lnLdlz2; } } - else - { - /* set to 0 to make the reduction operation consistent */ - - if(width == 0 && (numBranches > 1)) - { - _dlnLdlz[model] = 0.0; + else if(width == 0 && (numBranches > 1)) { + /* set to 0 to make the reduction operation consistent */ + _dlnLdlz[model] = 0.0; _d2lnLdlz2[model] = 0.0; } - } } - } diff --git a/pll/mem_alloc.h b/pll/mem_alloc.h index 0bfa08a78..340101ffe 100644 --- a/pll/mem_alloc.h +++ b/pll/mem_alloc.h @@ -1,11 +1,9 @@ #ifndef __mem_alloc_h #define __mem_alloc_h -#if defined WIN32 || defined _WIN32 || defined __WIN32__ +#if defined WIN32 || defined _WIN32 || defined __WIN32__ || defined WIN64 #include -//#include #include -//#include #endif #include @@ -15,6 +13,7 @@ #endif #include "pll.h" #include +#include //#define rax_memalign memalign //#define rax_malloc malloc @@ -22,7 +21,7 @@ //#define rax_realloc realloc -#if defined WIN32 || defined _WIN32 || defined __WIN32__ +#if defined WIN32 || defined _WIN32 || defined __WIN32__ || defined(WIN64) #if (defined(__MINGW32__) || defined(__clang__)) && defined(BINARY32) #define rax_posix_memalign(ptr,alignment,size) *(ptr) = __mingw_aligned_malloc((size),(alignment)) #define rax_malloc(size) __mingw_aligned_malloc((size), PLL_BYTE_ALIGNMENT) @@ -54,12 +53,11 @@ /* for strndup stuff */ -static __inline char *my_strndup(const char *s, size_t n) { - char *ret = (char *) rax_malloc(n+1); - strncpy(ret, s, n); - ret[n] = 0; - return 
ret; -} +char *my_strndup(const char *s, size_t n); + +#ifndef HAVE_STRTOK_R +char *strtok_r (char * s, const char * delim, char **save_ptr); +#endif #if 0 // using the following contraption to trigger a compile-time error does not work on some gcc versions. It will trigger a confising linker error in the best case, so it is deativated. diff --git a/pll/models.c b/pll/models.c index 7bc24ef36..f4c24d9e5 100644 --- a/pll/models.c +++ b/pll/models.c @@ -33,13 +33,7 @@ #include "mem_alloc.h" - -#ifndef WIN32 -#include -#include -#include -#include -#endif +#include "systypes.h" #include #include @@ -4306,7 +4300,6 @@ static void initializeBaseFreqs(partitionList *pr, double **empiricalFrequencies void initModel(pllInstance *tr, double **empiricalFrequencies, partitionList * partitions) { int model, j; - double temp; tr->optimizeRateCategoryInvocations = 1; tr->numberOfInvariableColumns = 0; @@ -4314,7 +4307,7 @@ void initModel(pllInstance *tr, double **empiricalFrequencies, partitionList * p for (j = 0; j < tr->originalCrunchedLength; j++) { - tr->patrat[j] = temp = 1.0; + tr->patrat[j] = 1.0; tr->patratStored[j] = 1.0; tr->rateCategory[j] = 0; } diff --git a/pll/newviewGenericSpecial.c b/pll/newviewGenericSpecial.c index e69d7f28d..c99c618d4 100644 --- a/pll/newviewGenericSpecial.c +++ b/pll/newviewGenericSpecial.c @@ -30,10 +30,7 @@ */ #include "mem_alloc.h" - -#ifndef WIN32 -#include -#endif +#include "systypes.h" #include #include @@ -3960,7 +3957,9 @@ static char getStateCharacter(int dataType, int state) break; default: assert(0); - } + result = '\x0'; + break; + } return result; } diff --git a/pll/optimizeModel.c b/pll/optimizeModel.c index b4e890265..b8e54ebb9 100644 --- a/pll/optimizeModel.c +++ b/pll/optimizeModel.c @@ -30,10 +30,7 @@ */ #include "mem_alloc.h" - -#ifndef WIN32 -#include -#endif +#include "systypes.h" #include #include @@ -401,7 +398,7 @@ static void evaluateChange(pllInstance *tr, partitionList *pr, int rateNumber, d 
pr->partitionData[i]->executeModel = PLL_FALSE; } - for (i = 0, pos = 0; i < ll->entries; i++) + for (i = 0; i < ll->entries; i++) { int index = ll->ld[i].partitionList[0]; if (ll->ld[i].valid) @@ -1681,11 +1678,11 @@ void pllOptBaseFreqs(pllInstance *tr, partitionList * pr, double modelEpsilon, l } } - if(aaPartitions > 0) - optFreqs(tr, pr, modelEpsilon, ll, aaPartitions, states); - + if (aaPartitions > 0) { + optFreqs(tr, pr, modelEpsilon, ll, aaPartitions, states); + } /* then binary */ - for(i = 0; i < ll->entries; i++) + for(i = 0; ll && i < ll->entries; i++) { switch(pr->partitionData[ll->ld[i].partitionList[0]]->dataType) { @@ -2023,7 +2020,7 @@ void optRateCatPthreads(pllInstance *tr, partitionList *pr, double lower_spacing > leftLH) && (fabs(leftLH - v) > epsilon)) { -#ifndef WIN32 +#if !defined(WIN32) && !defined(WIN64) if(isnan(v)) assert(0); #endif @@ -2038,7 +2035,7 @@ void optRateCatPthreads(pllInstance *tr, partitionList *pr, double lower_spacing while(((v = evaluatePartialGeneric(tr, pr, localIndex, initialRate + k * upper_spacing, model)) > rightLH) && (fabs(rightLH - v) > epsilon)) { -#ifndef WIN32 +#if !defined(WIN32) && !defined(WIN64) if(isnan(v)) assert(0); #endif @@ -2120,7 +2117,7 @@ static void optRateCatModel(pllInstance *tr, partitionList *pr, int model, doubl > leftLH) && (fabs(leftLH - v) > epsilon)) { -#ifndef WIN32 +#if !defined(WIN32) && !defined(WIN64) if(isnan(v)) assert(0); #endif @@ -2135,7 +2132,7 @@ static void optRateCatModel(pllInstance *tr, partitionList *pr, int model, doubl while(((v = evaluatePartialGeneric(tr, pr, i, initialRate + k * upper_spacing, model)) > rightLH) && (fabs(rightLH - v) > epsilon)) { -#ifndef WIN32 +#if !defined(WIN32) && !defined(WIN64) if(isnan(v)) assert(0); #endif @@ -2762,8 +2759,9 @@ static void autoProtein(pllInstance *tr, partitionList *pr) *oldIndex = (int*) rax_malloc(sizeof(int) * pr->numberOfPartitions); - pllBoolean *oldFreqs = (pllBoolean*) malloc( - sizeof(pllBoolean) * 
pr->numberOfPartitions); + //pllBoolean *oldFreqs = (pllBoolean*) malloc( + // sizeof(pllBoolean) * pr->numberOfPartitions); + // JB 10-Jul-2020 Never used. double startLH, *bestScores = (double*) rax_malloc( @@ -2785,7 +2783,7 @@ static void autoProtein(pllInstance *tr, partitionList *pr) /* save the currently assigned protein model for each PLL_AUTO partition */ for (partitionIndex = 0; partitionIndex < pr->numberOfPartitions; partitionIndex++) { oldIndex[partitionIndex] = pr->partitionData[partitionIndex]->autoProtModels; - oldFreqs[partitionIndex] = pr->partitionData[partitionIndex]->protUseEmpiricalFreqs; + //oldFreqs[partitionIndex] = pr->partitionData[partitionIndex]->protUseEmpiricalFreqs; //JB 10-Jul-2020 Never used bestIndex[partitionIndex] = -1; bestScores[partitionIndex] = PLL_UNLIKELY; } diff --git a/pll/parsimony.c b/pll/parsimony.c index 1fae471ae..5b5c37e30 100644 --- a/pll/parsimony.c +++ b/pll/parsimony.c @@ -27,14 +27,7 @@ * @file parsimony.c */ #include "mem_alloc.h" - -#ifndef WIN32 -#include -#include -#include -#include -#endif - +#include "systypes.h" #include #include #include diff --git a/pll/pll.h b/pll/pll.h index efd7dec29..76a690098 100644 --- a/pll/pll.h +++ b/pll/pll.h @@ -1502,9 +1502,10 @@ typedef struct /******************** START OF API FUNCTION DESCRIPTIONS ********************/ -#if (defined(_USE_PTHREADS) || defined(_FINE_GRAIN_MPI)) -pllBoolean isThisMyPartition(partitionList *pr, int tid, int model); -void printParallelTimePerRegion(void); + +#if (defined(_USE_PTHREADS) || defined(_FINE_GRAIN_MPI)) || defined(_IQTREE_MPI) +pllBoolean isThisMyPartition(partitionList* pr, int tid, int model); +void printParallelTimePerRegion(void); #endif #ifdef _FINE_GRAIN_MPI diff --git a/pll/pllInternal.h b/pll/pllInternal.h index 1b6e0ac0d..a32d5c832 100644 --- a/pll/pllInternal.h +++ b/pll/pllInternal.h @@ -199,7 +199,7 @@ pllBoolean noGap(unsigned int *x, int pos); /* from utils.h */ linkageList* initLinkageList(int *linkList, 
partitionList *pr); -#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS) ) +#if (defined(_FINE_GRAIN_MPI) || defined(_IQTREE_MPI) || defined(_USE_PTHREADS) ) /* work tags for parallel regions */ #define PLL_THREAD_NEWVIEW 0 diff --git a/pll/recom.c b/pll/recom.c index 5ab20c793..e4be6c84b 100644 --- a/pll/recom.c +++ b/pll/recom.c @@ -28,6 +28,7 @@ * @brief Functions used for recomputation of vectors (only a fraction of LH vectors stored in RAM) */ #include "mem_alloc.h" +#include "systypes.h" #include #include #include @@ -36,9 +37,6 @@ #include #include #include -#ifndef WIN32 -#include -#endif #include "pll.h" #include "pllInternal.h" diff --git a/pll/restartHashTable.c b/pll/restartHashTable.c index 007e2477c..544964b9e 100644 --- a/pll/restartHashTable.c +++ b/pll/restartHashTable.c @@ -27,13 +27,7 @@ * @file bipartitionList.c */ #include "mem_alloc.h" - -#ifndef WIN32 -#include -#include -#include -#include -#endif +#include "systypes.h" #include #include diff --git a/pll/searchAlgo.c b/pll/searchAlgo.c index c638d489a..fc900ea0e 100644 --- a/pll/searchAlgo.c +++ b/pll/searchAlgo.c @@ -30,13 +30,7 @@ * Detailed description to appear soon. 
*/ #include "mem_alloc.h" - -#ifndef WIN32 -#include -#include -#include -#include -#endif +#include "systypes.h" #include #include @@ -568,7 +562,8 @@ void regionalSmooth (pllInstance *tr, partitionList *pr, nodeptr p, int maxtimes nodeptr removeNodeBIG (pllInstance *tr, partitionList *pr, nodeptr p, int numBranches) { // double zqr[numBranches], result[numBranches]; - double* zqr = rax_malloc(numBranches*sizeof(double)), *result = rax_malloc(numBranches*sizeof(double)); + double* zqr = (double*)rax_malloc(numBranches * sizeof(double)); + double* result = (double*)rax_malloc(numBranches * sizeof(double)); nodeptr q, r; int i; @@ -661,14 +656,14 @@ pllBoolean insertBIG (pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q) if(tr->thoroughInsertion) { - double * zqr = rax_malloc(numBranches*sizeof(double)), - *zqs = rax_malloc(numBranches*sizeof(double)), - *zrs = rax_malloc(numBranches*sizeof(double)); - double lzqr, lzqs, lzrs, lzsum, lzq, lzr, lzs, lzmax; - double *defaultArray=rax_malloc(numBranches*sizeof(double)); - double *e1 = rax_malloc(numBranches*sizeof(double)), - *e2 = rax_malloc(numBranches*sizeof(double)), - *e3 = rax_malloc(numBranches*sizeof(double)); + double* zqr = (double*)rax_malloc(numBranches * sizeof(double)); + double* zqs = (double*)rax_malloc(numBranches * sizeof(double)); + double* zrs = (double*)rax_malloc(numBranches * sizeof(double)); + double lzqr, lzqs, lzrs, lzsum, lzq, lzr, lzs, lzmax; + double *defaultArray = (double*)rax_malloc(numBranches*sizeof(double)); + double* e1 = (double*)rax_malloc(numBranches * sizeof(double)); + double* e2 = (double*)rax_malloc(numBranches * sizeof(double)); + double* e3 = (double*)rax_malloc(numBranches*sizeof(double)); double *qz; qz = q->z; @@ -720,7 +715,7 @@ pllBoolean insertBIG (pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q) } else { - double *z = rax_malloc(numBranches*sizeof(double)); + double *z = (double*) rax_malloc(numBranches*sizeof(double)); for(i = 0; i < numBranches; i++) 
{ @@ -1151,17 +1146,13 @@ int rearrangeBIG(pllInstance *tr, partitionList *pr, nodeptr p, int mintrav, int */ double treeOptimizeRapid(pllInstance *tr, partitionList *pr, int mintrav, int maxtrav, bestlist *bt, infoList *iList) { - int i, index, - *perm = (int*)NULL; + int i, index, *perm = (int*)NULL; nodeRectifier(tr); - - - if (maxtrav > tr->mxtips - 3) - maxtrav = tr->mxtips - 3; - - + if (maxtrav > tr->mxtips - 3) { + maxtrav = tr->mxtips - 3; + } resetInfoList(iList); @@ -1191,21 +1182,12 @@ double treeOptimizeRapid(pllInstance *tr, partitionList *pr, int mintrav, int ma tr->lhDEC = 0; } - /* - printf("DoCutoff: %d\n", tr->doCutoff); - printf("%d %f %f %f\n", tr->itCount, tr->lhAVG, tr->lhDEC, tr->lhCutoff); - - printf("%d %d\n", mintrav, maxtrav); - */ - for(i = 1; i <= tr->mxtips + tr->mxtips - 2; i++) { - tr->bestOfNode = PLL_UNLIKELY; - - if(tr->permuteTreeoptimize) - index = perm[i]; - else - index = i; + tr->bestOfNode = PLL_UNLIKELY; + //James B. Was doing a null de-reference of perm here, + //if tr->permuteTreeoptimize was non-zero. No longer! + index = i; if(rearrangeBIG(tr, pr, tr->nodep[index], mintrav, maxtrav)) { @@ -1914,8 +1896,10 @@ int pllNniSearch(pllInstance * tr, partitionList *pr, int estimateModel) { evalNNIForSubtree(tr, pr, q->back, nniList, &cnt, &cnt_nni, curScore); q = q->next; } - if (cnt_nni == 0) - return 0.0; + if (cnt_nni == 0) { + free(nniList); //James B. 
23-Jul-2020 (memory leak) + return 0.0; + } nniMove* impNNIList = (nniMove*) malloc(cnt_nni * sizeof(nniMove)); int j = 0; @@ -2944,7 +2928,6 @@ determineRearrangementSetting(pllInstance *tr, partitionList *pr, tr->doCutoff = PLL_FALSE; mintrav = 1; - maxtrav = 5; bestTrav = maxtrav = 5; @@ -2969,13 +2952,12 @@ determineRearrangementSetting(pllInstance *tr, partitionList *pr, tr->startLH = tr->endLH = tr->likelihood; - for (i = 1; i <= tr->mxtips + tr->mxtips - 2; i++) - { - - if (tr->permuteTreeoptimize) - index = perm[i]; + for (i = 1; i <= tr->mxtips + tr->mxtips - 2; i++) { + if (perm != NULL) + index = perm[i]; else - index = i; + index = i; + tr->bestOfNode = PLL_UNLIKELY; if (rearrangeBIG(tr, pr, tr->nodep[index], mintrav, maxtrav)) @@ -3093,7 +3075,6 @@ pllRaxmlSearchAlgorithm(pllInstance * tr, partitionList * pr, initInfoList(&iList, 50); - difference = 10.0; epsilon = tr->likelihoodEpsilon; tr->thoroughInsertion = 0; diff --git a/pll/systypes.h b/pll/systypes.h new file mode 100644 index 000000000..4dcff1dfb --- /dev/null +++ b/pll/systypes.h @@ -0,0 +1,10 @@ +#if defined(WIN32) || defined(WIN64) +#include +#endif + +#if !defined(WIN32) && !defined(WIN64) +#include +#include +#include +#include +#endif diff --git a/pll/topologies.c b/pll/topologies.c index f19bf3dcb..58f4e8a97 100644 --- a/pll/topologies.c +++ b/pll/topologies.c @@ -28,13 +28,7 @@ * @brief Miscellanous functions working with tree topology */ #include "mem_alloc.h" - -#ifndef WIN32 -#include -#include -#include -#include -#endif +#include "systypes.h" #include #include diff --git a/pll/trash.c b/pll/trash.c index 5247c252d..b15d77bc8 100644 --- a/pll/trash.c +++ b/pll/trash.c @@ -27,13 +27,7 @@ * @file trash.c */ #include "mem_alloc.h" - -#ifndef WIN32 -#include -#include -#include -#include -#endif +#include "systypes.h" #include #include diff --git a/pll/treeIO.c b/pll/treeIO.c index 0a63b4025..ed306a3ce 100644 --- a/pll/treeIO.c +++ b/pll/treeIO.c @@ -27,15 +27,7 @@ * @file treeIO.c */ #include "mem_alloc.h" - 
-#include "mem_alloc.h" - -#ifndef WIN32 -#include -#include -#include -#include -#endif +#include "systypes.h" #include #include diff --git a/pll/utils.c b/pll/utils.c index 02c49f179..ab6d53bf7 100644 --- a/pll/utils.c +++ b/pll/utils.c @@ -28,17 +28,8 @@ * * @brief Miscellaneous general utility and helper functions */ -#ifdef WIN32 -#include -#endif - -#ifndef WIN32 -#include -#include -#include -#include -#endif +#include "systypes.h" #include #include #include @@ -50,6 +41,8 @@ #include #include #include "cycle.h" +#include "mem_alloc.h" //for rax_malloc_string_copy + #if ! (defined(__ppc) || defined(__powerpc__) || defined(PPC)) @@ -69,10 +62,22 @@ #include "pll.h" #include "pllInternal.h" +void rax_malloc_string_copy(const char* source, char** dest) +{ + size_t bufLen = (strlen(source) + 1); + *dest = (char*)rax_malloc(bufLen); + #ifdef CLANG_UNDER_VS + strcpy_s(*dest, bufLen, source); + #else + strcpy(*dest, source); + #endif +} + #define GLOBAL_VARIABLES_DEFINITION #include "globalVariables.h" + /* mappings of BIN/DNA/AA alphabet to numbers */ static const char PLL_MAP_BIN[256] = @@ -152,16 +157,22 @@ static void initializePartitionsSequential(pllInstance *tr, partitionList *pr); */ /***************** UTILITY FUNCTIONS **************************/ -#if (!defined(_SVID_SOURCE) && !defined(_BSD_SOURCE) && !defined(_POSIX_C_SOURCE) && !defined(_XOPEN_SOURCE) && !defined(_POSIX_SOURCE)) -static char * -my_strtok_r (char * s, const char * delim, char **save_ptr) +char *my_strndup(const char *s, size_t n) { + char *ret = (char *) rax_malloc(n+1); + strncpy(ret, s, n); + ret[n] = 0; + return ret; +} + +#ifndef HAVE_STRTOK_R +char *strtok_r (char * s, const char * delim, char **save_ptr) { char *token; /* Scan leading delimiters */ - if (s == NULL) - s = *save_ptr; - + if (s == NULL) { + s = *save_ptr; + } s += strspn (s, delim); if (*s == '\0') { @@ -185,14 +196,6 @@ my_strtok_r (char * s, const char * delim, char **save_ptr) } #endif -#if (defined(_SVID_SOURCE) 
|| defined(_BSD_SOURCE) || defined(_POSIX_C_SOURCE) || defined(_XOPEN_SOURCE) || defined(_POSIX_SOURCE)) -#define STRTOK_R strtok_r -#else -#define STRTOK_R my_strtok_r -#endif - - - void storeExecuteMaskInTraversalDescriptor(pllInstance *tr, partitionList *pr) { @@ -272,6 +275,7 @@ size_t discreteRateCategories(int rateHetModel) result = 4; break; default: + result = 0; assert(0); } @@ -433,8 +437,6 @@ void hookupFull (nodeptr p, nodeptr q, double *z) /* connect node p with q and assign the default branch lengths */ void hookupDefault (nodeptr p, nodeptr q) { - int i; - p->back = q; q->back = p; @@ -644,7 +646,7 @@ void initializePartitionData(pllInstance *localTree, partitionList * localPartit /* Initializing the xVector array like this is absolutely required !!!! - I don't know which programming genious removed this, but it must absolutely stay in here!!!! + I don't know which programming genius removed this, but it must absolutely stay in here!!!! */ { @@ -867,33 +869,26 @@ void pllSetBranchLength (pllInstance *tr, nodeptr p, int partition_id, double bl } #if (!defined(_FINE_GRAIN_MPI) && !defined(_USE_PTHREADS)) -static void initializePartitionsSequential(pllInstance *tr, partitionList *pr) -{ - size_t - model; - - for(model = 0; model < (size_t)pr->numberOfPartitions; model++) - assert(pr->partitionData[model]->width == pr->partitionData[model]->upper - pr->partitionData[model]->lower); - - initializePartitionData(tr, pr); - - /* figure in tip sequence data per-site pattern weights */ - for(model = 0; model < (size_t)pr->numberOfPartitions; model++) - { - size_t - j; - size_t lower = pr->partitionData[model]->lower; - size_t width = pr->partitionData[model]->upper - lower; +static void initializePartitionsSequential(pllInstance* tr, partitionList* pr) +{ + for (size_t model = 0; model < (size_t)pr->numberOfPartitions; model++) { + assert(pr->partitionData[model]->width == pr->partitionData[model]->upper - pr->partitionData[model]->lower); + } + 
initializePartitionData(tr, pr); - for(j = 1; j <= (size_t)tr->mxtips; j++) + /* figure in tip sequence data per-site pattern weights */ + for (size_t model = 0; model < (size_t)pr->numberOfPartitions; model++) { - pr->partitionData[model]->yVector[j] = &(tr->yVector[j][pr->partitionData[model]->lower]); + size_t lower = pr->partitionData[model]->lower; + size_t width = pr->partitionData[model]->upper - lower; + for (size_t j = 1; j <= (size_t)tr->mxtips; j++) + { + pr->partitionData[model]->yVector[j] = &(tr->yVector[j][pr->partitionData[model]->lower]); + } + memcpy((void*)(&(pr->partitionData[model]->wgt[0])), (void*)(&(tr->aliaswgt[lower])), sizeof(int) * width); } - memcpy((void*)(&(pr->partitionData[model]->wgt[0])), (void*)(&(tr->aliaswgt[lower])), sizeof(int) * width); - } - - initMemorySavingAndRecom(tr, pr); + initMemorySavingAndRecom(tr, pr); } #endif @@ -1148,70 +1143,68 @@ static partitionList * createPartitions (pllQueue * parts, int * bounds) // TODO: change PLL_NUM_BRANCHES to number of partitions I guess pl->partitionData = (pInfo **) rax_calloc (PLL_NUM_BRANCHES, sizeof (pInfo *)); - for (i = 0, elm = parts->head; elm; elm = elm->next, ++ i) - { - pi = (pllPartitionInfo *) elm->item; + for (i = 0, elm = parts->head; elm; elm = elm->next, ++i) + { + pi = (pllPartitionInfo*)elm->item; - /* check whether the data type is valid, and in case it's not, deallocate - and return NULL */ - if (pi->dataType <= PLL_MIN_MODEL || pi->dataType >= PLL_MAX_MODEL) + /* check whether the data type is valid, and in case it's not, deallocate + and return NULL */ + if (pi->dataType <= PLL_MIN_MODEL || pi->dataType >= PLL_MAX_MODEL) { - for (j = 0; j < i; ++ j) - { - rax_free (pl->partitionData[j]->partitionName); - rax_free (pl->partitionData[j]); - } - rax_free (pl->partitionData); - rax_free (pl); - return (NULL); + for (j = 0; j < i; ++j) + { + rax_free(pl->partitionData[j]->partitionName); + rax_free(pl->partitionData[j]); + } + rax_free(pl->partitionData); + 
rax_free(pl); + return (NULL); } - pl->partitionData[i] = (pInfo *) rax_malloc (sizeof (pInfo)); + pl->partitionData[i] = (pInfo*)rax_malloc(sizeof(pInfo)); - pl->partitionData[i]->lower = bounds[i << 1]; - pl->partitionData[i]->upper = bounds[(i << 1) + 1]; - pl->partitionData[i]->width = bounds[(i << 1) + 1] - bounds[i << 1]; - pl->partitionData[i]->partitionWeight = 1.0 * (double) pl->partitionData[i]->width; + pl->partitionData[i]->lower = bounds[i << 1]; + pl->partitionData[i]->upper = bounds[(i << 1) + 1]; + pl->partitionData[i]->width = bounds[(i << 1) + 1] - bounds[i << 1]; + pl->partitionData[i]->partitionWeight = 1.0 * (double)pl->partitionData[i]->width; - //the two flags below are required to allow users to set - //alpha parameters and substitution rates in the Q matrix - //to fixed values. These parameters will then not be optimized - //in the model parameter optimization functions - //by default we assume that all parameters are being optimized, i.e., - //this has to be explicitly set by the user - - pl->partitionData[i]->optimizeAlphaParameter = PLL_TRUE; - pl->partitionData[i]->optimizeSubstitutionRates = PLL_TRUE; - pl->partitionData[i]->dataType = pi->dataType; - pl->partitionData[i]->protModels = -1; - pl->partitionData[i]->protUseEmpiricalFreqs = -1; - pl->partitionData[i]->maxTipStates = pLengths[pi->dataType].undetermined + 1; - pl->partitionData[i]->optimizeBaseFrequencies = pi->optimizeBaseFrequencies; - pl->partitionData[i]->ascBias = pi->ascBias; - pl->partitionData[i]->parsVect = NULL; + //the two flags below are required to allow users to set + //alpha parameters and substitution rates in the Q matrix + //to fixed values. 
These parameters will then not be optimized + //in the model parameter optimization functions + //by default we assume that all parameters are being optimized, i.e., + //this has to be explicitly set by the user + + pl->partitionData[i]->optimizeAlphaParameter = PLL_TRUE; + pl->partitionData[i]->optimizeSubstitutionRates = PLL_TRUE; + pl->partitionData[i]->dataType = pi->dataType; + pl->partitionData[i]->protModels = -1; + pl->partitionData[i]->protUseEmpiricalFreqs = -1; + pl->partitionData[i]->maxTipStates = pLengths[pi->dataType].undetermined + 1; + pl->partitionData[i]->optimizeBaseFrequencies = pi->optimizeBaseFrequencies; + pl->partitionData[i]->ascBias = pi->ascBias; + pl->partitionData[i]->parsVect = NULL; - if (pi->dataType == PLL_AA_DATA) + if (pi->dataType == PLL_AA_DATA) { - if(pl->partitionData[i]->protModels != PLL_GTR) - pl->partitionData[i]->optimizeSubstitutionRates = PLL_FALSE; - pl->partitionData[i]->protUseEmpiricalFreqs = pi->protUseEmpiricalFreqs; - pl->partitionData[i]->protModels = pi->protModels; + if (pl->partitionData[i]->protModels != PLL_GTR) + pl->partitionData[i]->optimizeSubstitutionRates = PLL_FALSE; + pl->partitionData[i]->protUseEmpiricalFreqs = pi->protUseEmpiricalFreqs; + pl->partitionData[i]->protModels = pi->protModels; } - pl->partitionData[i]->states = pLengths[pl->partitionData[i]->dataType].states; - pl->partitionData[i]->numberOfCategories = 1; - pl->partitionData[i]->autoProtModels = 0; - pl->partitionData[i]->nonGTR = PLL_FALSE; - pl->partitionData[i]->partitionContribution = -1.0; - pl->partitionData[i]->partitionLH = 0.0; - pl->partitionData[i]->fracchange = 1.0; - pl->partitionData[i]->executeModel = PLL_TRUE; + pl->partitionData[i]->states = pLengths[pl->partitionData[i]->dataType].states; + pl->partitionData[i]->numberOfCategories = 1; + pl->partitionData[i]->autoProtModels = 0; + pl->partitionData[i]->nonGTR = PLL_FALSE; + pl->partitionData[i]->partitionContribution = -1.0; + pl->partitionData[i]->partitionLH = 
0.0; + pl->partitionData[i]->fracchange = 1.0; + pl->partitionData[i]->executeModel = PLL_TRUE; - - pl->partitionData[i]->partitionName = (char *) rax_malloc ((strlen (pi->partitionName) + 1) * sizeof (char)); - strcpy (pl->partitionData[i]->partitionName, pi->partitionName); + rax_malloc_string_copy(pi->partitionName, &(pl->partitionData[i]->partitionName)); } return (pl); @@ -1512,9 +1505,9 @@ static int genericBaseFrequenciesAlignment (pInfo * partition, lower = partition->lower; upper = partition->upper; - for(l = 0; l < numFreqs; l++) - pfreqs[l] = 1.0 / ((double)numFreqs); - + for (l = 0; l < numFreqs; l++) { + pfreqs[l] = 1.0 / ((double)numFreqs); + } for (k = 1; k <= 8; k++) { for(l = 0; l < numFreqs; l++) @@ -2421,13 +2414,10 @@ pllTreeInitTopologyRandom (pllInstance * tr, int tips, char ** nameList) int i; pllTreeInitDefaults (tr, tips); - for (i = 1; i <= tips; ++ i) - { - tr->nameList[i] = (char *) rax_malloc ((strlen (nameList[i]) + 1) * sizeof (char)); - strcpy (tr->nameList[i], nameList[i]); - pllHashAdd (tr->nameHash, pllHashString(tr->nameList[i], tr->nameHash->size), tr->nameList[i], (void *) (tr->nodep[i])); - } - + for (i = 1; i <= tips; ++i) { + rax_malloc_string_copy(nameList[i], &(tr->nameList[i])); + pllHashAdd(tr->nameHash, pllHashString(tr->nameList[i], tr->nameHash->size), tr->nameList[i], (void*)(tr->nodep[i])); + } pllMakeRandomTree (tr); } @@ -2447,23 +2437,17 @@ pllTreeInitTopologyRandom (pllInstance * tr, int tips, char ** nameList) Parsed alignment */ void -pllTreeInitTopologyForAlignment (pllInstance * tr, pllAlignmentData * alignmentData) +pllTreeInitTopologyForAlignment(pllInstance* tr, pllAlignmentData* alignmentData) { - int - tips = alignmentData->sequenceCount, - i; + int tips = alignmentData->sequenceCount; + char** nameList = alignmentData->sequenceLabels; - char - **nameList = alignmentData->sequenceLabels; - - pllTreeInitDefaults (tr, tips); + pllTreeInitDefaults(tr, tips); - for (i = 1; i <= tips; ++ i) - { - 
tr->nameList[i] = (char *) rax_malloc ((strlen (nameList[i]) + 1) * sizeof (char)); - strcpy (tr->nameList[i], nameList[i]); - pllHashAdd (tr->nameHash, pllHashString(tr->nameList[i], tr->nameHash->size), tr->nameList[i], (void *) (tr->nodep[i])); - } + for (int i = 1; i <= tips; ++i) { + rax_malloc_string_copy(nameList[i], &(tr->nameList[i])); + pllHashAdd(tr->nameHash, pllHashString(tr->nameList[i], tr->nameHash->size), tr->nameList[i], (void*)(tr->nodep[i])); + } } @@ -2619,13 +2603,11 @@ static int init_Q_MatrixSymmetries(char *linkageString, partitionList * pr, int *ch, *token; - ch = (char *) rax_malloc (strlen (linkageString) + 1); - strcpy (ch, linkageString); - + rax_malloc_string_copy(linkageString, &ch); for(j = 0, str1 = ch; ;j++, str1 = (char *)NULL) { - token = STRTOK_R(str1, ",", &saveptr); + token = strtok_r(str1, ",", &saveptr); if(token == (char *)NULL) break; if(!(j < numberOfRates)) @@ -3375,41 +3357,26 @@ linkageList* initLinkageList(int *linkList, partitionList *pr) -static linkageList* initLinkageListString(char *linkageString, partitionList * pr) +static linkageList* initLinkageListString(char* linkageString, partitionList* pr) { - int - *list = (int*)rax_malloc(sizeof(int) * pr->numberOfPartitions), - j; + int* list = (int*)rax_malloc(sizeof(int) * pr->numberOfPartitions); + char* saveptr, * ch; - linkageList - *l; - - char - *str1, - *saveptr, -// *ch = strdup(linkageString), - *ch, - *token; - - ch = (char *) rax_malloc (strlen (linkageString) + 1); - strcpy (ch, linkageString); - - for(j = 0, str1 = ch; ;j++, str1 = (char *)NULL) - { - token = STRTOK_R(str1, ",", &saveptr); - if(token == (char *)NULL) - break; - assert(j < pr->numberOfPartitions); - list[j] = atoi(token); + rax_malloc_string_copy(linkageString, &ch); + char* str1 = ch; + for (int j = 0; ; j++, str1 = (char*)NULL) { + char* token = strtok_r(str1, ",", &saveptr); + if (token == (char*)NULL) { + break; + } + assert(j < pr->numberOfPartitions); + list[j] = atoi(token); } - - 
rax_free(ch); - - l = initLinkageList(list, pr); - - rax_free(list); + rax_free(ch); + linkageList* l = initLinkageList(list, pr); + rax_free(list); - return l; + return l; } /** @ingroup modelParamsGroups diff --git a/sprng/lcg64.c b/sprng/lcg64.c index d73efbfe4..63b5ebdf3 100755 --- a/sprng/lcg64.c +++ b/sprng/lcg64.c @@ -596,7 +596,7 @@ int *igen; { struct rngen *gen; - printf("\n%s\n", GENTYPE+2); + printf("\n%s\n", &GENTYPE[2]); gen = (struct rngen *) igen; printf("\n \tseed = %d, stream_number = %d\tparameter = %d\n\n", gen->init_seed, gen->stream_number, gen->parameter); diff --git a/terrace/CMakeLists.txt b/terrace/CMakeLists.txt new file mode 100644 index 000000000..4ffbcd25b --- /dev/null +++ b/terrace/CMakeLists.txt @@ -0,0 +1,6 @@ +add_library(terrace +terrace.cpp +terrace.h +) + +target_link_libraries(terrace tree alignment terraphast) \ No newline at end of file diff --git a/terrace/terrace.cpp b/terrace/terrace.cpp new file mode 100644 index 000000000..9ec94a6ec --- /dev/null +++ b/terrace/terrace.cpp @@ -0,0 +1,84 @@ +/*************************************************************************** + * Copyright (C) 2018 by Lukasz Reszczynski * + * lukasz.reszczynski@univie.ac.at * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* + ***************************************************************************/ +#include "terrace.h" + +Terrace::Terrace(PhyloTree &tree, SuperAlignment* saln) : + coverage(tree.aln->getSeqNames().size(), saln->taxa_index[0].size()) +{ + stringstream nwk; + tree.printTree(nwk, 0); + + terraces::index cols{}; + terraces::index rows{}; + + terraces::bitmatrix coverage_matrix{rows, cols}; + + vector labels = tree.aln->getSeqNames(); + + names.resize(labels.size()); + + for (int i=0; itaxa_index[0].size(); + + for (int i=0; itaxa_index[i][j] != -1); + coverage.set(i, j, value); + } + } + + supertree = terraces::create_supertree_data(terraphast_nwk, coverage); + + init(); +} + +void Terrace::init() +{ + +} + +uint64_t Terrace::getSize() +{ + return terraces::count_terrace(supertree); +} + +void Terrace::printTrees(ostream &out) +{ + terraces::print_terrace(supertree, names, out); +} + +void Terrace::printTreesCompressed(ostream &out) +{ + terraces::print_terrace_compressed(supertree, names, out); +} + +Terrace::~Terrace() +{ +} + + diff --git a/terrace/terrace.h b/terrace/terrace.h new file mode 100644 index 000000000..f2bb1e403 --- /dev/null +++ b/terrace/terrace.h @@ -0,0 +1,80 @@ +/*************************************************************************** + * Copyright (C) 2018 by Lukasz Reszczynski * + * lukasz.reszczynski@univie.ac.at * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. 
* + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ +#ifndef TERRACE_H +#define TERRACE_H + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "tree/phylotree.h" +#include "alignment/superalignment.h" +#include "utils/tools.h" + +#include "terraphast/include/terraces/errors.hpp" +#include "terraphast/include/terraces/parser.hpp" +#include "terraphast/include/terraces/simple.hpp" +#include "terraphast/include/terraces/advanced.hpp" +#include "terraphast/include/terraces/bitmatrix.hpp" + +/** + A phylogenetic terrace + @author Lukasz Reszczynski +*/ +class Terrace { + +public: + /** + * Constructor + * @param tree tree + * @param saln superalignment + */ + Terrace(PhyloTree &tree, SuperAlignment* saln); + + /** + * @return The terrace size + */ + uint64_t getSize(); + + /** + * Print trees from the terrace in the newick format. + * @param out the output stream + */ + void printTrees(ostream &out); + + /** + * Print trees from the terrace in the compressed newick format. 
+ * @param out the output stream + */ + void printTreesCompressed(ostream &out); + + void init(); + + ~Terrace(); + +private: + terraces::bitmatrix coverage; + terraces::name_map names; + terraces::index_map indices; + terraces::supertree_data supertree; +}; + + +#endif diff --git a/terraphast/.gitattributes b/terraphast/.gitattributes new file mode 100644 index 000000000..7844b9373 --- /dev/null +++ b/terraphast/.gitattributes @@ -0,0 +1 @@ +appveyor.yml eol=crlf diff --git a/terraphast/.gitignore b/terraphast/.gitignore new file mode 100644 index 000000000..46cdd7195 --- /dev/null +++ b/terraphast/.gitignore @@ -0,0 +1,10 @@ +build +.idea +cmake-* +*.o +*.user +.vscode +.ycm_extra_conf.py +.ycm_extra_conf.pyc +callgrind.out.* +*.orig diff --git a/terraphast/AUTHORS b/terraphast/AUTHORS new file mode 100644 index 000000000..c8bb70a9c --- /dev/null +++ b/terraphast/AUTHORS @@ -0,0 +1,12 @@ +This library was built in a joint effort by + + Peter Boszoky + Tobias Ribizel + Fedor Scholz + Florian Weber + +The C interface was provided by + + Michael Hamann + Bui Quang Minh + Alexandros Stamatakis diff --git a/terraphast/CMakeLists.txt b/terraphast/CMakeLists.txt new file mode 100644 index 000000000..05357927e --- /dev/null +++ b/terraphast/CMakeLists.txt @@ -0,0 +1,240 @@ +project(terraphast C CXX) + +cmake_minimum_required(VERSION 3.0.2) +cmake_policy(SET CMP0054 NEW) +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/") + +Option(DEV_ENVIRONMENT "Development environment (extended debugging)" OFF) + +##################################################################### +# decide which components need to be built: by default, build everything, +# unless some components were explicitely disabled from the parent project +##################################################################### +Option(TERRAPHAST_USE_GMP "Use GMP" ON) +Option(TERRAPHAST_BUILD_CLIB "Build the C library" ON) +Option(TERRAPHAST_BUILD_APPS "Build the tools" ON) 
+Option(TERRAPHAST_BUILD_TESTS "Build the tests" ON) +Option(TERRAPHAST_ARCH_NATIVE "Use -march=native compiler flag" ON) + +##################################################################### +# build library +##################################################################### +add_library(terraphast + lib/advanced.cpp + lib/bigint.cpp + lib/bipartitions.cpp + lib/bipartitions.hpp + lib/bitmatrix.cpp + lib/bits.hpp + lib/bitvector.hpp + lib/clamped_uint.cpp + lib/constraints.cpp + lib/constraints_impl.hpp + lib/errors.cpp + lib/io_utils.hpp + lib/multitree.cpp + lib/multitree.hpp + lib/multitree_impl.hpp + lib/multitree_iterator.cpp + lib/multitree_iterator.hpp + lib/nodes.cpp + lib/parser.cpp + lib/ranked_bitvector.hpp + lib/rooting.cpp + lib/simple.cpp + lib/small_bipartition.hpp + lib/stack_allocator.hpp + lib/subtree_extraction.cpp + lib/subtree_extraction_impl.hpp + lib/supertree_enumerator.hpp + lib/supertree_helpers.cpp + lib/supertree_helpers.hpp + lib/supertree_variants.hpp + lib/supertree_variants_debug.hpp + lib/supertree_variants_multitree.hpp + lib/trees.cpp + lib/trees_impl.hpp + lib/union_find.cpp + lib/union_find.hpp + lib/union_find_debug.hpp + lib/utils.hpp + lib/validation.cpp + lib/validation.hpp + # For QtCreator/CLion/... 
to show the files + include/terraces/advanced.hpp + include/terraces/bigint.hpp + include/terraces/bitmatrix.hpp + include/terraces/clamped_uint.hpp + include/terraces/constraints.hpp + include/terraces/definitions.hpp + include/terraces/errors.hpp + include/terraces/parser.hpp + include/terraces/rooting.hpp + include/terraces/simple.hpp + include/terraces/subtree_extraction.hpp + include/terraces/trees.hpp +) +target_include_directories(terraphast + PUBLIC include + PRIVATE lib +) + +if(TERRAPHAST_USE_GMP) + find_package(GMP) + if(GMP_FOUND) + message(STATUS "GMP libraries found") + target_link_libraries(terraphast gmpxx gmp) + target_compile_definitions(terraphast PUBLIC USE_GMP) + else() + message(FATAL_ERROR "GMP libraries not found! Disable them using -DTERRAPHAST_USE_GMP=OFF") + endif() +endif() + +set(terraces_targets terraphast) + +if(TERRAPHAST_BUILD_CLIB) + add_library(terraces_c + c_lib/terraces.cpp + c_include/terraces/terraces.h + ) + target_include_directories(terraces_c PUBLIC c_include) + target_link_libraries(terraces_c terraphast) + if (NOT TERRAPHAST_USE_GMP) + message(FATAL_ERROR "The C library requires the GMP libraries to build! 
Enable them using -DTERRAPHAST_USE_GMP=ON") + endif() + + set(terraces_targets ${terraces_targets} terraces_c) +endif() + +##################################################################### +# internal compiler flags +##################################################################### +if(DEV_ENVIRONMENT AND CMAKE_BUILD_TYPE STREQUAL "Debug") + target_compile_definitions(terraphast PUBLIC _GLIBCXX_DEBUG) # PUBLIC to maintain ABI compatibility + if(TERRAPHAST_BUILD_CLIB) + target_compile_definitions(terraces_c PRIVATE _GLIBCXX_DEBUG) # PRIVATE since no stdlib objects are used + endif() +endif() + +##################################################################### +# build tools +##################################################################### +if(TERRAPHAST_BUILD_APPS) + add_executable(app "app/app.cpp") + add_executable(validated_run "tools/validated_run.cpp") + add_executable(verbose_run "tools/verbose_run.cpp") + add_executable(isomorphic "tools/isomorphic.cpp") + add_executable(reroot "tools/reroot.cpp") + add_executable(subtree "tools/subtree.cpp") + add_executable(tree_gen "tools/tree_gen.cpp") + add_executable(site_gen "tools/site_gen.cpp") + add_executable(nwk_to_dot "tools/nwk_to_dot.cpp") + target_link_libraries(validated_run terraphast) + target_link_libraries(verbose_run terraphast) + target_link_libraries(isomorphic terraphast) + target_link_libraries(reroot terraphast) + target_link_libraries(subtree terraphast) + target_link_libraries(tree_gen terraphast) + target_link_libraries(site_gen terraphast) + target_link_libraries(nwk_to_dot terraphast) + target_link_libraries(app terraphast) + + set(terraces_targets ${terraces_targets} app validated_run verbose_run isomorphic reroot subtree tree_gen site_gen nwk_to_dot) +endif() + +##################################################################### +# build tests +##################################################################### +if(TERRAPHAST_BUILD_TESTS) + add_library(Catch INTERFACE) + 
target_include_directories(Catch INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/catch) + + add_executable(unittests + test/main.cc + test/advanced.cpp + test/bipartitions.cpp + test/bitmatrix.cpp + test/bits.cpp + test/bitvector.cpp + test/clamped_uint.cpp + test/constraints.cpp + test/compile_test.cpp + test/fast_set.cpp + test/integration.cpp + test/multitree_iterator.cpp + test/parser.cpp + test/rooting.cpp + test/small_bipartition.cpp + test/stack_allocator.cpp + test/subtree_extraction.cpp + test/supertree.cpp + test/trees.cpp + test/union_find.cpp + test/util.cpp + test/validation.cpp + test/simple.cpp + ) + target_link_libraries(unittests terraphast Catch) + if(TERRAPHAST_BUILD_CLIB) + target_sources(unittests PRIVATE + test/c_api.cpp + ) + target_link_libraries(unittests terraces_c) + endif() + add_test(NAME unittests COMMAND unittests) + enable_testing() + + set(terraces_targets ${terraces_targets} unittests) +endif() + +set_target_properties(${terraces_targets} PROPERTIES CXX_STANDARD 11 CXX_STANDARD_REQUIRED ON) + +##################################################################### +# set platform-specific options, include platform-specific files +##################################################################### +if(("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") OR CLANG_UNDER_VS) + set(TERRAPHAST_PLATFORM_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/lib/cl") + set(TERRAPHAST_COMPILE_FLAGS -Oi -W4) + + # Most of our files only compile with disabled language extensions for VC++ + # Unfortunately, Catch uses some windows-specific features, so we have to + # enable these extensions for the Catch main method (more specific: not disable them) + file(GLOB ALL_SOURCES lib/*.cpp c_lib/*.cpp test/*.cpp tools/*.cpp app/*.cpp) + if (NOT CLANG_UNDER_VS) + set_source_files_properties(${ALL_SOURCES} PROPERTIES COMPILE_FLAGS "-Za") + endif() +elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel") + set(TERRAPHAST_PLATFORM_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/lib/intel") + 
set(TERRAPHAST_COMPILE_FLAGS -Wall -Wextra -Wconversion -Wsign-conversion -Werror) + if(TERRAPHAST_ARCH_NATIVE) + set(TERRAPHAST_COMPILE_FLAGS -march=native ${TERRAPHAST_COMPILE_FLAGS}) + endif() +else() + set(TERRAPHAST_PLATFORM_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/lib/gcc_clang") + set(TERRAPHAST_COMPILE_FLAGS -Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion -Werror) + if(TERRAPHAST_ARCH_NATIVE) + set(TERRAPHAST_COMPILE_FLAGS -march=native ${TERRAPHAST_COMPILE_FLAGS}) + endif() +endif() + +target_include_directories(terraphast PUBLIC "${TERRAPHAST_PLATFORM_INCLUDE}") +if(TERRAPHAST_BUILD_TESTS) + target_include_directories(unittests PRIVATE "${TERRAPHAST_PLATFORM_INCLUDE}") +endif() +target_compile_options(terraphast PRIVATE "${TERRAPHAST_COMPILE_FLAGS}") +if(TERRAPHAST_BUILD_CLIB) + target_compile_options(terraces_c PRIVATE "${TERRAPHAST_COMPILE_FLAGS}") +endif() + +##################################################################### +# setup clang-tidy +##################################################################### +find_program(CLANG_TIDY_PATH NAMES "clang-tidy") +if(NOT CLANG_TIDY_PATH) + message(STATUS "clang-tidy not found.") +else() + message(STATUS "clang-tidy found: ${CLANG_TIDY_PATH}") + set(CLANG_TIDY_COMMANDLINE "${CLANG_TIDY_PATH}" "-checks=llvm-namespace-comment -fix") +endif() + +#set_target_properties(terraphast terraces_c app validated_run verbose_run isomorphic reroot subtree tree_gen site_gen nwk_to_dot unittests PROPERTIES CXX_CLANG_TIDY "${CLANG_TIDY_COMMANDLINE}") diff --git a/terraphast/LICENSE b/terraphast/LICENSE new file mode 100644 index 000000000..53d1f3d01 --- /dev/null +++ b/terraphast/LICENSE @@ -0,0 +1,675 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. 
+ + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. 
For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. 
The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<http://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<http://www.gnu.org/philosophy/why-not-lgpl.html>.
+ diff --git a/terraphast/README.md b/terraphast/README.md new file mode 100644 index 000000000..d8ce93976 --- /dev/null +++ b/terraphast/README.md @@ -0,0 +1,83 @@ +Documentation Home {#mainpage} +================== + +C++ tool to check for and enumerate terraces in phylogenetic tree space. + +----- + +**Usage**: `terraces/build/release/app <tree-file> <data-file>` + +Terraphast takes a .nwk file in Newick format and a genes/sites file, which denotes whether (1) or not (0) gene i is present in species j. + +Program output states some input data properties, the species whose leaf edge is used as a new tree root, and the resulting supertree in compressed newick format. + + + +**Compressed Newick Format**: The resulting supertree representation can be plain Newick, but can also contain the following two notation enhancements: +- `{a,b,c}` represents any conceivable binary subtree comprising the taxa a, b, and c. +- `(A|B,C|D)` represents any conceivable binary subtree comprising either subtrees A or B on the left, and either subtrees C or D on the right branch. + +Both enhancements were chosen such that the result is standard newick format if there's only one possible supertree. + + + +## The Terrace Phenomenon and Problem +In recent years, it has become common practice to infer phylogenies on so-called multi-gene datasets. Concatenated multi-gene datasets usually exhibit holes, that is, sequence data for some species might not be available for some genes Gi in our concatenated dataset. This can be due to a plethora of reasons, for instance, a specific species might simply not have a specific gene Gi or the specific gene has simply not been sequenced for some of the species.
After concatenating genes (partitions) we therefore end up with an alignment that contains patches of missing data: + +``` +index 0123 + +Species 1 AC-- +Species 2 AG-- +Species 3 ACTT +Species 4 --AG +Species 5 --GG +``` + +Under the likelihood model conditions that generate terraces, the log likelihood LnL(T) of a tree T can be computed as follows: LnL(T) = LnL(T|G1) + LnL(T|G2) where T|Gi denotes the tree topology induced by T for the species/sequences in partition i for which we have sequence data. In our example, the trees induced by G1 and G2 contain only three taxa. We know that there's only one tree topology with three taxa. On the other hand, there are 15 possible topologies for 5-taxon trees. So all 15 possible 5-taxon trees for our example dataset will induce the same per-gene/partition trees and therefore span a terrace of size 15. +This example dataset is bad: It does not contain any signal for disentangling +the phylogenetic history of these 5 species, since they are only connected via species 3. + +**Terraces**: two distinct comprehensive (containing all n species) trees are on a terrace if all induced per-partition subtrees of the two trees are identical. This phenomenon was named and described in [SMS11]. + +Knowing about the phenomenon of terraces, researchers might want to know +(i) if a given tree is on a terrace, +(ii) how many trees there are on that terrace, and +(iii) what the trees on that terrace look like. + +## The Basic Approach + +TO PUT IN HERE: +- Take NWK and DATA file and re-root the tree so that the comprehensive taxon is a leaf under the root, and the rest of the tree is a subtree under the root. +- Extract constraints according to Constantinescu's algorithm (Only reference and very short outline) +- Using the constraints, generate trees according to C's algorithm +- No guarantees for completeness (simply unknown) + +## A Short Guide to the Code + +This can be found [here](documentation/walkthrough.md).
+ +## Improvements and Optimizations to the basic approach + +### Implemented: + +- We introduced an optional, **compressed tree output format**. This format makes printing to terminal faster, since not all possible trees are listed in full detail. See section *Compressed Newick Format* above. [https://git.scc.kit.edu/bioinfo2017/terraces/issues/8] +- **Memory allocation in large blocks**, and managing them with free lists. [https://git.scc.kit.edu/bioinfo2017/terraces/issues/37, https://git.scc.kit.edu/bioinfo2017/terraces/issues/13] +- **Deletion of unnecessary constraints**. [https://git.scc.kit.edu/bioinfo2017/terraces/issues/23] +- **Improved data structures**: We replaced index vectors representing the current leaves by bitvectors with rank support, thus improving space requirements and the efficiency of constraint filtering. The union-find data structure could be improved by storing the set ranks in out-of-bounds indices, thus halving the storage. [https://git.scc.kit.edu/bioinfo2017/terraces/issues/29] +- **Remap constraints**: By removing inner nodes from the constraint numbering, we were able to halve the space requirements of most of our data structures. [https://git.scc.kit.edu/bioinfo2017/terraces/issues/21] +- **Use specialized bit manipulation instructions**: Bipartition iteration and bitvector operations (bit iteration and rank computation) were improved significantly by using specialized CPU instructions supported by compiler intrinsics. +- **Provide a fast terrace check**: We discovered that checking for the existence of multiple trees on a terrace can be done without explicitly building any of these trees, thus decreasing the runtime even further. +- **Implemented validation methods**: For checking whether the trees generated by our algorithm are indeed distinct and equivalent to the input tree with respect to the missing data, we implemented a fast isomorphism check operating directly on our data structures.
+- Since we wanted to implement different versions of the algorithm, we used a **generic enumerator** which relies on callback methods to implement the concrete version of the algorithm (terrace checking, tree counting or multitree construction). This method also allows us to attach **logging and status update decorators** to the algorithm to check the internal computations or monitor the progress of the algorithm. +- Short of support for arbitrary-precision math, our implementation is **fully compatible with Visual C++** in addition to the normal gcc/clang support. + +### Planned: + +- Enumerate subtrees in **parallel**. One challenge would be separation of the workload so that multiple threads have "enough to do". Another challenge would be the merging of the individual threads' results. [https://git.scc.kit.edu/bioinfo2017/terraces/issues/6] +- Finding **good heuristics** for choosing a subtree into which we want to descend first. Ideally, we'd have a nice heuristic that tells us which subtrees and associated constraints probably give us several options to construct a supertree, in which case we can safely answer "yes" to the question "are we on a terrace?". **Ideas**: "Smallest subtree first", "Least constraints first". [https://git.scc.kit.edu/bioinfo2017/terraces/issues/3] +- ... + +## References +[SMS11] Michael J Sanderson, Michelle M McMahon, and Mike Steel. Terraces in phylogenetic +tree space. Science, 333(6041):448–450, 2011. 
\ No newline at end of file diff --git a/terraphast/app/app.cpp b/terraphast/app/app.cpp new file mode 100644 index 000000000..ee79930b3 --- /dev/null +++ b/terraphast/app/app.cpp @@ -0,0 +1,34 @@ +#include +#include +#include +#include +#include +#include +#include + +#include + +// Command-line front end: takes a tree file and a data file, then prints the terrace size and its trees. +// Exits with status 1 on wrong usage or when a std::exception escapes, so callers can detect failure. +int main(int argc, char** argv) try { + auto tree_file_name = std::string{}; + auto data_file_name = std::string{}; + if (argc == 3) { + tree_file_name = argv[1]; + data_file_name = argv[2]; + } else { + std::cerr << "Usage: \n" << argv[0] << " <tree-file> <data-file>\n"; + return 1; + } + auto trees = std::ostringstream{}; + const auto terraces_count = + terraces::simple::print_terrace_from_file(tree_file_name, data_file_name, trees); + + std::cout << "There are " << terraces_count + << " trees on the terrace.\n\nThe trees in question are:\n" + << trees.str() << '\n'; +} catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << "\n"; + // Leaving this handler without a return would end main with status 0 (success). + return 1; +} diff --git a/terraphast/appveyor.yml b/terraphast/appveyor.yml new file mode 100644 index 000000000..4b5d6aa58 --- /dev/null +++ b/terraphast/appveyor.yml @@ -0,0 +1,15 @@ +version: 1.0.{build} +configuration: Release +platform: +- x64 +- Win32 +clone_depth: 1 +before_build: +- cmd: 'scripts\build_appveyor.bat' +build: + project: terraphast.sln + parallel: true + verbosity: minimal +test_script: +- cmd: 'Release\unittests.exe' + diff --git a/terraphast/c_include/terraces/terraces.h b/terraphast/c_include/terraces/terraces.h new file mode 100644 index 000000000..269592e1a --- /dev/null +++ b/terraphast/c_include/terraces/terraces.h @@ -0,0 +1,203 @@ +#ifndef TERRACES_OLD_H +#define TERRACES_OLD_H + +#include + +#ifdef __cplusplus +#include +using std::size_t; +#define TERRACES_NOEXCEPT noexcept +extern "C" { +#else +#include +#define TERRACES_NOEXCEPT +#endif + +/* + Error Codes + + the return value of our function is an error code + we will define these together as the project proceeds, e.g.
+ 0: successful completion + -1: problem parsing Newick tree + -2: #species in Newick tree does not correspond to number of species in data matrix + -3: entries in data matrix not either 0 or 1 + -4: less than 4 species in input tree + -5: only one partition in data matrix + -6: overflow in number of splits to process + -7: no output file specified + -8: input tree is not a binary tree + -9: there is no root species in the data file (a species present in all partitions) + -10: there is a species with no partition in the data file + -11: conflict between the set flags; can't perform all actions simultaneously + */ + +#define TERRACE_SUCCESS 0 +#define TERRACE_NEWICK_ERROR -1 +#define TERRACE_SPECIES_ERROR -2 +#define TERRACE_MATRIX_ERROR -3 +#define TERRACE_NUM_SPECIES_ERROR -4 +#define TERRACE_NUM_PARTITIONS_ERROR -5 +#define TERRACE_SPLIT_COUNT_OVERFLOW_ERROR -6 +#define TERRACE_OUTPUT_FILE_ERROR -7 +#define TERRACE_TREE_NOT_BINARY_ERROR -8 +#define TERRACE_NO_ROOT_SPECIES_ERROR -9 +#define TERRACE_SPECIES_WITHOUT_PARTITION_ERROR -10 +#define TERRACE_FLAG_CONFLICT_ERROR -11 +#define TERRACE_INTERNAL_ERROR -99 +/* to be extended */ + +/* check for unused return values */ +#if defined(_MSC_VER) && (_MSC_VER >= 1700) +#define CHECK_RESULT _Check_return_ +#else +#define CHECK_RESULT __attribute__((warn_unused_result)) +#endif + +/* Argument to control output of terraceAnalysis function (ta_outspec) */ + +/** + count unrooted trees on terrace + */ +#define TA_COUNT 1 + +/** + print unrooted trees on terrace to file + @TODO: should TA_ENUMERATE automatically imply TA_COUNT? + @TODO: Yes it should! + */ +#define TA_ENUMERATE 2 + +/** + just detect if the tree is on a terrace. this should run much quicker than TA_COUNT or + TA_ENUMERATE, + because we can break off, as soon as we have found that there are at least two trees + on the terrace. + @TODO: what should the output look like in this case? 
+ @TODO: return any integer in terraceSize larger than 1 if the tree is on a terrace + */ +#define TA_DETECT 4 + +/** + print trees on a terrace in compressed form using some external binary tree compression tool. + optional, does not need to be implemented, only if you want to. + */ +#define TA_ENUMERATE_COMPRESS 8 + +/** + take a maximum comprehensive subset of the partitions if no comprehensive taxon exists. + This leads to an over-estimation of the terrace size, but works with every data set. + */ +#define TA_UPPER_BOUND 16 + +// data type containing data to be passed to the algorithm we want to implement + +typedef struct { + size_t numberOfSpecies; + size_t numberOfPartitions; + unsigned char* missingDataMatrix; + const char** speciesNames; + bool allocatedNameArray; +} missingData; + +/** + * Initialize missing data data type + * + * @param numberOfSpecies number of species in dataset + * + * @param numberOfPartitions number of partitions in dataset + * + * @param speciesNames list of species names in dataset, first entry corresponds to first row in + * missingDataMatrix etc. 
+ * + * @return pointer to missing data data structure + */ + +missingData* initializeMissingData(size_t numberOfSpecies, size_t numberOfPartitions, + const char** speciesNames); + +/** + * Free missing data data structure + * + * @param m pointer to missing data data structure + */ + +void freeMissingData(missingData* m); + +/** + * set entry in missing data matrix + * + * @param m pointer to missing data data structure + * + * @param speciesNumber species index + * + * @param partitionNumber partition index + * + * @param value value to be set + */ + +void setDataMatrix(missingData* m, size_t speciesNumber, size_t partitionNumber, + unsigned char value); + +/** + * get entry from missing data matrix + * + * @param m pointer to missing data data structure + * + * @param speciesNumber species index + * + * @param partitionNumber partition index + * + * @return the value at the specified matrix position + */ + +unsigned char getDataMatrix(const missingData* m, size_t speciesNumber, size_t partitionNumber); + +/** + * copy one dimensional array containing the missing data matrix to the matrix in the missing data + * data type + * + * @param matrix one-dimensional array of numberOfSpecies * numberOfPartitions entries + * + * @param m pointer to missing data data structure + * + */ + +void copyDataMatrix(const unsigned char* matrix, missingData* m); + +/** + * Function that tells us, given a tree, and a missing data matrix as well as its dimensions, + * if the tree is on a terrace, how many trees there are on the terrace, it also prints trees on the + * terrace + * to file, if specified and might compress them as well. + * + * We might need to change the data type of variable terraceSize that is being written in this + * function. + * Why? 
+ * + * @param[in] m struct containing missing data matrix, number of partitions, number of species, and + * list of + * species names + * + * @param[in] newickTreeString Unrooted strictly binary phylogenetic tree, + * in Newick format for which we want to find out if it is on a terrace. + * Denoted by T in the task specification + * + * @param[in] ta_outspec bit-masked integer as combination of TA_* constants to control the outputs + * + * @param[out] allTreesOnTerraceFile output file name for unrooted tree enumeration. + * Trees should be displayed in standard unrooted Newick format, and you should print one tree per + * line. + * + * @param[out] terraceSize number of unrooted trees on the terrace + * + * @return TERRACE_SUCCESS on success, or an error code (see TERRACE_*) on failure + */ +CHECK_RESULT int terraceAnalysis(missingData* m, const char* newickTreeString, int ta_outspec, + const char* allTreesOnTerraceFile, mpz_t terraceSize); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* TERRACES_OLD_H */ diff --git a/terraphast/c_lib/terraces.cpp b/terraphast/c_lib/terraces.cpp new file mode 100644 index 000000000..d38ddbc5b --- /dev/null +++ b/terraphast/c_lib/terraces.cpp @@ -0,0 +1,153 @@ +#include "../lib/trees_impl.hpp" +#include +#include +#include +#include +#include +#include +#include + +missingData* initializeMissingData(size_t numberOfSpecies, size_t numberOfPartitions, + const char** speciesNames) { + auto data = new missingData; + data->numberOfSpecies = numberOfSpecies; + data->numberOfPartitions = numberOfPartitions; + data->allocatedNameArray = false; // TODO What is this entry? 
+ data->speciesNames = speciesNames; + data->missingDataMatrix = new unsigned char[numberOfSpecies * numberOfPartitions](); + return data; +} + +void freeMissingData(missingData* m) { + delete[] m->missingDataMatrix; + delete m; +} + +void setDataMatrix(missingData* m, size_t speciesNumber, size_t partitionNumber, + unsigned char value) { + m->missingDataMatrix[speciesNumber * m->numberOfPartitions + partitionNumber] = value; +} + +unsigned char getDataMatrix(const missingData* m, size_t speciesNumber, size_t partitionNumber) { + return m->missingDataMatrix[speciesNumber * m->numberOfPartitions + partitionNumber]; +} + +void copyDataMatrix(const unsigned char* matrix, missingData* m) { + std::copy_n(matrix, m->numberOfSpecies * m->numberOfPartitions, m->missingDataMatrix); +} + +CHECK_RESULT int terraceAnalysis(missingData* m, const char* newickTreeString, const int ta_outspec, + const char* allTreesOnTerraceFile, mpz_t terraceSize) { + // check ta_outspec + auto detect = bool(ta_outspec & TA_DETECT); + auto count = bool(ta_outspec & TA_COUNT); + auto enumerate = bool(ta_outspec & TA_ENUMERATE); + auto compress = bool(ta_outspec & TA_ENUMERATE_COMPRESS); + auto force_comprehensive = bool(ta_outspec & TA_UPPER_BOUND); + bool invalid1 = detect && (count || enumerate); // cannot detect and count at the same time + bool invalid2 = compress && !enumerate; // cannot compress if we don't enumerate + if (invalid1 || invalid2) { + return TERRACE_FLAG_CONFLICT_ERROR; + } + + // check input sizes + if (m->numberOfPartitions < 2) { + return TERRACE_NUM_PARTITIONS_ERROR; + } + if (m->numberOfSpecies < 4) { + return TERRACE_NUM_SPECIES_ERROR; + } + + // copy missing data matrix + terraces::bitmatrix matrix{m->numberOfSpecies, m->numberOfPartitions}; + for (size_t row = 0; row < m->numberOfSpecies; ++row) { + size_t rowcount = 0; + for (size_t col = 0; col < m->numberOfPartitions; ++col) { + auto val = m->missingDataMatrix[row * m->numberOfPartitions + col]; + if (val != 0 && val 
!= 1) { + return TERRACE_MATRIX_ERROR; + } + matrix.set(row, col, val); + rowcount += val; + } + if (rowcount == 0) { + return TERRACE_SPECIES_WITHOUT_PARTITION_ERROR; + } + } + + // copy names + terraces::name_map names; + terraces::index_map name_index; + for (size_t spec_i = 0; spec_i < m->numberOfSpecies; ++spec_i) { + names.emplace_back(m->speciesNames[spec_i]); + if (!name_index.insert({names.back(), spec_i}).second) { + return TERRACE_SPECIES_ERROR; + } + } + + // parse newick tree + terraces::tree tree; + try { + tree = terraces::parse_nwk(newickTreeString, name_index); + } catch (const terraces::bad_input_error& err) { + switch (err.type()) { + case terraces::bad_input_error_type::nwk_multifurcating: + return TERRACE_TREE_NOT_BINARY_ERROR; + case terraces::bad_input_error_type::nwk_taxon_duplicate: + return TERRACE_SPECIES_ERROR; + default: + return TERRACE_NEWICK_ERROR; + } + } + if (terraces::num_leaves_from_nodes(tree.size()) != m->numberOfSpecies) { + return TERRACE_SPECIES_ERROR; + } + + // prepare data + if (force_comprehensive) { + matrix = terraces::maximum_comprehensive_columnset(matrix); + } + + terraces::supertree_data data; + try { + data = terraces::create_supertree_data(tree, matrix); + } catch (const terraces::bad_input_error&) { + return TERRACE_INTERNAL_ERROR; + } catch (const terraces::no_usable_root_error&) { + return TERRACE_NO_ROOT_SPECIES_ERROR; + } + + // enumerate terrace + if (detect) { + auto lb = terraces::fast_count_terrace(data); + mpz_set_ui(terraceSize, lb); + } else if (count && !enumerate) { + try { + auto size = terraces::count_terrace_bigint(data); + mpz_set(terraceSize, size.value().get_mpz_t()); + } catch (const terraces::tree_count_overflow_error&) { + return TERRACE_SPLIT_COUNT_OVERFLOW_ERROR; + } + } else { + auto ofs = std::ofstream{allTreesOnTerraceFile}; + if (not ofs.is_open()) { + return TERRACE_OUTPUT_FILE_ERROR; + } + mpz_class size; + try { + if (compress) { + size = terraces::print_terrace_compressed(data, 
names, ofs).value(); + } else { + size = terraces::print_terrace(data, names, ofs).value(); + } + if (count) { + mpz_set(terraceSize, size.get_mpz_t()); + } + } catch (std::ifstream::failure&) { + return TERRACE_OUTPUT_FILE_ERROR; + } catch (const terraces::tree_count_overflow_error&) { + return TERRACE_SPLIT_COUNT_OVERFLOW_ERROR; + } + } + return TERRACE_SUCCESS; +} diff --git a/terraphast/catch/LICENSE.txt b/terraphast/catch/LICENSE.txt new file mode 100644 index 000000000..36b7cd93c --- /dev/null +++ b/terraphast/catch/LICENSE.txt @@ -0,0 +1,23 @@ +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
diff --git a/terraphast/catch/catch.hpp b/terraphast/catch/catch.hpp new file mode 100644 index 000000000..fdb046fe4 --- /dev/null +++ b/terraphast/catch/catch.hpp @@ -0,0 +1,11689 @@ +/* + * Catch v1.12.2 + * Generated: 2018-05-14 15:10:01.112442 + * ---------------------------------------------------------- + * This file has been merged from multiple headers. Please don't edit it directly + * Copyright (c) 2012 Two Blue Cubes Ltd. All rights reserved. + * + * Distributed under the Boost Software License, Version 1.0. (See accompanying + * file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + */ +#ifndef TWOBLUECUBES_SINGLE_INCLUDE_CATCH_HPP_INCLUDED +#define TWOBLUECUBES_SINGLE_INCLUDE_CATCH_HPP_INCLUDED + +#define TWOBLUECUBES_CATCH_HPP_INCLUDED + +#ifdef __clang__ +# pragma clang system_header +#elif defined __GNUC__ +# pragma GCC system_header +#endif + +// #included from: internal/catch_suppress_warnings.h + +#ifdef __clang__ +# ifdef __ICC // icpc defines the __clang__ macro +# pragma warning(push) +# pragma warning(disable: 161 1682) +# else // __ICC +# pragma clang diagnostic ignored "-Wglobal-constructors" +# pragma clang diagnostic ignored "-Wvariadic-macros" +# pragma clang diagnostic ignored "-Wc99-extensions" +# pragma clang diagnostic ignored "-Wunused-variable" +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wpadded" +# pragma clang diagnostic ignored "-Wc++98-compat" +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +# pragma clang diagnostic ignored "-Wswitch-enum" +# pragma clang diagnostic ignored "-Wcovered-switch-default" +# endif +#elif defined __GNUC__ +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# pragma GCC diagnostic ignored "-Wunused-variable" +# pragma GCC diagnostic ignored "-Wparentheses" + +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wpadded" +#endif +#if defined(CATCH_CONFIG_MAIN) || defined(CATCH_CONFIG_RUNNER) +# define CATCH_IMPL +#endif + +#ifdef 
CATCH_IMPL +# ifndef CLARA_CONFIG_MAIN +# define CLARA_CONFIG_MAIN_NOT_DEFINED +# define CLARA_CONFIG_MAIN +# endif +#endif + +// #included from: internal/catch_notimplemented_exception.h +#define TWOBLUECUBES_CATCH_NOTIMPLEMENTED_EXCEPTION_H_INCLUDED + +// #included from: catch_common.h +#define TWOBLUECUBES_CATCH_COMMON_H_INCLUDED + +// #included from: catch_compiler_capabilities.h +#define TWOBLUECUBES_CATCH_COMPILER_CAPABILITIES_HPP_INCLUDED + +// Detect a number of compiler features - mostly C++11/14 conformance - by compiler +// The following features are defined: +// +// CATCH_CONFIG_CPP11_NULLPTR : is nullptr supported? +// CATCH_CONFIG_CPP11_NOEXCEPT : is noexcept supported? +// CATCH_CONFIG_CPP11_GENERATED_METHODS : The delete and default keywords for compiler generated methods +// CATCH_CONFIG_CPP11_IS_ENUM : std::is_enum is supported? +// CATCH_CONFIG_CPP11_TUPLE : std::tuple is supported +// CATCH_CONFIG_CPP11_LONG_LONG : is long long supported? +// CATCH_CONFIG_CPP11_OVERRIDE : is override supported? +// CATCH_CONFIG_CPP11_UNIQUE_PTR : is unique_ptr supported (otherwise use auto_ptr) +// CATCH_CONFIG_CPP11_SHUFFLE : is std::shuffle supported? +// CATCH_CONFIG_CPP11_TYPE_TRAITS : are type_traits and enable_if supported? + +// CATCH_CONFIG_CPP11_OR_GREATER : Is C++11 supported? + +// CATCH_CONFIG_VARIADIC_MACROS : are variadic macros supported? +// CATCH_CONFIG_COUNTER : is the __COUNTER__ macro supported? +// CATCH_CONFIG_WINDOWS_SEH : is Windows SEH supported? +// CATCH_CONFIG_POSIX_SIGNALS : are POSIX signals supported? +// **************** +// Note to maintainers: if new toggles are added please document them +// in configuration.md, too +// **************** + +// In general each macro has a _NO_ form +// (e.g. CATCH_CONFIG_CPP11_NO_NULLPTR) which disables the feature. +// Many features, at point of detection, define an _INTERNAL_ macro, so they +// can be combined, en-mass, with the _NO_ forms later. 
+ +// All the C++11 features can be disabled with CATCH_CONFIG_NO_CPP11 + +#ifdef __cplusplus + +# if __cplusplus >= 201103L +# define CATCH_CPP11_OR_GREATER +# endif + +# if __cplusplus >= 201402L +# define CATCH_CPP14_OR_GREATER +# endif + +#endif + +#ifdef __clang__ + +# if __has_feature(cxx_nullptr) +# define CATCH_INTERNAL_CONFIG_CPP11_NULLPTR +# endif + +# if __has_feature(cxx_noexcept) +# define CATCH_INTERNAL_CONFIG_CPP11_NOEXCEPT +# endif + +# if defined(CATCH_CPP11_OR_GREATER) +# define CATCH_INTERNAL_SUPPRESS_ETD_WARNINGS \ + _Pragma( "clang diagnostic push" ) \ + _Pragma( "clang diagnostic ignored \"-Wexit-time-destructors\"" ) +# define CATCH_INTERNAL_UNSUPPRESS_ETD_WARNINGS \ + _Pragma( "clang diagnostic pop" ) + +# define CATCH_INTERNAL_SUPPRESS_PARENTHESES_WARNINGS \ + _Pragma( "clang diagnostic push" ) \ + _Pragma( "clang diagnostic ignored \"-Wparentheses\"" ) +# define CATCH_INTERNAL_UNSUPPRESS_PARENTHESES_WARNINGS \ + _Pragma( "clang diagnostic pop" ) +# endif + +#endif // __clang__ + +//////////////////////////////////////////////////////////////////////////////// +// We know some environments not to support full POSIX signals +#if defined(__CYGWIN__) || defined(__QNX__) + +# if !defined(CATCH_CONFIG_POSIX_SIGNALS) +# define CATCH_INTERNAL_CONFIG_NO_POSIX_SIGNALS +# endif + +#endif + +#ifdef __OS400__ +# define CATCH_INTERNAL_CONFIG_NO_POSIX_SIGNALS +# define CATCH_CONFIG_COLOUR_NONE +#endif + +//////////////////////////////////////////////////////////////////////////////// +// Cygwin +#ifdef __CYGWIN__ + +// Required for some versions of Cygwin to declare gettimeofday +// see: http://stackoverflow.com/questions/36901803/gettimeofday-not-declared-in-this-scope-cygwin +# define _BSD_SOURCE + +#endif // __CYGWIN__ + +//////////////////////////////////////////////////////////////////////////////// +// Borland +#ifdef __BORLANDC__ + +#endif // __BORLANDC__ + +//////////////////////////////////////////////////////////////////////////////// +// EDG 
+#ifdef __EDG_VERSION__ + +#endif // __EDG_VERSION__ + +//////////////////////////////////////////////////////////////////////////////// +// Digital Mars +#ifdef __DMC__ + +#endif // __DMC__ + +//////////////////////////////////////////////////////////////////////////////// +// GCC +#ifdef __GNUC__ + +# if __GNUC__ == 4 && __GNUC_MINOR__ >= 6 && defined(__GXX_EXPERIMENTAL_CXX0X__) +# define CATCH_INTERNAL_CONFIG_CPP11_NULLPTR +# endif + +// - otherwise more recent versions define __cplusplus >= 201103L +// and will get picked up below + +#endif // __GNUC__ + +//////////////////////////////////////////////////////////////////////////////// +// Visual C++ +#ifdef _MSC_VER + +#define CATCH_INTERNAL_CONFIG_WINDOWS_SEH + +#if (_MSC_VER >= 1600) +# define CATCH_INTERNAL_CONFIG_CPP11_NULLPTR +# define CATCH_INTERNAL_CONFIG_CPP11_UNIQUE_PTR +#endif + +#if (_MSC_VER >= 1900 ) // (VC++ 13 (VS2015)) +#define CATCH_INTERNAL_CONFIG_CPP11_NOEXCEPT +#define CATCH_INTERNAL_CONFIG_CPP11_GENERATED_METHODS +#define CATCH_INTERNAL_CONFIG_CPP11_SHUFFLE +#define CATCH_INTERNAL_CONFIG_CPP11_TYPE_TRAITS +#endif + +#endif // _MSC_VER + +//////////////////////////////////////////////////////////////////////////////// + +// Use variadic macros if the compiler supports them +#if ( defined _MSC_VER && _MSC_VER > 1400 && !defined __EDGE__) || \ + ( defined __WAVE__ && __WAVE_HAS_VARIADICS ) || \ + ( defined __GNUC__ && __GNUC__ >= 3 ) || \ + ( !defined __cplusplus && __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L ) + +#define CATCH_INTERNAL_CONFIG_VARIADIC_MACROS + +#endif + +// Use __COUNTER__ if the compiler supports it +#if ( defined _MSC_VER && _MSC_VER >= 1300 ) || \ + ( defined __GNUC__ && ( __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3 )) ) || \ + ( defined __clang__ && __clang_major__ >= 3 ) + +// Use of __COUNTER__ is suppressed during code analysis in CLion/AppCode 2017.2.x and former, +// because __COUNTER__ is not properly handled by it. 
+// This does not affect compilation +#if ( !defined __JETBRAINS_IDE__ || __JETBRAINS_IDE__ >= 20170300L ) + #define CATCH_INTERNAL_CONFIG_COUNTER +#endif + +#endif + +//////////////////////////////////////////////////////////////////////////////// +// C++ language feature support + +// catch all support for C++11 +#if defined(CATCH_CPP11_OR_GREATER) + +# if !defined(CATCH_INTERNAL_CONFIG_CPP11_NULLPTR) +# define CATCH_INTERNAL_CONFIG_CPP11_NULLPTR +# endif + +# ifndef CATCH_INTERNAL_CONFIG_CPP11_NOEXCEPT +# define CATCH_INTERNAL_CONFIG_CPP11_NOEXCEPT +# endif + +# ifndef CATCH_INTERNAL_CONFIG_CPP11_GENERATED_METHODS +# define CATCH_INTERNAL_CONFIG_CPP11_GENERATED_METHODS +# endif + +# ifndef CATCH_INTERNAL_CONFIG_CPP11_IS_ENUM +# define CATCH_INTERNAL_CONFIG_CPP11_IS_ENUM +# endif + +# ifndef CATCH_INTERNAL_CONFIG_CPP11_TUPLE +# define CATCH_INTERNAL_CONFIG_CPP11_TUPLE +# endif + +# ifndef CATCH_INTERNAL_CONFIG_VARIADIC_MACROS +# define CATCH_INTERNAL_CONFIG_VARIADIC_MACROS +# endif + +# if !defined(CATCH_INTERNAL_CONFIG_CPP11_LONG_LONG) +# define CATCH_INTERNAL_CONFIG_CPP11_LONG_LONG +# endif + +# if !defined(CATCH_INTERNAL_CONFIG_CPP11_OVERRIDE) +# define CATCH_INTERNAL_CONFIG_CPP11_OVERRIDE +# endif +# if !defined(CATCH_INTERNAL_CONFIG_CPP11_UNIQUE_PTR) +# define CATCH_INTERNAL_CONFIG_CPP11_UNIQUE_PTR +# endif +# if !defined(CATCH_INTERNAL_CONFIG_CPP11_SHUFFLE) +# define CATCH_INTERNAL_CONFIG_CPP11_SHUFFLE +# endif +# if !defined(CATCH_INTERNAL_CONFIG_CPP11_TYPE_TRAITS) +# define CATCH_INTERNAL_CONFIG_CPP11_TYPE_TRAITS +# endif + +#endif // __cplusplus >= 201103L + +// Now set the actual defines based on the above + anything the user has configured +#if defined(CATCH_INTERNAL_CONFIG_CPP11_NULLPTR) && !defined(CATCH_CONFIG_CPP11_NO_NULLPTR) && !defined(CATCH_CONFIG_CPP11_NULLPTR) && !defined(CATCH_CONFIG_NO_CPP11) +# define CATCH_CONFIG_CPP11_NULLPTR +#endif +#if defined(CATCH_INTERNAL_CONFIG_CPP11_NOEXCEPT) && !defined(CATCH_CONFIG_CPP11_NO_NOEXCEPT) && 
!defined(CATCH_CONFIG_CPP11_NOEXCEPT) && !defined(CATCH_CONFIG_NO_CPP11) +# define CATCH_CONFIG_CPP11_NOEXCEPT +#endif +#if defined(CATCH_INTERNAL_CONFIG_CPP11_GENERATED_METHODS) && !defined(CATCH_CONFIG_CPP11_NO_GENERATED_METHODS) && !defined(CATCH_CONFIG_CPP11_GENERATED_METHODS) && !defined(CATCH_CONFIG_NO_CPP11) +# define CATCH_CONFIG_CPP11_GENERATED_METHODS +#endif +#if defined(CATCH_INTERNAL_CONFIG_CPP11_IS_ENUM) && !defined(CATCH_CONFIG_CPP11_NO_IS_ENUM) && !defined(CATCH_CONFIG_CPP11_IS_ENUM) && !defined(CATCH_CONFIG_NO_CPP11) +# define CATCH_CONFIG_CPP11_IS_ENUM +#endif +#if defined(CATCH_INTERNAL_CONFIG_CPP11_TUPLE) && !defined(CATCH_CONFIG_CPP11_NO_TUPLE) && !defined(CATCH_CONFIG_CPP11_TUPLE) && !defined(CATCH_CONFIG_NO_CPP11) +# define CATCH_CONFIG_CPP11_TUPLE +#endif +#if defined(CATCH_INTERNAL_CONFIG_VARIADIC_MACROS) && !defined(CATCH_CONFIG_NO_VARIADIC_MACROS) && !defined(CATCH_CONFIG_VARIADIC_MACROS) +# define CATCH_CONFIG_VARIADIC_MACROS +#endif +#if defined(CATCH_INTERNAL_CONFIG_CPP11_LONG_LONG) && !defined(CATCH_CONFIG_CPP11_NO_LONG_LONG) && !defined(CATCH_CONFIG_CPP11_LONG_LONG) && !defined(CATCH_CONFIG_NO_CPP11) +# define CATCH_CONFIG_CPP11_LONG_LONG +#endif +#if defined(CATCH_INTERNAL_CONFIG_CPP11_OVERRIDE) && !defined(CATCH_CONFIG_CPP11_NO_OVERRIDE) && !defined(CATCH_CONFIG_CPP11_OVERRIDE) && !defined(CATCH_CONFIG_NO_CPP11) +# define CATCH_CONFIG_CPP11_OVERRIDE +#endif +#if defined(CATCH_INTERNAL_CONFIG_CPP11_UNIQUE_PTR) && !defined(CATCH_CONFIG_CPP11_NO_UNIQUE_PTR) && !defined(CATCH_CONFIG_CPP11_UNIQUE_PTR) && !defined(CATCH_CONFIG_NO_CPP11) +# define CATCH_CONFIG_CPP11_UNIQUE_PTR +#endif +#if defined(CATCH_INTERNAL_CONFIG_COUNTER) && !defined(CATCH_CONFIG_NO_COUNTER) && !defined(CATCH_CONFIG_COUNTER) +# define CATCH_CONFIG_COUNTER +#endif +#if defined(CATCH_INTERNAL_CONFIG_CPP11_SHUFFLE) && !defined(CATCH_CONFIG_CPP11_NO_SHUFFLE) && !defined(CATCH_CONFIG_CPP11_SHUFFLE) && !defined(CATCH_CONFIG_NO_CPP11) +# define CATCH_CONFIG_CPP11_SHUFFLE 
+#endif +# if defined(CATCH_INTERNAL_CONFIG_CPP11_TYPE_TRAITS) && !defined(CATCH_CONFIG_CPP11_NO_TYPE_TRAITS) && !defined(CATCH_CONFIG_CPP11_TYPE_TRAITS) && !defined(CATCH_CONFIG_NO_CPP11) +# define CATCH_CONFIG_CPP11_TYPE_TRAITS +# endif +#if defined(CATCH_INTERNAL_CONFIG_WINDOWS_SEH) && !defined(CATCH_CONFIG_NO_WINDOWS_SEH) && !defined(CATCH_CONFIG_WINDOWS_SEH) +# define CATCH_CONFIG_WINDOWS_SEH +#endif +// This is set by default, because we assume that unix compilers are posix-signal-compatible by default. +#if !defined(CATCH_INTERNAL_CONFIG_NO_POSIX_SIGNALS) && !defined(CATCH_CONFIG_NO_POSIX_SIGNALS) && !defined(CATCH_CONFIG_POSIX_SIGNALS) +# define CATCH_CONFIG_POSIX_SIGNALS +#endif + +#if !defined(CATCH_INTERNAL_SUPPRESS_PARENTHESES_WARNINGS) +# define CATCH_INTERNAL_SUPPRESS_PARENTHESES_WARNINGS +# define CATCH_INTERNAL_UNSUPPRESS_PARENTHESES_WARNINGS +#endif +#if !defined(CATCH_INTERNAL_SUPPRESS_ETD_WARNINGS) +# define CATCH_INTERNAL_SUPPRESS_ETD_WARNINGS +# define CATCH_INTERNAL_UNSUPPRESS_ETD_WARNINGS +#endif + +// noexcept support: +#if defined(CATCH_CONFIG_CPP11_NOEXCEPT) && !defined(CATCH_NOEXCEPT) +# define CATCH_NOEXCEPT noexcept +# define CATCH_NOEXCEPT_IS(x) noexcept(x) +#else +# define CATCH_NOEXCEPT throw() +# define CATCH_NOEXCEPT_IS(x) +#endif + +// nullptr support +#ifdef CATCH_CONFIG_CPP11_NULLPTR +# define CATCH_NULL nullptr +#else +# define CATCH_NULL NULL +#endif + +// override support +#ifdef CATCH_CONFIG_CPP11_OVERRIDE +# define CATCH_OVERRIDE override +#else +# define CATCH_OVERRIDE +#endif + +// unique_ptr support +#ifdef CATCH_CONFIG_CPP11_UNIQUE_PTR +# define CATCH_AUTO_PTR( T ) std::unique_ptr +#else +# define CATCH_AUTO_PTR( T ) std::auto_ptr +#endif + +#define INTERNAL_CATCH_UNIQUE_NAME_LINE2( name, line ) name##line +#define INTERNAL_CATCH_UNIQUE_NAME_LINE( name, line ) INTERNAL_CATCH_UNIQUE_NAME_LINE2( name, line ) +#ifdef CATCH_CONFIG_COUNTER +# define INTERNAL_CATCH_UNIQUE_NAME( name ) INTERNAL_CATCH_UNIQUE_NAME_LINE( name, 
__COUNTER__ ) +#else +# define INTERNAL_CATCH_UNIQUE_NAME( name ) INTERNAL_CATCH_UNIQUE_NAME_LINE( name, __LINE__ ) +#endif + +#define INTERNAL_CATCH_STRINGIFY2( expr ) #expr +#define INTERNAL_CATCH_STRINGIFY( expr ) INTERNAL_CATCH_STRINGIFY2( expr ) + +#include +#include + +namespace Catch { + + struct IConfig; + + struct CaseSensitive { enum Choice { + Yes, + No + }; }; + + class NonCopyable { +#ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS + NonCopyable( NonCopyable const& ) = delete; + NonCopyable( NonCopyable && ) = delete; + NonCopyable& operator = ( NonCopyable const& ) = delete; + NonCopyable& operator = ( NonCopyable && ) = delete; +#else + NonCopyable( NonCopyable const& info ); + NonCopyable& operator = ( NonCopyable const& ); +#endif + + protected: + NonCopyable() {} + virtual ~NonCopyable(); + }; + + class SafeBool { + public: + typedef void (SafeBool::*type)() const; + + static type makeSafe( bool value ) { + return value ? &SafeBool::trueValue : 0; + } + private: + void trueValue() const {} + }; + + template + void deleteAll( ContainerT& container ) { + typename ContainerT::const_iterator it = container.begin(); + typename ContainerT::const_iterator itEnd = container.end(); + for(; it != itEnd; ++it ) + delete *it; + } + template + void deleteAllValues( AssociativeContainerT& container ) { + typename AssociativeContainerT::const_iterator it = container.begin(); + typename AssociativeContainerT::const_iterator itEnd = container.end(); + for(; it != itEnd; ++it ) + delete it->second; + } + + bool startsWith( std::string const& s, std::string const& prefix ); + bool startsWith( std::string const& s, char prefix ); + bool endsWith( std::string const& s, std::string const& suffix ); + bool endsWith( std::string const& s, char suffix ); + bool contains( std::string const& s, std::string const& infix ); + void toLowerInPlace( std::string& s ); + std::string toLower( std::string const& s ); + std::string trim( std::string const& str ); + bool replaceInPlace( 
std::string& str, std::string const& replaceThis, std::string const& withThis ); + + struct pluralise { + pluralise( std::size_t count, std::string const& label ); + + friend std::ostream& operator << ( std::ostream& os, pluralise const& pluraliser ); + + std::size_t m_count; + std::string m_label; + }; + + struct SourceLineInfo { + + SourceLineInfo(); + SourceLineInfo( char const* _file, std::size_t _line ); +# ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS + SourceLineInfo(SourceLineInfo const& other) = default; + SourceLineInfo( SourceLineInfo && ) = default; + SourceLineInfo& operator = ( SourceLineInfo const& ) = default; + SourceLineInfo& operator = ( SourceLineInfo && ) = default; +# endif + bool empty() const; + bool operator == ( SourceLineInfo const& other ) const; + bool operator < ( SourceLineInfo const& other ) const; + + char const* file; + std::size_t line; + }; + + std::ostream& operator << ( std::ostream& os, SourceLineInfo const& info ); + + // This is just here to avoid compiler warnings with macro constants and boolean literals + inline bool isTrue( bool value ){ return value; } + inline bool alwaysTrue() { return true; } + inline bool alwaysFalse() { return false; } + + void throwLogicError( std::string const& message, SourceLineInfo const& locationInfo ); + + void seedRng( IConfig const& config ); + unsigned int rngSeed(); + + // Use this in variadic streaming macros to allow + // >> +StreamEndStop + // as well as + // >> stuff +StreamEndStop + struct StreamEndStop { + std::string operator+() { + return std::string(); + } + }; + template + T const& operator + ( T const& value, StreamEndStop ) { + return value; + } +} + +#define CATCH_INTERNAL_LINEINFO ::Catch::SourceLineInfo( __FILE__, static_cast( __LINE__ ) ) +#define CATCH_INTERNAL_ERROR( msg ) ::Catch::throwLogicError( msg, CATCH_INTERNAL_LINEINFO ); + +namespace Catch { + + class NotImplementedException : public std::exception + { + public: + NotImplementedException( SourceLineInfo const& 
lineInfo ); + + virtual ~NotImplementedException() CATCH_NOEXCEPT {} + + virtual const char* what() const CATCH_NOEXCEPT; + + private: + std::string m_what; + SourceLineInfo m_lineInfo; + }; + +} // end namespace Catch + +/////////////////////////////////////////////////////////////////////////////// +#define CATCH_NOT_IMPLEMENTED throw Catch::NotImplementedException( CATCH_INTERNAL_LINEINFO ) + +// #included from: internal/catch_context.h +#define TWOBLUECUBES_CATCH_CONTEXT_H_INCLUDED + +// #included from: catch_interfaces_generators.h +#define TWOBLUECUBES_CATCH_INTERFACES_GENERATORS_H_INCLUDED + +#include + +namespace Catch { + + struct IGeneratorInfo { + virtual ~IGeneratorInfo(); + virtual bool moveNext() = 0; + virtual std::size_t getCurrentIndex() const = 0; + }; + + struct IGeneratorsForTest { + virtual ~IGeneratorsForTest(); + + virtual IGeneratorInfo& getGeneratorInfo( std::string const& fileInfo, std::size_t size ) = 0; + virtual bool moveNext() = 0; + }; + + IGeneratorsForTest* createGeneratorsForTest(); + +} // end namespace Catch + +// #included from: catch_ptr.hpp +#define TWOBLUECUBES_CATCH_PTR_HPP_INCLUDED + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wpadded" +#endif + +namespace Catch { + + // An intrusive reference counting smart pointer. 
+ // T must implement addRef() and release() methods + // typically implementing the IShared interface + template + class Ptr { + public: + Ptr() : m_p( CATCH_NULL ){} + Ptr( T* p ) : m_p( p ){ + if( m_p ) + m_p->addRef(); + } + Ptr( Ptr const& other ) : m_p( other.m_p ){ + if( m_p ) + m_p->addRef(); + } + ~Ptr(){ + if( m_p ) + m_p->release(); + } + void reset() { + if( m_p ) + m_p->release(); + m_p = CATCH_NULL; + } + Ptr& operator = ( T* p ){ + Ptr temp( p ); + swap( temp ); + return *this; + } + Ptr& operator = ( Ptr const& other ){ + Ptr temp( other ); + swap( temp ); + return *this; + } + void swap( Ptr& other ) { std::swap( m_p, other.m_p ); } + T* get() const{ return m_p; } + T& operator*() const { return *m_p; } + T* operator->() const { return m_p; } + bool operator !() const { return m_p == CATCH_NULL; } + operator SafeBool::type() const { return SafeBool::makeSafe( m_p != CATCH_NULL ); } + + private: + T* m_p; + }; + + struct IShared : NonCopyable { + virtual ~IShared(); + virtual void addRef() const = 0; + virtual void release() const = 0; + }; + + template + struct SharedImpl : T { + + SharedImpl() : m_rc( 0 ){} + + virtual void addRef() const { + ++m_rc; + } + virtual void release() const { + if( --m_rc == 0 ) + delete this; + } + + mutable unsigned int m_rc; + }; + +} // end namespace Catch + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +namespace Catch { + + class TestCase; + class Stream; + struct IResultCapture; + struct IRunner; + struct IGeneratorsForTest; + struct IConfig; + + struct IContext + { + virtual ~IContext(); + + virtual IResultCapture* getResultCapture() = 0; + virtual IRunner* getRunner() = 0; + virtual size_t getGeneratorIndex( std::string const& fileInfo, size_t totalSize ) = 0; + virtual bool advanceGeneratorsForCurrentTest() = 0; + virtual Ptr getConfig() const = 0; + }; + + struct IMutableContext : IContext + { + virtual ~IMutableContext(); + virtual void setResultCapture( IResultCapture* resultCapture ) = 0; + 
virtual void setRunner( IRunner* runner ) = 0; + virtual void setConfig( Ptr const& config ) = 0; + }; + + IContext& getCurrentContext(); + IMutableContext& getCurrentMutableContext(); + void cleanUpContext(); + Stream createStream( std::string const& streamName ); + +} + +// #included from: internal/catch_test_registry.hpp +#define TWOBLUECUBES_CATCH_TEST_REGISTRY_HPP_INCLUDED + +// #included from: catch_interfaces_testcase.h +#define TWOBLUECUBES_CATCH_INTERFACES_TESTCASE_H_INCLUDED + +#include + +namespace Catch { + + class TestSpec; + + struct ITestCase : IShared { + virtual void invoke () const = 0; + protected: + virtual ~ITestCase(); + }; + + class TestCase; + struct IConfig; + + struct ITestCaseRegistry { + virtual ~ITestCaseRegistry(); + virtual std::vector const& getAllTests() const = 0; + virtual std::vector const& getAllTestsSorted( IConfig const& config ) const = 0; + }; + + bool matchTest( TestCase const& testCase, TestSpec const& testSpec, IConfig const& config ); + std::vector filterTests( std::vector const& testCases, TestSpec const& testSpec, IConfig const& config ); + std::vector const& getAllTestCasesSorted( IConfig const& config ); + +} + +namespace Catch { + +template +class MethodTestCase : public SharedImpl { + +public: + MethodTestCase( void (C::*method)() ) : m_method( method ) {} + + virtual void invoke() const { + C obj; + (obj.*m_method)(); + } + +private: + virtual ~MethodTestCase() {} + + void (C::*m_method)(); +}; + +typedef void(*TestFunction)(); + +struct NameAndDesc { + NameAndDesc( const char* _name = "", const char* _description= "" ) + : name( _name ), description( _description ) + {} + + const char* name; + const char* description; +}; + +void registerTestCase + ( ITestCase* testCase, + char const* className, + NameAndDesc const& nameAndDesc, + SourceLineInfo const& lineInfo ); + +struct AutoReg { + + AutoReg + ( TestFunction function, + SourceLineInfo const& lineInfo, + NameAndDesc const& nameAndDesc ); + + template + AutoReg 
+ ( void (C::*method)(), + char const* className, + NameAndDesc const& nameAndDesc, + SourceLineInfo const& lineInfo ) { + + registerTestCase + ( new MethodTestCase( method ), + className, + nameAndDesc, + lineInfo ); + } + + ~AutoReg(); + +private: + AutoReg( AutoReg const& ); + void operator= ( AutoReg const& ); +}; + +void registerTestCaseFunction + ( TestFunction function, + SourceLineInfo const& lineInfo, + NameAndDesc const& nameAndDesc ); + +} // end namespace Catch + +#ifdef CATCH_CONFIG_VARIADIC_MACROS + /////////////////////////////////////////////////////////////////////////////// + #define INTERNAL_CATCH_TESTCASE2( TestName, ... ) \ + static void TestName(); \ + CATCH_INTERNAL_SUPPRESS_ETD_WARNINGS \ + namespace{ Catch::AutoReg INTERNAL_CATCH_UNIQUE_NAME( autoRegistrar )( &TestName, CATCH_INTERNAL_LINEINFO, Catch::NameAndDesc( __VA_ARGS__ ) ); } /* NOLINT */ \ + CATCH_INTERNAL_UNSUPPRESS_ETD_WARNINGS \ + static void TestName() + #define INTERNAL_CATCH_TESTCASE( ... ) \ + INTERNAL_CATCH_TESTCASE2( INTERNAL_CATCH_UNIQUE_NAME( ____C_A_T_C_H____T_E_S_T____ ), __VA_ARGS__ ) + + /////////////////////////////////////////////////////////////////////////////// + #define INTERNAL_CATCH_METHOD_AS_TEST_CASE( QualifiedMethod, ... ) \ + CATCH_INTERNAL_SUPPRESS_ETD_WARNINGS \ + namespace{ Catch::AutoReg INTERNAL_CATCH_UNIQUE_NAME( autoRegistrar )( &QualifiedMethod, "&" #QualifiedMethod, Catch::NameAndDesc( __VA_ARGS__ ), CATCH_INTERNAL_LINEINFO ); } /* NOLINT */ \ + CATCH_INTERNAL_UNSUPPRESS_ETD_WARNINGS + + /////////////////////////////////////////////////////////////////////////////// + #define INTERNAL_CATCH_TEST_CASE_METHOD2( TestName, ClassName, ... 
)\ + CATCH_INTERNAL_SUPPRESS_ETD_WARNINGS \ + namespace{ \ + struct TestName : ClassName{ \ + void test(); \ + }; \ + Catch::AutoReg INTERNAL_CATCH_UNIQUE_NAME( autoRegistrar ) ( &TestName::test, #ClassName, Catch::NameAndDesc( __VA_ARGS__ ), CATCH_INTERNAL_LINEINFO ); /* NOLINT */ \ + } \ + CATCH_INTERNAL_UNSUPPRESS_ETD_WARNINGS \ + void TestName::test() + #define INTERNAL_CATCH_TEST_CASE_METHOD( ClassName, ... ) \ + INTERNAL_CATCH_TEST_CASE_METHOD2( INTERNAL_CATCH_UNIQUE_NAME( ____C_A_T_C_H____T_E_S_T____ ), ClassName, __VA_ARGS__ ) + + /////////////////////////////////////////////////////////////////////////////// + #define INTERNAL_CATCH_REGISTER_TESTCASE( Function, ... ) \ + CATCH_INTERNAL_SUPPRESS_ETD_WARNINGS \ + Catch::AutoReg( Function, CATCH_INTERNAL_LINEINFO, Catch::NameAndDesc( __VA_ARGS__ ) ); /* NOLINT */ \ + CATCH_INTERNAL_UNSUPPRESS_ETD_WARNINGS + +#else + /////////////////////////////////////////////////////////////////////////////// + #define INTERNAL_CATCH_TESTCASE2( TestName, Name, Desc ) \ + static void TestName(); \ + CATCH_INTERNAL_SUPPRESS_ETD_WARNINGS \ + namespace{ Catch::AutoReg INTERNAL_CATCH_UNIQUE_NAME( autoRegistrar )( &TestName, CATCH_INTERNAL_LINEINFO, Catch::NameAndDesc( Name, Desc ) ); } /* NOLINT */ \ + CATCH_INTERNAL_UNSUPPRESS_ETD_WARNINGS \ + static void TestName() + #define INTERNAL_CATCH_TESTCASE( Name, Desc ) \ + INTERNAL_CATCH_TESTCASE2( INTERNAL_CATCH_UNIQUE_NAME( ____C_A_T_C_H____T_E_S_T____ ), Name, Desc ) + + /////////////////////////////////////////////////////////////////////////////// + #define INTERNAL_CATCH_METHOD_AS_TEST_CASE( QualifiedMethod, Name, Desc ) \ + CATCH_INTERNAL_SUPPRESS_ETD_WARNINGS \ + namespace{ Catch::AutoReg INTERNAL_CATCH_UNIQUE_NAME( autoRegistrar )( &QualifiedMethod, "&" #QualifiedMethod, Catch::NameAndDesc( Name, Desc ), CATCH_INTERNAL_LINEINFO ); } /* NOLINT */ \ + CATCH_INTERNAL_UNSUPPRESS_ETD_WARNINGS + + /////////////////////////////////////////////////////////////////////////////// + 
#define INTERNAL_CATCH_TEST_CASE_METHOD2( TestCaseName, ClassName, TestName, Desc )\ + CATCH_INTERNAL_SUPPRESS_ETD_WARNINGS \ + namespace{ \ + struct TestCaseName : ClassName{ \ + void test(); \ + }; \ + Catch::AutoReg INTERNAL_CATCH_UNIQUE_NAME( autoRegistrar ) ( &TestCaseName::test, #ClassName, Catch::NameAndDesc( TestName, Desc ), CATCH_INTERNAL_LINEINFO ); /* NOLINT */ \ + } \ + CATCH_INTERNAL_UNSUPPRESS_ETD_WARNINGS \ + void TestCaseName::test() + #define INTERNAL_CATCH_TEST_CASE_METHOD( ClassName, TestName, Desc )\ + INTERNAL_CATCH_TEST_CASE_METHOD2( INTERNAL_CATCH_UNIQUE_NAME( ____C_A_T_C_H____T_E_S_T____ ), ClassName, TestName, Desc ) + + /////////////////////////////////////////////////////////////////////////////// + #define INTERNAL_CATCH_REGISTER_TESTCASE( Function, Name, Desc ) \ + CATCH_INTERNAL_SUPPRESS_ETD_WARNINGS \ + Catch::AutoReg( Function, CATCH_INTERNAL_LINEINFO, Catch::NameAndDesc( Name, Desc ) ); /* NOLINT */ \ + CATCH_INTERNAL_UNSUPPRESS_ETD_WARNINGS + +#endif + +// #included from: internal/catch_capture.hpp +#define TWOBLUECUBES_CATCH_CAPTURE_HPP_INCLUDED + +// #included from: catch_result_builder.h +#define TWOBLUECUBES_CATCH_RESULT_BUILDER_H_INCLUDED + +// #included from: catch_result_type.h +#define TWOBLUECUBES_CATCH_RESULT_TYPE_H_INCLUDED + +namespace Catch { + + // ResultWas::OfType enum + struct ResultWas { enum OfType { + Unknown = -1, + Ok = 0, + Info = 1, + Warning = 2, + + FailureBit = 0x10, + + ExpressionFailed = FailureBit | 1, + ExplicitFailure = FailureBit | 2, + + Exception = 0x100 | FailureBit, + + ThrewException = Exception | 1, + DidntThrowException = Exception | 2, + + FatalErrorCondition = 0x200 | FailureBit + + }; }; + + inline bool isOk( ResultWas::OfType resultType ) { + return ( resultType & ResultWas::FailureBit ) == 0; + } + inline bool isJustInfo( int flags ) { + return flags == ResultWas::Info; + } + + // ResultDisposition::Flags enum + struct ResultDisposition { enum Flags { + Normal = 0x01, + + 
ContinueOnFailure = 0x02, // Failures fail test, but execution continues + FalseTest = 0x04, // Prefix expression with ! + SuppressFail = 0x08 // Failures are reported but do not fail the test + }; }; + + inline ResultDisposition::Flags operator | ( ResultDisposition::Flags lhs, ResultDisposition::Flags rhs ) { + return static_cast( static_cast( lhs ) | static_cast( rhs ) ); + } + + inline bool shouldContinueOnFailure( int flags ) { return ( flags & ResultDisposition::ContinueOnFailure ) != 0; } + inline bool isFalseTest( int flags ) { return ( flags & ResultDisposition::FalseTest ) != 0; } + inline bool shouldSuppressFailure( int flags ) { return ( flags & ResultDisposition::SuppressFail ) != 0; } + +} // end namespace Catch + +// #included from: catch_assertionresult.h +#define TWOBLUECUBES_CATCH_ASSERTIONRESULT_H_INCLUDED + +#include + +namespace Catch { + + struct STATIC_ASSERT_Expression_Too_Complex_Please_Rewrite_As_Binary_Comparison; + + struct DecomposedExpression + { + virtual ~DecomposedExpression() {} + virtual bool isBinaryExpression() const { + return false; + } + virtual void reconstructExpression( std::string& dest ) const = 0; + + // Only simple binary comparisons can be decomposed. + // If more complex check is required then wrap sub-expressions in parentheses. 
+ template STATIC_ASSERT_Expression_Too_Complex_Please_Rewrite_As_Binary_Comparison& operator + ( T const& ); + template STATIC_ASSERT_Expression_Too_Complex_Please_Rewrite_As_Binary_Comparison& operator - ( T const& ); + template STATIC_ASSERT_Expression_Too_Complex_Please_Rewrite_As_Binary_Comparison& operator * ( T const& ); + template STATIC_ASSERT_Expression_Too_Complex_Please_Rewrite_As_Binary_Comparison& operator / ( T const& ); + template STATIC_ASSERT_Expression_Too_Complex_Please_Rewrite_As_Binary_Comparison& operator % ( T const& ); + template STATIC_ASSERT_Expression_Too_Complex_Please_Rewrite_As_Binary_Comparison& operator && ( T const& ); + template STATIC_ASSERT_Expression_Too_Complex_Please_Rewrite_As_Binary_Comparison& operator || ( T const& ); + + private: + DecomposedExpression& operator = (DecomposedExpression const&); + }; + + struct AssertionInfo + { + AssertionInfo(); + AssertionInfo( char const * _macroName, + SourceLineInfo const& _lineInfo, + char const * _capturedExpression, + ResultDisposition::Flags _resultDisposition, + char const * _secondArg = ""); + + char const * macroName; + SourceLineInfo lineInfo; + char const * capturedExpression; + ResultDisposition::Flags resultDisposition; + char const * secondArg; + }; + + struct AssertionResultData + { + AssertionResultData() : decomposedExpression( CATCH_NULL ) + , resultType( ResultWas::Unknown ) + , negated( false ) + , parenthesized( false ) {} + + void negate( bool parenthesize ) { + negated = !negated; + parenthesized = parenthesize; + if( resultType == ResultWas::Ok ) + resultType = ResultWas::ExpressionFailed; + else if( resultType == ResultWas::ExpressionFailed ) + resultType = ResultWas::Ok; + } + + std::string const& reconstructExpression() const { + if( decomposedExpression != CATCH_NULL ) { + decomposedExpression->reconstructExpression( reconstructedExpression ); + if( parenthesized ) { + reconstructedExpression.insert( 0, 1, '(' ); + reconstructedExpression.append( 1, ')' ); 
+ } + if( negated ) { + reconstructedExpression.insert( 0, 1, '!' ); + } + decomposedExpression = CATCH_NULL; + } + return reconstructedExpression; + } + + mutable DecomposedExpression const* decomposedExpression; + mutable std::string reconstructedExpression; + std::string message; + ResultWas::OfType resultType; + bool negated; + bool parenthesized; + }; + + class AssertionResult { + public: + AssertionResult(); + AssertionResult( AssertionInfo const& info, AssertionResultData const& data ); + ~AssertionResult(); +# ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS + AssertionResult( AssertionResult const& ) = default; + AssertionResult( AssertionResult && ) = default; + AssertionResult& operator = ( AssertionResult const& ) = default; + AssertionResult& operator = ( AssertionResult && ) = default; +# endif + + bool isOk() const; + bool succeeded() const; + ResultWas::OfType getResultType() const; + bool hasExpression() const; + bool hasMessage() const; + std::string getExpression() const; + std::string getExpressionInMacro() const; + bool hasExpandedExpression() const; + std::string getExpandedExpression() const; + std::string getMessage() const; + SourceLineInfo getSourceInfo() const; + std::string getTestMacroName() const; + void discardDecomposedExpression() const; + void expandDecomposedExpression() const; + + protected: + AssertionInfo m_info; + AssertionResultData m_resultData; + }; + +} // end namespace Catch + +// #included from: catch_matchers.hpp +#define TWOBLUECUBES_CATCH_MATCHERS_HPP_INCLUDED + +namespace Catch { +namespace Matchers { + namespace Impl { + + template struct MatchAllOf; + template struct MatchAnyOf; + template struct MatchNotOf; + + class MatcherUntypedBase { + public: + std::string toString() const { + if( m_cachedToString.empty() ) + m_cachedToString = describe(); + return m_cachedToString; + } + + protected: + virtual ~MatcherUntypedBase(); + virtual std::string describe() const = 0; + mutable std::string m_cachedToString; + private: + 
MatcherUntypedBase& operator = ( MatcherUntypedBase const& ); + }; + + template + struct MatcherMethod { + virtual bool match( ObjectT const& arg ) const = 0; + }; + template + struct MatcherMethod { + virtual bool match( PtrT* arg ) const = 0; + }; + + template + struct MatcherBase : MatcherUntypedBase, MatcherMethod { + + MatchAllOf operator && ( MatcherBase const& other ) const; + MatchAnyOf operator || ( MatcherBase const& other ) const; + MatchNotOf operator ! () const; + }; + + template + struct MatchAllOf : MatcherBase { + virtual bool match( ArgT const& arg ) const CATCH_OVERRIDE { + for( std::size_t i = 0; i < m_matchers.size(); ++i ) { + if (!m_matchers[i]->match(arg)) + return false; + } + return true; + } + virtual std::string describe() const CATCH_OVERRIDE { + std::string description; + description.reserve( 4 + m_matchers.size()*32 ); + description += "( "; + for( std::size_t i = 0; i < m_matchers.size(); ++i ) { + if( i != 0 ) + description += " and "; + description += m_matchers[i]->toString(); + } + description += " )"; + return description; + } + + MatchAllOf& operator && ( MatcherBase const& other ) { + m_matchers.push_back( &other ); + return *this; + } + + std::vector const*> m_matchers; + }; + template + struct MatchAnyOf : MatcherBase { + + virtual bool match( ArgT const& arg ) const CATCH_OVERRIDE { + for( std::size_t i = 0; i < m_matchers.size(); ++i ) { + if (m_matchers[i]->match(arg)) + return true; + } + return false; + } + virtual std::string describe() const CATCH_OVERRIDE { + std::string description; + description.reserve( 4 + m_matchers.size()*32 ); + description += "( "; + for( std::size_t i = 0; i < m_matchers.size(); ++i ) { + if( i != 0 ) + description += " or "; + description += m_matchers[i]->toString(); + } + description += " )"; + return description; + } + + MatchAnyOf& operator || ( MatcherBase const& other ) { + m_matchers.push_back( &other ); + return *this; + } + + std::vector const*> m_matchers; + }; + + template + 
struct MatchNotOf : MatcherBase { + + MatchNotOf( MatcherBase const& underlyingMatcher ) : m_underlyingMatcher( underlyingMatcher ) {} + + virtual bool match( ArgT const& arg ) const CATCH_OVERRIDE { + return !m_underlyingMatcher.match( arg ); + } + + virtual std::string describe() const CATCH_OVERRIDE { + return "not " + m_underlyingMatcher.toString(); + } + MatcherBase const& m_underlyingMatcher; + }; + + template + MatchAllOf MatcherBase::operator && ( MatcherBase const& other ) const { + return MatchAllOf() && *this && other; + } + template + MatchAnyOf MatcherBase::operator || ( MatcherBase const& other ) const { + return MatchAnyOf() || *this || other; + } + template + MatchNotOf MatcherBase::operator ! () const { + return MatchNotOf( *this ); + } + + } // namespace Impl + + // The following functions create the actual matcher objects. + // This allows the types to be inferred + // - deprecated: prefer ||, && and ! + template + Impl::MatchNotOf Not( Impl::MatcherBase const& underlyingMatcher ) { + return Impl::MatchNotOf( underlyingMatcher ); + } + template + Impl::MatchAllOf AllOf( Impl::MatcherBase const& m1, Impl::MatcherBase const& m2 ) { + return Impl::MatchAllOf() && m1 && m2; + } + template + Impl::MatchAllOf AllOf( Impl::MatcherBase const& m1, Impl::MatcherBase const& m2, Impl::MatcherBase const& m3 ) { + return Impl::MatchAllOf() && m1 && m2 && m3; + } + template + Impl::MatchAnyOf AnyOf( Impl::MatcherBase const& m1, Impl::MatcherBase const& m2 ) { + return Impl::MatchAnyOf() || m1 || m2; + } + template + Impl::MatchAnyOf AnyOf( Impl::MatcherBase const& m1, Impl::MatcherBase const& m2, Impl::MatcherBase const& m3 ) { + return Impl::MatchAnyOf() || m1 || m2 || m3; + } + +} // namespace Matchers + +using namespace Matchers; +using Matchers::Impl::MatcherBase; + +} // namespace Catch + +namespace Catch { + + struct TestFailureException{}; + + template class ExpressionLhs; + + struct CopyableStream { + CopyableStream() {} + CopyableStream( CopyableStream 
const& other ) { + oss << other.oss.str(); + } + CopyableStream& operator=( CopyableStream const& other ) { + oss.str(std::string()); + oss << other.oss.str(); + return *this; + } + std::ostringstream oss; + }; + + class ResultBuilder : public DecomposedExpression { + public: + ResultBuilder( char const* macroName, + SourceLineInfo const& lineInfo, + char const* capturedExpression, + ResultDisposition::Flags resultDisposition, + char const* secondArg = "" ); + ~ResultBuilder(); + + template + ExpressionLhs operator <= ( T const& operand ); + ExpressionLhs operator <= ( bool value ); + + template + ResultBuilder& operator << ( T const& value ) { + stream().oss << value; + return *this; + } + + ResultBuilder& setResultType( ResultWas::OfType result ); + ResultBuilder& setResultType( bool result ); + + void endExpression( DecomposedExpression const& expr ); + + virtual void reconstructExpression( std::string& dest ) const CATCH_OVERRIDE; + + AssertionResult build() const; + AssertionResult build( DecomposedExpression const& expr ) const; + + void useActiveException( ResultDisposition::Flags resultDisposition = ResultDisposition::Normal ); + void captureResult( ResultWas::OfType resultType ); + void captureExpression(); + void captureExpectedException( std::string const& expectedMessage ); + void captureExpectedException( Matchers::Impl::MatcherBase const& matcher ); + void handleResult( AssertionResult const& result ); + void react(); + bool shouldDebugBreak() const; + bool allowThrows() const; + + template + void captureMatch( ArgT const& arg, MatcherT const& matcher, char const* matcherString ); + + void setExceptionGuard(); + void unsetExceptionGuard(); + + private: + AssertionInfo m_assertionInfo; + AssertionResultData m_data; + + CopyableStream &stream() + { + if(!m_usedStream) + { + m_usedStream = true; + m_stream().oss.str(""); + } + return m_stream(); + } + + static CopyableStream &m_stream() + { + static CopyableStream s; + return s; + } + + bool 
m_shouldDebugBreak; + bool m_shouldThrow; + bool m_guardException; + bool m_usedStream; + }; + +} // namespace Catch + +// Include after due to circular dependency: +// #included from: catch_expression_lhs.hpp +#define TWOBLUECUBES_CATCH_EXPRESSION_LHS_HPP_INCLUDED + +// #included from: catch_evaluate.hpp +#define TWOBLUECUBES_CATCH_EVALUATE_HPP_INCLUDED + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4389) // '==' : signed/unsigned mismatch +#pragma warning(disable:4018) // more "signed/unsigned mismatch" +#pragma warning(disable:4312) // Converting int to T* using reinterpret_cast (issue on x64 platform) +#endif + +#include + +namespace Catch { +namespace Internal { + + enum Operator { + IsEqualTo, + IsNotEqualTo, + IsLessThan, + IsGreaterThan, + IsLessThanOrEqualTo, + IsGreaterThanOrEqualTo + }; + + template struct OperatorTraits { static const char* getName(){ return "*error*"; } }; + template<> struct OperatorTraits { static const char* getName(){ return "=="; } }; + template<> struct OperatorTraits { static const char* getName(){ return "!="; } }; + template<> struct OperatorTraits { static const char* getName(){ return "<"; } }; + template<> struct OperatorTraits { static const char* getName(){ return ">"; } }; + template<> struct OperatorTraits { static const char* getName(){ return "<="; } }; + template<> struct OperatorTraits{ static const char* getName(){ return ">="; } }; + + template + T& opCast(T const& t) { return const_cast(t); } + +// nullptr_t support based on pull request #154 from Konstantin Baumann +#ifdef CATCH_CONFIG_CPP11_NULLPTR + inline std::nullptr_t opCast(std::nullptr_t) { return nullptr; } +#endif // CATCH_CONFIG_CPP11_NULLPTR + + // So the compare overloads can be operator agnostic we convey the operator as a template + // enum, which is used to specialise an Evaluator for doing the comparison. 
+ template + struct Evaluator{}; + + template + struct Evaluator { + static bool evaluate( T1 const& lhs, T2 const& rhs) { + return bool( opCast( lhs ) == opCast( rhs ) ); + } + }; + template + struct Evaluator { + static bool evaluate( T1 const& lhs, T2 const& rhs ) { + return bool( opCast( lhs ) != opCast( rhs ) ); + } + }; + template + struct Evaluator { + static bool evaluate( T1 const& lhs, T2 const& rhs ) { + return bool( opCast( lhs ) < opCast( rhs ) ); + } + }; + template + struct Evaluator { + static bool evaluate( T1 const& lhs, T2 const& rhs ) { + return bool( opCast( lhs ) > opCast( rhs ) ); + } + }; + template + struct Evaluator { + static bool evaluate( T1 const& lhs, T2 const& rhs ) { + return bool( opCast( lhs ) >= opCast( rhs ) ); + } + }; + template + struct Evaluator { + static bool evaluate( T1 const& lhs, T2 const& rhs ) { + return bool( opCast( lhs ) <= opCast( rhs ) ); + } + }; + + template + bool applyEvaluator( T1 const& lhs, T2 const& rhs ) { + return Evaluator::evaluate( lhs, rhs ); + } + + // This level of indirection allows us to specialise for integer types + // to avoid signed/ unsigned warnings + + // "base" overload + template + bool compare( T1 const& lhs, T2 const& rhs ) { + return Evaluator::evaluate( lhs, rhs ); + } + + // unsigned X to int + template bool compare( unsigned int lhs, int rhs ) { + return applyEvaluator( lhs, static_cast( rhs ) ); + } + template bool compare( unsigned long lhs, int rhs ) { + return applyEvaluator( lhs, static_cast( rhs ) ); + } + template bool compare( unsigned char lhs, int rhs ) { + return applyEvaluator( lhs, static_cast( rhs ) ); + } + + // unsigned X to long + template bool compare( unsigned int lhs, long rhs ) { + return applyEvaluator( lhs, static_cast( rhs ) ); + } + template bool compare( unsigned long lhs, long rhs ) { + return applyEvaluator( lhs, static_cast( rhs ) ); + } + template bool compare( unsigned char lhs, long rhs ) { + return applyEvaluator( lhs, static_cast( rhs ) ); + } + 
+ // int to unsigned X + template bool compare( int lhs, unsigned int rhs ) { + return applyEvaluator( static_cast( lhs ), rhs ); + } + template bool compare( int lhs, unsigned long rhs ) { + return applyEvaluator( static_cast( lhs ), rhs ); + } + template bool compare( int lhs, unsigned char rhs ) { + return applyEvaluator( static_cast( lhs ), rhs ); + } + + // long to unsigned X + template bool compare( long lhs, unsigned int rhs ) { + return applyEvaluator( static_cast( lhs ), rhs ); + } + template bool compare( long lhs, unsigned long rhs ) { + return applyEvaluator( static_cast( lhs ), rhs ); + } + template bool compare( long lhs, unsigned char rhs ) { + return applyEvaluator( static_cast( lhs ), rhs ); + } + + // pointer to long (when comparing against NULL) + template bool compare( long lhs, T* rhs ) { + return Evaluator::evaluate( reinterpret_cast( lhs ), rhs ); + } + template bool compare( T* lhs, long rhs ) { + return Evaluator::evaluate( lhs, reinterpret_cast( rhs ) ); + } + + // pointer to int (when comparing against NULL) + template bool compare( int lhs, T* rhs ) { + return Evaluator::evaluate( reinterpret_cast( lhs ), rhs ); + } + template bool compare( T* lhs, int rhs ) { + return Evaluator::evaluate( lhs, reinterpret_cast( rhs ) ); + } + +#ifdef CATCH_CONFIG_CPP11_LONG_LONG + // long long to unsigned X + template bool compare( long long lhs, unsigned int rhs ) { + return applyEvaluator( static_cast( lhs ), rhs ); + } + template bool compare( long long lhs, unsigned long rhs ) { + return applyEvaluator( static_cast( lhs ), rhs ); + } + template bool compare( long long lhs, unsigned long long rhs ) { + return applyEvaluator( static_cast( lhs ), rhs ); + } + template bool compare( long long lhs, unsigned char rhs ) { + return applyEvaluator( static_cast( lhs ), rhs ); + } + + // unsigned long long to X + template bool compare( unsigned long long lhs, int rhs ) { + return applyEvaluator( static_cast( lhs ), rhs ); + } + template bool compare( unsigned 
long long lhs, long rhs ) { + return applyEvaluator( static_cast( lhs ), rhs ); + } + template bool compare( unsigned long long lhs, long long rhs ) { + return applyEvaluator( static_cast( lhs ), rhs ); + } + template bool compare( unsigned long long lhs, char rhs ) { + return applyEvaluator( static_cast( lhs ), rhs ); + } + + // pointer to long long (when comparing against NULL) + template bool compare( long long lhs, T* rhs ) { + return Evaluator::evaluate( reinterpret_cast( lhs ), rhs ); + } + template bool compare( T* lhs, long long rhs ) { + return Evaluator::evaluate( lhs, reinterpret_cast( rhs ) ); + } +#endif // CATCH_CONFIG_CPP11_LONG_LONG + +#ifdef CATCH_CONFIG_CPP11_NULLPTR + // pointer to nullptr_t (when comparing against nullptr) + template bool compare( std::nullptr_t, T* rhs ) { + return Evaluator::evaluate( nullptr, rhs ); + } + template bool compare( T* lhs, std::nullptr_t ) { + return Evaluator::evaluate( lhs, nullptr ); + } +#endif // CATCH_CONFIG_CPP11_NULLPTR + +} // end of namespace Internal +} // end of namespace Catch + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +// #included from: catch_tostring.h +#define TWOBLUECUBES_CATCH_TOSTRING_H_INCLUDED + +#include +#include +#include +#include +#include + +#ifdef __OBJC__ +// #included from: catch_objc_arc.hpp +#define TWOBLUECUBES_CATCH_OBJC_ARC_HPP_INCLUDED + +#import + +#ifdef __has_feature +#define CATCH_ARC_ENABLED __has_feature(objc_arc) +#else +#define CATCH_ARC_ENABLED 0 +#endif + +void arcSafeRelease( NSObject* obj ); +id performOptionalSelector( id obj, SEL sel ); + +#if !CATCH_ARC_ENABLED +inline void arcSafeRelease( NSObject* obj ) { + [obj release]; +} +inline id performOptionalSelector( id obj, SEL sel ) { + if( [obj respondsToSelector: sel] ) + return [obj performSelector: sel]; + return nil; +} +#define CATCH_UNSAFE_UNRETAINED +#define CATCH_ARC_STRONG +#else +inline void arcSafeRelease( NSObject* ){} +inline id performOptionalSelector( id obj, SEL sel ) { +#ifdef __clang__ 
+#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Warc-performSelector-leaks" +#endif + if( [obj respondsToSelector: sel] ) + return [obj performSelector: sel]; +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + return nil; +} +#define CATCH_UNSAFE_UNRETAINED __unsafe_unretained +#define CATCH_ARC_STRONG __strong +#endif + +#endif + +#ifdef CATCH_CONFIG_CPP11_TUPLE +#include +#endif + +#ifdef CATCH_CONFIG_CPP11_IS_ENUM +#include +#endif + +namespace Catch { + +// Why we're here. +template +std::string toString( T const& value ); + +// Built in overloads + +std::string toString( std::string const& value ); +std::string toString( std::wstring const& value ); +std::string toString( const char* const value ); +std::string toString( char* const value ); +std::string toString( const wchar_t* const value ); +std::string toString( wchar_t* const value ); +std::string toString( int value ); +std::string toString( unsigned long value ); +std::string toString( unsigned int value ); +std::string toString( const double value ); +std::string toString( const float value ); +std::string toString( bool value ); +std::string toString( char value ); +std::string toString( signed char value ); +std::string toString( unsigned char value ); + +#ifdef CATCH_CONFIG_CPP11_LONG_LONG +std::string toString( long long value ); +std::string toString( unsigned long long value ); +#endif + +#ifdef CATCH_CONFIG_CPP11_NULLPTR +std::string toString( std::nullptr_t ); +#endif + +#ifdef __OBJC__ + std::string toString( NSString const * const& nsstring ); + std::string toString( NSString * CATCH_ARC_STRONG & nsstring ); + std::string toString( NSObject* const& nsObject ); +#endif + +namespace Detail { + + extern const std::string unprintableString; + + #if !defined(CATCH_CONFIG_CPP11_STREAM_INSERTABLE_CHECK) + struct BorgType { + template BorgType( T const& ); + }; + + struct TrueType { char sizer[1]; }; + struct FalseType { char sizer[2]; }; + + TrueType& testStreamable( 
std::ostream& ); + FalseType testStreamable( FalseType ); + + FalseType operator<<( std::ostream const&, BorgType const& ); + + template + struct IsStreamInsertable { + static std::ostream &s; + static T const&t; + enum { value = sizeof( testStreamable(s << t) ) == sizeof( TrueType ) }; + }; +#else + template + class IsStreamInsertable { + template + static auto test(int) + -> decltype( std::declval() << std::declval(), std::true_type() ); + + template + static auto test(...) -> std::false_type; + + public: + static const bool value = decltype(test(0))::value; + }; +#endif + +#if defined(CATCH_CONFIG_CPP11_IS_ENUM) + template::value + > + struct EnumStringMaker + { + static std::string convert( T const& ) { return unprintableString; } + }; + + template + struct EnumStringMaker + { + static std::string convert( T const& v ) + { + return ::Catch::toString( + static_cast::type>(v) + ); + } + }; +#endif + template + struct StringMakerBase { +#if defined(CATCH_CONFIG_CPP11_IS_ENUM) + template + static std::string convert( T const& v ) + { + return EnumStringMaker::convert( v ); + } +#else + template + static std::string convert( T const& ) { return unprintableString; } +#endif + }; + + template<> + struct StringMakerBase { + template + static std::string convert( T const& _value ) { + std::ostringstream oss; + oss << _value; + return oss.str(); + } + }; + + std::string rawMemoryToString( const void *object, std::size_t size ); + + template + std::string rawMemoryToString( const T& object ) { + return rawMemoryToString( &object, sizeof(object) ); + } + +} // end namespace Detail + +template +struct StringMaker : + Detail::StringMakerBase::value> {}; + +template +struct StringMaker { + template + static std::string convert( U* p ) { + if( !p ) + return "NULL"; + else + return Detail::rawMemoryToString( p ); + } +}; + +template +struct StringMaker { + static std::string convert( R C::* p ) { + if( !p ) + return "NULL"; + else + return Detail::rawMemoryToString( p ); + } 
+}; + +namespace Detail { + template + std::string rangeToString( InputIterator first, InputIterator last ); +} + +//template +//struct StringMaker > { +// static std::string convert( std::vector const& v ) { +// return Detail::rangeToString( v.begin(), v.end() ); +// } +//}; + +template +std::string toString( std::vector const& v ) { + return Detail::rangeToString( v.begin(), v.end() ); +} + +#ifdef CATCH_CONFIG_CPP11_TUPLE + +// toString for tuples +namespace TupleDetail { + template< + typename Tuple, + std::size_t N = 0, + bool = (N < std::tuple_size::value) + > + struct ElementPrinter { + static void print( const Tuple& tuple, std::ostream& os ) + { + os << ( N ? ", " : " " ) + << Catch::toString(std::get(tuple)); + ElementPrinter::print(tuple,os); + } + }; + + template< + typename Tuple, + std::size_t N + > + struct ElementPrinter { + static void print( const Tuple&, std::ostream& ) {} + }; + +} + +template +struct StringMaker> { + + static std::string convert( const std::tuple& tuple ) + { + std::ostringstream os; + os << '{'; + TupleDetail::ElementPrinter>::print( tuple, os ); + os << " }"; + return os.str(); + } +}; +#endif // CATCH_CONFIG_CPP11_TUPLE + +namespace Detail { + template + std::string makeString( T const& value ) { + return StringMaker::convert( value ); + } +} // end namespace Detail + +/// \brief converts any type to a string +/// +/// The default template forwards on to ostringstream - except when an +/// ostringstream overload does not exist - in which case it attempts to detect +/// that and writes {?}. +/// Overload (not specialise) this template for custom typs that you don't want +/// to provide an ostream overload for. 
+template +std::string toString( T const& value ) { + return StringMaker::convert( value ); +} + + namespace Detail { + template + std::string rangeToString( InputIterator first, InputIterator last ) { + std::ostringstream oss; + oss << "{ "; + if( first != last ) { + oss << Catch::toString( *first ); + for( ++first ; first != last ; ++first ) + oss << ", " << Catch::toString( *first ); + } + oss << " }"; + return oss.str(); + } +} + +} // end namespace Catch + +namespace Catch { + +template +class BinaryExpression; + +template +class MatchExpression; + +// Wraps the LHS of an expression and overloads comparison operators +// for also capturing those and RHS (if any) +template +class ExpressionLhs : public DecomposedExpression { +public: + ExpressionLhs( ResultBuilder& rb, T lhs ) : m_rb( rb ), m_lhs( lhs ), m_truthy(false) {} + + ExpressionLhs& operator = ( const ExpressionLhs& ); + + template + BinaryExpression + operator == ( RhsT const& rhs ) { + return captureExpression( rhs ); + } + + template + BinaryExpression + operator != ( RhsT const& rhs ) { + return captureExpression( rhs ); + } + + template + BinaryExpression + operator < ( RhsT const& rhs ) { + return captureExpression( rhs ); + } + + template + BinaryExpression + operator > ( RhsT const& rhs ) { + return captureExpression( rhs ); + } + + template + BinaryExpression + operator <= ( RhsT const& rhs ) { + return captureExpression( rhs ); + } + + template + BinaryExpression + operator >= ( RhsT const& rhs ) { + return captureExpression( rhs ); + } + + BinaryExpression operator == ( bool rhs ) { + return captureExpression( rhs ); + } + + BinaryExpression operator != ( bool rhs ) { + return captureExpression( rhs ); + } + + void endExpression() { + m_truthy = m_lhs ? 
true : false; + m_rb + .setResultType( m_truthy ) + .endExpression( *this ); + } + + virtual void reconstructExpression( std::string& dest ) const CATCH_OVERRIDE { + dest = Catch::toString( m_lhs ); + } + +private: + template + BinaryExpression captureExpression( RhsT& rhs ) const { + return BinaryExpression( m_rb, m_lhs, rhs ); + } + + template + BinaryExpression captureExpression( bool rhs ) const { + return BinaryExpression( m_rb, m_lhs, rhs ); + } + +private: + ResultBuilder& m_rb; + T m_lhs; + bool m_truthy; +}; + +template +class BinaryExpression : public DecomposedExpression { +public: + BinaryExpression( ResultBuilder& rb, LhsT lhs, RhsT rhs ) + : m_rb( rb ), m_lhs( lhs ), m_rhs( rhs ) {} + + BinaryExpression& operator = ( BinaryExpression& ); + + void endExpression() const { + m_rb + .setResultType( Internal::compare( m_lhs, m_rhs ) ) + .endExpression( *this ); + } + + virtual bool isBinaryExpression() const CATCH_OVERRIDE { + return true; + } + + virtual void reconstructExpression( std::string& dest ) const CATCH_OVERRIDE { + std::string lhs = Catch::toString( m_lhs ); + std::string rhs = Catch::toString( m_rhs ); + char delim = lhs.size() + rhs.size() < 40 && + lhs.find('\n') == std::string::npos && + rhs.find('\n') == std::string::npos ? 
' ' : '\n'; + dest.reserve( 7 + lhs.size() + rhs.size() ); + // 2 for spaces around operator + // 2 for operator + // 2 for parentheses (conditionally added later) + // 1 for negation (conditionally added later) + dest = lhs; + dest += delim; + dest += Internal::OperatorTraits::getName(); + dest += delim; + dest += rhs; + } + +private: + ResultBuilder& m_rb; + LhsT m_lhs; + RhsT m_rhs; +}; + +template +class MatchExpression : public DecomposedExpression { +public: + MatchExpression( ArgT arg, MatcherT matcher, char const* matcherString ) + : m_arg( arg ), m_matcher( matcher ), m_matcherString( matcherString ) {} + + virtual bool isBinaryExpression() const CATCH_OVERRIDE { + return true; + } + + virtual void reconstructExpression( std::string& dest ) const CATCH_OVERRIDE { + std::string matcherAsString = m_matcher.toString(); + dest = Catch::toString( m_arg ); + dest += ' '; + if( matcherAsString == Detail::unprintableString ) + dest += m_matcherString; + else + dest += matcherAsString; + } + +private: + ArgT m_arg; + MatcherT m_matcher; + char const* m_matcherString; +}; + +} // end namespace Catch + + +namespace Catch { + + template + ExpressionLhs ResultBuilder::operator <= ( T const& operand ) { + return ExpressionLhs( *this, operand ); + } + + inline ExpressionLhs ResultBuilder::operator <= ( bool value ) { + return ExpressionLhs( *this, value ); + } + + template + void ResultBuilder::captureMatch( ArgT const& arg, MatcherT const& matcher, + char const* matcherString ) { + MatchExpression expr( arg, matcher, matcherString ); + setResultType( matcher.match( arg ) ); + endExpression( expr ); + } + +} // namespace Catch + +// #included from: catch_message.h +#define TWOBLUECUBES_CATCH_MESSAGE_H_INCLUDED + +#include + +namespace Catch { + + struct MessageInfo { + MessageInfo( std::string const& _macroName, + SourceLineInfo const& _lineInfo, + ResultWas::OfType _type ); + + std::string macroName; + SourceLineInfo lineInfo; + ResultWas::OfType type; + std::string 
message; + unsigned int sequence; + + bool operator == ( MessageInfo const& other ) const { + return sequence == other.sequence; + } + bool operator < ( MessageInfo const& other ) const { + return sequence < other.sequence; + } + private: + static unsigned int globalCount; + }; + + struct MessageBuilder { + MessageBuilder( std::string const& macroName, + SourceLineInfo const& lineInfo, + ResultWas::OfType type ) + : m_info( macroName, lineInfo, type ) + {} + + template + MessageBuilder& operator << ( T const& value ) { + m_stream << value; + return *this; + } + + MessageInfo m_info; + std::ostringstream m_stream; + }; + + class ScopedMessage { + public: + ScopedMessage( MessageBuilder const& builder ); + ScopedMessage( ScopedMessage const& other ); + ~ScopedMessage(); + + MessageInfo m_info; + }; + +} // end namespace Catch + +// #included from: catch_interfaces_capture.h +#define TWOBLUECUBES_CATCH_INTERFACES_CAPTURE_H_INCLUDED + +#include + +namespace Catch { + + class TestCase; + class AssertionResult; + struct AssertionInfo; + struct SectionInfo; + struct SectionEndInfo; + struct MessageInfo; + class ScopedMessageBuilder; + struct Counts; + + struct IResultCapture { + + virtual ~IResultCapture(); + + virtual void assertionEnded( AssertionResult const& result ) = 0; + virtual bool sectionStarted( SectionInfo const& sectionInfo, + Counts& assertions ) = 0; + virtual void sectionEnded( SectionEndInfo const& endInfo ) = 0; + virtual void sectionEndedEarly( SectionEndInfo const& endInfo ) = 0; + virtual void pushScopedMessage( MessageInfo const& message ) = 0; + virtual void popScopedMessage( MessageInfo const& message ) = 0; + + virtual std::string getCurrentTestName() const = 0; + virtual const AssertionResult* getLastResult() const = 0; + + virtual void exceptionEarlyReported() = 0; + + virtual void handleFatalErrorCondition( std::string const& message ) = 0; + + virtual bool lastAssertionPassed() = 0; + virtual void assertionPassed() = 0; + virtual void 
assertionRun() = 0; + }; + + IResultCapture& getResultCapture(); +} + +// #included from: catch_debugger.h +#define TWOBLUECUBES_CATCH_DEBUGGER_H_INCLUDED + +// #included from: catch_platform.h +#define TWOBLUECUBES_CATCH_PLATFORM_H_INCLUDED + +#if defined(__MAC_OS_X_VERSION_MIN_REQUIRED) +# define CATCH_PLATFORM_MAC +#elif defined(__IPHONE_OS_VERSION_MIN_REQUIRED) +# define CATCH_PLATFORM_IPHONE +#elif defined(linux) || defined(__linux) || defined(__linux__) +# define CATCH_PLATFORM_LINUX +#elif defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || defined(_MSC_VER) +# define CATCH_PLATFORM_WINDOWS +# if !defined(NOMINMAX) && !defined(CATCH_CONFIG_NO_NOMINMAX) +# define CATCH_DEFINES_NOMINMAX +# endif +# if !defined(WIN32_LEAN_AND_MEAN) && !defined(CATCH_CONFIG_NO_WIN32_LEAN_AND_MEAN) +# define CATCH_DEFINES_WIN32_LEAN_AND_MEAN +# endif +#endif + +#include + +namespace Catch{ + + bool isDebuggerActive(); + void writeToDebugConsole( std::string const& text ); +} + +#ifdef CATCH_PLATFORM_MAC + + // The following code snippet based on: + // http://cocoawithlove.com/2008/03/break-into-debugger.html + #if defined(__ppc64__) || defined(__ppc__) + #define CATCH_TRAP() \ + __asm__("li r0, 20\nsc\nnop\nli r0, 37\nli r4, 2\nsc\nnop\n" \ + : : : "memory","r0","r3","r4" ) /* NOLINT */ + #else + #define CATCH_TRAP() __asm__("int $3\n" : : /* NOLINT */ ) + #endif + +#elif defined(CATCH_PLATFORM_LINUX) + // If we can use inline assembler, do it because this allows us to break + // directly at the location of the failing check instead of breaking inside + // raise() called from it, i.e. one stack frame below. + #if defined(__GNUC__) && (defined(__i386) || defined(__x86_64)) + #define CATCH_TRAP() asm volatile ("int $3") /* NOLINT */ + #else // Fall back to the generic way. 
+ #include + + #define CATCH_TRAP() raise(SIGTRAP) + #endif +#elif defined(_MSC_VER) + #define CATCH_TRAP() __debugbreak() +#elif defined(__MINGW32__) + extern "C" __declspec(dllimport) void __stdcall DebugBreak(); + #define CATCH_TRAP() DebugBreak() +#endif + +#ifdef CATCH_TRAP + #define CATCH_BREAK_INTO_DEBUGGER() if( Catch::isDebuggerActive() ) { CATCH_TRAP(); } +#else + #define CATCH_BREAK_INTO_DEBUGGER() Catch::alwaysTrue(); +#endif + +// #included from: catch_interfaces_runner.h +#define TWOBLUECUBES_CATCH_INTERFACES_RUNNER_H_INCLUDED + +namespace Catch { + class TestCase; + + struct IRunner { + virtual ~IRunner(); + virtual bool aborting() const = 0; + }; +} + +#if !defined(CATCH_CONFIG_DISABLE_STRINGIFICATION) +# define CATCH_INTERNAL_STRINGIFY(expr) #expr +#else +# define CATCH_INTERNAL_STRINGIFY(expr) "Disabled by CATCH_CONFIG_DISABLE_STRINGIFICATION" +#endif + +#if defined(CATCH_CONFIG_FAST_COMPILE) +/////////////////////////////////////////////////////////////////////////////// +// We can speedup compilation significantly by breaking into debugger lower in +// the callstack, because then we don't have to expand CATCH_BREAK_INTO_DEBUGGER +// macro in each assertion +#define INTERNAL_CATCH_REACT( resultBuilder ) \ + resultBuilder.react(); + +/////////////////////////////////////////////////////////////////////////////// +// Another way to speed-up compilation is to omit local try-catch for REQUIRE* +// macros. +// This can potentially cause false negative, if the test code catches +// the exception before it propagates back up to the runner. 
+#define INTERNAL_CATCH_TEST_NO_TRY( macroName, resultDisposition, expr ) \ + do { \ + Catch::ResultBuilder __catchResult( macroName, CATCH_INTERNAL_LINEINFO, CATCH_INTERNAL_STRINGIFY(expr), resultDisposition ); \ + __catchResult.setExceptionGuard(); \ + CATCH_INTERNAL_SUPPRESS_PARENTHESES_WARNINGS \ + ( __catchResult <= expr ).endExpression(); \ + CATCH_INTERNAL_UNSUPPRESS_PARENTHESES_WARNINGS \ + __catchResult.unsetExceptionGuard(); \ + INTERNAL_CATCH_REACT( __catchResult ) \ + } while( Catch::isTrue( false && static_cast( !!(expr) ) ) ) // expr here is never evaluated at runtime but it forces the compiler to give it a look +// The double negation silences MSVC's C4800 warning, the static_cast forces short-circuit evaluation if the type has overloaded &&. + +#define INTERNAL_CHECK_THAT_NO_TRY( macroName, matcher, resultDisposition, arg ) \ + do { \ + Catch::ResultBuilder __catchResult( macroName, CATCH_INTERNAL_LINEINFO, CATCH_INTERNAL_STRINGIFY(arg) ", " CATCH_INTERNAL_STRINGIFY(matcher), resultDisposition ); \ + __catchResult.setExceptionGuard(); \ + __catchResult.captureMatch( arg, matcher, CATCH_INTERNAL_STRINGIFY(matcher) ); \ + __catchResult.unsetExceptionGuard(); \ + INTERNAL_CATCH_REACT( __catchResult ) \ + } while( Catch::alwaysFalse() ) + +#else +/////////////////////////////////////////////////////////////////////////////// +// In the event of a failure works out if the debugger needs to be invoked +// and/or an exception thrown and takes appropriate action. 
+// This needs to be done as a macro so the debugger will stop in the user +// source code rather than in Catch library code +#define INTERNAL_CATCH_REACT( resultBuilder ) \ + if( resultBuilder.shouldDebugBreak() ) CATCH_BREAK_INTO_DEBUGGER(); \ + resultBuilder.react(); +#endif + +/////////////////////////////////////////////////////////////////////////////// +#define INTERNAL_CATCH_TEST( macroName, resultDisposition, expr ) \ + do { \ + Catch::ResultBuilder __catchResult( macroName, CATCH_INTERNAL_LINEINFO, CATCH_INTERNAL_STRINGIFY(expr), resultDisposition ); \ + try { \ + CATCH_INTERNAL_SUPPRESS_PARENTHESES_WARNINGS \ + ( __catchResult <= expr ).endExpression(); \ + CATCH_INTERNAL_UNSUPPRESS_PARENTHESES_WARNINGS \ + } \ + catch( ... ) { \ + __catchResult.useActiveException( resultDisposition ); \ + } \ + INTERNAL_CATCH_REACT( __catchResult ) \ + } while( Catch::isTrue( false && static_cast( !!(expr) ) ) ) // expr here is never evaluated at runtime but it forces the compiler to give it a look + // The double negation silences MSVC's C4800 warning, the static_cast forces short-circuit evaluation if the type has overloaded &&. 
+ +/////////////////////////////////////////////////////////////////////////////// +#define INTERNAL_CATCH_IF( macroName, resultDisposition, expr ) \ + INTERNAL_CATCH_TEST( macroName, resultDisposition, expr ); \ + if( Catch::getResultCapture().lastAssertionPassed() ) + +/////////////////////////////////////////////////////////////////////////////// +#define INTERNAL_CATCH_ELSE( macroName, resultDisposition, expr ) \ + INTERNAL_CATCH_TEST( macroName, resultDisposition, expr ); \ + if( !Catch::getResultCapture().lastAssertionPassed() ) + +/////////////////////////////////////////////////////////////////////////////// +#define INTERNAL_CATCH_NO_THROW( macroName, resultDisposition, expr ) \ + do { \ + Catch::ResultBuilder __catchResult( macroName, CATCH_INTERNAL_LINEINFO, CATCH_INTERNAL_STRINGIFY(expr), resultDisposition ); \ + try { \ + static_cast(expr); \ + __catchResult.captureResult( Catch::ResultWas::Ok ); \ + } \ + catch( ... ) { \ + __catchResult.useActiveException( resultDisposition ); \ + } \ + INTERNAL_CATCH_REACT( __catchResult ) \ + } while( Catch::alwaysFalse() ) + +/////////////////////////////////////////////////////////////////////////////// +#define INTERNAL_CATCH_THROWS( macroName, resultDisposition, matcher, expr ) \ + do { \ + Catch::ResultBuilder __catchResult( macroName, CATCH_INTERNAL_LINEINFO, CATCH_INTERNAL_STRINGIFY(expr), resultDisposition, CATCH_INTERNAL_STRINGIFY(matcher) ); \ + if( __catchResult.allowThrows() ) \ + try { \ + static_cast(expr); \ + __catchResult.captureResult( Catch::ResultWas::DidntThrowException ); \ + } \ + catch( ... 
) { \ + __catchResult.captureExpectedException( matcher ); \ + } \ + else \ + __catchResult.captureResult( Catch::ResultWas::Ok ); \ + INTERNAL_CATCH_REACT( __catchResult ) \ + } while( Catch::alwaysFalse() ) + +/////////////////////////////////////////////////////////////////////////////// +#define INTERNAL_CATCH_THROWS_AS( macroName, exceptionType, resultDisposition, expr ) \ + do { \ + Catch::ResultBuilder __catchResult( macroName, CATCH_INTERNAL_LINEINFO, CATCH_INTERNAL_STRINGIFY(expr) ", " CATCH_INTERNAL_STRINGIFY(exceptionType), resultDisposition ); \ + if( __catchResult.allowThrows() ) \ + try { \ + static_cast(expr); \ + __catchResult.captureResult( Catch::ResultWas::DidntThrowException ); \ + } \ + catch( exceptionType ) { \ + __catchResult.captureResult( Catch::ResultWas::Ok ); \ + } \ + catch( ... ) { \ + __catchResult.useActiveException( resultDisposition ); \ + } \ + else \ + __catchResult.captureResult( Catch::ResultWas::Ok ); \ + INTERNAL_CATCH_REACT( __catchResult ) \ + } while( Catch::alwaysFalse() ) + +/////////////////////////////////////////////////////////////////////////////// +#ifdef CATCH_CONFIG_VARIADIC_MACROS + #define INTERNAL_CATCH_MSG( macroName, messageType, resultDisposition, ... 
) \ + do { \ + Catch::ResultBuilder __catchResult( macroName, CATCH_INTERNAL_LINEINFO, "", resultDisposition ); \ + __catchResult << __VA_ARGS__ + ::Catch::StreamEndStop(); \ + __catchResult.captureResult( messageType ); \ + INTERNAL_CATCH_REACT( __catchResult ) \ + } while( Catch::alwaysFalse() ) +#else + #define INTERNAL_CATCH_MSG( macroName, messageType, resultDisposition, log ) \ + do { \ + Catch::ResultBuilder __catchResult( macroName, CATCH_INTERNAL_LINEINFO, "", resultDisposition ); \ + __catchResult << log + ::Catch::StreamEndStop(); \ + __catchResult.captureResult( messageType ); \ + INTERNAL_CATCH_REACT( __catchResult ) \ + } while( Catch::alwaysFalse() ) +#endif + +/////////////////////////////////////////////////////////////////////////////// +#define INTERNAL_CATCH_INFO( macroName, log ) \ + Catch::ScopedMessage INTERNAL_CATCH_UNIQUE_NAME( scopedMessage ) = Catch::MessageBuilder( macroName, CATCH_INTERNAL_LINEINFO, Catch::ResultWas::Info ) << log; + +/////////////////////////////////////////////////////////////////////////////// +#define INTERNAL_CHECK_THAT( macroName, matcher, resultDisposition, arg ) \ + do { \ + Catch::ResultBuilder __catchResult( macroName, CATCH_INTERNAL_LINEINFO, CATCH_INTERNAL_STRINGIFY(arg) ", " CATCH_INTERNAL_STRINGIFY(matcher), resultDisposition ); \ + try { \ + __catchResult.captureMatch( arg, matcher, CATCH_INTERNAL_STRINGIFY(matcher) ); \ + } catch( ... 
) { \ + __catchResult.useActiveException( resultDisposition | Catch::ResultDisposition::ContinueOnFailure ); \ + } \ + INTERNAL_CATCH_REACT( __catchResult ) \ + } while( Catch::alwaysFalse() ) + +// #included from: internal/catch_section.h +#define TWOBLUECUBES_CATCH_SECTION_H_INCLUDED + +// #included from: catch_section_info.h +#define TWOBLUECUBES_CATCH_SECTION_INFO_H_INCLUDED + +// #included from: catch_totals.hpp +#define TWOBLUECUBES_CATCH_TOTALS_HPP_INCLUDED + +#include + +namespace Catch { + + struct Counts { + Counts() : passed( 0 ), failed( 0 ), failedButOk( 0 ) {} + + Counts operator - ( Counts const& other ) const { + Counts diff; + diff.passed = passed - other.passed; + diff.failed = failed - other.failed; + diff.failedButOk = failedButOk - other.failedButOk; + return diff; + } + Counts& operator += ( Counts const& other ) { + passed += other.passed; + failed += other.failed; + failedButOk += other.failedButOk; + return *this; + } + + std::size_t total() const { + return passed + failed + failedButOk; + } + bool allPassed() const { + return failed == 0 && failedButOk == 0; + } + bool allOk() const { + return failed == 0; + } + + std::size_t passed; + std::size_t failed; + std::size_t failedButOk; + }; + + struct Totals { + + Totals operator - ( Totals const& other ) const { + Totals diff; + diff.assertions = assertions - other.assertions; + diff.testCases = testCases - other.testCases; + return diff; + } + + Totals delta( Totals const& prevTotals ) const { + Totals diff = *this - prevTotals; + if( diff.assertions.failed > 0 ) + ++diff.testCases.failed; + else if( diff.assertions.failedButOk > 0 ) + ++diff.testCases.failedButOk; + else + ++diff.testCases.passed; + return diff; + } + + Totals& operator += ( Totals const& other ) { + assertions += other.assertions; + testCases += other.testCases; + return *this; + } + + Counts assertions; + Counts testCases; + }; +} + +#include + +namespace Catch { + + struct SectionInfo { + SectionInfo + ( SourceLineInfo 
const& _lineInfo, + std::string const& _name, + std::string const& _description = std::string() ); + + std::string name; + std::string description; + SourceLineInfo lineInfo; + }; + + struct SectionEndInfo { + SectionEndInfo( SectionInfo const& _sectionInfo, Counts const& _prevAssertions, double _durationInSeconds ) + : sectionInfo( _sectionInfo ), prevAssertions( _prevAssertions ), durationInSeconds( _durationInSeconds ) + {} + + SectionInfo sectionInfo; + Counts prevAssertions; + double durationInSeconds; + }; + +} // end namespace Catch + +// #included from: catch_timer.h +#define TWOBLUECUBES_CATCH_TIMER_H_INCLUDED + +#ifdef _MSC_VER + +namespace Catch { + typedef unsigned long long UInt64; +} +#else +#include +namespace Catch { + typedef uint64_t UInt64; +} +#endif + +namespace Catch { + class Timer { + public: + Timer() : m_ticks( 0 ) {} + void start(); + unsigned int getElapsedMicroseconds() const; + unsigned int getElapsedMilliseconds() const; + double getElapsedSeconds() const; + + private: + UInt64 m_ticks; + }; + +} // namespace Catch + +#include + +namespace Catch { + + class Section : NonCopyable { + public: + Section( SectionInfo const& info ); + ~Section(); + + // This indicates whether the section should be executed or not + operator bool() const; + + private: + SectionInfo m_info; + + std::string m_name; + Counts m_assertions; + bool m_sectionIncluded; + Timer m_timer; + }; + +} // end namespace Catch + +#ifdef CATCH_CONFIG_VARIADIC_MACROS + #define INTERNAL_CATCH_SECTION( ... 
) \ + if( Catch::Section const& INTERNAL_CATCH_UNIQUE_NAME( catch_internal_Section ) = Catch::SectionInfo( CATCH_INTERNAL_LINEINFO, __VA_ARGS__ ) ) +#else + #define INTERNAL_CATCH_SECTION( name, desc ) \ + if( Catch::Section const& INTERNAL_CATCH_UNIQUE_NAME( catch_internal_Section ) = Catch::SectionInfo( CATCH_INTERNAL_LINEINFO, name, desc ) ) +#endif + +// #included from: internal/catch_generators.hpp +#define TWOBLUECUBES_CATCH_GENERATORS_HPP_INCLUDED + +#include +#include +#include + +namespace Catch { + +template +struct IGenerator { + virtual ~IGenerator() {} + virtual T getValue( std::size_t index ) const = 0; + virtual std::size_t size () const = 0; +}; + +template +class BetweenGenerator : public IGenerator { +public: + BetweenGenerator( T from, T to ) : m_from( from ), m_to( to ){} + + virtual T getValue( std::size_t index ) const { + return m_from+static_cast( index ); + } + + virtual std::size_t size() const { + return static_cast( 1+m_to-m_from ); + } + +private: + + T m_from; + T m_to; +}; + +template +class ValuesGenerator : public IGenerator { +public: + ValuesGenerator(){} + + void add( T value ) { + m_values.push_back( value ); + } + + virtual T getValue( std::size_t index ) const { + return m_values[index]; + } + + virtual std::size_t size() const { + return m_values.size(); + } + +private: + std::vector m_values; +}; + +template +class CompositeGenerator { +public: + CompositeGenerator() : m_totalSize( 0 ) {} + + // *** Move semantics, similar to auto_ptr *** + CompositeGenerator( CompositeGenerator& other ) + : m_fileInfo( other.m_fileInfo ), + m_totalSize( 0 ) + { + move( other ); + } + + CompositeGenerator& setFileInfo( const char* fileInfo ) { + m_fileInfo = fileInfo; + return *this; + } + + ~CompositeGenerator() { + deleteAll( m_composed ); + } + + operator T () const { + size_t overallIndex = getCurrentContext().getGeneratorIndex( m_fileInfo, m_totalSize ); + + typename std::vector*>::const_iterator it = m_composed.begin(); + typename 
std::vector*>::const_iterator itEnd = m_composed.end(); + for( size_t index = 0; it != itEnd; ++it ) + { + const IGenerator* generator = *it; + if( overallIndex >= index && overallIndex < index + generator->size() ) + { + return generator->getValue( overallIndex-index ); + } + index += generator->size(); + } + CATCH_INTERNAL_ERROR( "Indexed past end of generated range" ); + return T(); // Suppress spurious "not all control paths return a value" warning in Visual Studio - if you know how to fix this please do so + } + + void add( const IGenerator* generator ) { + m_totalSize += generator->size(); + m_composed.push_back( generator ); + } + + CompositeGenerator& then( CompositeGenerator& other ) { + move( other ); + return *this; + } + + CompositeGenerator& then( T value ) { + ValuesGenerator* valuesGen = new ValuesGenerator(); + valuesGen->add( value ); + add( valuesGen ); + return *this; + } + +private: + + void move( CompositeGenerator& other ) { + m_composed.insert( m_composed.end(), other.m_composed.begin(), other.m_composed.end() ); + m_totalSize += other.m_totalSize; + other.m_composed.clear(); + } + + std::vector*> m_composed; + std::string m_fileInfo; + size_t m_totalSize; +}; + +namespace Generators +{ + template + CompositeGenerator between( T from, T to ) { + CompositeGenerator generators; + generators.add( new BetweenGenerator( from, to ) ); + return generators; + } + + template + CompositeGenerator values( T val1, T val2 ) { + CompositeGenerator generators; + ValuesGenerator* valuesGen = new ValuesGenerator(); + valuesGen->add( val1 ); + valuesGen->add( val2 ); + generators.add( valuesGen ); + return generators; + } + + template + CompositeGenerator values( T val1, T val2, T val3 ){ + CompositeGenerator generators; + ValuesGenerator* valuesGen = new ValuesGenerator(); + valuesGen->add( val1 ); + valuesGen->add( val2 ); + valuesGen->add( val3 ); + generators.add( valuesGen ); + return generators; + } + + template + CompositeGenerator values( T val1, T 
val2, T val3, T val4 ) { + CompositeGenerator generators; + ValuesGenerator* valuesGen = new ValuesGenerator(); + valuesGen->add( val1 ); + valuesGen->add( val2 ); + valuesGen->add( val3 ); + valuesGen->add( val4 ); + generators.add( valuesGen ); + return generators; + } + +} // end namespace Generators + +using namespace Generators; + +} // end namespace Catch + +#define INTERNAL_CATCH_LINESTR2( line ) #line +#define INTERNAL_CATCH_LINESTR( line ) INTERNAL_CATCH_LINESTR2( line ) + +#define INTERNAL_CATCH_GENERATE( expr ) expr.setFileInfo( __FILE__ "(" INTERNAL_CATCH_LINESTR( __LINE__ ) ")" ) + +// #included from: internal/catch_interfaces_exception.h +#define TWOBLUECUBES_CATCH_INTERFACES_EXCEPTION_H_INCLUDED + +#include +#include + +// #included from: catch_interfaces_registry_hub.h +#define TWOBLUECUBES_CATCH_INTERFACES_REGISTRY_HUB_H_INCLUDED + +#include + +namespace Catch { + + class TestCase; + struct ITestCaseRegistry; + struct IExceptionTranslatorRegistry; + struct IExceptionTranslator; + struct IReporterRegistry; + struct IReporterFactory; + struct ITagAliasRegistry; + + struct IRegistryHub { + virtual ~IRegistryHub(); + + virtual IReporterRegistry const& getReporterRegistry() const = 0; + virtual ITestCaseRegistry const& getTestCaseRegistry() const = 0; + virtual ITagAliasRegistry const& getTagAliasRegistry() const = 0; + + virtual IExceptionTranslatorRegistry& getExceptionTranslatorRegistry() = 0; + }; + + struct IMutableRegistryHub { + virtual ~IMutableRegistryHub(); + virtual void registerReporter( std::string const& name, Ptr const& factory ) = 0; + virtual void registerListener( Ptr const& factory ) = 0; + virtual void registerTest( TestCase const& testInfo ) = 0; + virtual void registerTranslator( const IExceptionTranslator* translator ) = 0; + virtual void registerTagAlias( std::string const& alias, std::string const& tag, SourceLineInfo const& lineInfo ) = 0; + }; + + IRegistryHub& getRegistryHub(); + IMutableRegistryHub& getMutableRegistryHub(); 
+ void cleanUp(); + std::string translateActiveException(); + +} + +namespace Catch { + + typedef std::string(*exceptionTranslateFunction)(); + + struct IExceptionTranslator; + typedef std::vector ExceptionTranslators; + + struct IExceptionTranslator { + virtual ~IExceptionTranslator(); + virtual std::string translate( ExceptionTranslators::const_iterator it, ExceptionTranslators::const_iterator itEnd ) const = 0; + }; + + struct IExceptionTranslatorRegistry { + virtual ~IExceptionTranslatorRegistry(); + + virtual std::string translateActiveException() const = 0; + }; + + class ExceptionTranslatorRegistrar { + template + class ExceptionTranslator : public IExceptionTranslator { + public: + + ExceptionTranslator( std::string(*translateFunction)( T& ) ) + : m_translateFunction( translateFunction ) + {} + + virtual std::string translate( ExceptionTranslators::const_iterator it, ExceptionTranslators::const_iterator itEnd ) const CATCH_OVERRIDE { + try { + if( it == itEnd ) + throw; + else + return (*it)->translate( it+1, itEnd ); + } + catch( T& ex ) { + return m_translateFunction( ex ); + } + } + + protected: + std::string(*m_translateFunction)( T& ); + }; + + public: + template + ExceptionTranslatorRegistrar( std::string(*translateFunction)( T& ) ) { + getMutableRegistryHub().registerTranslator + ( new ExceptionTranslator( translateFunction ) ); + } + }; +} + +/////////////////////////////////////////////////////////////////////////////// +#define INTERNAL_CATCH_TRANSLATE_EXCEPTION2( translatorName, signature ) \ + static std::string translatorName( signature ); \ + namespace{ Catch::ExceptionTranslatorRegistrar INTERNAL_CATCH_UNIQUE_NAME( catch_internal_ExceptionRegistrar )( &translatorName ); }\ + static std::string translatorName( signature ) + +#define INTERNAL_CATCH_TRANSLATE_EXCEPTION( signature ) INTERNAL_CATCH_TRANSLATE_EXCEPTION2( INTERNAL_CATCH_UNIQUE_NAME( catch_internal_ExceptionTranslator ), signature ) + +// #included from: internal/catch_approx.hpp 
+#define TWOBLUECUBES_CATCH_APPROX_HPP_INCLUDED + +#include +#include + +#if defined(CATCH_CONFIG_CPP11_TYPE_TRAITS) +#include +#endif + +namespace Catch { +namespace Detail { + + class Approx { + public: + explicit Approx ( double value ) + : m_epsilon( std::numeric_limits::epsilon()*100 ), + m_margin( 0.0 ), + m_scale( 1.0 ), + m_value( value ) + {} + + static Approx custom() { + return Approx( 0 ); + } + +#if defined(CATCH_CONFIG_CPP11_TYPE_TRAITS) + + template ::value>::type> + Approx operator()( T value ) { + Approx approx( static_cast(value) ); + approx.epsilon( m_epsilon ); + approx.margin( m_margin ); + approx.scale( m_scale ); + return approx; + } + + template ::value>::type> + explicit Approx( T value ): Approx(static_cast(value)) + {} + + template ::value>::type> + friend bool operator == ( const T& lhs, Approx const& rhs ) { + // Thanks to Richard Harris for his help refining this formula + auto lhs_v = double(lhs); + bool relativeOK = std::fabs(lhs_v - rhs.m_value) < rhs.m_epsilon * (rhs.m_scale + (std::max)(std::fabs(lhs_v), std::fabs(rhs.m_value))); + if (relativeOK) { + return true; + } + + return std::fabs(lhs_v - rhs.m_value) <= rhs.m_margin; + } + + template ::value>::type> + friend bool operator == ( Approx const& lhs, const T& rhs ) { + return operator==( rhs, lhs ); + } + + template ::value>::type> + friend bool operator != ( T lhs, Approx const& rhs ) { + return !operator==( lhs, rhs ); + } + + template ::value>::type> + friend bool operator != ( Approx const& lhs, T rhs ) { + return !operator==( rhs, lhs ); + } + + template ::value>::type> + friend bool operator <= ( T lhs, Approx const& rhs ) { + return double(lhs) < rhs.m_value || lhs == rhs; + } + + template ::value>::type> + friend bool operator <= ( Approx const& lhs, T rhs ) { + return lhs.m_value < double(rhs) || lhs == rhs; + } + + template ::value>::type> + friend bool operator >= ( T lhs, Approx const& rhs ) { + return double(lhs) > rhs.m_value || lhs == rhs; + } + + template 
::value>::type> + friend bool operator >= ( Approx const& lhs, T rhs ) { + return lhs.m_value > double(rhs) || lhs == rhs; + } + + template ::value>::type> + Approx& epsilon( T newEpsilon ) { + m_epsilon = double(newEpsilon); + return *this; + } + + template ::value>::type> + Approx& margin( T newMargin ) { + m_margin = double(newMargin); + return *this; + } + + template ::value>::type> + Approx& scale( T newScale ) { + m_scale = double(newScale); + return *this; + } + +#else + + Approx operator()( double value ) { + Approx approx( value ); + approx.epsilon( m_epsilon ); + approx.margin( m_margin ); + approx.scale( m_scale ); + return approx; + } + + friend bool operator == ( double lhs, Approx const& rhs ) { + // Thanks to Richard Harris for his help refining this formula + bool relativeOK = std::fabs( lhs - rhs.m_value ) < rhs.m_epsilon * (rhs.m_scale + (std::max)( std::fabs(lhs), std::fabs(rhs.m_value) ) ); + if (relativeOK) { + return true; + } + return std::fabs(lhs - rhs.m_value) <= rhs.m_margin; + } + + friend bool operator == ( Approx const& lhs, double rhs ) { + return operator==( rhs, lhs ); + } + + friend bool operator != ( double lhs, Approx const& rhs ) { + return !operator==( lhs, rhs ); + } + + friend bool operator != ( Approx const& lhs, double rhs ) { + return !operator==( rhs, lhs ); + } + + friend bool operator <= ( double lhs, Approx const& rhs ) { + return lhs < rhs.m_value || lhs == rhs; + } + + friend bool operator <= ( Approx const& lhs, double rhs ) { + return lhs.m_value < rhs || lhs == rhs; + } + + friend bool operator >= ( double lhs, Approx const& rhs ) { + return lhs > rhs.m_value || lhs == rhs; + } + + friend bool operator >= ( Approx const& lhs, double rhs ) { + return lhs.m_value > rhs || lhs == rhs; + } + + Approx& epsilon( double newEpsilon ) { + m_epsilon = newEpsilon; + return *this; + } + + Approx& margin( double newMargin ) { + m_margin = newMargin; + return *this; + } + + Approx& scale( double newScale ) { + m_scale = 
newScale; + return *this; + } +#endif + + std::string toString() const { + std::ostringstream oss; + oss << "Approx( " << Catch::toString( m_value ) << " )"; + return oss.str(); + } + + private: + double m_epsilon; + double m_margin; + double m_scale; + double m_value; + }; +} + +template<> +inline std::string toString( Detail::Approx const& value ) { + return value.toString(); +} + +} // end namespace Catch + +// #included from: internal/catch_matchers_string.h +#define TWOBLUECUBES_CATCH_MATCHERS_STRING_H_INCLUDED + +namespace Catch { +namespace Matchers { + + namespace StdString { + + struct CasedString + { + CasedString( std::string const& str, CaseSensitive::Choice caseSensitivity ); + std::string adjustString( std::string const& str ) const; + std::string caseSensitivitySuffix() const; + + CaseSensitive::Choice m_caseSensitivity; + std::string m_str; + }; + + struct StringMatcherBase : MatcherBase { + StringMatcherBase( std::string const& operation, CasedString const& comparator ); + virtual std::string describe() const CATCH_OVERRIDE; + + CasedString m_comparator; + std::string m_operation; + }; + + struct EqualsMatcher : StringMatcherBase { + EqualsMatcher( CasedString const& comparator ); + virtual bool match( std::string const& source ) const CATCH_OVERRIDE; + }; + struct ContainsMatcher : StringMatcherBase { + ContainsMatcher( CasedString const& comparator ); + virtual bool match( std::string const& source ) const CATCH_OVERRIDE; + }; + struct StartsWithMatcher : StringMatcherBase { + StartsWithMatcher( CasedString const& comparator ); + virtual bool match( std::string const& source ) const CATCH_OVERRIDE; + }; + struct EndsWithMatcher : StringMatcherBase { + EndsWithMatcher( CasedString const& comparator ); + virtual bool match( std::string const& source ) const CATCH_OVERRIDE; + }; + + } // namespace StdString + + // The following functions create the actual matcher objects. 
+ // This allows the types to be inferred + + StdString::EqualsMatcher Equals( std::string const& str, CaseSensitive::Choice caseSensitivity = CaseSensitive::Yes ); + StdString::ContainsMatcher Contains( std::string const& str, CaseSensitive::Choice caseSensitivity = CaseSensitive::Yes ); + StdString::EndsWithMatcher EndsWith( std::string const& str, CaseSensitive::Choice caseSensitivity = CaseSensitive::Yes ); + StdString::StartsWithMatcher StartsWith( std::string const& str, CaseSensitive::Choice caseSensitivity = CaseSensitive::Yes ); + +} // namespace Matchers +} // namespace Catch + +// #included from: internal/catch_matchers_vector.h +#define TWOBLUECUBES_CATCH_MATCHERS_VECTOR_H_INCLUDED + +namespace Catch { +namespace Matchers { + + namespace Vector { + + template + struct ContainsElementMatcher : MatcherBase, T> { + + ContainsElementMatcher(T const &comparator) : m_comparator( comparator) {} + + bool match(std::vector const &v) const CATCH_OVERRIDE { + return std::find(v.begin(), v.end(), m_comparator) != v.end(); + } + + virtual std::string describe() const CATCH_OVERRIDE { + return "Contains: " + Catch::toString( m_comparator ); + } + + T const& m_comparator; + }; + + template + struct ContainsMatcher : MatcherBase, std::vector > { + + ContainsMatcher(std::vector const &comparator) : m_comparator( comparator ) {} + + bool match(std::vector const &v) const CATCH_OVERRIDE { + // !TBD: see note in EqualsMatcher + if (m_comparator.size() > v.size()) + return false; + for (size_t i = 0; i < m_comparator.size(); ++i) + if (std::find(v.begin(), v.end(), m_comparator[i]) == v.end()) + return false; + return true; + } + virtual std::string describe() const CATCH_OVERRIDE { + return "Contains: " + Catch::toString( m_comparator ); + } + + std::vector const& m_comparator; + }; + + template + struct EqualsMatcher : MatcherBase, std::vector > { + + EqualsMatcher(std::vector const &comparator) : m_comparator( comparator ) {} + + bool match(std::vector const &v) const 
CATCH_OVERRIDE { + // !TBD: This currently works if all elements can be compared using != + // - a more general approach would be via a compare template that defaults + // to using !=. but could be specialised for, e.g. std::vector etc + // - then just call that directly + if (m_comparator.size() != v.size()) + return false; + for (size_t i = 0; i < v.size(); ++i) + if (m_comparator[i] != v[i]) + return false; + return true; + } + virtual std::string describe() const CATCH_OVERRIDE { + return "Equals: " + Catch::toString( m_comparator ); + } + std::vector const& m_comparator; + }; + + } // namespace Vector + + // The following functions create the actual matcher objects. + // This allows the types to be inferred + + template + Vector::ContainsMatcher Contains( std::vector const& comparator ) { + return Vector::ContainsMatcher( comparator ); + } + + template + Vector::ContainsElementMatcher VectorContains( T const& comparator ) { + return Vector::ContainsElementMatcher( comparator ); + } + + template + Vector::EqualsMatcher Equals( std::vector const& comparator ) { + return Vector::EqualsMatcher( comparator ); + } + +} // namespace Matchers +} // namespace Catch + +// #included from: internal/catch_interfaces_tag_alias_registry.h +#define TWOBLUECUBES_CATCH_INTERFACES_TAG_ALIAS_REGISTRY_H_INCLUDED + +// #included from: catch_tag_alias.h +#define TWOBLUECUBES_CATCH_TAG_ALIAS_H_INCLUDED + +#include + +namespace Catch { + + struct TagAlias { + TagAlias( std::string const& _tag, SourceLineInfo _lineInfo ) : tag( _tag ), lineInfo( _lineInfo ) {} + + std::string tag; + SourceLineInfo lineInfo; + }; + + struct RegistrarForTagAliases { + RegistrarForTagAliases( char const* alias, char const* tag, SourceLineInfo const& lineInfo ); + }; + +} // end namespace Catch + +#define CATCH_REGISTER_TAG_ALIAS( alias, spec ) namespace{ Catch::RegistrarForTagAliases INTERNAL_CATCH_UNIQUE_NAME( AutoRegisterTagAlias )( alias, spec, CATCH_INTERNAL_LINEINFO ); } +// #included from: 
catch_option.hpp +#define TWOBLUECUBES_CATCH_OPTION_HPP_INCLUDED + +namespace Catch { + + // An optional type + template + class Option { + public: + Option() : nullableValue( CATCH_NULL ) {} + Option( T const& _value ) + : nullableValue( new( storage ) T( _value ) ) + {} + Option( Option const& _other ) + : nullableValue( _other ? new( storage ) T( *_other ) : CATCH_NULL ) + {} + + ~Option() { + reset(); + } + + Option& operator= ( Option const& _other ) { + if( &_other != this ) { + reset(); + if( _other ) + nullableValue = new( storage ) T( *_other ); + } + return *this; + } + Option& operator = ( T const& _value ) { + reset(); + nullableValue = new( storage ) T( _value ); + return *this; + } + + void reset() { + if( nullableValue ) + nullableValue->~T(); + nullableValue = CATCH_NULL; + } + + T& operator*() { return *nullableValue; } + T const& operator*() const { return *nullableValue; } + T* operator->() { return nullableValue; } + const T* operator->() const { return nullableValue; } + + T valueOr( T const& defaultValue ) const { + return nullableValue ? 
*nullableValue : defaultValue; + } + + bool some() const { return nullableValue != CATCH_NULL; } + bool none() const { return nullableValue == CATCH_NULL; } + + bool operator !() const { return nullableValue == CATCH_NULL; } + operator SafeBool::type() const { + return SafeBool::makeSafe( some() ); + } + + private: + T *nullableValue; + union { + char storage[sizeof(T)]; + + // These are here to force alignment for the storage + long double dummy1; + void (*dummy2)(); + long double dummy3; +#ifdef CATCH_CONFIG_CPP11_LONG_LONG + long long dummy4; +#endif + }; + }; + +} // end namespace Catch + +namespace Catch { + + struct ITagAliasRegistry { + virtual ~ITagAliasRegistry(); + virtual Option find( std::string const& alias ) const = 0; + virtual std::string expandAliases( std::string const& unexpandedTestSpec ) const = 0; + + static ITagAliasRegistry const& get(); + }; + +} // end namespace Catch + +// These files are included here so the single_include script doesn't put them +// in the conditionally compiled sections +// #included from: internal/catch_test_case_info.h +#define TWOBLUECUBES_CATCH_TEST_CASE_INFO_H_INCLUDED + +#include +#include + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wpadded" +#endif + +namespace Catch { + + struct ITestCase; + + struct TestCaseInfo { + enum SpecialProperties{ + None = 0, + IsHidden = 1 << 1, + ShouldFail = 1 << 2, + MayFail = 1 << 3, + Throws = 1 << 4, + NonPortable = 1 << 5 + }; + + TestCaseInfo( std::string const& _name, + std::string const& _className, + std::string const& _description, + std::set const& _tags, + SourceLineInfo const& _lineInfo ); + + TestCaseInfo( TestCaseInfo const& other ); + + friend void setTags( TestCaseInfo& testCaseInfo, std::set const& tags ); + + bool isHidden() const; + bool throws() const; + bool okToFail() const; + bool expectedToFail() const; + + std::string name; + std::string className; + std::string description; + std::set tags; + std::set lcaseTags; 
+ std::string tagsAsString; + SourceLineInfo lineInfo; + SpecialProperties properties; + }; + + class TestCase : public TestCaseInfo { + public: + + TestCase( ITestCase* testCase, TestCaseInfo const& info ); + TestCase( TestCase const& other ); + + TestCase withName( std::string const& _newName ) const; + + void invoke() const; + + TestCaseInfo const& getTestCaseInfo() const; + + void swap( TestCase& other ); + bool operator == ( TestCase const& other ) const; + bool operator < ( TestCase const& other ) const; + TestCase& operator = ( TestCase const& other ); + + private: + Ptr test; + }; + + TestCase makeTestCase( ITestCase* testCase, + std::string const& className, + std::string const& name, + std::string const& description, + SourceLineInfo const& lineInfo ); +} + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + + +#ifdef __OBJC__ +// #included from: internal/catch_objc.hpp +#define TWOBLUECUBES_CATCH_OBJC_HPP_INCLUDED + +#import + +#include + +// NB. Any general catch headers included here must be included +// in catch.hpp first to make sure they are included by the single +// header for non obj-usage + +/////////////////////////////////////////////////////////////////////////////// +// This protocol is really only here for (self) documenting purposes, since +// all its methods are optional. 
+@protocol OcFixture + +@optional + +-(void) setUp; +-(void) tearDown; + +@end + +namespace Catch { + + class OcMethod : public SharedImpl { + + public: + OcMethod( Class cls, SEL sel ) : m_cls( cls ), m_sel( sel ) {} + + virtual void invoke() const { + id obj = [[m_cls alloc] init]; + + performOptionalSelector( obj, @selector(setUp) ); + performOptionalSelector( obj, m_sel ); + performOptionalSelector( obj, @selector(tearDown) ); + + arcSafeRelease( obj ); + } + private: + virtual ~OcMethod() {} + + Class m_cls; + SEL m_sel; + }; + + namespace Detail{ + + inline std::string getAnnotation( Class cls, + std::string const& annotationName, + std::string const& testCaseName ) { + NSString* selStr = [[NSString alloc] initWithFormat:@"Catch_%s_%s", annotationName.c_str(), testCaseName.c_str()]; + SEL sel = NSSelectorFromString( selStr ); + arcSafeRelease( selStr ); + id value = performOptionalSelector( cls, sel ); + if( value ) + return [(NSString*)value UTF8String]; + return ""; + } + } + + inline size_t registerTestMethods() { + size_t noTestMethods = 0; + int noClasses = objc_getClassList( CATCH_NULL, 0 ); + + Class* classes = (CATCH_UNSAFE_UNRETAINED Class *)malloc( sizeof(Class) * noClasses); + objc_getClassList( classes, noClasses ); + + for( int c = 0; c < noClasses; c++ ) { + Class cls = classes[c]; + { + u_int count; + Method* methods = class_copyMethodList( cls, &count ); + for( u_int m = 0; m < count ; m++ ) { + SEL selector = method_getName(methods[m]); + std::string methodName = sel_getName(selector); + if( startsWith( methodName, "Catch_TestCase_" ) ) { + std::string testCaseName = methodName.substr( 15 ); + std::string name = Detail::getAnnotation( cls, "Name", testCaseName ); + std::string desc = Detail::getAnnotation( cls, "Description", testCaseName ); + const char* className = class_getName( cls ); + + getMutableRegistryHub().registerTest( makeTestCase( new OcMethod( cls, selector ), className, name.c_str(), desc.c_str(), SourceLineInfo() ) ); + 
noTestMethods++; + } + } + free(methods); + } + } + return noTestMethods; + } + + namespace Matchers { + namespace Impl { + namespace NSStringMatchers { + + struct StringHolder : MatcherBase{ + StringHolder( NSString* substr ) : m_substr( [substr copy] ){} + StringHolder( StringHolder const& other ) : m_substr( [other.m_substr copy] ){} + StringHolder() { + arcSafeRelease( m_substr ); + } + + virtual bool match( NSString* arg ) const CATCH_OVERRIDE { + return false; + } + + NSString* m_substr; + }; + + struct Equals : StringHolder { + Equals( NSString* substr ) : StringHolder( substr ){} + + virtual bool match( NSString* str ) const CATCH_OVERRIDE { + return (str != nil || m_substr == nil ) && + [str isEqualToString:m_substr]; + } + + virtual std::string describe() const CATCH_OVERRIDE { + return "equals string: " + Catch::toString( m_substr ); + } + }; + + struct Contains : StringHolder { + Contains( NSString* substr ) : StringHolder( substr ){} + + virtual bool match( NSString* str ) const { + return (str != nil || m_substr == nil ) && + [str rangeOfString:m_substr].location != NSNotFound; + } + + virtual std::string describe() const CATCH_OVERRIDE { + return "contains string: " + Catch::toString( m_substr ); + } + }; + + struct StartsWith : StringHolder { + StartsWith( NSString* substr ) : StringHolder( substr ){} + + virtual bool match( NSString* str ) const { + return (str != nil || m_substr == nil ) && + [str rangeOfString:m_substr].location == 0; + } + + virtual std::string describe() const CATCH_OVERRIDE { + return "starts with: " + Catch::toString( m_substr ); + } + }; + struct EndsWith : StringHolder { + EndsWith( NSString* substr ) : StringHolder( substr ){} + + virtual bool match( NSString* str ) const { + return (str != nil || m_substr == nil ) && + [str rangeOfString:m_substr].location == [str length] - [m_substr length]; + } + + virtual std::string describe() const CATCH_OVERRIDE { + return "ends with: " + Catch::toString( m_substr ); + } + }; + + } 
// namespace NSStringMatchers + } // namespace Impl + + inline Impl::NSStringMatchers::Equals + Equals( NSString* substr ){ return Impl::NSStringMatchers::Equals( substr ); } + + inline Impl::NSStringMatchers::Contains + Contains( NSString* substr ){ return Impl::NSStringMatchers::Contains( substr ); } + + inline Impl::NSStringMatchers::StartsWith + StartsWith( NSString* substr ){ return Impl::NSStringMatchers::StartsWith( substr ); } + + inline Impl::NSStringMatchers::EndsWith + EndsWith( NSString* substr ){ return Impl::NSStringMatchers::EndsWith( substr ); } + + } // namespace Matchers + + using namespace Matchers; + +} // namespace Catch + +/////////////////////////////////////////////////////////////////////////////// +#define OC_TEST_CASE( name, desc )\ ++(NSString*) INTERNAL_CATCH_UNIQUE_NAME( Catch_Name_test ) \ +{\ +return @ name; \ +}\ ++(NSString*) INTERNAL_CATCH_UNIQUE_NAME( Catch_Description_test ) \ +{ \ +return @ desc; \ +} \ +-(void) INTERNAL_CATCH_UNIQUE_NAME( Catch_TestCase_test ) + +#endif + +#ifdef CATCH_IMPL + +// !TBD: Move the leak detector code into a separate header +#ifdef CATCH_CONFIG_WINDOWS_CRTDBG +#include +class LeakDetector { +public: + LeakDetector() { + int flag = _CrtSetDbgFlag(_CRTDBG_REPORT_FLAG); + flag |= _CRTDBG_LEAK_CHECK_DF; + flag |= _CRTDBG_ALLOC_MEM_DF; + _CrtSetDbgFlag(flag); + _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG); + _CrtSetReportFile(_CRT_WARN, _CRTDBG_FILE_STDERR); + // Change this to leaking allocation's number to break there + _CrtSetBreakAlloc(-1); + } +}; +#else +class LeakDetector {}; +#endif + +LeakDetector leakDetector; + +// #included from: internal/catch_impl.hpp +#define TWOBLUECUBES_CATCH_IMPL_HPP_INCLUDED + +// Collect all the implementation files together here +// These are the equivalent of what would usually be cpp files + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wweak-vtables" +#endif + +// #included from: ../catch_session.hpp 
+#define TWOBLUECUBES_CATCH_RUNNER_HPP_INCLUDED + +// #included from: internal/catch_commandline.hpp +#define TWOBLUECUBES_CATCH_COMMANDLINE_HPP_INCLUDED + +// #included from: catch_config.hpp +#define TWOBLUECUBES_CATCH_CONFIG_HPP_INCLUDED + +// #included from: catch_test_spec_parser.hpp +#define TWOBLUECUBES_CATCH_TEST_SPEC_PARSER_HPP_INCLUDED + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wpadded" +#endif + +// #included from: catch_test_spec.hpp +#define TWOBLUECUBES_CATCH_TEST_SPEC_HPP_INCLUDED + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wpadded" +#endif + +// #included from: catch_wildcard_pattern.hpp +#define TWOBLUECUBES_CATCH_WILDCARD_PATTERN_HPP_INCLUDED + +#include + +namespace Catch +{ + class WildcardPattern { + enum WildcardPosition { + NoWildcard = 0, + WildcardAtStart = 1, + WildcardAtEnd = 2, + WildcardAtBothEnds = WildcardAtStart | WildcardAtEnd + }; + + public: + + WildcardPattern( std::string const& pattern, CaseSensitive::Choice caseSensitivity ) + : m_caseSensitivity( caseSensitivity ), + m_wildcard( NoWildcard ), + m_pattern( adjustCase( pattern ) ) + { + if( startsWith( m_pattern, '*' ) ) { + m_pattern = m_pattern.substr( 1 ); + m_wildcard = WildcardAtStart; + } + if( endsWith( m_pattern, '*' ) ) { + m_pattern = m_pattern.substr( 0, m_pattern.size()-1 ); + m_wildcard = static_cast( m_wildcard | WildcardAtEnd ); + } + } + virtual ~WildcardPattern(); + virtual bool matches( std::string const& str ) const { + switch( m_wildcard ) { + case NoWildcard: + return m_pattern == adjustCase( str ); + case WildcardAtStart: + return endsWith( adjustCase( str ), m_pattern ); + case WildcardAtEnd: + return startsWith( adjustCase( str ), m_pattern ); + case WildcardAtBothEnds: + return contains( adjustCase( str ), m_pattern ); + } + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunreachable-code" +#endif + throw std::logic_error( 
"Unknown enum" ); +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + } + private: + std::string adjustCase( std::string const& str ) const { + return m_caseSensitivity == CaseSensitive::No ? toLower( str ) : str; + } + CaseSensitive::Choice m_caseSensitivity; + WildcardPosition m_wildcard; + std::string m_pattern; + }; +} + +#include +#include + +namespace Catch { + + class TestSpec { + struct Pattern : SharedImpl<> { + virtual ~Pattern(); + virtual bool matches( TestCaseInfo const& testCase ) const = 0; + }; + class NamePattern : public Pattern { + public: + NamePattern( std::string const& name ) + : m_wildcardPattern( toLower( name ), CaseSensitive::No ) + {} + virtual ~NamePattern(); + virtual bool matches( TestCaseInfo const& testCase ) const { + return m_wildcardPattern.matches( toLower( testCase.name ) ); + } + private: + WildcardPattern m_wildcardPattern; + }; + + class TagPattern : public Pattern { + public: + TagPattern( std::string const& tag ) : m_tag( toLower( tag ) ) {} + virtual ~TagPattern(); + virtual bool matches( TestCaseInfo const& testCase ) const { + return testCase.lcaseTags.find( m_tag ) != testCase.lcaseTags.end(); + } + private: + std::string m_tag; + }; + + class ExcludedPattern : public Pattern { + public: + ExcludedPattern( Ptr const& underlyingPattern ) : m_underlyingPattern( underlyingPattern ) {} + virtual ~ExcludedPattern(); + virtual bool matches( TestCaseInfo const& testCase ) const { return !m_underlyingPattern->matches( testCase ); } + private: + Ptr m_underlyingPattern; + }; + + struct Filter { + std::vector > m_patterns; + + bool matches( TestCaseInfo const& testCase ) const { + // All patterns in a filter must match for the filter to be a match + for( std::vector >::const_iterator it = m_patterns.begin(), itEnd = m_patterns.end(); it != itEnd; ++it ) { + if( !(*it)->matches( testCase ) ) + return false; + } + return true; + } + }; + + public: + bool hasFilters() const { + return !m_filters.empty(); + } + bool matches( 
TestCaseInfo const& testCase ) const { + // A TestSpec matches if any filter matches + for( std::vector::const_iterator it = m_filters.begin(), itEnd = m_filters.end(); it != itEnd; ++it ) + if( it->matches( testCase ) ) + return true; + return false; + } + + private: + std::vector m_filters; + + friend class TestSpecParser; + }; +} + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +namespace Catch { + + class TestSpecParser { + enum Mode{ None, Name, QuotedName, Tag, EscapedName }; + Mode m_mode; + bool m_exclusion; + std::size_t m_start, m_pos; + std::string m_arg; + std::vector m_escapeChars; + TestSpec::Filter m_currentFilter; + TestSpec m_testSpec; + ITagAliasRegistry const* m_tagAliases; + + public: + TestSpecParser( ITagAliasRegistry const& tagAliases ) :m_mode(None), m_exclusion(false), m_start(0), m_pos(0), m_tagAliases( &tagAliases ) {} + + TestSpecParser& parse( std::string const& arg ) { + m_mode = None; + m_exclusion = false; + m_start = std::string::npos; + m_arg = m_tagAliases->expandAliases( arg ); + m_escapeChars.clear(); + for( m_pos = 0; m_pos < m_arg.size(); ++m_pos ) + visitChar( m_arg[m_pos] ); + if( m_mode == Name ) + addPattern(); + return *this; + } + TestSpec testSpec() { + addFilter(); + return m_testSpec; + } + private: + void visitChar( char c ) { + if( m_mode == None ) { + switch( c ) { + case ' ': return; + case '~': m_exclusion = true; return; + case '[': return startNewMode( Tag, ++m_pos ); + case '"': return startNewMode( QuotedName, ++m_pos ); + case '\\': return escape(); + default: startNewMode( Name, m_pos ); break; + } + } + if( m_mode == Name ) { + if( c == ',' ) { + addPattern(); + addFilter(); + } + else if( c == '[' ) { + if( subString() == "exclude:" ) + m_exclusion = true; + else + addPattern(); + startNewMode( Tag, ++m_pos ); + } + else if( c == '\\' ) + escape(); + } + else if( m_mode == EscapedName ) + m_mode = Name; + else if( m_mode == QuotedName && c == '"' ) + addPattern(); + else if( m_mode == Tag && c 
== ']' ) + addPattern(); + } + void startNewMode( Mode mode, std::size_t start ) { + m_mode = mode; + m_start = start; + } + void escape() { + if( m_mode == None ) + m_start = m_pos; + m_mode = EscapedName; + m_escapeChars.push_back( m_pos ); + } + std::string subString() const { return m_arg.substr( m_start, m_pos - m_start ); } + template + void addPattern() { + std::string token = subString(); + for( size_t i = 0; i < m_escapeChars.size(); ++i ) + token = token.substr( 0, m_escapeChars[i]-m_start-i ) + token.substr( m_escapeChars[i]-m_start-i+1 ); + m_escapeChars.clear(); + if( startsWith( token, "exclude:" ) ) { + m_exclusion = true; + token = token.substr( 8 ); + } + if( !token.empty() ) { + Ptr pattern = new T( token ); + if( m_exclusion ) + pattern = new TestSpec::ExcludedPattern( pattern ); + m_currentFilter.m_patterns.push_back( pattern ); + } + m_exclusion = false; + m_mode = None; + } + void addFilter() { + if( !m_currentFilter.m_patterns.empty() ) { + m_testSpec.m_filters.push_back( m_currentFilter ); + m_currentFilter = TestSpec::Filter(); + } + } + }; + inline TestSpec parseTestSpec( std::string const& arg ) { + return TestSpecParser( ITagAliasRegistry::get() ).parse( arg ).testSpec(); + } + +} // namespace Catch + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +// #included from: catch_interfaces_config.h +#define TWOBLUECUBES_CATCH_INTERFACES_CONFIG_H_INCLUDED + +#include +#include +#include + +namespace Catch { + + struct Verbosity { enum Level { + NoOutput = 0, + Quiet, + Normal + }; }; + + struct WarnAbout { enum What { + Nothing = 0x00, + NoAssertions = 0x01 + }; }; + + struct ShowDurations { enum OrNot { + DefaultForReporter, + Always, + Never + }; }; + struct RunTests { enum InWhatOrder { + InDeclarationOrder, + InLexicographicalOrder, + InRandomOrder + }; }; + struct UseColour { enum YesOrNo { + Auto, + Yes, + No + }; }; + struct WaitForKeypress { enum When { + Never, + BeforeStart = 1, + BeforeExit = 2, + BeforeStartAndExit = 
BeforeStart | BeforeExit + }; }; + + class TestSpec; + + struct IConfig : IShared { + + virtual ~IConfig(); + + virtual bool allowThrows() const = 0; + virtual std::ostream& stream() const = 0; + virtual std::string name() const = 0; + virtual bool includeSuccessfulResults() const = 0; + virtual bool shouldDebugBreak() const = 0; + virtual bool warnAboutMissingAssertions() const = 0; + virtual int abortAfter() const = 0; + virtual bool showInvisibles() const = 0; + virtual ShowDurations::OrNot showDurations() const = 0; + virtual TestSpec const& testSpec() const = 0; + virtual RunTests::InWhatOrder runOrder() const = 0; + virtual unsigned int rngSeed() const = 0; + virtual UseColour::YesOrNo useColour() const = 0; + virtual std::vector const& getSectionsToRun() const = 0; + + }; +} + +// #included from: catch_stream.h +#define TWOBLUECUBES_CATCH_STREAM_H_INCLUDED + +// #included from: catch_streambuf.h +#define TWOBLUECUBES_CATCH_STREAMBUF_H_INCLUDED + +#include + +namespace Catch { + + class StreamBufBase : public std::streambuf { + public: + virtual ~StreamBufBase() CATCH_NOEXCEPT; + }; +} + +#include +#include +#include +#include + +namespace Catch { + + std::ostream& cout(); + std::ostream& cerr(); + std::ostream& clog(); + + struct IStream { + virtual ~IStream() CATCH_NOEXCEPT; + virtual std::ostream& stream() const = 0; + }; + + class FileStream : public IStream { + mutable std::ofstream m_ofs; + public: + FileStream( std::string const& filename ); + virtual ~FileStream() CATCH_NOEXCEPT; + public: // IStream + virtual std::ostream& stream() const CATCH_OVERRIDE; + }; + + class CoutStream : public IStream { + mutable std::ostream m_os; + public: + CoutStream(); + virtual ~CoutStream() CATCH_NOEXCEPT; + + public: // IStream + virtual std::ostream& stream() const CATCH_OVERRIDE; + }; + + class DebugOutStream : public IStream { + CATCH_AUTO_PTR( StreamBufBase ) m_streamBuf; + mutable std::ostream m_os; + public: + DebugOutStream(); + virtual ~DebugOutStream() 
CATCH_NOEXCEPT; + + public: // IStream + virtual std::ostream& stream() const CATCH_OVERRIDE; + }; +} + +#include +#include +#include +#include + +#ifndef CATCH_CONFIG_CONSOLE_WIDTH +#define CATCH_CONFIG_CONSOLE_WIDTH 80 +#endif + +namespace Catch { + + struct ConfigData { + + ConfigData() + : listTests( false ), + listTags( false ), + listReporters( false ), + listTestNamesOnly( false ), + listExtraInfo( false ), + showSuccessfulTests( false ), + shouldDebugBreak( false ), + noThrow( false ), + showHelp( false ), + showInvisibles( false ), + filenamesAsTags( false ), + libIdentify( false ), + abortAfter( -1 ), + rngSeed( 0 ), + verbosity( Verbosity::Normal ), + warnings( WarnAbout::Nothing ), + showDurations( ShowDurations::DefaultForReporter ), + runOrder( RunTests::InDeclarationOrder ), + useColour( UseColour::Auto ), + waitForKeypress( WaitForKeypress::Never ) + {} + + bool listTests; + bool listTags; + bool listReporters; + bool listTestNamesOnly; + bool listExtraInfo; + + bool showSuccessfulTests; + bool shouldDebugBreak; + bool noThrow; + bool showHelp; + bool showInvisibles; + bool filenamesAsTags; + bool libIdentify; + + int abortAfter; + unsigned int rngSeed; + + Verbosity::Level verbosity; + WarnAbout::What warnings; + ShowDurations::OrNot showDurations; + RunTests::InWhatOrder runOrder; + UseColour::YesOrNo useColour; + WaitForKeypress::When waitForKeypress; + + std::string outputFilename; + std::string name; + std::string processName; + + std::vector reporterNames; + std::vector testsOrTags; + std::vector sectionsToRun; + }; + + class Config : public SharedImpl { + private: + Config( Config const& other ); + Config& operator = ( Config const& other ); + virtual void dummy(); + public: + + Config() + {} + + Config( ConfigData const& data ) + : m_data( data ), + m_stream( openStream() ) + { + if( !data.testsOrTags.empty() ) { + TestSpecParser parser( ITagAliasRegistry::get() ); + for( std::size_t i = 0; i < data.testsOrTags.size(); ++i ) + parser.parse( 
data.testsOrTags[i] ); + m_testSpec = parser.testSpec(); + } + } + + virtual ~Config() {} + + std::string const& getFilename() const { + return m_data.outputFilename ; + } + + bool listTests() const { return m_data.listTests; } + bool listTestNamesOnly() const { return m_data.listTestNamesOnly; } + bool listTags() const { return m_data.listTags; } + bool listReporters() const { return m_data.listReporters; } + bool listExtraInfo() const { return m_data.listExtraInfo; } + + std::string getProcessName() const { return m_data.processName; } + + std::vector const& getReporterNames() const { return m_data.reporterNames; } + std::vector const& getSectionsToRun() const CATCH_OVERRIDE { return m_data.sectionsToRun; } + + virtual TestSpec const& testSpec() const CATCH_OVERRIDE { return m_testSpec; } + + bool showHelp() const { return m_data.showHelp; } + + // IConfig interface + virtual bool allowThrows() const CATCH_OVERRIDE { return !m_data.noThrow; } + virtual std::ostream& stream() const CATCH_OVERRIDE { return m_stream->stream(); } + virtual std::string name() const CATCH_OVERRIDE { return m_data.name.empty() ? 
m_data.processName : m_data.name; } + virtual bool includeSuccessfulResults() const CATCH_OVERRIDE { return m_data.showSuccessfulTests; } + virtual bool warnAboutMissingAssertions() const CATCH_OVERRIDE { return m_data.warnings & WarnAbout::NoAssertions; } + virtual ShowDurations::OrNot showDurations() const CATCH_OVERRIDE { return m_data.showDurations; } + virtual RunTests::InWhatOrder runOrder() const CATCH_OVERRIDE { return m_data.runOrder; } + virtual unsigned int rngSeed() const CATCH_OVERRIDE { return m_data.rngSeed; } + virtual UseColour::YesOrNo useColour() const CATCH_OVERRIDE { return m_data.useColour; } + virtual bool shouldDebugBreak() const CATCH_OVERRIDE { return m_data.shouldDebugBreak; } + virtual int abortAfter() const CATCH_OVERRIDE { return m_data.abortAfter; } + virtual bool showInvisibles() const CATCH_OVERRIDE { return m_data.showInvisibles; } + + private: + + IStream const* openStream() { + if( m_data.outputFilename.empty() ) + return new CoutStream(); + else if( m_data.outputFilename[0] == '%' ) { + if( m_data.outputFilename == "%debug" ) + return new DebugOutStream(); + else + throw std::domain_error( "Unrecognised stream: " + m_data.outputFilename ); + } + else + return new FileStream( m_data.outputFilename ); + } + ConfigData m_data; + + CATCH_AUTO_PTR( IStream const ) m_stream; + TestSpec m_testSpec; + }; + +} // end namespace Catch + +// #included from: catch_clara.h +#define TWOBLUECUBES_CATCH_CLARA_H_INCLUDED + +// Use Catch's value for console width (store Clara's off to the side, if present) +#ifdef CLARA_CONFIG_CONSOLE_WIDTH +#define CATCH_TEMP_CLARA_CONFIG_CONSOLE_WIDTH CLARA_CONFIG_CONSOLE_WIDTH +#undef CLARA_CONFIG_CONSOLE_WIDTH +#endif +#define CLARA_CONFIG_CONSOLE_WIDTH CATCH_CONFIG_CONSOLE_WIDTH + +// Declare Clara inside the Catch namespace +#define STITCH_CLARA_OPEN_NAMESPACE namespace Catch { +// #included from: ../external/clara.h + +// Version 0.0.2.4 + +// Only use header guard if we are not using an outer namespace 
+#if !defined(TWOBLUECUBES_CLARA_H_INCLUDED) || defined(STITCH_CLARA_OPEN_NAMESPACE) + +#ifndef STITCH_CLARA_OPEN_NAMESPACE +#define TWOBLUECUBES_CLARA_H_INCLUDED +#define STITCH_CLARA_OPEN_NAMESPACE +#define STITCH_CLARA_CLOSE_NAMESPACE +#else +#define STITCH_CLARA_CLOSE_NAMESPACE } +#endif + +#define STITCH_TBC_TEXT_FORMAT_OPEN_NAMESPACE STITCH_CLARA_OPEN_NAMESPACE + +// ----------- #included from tbc_text_format.h ----------- + +// Only use header guard if we are not using an outer namespace +#if !defined(TBC_TEXT_FORMAT_H_INCLUDED) || defined(STITCH_TBC_TEXT_FORMAT_OUTER_NAMESPACE) +#ifndef STITCH_TBC_TEXT_FORMAT_OUTER_NAMESPACE +#define TBC_TEXT_FORMAT_H_INCLUDED +#endif + +#include +#include +#include +#include +#include + +// Use optional outer namespace +#ifdef STITCH_TBC_TEXT_FORMAT_OUTER_NAMESPACE +namespace STITCH_TBC_TEXT_FORMAT_OUTER_NAMESPACE { +#endif + +namespace Tbc { + +#ifdef TBC_TEXT_FORMAT_CONSOLE_WIDTH + const unsigned int consoleWidth = TBC_TEXT_FORMAT_CONSOLE_WIDTH; +#else + const unsigned int consoleWidth = 80; +#endif + + struct TextAttributes { + TextAttributes() + : initialIndent( std::string::npos ), + indent( 0 ), + width( consoleWidth-1 ), + tabChar( '\t' ) + {} + + TextAttributes& setInitialIndent( std::size_t _value ) { initialIndent = _value; return *this; } + TextAttributes& setIndent( std::size_t _value ) { indent = _value; return *this; } + TextAttributes& setWidth( std::size_t _value ) { width = _value; return *this; } + TextAttributes& setTabChar( char _value ) { tabChar = _value; return *this; } + + std::size_t initialIndent; // indent of first line, or npos + std::size_t indent; // indent of subsequent lines, or all if initialIndent is npos + std::size_t width; // maximum width of text, including indent. 
Longer text will wrap + char tabChar; // If this char is seen the indent is changed to current pos + }; + + class Text { + public: + Text( std::string const& _str, TextAttributes const& _attr = TextAttributes() ) + : attr( _attr ) + { + std::string wrappableChars = " [({.,/|\\-"; + std::size_t indent = _attr.initialIndent != std::string::npos + ? _attr.initialIndent + : _attr.indent; + std::string remainder = _str; + + while( !remainder.empty() ) { + if( lines.size() >= 1000 ) { + lines.push_back( "... message truncated due to excessive size" ); + return; + } + std::size_t tabPos = std::string::npos; + std::size_t width = (std::min)( remainder.size(), _attr.width - indent ); + std::size_t pos = remainder.find_first_of( '\n' ); + if( pos <= width ) { + width = pos; + } + pos = remainder.find_last_of( _attr.tabChar, width ); + if( pos != std::string::npos ) { + tabPos = pos; + if( remainder[width] == '\n' ) + width--; + remainder = remainder.substr( 0, tabPos ) + remainder.substr( tabPos+1 ); + } + + if( width == remainder.size() ) { + spliceLine( indent, remainder, width ); + } + else if( remainder[width] == '\n' ) { + spliceLine( indent, remainder, width ); + if( width <= 1 || remainder.size() != 1 ) + remainder = remainder.substr( 1 ); + indent = _attr.indent; + } + else { + pos = remainder.find_last_of( wrappableChars, width ); + if( pos != std::string::npos && pos > 0 ) { + spliceLine( indent, remainder, pos ); + if( remainder[0] == ' ' ) + remainder = remainder.substr( 1 ); + } + else { + spliceLine( indent, remainder, width-1 ); + lines.back() += "-"; + } + if( lines.size() == 1 ) + indent = _attr.indent; + if( tabPos != std::string::npos ) + indent += tabPos; + } + } + } + + void spliceLine( std::size_t _indent, std::string& _remainder, std::size_t _pos ) { + lines.push_back( std::string( _indent, ' ' ) + _remainder.substr( 0, _pos ) ); + _remainder = _remainder.substr( _pos ); + } + + typedef std::vector::const_iterator const_iterator; + + const_iterator 
begin() const { return lines.begin(); } + const_iterator end() const { return lines.end(); } + std::string const& last() const { return lines.back(); } + std::size_t size() const { return lines.size(); } + std::string const& operator[]( std::size_t _index ) const { return lines[_index]; } + std::string toString() const { + std::ostringstream oss; + oss << *this; + return oss.str(); + } + + friend std::ostream& operator << ( std::ostream& _stream, Text const& _text ) { + for( Text::const_iterator it = _text.begin(), itEnd = _text.end(); + it != itEnd; ++it ) { + if( it != _text.begin() ) + _stream << "\n"; + _stream << *it; + } + return _stream; + } + + private: + std::string str; + TextAttributes attr; + std::vector lines; + }; + +} // end namespace Tbc + +#ifdef STITCH_TBC_TEXT_FORMAT_OUTER_NAMESPACE +} // end outer namespace +#endif + +#endif // TBC_TEXT_FORMAT_H_INCLUDED + +// ----------- end of #include from tbc_text_format.h ----------- +// ........... back in clara.h + +#undef STITCH_TBC_TEXT_FORMAT_OPEN_NAMESPACE + +// ----------- #included from clara_compilers.h ----------- + +#ifndef TWOBLUECUBES_CLARA_COMPILERS_H_INCLUDED +#define TWOBLUECUBES_CLARA_COMPILERS_H_INCLUDED + +// Detect a number of compiler features - mostly C++11/14 conformance - by compiler +// The following features are defined: +// +// CLARA_CONFIG_CPP11_NULLPTR : is nullptr supported? +// CLARA_CONFIG_CPP11_NOEXCEPT : is noexcept supported? +// CLARA_CONFIG_CPP11_GENERATED_METHODS : The delete and default keywords for compiler generated methods +// CLARA_CONFIG_CPP11_OVERRIDE : is override supported? +// CLARA_CONFIG_CPP11_UNIQUE_PTR : is unique_ptr supported (otherwise use auto_ptr) + +// CLARA_CONFIG_CPP11_OR_GREATER : Is C++11 supported? + +// CLARA_CONFIG_VARIADIC_MACROS : are variadic macros supported? + +// In general each macro has a _NO_ form +// (e.g. CLARA_CONFIG_CPP11_NO_NULLPTR) which disables the feature. 
+// Many features, at point of detection, define an _INTERNAL_ macro, so they +// can be combined, en-mass, with the _NO_ forms later. + +// All the C++11 features can be disabled with CLARA_CONFIG_NO_CPP11 + +#ifdef __clang__ + +#if __has_feature(cxx_nullptr) +#define CLARA_INTERNAL_CONFIG_CPP11_NULLPTR +#endif + +#if __has_feature(cxx_noexcept) +#define CLARA_INTERNAL_CONFIG_CPP11_NOEXCEPT +#endif + +#endif // __clang__ + +//////////////////////////////////////////////////////////////////////////////// +// GCC +#ifdef __GNUC__ + +#if __GNUC__ == 4 && __GNUC_MINOR__ >= 6 && defined(__GXX_EXPERIMENTAL_CXX0X__) +#define CLARA_INTERNAL_CONFIG_CPP11_NULLPTR +#endif + +// - otherwise more recent versions define __cplusplus >= 201103L +// and will get picked up below + +#endif // __GNUC__ + +//////////////////////////////////////////////////////////////////////////////// +// Visual C++ +#ifdef _MSC_VER + +#if (_MSC_VER >= 1600) +#define CLARA_INTERNAL_CONFIG_CPP11_NULLPTR +#define CLARA_INTERNAL_CONFIG_CPP11_UNIQUE_PTR +#endif + +#if (_MSC_VER >= 1900 ) // (VC++ 13 (VS2015)) +#define CLARA_INTERNAL_CONFIG_CPP11_NOEXCEPT +#define CLARA_INTERNAL_CONFIG_CPP11_GENERATED_METHODS +#endif + +#endif // _MSC_VER + +//////////////////////////////////////////////////////////////////////////////// +// C++ language feature support + +// catch all support for C++11 +#if defined(__cplusplus) && __cplusplus >= 201103L + +#define CLARA_CPP11_OR_GREATER + +#if !defined(CLARA_INTERNAL_CONFIG_CPP11_NULLPTR) +#define CLARA_INTERNAL_CONFIG_CPP11_NULLPTR +#endif + +#ifndef CLARA_INTERNAL_CONFIG_CPP11_NOEXCEPT +#define CLARA_INTERNAL_CONFIG_CPP11_NOEXCEPT +#endif + +#ifndef CLARA_INTERNAL_CONFIG_CPP11_GENERATED_METHODS +#define CLARA_INTERNAL_CONFIG_CPP11_GENERATED_METHODS +#endif + +#if !defined(CLARA_INTERNAL_CONFIG_CPP11_OVERRIDE) +#define CLARA_INTERNAL_CONFIG_CPP11_OVERRIDE +#endif +#if !defined(CLARA_INTERNAL_CONFIG_CPP11_UNIQUE_PTR) +#define CLARA_INTERNAL_CONFIG_CPP11_UNIQUE_PTR 
+#endif + +#endif // __cplusplus >= 201103L + +// Now set the actual defines based on the above + anything the user has configured +#if defined(CLARA_INTERNAL_CONFIG_CPP11_NULLPTR) && !defined(CLARA_CONFIG_CPP11_NO_NULLPTR) && !defined(CLARA_CONFIG_CPP11_NULLPTR) && !defined(CLARA_CONFIG_NO_CPP11) +#define CLARA_CONFIG_CPP11_NULLPTR +#endif +#if defined(CLARA_INTERNAL_CONFIG_CPP11_NOEXCEPT) && !defined(CLARA_CONFIG_CPP11_NO_NOEXCEPT) && !defined(CLARA_CONFIG_CPP11_NOEXCEPT) && !defined(CLARA_CONFIG_NO_CPP11) +#define CLARA_CONFIG_CPP11_NOEXCEPT +#endif +#if defined(CLARA_INTERNAL_CONFIG_CPP11_GENERATED_METHODS) && !defined(CLARA_CONFIG_CPP11_NO_GENERATED_METHODS) && !defined(CLARA_CONFIG_CPP11_GENERATED_METHODS) && !defined(CLARA_CONFIG_NO_CPP11) +#define CLARA_CONFIG_CPP11_GENERATED_METHODS +#endif +#if defined(CLARA_INTERNAL_CONFIG_CPP11_OVERRIDE) && !defined(CLARA_CONFIG_NO_OVERRIDE) && !defined(CLARA_CONFIG_CPP11_OVERRIDE) && !defined(CLARA_CONFIG_NO_CPP11) +#define CLARA_CONFIG_CPP11_OVERRIDE +#endif +#if defined(CLARA_INTERNAL_CONFIG_CPP11_UNIQUE_PTR) && !defined(CLARA_CONFIG_NO_UNIQUE_PTR) && !defined(CLARA_CONFIG_CPP11_UNIQUE_PTR) && !defined(CLARA_CONFIG_NO_CPP11) +#define CLARA_CONFIG_CPP11_UNIQUE_PTR +#endif + +// noexcept support: +#if defined(CLARA_CONFIG_CPP11_NOEXCEPT) && !defined(CLARA_NOEXCEPT) +#define CLARA_NOEXCEPT noexcept +# define CLARA_NOEXCEPT_IS(x) noexcept(x) +#else +#define CLARA_NOEXCEPT throw() +# define CLARA_NOEXCEPT_IS(x) +#endif + +// nullptr support +#ifdef CLARA_CONFIG_CPP11_NULLPTR +#define CLARA_NULL nullptr +#else +#define CLARA_NULL NULL +#endif + +// override support +#ifdef CLARA_CONFIG_CPP11_OVERRIDE +#define CLARA_OVERRIDE override +#else +#define CLARA_OVERRIDE +#endif + +// unique_ptr support +#ifdef CLARA_CONFIG_CPP11_UNIQUE_PTR +# define CLARA_AUTO_PTR( T ) std::unique_ptr +#else +# define CLARA_AUTO_PTR( T ) std::auto_ptr +#endif + +#endif // TWOBLUECUBES_CLARA_COMPILERS_H_INCLUDED + +// ----------- end of #include 
from clara_compilers.h ----------- +// ........... back in clara.h + +#include +#include +#include + +#if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || defined(_MSC_VER) +#define CLARA_PLATFORM_WINDOWS +#endif + +// Use optional outer namespace +#ifdef STITCH_CLARA_OPEN_NAMESPACE +STITCH_CLARA_OPEN_NAMESPACE +#endif + +namespace Clara { + + struct UnpositionalTag {}; + + extern UnpositionalTag _; + +#ifdef CLARA_CONFIG_MAIN + UnpositionalTag _; +#endif + + namespace Detail { + +#ifdef CLARA_CONSOLE_WIDTH + const unsigned int consoleWidth = CLARA_CONFIG_CONSOLE_WIDTH; +#else + const unsigned int consoleWidth = 80; +#endif + + using namespace Tbc; + + inline bool startsWith( std::string const& str, std::string const& prefix ) { + return str.size() >= prefix.size() && str.substr( 0, prefix.size() ) == prefix; + } + + template struct RemoveConstRef{ typedef T type; }; + template struct RemoveConstRef{ typedef T type; }; + template struct RemoveConstRef{ typedef T type; }; + template struct RemoveConstRef{ typedef T type; }; + + template struct IsBool { static const bool value = false; }; + template<> struct IsBool { static const bool value = true; }; + + template + void convertInto( std::string const& _source, T& _dest ) { + std::stringstream ss; + ss << _source; + ss >> _dest; + if( ss.fail() ) + throw std::runtime_error( "Unable to convert " + _source + " to destination type" ); + } + inline void convertInto( std::string const& _source, std::string& _dest ) { + _dest = _source; + } + char toLowerCh(char c) { + return static_cast( std::tolower( c ) ); + } + inline void convertInto( std::string const& _source, bool& _dest ) { + std::string sourceLC = _source; + std::transform( sourceLC.begin(), sourceLC.end(), sourceLC.begin(), toLowerCh ); + if( sourceLC == "y" || sourceLC == "1" || sourceLC == "true" || sourceLC == "yes" || sourceLC == "on" ) + _dest = true; + else if( sourceLC == "n" || sourceLC == "0" || sourceLC == "false" || sourceLC == "no" || 
sourceLC == "off" ) + _dest = false; + else + throw std::runtime_error( "Expected a boolean value but did not recognise:\n '" + _source + "'" ); + } + + template + struct IArgFunction { + virtual ~IArgFunction() {} +#ifdef CLARA_CONFIG_CPP11_GENERATED_METHODS + IArgFunction() = default; + IArgFunction( IArgFunction const& ) = default; +#endif + virtual void set( ConfigT& config, std::string const& value ) const = 0; + virtual bool takesArg() const = 0; + virtual IArgFunction* clone() const = 0; + }; + + template + class BoundArgFunction { + public: + BoundArgFunction() : functionObj( CLARA_NULL ) {} + BoundArgFunction( IArgFunction* _functionObj ) : functionObj( _functionObj ) {} + BoundArgFunction( BoundArgFunction const& other ) : functionObj( other.functionObj ? other.functionObj->clone() : CLARA_NULL ) {} + BoundArgFunction& operator = ( BoundArgFunction const& other ) { + IArgFunction* newFunctionObj = other.functionObj ? other.functionObj->clone() : CLARA_NULL; + delete functionObj; + functionObj = newFunctionObj; + return *this; + } + ~BoundArgFunction() { delete functionObj; } + + void set( ConfigT& config, std::string const& value ) const { + functionObj->set( config, value ); + } + bool takesArg() const { return functionObj->takesArg(); } + + bool isSet() const { + return functionObj != CLARA_NULL; + } + private: + IArgFunction* functionObj; + }; + + template + struct NullBinder : IArgFunction{ + virtual void set( C&, std::string const& ) const {} + virtual bool takesArg() const { return true; } + virtual IArgFunction* clone() const { return new NullBinder( *this ); } + }; + + template + struct BoundDataMember : IArgFunction{ + BoundDataMember( M C::* _member ) : member( _member ) {} + virtual void set( C& p, std::string const& stringValue ) const { + convertInto( stringValue, p.*member ); + } + virtual bool takesArg() const { return !IsBool::value; } + virtual IArgFunction* clone() const { return new BoundDataMember( *this ); } + M C::* member; + }; + 
template + struct BoundUnaryMethod : IArgFunction{ + BoundUnaryMethod( void (C::*_member)( M ) ) : member( _member ) {} + virtual void set( C& p, std::string const& stringValue ) const { + typename RemoveConstRef::type value; + convertInto( stringValue, value ); + (p.*member)( value ); + } + virtual bool takesArg() const { return !IsBool::value; } + virtual IArgFunction* clone() const { return new BoundUnaryMethod( *this ); } + void (C::*member)( M ); + }; + template + struct BoundNullaryMethod : IArgFunction{ + BoundNullaryMethod( void (C::*_member)() ) : member( _member ) {} + virtual void set( C& p, std::string const& stringValue ) const { + bool value; + convertInto( stringValue, value ); + if( value ) + (p.*member)(); + } + virtual bool takesArg() const { return false; } + virtual IArgFunction* clone() const { return new BoundNullaryMethod( *this ); } + void (C::*member)(); + }; + + template + struct BoundUnaryFunction : IArgFunction{ + BoundUnaryFunction( void (*_function)( C& ) ) : function( _function ) {} + virtual void set( C& obj, std::string const& stringValue ) const { + bool value; + convertInto( stringValue, value ); + if( value ) + function( obj ); + } + virtual bool takesArg() const { return false; } + virtual IArgFunction* clone() const { return new BoundUnaryFunction( *this ); } + void (*function)( C& ); + }; + + template + struct BoundBinaryFunction : IArgFunction{ + BoundBinaryFunction( void (*_function)( C&, T ) ) : function( _function ) {} + virtual void set( C& obj, std::string const& stringValue ) const { + typename RemoveConstRef::type value; + convertInto( stringValue, value ); + function( obj, value ); + } + virtual bool takesArg() const { return !IsBool::value; } + virtual IArgFunction* clone() const { return new BoundBinaryFunction( *this ); } + void (*function)( C&, T ); + }; + + } // namespace Detail + + inline std::vector argsToVector( int argc, char const* const* const argv ) { + std::vector args( static_cast( argc ) ); + for( 
std::size_t i = 0; i < static_cast( argc ); ++i ) + args[i] = argv[i]; + + return args; + } + + class Parser { + enum Mode { None, MaybeShortOpt, SlashOpt, ShortOpt, LongOpt, Positional }; + Mode mode; + std::size_t from; + bool inQuotes; + public: + + struct Token { + enum Type { Positional, ShortOpt, LongOpt }; + Token( Type _type, std::string const& _data ) : type( _type ), data( _data ) {} + Type type; + std::string data; + }; + + Parser() : mode( None ), from( 0 ), inQuotes( false ){} + + void parseIntoTokens( std::vector const& args, std::vector& tokens ) { + const std::string doubleDash = "--"; + for( std::size_t i = 1; i < args.size() && args[i] != doubleDash; ++i ) + parseIntoTokens( args[i], tokens); + } + + void parseIntoTokens( std::string const& arg, std::vector& tokens ) { + for( std::size_t i = 0; i < arg.size(); ++i ) { + char c = arg[i]; + if( c == '"' ) + inQuotes = !inQuotes; + mode = handleMode( i, c, arg, tokens ); + } + mode = handleMode( arg.size(), '\0', arg, tokens ); + } + Mode handleMode( std::size_t i, char c, std::string const& arg, std::vector& tokens ) { + switch( mode ) { + case None: return handleNone( i, c ); + case MaybeShortOpt: return handleMaybeShortOpt( i, c ); + case ShortOpt: + case LongOpt: + case SlashOpt: return handleOpt( i, c, arg, tokens ); + case Positional: return handlePositional( i, c, arg, tokens ); + default: throw std::logic_error( "Unknown mode" ); + } + } + + Mode handleNone( std::size_t i, char c ) { + if( inQuotes ) { + from = i; + return Positional; + } + switch( c ) { + case '-': return MaybeShortOpt; +#ifdef CLARA_PLATFORM_WINDOWS + case '/': from = i+1; return SlashOpt; +#endif + default: from = i; return Positional; + } + } + Mode handleMaybeShortOpt( std::size_t i, char c ) { + switch( c ) { + case '-': from = i+1; return LongOpt; + default: from = i; return ShortOpt; + } + } + + Mode handleOpt( std::size_t i, char c, std::string const& arg, std::vector& tokens ) { + if( std::string( ":=\0", 3 ).find( 
c ) == std::string::npos ) + return mode; + + std::string optName = arg.substr( from, i-from ); + if( mode == ShortOpt ) + for( std::size_t j = 0; j < optName.size(); ++j ) + tokens.push_back( Token( Token::ShortOpt, optName.substr( j, 1 ) ) ); + else if( mode == SlashOpt && optName.size() == 1 ) + tokens.push_back( Token( Token::ShortOpt, optName ) ); + else + tokens.push_back( Token( Token::LongOpt, optName ) ); + return None; + } + Mode handlePositional( std::size_t i, char c, std::string const& arg, std::vector& tokens ) { + if( inQuotes || std::string( "\0", 1 ).find( c ) == std::string::npos ) + return mode; + + std::string data = arg.substr( from, i-from ); + tokens.push_back( Token( Token::Positional, data ) ); + return None; + } + }; + + template + struct CommonArgProperties { + CommonArgProperties() {} + CommonArgProperties( Detail::BoundArgFunction const& _boundField ) : boundField( _boundField ) {} + + Detail::BoundArgFunction boundField; + std::string description; + std::string detail; + std::string placeholder; // Only value if boundField takes an arg + + bool takesArg() const { + return !placeholder.empty(); + } + void validate() const { + if( !boundField.isSet() ) + throw std::logic_error( "option not bound" ); + } + }; + struct OptionArgProperties { + std::vector shortNames; + std::string longName; + + bool hasShortName( std::string const& shortName ) const { + return std::find( shortNames.begin(), shortNames.end(), shortName ) != shortNames.end(); + } + bool hasLongName( std::string const& _longName ) const { + return _longName == longName; + } + }; + struct PositionalArgProperties { + PositionalArgProperties() : position( -1 ) {} + int position; // -1 means non-positional (floating) + + bool isFixedPositional() const { + return position != -1; + } + }; + + template + class CommandLine { + + struct Arg : CommonArgProperties, OptionArgProperties, PositionalArgProperties { + Arg() {} + Arg( Detail::BoundArgFunction const& _boundField ) : 
CommonArgProperties( _boundField ) {} + + using CommonArgProperties::placeholder; // !TBD + + std::string dbgName() const { + if( !longName.empty() ) + return "--" + longName; + if( !shortNames.empty() ) + return "-" + shortNames[0]; + return "positional args"; + } + std::string commands() const { + std::ostringstream oss; + bool first = true; + std::vector::const_iterator it = shortNames.begin(), itEnd = shortNames.end(); + for(; it != itEnd; ++it ) { + if( first ) + first = false; + else + oss << ", "; + oss << "-" << *it; + } + if( !longName.empty() ) { + if( !first ) + oss << ", "; + oss << "--" << longName; + } + if( !placeholder.empty() ) + oss << " <" << placeholder << ">"; + return oss.str(); + } + }; + + typedef CLARA_AUTO_PTR( Arg ) ArgAutoPtr; + + friend void addOptName( Arg& arg, std::string const& optName ) + { + if( optName.empty() ) + return; + if( Detail::startsWith( optName, "--" ) ) { + if( !arg.longName.empty() ) + throw std::logic_error( "Only one long opt may be specified. '" + + arg.longName + + "' already specified, now attempting to add '" + + optName + "'" ); + arg.longName = optName.substr( 2 ); + } + else if( Detail::startsWith( optName, "-" ) ) + arg.shortNames.push_back( optName.substr( 1 ) ); + else + throw std::logic_error( "option must begin with - or --. 
Option was: '" + optName + "'" ); + } + friend void setPositionalArg( Arg& arg, int position ) + { + arg.position = position; + } + + class ArgBuilder { + public: + ArgBuilder( Arg* arg ) : m_arg( arg ) {} + + // Bind a non-boolean data member (requires placeholder string) + template + void bind( M C::* field, std::string const& placeholder ) { + m_arg->boundField = new Detail::BoundDataMember( field ); + m_arg->placeholder = placeholder; + } + // Bind a boolean data member (no placeholder required) + template + void bind( bool C::* field ) { + m_arg->boundField = new Detail::BoundDataMember( field ); + } + + // Bind a method taking a single, non-boolean argument (requires a placeholder string) + template + void bind( void (C::* unaryMethod)( M ), std::string const& placeholder ) { + m_arg->boundField = new Detail::BoundUnaryMethod( unaryMethod ); + m_arg->placeholder = placeholder; + } + + // Bind a method taking a single, boolean argument (no placeholder string required) + template + void bind( void (C::* unaryMethod)( bool ) ) { + m_arg->boundField = new Detail::BoundUnaryMethod( unaryMethod ); + } + + // Bind a method that takes no arguments (will be called if opt is present) + template + void bind( void (C::* nullaryMethod)() ) { + m_arg->boundField = new Detail::BoundNullaryMethod( nullaryMethod ); + } + + // Bind a free function taking a single argument - the object to operate on (no placeholder string required) + template + void bind( void (* unaryFunction)( C& ) ) { + m_arg->boundField = new Detail::BoundUnaryFunction( unaryFunction ); + } + + // Bind a free function taking a single argument - the object to operate on (requires a placeholder string) + template + void bind( void (* binaryFunction)( C&, T ), std::string const& placeholder ) { + m_arg->boundField = new Detail::BoundBinaryFunction( binaryFunction ); + m_arg->placeholder = placeholder; + } + + ArgBuilder& describe( std::string const& description ) { + m_arg->description = description; + return 
*this; + } + ArgBuilder& detail( std::string const& detail ) { + m_arg->detail = detail; + return *this; + } + + protected: + Arg* m_arg; + }; + + class OptBuilder : public ArgBuilder { + public: + OptBuilder( Arg* arg ) : ArgBuilder( arg ) {} + OptBuilder( OptBuilder& other ) : ArgBuilder( other ) {} + + OptBuilder& operator[]( std::string const& optName ) { + addOptName( *ArgBuilder::m_arg, optName ); + return *this; + } + }; + + public: + + CommandLine() + : m_boundProcessName( new Detail::NullBinder() ), + m_highestSpecifiedArgPosition( 0 ), + m_throwOnUnrecognisedTokens( false ) + {} + CommandLine( CommandLine const& other ) + : m_boundProcessName( other.m_boundProcessName ), + m_options ( other.m_options ), + m_positionalArgs( other.m_positionalArgs ), + m_highestSpecifiedArgPosition( other.m_highestSpecifiedArgPosition ), + m_throwOnUnrecognisedTokens( other.m_throwOnUnrecognisedTokens ) + { + if( other.m_floatingArg.get() ) + m_floatingArg.reset( new Arg( *other.m_floatingArg ) ); + } + + CommandLine& setThrowOnUnrecognisedTokens( bool shouldThrow = true ) { + m_throwOnUnrecognisedTokens = shouldThrow; + return *this; + } + + OptBuilder operator[]( std::string const& optName ) { + m_options.push_back( Arg() ); + addOptName( m_options.back(), optName ); + OptBuilder builder( &m_options.back() ); + return builder; + } + + ArgBuilder operator[]( int position ) { + m_positionalArgs.insert( std::make_pair( position, Arg() ) ); + if( position > m_highestSpecifiedArgPosition ) + m_highestSpecifiedArgPosition = position; + setPositionalArg( m_positionalArgs[position], position ); + ArgBuilder builder( &m_positionalArgs[position] ); + return builder; + } + + // Invoke this with the _ instance + ArgBuilder operator[]( UnpositionalTag ) { + if( m_floatingArg.get() ) + throw std::logic_error( "Only one unpositional argument can be added" ); + m_floatingArg.reset( new Arg() ); + ArgBuilder builder( m_floatingArg.get() ); + return builder; + } + + template + void 
bindProcessName( M C::* field ) { + m_boundProcessName = new Detail::BoundDataMember( field ); + } + template + void bindProcessName( void (C::*_unaryMethod)( M ) ) { + m_boundProcessName = new Detail::BoundUnaryMethod( _unaryMethod ); + } + + void optUsage( std::ostream& os, std::size_t indent = 0, std::size_t width = Detail::consoleWidth ) const { + typename std::vector::const_iterator itBegin = m_options.begin(), itEnd = m_options.end(), it; + std::size_t maxWidth = 0; + for( it = itBegin; it != itEnd; ++it ) + maxWidth = (std::max)( maxWidth, it->commands().size() ); + + for( it = itBegin; it != itEnd; ++it ) { + Detail::Text usage( it->commands(), Detail::TextAttributes() + .setWidth( maxWidth+indent ) + .setIndent( indent ) ); + Detail::Text desc( it->description, Detail::TextAttributes() + .setWidth( width - maxWidth - 3 ) ); + + for( std::size_t i = 0; i < (std::max)( usage.size(), desc.size() ); ++i ) { + std::string usageCol = i < usage.size() ? usage[i] : ""; + os << usageCol; + + if( i < desc.size() && !desc[i].empty() ) + os << std::string( indent + 2 + maxWidth - usageCol.size(), ' ' ) + << desc[i]; + os << "\n"; + } + } + } + std::string optUsage() const { + std::ostringstream oss; + optUsage( oss ); + return oss.str(); + } + + void argSynopsis( std::ostream& os ) const { + for( int i = 1; i <= m_highestSpecifiedArgPosition; ++i ) { + if( i > 1 ) + os << " "; + typename std::map::const_iterator it = m_positionalArgs.find( i ); + if( it != m_positionalArgs.end() ) + os << "<" << it->second.placeholder << ">"; + else if( m_floatingArg.get() ) + os << "<" << m_floatingArg->placeholder << ">"; + else + throw std::logic_error( "non consecutive positional arguments with no floating args" ); + } + // !TBD No indication of mandatory args + if( m_floatingArg.get() ) { + if( m_highestSpecifiedArgPosition > 1 ) + os << " "; + os << "[<" << m_floatingArg->placeholder << "> ...]"; + } + } + std::string argSynopsis() const { + std::ostringstream oss; + 
argSynopsis( oss ); + return oss.str(); + } + + void usage( std::ostream& os, std::string const& procName ) const { + validate(); + os << "usage:\n " << procName << " "; + argSynopsis( os ); + if( !m_options.empty() ) { + os << " [options]\n\nwhere options are: \n"; + optUsage( os, 2 ); + } + os << "\n"; + } + std::string usage( std::string const& procName ) const { + std::ostringstream oss; + usage( oss, procName ); + return oss.str(); + } + + ConfigT parse( std::vector const& args ) const { + ConfigT config; + parseInto( args, config ); + return config; + } + + std::vector parseInto( std::vector const& args, ConfigT& config ) const { + std::string processName = args.empty() ? std::string() : args[0]; + std::size_t lastSlash = processName.find_last_of( "/\\" ); + if( lastSlash != std::string::npos ) + processName = processName.substr( lastSlash+1 ); + m_boundProcessName.set( config, processName ); + std::vector tokens; + Parser parser; + parser.parseIntoTokens( args, tokens ); + return populate( tokens, config ); + } + + std::vector populate( std::vector const& tokens, ConfigT& config ) const { + validate(); + std::vector unusedTokens = populateOptions( tokens, config ); + unusedTokens = populateFixedArgs( unusedTokens, config ); + unusedTokens = populateFloatingArgs( unusedTokens, config ); + return unusedTokens; + } + + std::vector populateOptions( std::vector const& tokens, ConfigT& config ) const { + std::vector unusedTokens; + std::vector errors; + for( std::size_t i = 0; i < tokens.size(); ++i ) { + Parser::Token const& token = tokens[i]; + typename std::vector::const_iterator it = m_options.begin(), itEnd = m_options.end(); + for(; it != itEnd; ++it ) { + Arg const& arg = *it; + + try { + if( ( token.type == Parser::Token::ShortOpt && arg.hasShortName( token.data ) ) || + ( token.type == Parser::Token::LongOpt && arg.hasLongName( token.data ) ) ) { + if( arg.takesArg() ) { + if( i == tokens.size()-1 || tokens[i+1].type != Parser::Token::Positional ) + 
errors.push_back( "Expected argument to option: " + token.data ); + else + arg.boundField.set( config, tokens[++i].data ); + } + else { + arg.boundField.set( config, "true" ); + } + break; + } + } + catch( std::exception& ex ) { + errors.push_back( std::string( ex.what() ) + "\n- while parsing: (" + arg.commands() + ")" ); + } + } + if( it == itEnd ) { + if( token.type == Parser::Token::Positional || !m_throwOnUnrecognisedTokens ) + unusedTokens.push_back( token ); + else if( errors.empty() && m_throwOnUnrecognisedTokens ) + errors.push_back( "unrecognised option: " + token.data ); + } + } + if( !errors.empty() ) { + std::ostringstream oss; + for( std::vector::const_iterator it = errors.begin(), itEnd = errors.end(); + it != itEnd; + ++it ) { + if( it != errors.begin() ) + oss << "\n"; + oss << *it; + } + throw std::runtime_error( oss.str() ); + } + return unusedTokens; + } + std::vector populateFixedArgs( std::vector const& tokens, ConfigT& config ) const { + std::vector unusedTokens; + int position = 1; + for( std::size_t i = 0; i < tokens.size(); ++i ) { + Parser::Token const& token = tokens[i]; + typename std::map::const_iterator it = m_positionalArgs.find( position ); + if( it != m_positionalArgs.end() ) + it->second.boundField.set( config, token.data ); + else + unusedTokens.push_back( token ); + if( token.type == Parser::Token::Positional ) + position++; + } + return unusedTokens; + } + std::vector populateFloatingArgs( std::vector const& tokens, ConfigT& config ) const { + if( !m_floatingArg.get() ) + return tokens; + std::vector unusedTokens; + for( std::size_t i = 0; i < tokens.size(); ++i ) { + Parser::Token const& token = tokens[i]; + if( token.type == Parser::Token::Positional ) + m_floatingArg->boundField.set( config, token.data ); + else + unusedTokens.push_back( token ); + } + return unusedTokens; + } + + void validate() const + { + if( m_options.empty() && m_positionalArgs.empty() && !m_floatingArg.get() ) + throw std::logic_error( "No options or 
arguments specified" ); + + for( typename std::vector::const_iterator it = m_options.begin(), + itEnd = m_options.end(); + it != itEnd; ++it ) + it->validate(); + } + + private: + Detail::BoundArgFunction m_boundProcessName; + std::vector m_options; + std::map m_positionalArgs; + ArgAutoPtr m_floatingArg; + int m_highestSpecifiedArgPosition; + bool m_throwOnUnrecognisedTokens; + }; + +} // end namespace Clara + +STITCH_CLARA_CLOSE_NAMESPACE +#undef STITCH_CLARA_OPEN_NAMESPACE +#undef STITCH_CLARA_CLOSE_NAMESPACE + +#endif // TWOBLUECUBES_CLARA_H_INCLUDED +#undef STITCH_CLARA_OPEN_NAMESPACE + +// Restore Clara's value for console width, if present +#ifdef CATCH_TEMP_CLARA_CONFIG_CONSOLE_WIDTH +#define CLARA_CONFIG_CONSOLE_WIDTH CATCH_TEMP_CLARA_CONFIG_CONSOLE_WIDTH +#undef CATCH_TEMP_CLARA_CONFIG_CONSOLE_WIDTH +#endif + +#include +#include + +namespace Catch { + + inline void abortAfterFirst( ConfigData& config ) { config.abortAfter = 1; } + inline void abortAfterX( ConfigData& config, int x ) { + if( x < 1 ) + throw std::runtime_error( "Value after -x or --abortAfter must be greater than zero" ); + config.abortAfter = x; + } + inline void addTestOrTags( ConfigData& config, std::string const& _testSpec ) { config.testsOrTags.push_back( _testSpec ); } + inline void addSectionToRun( ConfigData& config, std::string const& sectionName ) { config.sectionsToRun.push_back( sectionName ); } + inline void addReporterName( ConfigData& config, std::string const& _reporterName ) { config.reporterNames.push_back( _reporterName ); } + + inline void addWarning( ConfigData& config, std::string const& _warning ) { + if( _warning == "NoAssertions" ) + config.warnings = static_cast( config.warnings | WarnAbout::NoAssertions ); + else + throw std::runtime_error( "Unrecognised warning: '" + _warning + '\'' ); + } + inline void setOrder( ConfigData& config, std::string const& order ) { + if( startsWith( "declared", order ) ) + config.runOrder = RunTests::InDeclarationOrder; + else if( 
startsWith( "lexical", order ) ) + config.runOrder = RunTests::InLexicographicalOrder; + else if( startsWith( "random", order ) ) + config.runOrder = RunTests::InRandomOrder; + else + throw std::runtime_error( "Unrecognised ordering: '" + order + '\'' ); + } + inline void setRngSeed( ConfigData& config, std::string const& seed ) { + if( seed == "time" ) { + config.rngSeed = static_cast( std::time(0) ); + } + else { + std::stringstream ss; + ss << seed; + ss >> config.rngSeed; + if( ss.fail() ) + throw std::runtime_error( "Argument to --rng-seed should be the word 'time' or a number" ); + } + } + inline void setVerbosity( ConfigData& config, int level ) { + // !TBD: accept strings? + config.verbosity = static_cast( level ); + } + inline void setShowDurations( ConfigData& config, bool _showDurations ) { + config.showDurations = _showDurations + ? ShowDurations::Always + : ShowDurations::Never; + } + inline void setUseColour( ConfigData& config, std::string const& value ) { + std::string mode = toLower( value ); + + if( mode == "yes" ) + config.useColour = UseColour::Yes; + else if( mode == "no" ) + config.useColour = UseColour::No; + else if( mode == "auto" ) + config.useColour = UseColour::Auto; + else + throw std::runtime_error( "colour mode must be one of: auto, yes or no" ); + } + inline void setWaitForKeypress( ConfigData& config, std::string const& keypress ) { + std::string keypressLc = toLower( keypress ); + if( keypressLc == "start" ) + config.waitForKeypress = WaitForKeypress::BeforeStart; + else if( keypressLc == "exit" ) + config.waitForKeypress = WaitForKeypress::BeforeExit; + else if( keypressLc == "both" ) + config.waitForKeypress = WaitForKeypress::BeforeStartAndExit; + else + throw std::runtime_error( "keypress argument must be one of: start, exit or both. 
'" + keypress + "' not recognised" ); + }; + + inline void forceColour( ConfigData& config ) { + config.useColour = UseColour::Yes; + } + inline void loadTestNamesFromFile( ConfigData& config, std::string const& _filename ) { + std::ifstream f( _filename.c_str() ); + if( !f.is_open() ) + throw std::domain_error( "Unable to load input file: " + _filename ); + + std::string line; + while( std::getline( f, line ) ) { + line = trim(line); + if( !line.empty() && !startsWith( line, '#' ) ) { + if( !startsWith( line, '"' ) ) + line = '"' + line + '"'; + addTestOrTags( config, line + ',' ); + } + } + } + + inline Clara::CommandLine makeCommandLineParser() { + + using namespace Clara; + CommandLine cli; + + cli.bindProcessName( &ConfigData::processName ); + + cli["-?"]["-h"]["--help"] + .describe( "display usage information" ) + .bind( &ConfigData::showHelp ); + + cli["-l"]["--list-tests"] + .describe( "list all/matching test cases" ) + .bind( &ConfigData::listTests ); + + cli["-t"]["--list-tags"] + .describe( "list all/matching tags" ) + .bind( &ConfigData::listTags ); + + cli["-s"]["--success"] + .describe( "include successful tests in output" ) + .bind( &ConfigData::showSuccessfulTests ); + + cli["-b"]["--break"] + .describe( "break into debugger on failure" ) + .bind( &ConfigData::shouldDebugBreak ); + + cli["-e"]["--nothrow"] + .describe( "skip exception tests" ) + .bind( &ConfigData::noThrow ); + + cli["-i"]["--invisibles"] + .describe( "show invisibles (tabs, newlines)" ) + .bind( &ConfigData::showInvisibles ); + + cli["-o"]["--out"] + .describe( "output filename" ) + .bind( &ConfigData::outputFilename, "filename" ); + + cli["-r"]["--reporter"] +// .placeholder( "name[:filename]" ) + .describe( "reporter to use (defaults to console)" ) + .bind( &addReporterName, "name" ); + + cli["-n"]["--name"] + .describe( "suite name" ) + .bind( &ConfigData::name, "name" ); + + cli["-a"]["--abort"] + .describe( "abort at first failure" ) + .bind( &abortAfterFirst ); + + 
cli["-x"]["--abortx"] + .describe( "abort after x failures" ) + .bind( &abortAfterX, "no. failures" ); + + cli["-w"]["--warn"] + .describe( "enable warnings" ) + .bind( &addWarning, "warning name" ); + +// - needs updating if reinstated +// cli.into( &setVerbosity ) +// .describe( "level of verbosity (0=no output)" ) +// .shortOpt( "v") +// .longOpt( "verbosity" ) +// .placeholder( "level" ); + + cli[_] + .describe( "which test or tests to use" ) + .bind( &addTestOrTags, "test name, pattern or tags" ); + + cli["-d"]["--durations"] + .describe( "show test durations" ) + .bind( &setShowDurations, "yes|no" ); + + cli["-f"]["--input-file"] + .describe( "load test names to run from a file" ) + .bind( &loadTestNamesFromFile, "filename" ); + + cli["-#"]["--filenames-as-tags"] + .describe( "adds a tag for the filename" ) + .bind( &ConfigData::filenamesAsTags ); + + cli["-c"]["--section"] + .describe( "specify section to run" ) + .bind( &addSectionToRun, "section name" ); + + // Less common commands which don't have a short form + cli["--list-test-names-only"] + .describe( "list all/matching test cases names only" ) + .bind( &ConfigData::listTestNamesOnly ); + + cli["--list-extra-info"] + .describe( "list all/matching test cases with more info" ) + .bind( &ConfigData::listExtraInfo ); + + cli["--list-reporters"] + .describe( "list all reporters" ) + .bind( &ConfigData::listReporters ); + + cli["--order"] + .describe( "test case order (defaults to decl)" ) + .bind( &setOrder, "decl|lex|rand" ); + + cli["--rng-seed"] + .describe( "set a specific seed for random numbers" ) + .bind( &setRngSeed, "'time'|number" ); + + cli["--force-colour"] + .describe( "force colourised output (deprecated)" ) + .bind( &forceColour ); + + cli["--use-colour"] + .describe( "should output be colourised" ) + .bind( &setUseColour, "yes|no" ); + + cli["--libidentify"] + .describe( "report name and version according to libidentify standard" ) + .bind( &ConfigData::libIdentify ); + + 
cli["--wait-for-keypress"] + .describe( "waits for a keypress before exiting" ) + .bind( &setWaitForKeypress, "start|exit|both" ); + + return cli; + } + +} // end namespace Catch + +// #included from: internal/catch_list.hpp +#define TWOBLUECUBES_CATCH_LIST_HPP_INCLUDED + +// #included from: catch_text.h +#define TWOBLUECUBES_CATCH_TEXT_H_INCLUDED + +#define TBC_TEXT_FORMAT_CONSOLE_WIDTH CATCH_CONFIG_CONSOLE_WIDTH + +#define CLICHE_TBC_TEXT_FORMAT_OUTER_NAMESPACE Catch +// #included from: ../external/tbc_text_format.h +// Only use header guard if we are not using an outer namespace +#ifndef CLICHE_TBC_TEXT_FORMAT_OUTER_NAMESPACE +# ifdef TWOBLUECUBES_TEXT_FORMAT_H_INCLUDED +# ifndef TWOBLUECUBES_TEXT_FORMAT_H_ALREADY_INCLUDED +# define TWOBLUECUBES_TEXT_FORMAT_H_ALREADY_INCLUDED +# endif +# else +# define TWOBLUECUBES_TEXT_FORMAT_H_INCLUDED +# endif +#endif +#ifndef TWOBLUECUBES_TEXT_FORMAT_H_ALREADY_INCLUDED +#include +#include +#include + +// Use optional outer namespace +#ifdef CLICHE_TBC_TEXT_FORMAT_OUTER_NAMESPACE +namespace CLICHE_TBC_TEXT_FORMAT_OUTER_NAMESPACE { +#endif + +namespace Tbc { + +#ifdef TBC_TEXT_FORMAT_CONSOLE_WIDTH + const unsigned int consoleWidth = TBC_TEXT_FORMAT_CONSOLE_WIDTH; +#else + const unsigned int consoleWidth = 80; +#endif + + struct TextAttributes { + TextAttributes() + : initialIndent( std::string::npos ), + indent( 0 ), + width( consoleWidth-1 ) + {} + + TextAttributes& setInitialIndent( std::size_t _value ) { initialIndent = _value; return *this; } + TextAttributes& setIndent( std::size_t _value ) { indent = _value; return *this; } + TextAttributes& setWidth( std::size_t _value ) { width = _value; return *this; } + + std::size_t initialIndent; // indent of first line, or npos + std::size_t indent; // indent of subsequent lines, or all if initialIndent is npos + std::size_t width; // maximum width of text, including indent. 
Longer text will wrap + }; + + class Text { + public: + Text( std::string const& _str, TextAttributes const& _attr = TextAttributes() ) + : attr( _attr ) + { + const std::string wrappableBeforeChars = "[({<\t"; + const std::string wrappableAfterChars = "])}>-,./|\\"; + const std::string wrappableInsteadOfChars = " \n\r"; + std::string indent = _attr.initialIndent != std::string::npos + ? std::string( _attr.initialIndent, ' ' ) + : std::string( _attr.indent, ' ' ); + + typedef std::string::const_iterator iterator; + iterator it = _str.begin(); + const iterator strEnd = _str.end(); + + while( it != strEnd ) { + + if( lines.size() >= 1000 ) { + lines.push_back( "... message truncated due to excessive size" ); + return; + } + + std::string suffix; + std::size_t width = (std::min)( static_cast( strEnd-it ), _attr.width-static_cast( indent.size() ) ); + iterator itEnd = it+width; + iterator itNext = _str.end(); + + iterator itNewLine = std::find( it, itEnd, '\n' ); + if( itNewLine != itEnd ) + itEnd = itNewLine; + + if( itEnd != strEnd ) { + bool foundWrapPoint = false; + iterator findIt = itEnd; + do { + if( wrappableAfterChars.find( *findIt ) != std::string::npos && findIt != itEnd ) { + itEnd = findIt+1; + itNext = findIt+1; + foundWrapPoint = true; + } + else if( findIt > it && wrappableBeforeChars.find( *findIt ) != std::string::npos ) { + itEnd = findIt; + itNext = findIt; + foundWrapPoint = true; + } + else if( wrappableInsteadOfChars.find( *findIt ) != std::string::npos ) { + itNext = findIt+1; + itEnd = findIt; + foundWrapPoint = true; + } + if( findIt == it ) + break; + else + --findIt; + } + while( !foundWrapPoint ); + + if( !foundWrapPoint ) { + // No good wrap char, so we'll break mid word and add a hyphen + --itEnd; + itNext = itEnd; + suffix = "-"; + } + else { + while( itEnd > it && wrappableInsteadOfChars.find( *(itEnd-1) ) != std::string::npos ) + --itEnd; + } + } + lines.push_back( indent + std::string( it, itEnd ) + suffix ); + + if( indent.size() != 
_attr.indent ) + indent = std::string( _attr.indent, ' ' ); + it = itNext; + } + } + + typedef std::vector::const_iterator const_iterator; + + const_iterator begin() const { return lines.begin(); } + const_iterator end() const { return lines.end(); } + std::string const& last() const { return lines.back(); } + std::size_t size() const { return lines.size(); } + std::string const& operator[]( std::size_t _index ) const { return lines[_index]; } + std::string toString() const { + std::ostringstream oss; + oss << *this; + return oss.str(); + } + + inline friend std::ostream& operator << ( std::ostream& _stream, Text const& _text ) { + for( Text::const_iterator it = _text.begin(), itEnd = _text.end(); + it != itEnd; ++it ) { + if( it != _text.begin() ) + _stream << "\n"; + _stream << *it; + } + return _stream; + } + + private: + std::string str; + TextAttributes attr; + std::vector lines; + }; + +} // end namespace Tbc + +#ifdef CLICHE_TBC_TEXT_FORMAT_OUTER_NAMESPACE +} // end outer namespace +#endif + +#endif // TWOBLUECUBES_TEXT_FORMAT_H_ALREADY_INCLUDED +#undef CLICHE_TBC_TEXT_FORMAT_OUTER_NAMESPACE + +namespace Catch { + using Tbc::Text; + using Tbc::TextAttributes; +} + +// #included from: catch_console_colour.hpp +#define TWOBLUECUBES_CATCH_CONSOLE_COLOUR_HPP_INCLUDED + +namespace Catch { + + struct Colour { + enum Code { + None = 0, + + White, + Red, + Green, + Blue, + Cyan, + Yellow, + Grey, + + Bright = 0x10, + + BrightRed = Bright | Red, + BrightGreen = Bright | Green, + LightGrey = Bright | Grey, + BrightWhite = Bright | White, + + // By intention + FileName = LightGrey, + Warning = Yellow, + ResultError = BrightRed, + ResultSuccess = BrightGreen, + ResultExpectedFailure = Warning, + + Error = BrightRed, + Success = Green, + + OriginalExpression = Cyan, + ReconstructedExpression = Yellow, + + SecondaryText = LightGrey, + Headers = White + }; + + // Use constructed object for RAII guard + Colour( Code _colourCode ); + Colour( Colour const& other ); + 
~Colour(); + + // Use static method for one-shot changes + static void use( Code _colourCode ); + + private: + bool m_moved; + }; + + inline std::ostream& operator << ( std::ostream& os, Colour const& ) { return os; } + +} // end namespace Catch + +// #included from: catch_interfaces_reporter.h +#define TWOBLUECUBES_CATCH_INTERFACES_REPORTER_H_INCLUDED + +#include +#include +#include + +namespace Catch +{ + struct ReporterConfig { + explicit ReporterConfig( Ptr const& _fullConfig ) + : m_stream( &_fullConfig->stream() ), m_fullConfig( _fullConfig ) {} + + ReporterConfig( Ptr const& _fullConfig, std::ostream& _stream ) + : m_stream( &_stream ), m_fullConfig( _fullConfig ) {} + + std::ostream& stream() const { return *m_stream; } + Ptr fullConfig() const { return m_fullConfig; } + + private: + std::ostream* m_stream; + Ptr m_fullConfig; + }; + + struct ReporterPreferences { + ReporterPreferences() + : shouldRedirectStdOut( false ) + {} + + bool shouldRedirectStdOut; + }; + + template + struct LazyStat : Option { + LazyStat() : used( false ) {} + LazyStat& operator=( T const& _value ) { + Option::operator=( _value ); + used = false; + return *this; + } + void reset() { + Option::reset(); + used = false; + } + bool used; + }; + + struct TestRunInfo { + TestRunInfo( std::string const& _name ) : name( _name ) {} + std::string name; + }; + struct GroupInfo { + GroupInfo( std::string const& _name, + std::size_t _groupIndex, + std::size_t _groupsCount ) + : name( _name ), + groupIndex( _groupIndex ), + groupsCounts( _groupsCount ) + {} + + std::string name; + std::size_t groupIndex; + std::size_t groupsCounts; + }; + + struct AssertionStats { + AssertionStats( AssertionResult const& _assertionResult, + std::vector const& _infoMessages, + Totals const& _totals ) + : assertionResult( _assertionResult ), + infoMessages( _infoMessages ), + totals( _totals ) + { + if( assertionResult.hasMessage() ) { + // Copy message into messages list. 
+ // !TBD This should have been done earlier, somewhere + MessageBuilder builder( assertionResult.getTestMacroName(), assertionResult.getSourceInfo(), assertionResult.getResultType() ); + builder << assertionResult.getMessage(); + builder.m_info.message = builder.m_stream.str(); + + infoMessages.push_back( builder.m_info ); + } + } + virtual ~AssertionStats(); + +# ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS + AssertionStats( AssertionStats const& ) = default; + AssertionStats( AssertionStats && ) = default; + AssertionStats& operator = ( AssertionStats const& ) = default; + AssertionStats& operator = ( AssertionStats && ) = default; +# endif + + AssertionResult assertionResult; + std::vector infoMessages; + Totals totals; + }; + + struct SectionStats { + SectionStats( SectionInfo const& _sectionInfo, + Counts const& _assertions, + double _durationInSeconds, + bool _missingAssertions ) + : sectionInfo( _sectionInfo ), + assertions( _assertions ), + durationInSeconds( _durationInSeconds ), + missingAssertions( _missingAssertions ) + {} + virtual ~SectionStats(); +# ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS + SectionStats( SectionStats const& ) = default; + SectionStats( SectionStats && ) = default; + SectionStats& operator = ( SectionStats const& ) = default; + SectionStats& operator = ( SectionStats && ) = default; +# endif + + SectionInfo sectionInfo; + Counts assertions; + double durationInSeconds; + bool missingAssertions; + }; + + struct TestCaseStats { + TestCaseStats( TestCaseInfo const& _testInfo, + Totals const& _totals, + std::string const& _stdOut, + std::string const& _stdErr, + bool _aborting ) + : testInfo( _testInfo ), + totals( _totals ), + stdOut( _stdOut ), + stdErr( _stdErr ), + aborting( _aborting ) + {} + virtual ~TestCaseStats(); + +# ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS + TestCaseStats( TestCaseStats const& ) = default; + TestCaseStats( TestCaseStats && ) = default; + TestCaseStats& operator = ( TestCaseStats const& ) = default; + 
TestCaseStats& operator = ( TestCaseStats && ) = default; +# endif + + TestCaseInfo testInfo; + Totals totals; + std::string stdOut; + std::string stdErr; + bool aborting; + }; + + struct TestGroupStats { + TestGroupStats( GroupInfo const& _groupInfo, + Totals const& _totals, + bool _aborting ) + : groupInfo( _groupInfo ), + totals( _totals ), + aborting( _aborting ) + {} + TestGroupStats( GroupInfo const& _groupInfo ) + : groupInfo( _groupInfo ), + aborting( false ) + {} + virtual ~TestGroupStats(); + +# ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS + TestGroupStats( TestGroupStats const& ) = default; + TestGroupStats( TestGroupStats && ) = default; + TestGroupStats& operator = ( TestGroupStats const& ) = default; + TestGroupStats& operator = ( TestGroupStats && ) = default; +# endif + + GroupInfo groupInfo; + Totals totals; + bool aborting; + }; + + struct TestRunStats { + TestRunStats( TestRunInfo const& _runInfo, + Totals const& _totals, + bool _aborting ) + : runInfo( _runInfo ), + totals( _totals ), + aborting( _aborting ) + {} + virtual ~TestRunStats(); + +# ifndef CATCH_CONFIG_CPP11_GENERATED_METHODS + TestRunStats( TestRunStats const& _other ) + : runInfo( _other.runInfo ), + totals( _other.totals ), + aborting( _other.aborting ) + {} +# else + TestRunStats( TestRunStats const& ) = default; + TestRunStats( TestRunStats && ) = default; + TestRunStats& operator = ( TestRunStats const& ) = default; + TestRunStats& operator = ( TestRunStats && ) = default; +# endif + + TestRunInfo runInfo; + Totals totals; + bool aborting; + }; + + class MultipleReporters; + + struct IStreamingReporter : IShared { + virtual ~IStreamingReporter(); + + // Implementing class must also provide the following static method: + // static std::string getDescription(); + + virtual ReporterPreferences getPreferences() const = 0; + + virtual void noMatchingTestCases( std::string const& spec ) = 0; + + virtual void testRunStarting( TestRunInfo const& testRunInfo ) = 0; + virtual void 
testGroupStarting( GroupInfo const& groupInfo ) = 0; + + virtual void testCaseStarting( TestCaseInfo const& testInfo ) = 0; + virtual void sectionStarting( SectionInfo const& sectionInfo ) = 0; + + virtual void assertionStarting( AssertionInfo const& assertionInfo ) = 0; + + // The return value indicates if the messages buffer should be cleared: + virtual bool assertionEnded( AssertionStats const& assertionStats ) = 0; + + virtual void sectionEnded( SectionStats const& sectionStats ) = 0; + virtual void testCaseEnded( TestCaseStats const& testCaseStats ) = 0; + virtual void testGroupEnded( TestGroupStats const& testGroupStats ) = 0; + virtual void testRunEnded( TestRunStats const& testRunStats ) = 0; + + virtual void skipTest( TestCaseInfo const& testInfo ) = 0; + + virtual MultipleReporters* tryAsMulti() { return CATCH_NULL; } + }; + + struct IReporterFactory : IShared { + virtual ~IReporterFactory(); + virtual IStreamingReporter* create( ReporterConfig const& config ) const = 0; + virtual std::string getDescription() const = 0; + }; + + struct IReporterRegistry { + typedef std::map > FactoryMap; + typedef std::vector > Listeners; + + virtual ~IReporterRegistry(); + virtual IStreamingReporter* create( std::string const& name, Ptr const& config ) const = 0; + virtual FactoryMap const& getFactories() const = 0; + virtual Listeners const& getListeners() const = 0; + }; + + Ptr addReporter( Ptr const& existingReporter, Ptr const& additionalReporter ); + +} + +#include +#include + +namespace Catch { + + inline std::size_t listTests( Config const& config ) { + + TestSpec testSpec = config.testSpec(); + if( config.testSpec().hasFilters() ) + Catch::cout() << "Matching test cases:\n"; + else { + Catch::cout() << "All available test cases:\n"; + testSpec = TestSpecParser( ITagAliasRegistry::get() ).parse( "*" ).testSpec(); + } + + std::size_t matchedTests = 0; + TextAttributes nameAttr, descAttr, tagsAttr; + nameAttr.setInitialIndent( 2 ).setIndent( 4 ); + 
descAttr.setIndent( 4 ); + tagsAttr.setIndent( 6 ); + + std::vector matchedTestCases = filterTests( getAllTestCasesSorted( config ), testSpec, config ); + for( std::vector::const_iterator it = matchedTestCases.begin(), itEnd = matchedTestCases.end(); + it != itEnd; + ++it ) { + matchedTests++; + TestCaseInfo const& testCaseInfo = it->getTestCaseInfo(); + Colour::Code colour = testCaseInfo.isHidden() + ? Colour::SecondaryText + : Colour::None; + Colour colourGuard( colour ); + + Catch::cout() << Text( testCaseInfo.name, nameAttr ) << std::endl; + if( config.listExtraInfo() ) { + Catch::cout() << " " << testCaseInfo.lineInfo << std::endl; + std::string description = testCaseInfo.description; + if( description.empty() ) + description = "(NO DESCRIPTION)"; + Catch::cout() << Text( description, descAttr ) << std::endl; + } + if( !testCaseInfo.tags.empty() ) + Catch::cout() << Text( testCaseInfo.tagsAsString, tagsAttr ) << std::endl; + } + + if( !config.testSpec().hasFilters() ) + Catch::cout() << pluralise( matchedTests, "test case" ) << '\n' << std::endl; + else + Catch::cout() << pluralise( matchedTests, "matching test case" ) << '\n' << std::endl; + return matchedTests; + } + + inline std::size_t listTestsNamesOnly( Config const& config ) { + TestSpec testSpec = config.testSpec(); + if( !config.testSpec().hasFilters() ) + testSpec = TestSpecParser( ITagAliasRegistry::get() ).parse( "*" ).testSpec(); + std::size_t matchedTests = 0; + std::vector matchedTestCases = filterTests( getAllTestCasesSorted( config ), testSpec, config ); + for( std::vector::const_iterator it = matchedTestCases.begin(), itEnd = matchedTestCases.end(); + it != itEnd; + ++it ) { + matchedTests++; + TestCaseInfo const& testCaseInfo = it->getTestCaseInfo(); + if( startsWith( testCaseInfo.name, '#' ) ) + Catch::cout() << '"' << testCaseInfo.name << '"'; + else + Catch::cout() << testCaseInfo.name; + if ( config.listExtraInfo() ) + Catch::cout() << "\t@" << testCaseInfo.lineInfo; + Catch::cout() << 
std::endl; + } + return matchedTests; + } + + struct TagInfo { + TagInfo() : count ( 0 ) {} + void add( std::string const& spelling ) { + ++count; + spellings.insert( spelling ); + } + std::string all() const { + std::string out; + for( std::set::const_iterator it = spellings.begin(), itEnd = spellings.end(); + it != itEnd; + ++it ) + out += "[" + *it + "]"; + return out; + } + std::set spellings; + std::size_t count; + }; + + inline std::size_t listTags( Config const& config ) { + TestSpec testSpec = config.testSpec(); + if( config.testSpec().hasFilters() ) + Catch::cout() << "Tags for matching test cases:\n"; + else { + Catch::cout() << "All available tags:\n"; + testSpec = TestSpecParser( ITagAliasRegistry::get() ).parse( "*" ).testSpec(); + } + + std::map tagCounts; + + std::vector matchedTestCases = filterTests( getAllTestCasesSorted( config ), testSpec, config ); + for( std::vector::const_iterator it = matchedTestCases.begin(), itEnd = matchedTestCases.end(); + it != itEnd; + ++it ) { + for( std::set::const_iterator tagIt = it->getTestCaseInfo().tags.begin(), + tagItEnd = it->getTestCaseInfo().tags.end(); + tagIt != tagItEnd; + ++tagIt ) { + std::string tagName = *tagIt; + std::string lcaseTagName = toLower( tagName ); + std::map::iterator countIt = tagCounts.find( lcaseTagName ); + if( countIt == tagCounts.end() ) + countIt = tagCounts.insert( std::make_pair( lcaseTagName, TagInfo() ) ).first; + countIt->second.add( tagName ); + } + } + + for( std::map::const_iterator countIt = tagCounts.begin(), + countItEnd = tagCounts.end(); + countIt != countItEnd; + ++countIt ) { + std::ostringstream oss; + oss << " " << std::setw(2) << countIt->second.count << " "; + Text wrapper( countIt->second.all(), TextAttributes() + .setInitialIndent( 0 ) + .setIndent( oss.str().size() ) + .setWidth( CATCH_CONFIG_CONSOLE_WIDTH-10 ) ); + Catch::cout() << oss.str() << wrapper << '\n'; + } + Catch::cout() << pluralise( tagCounts.size(), "tag" ) << '\n' << std::endl; + return 
tagCounts.size(); + } + + inline std::size_t listReporters( Config const& /*config*/ ) { + Catch::cout() << "Available reporters:\n"; + IReporterRegistry::FactoryMap const& factories = getRegistryHub().getReporterRegistry().getFactories(); + IReporterRegistry::FactoryMap::const_iterator itBegin = factories.begin(), itEnd = factories.end(), it; + std::size_t maxNameLen = 0; + for(it = itBegin; it != itEnd; ++it ) + maxNameLen = (std::max)( maxNameLen, it->first.size() ); + + for(it = itBegin; it != itEnd; ++it ) { + Text wrapper( it->second->getDescription(), TextAttributes() + .setInitialIndent( 0 ) + .setIndent( 7+maxNameLen ) + .setWidth( CATCH_CONFIG_CONSOLE_WIDTH - maxNameLen-8 ) ); + Catch::cout() << " " + << it->first + << ':' + << std::string( maxNameLen - it->first.size() + 2, ' ' ) + << wrapper << '\n'; + } + Catch::cout() << std::endl; + return factories.size(); + } + + inline Option list( Config const& config ) { + Option listedCount; + if( config.listTests() || ( config.listExtraInfo() && !config.listTestNamesOnly() ) ) + listedCount = listedCount.valueOr(0) + listTests( config ); + if( config.listTestNamesOnly() ) + listedCount = listedCount.valueOr(0) + listTestsNamesOnly( config ); + if( config.listTags() ) + listedCount = listedCount.valueOr(0) + listTags( config ); + if( config.listReporters() ) + listedCount = listedCount.valueOr(0) + listReporters( config ); + return listedCount; + } + +} // end namespace Catch + +// #included from: internal/catch_run_context.hpp +#define TWOBLUECUBES_CATCH_RUNNER_IMPL_HPP_INCLUDED + +// #included from: catch_test_case_tracker.hpp +#define TWOBLUECUBES_CATCH_TEST_CASE_TRACKER_HPP_INCLUDED + +#include +#include +#include +#include +#include + +CATCH_INTERNAL_SUPPRESS_ETD_WARNINGS + +namespace Catch { +namespace TestCaseTracking { + + struct NameAndLocation { + std::string name; + SourceLineInfo location; + + NameAndLocation( std::string const& _name, SourceLineInfo const& _location ) + : name( _name ), + location( 
_location ) + {} + }; + + struct ITracker : SharedImpl<> { + virtual ~ITracker(); + + // static queries + virtual NameAndLocation const& nameAndLocation() const = 0; + + // dynamic queries + virtual bool isComplete() const = 0; // Successfully completed or failed + virtual bool isSuccessfullyCompleted() const = 0; + virtual bool isOpen() const = 0; // Started but not complete + virtual bool hasChildren() const = 0; + + virtual ITracker& parent() = 0; + + // actions + virtual void close() = 0; // Successfully complete + virtual void fail() = 0; + virtual void markAsNeedingAnotherRun() = 0; + + virtual void addChild( Ptr const& child ) = 0; + virtual ITracker* findChild( NameAndLocation const& nameAndLocation ) = 0; + virtual void openChild() = 0; + + // Debug/ checking + virtual bool isSectionTracker() const = 0; + virtual bool isIndexTracker() const = 0; + }; + + class TrackerContext { + + enum RunState { + NotStarted, + Executing, + CompletedCycle + }; + + Ptr m_rootTracker; + ITracker* m_currentTracker; + RunState m_runState; + + public: + + static TrackerContext& instance() { + static TrackerContext s_instance; + return s_instance; + } + + TrackerContext() + : m_currentTracker( CATCH_NULL ), + m_runState( NotStarted ) + {} + + ITracker& startRun(); + + void endRun() { + m_rootTracker.reset(); + m_currentTracker = CATCH_NULL; + m_runState = NotStarted; + } + + void startCycle() { + m_currentTracker = m_rootTracker.get(); + m_runState = Executing; + } + void completeCycle() { + m_runState = CompletedCycle; + } + + bool completedCycle() const { + return m_runState == CompletedCycle; + } + ITracker& currentTracker() { + return *m_currentTracker; + } + void setCurrentTracker( ITracker* tracker ) { + m_currentTracker = tracker; + } + }; + + class TrackerBase : public ITracker { + protected: + enum CycleState { + NotStarted, + Executing, + ExecutingChildren, + NeedsAnotherRun, + CompletedSuccessfully, + Failed + }; + class TrackerHasName { + NameAndLocation 
m_nameAndLocation; + public: + TrackerHasName( NameAndLocation const& nameAndLocation ) : m_nameAndLocation( nameAndLocation ) {} + bool operator ()( Ptr const& tracker ) { + return + tracker->nameAndLocation().name == m_nameAndLocation.name && + tracker->nameAndLocation().location == m_nameAndLocation.location; + } + }; + typedef std::vector > Children; + NameAndLocation m_nameAndLocation; + TrackerContext& m_ctx; + ITracker* m_parent; + Children m_children; + CycleState m_runState; + public: + TrackerBase( NameAndLocation const& nameAndLocation, TrackerContext& ctx, ITracker* parent ) + : m_nameAndLocation( nameAndLocation ), + m_ctx( ctx ), + m_parent( parent ), + m_runState( NotStarted ) + {} + virtual ~TrackerBase(); + + virtual NameAndLocation const& nameAndLocation() const CATCH_OVERRIDE { + return m_nameAndLocation; + } + virtual bool isComplete() const CATCH_OVERRIDE { + return m_runState == CompletedSuccessfully || m_runState == Failed; + } + virtual bool isSuccessfullyCompleted() const CATCH_OVERRIDE { + return m_runState == CompletedSuccessfully; + } + virtual bool isOpen() const CATCH_OVERRIDE { + return m_runState != NotStarted && !isComplete(); + } + virtual bool hasChildren() const CATCH_OVERRIDE { + return !m_children.empty(); + } + + virtual void addChild( Ptr const& child ) CATCH_OVERRIDE { + m_children.push_back( child ); + } + + virtual ITracker* findChild( NameAndLocation const& nameAndLocation ) CATCH_OVERRIDE { + Children::const_iterator it = std::find_if( m_children.begin(), m_children.end(), TrackerHasName( nameAndLocation ) ); + return( it != m_children.end() ) + ? 
it->get() + : CATCH_NULL; + } + virtual ITracker& parent() CATCH_OVERRIDE { + assert( m_parent ); // Should always be non-null except for root + return *m_parent; + } + + virtual void openChild() CATCH_OVERRIDE { + if( m_runState != ExecutingChildren ) { + m_runState = ExecutingChildren; + if( m_parent ) + m_parent->openChild(); + } + } + + virtual bool isSectionTracker() const CATCH_OVERRIDE { return false; } + virtual bool isIndexTracker() const CATCH_OVERRIDE { return false; } + + void open() { + m_runState = Executing; + moveToThis(); + if( m_parent ) + m_parent->openChild(); + } + + virtual void close() CATCH_OVERRIDE { + + // Close any still open children (e.g. generators) + while( &m_ctx.currentTracker() != this ) + m_ctx.currentTracker().close(); + + switch( m_runState ) { + case NotStarted: + case CompletedSuccessfully: + case Failed: + throw std::logic_error( "Illogical state" ); + + case NeedsAnotherRun: + break;; + + case Executing: + m_runState = CompletedSuccessfully; + break; + case ExecutingChildren: + if( m_children.empty() || m_children.back()->isComplete() ) + m_runState = CompletedSuccessfully; + break; + + default: + throw std::logic_error( "Unexpected state" ); + } + moveToParent(); + m_ctx.completeCycle(); + } + virtual void fail() CATCH_OVERRIDE { + m_runState = Failed; + if( m_parent ) + m_parent->markAsNeedingAnotherRun(); + moveToParent(); + m_ctx.completeCycle(); + } + virtual void markAsNeedingAnotherRun() CATCH_OVERRIDE { + m_runState = NeedsAnotherRun; + } + private: + void moveToParent() { + assert( m_parent ); + m_ctx.setCurrentTracker( m_parent ); + } + void moveToThis() { + m_ctx.setCurrentTracker( this ); + } + }; + + class SectionTracker : public TrackerBase { + std::vector m_filters; + public: + SectionTracker( NameAndLocation const& nameAndLocation, TrackerContext& ctx, ITracker* parent ) + : TrackerBase( nameAndLocation, ctx, parent ) + { + if( parent ) { + while( !parent->isSectionTracker() ) + parent = &parent->parent(); + 
+ SectionTracker& parentSection = static_cast( *parent ); + addNextFilters( parentSection.m_filters ); + } + } + virtual ~SectionTracker(); + + virtual bool isSectionTracker() const CATCH_OVERRIDE { return true; } + + static SectionTracker& acquire( TrackerContext& ctx, NameAndLocation const& nameAndLocation ) { + SectionTracker* section = CATCH_NULL; + + ITracker& currentTracker = ctx.currentTracker(); + if( ITracker* childTracker = currentTracker.findChild( nameAndLocation ) ) { + assert( childTracker ); + assert( childTracker->isSectionTracker() ); + section = static_cast( childTracker ); + } + else { + section = new SectionTracker( nameAndLocation, ctx, ¤tTracker ); + currentTracker.addChild( section ); + } + if( !ctx.completedCycle() ) + section->tryOpen(); + return *section; + } + + void tryOpen() { + if( !isComplete() && (m_filters.empty() || m_filters[0].empty() || m_filters[0] == m_nameAndLocation.name ) ) + open(); + } + + void addInitialFilters( std::vector const& filters ) { + if( !filters.empty() ) { + m_filters.push_back(""); // Root - should never be consulted + m_filters.push_back(""); // Test Case - not a section filter + m_filters.insert( m_filters.end(), filters.begin(), filters.end() ); + } + } + void addNextFilters( std::vector const& filters ) { + if( filters.size() > 1 ) + m_filters.insert( m_filters.end(), ++filters.begin(), filters.end() ); + } + }; + + class IndexTracker : public TrackerBase { + int m_size; + int m_index; + public: + IndexTracker( NameAndLocation const& nameAndLocation, TrackerContext& ctx, ITracker* parent, int size ) + : TrackerBase( nameAndLocation, ctx, parent ), + m_size( size ), + m_index( -1 ) + {} + virtual ~IndexTracker(); + + virtual bool isIndexTracker() const CATCH_OVERRIDE { return true; } + + static IndexTracker& acquire( TrackerContext& ctx, NameAndLocation const& nameAndLocation, int size ) { + IndexTracker* tracker = CATCH_NULL; + + ITracker& currentTracker = ctx.currentTracker(); + if( ITracker* 
childTracker = currentTracker.findChild( nameAndLocation ) ) { + assert( childTracker ); + assert( childTracker->isIndexTracker() ); + tracker = static_cast( childTracker ); + } + else { + tracker = new IndexTracker( nameAndLocation, ctx, ¤tTracker, size ); + currentTracker.addChild( tracker ); + } + + if( !ctx.completedCycle() && !tracker->isComplete() ) { + if( tracker->m_runState != ExecutingChildren && tracker->m_runState != NeedsAnotherRun ) + tracker->moveNext(); + tracker->open(); + } + + return *tracker; + } + + int index() const { return m_index; } + + void moveNext() { + m_index++; + m_children.clear(); + } + + virtual void close() CATCH_OVERRIDE { + TrackerBase::close(); + if( m_runState == CompletedSuccessfully && m_index < m_size-1 ) + m_runState = Executing; + } + }; + + inline ITracker& TrackerContext::startRun() { + m_rootTracker = new SectionTracker( NameAndLocation( "{root}", CATCH_INTERNAL_LINEINFO ), *this, CATCH_NULL ); + m_currentTracker = CATCH_NULL; + m_runState = Executing; + return *m_rootTracker; + } + +} // namespace TestCaseTracking + +using TestCaseTracking::ITracker; +using TestCaseTracking::TrackerContext; +using TestCaseTracking::SectionTracker; +using TestCaseTracking::IndexTracker; + +} // namespace Catch + +CATCH_INTERNAL_UNSUPPRESS_ETD_WARNINGS + +// #included from: catch_fatal_condition.hpp +#define TWOBLUECUBES_CATCH_FATAL_CONDITION_H_INCLUDED + +namespace Catch { + + // Report the error condition + inline void reportFatal( std::string const& message ) { + IContext& context = Catch::getCurrentContext(); + IResultCapture* resultCapture = context.getResultCapture(); + resultCapture->handleFatalErrorCondition( message ); + } + +} // namespace Catch + +#if defined ( CATCH_PLATFORM_WINDOWS ) ///////////////////////////////////////// +// #included from: catch_windows_h_proxy.h + +#define TWOBLUECUBES_CATCH_WINDOWS_H_PROXY_H_INCLUDED + +#ifdef CATCH_DEFINES_NOMINMAX +# define NOMINMAX +#endif +#ifdef CATCH_DEFINES_WIN32_LEAN_AND_MEAN 
+# define WIN32_LEAN_AND_MEAN +#endif + +#ifdef __AFXDLL +#include +#else +#include +#endif + +#ifdef CATCH_DEFINES_NOMINMAX +# undef NOMINMAX +#endif +#ifdef CATCH_DEFINES_WIN32_LEAN_AND_MEAN +# undef WIN32_LEAN_AND_MEAN +#endif + + +# if !defined ( CATCH_CONFIG_WINDOWS_SEH ) + +namespace Catch { + struct FatalConditionHandler { + void reset() {} + }; +} + +# else // CATCH_CONFIG_WINDOWS_SEH is defined + +namespace Catch { + + struct SignalDefs { DWORD id; const char* name; }; + extern SignalDefs signalDefs[]; + // There is no 1-1 mapping between signals and windows exceptions. + // Windows can easily distinguish between SO and SigSegV, + // but SigInt, SigTerm, etc are handled differently. + SignalDefs signalDefs[] = { + { EXCEPTION_ILLEGAL_INSTRUCTION, "SIGILL - Illegal instruction signal" }, + { EXCEPTION_STACK_OVERFLOW, "SIGSEGV - Stack overflow" }, + { EXCEPTION_ACCESS_VIOLATION, "SIGSEGV - Segmentation violation signal" }, + { EXCEPTION_INT_DIVIDE_BY_ZERO, "Divide by zero error" }, + }; + + struct FatalConditionHandler { + + static LONG CALLBACK handleVectoredException(PEXCEPTION_POINTERS ExceptionInfo) { + for (int i = 0; i < sizeof(signalDefs) / sizeof(SignalDefs); ++i) { + if (ExceptionInfo->ExceptionRecord->ExceptionCode == signalDefs[i].id) { + reportFatal(signalDefs[i].name); + } + } + // If its not an exception we care about, pass it along. + // This stops us from eating debugger breaks etc. 
+ return EXCEPTION_CONTINUE_SEARCH; + } + + FatalConditionHandler() { + isSet = true; + // 32k seems enough for Catch to handle stack overflow, + // but the value was found experimentally, so there is no strong guarantee + guaranteeSize = 32 * 1024; + exceptionHandlerHandle = CATCH_NULL; + // Register as first handler in current chain + exceptionHandlerHandle = AddVectoredExceptionHandler(1, handleVectoredException); + // Pass in guarantee size to be filled + SetThreadStackGuarantee(&guaranteeSize); + } + + static void reset() { + if (isSet) { + // Unregister handler and restore the old guarantee + RemoveVectoredExceptionHandler(exceptionHandlerHandle); + SetThreadStackGuarantee(&guaranteeSize); + exceptionHandlerHandle = CATCH_NULL; + isSet = false; + } + } + + ~FatalConditionHandler() { + reset(); + } + private: + static bool isSet; + static ULONG guaranteeSize; + static PVOID exceptionHandlerHandle; + }; + + bool FatalConditionHandler::isSet = false; + ULONG FatalConditionHandler::guaranteeSize = 0; + PVOID FatalConditionHandler::exceptionHandlerHandle = CATCH_NULL; + +} // namespace Catch + +# endif // CATCH_CONFIG_WINDOWS_SEH + +#else // Not Windows - assumed to be POSIX compatible ////////////////////////// + +# if !defined(CATCH_CONFIG_POSIX_SIGNALS) + +namespace Catch { + struct FatalConditionHandler { + void reset() {} + }; +} + +# else // CATCH_CONFIG_POSIX_SIGNALS is defined + +#include + +namespace Catch { + + struct SignalDefs { + int id; + const char* name; + }; + extern SignalDefs signalDefs[]; + SignalDefs signalDefs[] = { + { SIGINT, "SIGINT - Terminal interrupt signal" }, + { SIGILL, "SIGILL - Illegal instruction signal" }, + { SIGFPE, "SIGFPE - Floating point error signal" }, + { SIGSEGV, "SIGSEGV - Segmentation violation signal" }, + { SIGTERM, "SIGTERM - Termination request signal" }, + { SIGABRT, "SIGABRT - Abort (abnormal termination) signal" } + }; + + struct FatalConditionHandler { + + static bool isSet; + static struct sigaction 
oldSigActions [sizeof(signalDefs)/sizeof(SignalDefs)]; + static stack_t oldSigStack; + static char altStackMem[SIGSTKSZ]; + + static void handleSignal( int sig ) { + std::string name = ""; + for (std::size_t i = 0; i < sizeof(signalDefs) / sizeof(SignalDefs); ++i) { + SignalDefs &def = signalDefs[i]; + if (sig == def.id) { + name = def.name; + break; + } + } + reset(); + reportFatal(name); + raise( sig ); + } + + FatalConditionHandler() { + isSet = true; + stack_t sigStack; + sigStack.ss_sp = altStackMem; + sigStack.ss_size = SIGSTKSZ; + sigStack.ss_flags = 0; + sigaltstack(&sigStack, &oldSigStack); + struct sigaction sa = { 0 }; + + sa.sa_handler = handleSignal; + sa.sa_flags = SA_ONSTACK; + for (std::size_t i = 0; i < sizeof(signalDefs)/sizeof(SignalDefs); ++i) { + sigaction(signalDefs[i].id, &sa, &oldSigActions[i]); + } + } + + ~FatalConditionHandler() { + reset(); + } + static void reset() { + if( isSet ) { + // Set signals back to previous values -- hopefully nobody overwrote them in the meantime + for( std::size_t i = 0; i < sizeof(signalDefs)/sizeof(SignalDefs); ++i ) { + sigaction(signalDefs[i].id, &oldSigActions[i], CATCH_NULL); + } + // Return the old stack + sigaltstack(&oldSigStack, CATCH_NULL); + isSet = false; + } + } + }; + + bool FatalConditionHandler::isSet = false; + struct sigaction FatalConditionHandler::oldSigActions[sizeof(signalDefs)/sizeof(SignalDefs)] = {}; + stack_t FatalConditionHandler::oldSigStack = {}; + char FatalConditionHandler::altStackMem[SIGSTKSZ] = {}; + +} // namespace Catch + +# endif // CATCH_CONFIG_POSIX_SIGNALS + +#endif // not Windows + +#include +#include +#include + +namespace Catch { + + class StreamRedirect { + + public: + StreamRedirect( std::ostream& stream, std::string& targetString ) + : m_stream( stream ), + m_prevBuf( stream.rdbuf() ), + m_targetString( targetString ) + { + stream.rdbuf( m_oss.rdbuf() ); + } + + ~StreamRedirect() { + m_targetString += m_oss.str(); + m_stream.rdbuf( m_prevBuf ); + } + + private: + 
std::ostream& m_stream; + std::streambuf* m_prevBuf; + std::ostringstream m_oss; + std::string& m_targetString; + }; + + // StdErr has two constituent streams in C++, std::cerr and std::clog + // This means that we need to redirect 2 streams into 1 to keep proper + // order of writes and cannot use StreamRedirect on its own + class StdErrRedirect { + public: + StdErrRedirect(std::string& targetString) + :m_cerrBuf( cerr().rdbuf() ), m_clogBuf(clog().rdbuf()), + m_targetString(targetString){ + cerr().rdbuf(m_oss.rdbuf()); + clog().rdbuf(m_oss.rdbuf()); + } + ~StdErrRedirect() { + m_targetString += m_oss.str(); + cerr().rdbuf(m_cerrBuf); + clog().rdbuf(m_clogBuf); + } + private: + std::streambuf* m_cerrBuf; + std::streambuf* m_clogBuf; + std::ostringstream m_oss; + std::string& m_targetString; + }; + + /////////////////////////////////////////////////////////////////////////// + + class RunContext : public IResultCapture, public IRunner { + + RunContext( RunContext const& ); + void operator =( RunContext const& ); + + public: + + explicit RunContext( Ptr const& _config, Ptr const& reporter ) + : m_runInfo( _config->name() ), + m_context( getCurrentMutableContext() ), + m_activeTestCase( CATCH_NULL ), + m_config( _config ), + m_reporter( reporter ), + m_shouldReportUnexpected ( true ) + { + m_context.setRunner( this ); + m_context.setConfig( m_config ); + m_context.setResultCapture( this ); + m_reporter->testRunStarting( m_runInfo ); + } + + virtual ~RunContext() { + m_reporter->testRunEnded( TestRunStats( m_runInfo, m_totals, aborting() ) ); + } + + void testGroupStarting( std::string const& testSpec, std::size_t groupIndex, std::size_t groupsCount ) { + m_reporter->testGroupStarting( GroupInfo( testSpec, groupIndex, groupsCount ) ); + } + void testGroupEnded( std::string const& testSpec, Totals const& totals, std::size_t groupIndex, std::size_t groupsCount ) { + m_reporter->testGroupEnded( TestGroupStats( GroupInfo( testSpec, groupIndex, groupsCount ), totals, 
aborting() ) ); + } + + Totals runTest( TestCase const& testCase ) { + Totals prevTotals = m_totals; + + std::string redirectedCout; + std::string redirectedCerr; + + TestCaseInfo testInfo = testCase.getTestCaseInfo(); + + m_reporter->testCaseStarting( testInfo ); + + m_activeTestCase = &testCase; + + do { + ITracker& rootTracker = m_trackerContext.startRun(); + assert( rootTracker.isSectionTracker() ); + static_cast( rootTracker ).addInitialFilters( m_config->getSectionsToRun() ); + do { + m_trackerContext.startCycle(); + m_testCaseTracker = &SectionTracker::acquire( m_trackerContext, TestCaseTracking::NameAndLocation( testInfo.name, testInfo.lineInfo ) ); + runCurrentTest( redirectedCout, redirectedCerr ); + } + while( !m_testCaseTracker->isSuccessfullyCompleted() && !aborting() ); + } + // !TBD: deprecated - this will be replaced by indexed trackers + while( getCurrentContext().advanceGeneratorsForCurrentTest() && !aborting() ); + + Totals deltaTotals = m_totals.delta( prevTotals ); + if( testInfo.expectedToFail() && deltaTotals.testCases.passed > 0 ) { + deltaTotals.assertions.failed++; + deltaTotals.testCases.passed--; + deltaTotals.testCases.failed++; + } + m_totals.testCases += deltaTotals.testCases; + m_reporter->testCaseEnded( TestCaseStats( testInfo, + deltaTotals, + redirectedCout, + redirectedCerr, + aborting() ) ); + + m_activeTestCase = CATCH_NULL; + m_testCaseTracker = CATCH_NULL; + + return deltaTotals; + } + + Ptr config() const { + return m_config; + } + + private: // IResultCapture + + virtual void assertionEnded( AssertionResult const& result ) { + if( result.getResultType() == ResultWas::Ok ) { + m_totals.assertions.passed++; + } + else if( !result.isOk() ) { + if( m_activeTestCase->getTestCaseInfo().okToFail() ) + m_totals.assertions.failedButOk++; + else + m_totals.assertions.failed++; + } + + // We have no use for the return value (whether messages should be cleared), because messages were made scoped + // and should be let to clear 
themselves out. + static_cast(m_reporter->assertionEnded(AssertionStats(result, m_messages, m_totals))); + + // Reset working state + m_lastAssertionInfo = AssertionInfo( "", m_lastAssertionInfo.lineInfo, "{Unknown expression after the reported line}" , m_lastAssertionInfo.resultDisposition ); + m_lastResult = result; + } + + virtual bool lastAssertionPassed() + { + return m_totals.assertions.passed == (m_prevPassed + 1); + } + + virtual void assertionPassed() + { + m_totals.assertions.passed++; + m_lastAssertionInfo.capturedExpression = "{Unknown expression after the reported line}"; + m_lastAssertionInfo.macroName = ""; + } + + virtual void assertionRun() + { + m_prevPassed = m_totals.assertions.passed; + } + + virtual bool sectionStarted ( + SectionInfo const& sectionInfo, + Counts& assertions + ) + { + ITracker& sectionTracker = SectionTracker::acquire( m_trackerContext, TestCaseTracking::NameAndLocation( sectionInfo.name, sectionInfo.lineInfo ) ); + if( !sectionTracker.isOpen() ) + return false; + m_activeSections.push_back( §ionTracker ); + + m_lastAssertionInfo.lineInfo = sectionInfo.lineInfo; + + m_reporter->sectionStarting( sectionInfo ); + + assertions = m_totals.assertions; + + return true; + } + bool testForMissingAssertions( Counts& assertions ) { + if( assertions.total() != 0 ) + return false; + if( !m_config->warnAboutMissingAssertions() ) + return false; + if( m_trackerContext.currentTracker().hasChildren() ) + return false; + m_totals.assertions.failed++; + assertions.failed++; + return true; + } + + virtual void sectionEnded( SectionEndInfo const& endInfo ) { + Counts assertions = m_totals.assertions - endInfo.prevAssertions; + bool missingAssertions = testForMissingAssertions( assertions ); + + if( !m_activeSections.empty() ) { + m_activeSections.back()->close(); + m_activeSections.pop_back(); + } + + m_reporter->sectionEnded( SectionStats( endInfo.sectionInfo, assertions, endInfo.durationInSeconds, missingAssertions ) ); + m_messages.clear(); + 
} + + virtual void sectionEndedEarly( SectionEndInfo const& endInfo ) { + if( m_unfinishedSections.empty() ) + m_activeSections.back()->fail(); + else + m_activeSections.back()->close(); + m_activeSections.pop_back(); + + m_unfinishedSections.push_back( endInfo ); + } + + virtual void pushScopedMessage( MessageInfo const& message ) { + m_messages.push_back( message ); + } + + virtual void popScopedMessage( MessageInfo const& message ) { + m_messages.erase( std::remove( m_messages.begin(), m_messages.end(), message ), m_messages.end() ); + } + + virtual std::string getCurrentTestName() const { + return m_activeTestCase + ? m_activeTestCase->getTestCaseInfo().name + : std::string(); + } + + virtual const AssertionResult* getLastResult() const { + return &m_lastResult; + } + + virtual void exceptionEarlyReported() { + m_shouldReportUnexpected = false; + } + + virtual void handleFatalErrorCondition( std::string const& message ) { + // Don't rebuild the result -- the stringification itself can cause more fatal errors + // Instead, fake a result data. 
+ AssertionResultData tempResult; + tempResult.resultType = ResultWas::FatalErrorCondition; + tempResult.message = message; + AssertionResult result(m_lastAssertionInfo, tempResult); + + getResultCapture().assertionEnded(result); + + handleUnfinishedSections(); + + // Recreate section for test case (as we will lose the one that was in scope) + TestCaseInfo const& testCaseInfo = m_activeTestCase->getTestCaseInfo(); + SectionInfo testCaseSection( testCaseInfo.lineInfo, testCaseInfo.name, testCaseInfo.description ); + + Counts assertions; + assertions.failed = 1; + SectionStats testCaseSectionStats( testCaseSection, assertions, 0, false ); + m_reporter->sectionEnded( testCaseSectionStats ); + + TestCaseInfo testInfo = m_activeTestCase->getTestCaseInfo(); + + Totals deltaTotals; + deltaTotals.testCases.failed = 1; + deltaTotals.assertions.failed = 1; + m_reporter->testCaseEnded( TestCaseStats( testInfo, + deltaTotals, + std::string(), + std::string(), + false ) ); + m_totals.testCases.failed++; + testGroupEnded( std::string(), m_totals, 1, 1 ); + m_reporter->testRunEnded( TestRunStats( m_runInfo, m_totals, false ) ); + } + + public: + // !TBD We need to do this another way! 
+ bool aborting() const { + return m_totals.assertions.failed == static_cast( m_config->abortAfter() ); + } + + private: + + void runCurrentTest( std::string& redirectedCout, std::string& redirectedCerr ) { + TestCaseInfo const& testCaseInfo = m_activeTestCase->getTestCaseInfo(); + SectionInfo testCaseSection( testCaseInfo.lineInfo, testCaseInfo.name, testCaseInfo.description ); + m_reporter->sectionStarting( testCaseSection ); + Counts prevAssertions = m_totals.assertions; + double duration = 0; + m_shouldReportUnexpected = true; + try { + m_lastAssertionInfo = AssertionInfo( "TEST_CASE", testCaseInfo.lineInfo, "", ResultDisposition::Normal ); + + seedRng( *m_config ); + + Timer timer; + timer.start(); + if( m_reporter->getPreferences().shouldRedirectStdOut ) { + StreamRedirect coutRedir( Catch::cout(), redirectedCout ); + StdErrRedirect errRedir( redirectedCerr ); + invokeActiveTestCase(); + } + else { + invokeActiveTestCase(); + } + duration = timer.getElapsedSeconds(); + } + catch( TestFailureException& ) { + // This just means the test was aborted due to failure + } + catch(...) { + // Under CATCH_CONFIG_FAST_COMPILE, unexpected exceptions under REQUIRE assertions + // are reported without translation at the point of origin. 
+ if (m_shouldReportUnexpected) { + makeUnexpectedResultBuilder().useActiveException(); + } + } + m_testCaseTracker->close(); + handleUnfinishedSections(); + m_messages.clear(); + + Counts assertions = m_totals.assertions - prevAssertions; + bool missingAssertions = testForMissingAssertions( assertions ); + + SectionStats testCaseSectionStats( testCaseSection, assertions, duration, missingAssertions ); + m_reporter->sectionEnded( testCaseSectionStats ); + } + + void invokeActiveTestCase() { + FatalConditionHandler fatalConditionHandler; // Handle signals + m_activeTestCase->invoke(); + fatalConditionHandler.reset(); + } + + private: + + ResultBuilder makeUnexpectedResultBuilder() const { + return ResultBuilder( m_lastAssertionInfo.macroName, + m_lastAssertionInfo.lineInfo, + m_lastAssertionInfo.capturedExpression, + m_lastAssertionInfo.resultDisposition ); + } + + void handleUnfinishedSections() { + // If sections ended prematurely due to an exception we stored their + // infos here so we can tear them down outside the unwind process. 
+ for( std::vector::const_reverse_iterator it = m_unfinishedSections.rbegin(), + itEnd = m_unfinishedSections.rend(); + it != itEnd; + ++it ) + sectionEnded( *it ); + m_unfinishedSections.clear(); + } + + TestRunInfo m_runInfo; + IMutableContext& m_context; + TestCase const* m_activeTestCase; + ITracker* m_testCaseTracker; + ITracker* m_currentSectionTracker; + AssertionResult m_lastResult; + + Ptr m_config; + Totals m_totals; + Ptr m_reporter; + std::vector m_messages; + AssertionInfo m_lastAssertionInfo; + std::vector m_unfinishedSections; + std::vector m_activeSections; + TrackerContext m_trackerContext; + size_t m_prevPassed; + bool m_shouldReportUnexpected; + }; + + IResultCapture& getResultCapture() { + if( IResultCapture* capture = getCurrentContext().getResultCapture() ) + return *capture; + else + throw std::logic_error( "No result capture instance" ); + } + +} // end namespace Catch + +// #included from: internal/catch_version.h +#define TWOBLUECUBES_CATCH_VERSION_H_INCLUDED + +namespace Catch { + + // Versioning information + struct Version { + Version( unsigned int _majorVersion, + unsigned int _minorVersion, + unsigned int _patchNumber, + char const * const _branchName, + unsigned int _buildNumber ); + + unsigned int const majorVersion; + unsigned int const minorVersion; + unsigned int const patchNumber; + + // buildNumber is only used if branchName is not null + char const * const branchName; + unsigned int const buildNumber; + + friend std::ostream& operator << ( std::ostream& os, Version const& version ); + + private: + void operator=( Version const& ); + }; + + inline Version libraryVersion(); +} + +#include +#include +#include + +namespace Catch { + + Ptr createReporter( std::string const& reporterName, Ptr const& config ) { + Ptr reporter = getRegistryHub().getReporterRegistry().create( reporterName, config.get() ); + if( !reporter ) { + std::ostringstream oss; + oss << "No reporter registered with name: '" << reporterName << "'"; + throw 
std::domain_error( oss.str() ); + } + return reporter; + } + +#if !defined(CATCH_CONFIG_DEFAULT_REPORTER) +#define CATCH_CONFIG_DEFAULT_REPORTER "console" +#endif + + Ptr makeReporter( Ptr const& config ) { + std::vector reporters = config->getReporterNames(); + if( reporters.empty() ) + reporters.push_back( CATCH_CONFIG_DEFAULT_REPORTER ); + + Ptr reporter; + for( std::vector::const_iterator it = reporters.begin(), itEnd = reporters.end(); + it != itEnd; + ++it ) + reporter = addReporter( reporter, createReporter( *it, config ) ); + return reporter; + } + Ptr addListeners( Ptr const& config, Ptr reporters ) { + IReporterRegistry::Listeners listeners = getRegistryHub().getReporterRegistry().getListeners(); + for( IReporterRegistry::Listeners::const_iterator it = listeners.begin(), itEnd = listeners.end(); + it != itEnd; + ++it ) + reporters = addReporter(reporters, (*it)->create( ReporterConfig( config ) ) ); + return reporters; + } + + Totals runTests( Ptr const& config ) { + + Ptr iconfig = config.get(); + + Ptr reporter = makeReporter( config ); + reporter = addListeners( iconfig, reporter ); + + RunContext context( iconfig, reporter ); + + Totals totals; + + context.testGroupStarting( config->name(), 1, 1 ); + + TestSpec testSpec = config->testSpec(); + if( !testSpec.hasFilters() ) + testSpec = TestSpecParser( ITagAliasRegistry::get() ).parse( "~[.]" ).testSpec(); // All not hidden tests + + std::vector const& allTestCases = getAllTestCasesSorted( *iconfig ); + for( std::vector::const_iterator it = allTestCases.begin(), itEnd = allTestCases.end(); + it != itEnd; + ++it ) { + if( !context.aborting() && matchTest( *it, testSpec, *iconfig ) ) + totals += context.runTest( *it ); + else + reporter->skipTest( *it ); + } + + context.testGroupEnded( iconfig->name(), totals, 1, 1 ); + return totals; + } + + void applyFilenamesAsTags( IConfig const& config ) { + std::vector const& tests = getAllTestCasesSorted( config ); + for(std::size_t i = 0; i < tests.size(); ++i ) { 
+ TestCase& test = const_cast( tests[i] ); + std::set tags = test.tags; + + std::string filename = test.lineInfo.file; + std::string::size_type lastSlash = filename.find_last_of( "\\/" ); + if( lastSlash != std::string::npos ) + filename = filename.substr( lastSlash+1 ); + + std::string::size_type lastDot = filename.find_last_of( '.' ); + if( lastDot != std::string::npos ) + filename = filename.substr( 0, lastDot ); + + tags.insert( '#' + filename ); + setTags( test, tags ); + } + } + + class Session : NonCopyable { + static bool alreadyInstantiated; + + public: + + struct OnUnusedOptions { enum DoWhat { Ignore, Fail }; }; + + Session() + : m_cli( makeCommandLineParser() ) { + if( alreadyInstantiated ) { + std::string msg = "Only one instance of Catch::Session can ever be used"; + Catch::cerr() << msg << std::endl; + throw std::logic_error( msg ); + } + alreadyInstantiated = true; + } + ~Session() { + Catch::cleanUp(); + } + + void showHelp( std::string const& processName ) { + Catch::cout() << "\nCatch v" << libraryVersion() << "\n"; + + m_cli.usage( Catch::cout(), processName ); + Catch::cout() << "For more detail usage please see the project docs\n" << std::endl; + } + void libIdentify() { + Catch::cout() + << std::left << std::setw(16) << "description: " << "A Catch test executable\n" + << std::left << std::setw(16) << "category: " << "testframework\n" + << std::left << std::setw(16) << "framework: " << "Catch Test\n" + << std::left << std::setw(16) << "version: " << libraryVersion() << std::endl; + } + + int applyCommandLine( int argc, char const* const* const argv, OnUnusedOptions::DoWhat unusedOptionBehaviour = OnUnusedOptions::Fail ) { + try { + m_cli.setThrowOnUnrecognisedTokens( unusedOptionBehaviour == OnUnusedOptions::Fail ); + m_unusedTokens = m_cli.parseInto( Clara::argsToVector( argc, argv ), m_configData ); + if( m_configData.showHelp ) + showHelp( m_configData.processName ); + if( m_configData.libIdentify ) + libIdentify(); + m_config.reset(); + } 
+ catch( std::exception& ex ) { + { + Colour colourGuard( Colour::Red ); + Catch::cerr() + << "\nError(s) in input:\n" + << Text( ex.what(), TextAttributes().setIndent(2) ) + << "\n\n"; + } + m_cli.usage( Catch::cout(), m_configData.processName ); + return (std::numeric_limits::max)(); + } + return 0; + } + + void useConfigData( ConfigData const& _configData ) { + m_configData = _configData; + m_config.reset(); + } + + int run( int argc, char const* const* const argv ) { + + int returnCode = applyCommandLine( argc, argv ); + if( returnCode == 0 ) + returnCode = run(); + return returnCode; + } + + #if defined(WIN32) && defined(UNICODE) + int run( int argc, wchar_t const* const* const argv ) { + + char **utf8Argv = new char *[ argc ]; + + for ( int i = 0; i < argc; ++i ) { + int bufSize = WideCharToMultiByte( CP_UTF8, 0, argv[i], -1, NULL, 0, NULL, NULL ); + + utf8Argv[ i ] = new char[ bufSize ]; + + WideCharToMultiByte( CP_UTF8, 0, argv[i], -1, utf8Argv[i], bufSize, NULL, NULL ); + } + + int returnCode = applyCommandLine( argc, utf8Argv ); + if( returnCode == 0 ) + returnCode = run(); + + for ( int i = 0; i < argc; ++i ) + delete [] utf8Argv[ i ]; + + delete [] utf8Argv; + + return returnCode; + } + #endif + + int run() { + if( ( m_configData.waitForKeypress & WaitForKeypress::BeforeStart ) != 0 ) { + Catch::cout() << "...waiting for enter/ return before starting" << std::endl; + static_cast(std::getchar()); + } + int exitCode = runInternal(); + if( ( m_configData.waitForKeypress & WaitForKeypress::BeforeExit ) != 0 ) { + Catch::cout() << "...waiting for enter/ return before exiting, with code: " << exitCode << std::endl; + static_cast(std::getchar()); + } + return exitCode; + } + + Clara::CommandLine const& cli() const { + return m_cli; + } + std::vector const& unusedTokens() const { + return m_unusedTokens; + } + ConfigData& configData() { + return m_configData; + } + Config& config() { + if( !m_config ) + m_config = new Config( m_configData ); + return *m_config; 
+ } + private: + + int runInternal() { + if( m_configData.showHelp || m_configData.libIdentify ) + return 0; + + try + { + config(); // Force config to be constructed + + seedRng( *m_config ); + + if( m_configData.filenamesAsTags ) + applyFilenamesAsTags( *m_config ); + + // Handle list request + if( Option listed = list( config() ) ) + return static_cast( *listed ); + + return static_cast( runTests( m_config ).assertions.failed ); + } + catch( std::exception& ex ) { + Catch::cerr() << ex.what() << std::endl; + return (std::numeric_limits::max)(); + } + } + + Clara::CommandLine m_cli; + std::vector m_unusedTokens; + ConfigData m_configData; + Ptr m_config; + }; + + bool Session::alreadyInstantiated = false; + +} // end namespace Catch + +// #included from: catch_registry_hub.hpp +#define TWOBLUECUBES_CATCH_REGISTRY_HUB_HPP_INCLUDED + +// #included from: catch_test_case_registry_impl.hpp +#define TWOBLUECUBES_CATCH_TEST_CASE_REGISTRY_IMPL_HPP_INCLUDED + +#include +#include +#include +#include + +namespace Catch { + + struct RandomNumberGenerator { + typedef unsigned int result_type; + + result_type operator()( result_type n ) const { return std::rand() % n; } + +#ifdef CATCH_CONFIG_CPP11_SHUFFLE + static constexpr result_type (min)() { return 0; } + static constexpr result_type (max)() { return 1000000; } + result_type operator()() const { return std::rand() % (max)(); } +#endif + template + static void shuffle( V& vector ) { + RandomNumberGenerator rng; +#ifdef CATCH_CONFIG_CPP11_SHUFFLE + std::shuffle( vector.begin(), vector.end(), rng ); +#else + std::random_shuffle( vector.begin(), vector.end(), rng ); +#endif + } + }; + + inline std::vector sortTests( IConfig const& config, std::vector const& unsortedTestCases ) { + + std::vector sorted = unsortedTestCases; + + switch( config.runOrder() ) { + case RunTests::InLexicographicalOrder: + std::sort( sorted.begin(), sorted.end() ); + break; + case RunTests::InRandomOrder: + { + seedRng( config ); + 
RandomNumberGenerator::shuffle( sorted ); + } + break; + case RunTests::InDeclarationOrder: + // already in declaration order + break; + } + return sorted; + } + bool matchTest( TestCase const& testCase, TestSpec const& testSpec, IConfig const& config ) { + return testSpec.matches( testCase ) && ( config.allowThrows() || !testCase.throws() ); + } + + void enforceNoDuplicateTestCases( std::vector const& functions ) { + std::set seenFunctions; + for( std::vector::const_iterator it = functions.begin(), itEnd = functions.end(); + it != itEnd; + ++it ) { + std::pair::const_iterator, bool> prev = seenFunctions.insert( *it ); + if( !prev.second ) { + std::ostringstream ss; + + ss << Colour( Colour::Red ) + << "error: TEST_CASE( \"" << it->name << "\" ) already defined.\n" + << "\tFirst seen at " << prev.first->getTestCaseInfo().lineInfo << '\n' + << "\tRedefined at " << it->getTestCaseInfo().lineInfo << std::endl; + + throw std::runtime_error(ss.str()); + } + } + } + + std::vector filterTests( std::vector const& testCases, TestSpec const& testSpec, IConfig const& config ) { + std::vector filtered; + filtered.reserve( testCases.size() ); + for( std::vector::const_iterator it = testCases.begin(), itEnd = testCases.end(); + it != itEnd; + ++it ) + if( matchTest( *it, testSpec, config ) ) + filtered.push_back( *it ); + return filtered; + } + std::vector const& getAllTestCasesSorted( IConfig const& config ) { + return getRegistryHub().getTestCaseRegistry().getAllTestsSorted( config ); + } + + class TestRegistry : public ITestCaseRegistry { + public: + TestRegistry() + : m_currentSortOrder( RunTests::InDeclarationOrder ), + m_unnamedCount( 0 ) + {} + virtual ~TestRegistry(); + + virtual void registerTest( TestCase const& testCase ) { + std::string name = testCase.getTestCaseInfo().name; + if( name.empty() ) { + std::ostringstream oss; + oss << "Anonymous test case " << ++m_unnamedCount; + return registerTest( testCase.withName( oss.str() ) ); + } + m_functions.push_back( 
testCase ); + } + + virtual std::vector const& getAllTests() const { + return m_functions; + } + virtual std::vector const& getAllTestsSorted( IConfig const& config ) const { + if( m_sortedFunctions.empty() ) + enforceNoDuplicateTestCases( m_functions ); + + if( m_currentSortOrder != config.runOrder() || m_sortedFunctions.empty() ) { + m_sortedFunctions = sortTests( config, m_functions ); + m_currentSortOrder = config.runOrder(); + } + return m_sortedFunctions; + } + + private: + std::vector m_functions; + mutable RunTests::InWhatOrder m_currentSortOrder; + mutable std::vector m_sortedFunctions; + size_t m_unnamedCount; + std::ios_base::Init m_ostreamInit; // Forces cout/ cerr to be initialised + }; + + /////////////////////////////////////////////////////////////////////////// + + class FreeFunctionTestCase : public SharedImpl { + public: + + FreeFunctionTestCase( TestFunction fun ) : m_fun( fun ) {} + + virtual void invoke() const { + m_fun(); + } + + private: + virtual ~FreeFunctionTestCase(); + + TestFunction m_fun; + }; + + inline std::string extractClassName( std::string const& classOrQualifiedMethodName ) { + std::string className = classOrQualifiedMethodName; + if( startsWith( className, '&' ) ) + { + std::size_t lastColons = className.rfind( "::" ); + std::size_t penultimateColons = className.rfind( "::", lastColons-1 ); + if( penultimateColons == std::string::npos ) + penultimateColons = 1; + className = className.substr( penultimateColons, lastColons-penultimateColons ); + } + return className; + } + + void registerTestCase + ( ITestCase* testCase, + char const* classOrQualifiedMethodName, + NameAndDesc const& nameAndDesc, + SourceLineInfo const& lineInfo ) { + + getMutableRegistryHub().registerTest + ( makeTestCase + ( testCase, + extractClassName( classOrQualifiedMethodName ), + nameAndDesc.name, + nameAndDesc.description, + lineInfo ) ); + } + void registerTestCaseFunction + ( TestFunction function, + SourceLineInfo const& lineInfo, + NameAndDesc 
const& nameAndDesc ) { + registerTestCase( new FreeFunctionTestCase( function ), "", nameAndDesc, lineInfo ); + } + + /////////////////////////////////////////////////////////////////////////// + + AutoReg::AutoReg + ( TestFunction function, + SourceLineInfo const& lineInfo, + NameAndDesc const& nameAndDesc ) { + registerTestCaseFunction( function, lineInfo, nameAndDesc ); + } + + AutoReg::~AutoReg() {} + +} // end namespace Catch + +// #included from: catch_reporter_registry.hpp +#define TWOBLUECUBES_CATCH_REPORTER_REGISTRY_HPP_INCLUDED + +#include + +namespace Catch { + + class ReporterRegistry : public IReporterRegistry { + + public: + + virtual ~ReporterRegistry() CATCH_OVERRIDE {} + + virtual IStreamingReporter* create( std::string const& name, Ptr const& config ) const CATCH_OVERRIDE { + FactoryMap::const_iterator it = m_factories.find( name ); + if( it == m_factories.end() ) + return CATCH_NULL; + return it->second->create( ReporterConfig( config ) ); + } + + void registerReporter( std::string const& name, Ptr const& factory ) { + m_factories.insert( std::make_pair( name, factory ) ); + } + void registerListener( Ptr const& factory ) { + m_listeners.push_back( factory ); + } + + virtual FactoryMap const& getFactories() const CATCH_OVERRIDE { + return m_factories; + } + virtual Listeners const& getListeners() const CATCH_OVERRIDE { + return m_listeners; + } + + private: + FactoryMap m_factories; + Listeners m_listeners; + }; +} + +// #included from: catch_exception_translator_registry.hpp +#define TWOBLUECUBES_CATCH_EXCEPTION_TRANSLATOR_REGISTRY_HPP_INCLUDED + +#ifdef __OBJC__ +#import "Foundation/Foundation.h" +#endif + +namespace Catch { + + class ExceptionTranslatorRegistry : public IExceptionTranslatorRegistry { + public: + ~ExceptionTranslatorRegistry() { + deleteAll( m_translators ); + } + + virtual void registerTranslator( const IExceptionTranslator* translator ) { + m_translators.push_back( translator ); + } + + virtual std::string 
translateActiveException() const { + try { +#ifdef __OBJC__ + // In Objective-C try objective-c exceptions first + @try { + return tryTranslators(); + } + @catch (NSException *exception) { + return Catch::toString( [exception description] ); + } +#else + return tryTranslators(); +#endif + } + catch( TestFailureException& ) { + throw; + } + catch( std::exception& ex ) { + return ex.what(); + } + catch( std::string& msg ) { + return msg; + } + catch( const char* msg ) { + return msg; + } + catch(...) { + return "Unknown exception"; + } + } + + std::string tryTranslators() const { + if( m_translators.empty() ) + throw; + else + return m_translators[0]->translate( m_translators.begin()+1, m_translators.end() ); + } + + private: + std::vector m_translators; + }; +} + +// #included from: catch_tag_alias_registry.h +#define TWOBLUECUBES_CATCH_TAG_ALIAS_REGISTRY_H_INCLUDED + +#include + +namespace Catch { + + class TagAliasRegistry : public ITagAliasRegistry { + public: + virtual ~TagAliasRegistry(); + virtual Option find( std::string const& alias ) const; + virtual std::string expandAliases( std::string const& unexpandedTestSpec ) const; + void add( std::string const& alias, std::string const& tag, SourceLineInfo const& lineInfo ); + + private: + std::map m_registry; + }; + +} // end namespace Catch + +namespace Catch { + + namespace { + + class RegistryHub : public IRegistryHub, public IMutableRegistryHub { + + RegistryHub( RegistryHub const& ); + void operator=( RegistryHub const& ); + + public: // IRegistryHub + RegistryHub() { + } + virtual IReporterRegistry const& getReporterRegistry() const CATCH_OVERRIDE { + return m_reporterRegistry; + } + virtual ITestCaseRegistry const& getTestCaseRegistry() const CATCH_OVERRIDE { + return m_testCaseRegistry; + } + virtual IExceptionTranslatorRegistry& getExceptionTranslatorRegistry() CATCH_OVERRIDE { + return m_exceptionTranslatorRegistry; + } + virtual ITagAliasRegistry const& getTagAliasRegistry() const CATCH_OVERRIDE { + 
return m_tagAliasRegistry; + } + + public: // IMutableRegistryHub + virtual void registerReporter( std::string const& name, Ptr const& factory ) CATCH_OVERRIDE { + m_reporterRegistry.registerReporter( name, factory ); + } + virtual void registerListener( Ptr const& factory ) CATCH_OVERRIDE { + m_reporterRegistry.registerListener( factory ); + } + virtual void registerTest( TestCase const& testInfo ) CATCH_OVERRIDE { + m_testCaseRegistry.registerTest( testInfo ); + } + virtual void registerTranslator( const IExceptionTranslator* translator ) CATCH_OVERRIDE { + m_exceptionTranslatorRegistry.registerTranslator( translator ); + } + virtual void registerTagAlias( std::string const& alias, std::string const& tag, SourceLineInfo const& lineInfo ) CATCH_OVERRIDE { + m_tagAliasRegistry.add( alias, tag, lineInfo ); + } + + private: + TestRegistry m_testCaseRegistry; + ReporterRegistry m_reporterRegistry; + ExceptionTranslatorRegistry m_exceptionTranslatorRegistry; + TagAliasRegistry m_tagAliasRegistry; + }; + + // Single, global, instance + inline RegistryHub*& getTheRegistryHub() { + static RegistryHub* theRegistryHub = CATCH_NULL; + if( !theRegistryHub ) + theRegistryHub = new RegistryHub(); + return theRegistryHub; + } + } + + IRegistryHub& getRegistryHub() { + return *getTheRegistryHub(); + } + IMutableRegistryHub& getMutableRegistryHub() { + return *getTheRegistryHub(); + } + void cleanUp() { + delete getTheRegistryHub(); + getTheRegistryHub() = CATCH_NULL; + cleanUpContext(); + } + std::string translateActiveException() { + return getRegistryHub().getExceptionTranslatorRegistry().translateActiveException(); + } + +} // end namespace Catch + +// #included from: catch_notimplemented_exception.hpp +#define TWOBLUECUBES_CATCH_NOTIMPLEMENTED_EXCEPTION_HPP_INCLUDED + +#include + +namespace Catch { + + NotImplementedException::NotImplementedException( SourceLineInfo const& lineInfo ) + : m_lineInfo( lineInfo ) { + std::ostringstream oss; + oss << lineInfo << ": function "; + 
oss << "not implemented"; + m_what = oss.str(); + } + + const char* NotImplementedException::what() const CATCH_NOEXCEPT { + return m_what.c_str(); + } + +} // end namespace Catch + +// #included from: catch_context_impl.hpp +#define TWOBLUECUBES_CATCH_CONTEXT_IMPL_HPP_INCLUDED + +// #included from: catch_stream.hpp +#define TWOBLUECUBES_CATCH_STREAM_HPP_INCLUDED + +#include +#include +#include + +namespace Catch { + + template + class StreamBufImpl : public StreamBufBase { + char data[bufferSize]; + WriterF m_writer; + + public: + StreamBufImpl() { + setp( data, data + sizeof(data) ); + } + + ~StreamBufImpl() CATCH_NOEXCEPT { + sync(); + } + + private: + int overflow( int c ) { + sync(); + + if( c != EOF ) { + if( pbase() == epptr() ) + m_writer( std::string( 1, static_cast( c ) ) ); + else + sputc( static_cast( c ) ); + } + return 0; + } + + int sync() { + if( pbase() != pptr() ) { + m_writer( std::string( pbase(), static_cast( pptr() - pbase() ) ) ); + setp( pbase(), epptr() ); + } + return 0; + } + }; + + /////////////////////////////////////////////////////////////////////////// + + FileStream::FileStream( std::string const& filename ) { + m_ofs.open( filename.c_str() ); + if( m_ofs.fail() ) { + std::ostringstream oss; + oss << "Unable to open file: '" << filename << '\''; + throw std::domain_error( oss.str() ); + } + } + + std::ostream& FileStream::stream() const { + return m_ofs; + } + + struct OutputDebugWriter { + + void operator()( std::string const&str ) { + writeToDebugConsole( str ); + } + }; + + DebugOutStream::DebugOutStream() + : m_streamBuf( new StreamBufImpl() ), + m_os( m_streamBuf.get() ) + {} + + std::ostream& DebugOutStream::stream() const { + return m_os; + } + + // Store the streambuf from cout up-front because + // cout may get redirected when running tests + CoutStream::CoutStream() + : m_os( Catch::cout().rdbuf() ) + {} + + std::ostream& CoutStream::stream() const { + return m_os; + } + +#ifndef CATCH_CONFIG_NOSTDOUT // If you #define 
this you must implement these functions + std::ostream& cout() { + return std::cout; + } + std::ostream& cerr() { + return std::cerr; + } + std::ostream& clog() { + return std::clog; + } +#endif +} + +namespace Catch { + + class Context : public IMutableContext { + + Context() : m_config( CATCH_NULL ), m_runner( CATCH_NULL ), m_resultCapture( CATCH_NULL ) {} + Context( Context const& ); + void operator=( Context const& ); + + public: + virtual ~Context() { + deleteAllValues( m_generatorsByTestName ); + } + + public: // IContext + virtual IResultCapture* getResultCapture() { + return m_resultCapture; + } + virtual IRunner* getRunner() { + return m_runner; + } + virtual size_t getGeneratorIndex( std::string const& fileInfo, size_t totalSize ) { + return getGeneratorsForCurrentTest() + .getGeneratorInfo( fileInfo, totalSize ) + .getCurrentIndex(); + } + virtual bool advanceGeneratorsForCurrentTest() { + IGeneratorsForTest* generators = findGeneratorsForCurrentTest(); + return generators && generators->moveNext(); + } + + virtual Ptr getConfig() const { + return m_config; + } + + public: // IMutableContext + virtual void setResultCapture( IResultCapture* resultCapture ) { + m_resultCapture = resultCapture; + } + virtual void setRunner( IRunner* runner ) { + m_runner = runner; + } + virtual void setConfig( Ptr const& config ) { + m_config = config; + } + + friend IMutableContext& getCurrentMutableContext(); + + private: + IGeneratorsForTest* findGeneratorsForCurrentTest() { + std::string testName = getResultCapture()->getCurrentTestName(); + + std::map::const_iterator it = + m_generatorsByTestName.find( testName ); + return it != m_generatorsByTestName.end() + ? 
it->second + : CATCH_NULL; + } + + IGeneratorsForTest& getGeneratorsForCurrentTest() { + IGeneratorsForTest* generators = findGeneratorsForCurrentTest(); + if( !generators ) { + std::string testName = getResultCapture()->getCurrentTestName(); + generators = createGeneratorsForTest(); + m_generatorsByTestName.insert( std::make_pair( testName, generators ) ); + } + return *generators; + } + + private: + Ptr m_config; + IRunner* m_runner; + IResultCapture* m_resultCapture; + std::map m_generatorsByTestName; + }; + + namespace { + Context* currentContext = CATCH_NULL; + } + IMutableContext& getCurrentMutableContext() { + if( !currentContext ) + currentContext = new Context(); + return *currentContext; + } + IContext& getCurrentContext() { + return getCurrentMutableContext(); + } + + void cleanUpContext() { + delete currentContext; + currentContext = CATCH_NULL; + } +} + +// #included from: catch_console_colour_impl.hpp +#define TWOBLUECUBES_CATCH_CONSOLE_COLOUR_IMPL_HPP_INCLUDED + +// #included from: catch_errno_guard.hpp +#define TWOBLUECUBES_CATCH_ERRNO_GUARD_HPP_INCLUDED + +#include + +namespace Catch { + + class ErrnoGuard { + public: + ErrnoGuard():m_oldErrno(errno){} + ~ErrnoGuard() { errno = m_oldErrno; } + private: + int m_oldErrno; + }; + +} + +namespace Catch { + namespace { + + struct IColourImpl { + virtual ~IColourImpl() {} + virtual void use( Colour::Code _colourCode ) = 0; + }; + + struct NoColourImpl : IColourImpl { + void use( Colour::Code ) {} + + static IColourImpl* instance() { + static NoColourImpl s_instance; + return &s_instance; + } + }; + + } // anon namespace +} // namespace Catch + +#if !defined( CATCH_CONFIG_COLOUR_NONE ) && !defined( CATCH_CONFIG_COLOUR_WINDOWS ) && !defined( CATCH_CONFIG_COLOUR_ANSI ) +# ifdef CATCH_PLATFORM_WINDOWS +# define CATCH_CONFIG_COLOUR_WINDOWS +# else +# define CATCH_CONFIG_COLOUR_ANSI +# endif +#endif + +#if defined ( CATCH_CONFIG_COLOUR_WINDOWS ) ///////////////////////////////////////// + +namespace Catch { 
+namespace { + + class Win32ColourImpl : public IColourImpl { + public: + Win32ColourImpl() : stdoutHandle( GetStdHandle(STD_OUTPUT_HANDLE) ) + { + CONSOLE_SCREEN_BUFFER_INFO csbiInfo; + GetConsoleScreenBufferInfo( stdoutHandle, &csbiInfo ); + originalForegroundAttributes = csbiInfo.wAttributes & ~( BACKGROUND_GREEN | BACKGROUND_RED | BACKGROUND_BLUE | BACKGROUND_INTENSITY ); + originalBackgroundAttributes = csbiInfo.wAttributes & ~( FOREGROUND_GREEN | FOREGROUND_RED | FOREGROUND_BLUE | FOREGROUND_INTENSITY ); + } + + virtual void use( Colour::Code _colourCode ) { + switch( _colourCode ) { + case Colour::None: return setTextAttribute( originalForegroundAttributes ); + case Colour::White: return setTextAttribute( FOREGROUND_GREEN | FOREGROUND_RED | FOREGROUND_BLUE ); + case Colour::Red: return setTextAttribute( FOREGROUND_RED ); + case Colour::Green: return setTextAttribute( FOREGROUND_GREEN ); + case Colour::Blue: return setTextAttribute( FOREGROUND_BLUE ); + case Colour::Cyan: return setTextAttribute( FOREGROUND_BLUE | FOREGROUND_GREEN ); + case Colour::Yellow: return setTextAttribute( FOREGROUND_RED | FOREGROUND_GREEN ); + case Colour::Grey: return setTextAttribute( 0 ); + + case Colour::LightGrey: return setTextAttribute( FOREGROUND_INTENSITY ); + case Colour::BrightRed: return setTextAttribute( FOREGROUND_INTENSITY | FOREGROUND_RED ); + case Colour::BrightGreen: return setTextAttribute( FOREGROUND_INTENSITY | FOREGROUND_GREEN ); + case Colour::BrightWhite: return setTextAttribute( FOREGROUND_INTENSITY | FOREGROUND_GREEN | FOREGROUND_RED | FOREGROUND_BLUE ); + + case Colour::Bright: throw std::logic_error( "not a colour" ); + } + } + + private: + void setTextAttribute( WORD _textAttribute ) { + SetConsoleTextAttribute( stdoutHandle, _textAttribute | originalBackgroundAttributes ); + } + HANDLE stdoutHandle; + WORD originalForegroundAttributes; + WORD originalBackgroundAttributes; + }; + + IColourImpl* platformColourInstance() { + static Win32ColourImpl 
s_instance; + + Ptr config = getCurrentContext().getConfig(); + UseColour::YesOrNo colourMode = config + ? config->useColour() + : UseColour::Auto; + if( colourMode == UseColour::Auto ) + colourMode = !isDebuggerActive() + ? UseColour::Yes + : UseColour::No; + return colourMode == UseColour::Yes + ? &s_instance + : NoColourImpl::instance(); + } + +} // end anon namespace +} // end namespace Catch + +#elif defined( CATCH_CONFIG_COLOUR_ANSI ) ////////////////////////////////////// + +#include + +namespace Catch { +namespace { + + // use POSIX/ ANSI console terminal codes + // Thanks to Adam Strzelecki for original contribution + // (http://github.com/nanoant) + // https://github.com/philsquared/Catch/pull/131 + class PosixColourImpl : public IColourImpl { + public: + virtual void use( Colour::Code _colourCode ) { + switch( _colourCode ) { + case Colour::None: + case Colour::White: return setColour( "[0m" ); + case Colour::Red: return setColour( "[0;31m" ); + case Colour::Green: return setColour( "[0;32m" ); + case Colour::Blue: return setColour( "[0;34m" ); + case Colour::Cyan: return setColour( "[0;36m" ); + case Colour::Yellow: return setColour( "[0;33m" ); + case Colour::Grey: return setColour( "[1;30m" ); + + case Colour::LightGrey: return setColour( "[0;37m" ); + case Colour::BrightRed: return setColour( "[1;31m" ); + case Colour::BrightGreen: return setColour( "[1;32m" ); + case Colour::BrightWhite: return setColour( "[1;37m" ); + + case Colour::Bright: throw std::logic_error( "not a colour" ); + } + } + static IColourImpl* instance() { + static PosixColourImpl s_instance; + return &s_instance; + } + + private: + void setColour( const char* _escapeCode ) { + Catch::cout() << '\033' << _escapeCode; + } + }; + + IColourImpl* platformColourInstance() { + ErrnoGuard guard; + Ptr config = getCurrentContext().getConfig(); + UseColour::YesOrNo colourMode = config + ? 
config->useColour() + : UseColour::Auto; + if( colourMode == UseColour::Auto ) + colourMode = (!isDebuggerActive() && isatty(STDOUT_FILENO) ) + ? UseColour::Yes + : UseColour::No; + return colourMode == UseColour::Yes + ? PosixColourImpl::instance() + : NoColourImpl::instance(); + } + +} // end anon namespace +} // end namespace Catch + +#else // not Windows or ANSI /////////////////////////////////////////////// + +namespace Catch { + + static IColourImpl* platformColourInstance() { return NoColourImpl::instance(); } + +} // end namespace Catch + +#endif // Windows/ ANSI/ None + +namespace Catch { + + Colour::Colour( Code _colourCode ) : m_moved( false ) { use( _colourCode ); } + Colour::Colour( Colour const& _other ) : m_moved( false ) { const_cast( _other ).m_moved = true; } + Colour::~Colour(){ if( !m_moved ) use( None ); } + + void Colour::use( Code _colourCode ) { + static IColourImpl* impl = platformColourInstance(); + impl->use( _colourCode ); + } + +} // end namespace Catch + +// #included from: catch_generators_impl.hpp +#define TWOBLUECUBES_CATCH_GENERATORS_IMPL_HPP_INCLUDED + +#include +#include +#include + +namespace Catch { + + struct GeneratorInfo : IGeneratorInfo { + + GeneratorInfo( std::size_t size ) + : m_size( size ), + m_currentIndex( 0 ) + {} + + bool moveNext() { + if( ++m_currentIndex == m_size ) { + m_currentIndex = 0; + return false; + } + return true; + } + + std::size_t getCurrentIndex() const { + return m_currentIndex; + } + + std::size_t m_size; + std::size_t m_currentIndex; + }; + + /////////////////////////////////////////////////////////////////////////// + + class GeneratorsForTest : public IGeneratorsForTest { + + public: + ~GeneratorsForTest() { + deleteAll( m_generatorsInOrder ); + } + + IGeneratorInfo& getGeneratorInfo( std::string const& fileInfo, std::size_t size ) { + std::map::const_iterator it = m_generatorsByName.find( fileInfo ); + if( it == m_generatorsByName.end() ) { + IGeneratorInfo* info = new GeneratorInfo( size ); 
+ m_generatorsByName.insert( std::make_pair( fileInfo, info ) ); + m_generatorsInOrder.push_back( info ); + return *info; + } + return *it->second; + } + + bool moveNext() { + std::vector::const_iterator it = m_generatorsInOrder.begin(); + std::vector::const_iterator itEnd = m_generatorsInOrder.end(); + for(; it != itEnd; ++it ) { + if( (*it)->moveNext() ) + return true; + } + return false; + } + + private: + std::map m_generatorsByName; + std::vector m_generatorsInOrder; + }; + + IGeneratorsForTest* createGeneratorsForTest() + { + return new GeneratorsForTest(); + } + +} // end namespace Catch + +// #included from: catch_assertionresult.hpp +#define TWOBLUECUBES_CATCH_ASSERTIONRESULT_HPP_INCLUDED + +namespace Catch { + + AssertionInfo::AssertionInfo():macroName(""), capturedExpression(""), resultDisposition(ResultDisposition::Normal), secondArg(""){} + + AssertionInfo::AssertionInfo( char const * _macroName, + SourceLineInfo const& _lineInfo, + char const * _capturedExpression, + ResultDisposition::Flags _resultDisposition, + char const * _secondArg) + : macroName( _macroName ), + lineInfo( _lineInfo ), + capturedExpression( _capturedExpression ), + resultDisposition( _resultDisposition ), + secondArg( _secondArg ) + {} + + AssertionResult::AssertionResult() {} + + AssertionResult::AssertionResult( AssertionInfo const& info, AssertionResultData const& data ) + : m_info( info ), + m_resultData( data ) + {} + + AssertionResult::~AssertionResult() {} + + // Result was a success + bool AssertionResult::succeeded() const { + return Catch::isOk( m_resultData.resultType ); + } + + // Result was a success, or failure is suppressed + bool AssertionResult::isOk() const { + return Catch::isOk( m_resultData.resultType ) || shouldSuppressFailure( m_info.resultDisposition ); + } + + ResultWas::OfType AssertionResult::getResultType() const { + return m_resultData.resultType; + } + + bool AssertionResult::hasExpression() const { + return m_info.capturedExpression[0] != 0; + } + + 
bool AssertionResult::hasMessage() const { + return !m_resultData.message.empty(); + } + + std::string capturedExpressionWithSecondArgument( char const * capturedExpression, char const * secondArg ) { + return (secondArg[0] == 0 || secondArg[0] == '"' && secondArg[1] == '"') + ? capturedExpression + : std::string(capturedExpression) + ", " + secondArg; + } + + std::string AssertionResult::getExpression() const { + if( isFalseTest( m_info.resultDisposition ) ) + return "!(" + capturedExpressionWithSecondArgument(m_info.capturedExpression, m_info.secondArg) + ")"; + else + return capturedExpressionWithSecondArgument(m_info.capturedExpression, m_info.secondArg); + } + std::string AssertionResult::getExpressionInMacro() const { + if( m_info.macroName[0] == 0 ) + return capturedExpressionWithSecondArgument(m_info.capturedExpression, m_info.secondArg); + else + return std::string(m_info.macroName) + "( " + capturedExpressionWithSecondArgument(m_info.capturedExpression, m_info.secondArg) + " )"; + } + + bool AssertionResult::hasExpandedExpression() const { + return hasExpression() && getExpandedExpression() != getExpression(); + } + + std::string AssertionResult::getExpandedExpression() const { + return m_resultData.reconstructExpression(); + } + + std::string AssertionResult::getMessage() const { + return m_resultData.message; + } + SourceLineInfo AssertionResult::getSourceInfo() const { + return m_info.lineInfo; + } + + std::string AssertionResult::getTestMacroName() const { + return m_info.macroName; + } + + void AssertionResult::discardDecomposedExpression() const { + m_resultData.decomposedExpression = CATCH_NULL; + } + + void AssertionResult::expandDecomposedExpression() const { + m_resultData.reconstructExpression(); + } + +} // end namespace Catch + +// #included from: catch_test_case_info.hpp +#define TWOBLUECUBES_CATCH_TEST_CASE_INFO_HPP_INCLUDED + +#include + +namespace Catch { + + inline TestCaseInfo::SpecialProperties parseSpecialTag( std::string const& tag ) 
{ + if( startsWith( tag, '.' ) || + tag == "hide" || + tag == "!hide" ) + return TestCaseInfo::IsHidden; + else if( tag == "!throws" ) + return TestCaseInfo::Throws; + else if( tag == "!shouldfail" ) + return TestCaseInfo::ShouldFail; + else if( tag == "!mayfail" ) + return TestCaseInfo::MayFail; + else if( tag == "!nonportable" ) + return TestCaseInfo::NonPortable; + else + return TestCaseInfo::None; + } + inline bool isReservedTag( std::string const& tag ) { + return parseSpecialTag( tag ) == TestCaseInfo::None && tag.size() > 0 && !std::isalnum( tag[0] ); + } + inline void enforceNotReservedTag( std::string const& tag, SourceLineInfo const& _lineInfo ) { + if( isReservedTag( tag ) ) { + std::ostringstream ss; + ss << Colour(Colour::Red) + << "Tag name [" << tag << "] not allowed.\n" + << "Tag names starting with non alpha-numeric characters are reserved\n" + << Colour(Colour::FileName) + << _lineInfo << '\n'; + throw std::runtime_error(ss.str()); + } + } + + TestCase makeTestCase( ITestCase* _testCase, + std::string const& _className, + std::string const& _name, + std::string const& _descOrTags, + SourceLineInfo const& _lineInfo ) + { + bool isHidden( startsWith( _name, "./" ) ); // Legacy support + + // Parse out tags + std::set tags; + std::string desc, tag; + bool inTag = false; + for( std::size_t i = 0; i < _descOrTags.size(); ++i ) { + char c = _descOrTags[i]; + if( !inTag ) { + if( c == '[' ) + inTag = true; + else + desc += c; + } + else { + if( c == ']' ) { + TestCaseInfo::SpecialProperties prop = parseSpecialTag( tag ); + if( prop == TestCaseInfo::IsHidden ) + isHidden = true; + else if( prop == TestCaseInfo::None ) + enforceNotReservedTag( tag, _lineInfo ); + + tags.insert( tag ); + tag.clear(); + inTag = false; + } + else + tag += c; + } + } + if( isHidden ) { + tags.insert( "hide" ); + tags.insert( "." 
); + } + + TestCaseInfo info( _name, _className, desc, tags, _lineInfo ); + return TestCase( _testCase, info ); + } + + void setTags( TestCaseInfo& testCaseInfo, std::set const& tags ) + { + testCaseInfo.tags = tags; + testCaseInfo.lcaseTags.clear(); + + std::ostringstream oss; + for( std::set::const_iterator it = tags.begin(), itEnd = tags.end(); it != itEnd; ++it ) { + oss << '[' << *it << ']'; + std::string lcaseTag = toLower( *it ); + testCaseInfo.properties = static_cast( testCaseInfo.properties | parseSpecialTag( lcaseTag ) ); + testCaseInfo.lcaseTags.insert( lcaseTag ); + } + testCaseInfo.tagsAsString = oss.str(); + } + + TestCaseInfo::TestCaseInfo( std::string const& _name, + std::string const& _className, + std::string const& _description, + std::set const& _tags, + SourceLineInfo const& _lineInfo ) + : name( _name ), + className( _className ), + description( _description ), + lineInfo( _lineInfo ), + properties( None ) + { + setTags( *this, _tags ); + } + + TestCaseInfo::TestCaseInfo( TestCaseInfo const& other ) + : name( other.name ), + className( other.className ), + description( other.description ), + tags( other.tags ), + lcaseTags( other.lcaseTags ), + tagsAsString( other.tagsAsString ), + lineInfo( other.lineInfo ), + properties( other.properties ) + {} + + bool TestCaseInfo::isHidden() const { + return ( properties & IsHidden ) != 0; + } + bool TestCaseInfo::throws() const { + return ( properties & Throws ) != 0; + } + bool TestCaseInfo::okToFail() const { + return ( properties & (ShouldFail | MayFail ) ) != 0; + } + bool TestCaseInfo::expectedToFail() const { + return ( properties & (ShouldFail ) ) != 0; + } + + TestCase::TestCase( ITestCase* testCase, TestCaseInfo const& info ) : TestCaseInfo( info ), test( testCase ) {} + + TestCase::TestCase( TestCase const& other ) + : TestCaseInfo( other ), + test( other.test ) + {} + + TestCase TestCase::withName( std::string const& _newName ) const { + TestCase other( *this ); + other.name = _newName; + 
return other; + } + + void TestCase::swap( TestCase& other ) { + test.swap( other.test ); + name.swap( other.name ); + className.swap( other.className ); + description.swap( other.description ); + tags.swap( other.tags ); + lcaseTags.swap( other.lcaseTags ); + tagsAsString.swap( other.tagsAsString ); + std::swap( TestCaseInfo::properties, static_cast( other ).properties ); + std::swap( lineInfo, other.lineInfo ); + } + + void TestCase::invoke() const { + test->invoke(); + } + + bool TestCase::operator == ( TestCase const& other ) const { + return test.get() == other.test.get() && + name == other.name && + className == other.className; + } + + bool TestCase::operator < ( TestCase const& other ) const { + return name < other.name; + } + TestCase& TestCase::operator = ( TestCase const& other ) { + TestCase temp( other ); + swap( temp ); + return *this; + } + + TestCaseInfo const& TestCase::getTestCaseInfo() const + { + return *this; + } + +} // end namespace Catch + +// #included from: catch_version.hpp +#define TWOBLUECUBES_CATCH_VERSION_HPP_INCLUDED + +namespace Catch { + + Version::Version + ( unsigned int _majorVersion, + unsigned int _minorVersion, + unsigned int _patchNumber, + char const * const _branchName, + unsigned int _buildNumber ) + : majorVersion( _majorVersion ), + minorVersion( _minorVersion ), + patchNumber( _patchNumber ), + branchName( _branchName ), + buildNumber( _buildNumber ) + {} + + std::ostream& operator << ( std::ostream& os, Version const& version ) { + os << version.majorVersion << '.' + << version.minorVersion << '.' + << version.patchNumber; + // branchName is never null -> 0th char is \0 if it is empty + if (version.branchName[0]) { + os << '-' << version.branchName + << '.' 
<< version.buildNumber; + } + return os; + } + + inline Version libraryVersion() { + static Version version( 1, 12, 2, "", 0 ); + return version; + } + +} + +// #included from: catch_message.hpp +#define TWOBLUECUBES_CATCH_MESSAGE_HPP_INCLUDED + +namespace Catch { + + MessageInfo::MessageInfo( std::string const& _macroName, + SourceLineInfo const& _lineInfo, + ResultWas::OfType _type ) + : macroName( _macroName ), + lineInfo( _lineInfo ), + type( _type ), + sequence( ++globalCount ) + {} + + // This may need protecting if threading support is added + unsigned int MessageInfo::globalCount = 0; + + //////////////////////////////////////////////////////////////////////////// + + ScopedMessage::ScopedMessage( MessageBuilder const& builder ) + : m_info( builder.m_info ) + { + m_info.message = builder.m_stream.str(); + getResultCapture().pushScopedMessage( m_info ); + } + ScopedMessage::ScopedMessage( ScopedMessage const& other ) + : m_info( other.m_info ) + {} + +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable:4996) // std::uncaught_exception is deprecated in C++17 +#endif + ScopedMessage::~ScopedMessage() { + if ( !std::uncaught_exception() ){ + getResultCapture().popScopedMessage(m_info); + } + } +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + +} // end namespace Catch + +// #included from: catch_legacy_reporter_adapter.hpp +#define TWOBLUECUBES_CATCH_LEGACY_REPORTER_ADAPTER_HPP_INCLUDED + +// #included from: catch_legacy_reporter_adapter.h +#define TWOBLUECUBES_CATCH_LEGACY_REPORTER_ADAPTER_H_INCLUDED + +namespace Catch +{ + // Deprecated + struct IReporter : IShared { + virtual ~IReporter(); + + virtual bool shouldRedirectStdout() const = 0; + + virtual void StartTesting() = 0; + virtual void EndTesting( Totals const& totals ) = 0; + virtual void StartGroup( std::string const& groupName ) = 0; + virtual void EndGroup( std::string const& groupName, Totals const& totals ) = 0; + virtual void StartTestCase( TestCaseInfo const& testInfo ) = 
0; + virtual void EndTestCase( TestCaseInfo const& testInfo, Totals const& totals, std::string const& stdOut, std::string const& stdErr ) = 0; + virtual void StartSection( std::string const& sectionName, std::string const& description ) = 0; + virtual void EndSection( std::string const& sectionName, Counts const& assertions ) = 0; + virtual void NoAssertionsInSection( std::string const& sectionName ) = 0; + virtual void NoAssertionsInTestCase( std::string const& testName ) = 0; + virtual void Aborted() = 0; + virtual void Result( AssertionResult const& result ) = 0; + }; + + class LegacyReporterAdapter : public SharedImpl + { + public: + LegacyReporterAdapter( Ptr const& legacyReporter ); + virtual ~LegacyReporterAdapter(); + + virtual ReporterPreferences getPreferences() const; + virtual void noMatchingTestCases( std::string const& ); + virtual void testRunStarting( TestRunInfo const& ); + virtual void testGroupStarting( GroupInfo const& groupInfo ); + virtual void testCaseStarting( TestCaseInfo const& testInfo ); + virtual void sectionStarting( SectionInfo const& sectionInfo ); + virtual void assertionStarting( AssertionInfo const& ); + virtual bool assertionEnded( AssertionStats const& assertionStats ); + virtual void sectionEnded( SectionStats const& sectionStats ); + virtual void testCaseEnded( TestCaseStats const& testCaseStats ); + virtual void testGroupEnded( TestGroupStats const& testGroupStats ); + virtual void testRunEnded( TestRunStats const& testRunStats ); + virtual void skipTest( TestCaseInfo const& ); + + private: + Ptr m_legacyReporter; + }; +} + +namespace Catch +{ + LegacyReporterAdapter::LegacyReporterAdapter( Ptr const& legacyReporter ) + : m_legacyReporter( legacyReporter ) + {} + LegacyReporterAdapter::~LegacyReporterAdapter() {} + + ReporterPreferences LegacyReporterAdapter::getPreferences() const { + ReporterPreferences prefs; + prefs.shouldRedirectStdOut = m_legacyReporter->shouldRedirectStdout(); + return prefs; + } + + void 
LegacyReporterAdapter::noMatchingTestCases( std::string const& ) {} + void LegacyReporterAdapter::testRunStarting( TestRunInfo const& ) { + m_legacyReporter->StartTesting(); + } + void LegacyReporterAdapter::testGroupStarting( GroupInfo const& groupInfo ) { + m_legacyReporter->StartGroup( groupInfo.name ); + } + void LegacyReporterAdapter::testCaseStarting( TestCaseInfo const& testInfo ) { + m_legacyReporter->StartTestCase( testInfo ); + } + void LegacyReporterAdapter::sectionStarting( SectionInfo const& sectionInfo ) { + m_legacyReporter->StartSection( sectionInfo.name, sectionInfo.description ); + } + void LegacyReporterAdapter::assertionStarting( AssertionInfo const& ) { + // Not on legacy interface + } + + bool LegacyReporterAdapter::assertionEnded( AssertionStats const& assertionStats ) { + if( assertionStats.assertionResult.getResultType() != ResultWas::Ok ) { + for( std::vector::const_iterator it = assertionStats.infoMessages.begin(), itEnd = assertionStats.infoMessages.end(); + it != itEnd; + ++it ) { + if( it->type == ResultWas::Info ) { + ResultBuilder rb( it->macroName.c_str(), it->lineInfo, "", ResultDisposition::Normal ); + rb << it->message; + rb.setResultType( ResultWas::Info ); + AssertionResult result = rb.build(); + m_legacyReporter->Result( result ); + } + } + } + m_legacyReporter->Result( assertionStats.assertionResult ); + return true; + } + void LegacyReporterAdapter::sectionEnded( SectionStats const& sectionStats ) { + if( sectionStats.missingAssertions ) + m_legacyReporter->NoAssertionsInSection( sectionStats.sectionInfo.name ); + m_legacyReporter->EndSection( sectionStats.sectionInfo.name, sectionStats.assertions ); + } + void LegacyReporterAdapter::testCaseEnded( TestCaseStats const& testCaseStats ) { + m_legacyReporter->EndTestCase + ( testCaseStats.testInfo, + testCaseStats.totals, + testCaseStats.stdOut, + testCaseStats.stdErr ); + } + void LegacyReporterAdapter::testGroupEnded( TestGroupStats const& testGroupStats ) { + if( 
testGroupStats.aborting ) + m_legacyReporter->Aborted(); + m_legacyReporter->EndGroup( testGroupStats.groupInfo.name, testGroupStats.totals ); + } + void LegacyReporterAdapter::testRunEnded( TestRunStats const& testRunStats ) { + m_legacyReporter->EndTesting( testRunStats.totals ); + } + void LegacyReporterAdapter::skipTest( TestCaseInfo const& ) { + } +} + +// #included from: catch_timer.hpp + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wc++11-long-long" +#endif + +#ifdef CATCH_PLATFORM_WINDOWS + +#else + +#include + +#endif + +namespace Catch { + + namespace { +#ifdef CATCH_PLATFORM_WINDOWS + UInt64 getCurrentTicks() { + static UInt64 hz=0, hzo=0; + if (!hz) { + QueryPerformanceFrequency( reinterpret_cast( &hz ) ); + QueryPerformanceCounter( reinterpret_cast( &hzo ) ); + } + UInt64 t; + QueryPerformanceCounter( reinterpret_cast( &t ) ); + return ((t-hzo)*1000000)/hz; + } +#else + UInt64 getCurrentTicks() { + timeval t; + gettimeofday(&t,CATCH_NULL); + return static_cast( t.tv_sec ) * 1000000ull + static_cast( t.tv_usec ); + } +#endif + } + + void Timer::start() { + m_ticks = getCurrentTicks(); + } + unsigned int Timer::getElapsedMicroseconds() const { + return static_cast(getCurrentTicks() - m_ticks); + } + unsigned int Timer::getElapsedMilliseconds() const { + return static_cast(getElapsedMicroseconds()/1000); + } + double Timer::getElapsedSeconds() const { + return getElapsedMicroseconds()/1000000.0; + } + +} // namespace Catch + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif +// #included from: catch_common.hpp +#define TWOBLUECUBES_CATCH_COMMON_HPP_INCLUDED + +#include +#include + +namespace Catch { + + bool startsWith( std::string const& s, std::string const& prefix ) { + return s.size() >= prefix.size() && std::equal(prefix.begin(), prefix.end(), s.begin()); + } + bool startsWith( std::string const& s, char prefix ) { + return !s.empty() && s[0] == prefix; + } + bool endsWith( std::string const& s, 
std::string const& suffix ) { + return s.size() >= suffix.size() && std::equal(suffix.rbegin(), suffix.rend(), s.rbegin()); + } + bool endsWith( std::string const& s, char suffix ) { + return !s.empty() && s[s.size()-1] == suffix; + } + bool contains( std::string const& s, std::string const& infix ) { + return s.find( infix ) != std::string::npos; + } + char toLowerCh(char c) { + return static_cast( std::tolower( c ) ); + } + void toLowerInPlace( std::string& s ) { + std::transform( s.begin(), s.end(), s.begin(), toLowerCh ); + } + std::string toLower( std::string const& s ) { + std::string lc = s; + toLowerInPlace( lc ); + return lc; + } + std::string trim( std::string const& str ) { + static char const* whitespaceChars = "\n\r\t "; + std::string::size_type start = str.find_first_not_of( whitespaceChars ); + std::string::size_type end = str.find_last_not_of( whitespaceChars ); + + return start != std::string::npos ? str.substr( start, 1+end-start ) : std::string(); + } + + bool replaceInPlace( std::string& str, std::string const& replaceThis, std::string const& withThis ) { + bool replaced = false; + std::size_t i = str.find( replaceThis ); + while( i != std::string::npos ) { + replaced = true; + str = str.substr( 0, i ) + withThis + str.substr( i+replaceThis.size() ); + if( i < str.size()-withThis.size() ) + i = str.find( replaceThis, i+withThis.size() ); + else + i = std::string::npos; + } + return replaced; + } + + pluralise::pluralise( std::size_t count, std::string const& label ) + : m_count( count ), + m_label( label ) + {} + + std::ostream& operator << ( std::ostream& os, pluralise const& pluraliser ) { + os << pluraliser.m_count << ' ' << pluraliser.m_label; + if( pluraliser.m_count != 1 ) + os << 's'; + return os; + } + + SourceLineInfo::SourceLineInfo() : file(""), line( 0 ){} + SourceLineInfo::SourceLineInfo( char const* _file, std::size_t _line ) + : file( _file ), + line( _line ) + {} + bool SourceLineInfo::empty() const { + return file[0] == '\0'; + 
} + bool SourceLineInfo::operator == ( SourceLineInfo const& other ) const { + return line == other.line && (file == other.file || std::strcmp(file, other.file) == 0); + } + bool SourceLineInfo::operator < ( SourceLineInfo const& other ) const { + return line < other.line || ( line == other.line && (std::strcmp(file, other.file) < 0)); + } + + void seedRng( IConfig const& config ) { + if( config.rngSeed() != 0 ) + std::srand( config.rngSeed() ); + } + unsigned int rngSeed() { + return getCurrentContext().getConfig()->rngSeed(); + } + + std::ostream& operator << ( std::ostream& os, SourceLineInfo const& info ) { +#ifndef __GNUG__ + os << info.file << '(' << info.line << ')'; +#else + os << info.file << ':' << info.line; +#endif + return os; + } + + void throwLogicError( std::string const& message, SourceLineInfo const& locationInfo ) { + std::ostringstream oss; + oss << locationInfo << ": Internal Catch error: '" << message << '\''; + if( alwaysTrue() ) + throw std::logic_error( oss.str() ); + } +} + +// #included from: catch_section.hpp +#define TWOBLUECUBES_CATCH_SECTION_HPP_INCLUDED + +namespace Catch { + + SectionInfo::SectionInfo + ( SourceLineInfo const& _lineInfo, + std::string const& _name, + std::string const& _description ) + : name( _name ), + description( _description ), + lineInfo( _lineInfo ) + {} + + Section::Section( SectionInfo const& info ) + : m_info( info ), + m_sectionIncluded( getResultCapture().sectionStarted( m_info, m_assertions ) ) + { + m_timer.start(); + } + +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable:4996) // std::uncaught_exception is deprecated in C++17 +#endif + Section::~Section() { + if( m_sectionIncluded ) { + SectionEndInfo endInfo( m_info, m_assertions, m_timer.getElapsedSeconds() ); + if( std::uncaught_exception() ) + getResultCapture().sectionEndedEarly( endInfo ); + else + getResultCapture().sectionEnded( endInfo ); + } + } +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + + // This indicates 
whether the section should be executed or not + Section::operator bool() const { + return m_sectionIncluded; + } + +} // end namespace Catch + +// #included from: catch_debugger.hpp +#define TWOBLUECUBES_CATCH_DEBUGGER_HPP_INCLUDED + +#ifdef CATCH_PLATFORM_MAC + + #include + #include + #include + #include + #include + + namespace Catch{ + + // The following function is taken directly from the following technical note: + // http://developer.apple.com/library/mac/#qa/qa2004/qa1361.html + + // Returns true if the current process is being debugged (either + // running under the debugger or has a debugger attached post facto). + bool isDebuggerActive(){ + + int mib[4]; + struct kinfo_proc info; + size_t size; + + // Initialize the flags so that, if sysctl fails for some bizarre + // reason, we get a predictable result. + + info.kp_proc.p_flag = 0; + + // Initialize mib, which tells sysctl the info we want, in this case + // we're looking for information about a specific process ID. + + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + mib[2] = KERN_PROC_PID; + mib[3] = getpid(); + + // Call sysctl. + + size = sizeof(info); + if( sysctl(mib, sizeof(mib) / sizeof(*mib), &info, &size, CATCH_NULL, 0) != 0 ) { + Catch::cerr() << "\n** Call to sysctl failed - unable to determine if debugger is active **\n" << std::endl; + return false; + } + + // We're being debugged if the P_TRACED flag is set. + + return ( (info.kp_proc.p_flag & P_TRACED) != 0 ); + } + } // namespace Catch + +#elif defined(CATCH_PLATFORM_LINUX) + #include + #include + + namespace Catch{ + // The standard POSIX way of detecting a debugger is to attempt to + // ptrace() the process, but this needs to be done from a child and not + // this process itself to still allow attaching to this process later + // if wanted, so is rather heavy. 
Under Linux we have the PID of the + // "debugger" (which doesn't need to be gdb, of course, it could also + // be strace, for example) in /proc/$PID/status, so just get it from + // there instead. + bool isDebuggerActive(){ + // Libstdc++ has a bug, where std::ifstream sets errno to 0 + // This way our users can properly assert over errno values + ErrnoGuard guard; + std::ifstream in("/proc/self/status"); + for( std::string line; std::getline(in, line); ) { + static const int PREFIX_LEN = 11; + if( line.compare(0, PREFIX_LEN, "TracerPid:\t") == 0 ) { + // We're traced if the PID is not 0 and no other PID starts + // with 0 digit, so it's enough to check for just a single + // character. + return line.length() > PREFIX_LEN && line[PREFIX_LEN] != '0'; + } + } + + return false; + } + } // namespace Catch +#elif defined(_MSC_VER) + extern "C" __declspec(dllimport) int __stdcall IsDebuggerPresent(); + namespace Catch { + bool isDebuggerActive() { + return IsDebuggerPresent() != 0; + } + } +#elif defined(__MINGW32__) + extern "C" __declspec(dllimport) int __stdcall IsDebuggerPresent(); + namespace Catch { + bool isDebuggerActive() { + return IsDebuggerPresent() != 0; + } + } +#else + namespace Catch { + inline bool isDebuggerActive() { return false; } + } +#endif // Platform + +#ifdef CATCH_PLATFORM_WINDOWS + + namespace Catch { + void writeToDebugConsole( std::string const& text ) { + ::OutputDebugStringA( text.c_str() ); + } + } +#else + namespace Catch { + void writeToDebugConsole( std::string const& text ) { + // !TBD: Need a version for Mac/ XCode and other IDEs + Catch::cout() << text; + } + } +#endif // Platform + +// #included from: catch_tostring.hpp +#define TWOBLUECUBES_CATCH_TOSTRING_HPP_INCLUDED + +namespace Catch { + +namespace Detail { + + const std::string unprintableString = "{?}"; + + namespace { + const int hexThreshold = 255; + + struct Endianness { + enum Arch { Big, Little }; + + static Arch which() { + union _{ + int asInt; + char asChar[sizeof 
(int)]; + } u; + + u.asInt = 1; + return ( u.asChar[sizeof(int)-1] == 1 ) ? Big : Little; + } + }; + } + + std::string rawMemoryToString( const void *object, std::size_t size ) + { + // Reverse order for little endian architectures + int i = 0, end = static_cast( size ), inc = 1; + if( Endianness::which() == Endianness::Little ) { + i = end-1; + end = inc = -1; + } + + unsigned char const *bytes = static_cast(object); + std::ostringstream os; + os << "0x" << std::setfill('0') << std::hex; + for( ; i != end; i += inc ) + os << std::setw(2) << static_cast(bytes[i]); + return os.str(); + } +} + +std::string toString( std::string const& value ) { + std::string s = value; + if( getCurrentContext().getConfig()->showInvisibles() ) { + for(size_t i = 0; i < s.size(); ++i ) { + std::string subs; + switch( s[i] ) { + case '\n': subs = "\\n"; break; + case '\t': subs = "\\t"; break; + default: break; + } + if( !subs.empty() ) { + s = s.substr( 0, i ) + subs + s.substr( i+1 ); + ++i; + } + } + } + return '"' + s + '"'; +} +std::string toString( std::wstring const& value ) { + + std::string s; + s.reserve( value.size() ); + for(size_t i = 0; i < value.size(); ++i ) + s += value[i] <= 0xff ? static_cast( value[i] ) : '?'; + return Catch::toString( s ); +} + +std::string toString( const char* const value ) { + return value ? Catch::toString( std::string( value ) ) : std::string( "{null string}" ); +} + +std::string toString( char* const value ) { + return Catch::toString( static_cast( value ) ); +} + +std::string toString( const wchar_t* const value ) +{ + return value ? 
Catch::toString( std::wstring(value) ) : std::string( "{null string}" ); +} + +std::string toString( wchar_t* const value ) +{ + return Catch::toString( static_cast( value ) ); +} + +std::string toString( int value ) { + std::ostringstream oss; + oss << value; + if( value > Detail::hexThreshold ) + oss << " (0x" << std::hex << value << ')'; + return oss.str(); +} + +std::string toString( unsigned long value ) { + std::ostringstream oss; + oss << value; + if( value > Detail::hexThreshold ) + oss << " (0x" << std::hex << value << ')'; + return oss.str(); +} + +std::string toString( unsigned int value ) { + return Catch::toString( static_cast( value ) ); +} + +template +std::string fpToString( T value, int precision ) { + std::ostringstream oss; + oss << std::setprecision( precision ) + << std::fixed + << value; + std::string d = oss.str(); + std::size_t i = d.find_last_not_of( '0' ); + if( i != std::string::npos && i != d.size()-1 ) { + if( d[i] == '.' ) + i++; + d = d.substr( 0, i+1 ); + } + return d; +} + +std::string toString( const double value ) { + return fpToString( value, 10 ); +} +std::string toString( const float value ) { + return fpToString( value, 5 ) + 'f'; +} + +std::string toString( bool value ) { + return value ? 
"true" : "false"; +} + +std::string toString( char value ) { + if ( value == '\r' ) + return "'\\r'"; + if ( value == '\f' ) + return "'\\f'"; + if ( value == '\n' ) + return "'\\n'"; + if ( value == '\t' ) + return "'\\t'"; + if ( '\0' <= value && value < ' ' ) + return toString( static_cast( value ) ); + char chstr[] = "' '"; + chstr[1] = value; + return chstr; +} + +std::string toString( signed char value ) { + return toString( static_cast( value ) ); +} + +std::string toString( unsigned char value ) { + return toString( static_cast( value ) ); +} + +#ifdef CATCH_CONFIG_CPP11_LONG_LONG +std::string toString( long long value ) { + std::ostringstream oss; + oss << value; + if( value > Detail::hexThreshold ) + oss << " (0x" << std::hex << value << ')'; + return oss.str(); +} +std::string toString( unsigned long long value ) { + std::ostringstream oss; + oss << value; + if( value > Detail::hexThreshold ) + oss << " (0x" << std::hex << value << ')'; + return oss.str(); +} +#endif + +#ifdef CATCH_CONFIG_CPP11_NULLPTR +std::string toString( std::nullptr_t ) { + return "nullptr"; +} +#endif + +#ifdef __OBJC__ + std::string toString( NSString const * const& nsstring ) { + if( !nsstring ) + return "nil"; + return "@" + toString([nsstring UTF8String]); + } + std::string toString( NSString * CATCH_ARC_STRONG & nsstring ) { + if( !nsstring ) + return "nil"; + return "@" + toString([nsstring UTF8String]); + } + std::string toString( NSObject* const& nsObject ) { + return toString( [nsObject description] ); + } +#endif + +} // end namespace Catch + +// #included from: catch_result_builder.hpp +#define TWOBLUECUBES_CATCH_RESULT_BUILDER_HPP_INCLUDED + +#include + +namespace Catch { + + ResultBuilder::ResultBuilder( char const* macroName, + SourceLineInfo const& lineInfo, + char const* capturedExpression, + ResultDisposition::Flags resultDisposition, + char const* secondArg ) + : m_assertionInfo( macroName, lineInfo, capturedExpression, resultDisposition, secondArg ), + 
m_shouldDebugBreak( false ), + m_shouldThrow( false ), + m_guardException( false ), + m_usedStream( false ) + {} + + ResultBuilder::~ResultBuilder() { +#if defined(CATCH_CONFIG_FAST_COMPILE) + if ( m_guardException ) { + stream().oss << "Exception translation was disabled by CATCH_CONFIG_FAST_COMPILE"; + captureResult( ResultWas::ThrewException ); + getCurrentContext().getResultCapture()->exceptionEarlyReported(); + } +#endif + } + + ResultBuilder& ResultBuilder::setResultType( ResultWas::OfType result ) { + m_data.resultType = result; + return *this; + } + ResultBuilder& ResultBuilder::setResultType( bool result ) { + m_data.resultType = result ? ResultWas::Ok : ResultWas::ExpressionFailed; + return *this; + } + + void ResultBuilder::endExpression( DecomposedExpression const& expr ) { + // Flip bool results if FalseTest flag is set + if( isFalseTest( m_assertionInfo.resultDisposition ) ) { + m_data.negate( expr.isBinaryExpression() ); + } + + getResultCapture().assertionRun(); + + if(getCurrentContext().getConfig()->includeSuccessfulResults() || m_data.resultType != ResultWas::Ok) + { + AssertionResult result = build( expr ); + handleResult( result ); + } + else + getResultCapture().assertionPassed(); + } + + void ResultBuilder::useActiveException( ResultDisposition::Flags resultDisposition ) { + m_assertionInfo.resultDisposition = resultDisposition; + stream().oss << Catch::translateActiveException(); + captureResult( ResultWas::ThrewException ); + } + + void ResultBuilder::captureResult( ResultWas::OfType resultType ) { + setResultType( resultType ); + captureExpression(); + } + + void ResultBuilder::captureExpectedException( std::string const& expectedMessage ) { + if( expectedMessage.empty() ) + captureExpectedException( Matchers::Impl::MatchAllOf() ); + else + captureExpectedException( Matchers::Equals( expectedMessage ) ); + } + + void ResultBuilder::captureExpectedException( Matchers::Impl::MatcherBase const& matcher ) { + + assert( !isFalseTest( 
m_assertionInfo.resultDisposition ) ); + AssertionResultData data = m_data; + data.resultType = ResultWas::Ok; + data.reconstructedExpression = capturedExpressionWithSecondArgument(m_assertionInfo.capturedExpression, m_assertionInfo.secondArg); + + std::string actualMessage = Catch::translateActiveException(); + if( !matcher.match( actualMessage ) ) { + data.resultType = ResultWas::ExpressionFailed; + data.reconstructedExpression = actualMessage; + } + AssertionResult result( m_assertionInfo, data ); + handleResult( result ); + } + + void ResultBuilder::captureExpression() { + AssertionResult result = build(); + handleResult( result ); + } + + void ResultBuilder::handleResult( AssertionResult const& result ) + { + getResultCapture().assertionEnded( result ); + + if( !result.isOk() ) { + if( getCurrentContext().getConfig()->shouldDebugBreak() ) + m_shouldDebugBreak = true; + if( getCurrentContext().getRunner()->aborting() || (m_assertionInfo.resultDisposition & ResultDisposition::Normal) ) + m_shouldThrow = true; + } + } + + void ResultBuilder::react() { +#if defined(CATCH_CONFIG_FAST_COMPILE) + if (m_shouldDebugBreak) { + /////////////////////////////////////////////////////////////////// + // To inspect the state during test, you need to go one level up the callstack + // To go back to the test and change execution, jump over the throw statement + /////////////////////////////////////////////////////////////////// + CATCH_BREAK_INTO_DEBUGGER(); + } +#endif + if( m_shouldThrow ) + throw Catch::TestFailureException(); + } + + bool ResultBuilder::shouldDebugBreak() const { return m_shouldDebugBreak; } + bool ResultBuilder::allowThrows() const { return getCurrentContext().getConfig()->allowThrows(); } + + AssertionResult ResultBuilder::build() const + { + return build( *this ); + } + + // CAVEAT: The returned AssertionResult stores a pointer to the argument expr, + // a temporary DecomposedExpression, which in turn holds references to + // operands, possibly temporary 
as well. + // It should immediately be passed to handleResult; if the expression + // needs to be reported, its string expansion must be composed before + // the temporaries are destroyed. + AssertionResult ResultBuilder::build( DecomposedExpression const& expr ) const + { + assert( m_data.resultType != ResultWas::Unknown ); + AssertionResultData data = m_data; + + if(m_usedStream) + data.message = m_stream().oss.str(); + data.decomposedExpression = &expr; // for lazy reconstruction + return AssertionResult( m_assertionInfo, data ); + } + + void ResultBuilder::reconstructExpression( std::string& dest ) const { + dest = capturedExpressionWithSecondArgument(m_assertionInfo.capturedExpression, m_assertionInfo.secondArg); + } + + void ResultBuilder::setExceptionGuard() { + m_guardException = true; + } + void ResultBuilder::unsetExceptionGuard() { + m_guardException = false; + } + +} // end namespace Catch + +// #included from: catch_tag_alias_registry.hpp +#define TWOBLUECUBES_CATCH_TAG_ALIAS_REGISTRY_HPP_INCLUDED + +namespace Catch { + + TagAliasRegistry::~TagAliasRegistry() {} + + Option TagAliasRegistry::find( std::string const& alias ) const { + std::map::const_iterator it = m_registry.find( alias ); + if( it != m_registry.end() ) + return it->second; + else + return Option(); + } + + std::string TagAliasRegistry::expandAliases( std::string const& unexpandedTestSpec ) const { + std::string expandedTestSpec = unexpandedTestSpec; + for( std::map::const_iterator it = m_registry.begin(), itEnd = m_registry.end(); + it != itEnd; + ++it ) { + std::size_t pos = expandedTestSpec.find( it->first ); + if( pos != std::string::npos ) { + expandedTestSpec = expandedTestSpec.substr( 0, pos ) + + it->second.tag + + expandedTestSpec.substr( pos + it->first.size() ); + } + } + return expandedTestSpec; + } + + void TagAliasRegistry::add( std::string const& alias, std::string const& tag, SourceLineInfo const& lineInfo ) { + + if( !startsWith( alias, "[@" ) || !endsWith( alias, ']' ) 
) { + std::ostringstream oss; + oss << Colour( Colour::Red ) + << "error: tag alias, \"" << alias << "\" is not of the form [@alias name].\n" + << Colour( Colour::FileName ) + << lineInfo << '\n'; + throw std::domain_error( oss.str().c_str() ); + } + if( !m_registry.insert( std::make_pair( alias, TagAlias( tag, lineInfo ) ) ).second ) { + std::ostringstream oss; + oss << Colour( Colour::Red ) + << "error: tag alias, \"" << alias << "\" already registered.\n" + << "\tFirst seen at " + << Colour( Colour::Red ) << find(alias)->lineInfo << '\n' + << Colour( Colour::Red ) << "\tRedefined at " + << Colour( Colour::FileName) << lineInfo << '\n'; + throw std::domain_error( oss.str().c_str() ); + } + } + + ITagAliasRegistry::~ITagAliasRegistry() {} + + ITagAliasRegistry const& ITagAliasRegistry::get() { + return getRegistryHub().getTagAliasRegistry(); + } + + RegistrarForTagAliases::RegistrarForTagAliases( char const* alias, char const* tag, SourceLineInfo const& lineInfo ) { + getMutableRegistryHub().registerTagAlias( alias, tag, lineInfo ); + } + +} // end namespace Catch + +// #included from: catch_matchers_string.hpp + +namespace Catch { +namespace Matchers { + + namespace StdString { + + CasedString::CasedString( std::string const& str, CaseSensitive::Choice caseSensitivity ) + : m_caseSensitivity( caseSensitivity ), + m_str( adjustString( str ) ) + {} + std::string CasedString::adjustString( std::string const& str ) const { + return m_caseSensitivity == CaseSensitive::No + ? toLower( str ) + : str; + } + std::string CasedString::caseSensitivitySuffix() const { + return m_caseSensitivity == CaseSensitive::No + ? 
" (case insensitive)" + : std::string(); + } + + StringMatcherBase::StringMatcherBase( std::string const& operation, CasedString const& comparator ) + : m_comparator( comparator ), + m_operation( operation ) { + } + + std::string StringMatcherBase::describe() const { + std::string description; + description.reserve(5 + m_operation.size() + m_comparator.m_str.size() + + m_comparator.caseSensitivitySuffix().size()); + description += m_operation; + description += ": \""; + description += m_comparator.m_str; + description += "\""; + description += m_comparator.caseSensitivitySuffix(); + return description; + } + + EqualsMatcher::EqualsMatcher( CasedString const& comparator ) : StringMatcherBase( "equals", comparator ) {} + + bool EqualsMatcher::match( std::string const& source ) const { + return m_comparator.adjustString( source ) == m_comparator.m_str; + } + + ContainsMatcher::ContainsMatcher( CasedString const& comparator ) : StringMatcherBase( "contains", comparator ) {} + + bool ContainsMatcher::match( std::string const& source ) const { + return contains( m_comparator.adjustString( source ), m_comparator.m_str ); + } + + StartsWithMatcher::StartsWithMatcher( CasedString const& comparator ) : StringMatcherBase( "starts with", comparator ) {} + + bool StartsWithMatcher::match( std::string const& source ) const { + return startsWith( m_comparator.adjustString( source ), m_comparator.m_str ); + } + + EndsWithMatcher::EndsWithMatcher( CasedString const& comparator ) : StringMatcherBase( "ends with", comparator ) {} + + bool EndsWithMatcher::match( std::string const& source ) const { + return endsWith( m_comparator.adjustString( source ), m_comparator.m_str ); + } + + } // namespace StdString + + StdString::EqualsMatcher Equals( std::string const& str, CaseSensitive::Choice caseSensitivity ) { + return StdString::EqualsMatcher( StdString::CasedString( str, caseSensitivity) ); + } + StdString::ContainsMatcher Contains( std::string const& str, CaseSensitive::Choice 
caseSensitivity ) { + return StdString::ContainsMatcher( StdString::CasedString( str, caseSensitivity) ); + } + StdString::EndsWithMatcher EndsWith( std::string const& str, CaseSensitive::Choice caseSensitivity ) { + return StdString::EndsWithMatcher( StdString::CasedString( str, caseSensitivity) ); + } + StdString::StartsWithMatcher StartsWith( std::string const& str, CaseSensitive::Choice caseSensitivity ) { + return StdString::StartsWithMatcher( StdString::CasedString( str, caseSensitivity) ); + } + +} // namespace Matchers +} // namespace Catch +// #included from: ../reporters/catch_reporter_multi.hpp +#define TWOBLUECUBES_CATCH_REPORTER_MULTI_HPP_INCLUDED + +namespace Catch { + +class MultipleReporters : public SharedImpl { + typedef std::vector > Reporters; + Reporters m_reporters; + +public: + void add( Ptr const& reporter ) { + m_reporters.push_back( reporter ); + } + +public: // IStreamingReporter + + virtual ReporterPreferences getPreferences() const CATCH_OVERRIDE { + return m_reporters[0]->getPreferences(); + } + + virtual void noMatchingTestCases( std::string const& spec ) CATCH_OVERRIDE { + for( Reporters::const_iterator it = m_reporters.begin(), itEnd = m_reporters.end(); + it != itEnd; + ++it ) + (*it)->noMatchingTestCases( spec ); + } + + virtual void testRunStarting( TestRunInfo const& testRunInfo ) CATCH_OVERRIDE { + for( Reporters::const_iterator it = m_reporters.begin(), itEnd = m_reporters.end(); + it != itEnd; + ++it ) + (*it)->testRunStarting( testRunInfo ); + } + + virtual void testGroupStarting( GroupInfo const& groupInfo ) CATCH_OVERRIDE { + for( Reporters::const_iterator it = m_reporters.begin(), itEnd = m_reporters.end(); + it != itEnd; + ++it ) + (*it)->testGroupStarting( groupInfo ); + } + + virtual void testCaseStarting( TestCaseInfo const& testInfo ) CATCH_OVERRIDE { + for( Reporters::const_iterator it = m_reporters.begin(), itEnd = m_reporters.end(); + it != itEnd; + ++it ) + (*it)->testCaseStarting( testInfo ); + } + + virtual 
void sectionStarting( SectionInfo const& sectionInfo ) CATCH_OVERRIDE { + for( Reporters::const_iterator it = m_reporters.begin(), itEnd = m_reporters.end(); + it != itEnd; + ++it ) + (*it)->sectionStarting( sectionInfo ); + } + + virtual void assertionStarting( AssertionInfo const& assertionInfo ) CATCH_OVERRIDE { + for( Reporters::const_iterator it = m_reporters.begin(), itEnd = m_reporters.end(); + it != itEnd; + ++it ) + (*it)->assertionStarting( assertionInfo ); + } + + // The return value indicates if the messages buffer should be cleared: + virtual bool assertionEnded( AssertionStats const& assertionStats ) CATCH_OVERRIDE { + bool clearBuffer = false; + for( Reporters::const_iterator it = m_reporters.begin(), itEnd = m_reporters.end(); + it != itEnd; + ++it ) + clearBuffer |= (*it)->assertionEnded( assertionStats ); + return clearBuffer; + } + + virtual void sectionEnded( SectionStats const& sectionStats ) CATCH_OVERRIDE { + for( Reporters::const_iterator it = m_reporters.begin(), itEnd = m_reporters.end(); + it != itEnd; + ++it ) + (*it)->sectionEnded( sectionStats ); + } + + virtual void testCaseEnded( TestCaseStats const& testCaseStats ) CATCH_OVERRIDE { + for( Reporters::const_iterator it = m_reporters.begin(), itEnd = m_reporters.end(); + it != itEnd; + ++it ) + (*it)->testCaseEnded( testCaseStats ); + } + + virtual void testGroupEnded( TestGroupStats const& testGroupStats ) CATCH_OVERRIDE { + for( Reporters::const_iterator it = m_reporters.begin(), itEnd = m_reporters.end(); + it != itEnd; + ++it ) + (*it)->testGroupEnded( testGroupStats ); + } + + virtual void testRunEnded( TestRunStats const& testRunStats ) CATCH_OVERRIDE { + for( Reporters::const_iterator it = m_reporters.begin(), itEnd = m_reporters.end(); + it != itEnd; + ++it ) + (*it)->testRunEnded( testRunStats ); + } + + virtual void skipTest( TestCaseInfo const& testInfo ) CATCH_OVERRIDE { + for( Reporters::const_iterator it = m_reporters.begin(), itEnd = m_reporters.end(); + it != itEnd; + 
++it ) + (*it)->skipTest( testInfo ); + } + + virtual MultipleReporters* tryAsMulti() CATCH_OVERRIDE { + return this; + } + +}; + +Ptr addReporter( Ptr const& existingReporter, Ptr const& additionalReporter ) { + Ptr resultingReporter; + + if( existingReporter ) { + MultipleReporters* multi = existingReporter->tryAsMulti(); + if( !multi ) { + multi = new MultipleReporters; + resultingReporter = Ptr( multi ); + if( existingReporter ) + multi->add( existingReporter ); + } + else + resultingReporter = existingReporter; + multi->add( additionalReporter ); + } + else + resultingReporter = additionalReporter; + + return resultingReporter; +} + +} // end namespace Catch + +// #included from: ../reporters/catch_reporter_xml.hpp +#define TWOBLUECUBES_CATCH_REPORTER_XML_HPP_INCLUDED + +// #included from: catch_reporter_bases.hpp +#define TWOBLUECUBES_CATCH_REPORTER_BASES_HPP_INCLUDED + +#include +#include +#include +#include + +namespace Catch { + + namespace { + // Because formatting using c++ streams is stateful, drop down to C is required + // Alternatively we could use stringstream, but its performance is... not good. 
+ std::string getFormattedDuration( double duration ) { + // Max exponent + 1 is required to represent the whole part + // + 1 for decimal point + // + 3 for the 3 decimal places + // + 1 for null terminator + const size_t maxDoubleSize = DBL_MAX_10_EXP + 1 + 1 + 3 + 1; + char buffer[maxDoubleSize]; + + // Save previous errno, to prevent sprintf from overwriting it + ErrnoGuard guard; +#ifdef _MSC_VER + sprintf_s(buffer, "%.3f", duration); +#else + sprintf(buffer, "%.3f", duration); +#endif + return std::string(buffer); + } + } + + struct StreamingReporterBase : SharedImpl { + + StreamingReporterBase( ReporterConfig const& _config ) + : m_config( _config.fullConfig() ), + stream( _config.stream() ) + { + m_reporterPrefs.shouldRedirectStdOut = false; + } + + virtual ReporterPreferences getPreferences() const CATCH_OVERRIDE { + return m_reporterPrefs; + } + + virtual ~StreamingReporterBase() CATCH_OVERRIDE; + + virtual void noMatchingTestCases( std::string const& ) CATCH_OVERRIDE {} + + virtual void testRunStarting( TestRunInfo const& _testRunInfo ) CATCH_OVERRIDE { + currentTestRunInfo = _testRunInfo; + } + virtual void testGroupStarting( GroupInfo const& _groupInfo ) CATCH_OVERRIDE { + currentGroupInfo = _groupInfo; + } + + virtual void testCaseStarting( TestCaseInfo const& _testInfo ) CATCH_OVERRIDE { + currentTestCaseInfo = _testInfo; + } + virtual void sectionStarting( SectionInfo const& _sectionInfo ) CATCH_OVERRIDE { + m_sectionStack.push_back( _sectionInfo ); + } + + virtual void sectionEnded( SectionStats const& /* _sectionStats */ ) CATCH_OVERRIDE { + m_sectionStack.pop_back(); + } + virtual void testCaseEnded( TestCaseStats const& /* _testCaseStats */ ) CATCH_OVERRIDE { + currentTestCaseInfo.reset(); + } + virtual void testGroupEnded( TestGroupStats const& /* _testGroupStats */ ) CATCH_OVERRIDE { + currentGroupInfo.reset(); + } + virtual void testRunEnded( TestRunStats const& /* _testRunStats */ ) CATCH_OVERRIDE { + currentTestCaseInfo.reset(); + 
currentGroupInfo.reset(); + currentTestRunInfo.reset(); + } + + virtual void skipTest( TestCaseInfo const& ) CATCH_OVERRIDE { + // Don't do anything with this by default. + // It can optionally be overridden in the derived class. + } + + Ptr m_config; + std::ostream& stream; + + LazyStat currentTestRunInfo; + LazyStat currentGroupInfo; + LazyStat currentTestCaseInfo; + + std::vector m_sectionStack; + ReporterPreferences m_reporterPrefs; + }; + + struct CumulativeReporterBase : SharedImpl { + template + struct Node : SharedImpl<> { + explicit Node( T const& _value ) : value( _value ) {} + virtual ~Node() {} + + typedef std::vector > ChildNodes; + T value; + ChildNodes children; + }; + struct SectionNode : SharedImpl<> { + explicit SectionNode( SectionStats const& _stats ) : stats( _stats ) {} + virtual ~SectionNode(); + + bool operator == ( SectionNode const& other ) const { + return stats.sectionInfo.lineInfo == other.stats.sectionInfo.lineInfo; + } + bool operator == ( Ptr const& other ) const { + return operator==( *other ); + } + + SectionStats stats; + typedef std::vector > ChildSections; + typedef std::vector Assertions; + ChildSections childSections; + Assertions assertions; + std::string stdOut; + std::string stdErr; + }; + + struct BySectionInfo { + BySectionInfo( SectionInfo const& other ) : m_other( other ) {} + BySectionInfo( BySectionInfo const& other ) : m_other( other.m_other ) {} + bool operator() ( Ptr const& node ) const { + return ((node->stats.sectionInfo.name == m_other.name) && + (node->stats.sectionInfo.lineInfo == m_other.lineInfo)); + } + private: + void operator=( BySectionInfo const& ); + SectionInfo const& m_other; + }; + + typedef Node TestCaseNode; + typedef Node TestGroupNode; + typedef Node TestRunNode; + + CumulativeReporterBase( ReporterConfig const& _config ) + : m_config( _config.fullConfig() ), + stream( _config.stream() ) + { + m_reporterPrefs.shouldRedirectStdOut = false; + } + ~CumulativeReporterBase(); + + virtual 
ReporterPreferences getPreferences() const CATCH_OVERRIDE { + return m_reporterPrefs; + } + + virtual void testRunStarting( TestRunInfo const& ) CATCH_OVERRIDE {} + virtual void testGroupStarting( GroupInfo const& ) CATCH_OVERRIDE {} + + virtual void testCaseStarting( TestCaseInfo const& ) CATCH_OVERRIDE {} + + virtual void sectionStarting( SectionInfo const& sectionInfo ) CATCH_OVERRIDE { + SectionStats incompleteStats( sectionInfo, Counts(), 0, false ); + Ptr node; + if( m_sectionStack.empty() ) { + if( !m_rootSection ) + m_rootSection = new SectionNode( incompleteStats ); + node = m_rootSection; + } + else { + SectionNode& parentNode = *m_sectionStack.back(); + SectionNode::ChildSections::const_iterator it = + std::find_if( parentNode.childSections.begin(), + parentNode.childSections.end(), + BySectionInfo( sectionInfo ) ); + if( it == parentNode.childSections.end() ) { + node = new SectionNode( incompleteStats ); + parentNode.childSections.push_back( node ); + } + else + node = *it; + } + m_sectionStack.push_back( node ); + m_deepestSection = node; + } + + virtual void assertionStarting( AssertionInfo const& ) CATCH_OVERRIDE {} + + virtual bool assertionEnded( AssertionStats const& assertionStats ) CATCH_OVERRIDE { + assert( !m_sectionStack.empty() ); + SectionNode& sectionNode = *m_sectionStack.back(); + sectionNode.assertions.push_back( assertionStats ); + // AssertionResult holds a pointer to a temporary DecomposedExpression, + // which getExpandedExpression() calls to build the expression string. + // Our section stack copy of the assertionResult will likely outlive the + // temporary, so it must be expanded or discarded now to avoid calling + // a destroyed object later. 
+ prepareExpandedExpression( sectionNode.assertions.back().assertionResult ); + return true; + } + virtual void sectionEnded( SectionStats const& sectionStats ) CATCH_OVERRIDE { + assert( !m_sectionStack.empty() ); + SectionNode& node = *m_sectionStack.back(); + node.stats = sectionStats; + m_sectionStack.pop_back(); + } + virtual void testCaseEnded( TestCaseStats const& testCaseStats ) CATCH_OVERRIDE { + Ptr node = new TestCaseNode( testCaseStats ); + assert( m_sectionStack.size() == 0 ); + node->children.push_back( m_rootSection ); + m_testCases.push_back( node ); + m_rootSection.reset(); + + assert( m_deepestSection ); + m_deepestSection->stdOut = testCaseStats.stdOut; + m_deepestSection->stdErr = testCaseStats.stdErr; + } + virtual void testGroupEnded( TestGroupStats const& testGroupStats ) CATCH_OVERRIDE { + Ptr node = new TestGroupNode( testGroupStats ); + node->children.swap( m_testCases ); + m_testGroups.push_back( node ); + } + virtual void testRunEnded( TestRunStats const& testRunStats ) CATCH_OVERRIDE { + Ptr node = new TestRunNode( testRunStats ); + node->children.swap( m_testGroups ); + m_testRuns.push_back( node ); + testRunEndedCumulative(); + } + virtual void testRunEndedCumulative() = 0; + + virtual void skipTest( TestCaseInfo const& ) CATCH_OVERRIDE {} + + virtual void prepareExpandedExpression( AssertionResult& result ) const { + if( result.isOk() ) + result.discardDecomposedExpression(); + else + result.expandDecomposedExpression(); + } + + Ptr m_config; + std::ostream& stream; + std::vector m_assertions; + std::vector > > m_sections; + std::vector > m_testCases; + std::vector > m_testGroups; + + std::vector > m_testRuns; + + Ptr m_rootSection; + Ptr m_deepestSection; + std::vector > m_sectionStack; + ReporterPreferences m_reporterPrefs; + + }; + + template + char const* getLineOfChars() { + static char line[CATCH_CONFIG_CONSOLE_WIDTH] = {0}; + if( !*line ) { + std::memset( line, C, CATCH_CONFIG_CONSOLE_WIDTH-1 ); + 
line[CATCH_CONFIG_CONSOLE_WIDTH-1] = 0; + } + return line; + } + + struct TestEventListenerBase : StreamingReporterBase { + TestEventListenerBase( ReporterConfig const& _config ) + : StreamingReporterBase( _config ) + {} + + virtual void assertionStarting( AssertionInfo const& ) CATCH_OVERRIDE {} + virtual bool assertionEnded( AssertionStats const& ) CATCH_OVERRIDE { + return false; + } + }; + +} // end namespace Catch + +// #included from: ../internal/catch_reporter_registrars.hpp +#define TWOBLUECUBES_CATCH_REPORTER_REGISTRARS_HPP_INCLUDED + +namespace Catch { + + template + class LegacyReporterRegistrar { + + class ReporterFactory : public IReporterFactory { + virtual IStreamingReporter* create( ReporterConfig const& config ) const { + return new LegacyReporterAdapter( new T( config ) ); + } + + virtual std::string getDescription() const { + return T::getDescription(); + } + }; + + public: + + LegacyReporterRegistrar( std::string const& name ) { + getMutableRegistryHub().registerReporter( name, new ReporterFactory() ); + } + }; + + template + class ReporterRegistrar { + + class ReporterFactory : public SharedImpl { + + // *** Please Note ***: + // - If you end up here looking at a compiler error because it's trying to register + // your custom reporter class be aware that the native reporter interface has changed + // to IStreamingReporter. The "legacy" interface, IReporter, is still supported via + // an adapter. Just use REGISTER_LEGACY_REPORTER to take advantage of the adapter. + // However please consider updating to the new interface as the old one is now + // deprecated and will probably be removed quite soon! + // Please contact me via github if you have any questions at all about this. + // In fact, ideally, please contact me anyway to let me know you've hit this - as I have + // no idea who is actually using custom reporters at all (possibly no-one!). + // The new interface is designed to minimise exposure to interface changes in the future. 
+ virtual IStreamingReporter* create( ReporterConfig const& config ) const { + return new T( config ); + } + + virtual std::string getDescription() const { + return T::getDescription(); + } + }; + + public: + + ReporterRegistrar( std::string const& name ) { + getMutableRegistryHub().registerReporter( name, new ReporterFactory() ); + } + }; + + template + class ListenerRegistrar { + + class ListenerFactory : public SharedImpl { + + virtual IStreamingReporter* create( ReporterConfig const& config ) const { + return new T( config ); + } + virtual std::string getDescription() const { + return std::string(); + } + }; + + public: + + ListenerRegistrar() { + getMutableRegistryHub().registerListener( new ListenerFactory() ); + } + }; +} + +#define INTERNAL_CATCH_REGISTER_LEGACY_REPORTER( name, reporterType ) \ + namespace{ Catch::LegacyReporterRegistrar catch_internal_RegistrarFor##reporterType( name ); } + +#define INTERNAL_CATCH_REGISTER_REPORTER( name, reporterType ) \ + namespace{ Catch::ReporterRegistrar catch_internal_RegistrarFor##reporterType( name ); } + +// Deprecated - use the form without INTERNAL_ +#define INTERNAL_CATCH_REGISTER_LISTENER( listenerType ) \ + namespace{ Catch::ListenerRegistrar catch_internal_RegistrarFor##listenerType; } + +#define CATCH_REGISTER_LISTENER( listenerType ) \ + namespace{ Catch::ListenerRegistrar catch_internal_RegistrarFor##listenerType; } + +// #included from: ../internal/catch_xmlwriter.hpp +#define TWOBLUECUBES_CATCH_XMLWRITER_HPP_INCLUDED + +#include +#include +#include +#include + +namespace Catch { + + class XmlEncode { + public: + enum ForWhat { ForTextNodes, ForAttributes }; + + XmlEncode( std::string const& str, ForWhat forWhat = ForTextNodes ) + : m_str( str ), + m_forWhat( forWhat ) + {} + + void encodeTo( std::ostream& os ) const { + + // Apostrophe escaping not necessary if we always use " to write attributes + // (see: http://www.w3.org/TR/xml/#syntax) + + for( std::size_t i = 0; i < m_str.size(); ++ i ) { + char c 
= m_str[i]; + switch( c ) { + case '<': os << "<"; break; + case '&': os << "&"; break; + + case '>': + // See: http://www.w3.org/TR/xml/#syntax + if( i > 2 && m_str[i-1] == ']' && m_str[i-2] == ']' ) + os << ">"; + else + os << c; + break; + + case '\"': + if( m_forWhat == ForAttributes ) + os << """; + else + os << c; + break; + + default: + // Escape control chars - based on contribution by @espenalb in PR #465 and + // by @mrpi PR #588 + if ( ( c >= 0 && c < '\x09' ) || ( c > '\x0D' && c < '\x20') || c=='\x7F' ) { + // see http://stackoverflow.com/questions/404107/why-are-control-characters-illegal-in-xml-1-0 + os << "\\x" << std::uppercase << std::hex << std::setfill('0') << std::setw(2) + << static_cast( c ); + } + else + os << c; + } + } + } + + friend std::ostream& operator << ( std::ostream& os, XmlEncode const& xmlEncode ) { + xmlEncode.encodeTo( os ); + return os; + } + + private: + std::string m_str; + ForWhat m_forWhat; + }; + + class XmlWriter { + public: + + class ScopedElement { + public: + ScopedElement( XmlWriter* writer ) + : m_writer( writer ) + {} + + ScopedElement( ScopedElement const& other ) + : m_writer( other.m_writer ){ + other.m_writer = CATCH_NULL; + } + + ~ScopedElement() { + if( m_writer ) + m_writer->endElement(); + } + + ScopedElement& writeText( std::string const& text, bool indent = true ) { + m_writer->writeText( text, indent ); + return *this; + } + + template + ScopedElement& writeAttribute( std::string const& name, T const& attribute ) { + m_writer->writeAttribute( name, attribute ); + return *this; + } + + private: + mutable XmlWriter* m_writer; + }; + + XmlWriter() + : m_tagIsOpen( false ), + m_needsNewline( false ), + m_os( Catch::cout() ) + { + writeDeclaration(); + } + + XmlWriter( std::ostream& os ) + : m_tagIsOpen( false ), + m_needsNewline( false ), + m_os( os ) + { + writeDeclaration(); + } + + ~XmlWriter() { + while( !m_tags.empty() ) + endElement(); + } + + XmlWriter& startElement( std::string const& name ) { + 
ensureTagClosed(); + newlineIfNecessary(); + m_os << m_indent << '<' << name; + m_tags.push_back( name ); + m_indent += " "; + m_tagIsOpen = true; + return *this; + } + + ScopedElement scopedElement( std::string const& name ) { + ScopedElement scoped( this ); + startElement( name ); + return scoped; + } + + XmlWriter& endElement() { + newlineIfNecessary(); + m_indent = m_indent.substr( 0, m_indent.size()-2 ); + if( m_tagIsOpen ) { + m_os << "/>"; + m_tagIsOpen = false; + } + else { + m_os << m_indent << ""; + } + m_os << std::endl; + m_tags.pop_back(); + return *this; + } + + XmlWriter& writeAttribute( std::string const& name, std::string const& attribute ) { + if( !name.empty() && !attribute.empty() ) + m_os << ' ' << name << "=\"" << XmlEncode( attribute, XmlEncode::ForAttributes ) << '"'; + return *this; + } + + XmlWriter& writeAttribute( std::string const& name, bool attribute ) { + m_os << ' ' << name << "=\"" << ( attribute ? "true" : "false" ) << '"'; + return *this; + } + + template + XmlWriter& writeAttribute( std::string const& name, T const& attribute ) { + std::ostringstream oss; + oss << attribute; + return writeAttribute( name, oss.str() ); + } + + XmlWriter& writeText( std::string const& text, bool indent = true ) { + if( !text.empty() ){ + bool tagWasOpen = m_tagIsOpen; + ensureTagClosed(); + if( tagWasOpen && indent ) + m_os << m_indent; + m_os << XmlEncode( text ); + m_needsNewline = true; + } + return *this; + } + + XmlWriter& writeComment( std::string const& text ) { + ensureTagClosed(); + m_os << m_indent << ""; + m_needsNewline = true; + return *this; + } + + void writeStylesheetRef( std::string const& url ) { + m_os << "\n"; + } + + XmlWriter& writeBlankLine() { + ensureTagClosed(); + m_os << '\n'; + return *this; + } + + void ensureTagClosed() { + if( m_tagIsOpen ) { + m_os << ">" << std::endl; + m_tagIsOpen = false; + } + } + + private: + XmlWriter( XmlWriter const& ); + void operator=( XmlWriter const& ); + + void writeDeclaration() { + 
m_os << "\n"; + } + + void newlineIfNecessary() { + if( m_needsNewline ) { + m_os << std::endl; + m_needsNewline = false; + } + } + + bool m_tagIsOpen; + bool m_needsNewline; + std::vector m_tags; + std::string m_indent; + std::ostream& m_os; + }; + +} + +namespace Catch { + class XmlReporter : public StreamingReporterBase { + public: + XmlReporter( ReporterConfig const& _config ) + : StreamingReporterBase( _config ), + m_xml(_config.stream()), + m_sectionDepth( 0 ) + { + m_reporterPrefs.shouldRedirectStdOut = true; + } + + virtual ~XmlReporter() CATCH_OVERRIDE; + + static std::string getDescription() { + return "Reports test results as an XML document"; + } + + virtual std::string getStylesheetRef() const { + return std::string(); + } + + void writeSourceInfo( SourceLineInfo const& sourceInfo ) { + m_xml + .writeAttribute( "filename", sourceInfo.file ) + .writeAttribute( "line", sourceInfo.line ); + } + + public: // StreamingReporterBase + + virtual void noMatchingTestCases( std::string const& s ) CATCH_OVERRIDE { + StreamingReporterBase::noMatchingTestCases( s ); + } + + virtual void testRunStarting( TestRunInfo const& testInfo ) CATCH_OVERRIDE { + StreamingReporterBase::testRunStarting( testInfo ); + std::string stylesheetRef = getStylesheetRef(); + if( !stylesheetRef.empty() ) + m_xml.writeStylesheetRef( stylesheetRef ); + m_xml.startElement( "Catch" ); + if( !m_config->name().empty() ) + m_xml.writeAttribute( "name", m_config->name() ); + } + + virtual void testGroupStarting( GroupInfo const& groupInfo ) CATCH_OVERRIDE { + StreamingReporterBase::testGroupStarting( groupInfo ); + m_xml.startElement( "Group" ) + .writeAttribute( "name", groupInfo.name ); + } + + virtual void testCaseStarting( TestCaseInfo const& testInfo ) CATCH_OVERRIDE { + StreamingReporterBase::testCaseStarting(testInfo); + m_xml.startElement( "TestCase" ) + .writeAttribute( "name", trim( testInfo.name ) ) + .writeAttribute( "description", testInfo.description ) + .writeAttribute( "tags", 
testInfo.tagsAsString ); + + writeSourceInfo( testInfo.lineInfo ); + + if ( m_config->showDurations() == ShowDurations::Always ) + m_testCaseTimer.start(); + m_xml.ensureTagClosed(); + } + + virtual void sectionStarting( SectionInfo const& sectionInfo ) CATCH_OVERRIDE { + StreamingReporterBase::sectionStarting( sectionInfo ); + if( m_sectionDepth++ > 0 ) { + m_xml.startElement( "Section" ) + .writeAttribute( "name", trim( sectionInfo.name ) ) + .writeAttribute( "description", sectionInfo.description ); + writeSourceInfo( sectionInfo.lineInfo ); + m_xml.ensureTagClosed(); + } + } + + virtual void assertionStarting( AssertionInfo const& ) CATCH_OVERRIDE { } + + virtual bool assertionEnded( AssertionStats const& assertionStats ) CATCH_OVERRIDE { + + AssertionResult const& result = assertionStats.assertionResult; + + bool includeResults = m_config->includeSuccessfulResults() || !result.isOk(); + + if( includeResults || result.getResultType() == ResultWas::Warning ) { + // Print any info messages in tags. + for( std::vector::const_iterator it = assertionStats.infoMessages.begin(), itEnd = assertionStats.infoMessages.end(); + it != itEnd; + ++it ) { + if( it->type == ResultWas::Info && includeResults ) { + m_xml.scopedElement( "Info" ) + .writeText( it->message ); + } else if ( it->type == ResultWas::Warning ) { + m_xml.scopedElement( "Warning" ) + .writeText( it->message ); + } + } + } + + // Drop out if result was successful but we're not printing them. + if( !includeResults && result.getResultType() != ResultWas::Warning ) + return true; + + // Print the expression if there is one. 
+ if( result.hasExpression() ) { + m_xml.startElement( "Expression" ) + .writeAttribute( "success", result.succeeded() ) + .writeAttribute( "type", result.getTestMacroName() ); + + writeSourceInfo( result.getSourceInfo() ); + + m_xml.scopedElement( "Original" ) + .writeText( result.getExpression() ); + m_xml.scopedElement( "Expanded" ) + .writeText( result.getExpandedExpression() ); + } + + // And... Print a result applicable to each result type. + switch( result.getResultType() ) { + case ResultWas::ThrewException: + m_xml.startElement( "Exception" ); + writeSourceInfo( result.getSourceInfo() ); + m_xml.writeText( result.getMessage() ); + m_xml.endElement(); + break; + case ResultWas::FatalErrorCondition: + m_xml.startElement( "FatalErrorCondition" ); + writeSourceInfo( result.getSourceInfo() ); + m_xml.writeText( result.getMessage() ); + m_xml.endElement(); + break; + case ResultWas::Info: + m_xml.scopedElement( "Info" ) + .writeText( result.getMessage() ); + break; + case ResultWas::Warning: + // Warning will already have been written + break; + case ResultWas::ExplicitFailure: + m_xml.startElement( "Failure" ); + writeSourceInfo( result.getSourceInfo() ); + m_xml.writeText( result.getMessage() ); + m_xml.endElement(); + break; + default: + break; + } + + if( result.hasExpression() ) + m_xml.endElement(); + + return true; + } + + virtual void sectionEnded( SectionStats const& sectionStats ) CATCH_OVERRIDE { + StreamingReporterBase::sectionEnded( sectionStats ); + if( --m_sectionDepth > 0 ) { + XmlWriter::ScopedElement e = m_xml.scopedElement( "OverallResults" ); + e.writeAttribute( "successes", sectionStats.assertions.passed ); + e.writeAttribute( "failures", sectionStats.assertions.failed ); + e.writeAttribute( "expectedFailures", sectionStats.assertions.failedButOk ); + + if ( m_config->showDurations() == ShowDurations::Always ) + e.writeAttribute( "durationInSeconds", sectionStats.durationInSeconds ); + + m_xml.endElement(); + } + } + + virtual void 
testCaseEnded( TestCaseStats const& testCaseStats ) CATCH_OVERRIDE { + StreamingReporterBase::testCaseEnded( testCaseStats ); + XmlWriter::ScopedElement e = m_xml.scopedElement( "OverallResult" ); + e.writeAttribute( "success", testCaseStats.totals.assertions.allOk() ); + + if ( m_config->showDurations() == ShowDurations::Always ) + e.writeAttribute( "durationInSeconds", m_testCaseTimer.getElapsedSeconds() ); + + if( !testCaseStats.stdOut.empty() ) + m_xml.scopedElement( "StdOut" ).writeText( trim( testCaseStats.stdOut ), false ); + if( !testCaseStats.stdErr.empty() ) + m_xml.scopedElement( "StdErr" ).writeText( trim( testCaseStats.stdErr ), false ); + + m_xml.endElement(); + } + + virtual void testGroupEnded( TestGroupStats const& testGroupStats ) CATCH_OVERRIDE { + StreamingReporterBase::testGroupEnded( testGroupStats ); + // TODO: Check testGroupStats.aborting and act accordingly. + m_xml.scopedElement( "OverallResults" ) + .writeAttribute( "successes", testGroupStats.totals.assertions.passed ) + .writeAttribute( "failures", testGroupStats.totals.assertions.failed ) + .writeAttribute( "expectedFailures", testGroupStats.totals.assertions.failedButOk ); + m_xml.endElement(); + } + + virtual void testRunEnded( TestRunStats const& testRunStats ) CATCH_OVERRIDE { + StreamingReporterBase::testRunEnded( testRunStats ); + m_xml.scopedElement( "OverallResults" ) + .writeAttribute( "successes", testRunStats.totals.assertions.passed ) + .writeAttribute( "failures", testRunStats.totals.assertions.failed ) + .writeAttribute( "expectedFailures", testRunStats.totals.assertions.failedButOk ); + m_xml.endElement(); + } + + private: + Timer m_testCaseTimer; + XmlWriter m_xml; + int m_sectionDepth; + }; + + INTERNAL_CATCH_REGISTER_REPORTER( "xml", XmlReporter ) + +} // end namespace Catch + +// #included from: ../reporters/catch_reporter_junit.hpp +#define TWOBLUECUBES_CATCH_REPORTER_JUNIT_HPP_INCLUDED + +#include + +namespace Catch { + + namespace { + std::string 
getCurrentTimestamp() { + // Beware, this is not reentrant because of backward compatibility issues + // Also, UTC only, again because of backward compatibility (%z is C++11) + time_t rawtime; + std::time(&rawtime); + const size_t timeStampSize = sizeof("2017-01-16T17:06:45Z"); + +#ifdef _MSC_VER + std::tm timeInfo = {}; + gmtime_s(&timeInfo, &rawtime); +#else + std::tm* timeInfo; + timeInfo = std::gmtime(&rawtime); +#endif + + char timeStamp[timeStampSize]; + const char * const fmt = "%Y-%m-%dT%H:%M:%SZ"; + +#ifdef _MSC_VER + std::strftime(timeStamp, timeStampSize, fmt, &timeInfo); +#else + std::strftime(timeStamp, timeStampSize, fmt, timeInfo); +#endif + return std::string(timeStamp); + } + + } + + class JunitReporter : public CumulativeReporterBase { + public: + JunitReporter( ReporterConfig const& _config ) + : CumulativeReporterBase( _config ), + xml( _config.stream() ), + unexpectedExceptions( 0 ), + m_okToFail( false ) + { + m_reporterPrefs.shouldRedirectStdOut = true; + } + + virtual ~JunitReporter() CATCH_OVERRIDE; + + static std::string getDescription() { + return "Reports test results in an XML format that looks like Ant's junitreport target"; + } + + virtual void noMatchingTestCases( std::string const& /*spec*/ ) CATCH_OVERRIDE {} + + virtual void testRunStarting( TestRunInfo const& runInfo ) CATCH_OVERRIDE { + CumulativeReporterBase::testRunStarting( runInfo ); + xml.startElement( "testsuites" ); + } + + virtual void testGroupStarting( GroupInfo const& groupInfo ) CATCH_OVERRIDE { + suiteTimer.start(); + stdOutForSuite.str(""); + stdErrForSuite.str(""); + unexpectedExceptions = 0; + CumulativeReporterBase::testGroupStarting( groupInfo ); + } + + virtual void testCaseStarting( TestCaseInfo const& testCaseInfo ) CATCH_OVERRIDE { + m_okToFail = testCaseInfo.okToFail(); + } + virtual bool assertionEnded( AssertionStats const& assertionStats ) CATCH_OVERRIDE { + if( assertionStats.assertionResult.getResultType() == ResultWas::ThrewException && !m_okToFail ) 
+ unexpectedExceptions++; + return CumulativeReporterBase::assertionEnded( assertionStats ); + } + + virtual void testCaseEnded( TestCaseStats const& testCaseStats ) CATCH_OVERRIDE { + stdOutForSuite << testCaseStats.stdOut; + stdErrForSuite << testCaseStats.stdErr; + CumulativeReporterBase::testCaseEnded( testCaseStats ); + } + + virtual void testGroupEnded( TestGroupStats const& testGroupStats ) CATCH_OVERRIDE { + double suiteTime = suiteTimer.getElapsedSeconds(); + CumulativeReporterBase::testGroupEnded( testGroupStats ); + writeGroup( *m_testGroups.back(), suiteTime ); + } + + virtual void testRunEndedCumulative() CATCH_OVERRIDE { + xml.endElement(); + } + + void writeGroup( TestGroupNode const& groupNode, double suiteTime ) { + XmlWriter::ScopedElement e = xml.scopedElement( "testsuite" ); + TestGroupStats const& stats = groupNode.value; + xml.writeAttribute( "name", stats.groupInfo.name ); + xml.writeAttribute( "errors", unexpectedExceptions ); + xml.writeAttribute( "failures", stats.totals.assertions.failed-unexpectedExceptions ); + xml.writeAttribute( "tests", stats.totals.assertions.total() ); + xml.writeAttribute( "hostname", "tbd" ); // !TBD + if( m_config->showDurations() == ShowDurations::Never ) + xml.writeAttribute( "time", "" ); + else + xml.writeAttribute( "time", suiteTime ); + xml.writeAttribute( "timestamp", getCurrentTimestamp() ); + + // Write test cases + for( TestGroupNode::ChildNodes::const_iterator + it = groupNode.children.begin(), itEnd = groupNode.children.end(); + it != itEnd; + ++it ) + writeTestCase( **it ); + + xml.scopedElement( "system-out" ).writeText( trim( stdOutForSuite.str() ), false ); + xml.scopedElement( "system-err" ).writeText( trim( stdErrForSuite.str() ), false ); + } + + void writeTestCase( TestCaseNode const& testCaseNode ) { + TestCaseStats const& stats = testCaseNode.value; + + // All test cases have exactly one section - which represents the + // test case itself. 
That section may have 0-n nested sections + assert( testCaseNode.children.size() == 1 ); + SectionNode const& rootSection = *testCaseNode.children.front(); + + std::string className = stats.testInfo.className; + + if( className.empty() ) { + if( rootSection.childSections.empty() ) + className = "global"; + } + writeSection( className, "", rootSection ); + } + + void writeSection( std::string const& className, + std::string const& rootName, + SectionNode const& sectionNode ) { + std::string name = trim( sectionNode.stats.sectionInfo.name ); + if( !rootName.empty() ) + name = rootName + '/' + name; + + if( !sectionNode.assertions.empty() || + !sectionNode.stdOut.empty() || + !sectionNode.stdErr.empty() ) { + XmlWriter::ScopedElement e = xml.scopedElement( "testcase" ); + if( className.empty() ) { + xml.writeAttribute( "classname", name ); + xml.writeAttribute( "name", "root" ); + } + else { + xml.writeAttribute( "classname", className ); + xml.writeAttribute( "name", name ); + } + xml.writeAttribute( "time", Catch::toString( sectionNode.stats.durationInSeconds ) ); + + writeAssertions( sectionNode ); + + if( !sectionNode.stdOut.empty() ) + xml.scopedElement( "system-out" ).writeText( trim( sectionNode.stdOut ), false ); + if( !sectionNode.stdErr.empty() ) + xml.scopedElement( "system-err" ).writeText( trim( sectionNode.stdErr ), false ); + } + for( SectionNode::ChildSections::const_iterator + it = sectionNode.childSections.begin(), + itEnd = sectionNode.childSections.end(); + it != itEnd; + ++it ) + if( className.empty() ) + writeSection( name, "", **it ); + else + writeSection( className, name, **it ); + } + + void writeAssertions( SectionNode const& sectionNode ) { + for( SectionNode::Assertions::const_iterator + it = sectionNode.assertions.begin(), itEnd = sectionNode.assertions.end(); + it != itEnd; + ++it ) + writeAssertion( *it ); + } + void writeAssertion( AssertionStats const& stats ) { + AssertionResult const& result = stats.assertionResult; + if( 
!result.isOk() ) { + std::string elementName; + switch( result.getResultType() ) { + case ResultWas::ThrewException: + case ResultWas::FatalErrorCondition: + elementName = "error"; + break; + case ResultWas::ExplicitFailure: + elementName = "failure"; + break; + case ResultWas::ExpressionFailed: + elementName = "failure"; + break; + case ResultWas::DidntThrowException: + elementName = "failure"; + break; + + // We should never see these here: + case ResultWas::Info: + case ResultWas::Warning: + case ResultWas::Ok: + case ResultWas::Unknown: + case ResultWas::FailureBit: + case ResultWas::Exception: + elementName = "internalError"; + break; + } + + XmlWriter::ScopedElement e = xml.scopedElement( elementName ); + + xml.writeAttribute( "message", result.getExpandedExpression() ); + xml.writeAttribute( "type", result.getTestMacroName() ); + + std::ostringstream oss; + if( !result.getMessage().empty() ) + oss << result.getMessage() << '\n'; + for( std::vector::const_iterator + it = stats.infoMessages.begin(), + itEnd = stats.infoMessages.end(); + it != itEnd; + ++it ) + if( it->type == ResultWas::Info ) + oss << it->message << '\n'; + + oss << "at " << result.getSourceInfo(); + xml.writeText( oss.str(), false ); + } + } + + XmlWriter xml; + Timer suiteTimer; + std::ostringstream stdOutForSuite; + std::ostringstream stdErrForSuite; + unsigned int unexpectedExceptions; + bool m_okToFail; + }; + + INTERNAL_CATCH_REGISTER_REPORTER( "junit", JunitReporter ) + +} // end namespace Catch + +// #included from: ../reporters/catch_reporter_console.hpp +#define TWOBLUECUBES_CATCH_REPORTER_CONSOLE_HPP_INCLUDED + +#include +#include +#include + +namespace Catch { + + struct ConsoleReporter : StreamingReporterBase { + ConsoleReporter( ReporterConfig const& _config ) + : StreamingReporterBase( _config ), + m_headerPrinted( false ) + {} + + virtual ~ConsoleReporter() CATCH_OVERRIDE; + static std::string getDescription() { + return "Reports test results as plain lines of text"; + } + + 
virtual void noMatchingTestCases( std::string const& spec ) CATCH_OVERRIDE { + stream << "No test cases matched '" << spec << '\'' << std::endl; + } + + virtual void assertionStarting( AssertionInfo const& ) CATCH_OVERRIDE { + } + + virtual bool assertionEnded( AssertionStats const& _assertionStats ) CATCH_OVERRIDE { + AssertionResult const& result = _assertionStats.assertionResult; + + bool includeResults = m_config->includeSuccessfulResults() || !result.isOk(); + + // Drop out if result was successful but we're not printing them. + if( !includeResults && result.getResultType() != ResultWas::Warning ) + return false; + + lazyPrint(); + + AssertionPrinter printer( stream, _assertionStats, includeResults ); + printer.print(); + stream << std::endl; + return true; + } + + virtual void sectionStarting( SectionInfo const& _sectionInfo ) CATCH_OVERRIDE { + m_headerPrinted = false; + StreamingReporterBase::sectionStarting( _sectionInfo ); + } + virtual void sectionEnded( SectionStats const& _sectionStats ) CATCH_OVERRIDE { + if( _sectionStats.missingAssertions ) { + lazyPrint(); + Colour colour( Colour::ResultError ); + if( m_sectionStack.size() > 1 ) + stream << "\nNo assertions in section"; + else + stream << "\nNo assertions in test case"; + stream << " '" << _sectionStats.sectionInfo.name << "'\n" << std::endl; + } + if( m_config->showDurations() == ShowDurations::Always ) { + stream << getFormattedDuration(_sectionStats.durationInSeconds) << " s: " << _sectionStats.sectionInfo.name << std::endl; + } + if( m_headerPrinted ) { + m_headerPrinted = false; + } + StreamingReporterBase::sectionEnded( _sectionStats ); + } + + virtual void testCaseEnded( TestCaseStats const& _testCaseStats ) CATCH_OVERRIDE { + StreamingReporterBase::testCaseEnded( _testCaseStats ); + m_headerPrinted = false; + } + virtual void testGroupEnded( TestGroupStats const& _testGroupStats ) CATCH_OVERRIDE { + if( currentGroupInfo.used ) { + printSummaryDivider(); + stream << "Summary for group '" << 
_testGroupStats.groupInfo.name << "':\n"; + printTotals( _testGroupStats.totals ); + stream << '\n' << std::endl; + } + StreamingReporterBase::testGroupEnded( _testGroupStats ); + } + virtual void testRunEnded( TestRunStats const& _testRunStats ) CATCH_OVERRIDE { + printTotalsDivider( _testRunStats.totals ); + printTotals( _testRunStats.totals ); + stream << std::endl; + StreamingReporterBase::testRunEnded( _testRunStats ); + } + + private: + + class AssertionPrinter { + void operator= ( AssertionPrinter const& ); + public: + AssertionPrinter( std::ostream& _stream, AssertionStats const& _stats, bool _printInfoMessages ) + : stream( _stream ), + stats( _stats ), + result( _stats.assertionResult ), + colour( Colour::None ), + message( result.getMessage() ), + messages( _stats.infoMessages ), + printInfoMessages( _printInfoMessages ) + { + switch( result.getResultType() ) { + case ResultWas::Ok: + colour = Colour::Success; + passOrFail = "PASSED"; + //if( result.hasMessage() ) + if( _stats.infoMessages.size() == 1 ) + messageLabel = "with message"; + if( _stats.infoMessages.size() > 1 ) + messageLabel = "with messages"; + break; + case ResultWas::ExpressionFailed: + if( result.isOk() ) { + colour = Colour::Success; + passOrFail = "FAILED - but was ok"; + } + else { + colour = Colour::Error; + passOrFail = "FAILED"; + } + if( _stats.infoMessages.size() == 1 ) + messageLabel = "with message"; + if( _stats.infoMessages.size() > 1 ) + messageLabel = "with messages"; + break; + case ResultWas::ThrewException: + colour = Colour::Error; + passOrFail = "FAILED"; + messageLabel = "due to unexpected exception with "; + if (_stats.infoMessages.size() == 1) + messageLabel += "message"; + if (_stats.infoMessages.size() > 1) + messageLabel += "messages"; + break; + case ResultWas::FatalErrorCondition: + colour = Colour::Error; + passOrFail = "FAILED"; + messageLabel = "due to a fatal error condition"; + break; + case ResultWas::DidntThrowException: + colour = Colour::Error; + 
passOrFail = "FAILED"; + messageLabel = "because no exception was thrown where one was expected"; + break; + case ResultWas::Info: + messageLabel = "info"; + break; + case ResultWas::Warning: + messageLabel = "warning"; + break; + case ResultWas::ExplicitFailure: + passOrFail = "FAILED"; + colour = Colour::Error; + if( _stats.infoMessages.size() == 1 ) + messageLabel = "explicitly with message"; + if( _stats.infoMessages.size() > 1 ) + messageLabel = "explicitly with messages"; + break; + // These cases are here to prevent compiler warnings + case ResultWas::Unknown: + case ResultWas::FailureBit: + case ResultWas::Exception: + passOrFail = "** internal error **"; + colour = Colour::Error; + break; + } + } + + void print() const { + printSourceInfo(); + if( stats.totals.assertions.total() > 0 ) { + if( result.isOk() ) + stream << '\n'; + printResultType(); + printOriginalExpression(); + printReconstructedExpression(); + } + else { + stream << '\n'; + } + printMessage(); + } + + private: + void printResultType() const { + if( !passOrFail.empty() ) { + Colour colourGuard( colour ); + stream << passOrFail << ":\n"; + } + } + void printOriginalExpression() const { + if( result.hasExpression() ) { + Colour colourGuard( Colour::OriginalExpression ); + stream << " "; + stream << result.getExpressionInMacro(); + stream << '\n'; + } + } + void printReconstructedExpression() const { + if( result.hasExpandedExpression() ) { + stream << "with expansion:\n"; + Colour colourGuard( Colour::ReconstructedExpression ); + stream << Text( result.getExpandedExpression(), TextAttributes().setIndent(2) ) << '\n'; + } + } + void printMessage() const { + if( !messageLabel.empty() ) + stream << messageLabel << ':' << '\n'; + for( std::vector::const_iterator it = messages.begin(), itEnd = messages.end(); + it != itEnd; + ++it ) { + // If this assertion is a warning ignore any INFO messages + if( printInfoMessages || it->type != ResultWas::Info ) + stream << Text( it->message, 
TextAttributes().setIndent(2) ) << '\n'; + } + } + void printSourceInfo() const { + Colour colourGuard( Colour::FileName ); + stream << result.getSourceInfo() << ": "; + } + + std::ostream& stream; + AssertionStats const& stats; + AssertionResult const& result; + Colour::Code colour; + std::string passOrFail; + std::string messageLabel; + std::string message; + std::vector messages; + bool printInfoMessages; + }; + + void lazyPrint() { + + if( !currentTestRunInfo.used ) + lazyPrintRunInfo(); + if( !currentGroupInfo.used ) + lazyPrintGroupInfo(); + + if( !m_headerPrinted ) { + printTestCaseAndSectionHeader(); + m_headerPrinted = true; + } + } + void lazyPrintRunInfo() { + stream << '\n' << getLineOfChars<'~'>() << '\n'; + Colour colour( Colour::SecondaryText ); + stream << currentTestRunInfo->name + << " is a Catch v" << libraryVersion() << " host application.\n" + << "Run with -? for options\n\n"; + + if( m_config->rngSeed() != 0 ) + stream << "Randomness seeded to: " << m_config->rngSeed() << "\n\n"; + + currentTestRunInfo.used = true; + } + void lazyPrintGroupInfo() { + if( !currentGroupInfo->name.empty() && currentGroupInfo->groupsCounts > 1 ) { + printClosedHeader( "Group: " + currentGroupInfo->name ); + currentGroupInfo.used = true; + } + } + void printTestCaseAndSectionHeader() { + assert( !m_sectionStack.empty() ); + printOpenHeader( currentTestCaseInfo->name ); + + if( m_sectionStack.size() > 1 ) { + Colour colourGuard( Colour::Headers ); + + std::vector::const_iterator + it = m_sectionStack.begin()+1, // Skip first section (test case) + itEnd = m_sectionStack.end(); + for( ; it != itEnd; ++it ) + printHeaderString( it->name, 2 ); + } + + SourceLineInfo lineInfo = m_sectionStack.back().lineInfo; + + if( !lineInfo.empty() ){ + stream << getLineOfChars<'-'>() << '\n'; + Colour colourGuard( Colour::FileName ); + stream << lineInfo << '\n'; + } + stream << getLineOfChars<'.'>() << '\n' << std::endl; + } + + void printClosedHeader( std::string const& _name ) { + 
printOpenHeader( _name ); + stream << getLineOfChars<'.'>() << '\n'; + } + void printOpenHeader( std::string const& _name ) { + stream << getLineOfChars<'-'>() << '\n'; + { + Colour colourGuard( Colour::Headers ); + printHeaderString( _name ); + } + } + + // if string has a : in first line will set indent to follow it on + // subsequent lines + void printHeaderString( std::string const& _string, std::size_t indent = 0 ) { + std::size_t i = _string.find( ": " ); + if( i != std::string::npos ) + i+=2; + else + i = 0; + stream << Text( _string, TextAttributes() + .setIndent( indent+i) + .setInitialIndent( indent ) ) << '\n'; + } + + struct SummaryColumn { + + SummaryColumn( std::string const& _label, Colour::Code _colour ) + : label( _label ), + colour( _colour ) + {} + SummaryColumn addRow( std::size_t count ) { + std::ostringstream oss; + oss << count; + std::string row = oss.str(); + for( std::vector::iterator it = rows.begin(); it != rows.end(); ++it ) { + while( it->size() < row.size() ) + *it = ' ' + *it; + while( it->size() > row.size() ) + row = ' ' + row; + } + rows.push_back( row ); + return *this; + } + + std::string label; + Colour::Code colour; + std::vector rows; + + }; + + void printTotals( Totals const& totals ) { + if( totals.testCases.total() == 0 ) { + stream << Colour( Colour::Warning ) << "No tests ran\n"; + } + else if( totals.assertions.total() > 0 && totals.testCases.allPassed() ) { + stream << Colour( Colour::ResultSuccess ) << "All tests passed"; + stream << " (" + << pluralise( totals.assertions.passed, "assertion" ) << " in " + << pluralise( totals.testCases.passed, "test case" ) << ')' + << '\n'; + } + else { + + std::vector columns; + columns.push_back( SummaryColumn( "", Colour::None ) + .addRow( totals.testCases.total() ) + .addRow( totals.assertions.total() ) ); + columns.push_back( SummaryColumn( "passed", Colour::Success ) + .addRow( totals.testCases.passed ) + .addRow( totals.assertions.passed ) ); + columns.push_back( 
SummaryColumn( "failed", Colour::ResultError ) + .addRow( totals.testCases.failed ) + .addRow( totals.assertions.failed ) ); + columns.push_back( SummaryColumn( "failed as expected", Colour::ResultExpectedFailure ) + .addRow( totals.testCases.failedButOk ) + .addRow( totals.assertions.failedButOk ) ); + + printSummaryRow( "test cases", columns, 0 ); + printSummaryRow( "assertions", columns, 1 ); + } + } + void printSummaryRow( std::string const& label, std::vector const& cols, std::size_t row ) { + for( std::vector::const_iterator it = cols.begin(); it != cols.end(); ++it ) { + std::string value = it->rows[row]; + if( it->label.empty() ) { + stream << label << ": "; + if( value != "0" ) + stream << value; + else + stream << Colour( Colour::Warning ) << "- none -"; + } + else if( value != "0" ) { + stream << Colour( Colour::LightGrey ) << " | "; + stream << Colour( it->colour ) + << value << ' ' << it->label; + } + } + stream << '\n'; + } + + static std::size_t makeRatio( std::size_t number, std::size_t total ) { + std::size_t ratio = total > 0 ? CATCH_CONFIG_CONSOLE_WIDTH * number/ total : 0; + return ( ratio == 0 && number > 0 ) ? 
1 : ratio; + } + static std::size_t& findMax( std::size_t& i, std::size_t& j, std::size_t& k ) { + if( i > j && i > k ) + return i; + else if( j > k ) + return j; + else + return k; + } + + void printTotalsDivider( Totals const& totals ) { + if( totals.testCases.total() > 0 ) { + std::size_t failedRatio = makeRatio( totals.testCases.failed, totals.testCases.total() ); + std::size_t failedButOkRatio = makeRatio( totals.testCases.failedButOk, totals.testCases.total() ); + std::size_t passedRatio = makeRatio( totals.testCases.passed, totals.testCases.total() ); + while( failedRatio + failedButOkRatio + passedRatio < CATCH_CONFIG_CONSOLE_WIDTH-1 ) + findMax( failedRatio, failedButOkRatio, passedRatio )++; + while( failedRatio + failedButOkRatio + passedRatio > CATCH_CONFIG_CONSOLE_WIDTH-1 ) + findMax( failedRatio, failedButOkRatio, passedRatio )--; + + stream << Colour( Colour::Error ) << std::string( failedRatio, '=' ); + stream << Colour( Colour::ResultExpectedFailure ) << std::string( failedButOkRatio, '=' ); + if( totals.testCases.allPassed() ) + stream << Colour( Colour::ResultSuccess ) << std::string( passedRatio, '=' ); + else + stream << Colour( Colour::Success ) << std::string( passedRatio, '=' ); + } + else { + stream << Colour( Colour::Warning ) << std::string( CATCH_CONFIG_CONSOLE_WIDTH-1, '=' ); + } + stream << '\n'; + } + void printSummaryDivider() { + stream << getLineOfChars<'-'>() << '\n'; + } + + private: + bool m_headerPrinted; + }; + + INTERNAL_CATCH_REGISTER_REPORTER( "console", ConsoleReporter ) + +} // end namespace Catch + +// #included from: ../reporters/catch_reporter_compact.hpp +#define TWOBLUECUBES_CATCH_REPORTER_COMPACT_HPP_INCLUDED + +namespace Catch { + + struct CompactReporter : StreamingReporterBase { + + CompactReporter( ReporterConfig const& _config ) + : StreamingReporterBase( _config ) + {} + + virtual ~CompactReporter(); + + static std::string getDescription() { + return "Reports test results on a single line, suitable for IDEs"; 
+ } + + virtual ReporterPreferences getPreferences() const { + ReporterPreferences prefs; + prefs.shouldRedirectStdOut = false; + return prefs; + } + + virtual void noMatchingTestCases( std::string const& spec ) { + stream << "No test cases matched '" << spec << '\'' << std::endl; + } + + virtual void assertionStarting( AssertionInfo const& ) {} + + virtual bool assertionEnded( AssertionStats const& _assertionStats ) { + AssertionResult const& result = _assertionStats.assertionResult; + + bool printInfoMessages = true; + + // Drop out if result was successful and we're not printing those + if( !m_config->includeSuccessfulResults() && result.isOk() ) { + if( result.getResultType() != ResultWas::Warning ) + return false; + printInfoMessages = false; + } + + AssertionPrinter printer( stream, _assertionStats, printInfoMessages ); + printer.print(); + + stream << std::endl; + return true; + } + + virtual void sectionEnded(SectionStats const& _sectionStats) CATCH_OVERRIDE { + if (m_config->showDurations() == ShowDurations::Always) { + stream << getFormattedDuration(_sectionStats.durationInSeconds) << " s: " << _sectionStats.sectionInfo.name << std::endl; + } + } + + virtual void testRunEnded( TestRunStats const& _testRunStats ) { + printTotals( _testRunStats.totals ); + stream << '\n' << std::endl; + StreamingReporterBase::testRunEnded( _testRunStats ); + } + + private: + class AssertionPrinter { + void operator= ( AssertionPrinter const& ); + public: + AssertionPrinter( std::ostream& _stream, AssertionStats const& _stats, bool _printInfoMessages ) + : stream( _stream ) + , stats( _stats ) + , result( _stats.assertionResult ) + , messages( _stats.infoMessages ) + , itMessage( _stats.infoMessages.begin() ) + , printInfoMessages( _printInfoMessages ) + {} + + void print() { + printSourceInfo(); + + itMessage = messages.begin(); + + switch( result.getResultType() ) { + case ResultWas::Ok: + printResultType( Colour::ResultSuccess, passedString() ); + 
printOriginalExpression(); + printReconstructedExpression(); + if ( ! result.hasExpression() ) + printRemainingMessages( Colour::None ); + else + printRemainingMessages(); + break; + case ResultWas::ExpressionFailed: + if( result.isOk() ) + printResultType( Colour::ResultSuccess, failedString() + std::string( " - but was ok" ) ); + else + printResultType( Colour::Error, failedString() ); + printOriginalExpression(); + printReconstructedExpression(); + printRemainingMessages(); + break; + case ResultWas::ThrewException: + printResultType( Colour::Error, failedString() ); + printIssue( "unexpected exception with message:" ); + printMessage(); + printExpressionWas(); + printRemainingMessages(); + break; + case ResultWas::FatalErrorCondition: + printResultType( Colour::Error, failedString() ); + printIssue( "fatal error condition with message:" ); + printMessage(); + printExpressionWas(); + printRemainingMessages(); + break; + case ResultWas::DidntThrowException: + printResultType( Colour::Error, failedString() ); + printIssue( "expected exception, got none" ); + printExpressionWas(); + printRemainingMessages(); + break; + case ResultWas::Info: + printResultType( Colour::None, "info" ); + printMessage(); + printRemainingMessages(); + break; + case ResultWas::Warning: + printResultType( Colour::None, "warning" ); + printMessage(); + printRemainingMessages(); + break; + case ResultWas::ExplicitFailure: + printResultType( Colour::Error, failedString() ); + printIssue( "explicitly" ); + printRemainingMessages( Colour::None ); + break; + // These cases are here to prevent compiler warnings + case ResultWas::Unknown: + case ResultWas::FailureBit: + case ResultWas::Exception: + printResultType( Colour::Error, "** internal error **" ); + break; + } + } + + private: + // Colour::LightGrey + + static Colour::Code dimColour() { return Colour::FileName; } + +#ifdef CATCH_PLATFORM_MAC + static const char* failedString() { return "FAILED"; } + static const char* passedString() { 
return "PASSED"; } +#else + static const char* failedString() { return "failed"; } + static const char* passedString() { return "passed"; } +#endif + + void printSourceInfo() const { + Colour colourGuard( Colour::FileName ); + stream << result.getSourceInfo() << ':'; + } + + void printResultType( Colour::Code colour, std::string const& passOrFail ) const { + if( !passOrFail.empty() ) { + { + Colour colourGuard( colour ); + stream << ' ' << passOrFail; + } + stream << ':'; + } + } + + void printIssue( std::string const& issue ) const { + stream << ' ' << issue; + } + + void printExpressionWas() { + if( result.hasExpression() ) { + stream << ';'; + { + Colour colour( dimColour() ); + stream << " expression was:"; + } + printOriginalExpression(); + } + } + + void printOriginalExpression() const { + if( result.hasExpression() ) { + stream << ' ' << result.getExpression(); + } + } + + void printReconstructedExpression() const { + if( result.hasExpandedExpression() ) { + { + Colour colour( dimColour() ); + stream << " for: "; + } + stream << result.getExpandedExpression(); + } + } + + void printMessage() { + if ( itMessage != messages.end() ) { + stream << " '" << itMessage->message << '\''; + ++itMessage; + } + } + + void printRemainingMessages( Colour::Code colour = dimColour() ) { + if ( itMessage == messages.end() ) + return; + + // using messages.end() directly yields compilation error: + std::vector::const_iterator itEnd = messages.end(); + const std::size_t N = static_cast( std::distance( itMessage, itEnd ) ); + + { + Colour colourGuard( colour ); + stream << " with " << pluralise( N, "message" ) << ':'; + } + + for(; itMessage != itEnd; ) { + // If this assertion is a warning ignore any INFO messages + if( printInfoMessages || itMessage->type != ResultWas::Info ) { + stream << " '" << itMessage->message << '\''; + if ( ++itMessage != itEnd ) { + Colour colourGuard( dimColour() ); + stream << " and"; + } + } + } + } + + private: + std::ostream& stream; + 
AssertionStats const& stats; + AssertionResult const& result; + std::vector messages; + std::vector::const_iterator itMessage; + bool printInfoMessages; + }; + + // Colour, message variants: + // - white: No tests ran. + // - red: Failed [both/all] N test cases, failed [both/all] M assertions. + // - white: Passed [both/all] N test cases (no assertions). + // - red: Failed N tests cases, failed M assertions. + // - green: Passed [both/all] N tests cases with M assertions. + + std::string bothOrAll( std::size_t count ) const { + return count == 1 ? std::string() : count == 2 ? "both " : "all " ; + } + + void printTotals( const Totals& totals ) const { + if( totals.testCases.total() == 0 ) { + stream << "No tests ran."; + } + else if( totals.testCases.failed == totals.testCases.total() ) { + Colour colour( Colour::ResultError ); + const std::string qualify_assertions_failed = + totals.assertions.failed == totals.assertions.total() ? + bothOrAll( totals.assertions.failed ) : std::string(); + stream << + "Failed " << bothOrAll( totals.testCases.failed ) + << pluralise( totals.testCases.failed, "test case" ) << ", " + "failed " << qualify_assertions_failed << + pluralise( totals.assertions.failed, "assertion" ) << '.'; + } + else if( totals.assertions.total() == 0 ) { + stream << + "Passed " << bothOrAll( totals.testCases.total() ) + << pluralise( totals.testCases.total(), "test case" ) + << " (no assertions)."; + } + else if( totals.assertions.failed ) { + Colour colour( Colour::ResultError ); + stream << + "Failed " << pluralise( totals.testCases.failed, "test case" ) << ", " + "failed " << pluralise( totals.assertions.failed, "assertion" ) << '.'; + } + else { + Colour colour( Colour::ResultSuccess ); + stream << + "Passed " << bothOrAll( totals.testCases.passed ) + << pluralise( totals.testCases.passed, "test case" ) << + " with " << pluralise( totals.assertions.passed, "assertion" ) << '.'; + } + } + }; + + INTERNAL_CATCH_REGISTER_REPORTER( "compact", 
CompactReporter ) + +} // end namespace Catch + +namespace Catch { + // These are all here to avoid warnings about not having any out of line + // virtual methods + NonCopyable::~NonCopyable() {} + IShared::~IShared() {} + IStream::~IStream() CATCH_NOEXCEPT {} + FileStream::~FileStream() CATCH_NOEXCEPT {} + CoutStream::~CoutStream() CATCH_NOEXCEPT {} + DebugOutStream::~DebugOutStream() CATCH_NOEXCEPT {} + StreamBufBase::~StreamBufBase() CATCH_NOEXCEPT {} + IContext::~IContext() {} + IResultCapture::~IResultCapture() {} + ITestCase::~ITestCase() {} + ITestCaseRegistry::~ITestCaseRegistry() {} + IRegistryHub::~IRegistryHub() {} + IMutableRegistryHub::~IMutableRegistryHub() {} + IExceptionTranslator::~IExceptionTranslator() {} + IExceptionTranslatorRegistry::~IExceptionTranslatorRegistry() {} + IReporter::~IReporter() {} + IReporterFactory::~IReporterFactory() {} + IReporterRegistry::~IReporterRegistry() {} + IStreamingReporter::~IStreamingReporter() {} + AssertionStats::~AssertionStats() {} + SectionStats::~SectionStats() {} + TestCaseStats::~TestCaseStats() {} + TestGroupStats::~TestGroupStats() {} + TestRunStats::~TestRunStats() {} + CumulativeReporterBase::SectionNode::~SectionNode() {} + CumulativeReporterBase::~CumulativeReporterBase() {} + + StreamingReporterBase::~StreamingReporterBase() {} + ConsoleReporter::~ConsoleReporter() {} + CompactReporter::~CompactReporter() {} + IRunner::~IRunner() {} + IMutableContext::~IMutableContext() {} + IConfig::~IConfig() {} + XmlReporter::~XmlReporter() {} + JunitReporter::~JunitReporter() {} + TestRegistry::~TestRegistry() {} + FreeFunctionTestCase::~FreeFunctionTestCase() {} + IGeneratorInfo::~IGeneratorInfo() {} + IGeneratorsForTest::~IGeneratorsForTest() {} + WildcardPattern::~WildcardPattern() {} + TestSpec::Pattern::~Pattern() {} + TestSpec::NamePattern::~NamePattern() {} + TestSpec::TagPattern::~TagPattern() {} + TestSpec::ExcludedPattern::~ExcludedPattern() {} + 
Matchers::Impl::MatcherUntypedBase::~MatcherUntypedBase() {} + + void Config::dummy() {} + + namespace TestCaseTracking { + ITracker::~ITracker() {} + TrackerBase::~TrackerBase() {} + SectionTracker::~SectionTracker() {} + IndexTracker::~IndexTracker() {} + } +} + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +#endif + +#ifdef CATCH_CONFIG_MAIN +// #included from: internal/catch_default_main.hpp +#define TWOBLUECUBES_CATCH_DEFAULT_MAIN_HPP_INCLUDED + +#ifndef __OBJC__ + +#if defined(WIN32) && defined(_UNICODE) && !defined(DO_NOT_USE_WMAIN) +// Standard C/C++ Win32 Unicode wmain entry point +extern "C" int wmain (int argc, wchar_t * argv[], wchar_t * []) { +#else +// Standard C/C++ main entry point +int main (int argc, char * argv[]) { +#endif + + int result = Catch::Session().run( argc, argv ); + return ( result < 0xff ? result : 0xff ); +} + +#else // __OBJC__ + +// Objective-C entry point +int main (int argc, char * const argv[]) { +#if !CATCH_ARC_ENABLED + NSAutoreleasePool * pool = [[NSAutoreleasePool alloc] init]; +#endif + + Catch::registerTestMethods(); + int result = Catch::Session().run( argc, (char* const*)argv ); + +#if !CATCH_ARC_ENABLED + [pool drain]; +#endif + + return ( result < 0xff ? 
result : 0xff ); +} + +#endif // __OBJC__ + +#endif + +#ifdef CLARA_CONFIG_MAIN_NOT_DEFINED +# undef CLARA_CONFIG_MAIN +#endif + +////// + +// If this config identifier is defined then all CATCH macros are prefixed with CATCH_ +#ifdef CATCH_CONFIG_PREFIX_ALL + +#if defined(CATCH_CONFIG_FAST_COMPILE) +#define CATCH_REQUIRE( expr ) INTERNAL_CATCH_TEST_NO_TRY( "CATCH_REQUIRE", Catch::ResultDisposition::Normal, expr ) +#define CATCH_REQUIRE_FALSE( expr ) INTERNAL_CATCH_TEST_NO_TRY( "CATCH_REQUIRE_FALSE", Catch::ResultDisposition::Normal | Catch::ResultDisposition::FalseTest, expr ) +#else +#define CATCH_REQUIRE( expr ) INTERNAL_CATCH_TEST( "CATCH_REQUIRE", Catch::ResultDisposition::Normal, expr ) +#define CATCH_REQUIRE_FALSE( expr ) INTERNAL_CATCH_TEST( "CATCH_REQUIRE_FALSE", Catch::ResultDisposition::Normal | Catch::ResultDisposition::FalseTest, expr ) +#endif + +#define CATCH_REQUIRE_THROWS( expr ) INTERNAL_CATCH_THROWS( "CATCH_REQUIRE_THROWS", Catch::ResultDisposition::Normal, "", expr ) +#define CATCH_REQUIRE_THROWS_AS( expr, exceptionType ) INTERNAL_CATCH_THROWS_AS( "CATCH_REQUIRE_THROWS_AS", exceptionType, Catch::ResultDisposition::Normal, expr ) +#define CATCH_REQUIRE_THROWS_WITH( expr, matcher ) INTERNAL_CATCH_THROWS( "CATCH_REQUIRE_THROWS_WITH", Catch::ResultDisposition::Normal, matcher, expr ) +#define CATCH_REQUIRE_NOTHROW( expr ) INTERNAL_CATCH_NO_THROW( "CATCH_REQUIRE_NOTHROW", Catch::ResultDisposition::Normal, expr ) + +#define CATCH_CHECK( expr ) INTERNAL_CATCH_TEST( "CATCH_CHECK", Catch::ResultDisposition::ContinueOnFailure, expr ) +#define CATCH_CHECK_FALSE( expr ) INTERNAL_CATCH_TEST( "CATCH_CHECK_FALSE", Catch::ResultDisposition::ContinueOnFailure | Catch::ResultDisposition::FalseTest, expr ) +#define CATCH_CHECKED_IF( expr ) INTERNAL_CATCH_IF( "CATCH_CHECKED_IF", Catch::ResultDisposition::ContinueOnFailure, expr ) +#define CATCH_CHECKED_ELSE( expr ) INTERNAL_CATCH_ELSE( "CATCH_CHECKED_ELSE", Catch::ResultDisposition::ContinueOnFailure, expr ) 
+#define CATCH_CHECK_NOFAIL( expr ) INTERNAL_CATCH_TEST( "CATCH_CHECK_NOFAIL", Catch::ResultDisposition::ContinueOnFailure | Catch::ResultDisposition::SuppressFail, expr ) + +#define CATCH_CHECK_THROWS( expr ) INTERNAL_CATCH_THROWS( "CATCH_CHECK_THROWS", Catch::ResultDisposition::ContinueOnFailure, "", expr ) +#define CATCH_CHECK_THROWS_AS( expr, exceptionType ) INTERNAL_CATCH_THROWS_AS( "CATCH_CHECK_THROWS_AS", exceptionType, Catch::ResultDisposition::ContinueOnFailure, expr ) +#define CATCH_CHECK_THROWS_WITH( expr, matcher ) INTERNAL_CATCH_THROWS( "CATCH_CHECK_THROWS_WITH", Catch::ResultDisposition::ContinueOnFailure, matcher, expr ) +#define CATCH_CHECK_NOTHROW( expr ) INTERNAL_CATCH_NO_THROW( "CATCH_CHECK_NOTHROW", Catch::ResultDisposition::ContinueOnFailure, expr ) + +#define CATCH_CHECK_THAT( arg, matcher ) INTERNAL_CHECK_THAT( "CATCH_CHECK_THAT", matcher, Catch::ResultDisposition::ContinueOnFailure, arg ) + +#if defined(CATCH_CONFIG_FAST_COMPILE) +#define CATCH_REQUIRE_THAT( arg, matcher ) INTERNAL_CHECK_THAT_NO_TRY( "CATCH_REQUIRE_THAT", matcher, Catch::ResultDisposition::Normal, arg ) +#else +#define CATCH_REQUIRE_THAT( arg, matcher ) INTERNAL_CHECK_THAT( "CATCH_REQUIRE_THAT", matcher, Catch::ResultDisposition::Normal, arg ) +#endif + +#define CATCH_INFO( msg ) INTERNAL_CATCH_INFO( "CATCH_INFO", msg ) +#define CATCH_WARN( msg ) INTERNAL_CATCH_MSG( "CATCH_WARN", Catch::ResultWas::Warning, Catch::ResultDisposition::ContinueOnFailure, msg ) +#define CATCH_SCOPED_INFO( msg ) INTERNAL_CATCH_INFO( "CATCH_INFO", msg ) +#define CATCH_CAPTURE( msg ) INTERNAL_CATCH_INFO( "CATCH_CAPTURE", #msg " := " << Catch::toString(msg) ) +#define CATCH_SCOPED_CAPTURE( msg ) INTERNAL_CATCH_INFO( "CATCH_CAPTURE", #msg " := " << Catch::toString(msg) ) + +#ifdef CATCH_CONFIG_VARIADIC_MACROS + #define CATCH_TEST_CASE( ... ) INTERNAL_CATCH_TESTCASE( __VA_ARGS__ ) + #define CATCH_TEST_CASE_METHOD( className, ... 
) INTERNAL_CATCH_TEST_CASE_METHOD( className, __VA_ARGS__ ) + #define CATCH_METHOD_AS_TEST_CASE( method, ... ) INTERNAL_CATCH_METHOD_AS_TEST_CASE( method, __VA_ARGS__ ) + #define CATCH_REGISTER_TEST_CASE( Function, ... ) INTERNAL_CATCH_REGISTER_TESTCASE( Function, __VA_ARGS__ ) + #define CATCH_SECTION( ... ) INTERNAL_CATCH_SECTION( __VA_ARGS__ ) + #define CATCH_FAIL( ... ) INTERNAL_CATCH_MSG( "CATCH_FAIL", Catch::ResultWas::ExplicitFailure, Catch::ResultDisposition::Normal, __VA_ARGS__ ) + #define CATCH_FAIL_CHECK( ... ) INTERNAL_CATCH_MSG( "CATCH_FAIL_CHECK", Catch::ResultWas::ExplicitFailure, Catch::ResultDisposition::ContinueOnFailure, __VA_ARGS__ ) + #define CATCH_SUCCEED( ... ) INTERNAL_CATCH_MSG( "CATCH_SUCCEED", Catch::ResultWas::Ok, Catch::ResultDisposition::ContinueOnFailure, __VA_ARGS__ ) +#else + #define CATCH_TEST_CASE( name, description ) INTERNAL_CATCH_TESTCASE( name, description ) + #define CATCH_TEST_CASE_METHOD( className, name, description ) INTERNAL_CATCH_TEST_CASE_METHOD( className, name, description ) + #define CATCH_METHOD_AS_TEST_CASE( method, name, description ) INTERNAL_CATCH_METHOD_AS_TEST_CASE( method, name, description ) + #define CATCH_REGISTER_TEST_CASE( function, name, description ) INTERNAL_CATCH_REGISTER_TESTCASE( function, name, description ) + #define CATCH_SECTION( name, description ) INTERNAL_CATCH_SECTION( name, description ) + #define CATCH_FAIL( msg ) INTERNAL_CATCH_MSG( "CATCH_FAIL", Catch::ResultWas::ExplicitFailure, Catch::ResultDisposition::Normal, msg ) + #define CATCH_FAIL_CHECK( msg ) INTERNAL_CATCH_MSG( "CATCH_FAIL_CHECK", Catch::ResultWas::ExplicitFailure, Catch::ResultDisposition::ContinueOnFailure, msg ) + #define CATCH_SUCCEED( msg ) INTERNAL_CATCH_MSG( "CATCH_SUCCEED", Catch::ResultWas::Ok, Catch::ResultDisposition::ContinueOnFailure, msg ) +#endif +#define CATCH_ANON_TEST_CASE() INTERNAL_CATCH_TESTCASE( "", "" ) + +#define CATCH_REGISTER_REPORTER( name, reporterType ) INTERNAL_CATCH_REGISTER_REPORTER( name, 
reporterType ) +#define CATCH_REGISTER_LEGACY_REPORTER( name, reporterType ) INTERNAL_CATCH_REGISTER_LEGACY_REPORTER( name, reporterType ) + +#define CATCH_GENERATE( expr) INTERNAL_CATCH_GENERATE( expr ) + +// "BDD-style" convenience wrappers +#ifdef CATCH_CONFIG_VARIADIC_MACROS +#define CATCH_SCENARIO( ... ) CATCH_TEST_CASE( "Scenario: " __VA_ARGS__ ) +#define CATCH_SCENARIO_METHOD( className, ... ) INTERNAL_CATCH_TEST_CASE_METHOD( className, "Scenario: " __VA_ARGS__ ) +#else +#define CATCH_SCENARIO( name, tags ) CATCH_TEST_CASE( "Scenario: " name, tags ) +#define CATCH_SCENARIO_METHOD( className, name, tags ) INTERNAL_CATCH_TEST_CASE_METHOD( className, "Scenario: " name, tags ) +#endif +#define CATCH_GIVEN( desc ) CATCH_SECTION( std::string( "Given: ") + desc, "" ) +#define CATCH_WHEN( desc ) CATCH_SECTION( std::string( " When: ") + desc, "" ) +#define CATCH_AND_WHEN( desc ) CATCH_SECTION( std::string( " And: ") + desc, "" ) +#define CATCH_THEN( desc ) CATCH_SECTION( std::string( " Then: ") + desc, "" ) +#define CATCH_AND_THEN( desc ) CATCH_SECTION( std::string( " And: ") + desc, "" ) + +// If CATCH_CONFIG_PREFIX_ALL is not defined then the CATCH_ prefix is not required +#else + +#if defined(CATCH_CONFIG_FAST_COMPILE) +#define REQUIRE( expr ) INTERNAL_CATCH_TEST_NO_TRY( "REQUIRE", Catch::ResultDisposition::Normal, expr ) +#define REQUIRE_FALSE( expr ) INTERNAL_CATCH_TEST_NO_TRY( "REQUIRE_FALSE", Catch::ResultDisposition::Normal | Catch::ResultDisposition::FalseTest, expr ) + +#else +#define REQUIRE( expr ) INTERNAL_CATCH_TEST( "REQUIRE", Catch::ResultDisposition::Normal, expr ) +#define REQUIRE_FALSE( expr ) INTERNAL_CATCH_TEST( "REQUIRE_FALSE", Catch::ResultDisposition::Normal | Catch::ResultDisposition::FalseTest, expr ) +#endif + +#define REQUIRE_THROWS( expr ) INTERNAL_CATCH_THROWS( "REQUIRE_THROWS", Catch::ResultDisposition::Normal, "", expr ) +#define REQUIRE_THROWS_AS( expr, exceptionType ) INTERNAL_CATCH_THROWS_AS( "REQUIRE_THROWS_AS", exceptionType, 
Catch::ResultDisposition::Normal, expr ) +#define REQUIRE_THROWS_WITH( expr, matcher ) INTERNAL_CATCH_THROWS( "REQUIRE_THROWS_WITH", Catch::ResultDisposition::Normal, matcher, expr ) +#define REQUIRE_NOTHROW( expr ) INTERNAL_CATCH_NO_THROW( "REQUIRE_NOTHROW", Catch::ResultDisposition::Normal, expr ) + +#define CHECK( expr ) INTERNAL_CATCH_TEST( "CHECK", Catch::ResultDisposition::ContinueOnFailure, expr ) +#define CHECK_FALSE( expr ) INTERNAL_CATCH_TEST( "CHECK_FALSE", Catch::ResultDisposition::ContinueOnFailure | Catch::ResultDisposition::FalseTest, expr ) +#define CHECKED_IF( expr ) INTERNAL_CATCH_IF( "CHECKED_IF", Catch::ResultDisposition::ContinueOnFailure, expr ) +#define CHECKED_ELSE( expr ) INTERNAL_CATCH_ELSE( "CHECKED_ELSE", Catch::ResultDisposition::ContinueOnFailure, expr ) +#define CHECK_NOFAIL( expr ) INTERNAL_CATCH_TEST( "CHECK_NOFAIL", Catch::ResultDisposition::ContinueOnFailure | Catch::ResultDisposition::SuppressFail, expr ) + +#define CHECK_THROWS( expr ) INTERNAL_CATCH_THROWS( "CHECK_THROWS", Catch::ResultDisposition::ContinueOnFailure, "", expr ) +#define CHECK_THROWS_AS( expr, exceptionType ) INTERNAL_CATCH_THROWS_AS( "CHECK_THROWS_AS", exceptionType, Catch::ResultDisposition::ContinueOnFailure, expr ) +#define CHECK_THROWS_WITH( expr, matcher ) INTERNAL_CATCH_THROWS( "CHECK_THROWS_WITH", Catch::ResultDisposition::ContinueOnFailure, matcher, expr ) +#define CHECK_NOTHROW( expr ) INTERNAL_CATCH_NO_THROW( "CHECK_NOTHROW", Catch::ResultDisposition::ContinueOnFailure, expr ) + +#define CHECK_THAT( arg, matcher ) INTERNAL_CHECK_THAT( "CHECK_THAT", matcher, Catch::ResultDisposition::ContinueOnFailure, arg ) + +#if defined(CATCH_CONFIG_FAST_COMPILE) +#define REQUIRE_THAT( arg, matcher ) INTERNAL_CHECK_THAT_NO_TRY( "REQUIRE_THAT", matcher, Catch::ResultDisposition::Normal, arg ) +#else +#define REQUIRE_THAT( arg, matcher ) INTERNAL_CHECK_THAT( "REQUIRE_THAT", matcher, Catch::ResultDisposition::Normal, arg ) +#endif + +#define INFO( msg ) 
INTERNAL_CATCH_INFO( "INFO", msg ) +#define WARN( msg ) INTERNAL_CATCH_MSG( "WARN", Catch::ResultWas::Warning, Catch::ResultDisposition::ContinueOnFailure, msg ) +#define SCOPED_INFO( msg ) INTERNAL_CATCH_INFO( "INFO", msg ) +#define CAPTURE( msg ) INTERNAL_CATCH_INFO( "CAPTURE", #msg " := " << Catch::toString(msg) ) +#define SCOPED_CAPTURE( msg ) INTERNAL_CATCH_INFO( "CAPTURE", #msg " := " << Catch::toString(msg) ) + +#ifdef CATCH_CONFIG_VARIADIC_MACROS +#define TEST_CASE( ... ) INTERNAL_CATCH_TESTCASE( __VA_ARGS__ ) +#define TEST_CASE_METHOD( className, ... ) INTERNAL_CATCH_TEST_CASE_METHOD( className, __VA_ARGS__ ) +#define METHOD_AS_TEST_CASE( method, ... ) INTERNAL_CATCH_METHOD_AS_TEST_CASE( method, __VA_ARGS__ ) +#define REGISTER_TEST_CASE( Function, ... ) INTERNAL_CATCH_REGISTER_TESTCASE( Function, __VA_ARGS__ ) +#define SECTION( ... ) INTERNAL_CATCH_SECTION( __VA_ARGS__ ) +#define FAIL( ... ) INTERNAL_CATCH_MSG( "FAIL", Catch::ResultWas::ExplicitFailure, Catch::ResultDisposition::Normal, __VA_ARGS__ ) +#define FAIL_CHECK( ... ) INTERNAL_CATCH_MSG( "FAIL_CHECK", Catch::ResultWas::ExplicitFailure, Catch::ResultDisposition::ContinueOnFailure, __VA_ARGS__ ) +#define SUCCEED( ... 
) INTERNAL_CATCH_MSG( "SUCCEED", Catch::ResultWas::Ok, Catch::ResultDisposition::ContinueOnFailure, __VA_ARGS__ ) +#else +#define TEST_CASE( name, description ) INTERNAL_CATCH_TESTCASE( name, description ) + #define TEST_CASE_METHOD( className, name, description ) INTERNAL_CATCH_TEST_CASE_METHOD( className, name, description ) + #define METHOD_AS_TEST_CASE( method, name, description ) INTERNAL_CATCH_METHOD_AS_TEST_CASE( method, name, description ) + #define REGISTER_TEST_CASE( method, name, description ) INTERNAL_CATCH_REGISTER_TESTCASE( method, name, description ) + #define SECTION( name, description ) INTERNAL_CATCH_SECTION( name, description ) + #define FAIL( msg ) INTERNAL_CATCH_MSG( "FAIL", Catch::ResultWas::ExplicitFailure, Catch::ResultDisposition::Normal, msg ) + #define FAIL_CHECK( msg ) INTERNAL_CATCH_MSG( "FAIL_CHECK", Catch::ResultWas::ExplicitFailure, Catch::ResultDisposition::ContinueOnFailure, msg ) + #define SUCCEED( msg ) INTERNAL_CATCH_MSG( "SUCCEED", Catch::ResultWas::Ok, Catch::ResultDisposition::ContinueOnFailure, msg ) +#endif +#define ANON_TEST_CASE() INTERNAL_CATCH_TESTCASE( "", "" ) + +#define REGISTER_REPORTER( name, reporterType ) INTERNAL_CATCH_REGISTER_REPORTER( name, reporterType ) +#define REGISTER_LEGACY_REPORTER( name, reporterType ) INTERNAL_CATCH_REGISTER_LEGACY_REPORTER( name, reporterType ) + +#define GENERATE( expr) INTERNAL_CATCH_GENERATE( expr ) + +#endif + +#define CATCH_TRANSLATE_EXCEPTION( signature ) INTERNAL_CATCH_TRANSLATE_EXCEPTION( signature ) + +// "BDD-style" convenience wrappers +#ifdef CATCH_CONFIG_VARIADIC_MACROS +#define SCENARIO( ... ) TEST_CASE( "Scenario: " __VA_ARGS__ ) +#define SCENARIO_METHOD( className, ... 
) INTERNAL_CATCH_TEST_CASE_METHOD( className, "Scenario: " __VA_ARGS__ ) +#else +#define SCENARIO( name, tags ) TEST_CASE( "Scenario: " name, tags ) +#define SCENARIO_METHOD( className, name, tags ) INTERNAL_CATCH_TEST_CASE_METHOD( className, "Scenario: " name, tags ) +#endif +#define GIVEN( desc ) SECTION( std::string(" Given: ") + desc, "" ) +#define WHEN( desc ) SECTION( std::string(" When: ") + desc, "" ) +#define AND_WHEN( desc ) SECTION( std::string("And when: ") + desc, "" ) +#define THEN( desc ) SECTION( std::string(" Then: ") + desc, "" ) +#define AND_THEN( desc ) SECTION( std::string(" And: ") + desc, "" ) + +using Catch::Detail::Approx; + +// #included from: internal/catch_reenable_warnings.h + +#define TWOBLUECUBES_CATCH_REENABLE_WARNINGS_H_INCLUDED + +#ifdef __clang__ +# ifdef __ICC // icpc defines the __clang__ macro +# pragma warning(pop) +# else +# pragma clang diagnostic pop +# endif +#elif defined __GNUC__ +# pragma GCC diagnostic pop +#endif + +#endif // TWOBLUECUBES_SINGLE_INCLUDE_CATCH_HPP_INCLUDED + diff --git a/terraphast/cmake/FindGMP.cmake b/terraphast/cmake/FindGMP.cmake new file mode 100644 index 000000000..8259bf3f0 --- /dev/null +++ b/terraphast/cmake/FindGMP.cmake @@ -0,0 +1,21 @@ +# Find module for GMP and its C++ bindings (gmpxx). +# Sets GMP_FOUND and the cache entries GMP_INCLUDE_DIR, GMP_LIBRARIES and GMPXX_LIBRARIES. +# When GMP is found, defines the imported INTERFACE targets `gmp` and `gmpxx`. +find_path(GMP_INCLUDE_DIR NAMES gmp.h gmpxx.h) +find_library(GMP_LIBRARIES NAMES gmp libgmp) +find_library(GMPXX_LIBRARIES NAMES gmpxx libgmpxx) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(GMP DEFAULT_MSG GMP_INCLUDE_DIR GMP_LIBRARIES GMPXX_LIBRARIES) +mark_as_advanced(GMP_INCLUDE_DIR GMP_LIBRARIES GMPXX_LIBRARIES) + +# Create each target only on success and only once: add_library() fails on a duplicate +# target name, and *-NOTFOUND values must not end up in the usage requirements. +if(GMP_FOUND AND NOT TARGET gmp) + add_library(gmp INTERFACE IMPORTED) + set_target_properties(gmp PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${GMP_INCLUDE_DIR}" INTERFACE_LINK_LIBRARIES "${GMP_LIBRARIES}") +endif() +if(GMP_FOUND AND NOT TARGET gmpxx) + add_library(gmpxx INTERFACE IMPORTED) + set_target_properties(gmpxx PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${GMP_INCLUDE_DIR}" INTERFACE_LINK_LIBRARIES "${GMPXX_LIBRARIES}") +endif() \ No newline 
at end of file diff --git a/terraphast/documentation/Changelog.md b/terraphast/documentation/Changelog.md new file mode 100644 index 000000000..79359e7b1 --- /dev/null +++ b/terraphast/documentation/Changelog.md @@ -0,0 +1,27 @@ +Since 1st meeting +================= + +Optimizations +------------- + +* implement bitvector with rank support +* use bitvector instead of index vectors for subsets +* inline lots of often-used methods +* deduplicate constraints +* remap constraints (removing inner nodes from the numbering) +* halved union-find storage by out-of-bounds parent trick +* avoid allocations by reusing old storage +* some micro-optimizations + +Fixes and Features +------------------ + +* fast terrace check without traversing the tree to the end +* fixed subtree computation (problematic input data caused an assertion to fail) +* fix counting bug (communication problem) +* don't count incorrectly rooted trees +* extracted all computations to callback methods +* implemented logging + stack state decorators +* implemented isomorphy check +* Visual C++ compatibility checked with appveyor +* extracted intrinsics to stay portable diff --git a/terraphast/documentation/Dependencies.md b/terraphast/documentation/Dependencies.md new file mode 100644 index 000000000..1c12a4352 --- /dev/null +++ b/terraphast/documentation/Dependencies.md @@ -0,0 +1,21 @@ + +Dependencies +============ + +This library has multiple dependencies. + + +Detailed List +------------- + +* [Catch](https://github.com/philsquared/Catch). The header `catch.hpp` is supposed to be in the default include-path. The framework is available in multiple distros under the name `catch`. +* ... 
+ +Copyable List for installation +------------------------------ + + +``` +catch +``` + diff --git a/terraphast/documentation/walkthrough.md b/terraphast/documentation/walkthrough.md new file mode 100644 index 000000000..cb2205788 --- /dev/null +++ b/terraphast/documentation/walkthrough.md @@ -0,0 +1,30 @@ +## A Short Guide to the Codebase + +This is a short and rather shallow guide to the codebase. Starting from main.cpp, you'll see the crucial steps and data structures involved in solving the terrace detection and enumeration problem. + +### 0) Interpreting the CLI Arguments +If two file paths are provided, it is assumed that the first path points to the tree file (.nwk) and the second path points to the data file (.data). + +If one file path Y/X is provided, it is assumed that the files X.nwk and X.data are present in the directory Y. + +### 1) Parsing the Input Files +This is done by calling terraces::parse_nwk and terraces::parse_bitmatrix. Both throw exceptions if the input files are not in the right format. If the .data file does not contain a species that possesses all gene sites, this is denoted by a terraces::none value in the std::pair returned by terraces::parse_bitmatrix. In this case, the current course of action is to exit with error code 1. + +### 2) Re-Rooting the Input Tree +A call to terraces::reroot_at_taxon_inplace re-roots the given tree at the given species. This is done in-place by traversing the input tree from the given species to the original root, and adjusting all edges so that the parent reference of every node points to the node that was traversed before. This procedure has O(tree-height) time complexity. + +### 3) Extracting Subtrees +Using an occurrence bitmatrix (is gene i present in species j?) and the re-rooted tree, all subtrees of this tree are extracted. 
+ +### 4) Computing Constraints +By collecting the left- and rightmost nodes x and y for every inner node i, we establish lowest-common-ancestor relationships of the form lca(x, y) = i. With this information, we then obtain our constraints of the form lca(x, y) < lca(x, z) which means that the lowest common ancestor node of x and y is a descendant of the lca of x and z. + +### 5) Deduplicating Constraints +Constraint calculation can result in constraints being found multiple times. terraces::deduplicate_constraints ensures that the given constraints vector contains only unique elements by removing all duplicates. + +### 6) Supertree-Assembly and Output +Now that we have extracted the constraints from the re-rooted version of our tree, we assemble our supertrees according to Constantinescu's Algorithm [CS95] which merges sets of nodes so that for each constraint lca(i, j) < lca(k, l), the sets of nodes i and j are merged. This is done for every constraint. In the end, if at least two node sets remain, there's at least one supertree. This gives us the leaves under the two subtrees under our new root's non-comprehensive-leaf child. In these two subtrees, we then use the constraints to further narrow down the tree's overall structure. Doing so, we can end up with subtrees with more than two subtrees and without constraints. In this case, we enumerate all possible subtree topologies. + + +### References +[CS95] Mariana Constantinescu and David Sankoff. An efficient algorithm for supertrees. Journal of Classification, 12(1):101–112, 1995. \ No newline at end of file diff --git a/terraphast/doxygen.conf b/terraphast/doxygen.conf new file mode 100644 index 000000000..4abf8fd9d --- /dev/null +++ b/terraphast/doxygen.conf @@ -0,0 +1,2500 @@ +# Doxyfile 1.8.13 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project.
+# +# All text after a double hash (##) is considered a comment and is placed in +# front of the TAG it is preceding. +# +# All text after a single hash (#) is considered a comment and will be ignored. +# The format is: +# TAG = value [value, ...] +# For lists, items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (\" \"). + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all text +# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv +# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv +# for the list of possible encodings. +# The default value is: UTF-8. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by +# double-quotes, unless you are using Doxywizard) that should identify the +# project for which the documentation is generated. This name is used in the +# title of most generated pages and in a few other places. +# The default value is: My Project. + +PROJECT_NAME = "terraphast" + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. This +# could be handy for archiving the generated documentation or if some version +# control system is used. + +PROJECT_NUMBER = + +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give viewer a +# quick idea about the purpose of the project. Keep the description short. + +PROJECT_BRIEF = "An efficient CPP terraces detector and enumerator." 
+ +# With the PROJECT_LOGO tag one can specify a logo or an icon that is included +# in the documentation. The maximum height of the logo should not exceed 55 +# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy +# the logo to the output directory. + +PROJECT_LOGO = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path +# into which the generated documentation will be written. If a relative path is +# entered, it will be relative to the location where doxygen was started. If +# left blank the current directory will be used. + +OUTPUT_DIRECTORY = "documentation" + +# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- +# directories (in 2 levels) under the output directory of each output format and +# will distribute the generated files over these directories. Enabling this +# option can be useful when feeding doxygen a huge amount of source files, where +# putting all generated files in the same directory would otherwise causes +# performance problems for the file system. +# The default value is: NO. + +CREATE_SUBDIRS = NO + +# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII +# characters to appear in the names of generated files. If set to NO, non-ASCII +# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode +# U+3044. +# The default value is: NO. + +ALLOW_UNICODE_NAMES = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. 
+# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, +# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), +# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, +# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), +# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, +# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, +# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, +# Ukrainian and Vietnamese. +# The default value is: English. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member +# descriptions after the members that are listed in the file and class +# documentation (similar to Javadoc). Set to NO to disable this. +# The default value is: YES. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief +# description of a member or function before the detailed description +# +# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. +# The default value is: YES. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator that is +# used to form the text in various listings. Each string in this list, if found +# as the leading text of the brief description, will be stripped from the text +# and the result, after processing the whole list, is used as the annotated +# text. Otherwise, the brief description is used as-is. If left blank, the +# following values are used ($name is automatically replaced with the name of +# the entity):The $name class, The $name widget, The $name file, is, provides, +# specifies, contains, represents, a, an and the. 
+ +ABBREVIATE_BRIEF = "The $name class" \ + "The $name widget" \ + "The $name file" \ + is \ + provides \ + specifies \ + contains \ + represents \ + a \ + an \ + the + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# doxygen will generate a detailed section even if there is only a brief +# description. +# The default value is: NO. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. +# The default value is: NO. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path +# before files name in the file list and in the header files. If set to NO the +# shortest path that makes the file name unique will be used +# The default value is: YES. + +FULL_PATH_NAMES = YES + +# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. +# Stripping is only done if one of the specified strings matches the left-hand +# part of the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the path to +# strip. +# +# Note that you can specify absolute paths here, but also relative paths, which +# will be relative from the directory where doxygen is started. +# This tag requires that the tag FULL_PATH_NAMES is set to YES. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the +# path mentioned in the documentation of a class, which tells the reader which +# header file to include in order to use a class. If left blank only the name of +# the header file containing the class definition is used. 
Otherwise one should +# specify the list of include paths that are normally passed to the compiler +# using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but +# less readable) file names. This can be useful is your file systems doesn't +# support long names like on DOS, Mac, or CD-ROM. +# The default value is: NO. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the +# first line (until the first dot) of a Javadoc-style comment as the brief +# description. If set to NO, the Javadoc-style will behave just like regular Qt- +# style comments (thus requiring an explicit @brief command for a brief +# description.) +# The default value is: NO. + +JAVADOC_AUTOBRIEF = NO + +# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first +# line (until the first dot) of a Qt-style comment as the brief description. If +# set to NO, the Qt-style will behave just like regular Qt-style comments (thus +# requiring an explicit \brief command for a brief description.) +# The default value is: NO. + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a +# multi-line C++ special comment block (i.e. a block of //! or /// comments) as +# a brief description. This used to be the default behavior. The new default is +# to treat a multi-line C++ comment block as a detailed description. Set this +# tag to YES if you prefer the old behavior instead. +# +# Note that setting this tag to YES also means that rational rose comments are +# not recognized any more. +# The default value is: NO. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the +# documentation from any documented member that it re-implements. +# The default value is: YES. 
+ +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new +# page for each member. If set to NO, the documentation of a member will be part +# of the file/class/namespace that contains it. +# The default value is: NO. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen +# uses this value to replace tabs by spaces in code fragments. +# Minimum value: 1, maximum value: 16, default value: 4. + +TAB_SIZE = 4 + +# This tag can be used to specify a number of aliases that act as commands in +# the documentation. An alias has the form: +# name=value +# For example adding +# "sideeffect=@par Side Effects:\n" +# will allow you to put the command \sideeffect (or @sideeffect) in the +# documentation, which will result in a user-defined paragraph with heading +# "Side Effects:". You can put \n's in the value part of an alias to insert +# newlines. + +ALIASES = + +# This tag can be used to specify a number of word-keyword mappings (TCL only). +# A mapping has the form "name=value". For example adding "class=itcl::class" +# will allow you to use the command class in the itcl::class meaning. + +TCL_SUBST = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources +# only. Doxygen will then generate output that is more tailored for C. For +# instance, some of the names that are used will be different. The list of all +# members will be omitted, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or +# Python sources only. Doxygen will then generate output that is more tailored +# for that language. For instance, namespaces will be presented as packages, +# qualified scopes will look different, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources. 
Doxygen will then generate output that is tailored for Fortran. +# The default value is: NO. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for VHDL. +# The default value is: NO. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given +# extension. Doxygen has a built-in mapping, but you can override or extend it +# using this tag. The format is ext=language, where ext is a file extension, and +# language is one of the parsers supported by doxygen: IDL, Java, Javascript, +# C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran: +# FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran: +# Fortran. In the later case the parser tries to guess whether the code is fixed +# or free formatted code, this is the default for Fortran type files), VHDL. For +# instance to make doxygen treat .inc files as Fortran files (default is PHP), +# and .f files as C (default is Fortran), use: inc=Fortran f=C. +# +# Note: For files without extension you can use no_extension as a placeholder. +# +# Note that for custom extensions you also need to set FILE_PATTERNS otherwise +# the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments +# according to the Markdown format, which allows for more readable +# documentation. See http://daringfireball.net/projects/markdown/ for details. +# The output of markdown processing is further processed by doxygen, so you can +# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in +# case of backward compatibilities issues. +# The default value is: YES. 
+ +MARKDOWN_SUPPORT = YES + +# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up +# to that level are automatically included in the table of contents, even if +# they do not have an id attribute. +# Note: This feature currently applies only to Markdown headings. +# Minimum value: 0, maximum value: 99, default value: 0. +# This tag requires that the tag MARKDOWN_SUPPORT is set to YES. + +TOC_INCLUDE_HEADINGS = 0 + +# When enabled doxygen tries to link words that correspond to documented +# classes, or namespaces to their corresponding documentation. Such a link can +# be prevented in individual cases by putting a % sign in front of the word or +# globally by setting AUTOLINK_SUPPORT to NO. +# The default value is: YES. + +AUTOLINK_SUPPORT = YES + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should set this +# tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); +# versus func(std::string) {}). This also make the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. +# The default value is: NO. + +BUILTIN_STL_SUPPORT = YES + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. +# The default value is: NO. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: +# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen +# will parse them like normal C++ but will assume all classes use public instead +# of private inheritance when no explicit protection keyword is present. +# The default value is: NO. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate +# getter and setter methods for a property. 
Setting this option to YES will make +# doxygen to replace the get and set methods by a property in the documentation. +# This will only work if the methods are indeed getting or setting a simple +# type. If this is not the case, or you want to show the methods anyway, you +# should set this option to NO. +# The default value is: YES. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. +# The default value is: NO. + +DISTRIBUTE_GROUP_DOC = NO + +# If one adds a struct or class to a group and this option is enabled, then also +# any nested class or struct is added to the same group. By default this option +# is disabled and one has to add nested compounds explicitly via \ingroup. +# The default value is: NO. + +GROUP_NESTED_COMPOUNDS = NO + +# Set the SUBGROUPING tag to YES to allow class member groups of the same type +# (for instance a group of public functions) to be put as a subgroup of that +# type (e.g. under the Public Functions section). Set it to NO to prevent +# subgrouping. Alternatively, this can be done per class using the +# \nosubgrouping command. +# The default value is: YES. + +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions +# are shown inside the group in which they are included (e.g. using \ingroup) +# instead of on a separate page (for HTML and Man pages) or section (for LaTeX +# and RTF). +# +# Note that this feature does not work in combination with +# SEPARATE_MEMBER_PAGES. +# The default value is: NO. 
+ +INLINE_GROUPED_CLASSES = NO + +# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions +# with only public data fields or simple typedef fields will be shown inline in +# the documentation of the scope in which they are defined (i.e. file, +# namespace, or group documentation), provided this scope is documented. If set +# to NO, structs, classes, and unions are shown on a separate page (for HTML and +# Man pages) or section (for LaTeX and RTF). +# The default value is: NO. + +INLINE_SIMPLE_STRUCTS = NO + +# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or +# enum is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically be +# useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. +# The default value is: NO. + +TYPEDEF_HIDES_STRUCT = NO + +# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This +# cache is used to resolve symbols given their name and scope. Since this can be +# an expensive process and often the same symbol appears multiple times in the +# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small +# doxygen will become slower. If the cache is too large, memory is wasted. The +# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range +# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 +# symbols. At the end of a run doxygen will report the cache usage and suggest +# the optimal cache size from a speed point of view. +# Minimum value: 0, maximum value: 9, default value: 0. 
+ +LOOKUP_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in +# documentation are documented, even if no documentation was available. Private +# class members and static file members will be hidden unless the +# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. +# Note: This will also disable the warnings about undocumented members that are +# normally produced when WARNINGS is set to YES. +# The default value is: NO. + +EXTRACT_ALL = YES + +# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will +# be included in the documentation. +# The default value is: NO. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal +# scope will be included in the documentation. +# The default value is: NO. + +EXTRACT_PACKAGE = NO + +# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be +# included in the documentation. +# The default value is: NO. + +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined +# locally in source files will be included in the documentation. If set to NO, +# only classes defined in header files are included. Does not have any effect +# for Java sources. +# The default value is: YES. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. If set to YES, local methods, +# which are defined in the implementation section but not in the interface are +# included in the documentation. If set to NO, only methods in the interface are +# included. +# The default value is: NO. 
+ +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base name of +# the file that contains the anonymous namespace. By default anonymous namespace +# are hidden. +# The default value is: NO. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all +# undocumented members inside documented classes or files. If set to NO these +# members will be included in the various overviews, but no documentation +# section is generated. This option has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. If set +# to NO, these classes will be included in the various overviews. This option +# has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend +# (class|struct|union) declarations. If set to NO, these declarations will be +# included in the documentation. +# The default value is: NO. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any +# documentation blocks found inside the body of a function. If set to NO, these +# blocks will be appended to the function's detailed documentation block. +# The default value is: NO. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation that is typed after a +# \internal command is included. If the tag is set to NO then the documentation +# will be excluded. Set it to YES to include the internal documentation. +# The default value is: NO. 
+ +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file +# names in lower-case letters. If set to YES, upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. +# The default value is: system dependent. + +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with +# their full class and namespace scopes in the documentation. If set to YES, the +# scope will be hidden. +# The default value is: NO. + +HIDE_SCOPE_NAMES = NO + +# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will +# append additional text to a page's title, such as Class Reference. If set to +# YES the compound reference will be hidden. +# The default value is: NO. + +HIDE_COMPOUND_REFERENCE= NO + +# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of +# the files that are included by a file in the documentation of that file. +# The default value is: YES. + +SHOW_INCLUDE_FILES = YES + +# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each +# grouped member an include statement to the documentation, telling the reader +# which file to include in order to use the member. +# The default value is: NO. + +SHOW_GROUPED_MEMB_INC = NO + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include +# files with double quotes in the documentation rather than with sharp brackets. +# The default value is: NO. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the +# documentation for inline members. +# The default value is: YES. 
+ +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the +# (detailed) documentation of file and class members alphabetically by member +# name. If set to NO, the members will appear in declaration order. +# The default value is: YES. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief +# descriptions of file, namespace and class members alphabetically by member +# name. If set to NO, the members will appear in declaration order. Note that +# this will also influence the order of the classes in the class list. +# The default value is: NO. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the +# (brief and detailed) documentation of class members so that constructors and +# destructors are listed first. If set to NO the constructors will appear in the +# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. +# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief +# member documentation. +# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting +# detailed member documentation. +# The default value is: NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy +# of group names into alphabetical order. If set to NO the group names will +# appear in their defined order. +# The default value is: NO. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by +# fully-qualified names, including namespaces. If set to NO, the class list will +# be sorted only by class name, not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the alphabetical +# list. +# The default value is: NO. 
+ +SORT_BY_SCOPE_NAME = NO + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper +# type resolution of all parameters of a function it will reject a match between +# the prototype and the implementation of a member function even if there is +# only one candidate or it is obvious which candidate to choose by doing a +# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still +# accept a match between prototype and implementation in such cases. +# The default value is: NO. + +STRICT_PROTO_MATCHING = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo +# list. This list is created by putting \todo commands in the documentation. +# The default value is: YES. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test +# list. This list is created by putting \test commands in the documentation. +# The default value is: YES. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug +# list. This list is created by putting \bug commands in the documentation. +# The default value is: YES. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) +# the deprecated list. This list is created by putting \deprecated commands in +# the documentation. +# The default value is: YES. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional documentation +# sections, marked by \if ... \endif and \cond +# ... \endcond blocks. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the +# initial value of a variable or macro / define can have for it to appear in the +# documentation. If the initializer consists of more lines than specified here +# it will be hidden. Use a value of 0 to hide initializers completely. 
The +# appearance of the value of individual variables and macros / defines can be +# controlled using \showinitializer or \hideinitializer command in the +# documentation regardless of this setting. +# Minimum value: 0, maximum value: 10000, default value: 30. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at +# the bottom of the documentation of classes and structs. If set to YES, the +# list will mention the files that were used to generate the documentation. +# The default value is: YES. + +SHOW_USED_FILES = YES + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This +# will remove the Files entry from the Quick Index and from the Folder Tree View +# (if specified). +# The default value is: YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces +# page. This will remove the Namespaces entry from the Quick Index and from the +# Folder Tree View (if specified). +# The default value is: YES. + +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command command input-file, where command is the value of the +# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided +# by doxygen. Whatever the program writes to standard output is used as the file +# version. For an example see the documentation. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. 
You can +# optionally specify a file name after the option, if omitted DoxygenLayout.xml +# will be used as the name of the layout file. +# +# Note that if you run doxygen from a directory containing a file called +# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE +# tag is left empty. + +LAYOUT_FILE = + +# The CITE_BIB_FILES tag can be used to specify one or more bib files containing +# the reference definitions. This must be a list of .bib files. The .bib +# extension is automatically appended if omitted. This requires the bibtex tool +# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. +# For LaTeX the style of the bibliography can be controlled using +# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the +# search path. See also \cite for info how to create references. + +CITE_BIB_FILES = + +#--------------------------------------------------------------------------- +# Configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated to +# standard output by doxygen. If QUIET is set to YES this implies that the +# messages are off. +# The default value is: NO. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES +# this implies that the warnings are on. +# +# Tip: Turn warnings on while writing the documentation. +# The default value is: YES. + +WARNINGS = YES + +# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate +# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag +# will automatically be disabled. +# The default value is: YES. 
+ +WARN_IF_UNDOCUMENTED = YES + +# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some parameters +# in a documented function, or documenting parameters that don't exist or using +# markup commands wrongly. +# The default value is: YES. + +WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that +# are documented, but have no documentation for their parameters or return +# value. If set to NO, doxygen will only warn about wrong or incomplete +# parameter documentation, but not about the absence of documentation. +# The default value is: NO. + +WARN_NO_PARAMDOC = NO + +# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when +# a warning is encountered. +# The default value is: NO. + +WARN_AS_ERROR = NO + +# The WARN_FORMAT tag determines the format of the warning messages that doxygen +# can produce. The string should contain the $file, $line, and $text tags, which +# will be replaced by the file and line number from which the warning originated +# and the warning text. Optionally the format may contain $version, which will +# be replaced by the version of the file (if it could be obtained via +# FILE_VERSION_FILTER) +# The default value is: $file:$line: $text. + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning and error +# messages should be written. If left blank the output is written to standard +# error (stderr). + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# Configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag is used to specify the files and/or directories that contain +# documented source files. You may enter file names like myfile.cpp or +# directories like /usr/src/myproject. 
Separate the files or directories with +# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING +# Note: If this tag is empty the current directory is searched. + +INPUT = + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses +# libiconv (or the iconv built into libc) for the transcoding. See the libiconv +# documentation (see: http://www.gnu.org/software/libiconv) for the list of +# possible encodings. +# The default value is: UTF-8. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and +# *.h) to filter out the source-files in the directories. +# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# read by doxygen. +# +# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, +# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, +# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, +# *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, +# *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf and *.qsf. 
+ +FILE_PATTERNS = *.c \ + *.cpp \ + *.h \ + *.hpp \ + *.md +#FILE_PATTERNS = *.c \ +# *.cc \ +# *.cxx \ +# *.cpp \ +# *.c++ \ +# *.java \ +# *.ii \ +# *.ixx \ +# *.ipp \ +# *.i++ \ +# *.inl \ +# *.idl \ +# *.ddl \ +# *.odl \ +# *.h \ +# *.hh \ +# *.hxx \ +# *.hpp \ +# *.h++ \ +# *.cs \ +# *.d \ +# *.php \ +# *.php4 \ +# *.php5 \ +# *.phtml \ +# *.inc \ +# *.m \ +# *.markdown \ +# *.md \ +# *.mm \ +# *.dox \ +# *.py \ +# *.pyw \ +# *.f90 \ +# *.f95 \ +# *.f03 \ +# *.f08 \ +# *.f \ +# *.for \ +# *.tcl \ +# *.vhd \ +# *.vhdl \ +# *.ucf \ +# *.qsf + +# The RECURSIVE tag can be used to specify whether or not subdirectories should +# be searched for input files as well. +# The default value is: NO. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. +# +# Note that relative paths are relative to the directory from which doxygen is +# run. + +EXCLUDE = ./Catch/ +EXCLUDE += ./alexis_samples + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input. +# The default value is: NO. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. 
Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories use the pattern */test/* + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or directories +# that contain example code fragments that are included (see the \include +# command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and +# *.h) to filter out the source-files in the directories. If left blank all +# files are included. + +EXAMPLE_PATTERNS = * + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude commands +# irrespective of the value of the RECURSIVE tag. +# The default value is: NO. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or directories +# that contain images that are to be included in the documentation (see the +# \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command: +# +# <filter> <input-file> +# +# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the +# name of an input file. Doxygen will then use the output that the filter +# program writes to standard output. If FILTER_PATTERNS is specified, this tag +# will be ignored. +# +# Note that the filter must not add or remove lines; it is applied before the +# code is scanned, but not when the output code is generated. If lines are added +# or removed, the anchors will not be placed correctly.
+# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# properly processed by doxygen. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: pattern=filter +# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how +# filters are used. If the FILTER_PATTERNS tag is empty or if none of the +# patterns match the file name, INPUT_FILTER is applied. +# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# properly processed by doxygen. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will also be used to filter the input files that are used for +# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). +# The default value is: NO. + +FILTER_SOURCE_FILES = NO + +# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file +# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and +# it is also possible to disable source filtering for a specific pattern using +# *.ext= (so without naming a filter). +# This tag requires that the tag FILTER_SOURCE_FILES is set to YES. + +FILTER_SOURCE_PATTERNS = + +# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that +# is part of the input, its contents will be placed on the main page +# (index.html). This can be useful if you have a project on for instance GitHub +# and want to reuse the introduction page also for the doxygen output. 
+ +USE_MDFILE_AS_MAINPAGE = + +#--------------------------------------------------------------------------- +# Configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will be +# generated. Documented entities will be cross-referenced with these sources. +# +# Note: To get rid of all source code in the generated output, make sure that +# also VERBATIM_HEADERS is set to NO. +# The default value is: NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body of functions, +# classes and enums directly into the documentation. +# The default value is: NO. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any +# special comment blocks from generated source code fragments. Normal C, C++ and +# Fortran comments will always remain visible. +# The default value is: YES. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES then for each documented +# function all documented functions referencing it will be listed. +# The default value is: NO. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES then for each documented function +# all documented entities called/used by that function will be listed. +# The default value is: NO. + +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set +# to YES then the hyperlinks from functions in REFERENCES_RELATION and +# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will +# link to the documentation. +# The default value is: YES. 
+ +REFERENCES_LINK_SOURCE = YES + +# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the +# source code will show a tooltip with additional information such as prototype, +# brief description and links to the definition and documentation. Since this +# will make the HTML file larger and loading of large files a bit slower, you +# can opt to disable this feature. +# The default value is: YES. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +SOURCE_TOOLTIPS = YES + +# If the USE_HTAGS tag is set to YES then the references to source code will +# point to the HTML generated by the htags(1) tool instead of doxygen built-in +# source browser. The htags tool is part of GNU's global source tagging system +# (see http://www.gnu.org/software/global/global.html). You will need version +# 4.8.6 or higher. +# +# To use it do the following: +# - Install the latest version of global +# - Enable SOURCE_BROWSER and USE_HTAGS in the config file +# - Make sure the INPUT points to the root of the source tree +# - Run doxygen as normal +# +# Doxygen will invoke htags (and that will in turn invoke gtags), so these +# tools must be available from the command line (i.e. in the search path). +# +# The result: instead of the source browser generated by doxygen, the links to +# source code will now point to the output of htags. +# The default value is: NO. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a +# verbatim copy of the header file for each class for which an include is +# specified. Set to NO to disable this. +# See also: Section \class. +# The default value is: YES. + +VERBATIM_HEADERS = YES + +# If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the +# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the +# cost of reduced performance.
This can be particularly helpful with template +# rich C++ code for which doxygen's built-in parser lacks the necessary type +# information. +# Note: The availability of this option depends on whether or not doxygen was +# generated with the -Duse-libclang=ON option for CMake. +# The default value is: NO. + +CLANG_ASSISTED_PARSING = NO + +# If clang assisted parsing is enabled you can provide the compiler with command +# line options that you would normally use when invoking the compiler. Note that +# the include paths will already be set by doxygen for the files and directories +# specified with INPUT and INCLUDE_PATH. +# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. + +CLANG_OPTIONS = + +#--------------------------------------------------------------------------- +# Configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all +# compounds will be generated. Enable this if the project contains a lot of +# classes, structs, unions or interfaces. +# The default value is: YES. + +ALPHABETICAL_INDEX = YES + +# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in +# which the alphabetical index list will be split. +# Minimum value: 1, maximum value: 20, default value: 5. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all classes will +# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag +# can be used to specify a prefix (or a list of prefixes) that should be ignored +# while generating the index headers. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. 
+ +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output +# The default value is: YES. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. +# The default directory is: html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each +# generated HTML page (for example: .htm, .php, .asp). +# The default value is: .html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a user-defined HTML header file for +# each generated HTML page. If the tag is left blank doxygen will generate a +# standard header. +# +# To get valid HTML the header file that includes any scripts and style sheets +# that doxygen needs, which is dependent on the configuration options used (e.g. +# the setting GENERATE_TREEVIEW). It is highly recommended to start with a +# default header using +# doxygen -w html new_header.html new_footer.html new_stylesheet.css +# YourConfigFile +# and then modify the file new_header.html. See also section "Doxygen usage" +# for information on how to generate the default header that doxygen normally +# uses. +# Note: The header is subject to change so you typically have to regenerate the +# default header when upgrading to a newer version of doxygen. For a description +# of the possible markers and block names see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each +# generated HTML page. If the tag is left blank doxygen will generate a standard +# footer. See HTML_HEADER for more information on how to generate a default +# footer and what special commands can be used inside the footer. See also +# section "Doxygen usage" for information on how to generate the default footer +# that doxygen normally uses. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style +# sheet that is used by each HTML page. It can be used to fine-tune the look of +# the HTML output. If left blank doxygen will generate a default style sheet. +# See also section "Doxygen usage" for information on how to generate the style +# sheet that doxygen normally uses. +# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as +# it is more robust and this tag (HTML_STYLESHEET) will in the future become +# obsolete. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_STYLESHEET = + +# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined +# cascading style sheets that are included after the standard style sheets +# created by doxygen. Using this option one can overrule certain style aspects. +# This is preferred over using HTML_STYLESHEET since it does not replace the +# standard style sheet and is therefore more robust against future updates. +# Doxygen will copy the style sheet files to the output directory. +# Note: The order of the extra style sheet files is of importance (e.g. the last +# style sheet in the list overrules the setting of the previous ones in the +# list). For an example see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +HTML_EXTRA_STYLESHEET = + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that the +# files will be copied as-is; there are no commands or markers available. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_FILES = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen +# will adjust the colors in the style sheet and background images according to +# this color. Hue is specified as an angle on a colorwheel, see +# http://en.wikipedia.org/wiki/Hue for more information. For instance the value +# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 +# purple, and 360 is red again. +# Minimum value: 0, maximum value: 359, default value: 220. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors +# in the HTML output. For a value of 0 the output will use grayscales only. A +# value of 255 will produce the most vivid colors. +# Minimum value: 0, maximum value: 255, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the +# luminance component of the colors in the HTML output. Values below 100 +# gradually make the output lighter, whereas values above 100 make the output +# darker. The value divided by 100 is the actual gamma applied, so 80 represents +# a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not +# change the gamma. +# Minimum value: 40, maximum value: 240, default value: 80. 
+# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting this +# to YES can help to show when doxygen was last run and thus if the +# documentation is up to date. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_TIMESTAMP = NO + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_DYNAMIC_SECTIONS = NO + +# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries +# shown in the various tree structured indices initially; the user can expand +# and collapse entries dynamically later on. Doxygen will expand the tree to +# such a level that at most the specified number of entries are visible (unless +# a fully collapsed tree already exceeds this amount). So setting the number of +# entries 1 will produce a full collapsed tree by default. 0 is a special value +# representing an infinite number of entries and will result in a full expanded +# tree by default. +# Minimum value: 0, maximum value: 9999, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_INDEX_NUM_ENTRIES = 100 + +# If the GENERATE_DOCSET tag is set to YES, additional index files will be +# generated that can be used as input for Apple's Xcode 3 integrated development +# environment (see: http://developer.apple.com/tools/xcode/), introduced with +# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a +# Makefile in the HTML output directory. 
Running make will produce the docset in +# that directory and running make install will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at +# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_DOCSET = NO + +# This tag determines the name of the docset feed. A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# The default value is: Doxygen generated docs. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# This tag specifies a string that should uniquely identify the documentation +# set bundle. This should be a reverse domain-name style string, e.g. +# com.mycompany.MyDocSet. Doxygen will append .docset to the name. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. +# The default value is: org.doxygen.Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. +# The default value is: Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three +# additional HTML index files: index.hhp, index.hhc, and index.hhk. 
The +# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop +# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on +# Windows. +# +# The HTML Help Workshop contains a compiler that can convert all HTML output +# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML +# files are now used as the Windows 98 help format, and will replace the old +# Windows help format (.hlp) on all Windows platforms in the future. Compressed +# HTML files also contain an index, a table of contents, and you can search for +# words in the documentation. The HTML workshop also contains a viewer for +# compressed HTML files. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_HTMLHELP = NO + +# The CHM_FILE tag can be used to specify the file name of the resulting .chm +# file. You can add a path in front of the file if the result should not be +# written to the html output directory. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_FILE = + +# The HHC_LOCATION tag can be used to specify the location (absolute path +# including file name) of the HTML help compiler (hhc.exe). If non-empty, +# doxygen will try to run the HTML help compiler on the generated index.hhp. +# The file has to be specified with full path. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +HHC_LOCATION = + +# The GENERATE_CHI flag controls if a separate .chi index file is generated +# (YES) or that it should be included in the master .chm file (NO). +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +GENERATE_CHI = NO + +# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) +# and project file content. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. 
+ +CHM_INDEX_ENCODING = + +# The BINARY_TOC flag controls whether a binary table of contents is generated +# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it +# enables the Previous and Next buttons. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members to +# the table of contents of the HTML help documentation and to the tree view. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that +# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help +# (.qch) of the generated HTML documentation. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify +# the file name of the resulting .qch file. The path specified is relative to +# the HTML output folder. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help +# Project output. For more information please see Qt Help Project / Namespace +# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt +# Help Project output. For more information please see Qt Help Project / Virtual +# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- +# folders). +# The default value is: doc. +# This tag requires that the tag GENERATE_QHP is set to YES. 
+ +QHP_VIRTUAL_FOLDER = doc + +# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom +# filter to add. For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's filter section matches. Qt Help Project / Filter Attributes (see: +# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_SECT_FILTER_ATTRS = + +# The QHG_LOCATION tag can be used to specify the location of Qt's +# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the +# generated .qhp file. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be +# generated, together with the HTML files, they form an Eclipse help plugin. To +# install this plugin and make it available under the help contents menu in +# Eclipse, the contents of the directory containing the HTML and XML files needs +# to be copied into the plugins directory of eclipse. The name of the directory +# within the plugins directory should be the same as the ECLIPSE_DOC_ID value. +# After copying Eclipse needs to be restarted before the help appears. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the Eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have this +# name. Each documentation set should have its own identifier. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# If you want full control over the layout of the generated HTML pages it might +# be necessary to disable the index and replace it with your own. The +# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top +# of each HTML page. A value of NO enables the index and the value YES disables +# it. Since the tabs in the index contain the same information as the navigation +# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +DISABLE_INDEX = NO + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. If the tag +# value is set to YES, a side panel will be generated containing a tree-like +# index structure (just like the one that is generated for HTML Help). For this +# to work a browser that supports JavaScript, DHTML, CSS and frames is required +# (i.e. any modern browser). Windows users are probably better off using the +# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can +# further fine-tune the look of the index. As an example, the default style +# sheet generated by doxygen has an example that shows how to put an image at +# the root of the tree instead of the PROJECT_NAME. Since the tree basically has +# the same information as the tab index, you could consider setting +# DISABLE_INDEX to YES when enabling this option. +# The default value is: NO. 
+# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_TREEVIEW = NO + +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that +# doxygen will group on one line in the generated HTML documentation. +# +# Note that a value of 0 will completely suppress the enum values from appearing +# in the overview section. +# Minimum value: 0, maximum value: 20, default value: 4. +# This tag requires that the tag GENERATE_HTML is set to YES. + +ENUM_VALUES_PER_LINE = 4 + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used +# to set the initial width (in pixels) of the frame in which the tree is shown. +# Minimum value: 0, maximum value: 1500, default value: 250. +# This tag requires that the tag GENERATE_HTML is set to YES. + +TREEVIEW_WIDTH = 250 + +# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to +# external symbols imported via tag files in a separate window. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of LaTeX formulas included as images in +# the HTML documentation. When you change the font size after a successful +# doxygen run you need to manually remove any form_*.png images from the HTML +# output directory to force them to be regenerated. +# Minimum value: 8, maximum value: 50, default value: 10. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANSPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are not +# supported properly for IE 6.0, but are supported on all modern browsers. +# +# Note that when changing this option you need to delete any form_*.png files in +# the HTML output directory before the changes have effect. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES.
+ +FORMULA_TRANSPARENT = YES + +# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see +# http://www.mathjax.org) which uses client side Javascript for the rendering +# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX +# installed or if you want to formulas look prettier in the HTML output. When +# enabled you may also need to install MathJax separately and configure the path +# to it using the MATHJAX_RELPATH option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +USE_MATHJAX = NO + +# When MathJax is enabled you can set the default output format to be used for +# the MathJax output. See the MathJax site (see: +# http://docs.mathjax.org/en/latest/output.html) for more details. +# Possible values are: HTML-CSS (which is slower, but has the best +# compatibility), NativeMML (i.e. MathML) and SVG. +# The default value is: HTML-CSS. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_FORMAT = HTML-CSS + +# When MathJax is enabled you need to specify the location relative to the HTML +# output directory using the MATHJAX_RELPATH option. The destination directory +# should contain the MathJax.js script. For instance, if the mathjax directory +# is located at the same level as the HTML output directory, then +# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax +# Content Delivery Network so you can quickly see the result without installing +# MathJax. However, it is strongly recommended to install a local copy of +# MathJax from http://www.mathjax.org before deployment. +# The default value is: http://cdn.mathjax.org/mathjax/latest. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest + +# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax +# extension names that should be enabled during MathJax rendering. 
For example +# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_EXTENSIONS = + +# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces +# of code that will be used on startup of the MathJax code. See the MathJax site +# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an +# example see the documentation. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_CODEFILE = + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box for +# the HTML output. The underlying search engine uses javascript and DHTML and +# should work on any modern browser. Note that when using HTML help +# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) +# there is already a search function so this one should typically be disabled. +# For large projects the javascript based search engine can be slow, then +# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to +# search using the keyboard; to jump to the search box use + S +# (what the is depends on the OS and browser, but it is typically +# , /