diff --git a/bin/polyply b/bin/polyply index a9a8dedd..2a795530 100755 --- a/bin/polyply +++ b/bin/polyply @@ -23,7 +23,7 @@ import argparse from pathlib import Path import numpy as np import polyply -from polyply import (gen_itp, gen_coords, gen_seq, DATA_PATH) +from polyply import (gen_itp, gen_coords, gen_seq, gen_replicas, DATA_PATH) from polyply.src.load_library import load_library from polyply.src.logging import LOGGER, LOGLEVELS @@ -49,7 +49,9 @@ def main(): # pylint: disable=too-many-locals,too-many-statements # List of Subparsers for the different tools parser_gen_itp = subparsers.add_parser('gen_params', aliases=['gen_itp']) - parser_gen_coords = subparsers.add_parser('gen_coords') + # Make this a parent parser so we can later wrap replica around it and + # get all the arguments we need + parser_gen_coords = argparse.ArgumentParser(add_help=False) parser_gen_seq = subparsers.add_parser('gen_seq') # ============================================================================= @@ -69,6 +71,7 @@ def main(): # pylint: disable=too-many-locals,too-many-statements help='Input file (ITP|FF)', nargs="*") file_group.add_argument('-o', dest='outpath', type=Path, help='Output ITP (ITP)') + seq_group = file_group.add_mutually_exclusive_group(required=True) seq_group.add_argument('-seq', dest='seq', type=str, nargs='+', help='A linear sequence of residue names.') @@ -161,7 +164,25 @@ def main(): # pylint: disable=too-many-locals,too-many-statements backmap_group.add_argument('-back_fudge', dest='bfudge', type=float, default=0.4, help='factor by which to scale the coordinates when backmapping.') - parser_gen_coords.set_defaults(func=gen_coords) + # gen_coords specific arguments + subparser_gen_coords = subparsers.add_parser('gen_coords', parents=[parser_gen_coords]) + subparser_gen_coords.set_defaults(func=gen_coords) + + # gen_replica specific arguments + subparser_gen_replicas = subparsers.add_parser('gen_replicas', parents=[parser_gen_coords]) + replica_group = subparser_gen_replicas.add_argument_group('Options for replica generation') + replica_group.add_argument("-nrepl", dest='nrepl', type=int, + help="number of system replicas to generate") + replica_group.add_argument("-mdp", dest='mdppaths', type=Path, default=[Path("md.mdp")], + help="mdp file (.mdp)", nargs="*") + replica_group.add_argument("-log", dest='logpath', type=Path, default=Path("polyply.log"), + help="logfile name") + replica_group.add_argument("-wdir", dest='workdir', type=Path, default=os.path.curdir, + help="logfile name") + replica_group.add_argument("-timeout", dest='timeout', type=float, default=None, + help="time after which to timeout gen_coords") + + subparser_gen_replicas.set_defaults(func=gen_replicas) # ============================================================================ # Input Arguments for the sequence generation tool diff --git a/polyply/__init__.py b/polyply/__init__.py index 9ef7d384..e6db655d 100644 --- a/polyply/__init__.py +++ b/polyply/__init__.py @@ -49,3 +49,4 @@ from .src.gen_itp import gen_itp, gen_params from .src.gen_coords import gen_coords from .src.gen_seq import gen_seq +from .src.gen_replicas import gen_replicas diff --git a/polyply/src/gen_coords.py b/polyply/src/gen_coords.py index ae33183a..513e234c 100644 --- a/polyply/src/gen_coords.py +++ b/polyply/src/gen_coords.py @@ -130,6 +130,8 @@ def gen_coords(toppath, Path to topology file outpath: :class:pathlib.Path Path to coordinate file + mdppath: :class:pathlib.Path + Path to mdp file default None name: str Name of the molecule build: :class:pathlib.Path diff --git a/polyply/src/gen_replicas.py b/polyply/src/gen_replicas.py new file mode 100644 index 00000000..0c142563 --- /dev/null +++ b/polyply/src/gen_replicas.py @@ -0,0 +1,81 @@ +""" +Generate multiple system replicas using gen_coords. In essence +this is a wrapper for gen_coords which also makes use of the +gromacs wrapper libary. +""" +import os +from pathlib import Path +from vermouth.log_helpers import StyleAdapter, get_logger +from polyply import gen_coords +from polyply.src.gmx_wrapper.workflow import GMXRunner + +LOGGER = StyleAdapter(get_logger(__name__)) + +def gen_replicas(nrepl, + mdppaths, + logpath, + workdir, + timeout, + **kwargs): + """ + Generate multiple coordinate replicas of a system. + this function is a wrapper around gen_coords, which + also has some options for running simulations. + + Paremeters + ---------- + nrepl: int + number of replicas to generate + mdppaths: list[Path] + file path to mdp-file + logpath: Path + file path to log-file + workdir: Path + workdir to use + timeout: float + time after which gen_coords is timed out + """ + mdps_resolved = [] + for mdppath in mdppaths: + mdps_resolved.append(mdppath.resolve()) + + workdir = workdir.resolve() + logpath = logpath.resolve() + kwargs["toppath"] = kwargs["toppath"].resolve() + base_name = kwargs['outpath'].name + os.chdir(workdir) + + for idx in range(0, nrepl): + replica_dir = workdir / Path(str(idx)) + os.mkdir(replica_dir) + os.chdir(replica_dir) + kwargs['outpath'] = replica_dir / Path(base_name) + gen_coords(**kwargs) + + for jdx, mdppath in enumerate(mdps_resolved): + mdpname = str(mdppath.stem) + deffnm = str(jdx) + "_" + mdpname + # Get previous coordinates + if jdx == 0: + startpath = kwargs["outpath"] + else: + startpath = replica_dir / Path(prev_deffnm + ".gro") + + # Running GROMACS protocol + LOGGER.info(f"running simulation {mdpname}") + sim_runner = GMXRunner(startpath, + kwargs["toppath"], + mdppath, + deffnm=deffnm) + status = sim_runner.run() + prev_deffnm = deffnm + + with open(logpath, "a") as logfile: + if status != 0: + logfile.write(f"replica {idx} step {jdx} has failed with unkown gmx issue\n") + if not sim_runner.success: + logfile.write(f"replica {idx} step {jdx} has failed with infinite forces\n") + + logfile.write(f"replica {idx} step {jdx} successfully completed\n") + + os.chdir(workdir) diff --git a/polyply/src/gmx_wrapper/workflow.py b/polyply/src/gmx_wrapper/workflow.py new file mode 100644 index 00000000..e0f72d62 --- /dev/null +++ b/polyply/src/gmx_wrapper/workflow.py @@ -0,0 +1,68 @@ +""" +Useful classes for integrating workflows with gromacs wrapper. +""" +import os +from pathlib import Path +import gromacs as gmx +from gromacs.run import MDrunner +import gromacs.environment +gromacs.environment.flags['capture_output'] = "file" + +class GMXRunner(MDrunner): + """ + Run energy minization on a polyply topology. + To use this method instantiate the class and + then run the simulation with run() or run_check(). + The latter returns a success status of the simulation. + After the simulation the coordinates in the topolgy + object will be updated in place. + """ + def __init__(self, + coordpath, + toppath, + mdppath, + dirname=os.path.curdir, + **kwargs): + """ + Set some enviroment variables for the run. + """ + self.coordpath = coordpath + self.toppath = toppath + self.mdppath = mdppath + self.success = False + self.startpath = dirname / Path("start.gro") + self.outpath = dirname / Path(kwargs.get('deffnm', 'confout') + ".gro") + self.tprname = dirname / Path(kwargs.get('deffnm', 'topol') + ".tpr") + self.deffnm = kwargs.get('deffnm', None) + kwargs['s'] = self.tprname + super().__init__(dirname=dirname, **kwargs) + + def prehook(self, **kwargs): + """ + Write the coordinates of the current system and grompp + the input parameters. + """ + # grompp everything in the directory + gmx.grompp(f=self.mdppath, + c=self.coordpath, + p=self.toppath, + o=self.tprname) + + def posthook(self, **kwargs): + """ + Make sure that the energy minization did not result into + infinite forces. + """ + if self.deffnm: + logpath = Path(self.deffnm + ".log") + else: + logpath = Path("md.log") + + with open(logpath, "r") as logfile: + for line in logfile.readlines(): + if "Norm" in line: + maxforce = line.strip().split()[-1] + if maxforce == "inf": + self.success = False + else: + self.success = True diff --git a/polyply/src/map_to_molecule.py b/polyply/src/map_to_molecule.py index eee77466..8972a43d 100644 --- a/polyply/src/map_to_molecule.py +++ b/polyply/src/map_to_molecule.py @@ -13,6 +13,7 @@ # limitations under the License. import networkx as nx +from vermouth.graph_utils import make_residue_graph from polyply.src.processor import Processor def tag_exclusions(node_to_block, force_field): @@ -128,6 +129,7 @@ def match_nodes_to_blocks(self, meta_molecule): regular_graph = nx.Graph() restart_graph = nx.Graph() restart_attr = nx.get_node_attributes(meta_molecule, "from_itp") + restart_graph.add_nodes_from(restart_attr.keys()) # in case we only have a single residue # we assume the user is sane and that residue is not from @@ -135,14 +137,13 @@ def match_nodes_to_blocks(self, meta_molecule): if len(meta_molecule.nodes) == 1: regular_graph.add_nodes_from(meta_molecule.nodes) - # this breaks down when to proteins are directly linked - # because they would appear as one connected component - # and not two seperate components referring to two molecules - # but that is an edge-case we can worry about later + # this will falesly also connect two molecules with the + # same molecule name, if they are from_itp and consecutively + # we deal with that below for idx, jdx in nx.dfs_edges(meta_molecule): - # the two nodes are restart nodes if idx in restart_attr and jdx in restart_attr: - restart_graph.add_edge(idx, jdx) + if restart_attr[idx] == restart_attr[jdx]: + restart_graph.add_edge(idx, jdx) else: regular_graph.add_edge(idx, jdx) @@ -151,16 +152,34 @@ def match_nodes_to_blocks(self, meta_molecule): self.node_to_block[node] = meta_molecule.nodes[node]["resname"] # fragment nodes match parts of blocks, which describe molecules - # with more than one residue + # with more than one residue. Sometimes multiple molecules come + # after each other in that case the connected component needs to + # be an integer multiple of the block for fragment in nx.connected_components(restart_graph): - block_name = restart_attr[list(fragment)[0]] - if all([restart_attr[node] == block_name for node in fragment]): - self.fragments.append(fragment) - for node in fragment: - self.node_to_block[node] = block_name - self.node_to_fragment[node] = len(self.fragments) - 1 + frag_nodes = list(fragment) + block = self.force_field.blocks[restart_attr[frag_nodes[0]]] + block_res = make_residue_graph(block, attrs=('resid', 'resname')) + len_block = len(block_res) + len_frag = len(fragment) + # here we check if the fragment is an integer multiple + # of the multiresidue block + if len_frag%len_block == 0: + n_blocks = len_frag//len_block else: - raise IOError + # if it is not raise an error + molname = restart_attr[frag_nodes[0]] + msg = (f"When mapping the molecule {molname} onto the residue graph " + "nodes labeled with from_itp, a mismatch in the length between " + "the provided molecule and the residue graph is found. Make " + "sure that all residues are in the residue graph and input itp-file.") + raise IOError(msg) + + for fdx in range(0, n_blocks): + current_frag_nodes = frag_nodes[fdx*len_block: (fdx+1)*len_block] + self.fragments.append(current_frag_nodes) + for node in current_frag_nodes: + self.node_to_block[node] = restart_attr[frag_nodes[0]] + self.node_to_fragment[node] = len(self.fragments) - 1 def add_blocks(self, meta_molecule): """ diff --git a/polyply/tests/test_map_to_molecule.py b/polyply/tests/test_map_to_molecule.py index a827f264..aa62a500 100644 --- a/polyply/tests/test_map_to_molecule.py +++ b/polyply/tests/test_map_to_molecule.py @@ -147,7 +147,64 @@ Interaction(atoms=(7, 8), parameters=['1', '0.37', '8000'], meta={})], [(0, 1), (1, 2), (2, 3), (5, 6), (6, 7), (7, 8)], 9, - {0: "MIX", 1: "MIX", 3: "MIX", 4:"MIX"}) + {0: "MIX", 1: "MIX", 3: "MIX", 4:"MIX"}), + # test two consecutive multiblock fragments + (""" + [ moleculetype ] + ; name nexcl. + MIX 1 + ; + [ atoms ] + 1 SN1a 1 R1 C1 1 0.000 45 + 2 SN1a 1 R1 C2 1 0.000 45 + 3 SC1 2 R2 C1 2 0.000 45 + 4 SC1 2 R2 C2 2 0.000 45 + [ bonds ] + ; back bone bonds + 1 2 1 0.37 7000 + 2 3 1 0.37 7000 + 3 4 1 0.37 8000 + """, + ["R1", "R2", "R1", "R2"], + [Interaction(atoms=(0, 1), parameters=['1', '0.37', '7000'], meta={}), + Interaction(atoms=(1, 2), parameters=['1', '0.37', '7000'], meta={}), + Interaction(atoms=(2, 3), parameters=['1', '0.37', '8000'], meta={}), + Interaction(atoms=(4, 5), parameters=['1', '0.37', '7000'], meta={}), + Interaction(atoms=(5, 6), parameters=['1', '0.37', '7000'], meta={}), + Interaction(atoms=(6, 7), parameters=['1', '0.37', '8000'], meta={})], + [(0, 1), (1, 2), (2, 3), (4, 5), (5, 6), (6, 7)], + 8, + {0: "MIX", 1: "MIX", 2: "MIX", 3:"MIX"}), + # test two consecutive multiblock fragments with single residues + # and different mol-name + (""" + [ moleculetype ] + ; name nexcl. + OTR 1 + ; + [ atoms ] + 1 SN1a 1 R2 C1 1 0.000 45 + 2 SN1a 1 R2 C2 1 0.000 45 + [ bonds ] + ; back bone bonds + 1 2 1 0.4 7000 + [ moleculetype ] + ; name nexcl. + MIX 1 + ; + [ atoms ] + 1 SN1a 1 R1 C1 1 0.000 45 + 2 SN1a 1 R1 C2 1 0.000 45 + [ bonds ] + ; back bone bonds + 1 2 1 0.37 7000 + """, + ["R1", "R2"], + [Interaction(atoms=(0, 1), parameters=['1', '0.37', '7000'], meta={}), + Interaction(atoms=(2, 3), parameters=['1', '0.4', '7000'], meta={}),], + [(0, 1), (2, 3),], + 4, + {0: "MIX", 1: "OTR"}) )) def test_multiresidue_block(lines, monomers, bonds, edges, nnodes, from_itp): """ @@ -166,7 +223,6 @@ def test_multiresidue_block(lines, monomers, bonds, edges, nnodes, from_itp): # map to molecule new_meta_mol = polyply.src.map_to_molecule.MapToMolecule(ff).run_molecule(meta_mol) # check that the disconnected molecule is properly done - #print(new_meta_mol.nodes) for node in new_meta_mol.nodes: assert len(new_meta_mol.nodes[node]['graph'].nodes) != 0 @@ -327,3 +383,62 @@ def test_riase_multiresidue_error(lines, monomers, bonds, edges, nnodes, from_it nx.set_node_attributes(meta_mol, from_itp, "from_itp") with pytest.raises(IOError): new_meta_mol = polyply.src.map_to_molecule.MapToMolecule(ff).run_molecule(meta_mol) + +@pytest.mark.parametrize('lines, monomers, from_itp', ( + # we are missing one residue in the multiblock graph + (""" + [ moleculetype ] + ; name nexcl. + MIX 1 + ; + [ atoms ] + 1 SN1a 1 R1 C1 1 0.000 45 + 2 SN1a 1 R1 C2 1 0.000 45 + 3 SC1 2 R2 C1 2 0.000 45 + 4 SC1 2 R2 C2 2 0.000 45 + [ bonds ] + ; back bone bonds + 1 2 1 0.37 7000 + 2 3 1 0.37 7000 + 3 4 1 0.37 8000 + """, + ["R1", "R2", "R1"], + {0: "MIX", 1: "MIX", 2: "MIX"}), + # we have a residue too many in the residue graph provided + (""" + [ moleculetype ] + ; name nexcl. + MIX 1 + ; + [ atoms ] + 1 SN1a 1 R1 C1 1 0.000 45 + 2 SN1a 1 R1 C2 1 0.000 45 + 3 SC1 2 R2 C1 2 0.000 45 + 4 SC1 2 R2 C2 2 0.000 45 + [ bonds ] + ; back bone bonds + 1 2 1 0.37 7000 + 2 3 1 0.37 7000 + 3 4 1 0.37 8000 + """, + ["R1", "R2", "R3", "R1", "R2"], + {0: "MIX", 1: "MIX", 2: "MIX", 3:"MIX"}) + )) +def test_error_missing_residues_multi(lines, monomers, from_itp): + """ + It can happen that there is a length mismatch between a multiblock as identified + from the residue graph sequence and provided in the blocks file. We check that + an error is raised. + """ + lines = textwrap.dedent(lines).splitlines() + ff = vermouth.forcefield.ForceField(name='test_ff') + polyply.src.polyply_parser.read_polyply(lines, ff) + # build the meta-molecule + meta_mol = MetaMolecule(name="test", force_field=ff) + meta_mol.add_monomer(0, monomers[0], []) + for node, monomer in enumerate(monomers[1:]): + meta_mol.add_monomer(node+1, monomer, [(node, node+1)]) + nx.set_node_attributes(meta_mol, from_itp, "from_itp") + # map to molecule + with pytest.raises(IOError): + polyply.src.map_to_molecule.MapToMolecule(ff).run_molecule(meta_mol)