From 5968f834b718df2003f8aadb8b1cf2a142e89952 Mon Sep 17 00:00:00 2001 From: "f.grunewald" Date: Tue, 13 Jun 2023 16:02:57 +0200 Subject: [PATCH 01/82] init draft itp to ff --- bin/polyply | 21 +- polyply/__init__.py | 1 + polyply/src/ff_directive_writer.py | 2 + polyply/src/ffoutput.py | 135 ++++++++++++ polyply/src/fragment_finder.py | 195 ++++++++++++++++++ polyply/src/graph_utils.py | 12 ++ polyply/src/itp_to_ff.py | 320 +++++++++++++++++++++++++++++ 7 files changed, 685 insertions(+), 1 deletion(-) create mode 100644 polyply/src/ff_directive_writer.py create mode 100644 polyply/src/ffoutput.py create mode 100644 polyply/src/fragment_finder.py create mode 100644 polyply/src/itp_to_ff.py diff --git a/bin/polyply b/bin/polyply index 3776c9e9..5a14457c 100755 --- a/bin/polyply +++ b/bin/polyply @@ -23,7 +23,7 @@ import argparse from pathlib import Path import numpy as np import polyply -from polyply import (gen_itp, gen_coords, gen_seq, DATA_PATH) +from polyply import (gen_itp, gen_coords, gen_seq, itp_to_ff, DATA_PATH) from polyply.src.load_library import load_ff_library from polyply.src.logging import LOGGER, LOGLEVELS @@ -51,6 +51,7 @@ def main(): # pylint: disable=too-many-locals,too-many-statements parser_gen_itp = subparsers.add_parser('gen_params', aliases=['gen_itp']) parser_gen_coords = subparsers.add_parser('gen_coords') parser_gen_seq = subparsers.add_parser('gen_seq') + parser_itp_ff = subparsers.add_parser('itp_to_ff') # ============================================================================= # Input Arguments for the itp generation tool @@ -225,6 +226,24 @@ def main(): # pylint: disable=too-many-locals,too-many-statements default=[]) parser_gen_seq.set_defaults(func=gen_seq) + # ============================================================================= + # Input Arguments for the itp to ff tool + # ============================================================================= + + parser_itp_ff.add_argument('-v', dest='verbosity', 
action='count', + help='Enable debug logging output. Can be given ' + 'multiple times.', default=0) + + parser_itp_ff.add_argument('-i', dest="itppath") + parser_itp_ff.add_argument('-sm', dest="fragment_smiles", nargs='*') + parser_itp_ff.add_argument('-rn', dest="resnames", nargs='*') + parser_itp_ff.add_argument('-tp',dest="term_prefix", default="ter") + parser_itp_ff.add_argument('-o', dest="outpath", type=Path) + parser_itp_ff.add_argument('-c', dest="charge", type=float, default=0.0) + + parser_itp_ff.set_defaults(func=itp_to_ff) + + # ============================================================================ # Deal with queries of the polyply library # ============================================================================ diff --git a/polyply/__init__.py b/polyply/__init__.py index 9ef7d384..2ae08752 100644 --- a/polyply/__init__.py +++ b/polyply/__init__.py @@ -49,3 +49,4 @@ from .src.gen_itp import gen_itp, gen_params from .src.gen_coords import gen_coords from .src.gen_seq import gen_seq +from .src.itp_to_ff import itp_to_ff diff --git a/polyply/src/ff_directive_writer.py b/polyply/src/ff_directive_writer.py new file mode 100644 index 00000000..139597f9 --- /dev/null +++ b/polyply/src/ff_directive_writer.py @@ -0,0 +1,2 @@ + + diff --git a/polyply/src/ffoutput.py b/polyply/src/ffoutput.py new file mode 100644 index 00000000..8beb7a6e --- /dev/null +++ b/polyply/src/ffoutput.py @@ -0,0 +1,135 @@ +# Copyright 2020 University of Groningen +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +class ForceFieldDirectiveWriter(): + """ + Write force-field files according to the + vermouth force-field definition. + + Note that this is a leightweight writer + which does not offer the complete rich + syntax of the ff file format. + """ + def __init__(self, forcefield, stream): + """ + Parameters + ---------- + forcefield: `:class:vermouth.forcefield.ForceField` + the force-field object to write + + stream: `` + the stream to which to write; must have a write method + """ + self.forcefield = forcefield + self.stream = stream + # these attributes have a specific order in the moleculetype section + self.normal_order_block_atoms = ["atype", "resid", "resname", + "atomname", "charge_group", "charge", "mass"] + + def write(self): + """ + Write the forcefield to file. + """ + for name, block in self.forcefield.blocks.items(): + self.stream.write("[ moleculetype ]\n") + excl = str(block.nrexcl) + self.stream.write(f"{name} {excl}\n") + self.write_atoms_block(block.nodes(data=True)) + self.write_interaction_dict(block.interactions) + + for link in self.forcefield.links: + self.write_link_header() + self.write_atoms_link(link.nodes(data=True)) + self.write_interaction_dict(link.interactions) + self.write_edges(link.edges) + + def write_interaction_dict(self, inter_dict): + """ + Writes interactions to `self.stream`, with a new + interaction directive per type. Meta attributes + are kept and written as json parasable dicts. 
+ + Parameters + ---------- + inter_dict: `class:dict[list[vermouth.molecule.Interaction]]` + the interaction dict to write + """ + for inter_type in inter_dict: + self.stream.write(f"[ {inter_type} ]\n") + for interaction in inter_dict[inter_type]: + atom_string = " ".join(interaction.atoms) + param_string = " ".join(interaction.parameters) + meta_string = "{" + " ,".join([f"\"{key}\": \"{value}\"" for key, value in interaction.meta.items()]) + "}" + line = atom_string + " " + param_string + " " + meta_string + "\n" + self.stream.write(line) + + def write_edges(self, edges): + """ + Writes edges to `self.stream` into the edges directive. + + Parameters + ---------- + edges: abc.iteratable + pair-wise iteratable edge list + """ + self.stream.write("[ edges ]\n") + for idx, jdx in edges: + self.stream.write(f"{idx} {jdx}\n") + + def write_atoms_block(self, nodes): + """ + Writes the nodes/atoms of the block atomtype directive to `self.stream`. + All attributes are written following the GROMACS atomtype directive + style. + + Parameters + ---------- + edges: abc.iteratable + pair-wise iteratable edge list + """ + self.stream.write("[ atoms ]\n") + for idx, (node, attrs) in enumerate(nodes): + idx += 1 + attr_line = " ".join([str(attrs[attr]) for attr in self.normal_order_block_atoms ]) + line = f"{idx} " + attr_line + "\n" + self.stream.write(line) + + def write_atoms_link(self, nodes): + """ + Writes the nodes/atoms of the link atomtype directive to `self.stream`. + All attributes are written as json style dicts. + + Parameters: + ----------- + nodes: abc.itertable[tuple(abc.hashable, dict)] + list of nodes in form of a list with hashable node-key and dict + of attributes. 
The format is the same as returned by networkx.nodes(data=True) + """ + self.stream.write("[ atoms ]\n") + for node_key, attributes in nodes: + attr_line = " {" + " ,".join([f"\"{key}\": \"{value}\"" for key, value in attributes.items()]) + "}" + line = str(node_key) + attr_line + "\n" + self.stream.write(line) + + def write_link_header(self): + """ + Write the link directive header, with the resnames written + in form readable to geenerate a `:class:vermouth.molecule.Choice` + object. + + Prameters + --------- + resnames: `abc.itertable[str]` + """ + self.stream.write("[ link ]\n") diff --git a/polyply/src/fragment_finder.py b/polyply/src/fragment_finder.py new file mode 100644 index 00000000..cd1f4d7f --- /dev/null +++ b/polyply/src/fragment_finder.py @@ -0,0 +1,195 @@ +# Copyright 2020 University of Groningen +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import networkx as nx +from vermouth.graph_utils import make_residue_graph +from polyply.src.graph_utils import find_one_ismags_match + +def _element_match(node1, node2): + """ + Checks if the element attribute of two nodes + is the same. + + Returns: + -------- + bool + """ + return node1["element"] == node2["element"] + +class FragmentFinder(): + """ + Find, label and extract unique fragments from a vermouth.molecule.Molecule. 
+ + Wrire process HERE + """ + + def __init__(self, molecule, prefix): + """ + Initalize the fragment finder with a molecule, setting the + resid attribute to None, and correctly assining elements + based on atomic masses. + + Parameters + ---------- + molecule: :class:`vermouth.molecule.Molecule` + """ + self.max_by_resid = {} + self.ter_prefix = prefix + self.resid = 1 + self.res_assigment = [] + self.assigned_atoms = [] + self.molecule = molecule + self.known_atom = None + self.match_keys = ['element', 'mass'] #, 'charge'] + self.masses_to_element = {16: "O", + 12: "C", + 32: "S", + 1: "H"} + + # resids are not reliable so we set them all to None + nx.set_node_attributes(self.molecule, None, "resid") + + # set the element attribute for each atom in the + # molecule + for node in self.molecule.nodes: + mass = round(self.molecule.nodes[node]["mass"]) + self.molecule.nodes[node]["element"] = self.masses_to_element[mass] + + def _node_match(self, node1, node2): + for attr in self.match_keys: + if node1[attr] != node2[attr]: + return False + return True + + def label_fragment_from_graph(self, fragment_graph): + """ + For the `self.molecule` label all atoms that match + the `fragment_graph` with a resid attribute and set + the atom-name to the element name plus index relative + to the atoms in the fragment. 
+ + Parameters + ---------- + fragment_graph: nx.Graph + graph describing the fragment; must have the + element attribute + """ + # find all isomorphic matches to the target fragments + GM = nx.isomorphism.GraphMatcher(self.molecule, + fragment_graph, + node_match=_element_match, + ) + template_atoms = list(fragment_graph.nodes) + # the below statement scales super duper extra poorly + resname = list(nx.get_node_attributes(fragment_graph, "resname").values())[0] + raw_matchs = GM.subgraph_isomorphisms_iter() + # loop over all matchs and check if the atoms are already + # assigned - symmetric matches must be skipped + for current_match in raw_matchs: + # the graph matcher can return the matchs in any order so we need to sort them + # according to our tempalte molecule + rev_current_match = {val: key for key, val in current_match.items()} + atoms = [ rev_current_match[template_atom] for template_atom in template_atoms] + if frozenset(atoms) not in self.res_assigment and not any([atom in self.assigned_atoms for atom in atoms]): + self.res_assigment.append(frozenset(atoms)) + for idx, atom in enumerate(atoms): + self.molecule.nodes[atom]["resid"] = self.resid + self.molecule.nodes[atom]["atomname"] = self.molecule.nodes[atom]["element"] + str(idx) + self.molecule.nodes[atom]["resname"] = resname + self.max_by_resid[self.resid] = idx + self.known_atom = atom + self.assigned_atoms.append(atom) + print(self.molecule.nodes[atom]["element"]) + self.resid += 1 + + def label_fragments_from_graph(self, fragment_graphs): + """ + Call the label_fragment method for multiple fragments. + + Parameters + ---------- + fragment_graphs: list[nx.Graph] + """ + for fragment_graph in fragment_graphs: + self.label_fragment_from_graph(fragment_graph) + + def label_unmatched_atoms(self): + """ + After all atoms have been assigned to target fragments using + the label_fragment method all left-over atoms are assigned to + the first fragment they are attached to. 
This method sets the + atom-name to the element name and element count and resid + attribute. + """ + for from_node, to_node in nx.dfs_edges(self.molecule, source=self.known_atom): + if not self.molecule.nodes[to_node]["resid"]: + resid = self.molecule.nodes[from_node]["resid"] + self.max_by_resid[resid] = self.max_by_resid[resid] + 1 + self.molecule.nodes[to_node]["resid"] = resid + self.molecule.nodes[to_node]["resname"] = self.molecule.nodes[from_node]["resname"] + self.molecule.nodes[to_node]["atomname"] = self.molecule.nodes[to_node]["element"] + str(self.max_by_resid[resid]) + + def extract_unique_fragments(self, fragment_graphs): + """ + Given a list of fragment-graphs assing all atoms to fragments and + generate new fragments by assinging the left-over atoms to the + connecting fragment. Fragments get a unique resid in the molecule. + Then make the residue graph and filter out all unique residues + and return them. + + Parameters + ---------- + fragment_graphs: list[nx.Graph] + + Returns + ------- + list[nx.Graph] + all unique fragment graphs + """ + # first we find and label all fragments in the molecule + self.label_fragments_from_graph(fragment_graphs) + # then we assign all left-over atoms to the existing residues + self.label_unmatched_atoms() + # now we make the residue graph and find all unique residues + unique_fragments = {} + res_graph = make_residue_graph(self.molecule) + had_resnames = {} + for node in res_graph.nodes: + resname = res_graph.nodes[node]['resname'] + # this fragment is terminal located so we give it a special prefix + fragment = res_graph.nodes[node]['graph'] + if res_graph.degree(node) == 1: + resname = resname + self.ter_prefix + nx.set_node_attributes(self.molecule, {node: resname for node in fragment.nodes} ,"resname") + # here we extract the fragments and set appropiate residue names + for other_frag in unique_fragments.values(): + if nx.is_isomorphic(fragment, other_frag, node_match=self._node_match): + # it can happen that 
two fragments are completely isomorphic but have different + # atom names because we don't know the order of atoms when looping over the molecule + # and setting the names. In this case we simply take the atom-names of the known + # fragment. Better ideas anyone? + mapping = find_one_ismags_match(fragment, other_frag, self._node_match) + if mapping: + for source, target in mapping.items(): + self.molecule.nodes[target]['atomname'] = self.molecule.nodes[source]['atomname'] + break + else: + if resname in unique_fragments: + resname = resname + "_" + str(had_resnames[resname] + 1) + nx.set_node_attributes(self.molecule, {node: resname for node in fragment.nodes} ,"resname") + else: + had_resnames[resname] = 0 + unique_fragments[resname] = fragment + + return unique_fragments diff --git a/polyply/src/graph_utils.py b/polyply/src/graph_utils.py index b0300d3c..489ba118 100644 --- a/polyply/src/graph_utils.py +++ b/polyply/src/graph_utils.py @@ -214,3 +214,15 @@ def get_all_predecessors(graph, node, start_node=0): predecessors.reverse() return predecessors +def find_one_ismags_match(graph1, graph2, node_match): + """ + Returns one ismags match when graphs are isomorphic + otherwise None. + """ + GM = nx.isomorphism.GraphMatcher(graph1, graph2, node_match=node_match) + raw_matches = GM.subgraph_isomorphisms_iter() + try: + mapping = next(raw_matches) + return mapping + except StopIteration: + return None diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py new file mode 100644 index 00000000..ef9c1ba9 --- /dev/null +++ b/polyply/src/itp_to_ff.py @@ -0,0 +1,320 @@ +# Copyright 2020 University of Groningen +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import itertools +from collections import defaultdict +import numpy as np +import networkx as nx +import pysmiles +import vermouth +from vermouth.forcefield import ForceField +from vermouth.molecule import Interaction +from polyply.src.topology import Topology +from polyply.src.generate_templates import _relabel_interaction_atoms +from polyply.src.fragment_finder import FragmentFinder +from polyply.src.ffoutput import ForceFieldDirectiveWriter + +def diffs_to_prefix(atoms, resid_diffs): + """ + Given a list of atoms and corresponding differences + between their resids, generate the offset prefix for + the atomnames according to the vermouth sepcific offset + language. + + The reference atom must have resid_diff value of 0. + Other atoms either get - or + signs + depending on their resid offset. + + Parameters + ---------- + atoms: abc.itertable[str] + resid_diff: abc.itertable[int] + the differences in resid with respeect to + the smallest/largest resid which is 0 + + Returns + ------- + abc.itertable + list with prefixed atom names + """ + prefixed_atoms = [] + for atom, diff in zip(atoms, resid_diffs): + if diff > 0: + prefix = "".join(["+" for i in range(0, diff)]) + else: + prefix = "".join(["-" for i in range(diff, 0)]) + prefixed_atoms.append(prefix + atom) + return prefixed_atoms + +def _extract_edges_from_shortest_path(atoms, block, min_resid): + """ + Given a list atoms generate a list of edges correspoding to + all edges required to connect all atoms by at least one + shortest path. 
Edges are retunred on atomname basis with + prefix relative to the `min_resid`. See diffs_to_prefix. + + Paramters: + ---------- + atoms: abc.itertable + the atoms to collect edges for + block: :class:`vermouth.molecule.Block` + the molecule which to servey for edges + min_resid: int + the resid to which the prefix indicate relative resid + distance + + Returns + ------- + list[tuple] + the edge list by atomname with prefix indicating relative + residue distance to min_resid + """ + edges = [] + had_edges = [] + final_atoms = {} + resnames = {} + for origin, target in itertools.combinations(atoms, r=2): + path = list(nx.shortest_simple_paths(block, source=origin, target=target))[0] + for edge in zip(path[:-1], path[1:]): + if edge not in had_edges: + resid_diffs = np.array([block.nodes[node]['resid'] for node in edge]) - min_resid + atom_names = [block.nodes[node]["atomname"] for node in edge] + link_names = diffs_to_prefix(atom_names, resid_diffs) + final_atoms.update(dict(zip(edge, link_names))) + edges.append(link_names) + had_edges.append(edge) + resnames.update(zip(link_names, [ block.nodes[node]["resname"] for node in edge])) + return final_atoms, edges, resnames + +def extract_block(molecule, nodes, defines): + """ + Given a `vermouth.molecule` and a `resname` + extract the information of a block from the + molecule definition and replace all defines + if any are found. 
+ + Parameters + ---------- + molecule: :class:vermouth.molecule.Molecule + resname: str + defines: dict + dict of type define: value + + Returns + ------- + :class:vermouth.molecule.Block + """ + resid = molecule.nodes[nodes[0]]["resid"] + block = vermouth.molecule.Block() + + # select all nodes with the same first resid and + # make sure the block node labels are atomnames + # also build a correspondance dict between node + # label in the molecule and in the block for + # relabeling the interactions + mapping = {} + for node in nodes: + attr_dict = molecule.nodes[node] + if attr_dict["resid"] == resid: + block.add_node(attr_dict["atomname"], **attr_dict) + mapping[node] = attr_dict["atomname"] + + for inter_type in molecule.interactions: + for interaction in molecule.interactions[inter_type]: + if all(atom in mapping for atom in interaction.atoms): + interaction = _relabel_interaction_atoms(interaction, mapping) + block.interactions[inter_type].append(interaction) + + for inter_type in ["bonds", "constraints", "virtual_sitesn", + "virtual_sites2", "virtual_sites3", "virtual_sites4"]: + block.make_edges_from_interaction_type(inter_type) + + if not nx.is_connected(block): + msg = ('\n Residue {} with id {} consistes of two disconnected parts. ' + 'Make sure all atoms/particles in a residue are connected by bonds,' + ' constraints or virual-sites.') + raise IOError(msg.format(resname, resid)) + + return block + +def extract_links(molecule): + """ + Given a molecule that has the resid and resname attributes + correctly set, extract the interactions which span more than + a single residue and generate a link. 
+ + Parameters + ---------- + molecule: :class:`vermouth.molecule.Molecule` + the molecule from which to extract interactions + + Returns + ------- + list[:class:`vermouth.molecule.Links`] + a list with a links found + """ + links = [] + # patterns are a sqeuence of atoms that define an interaction + # sometimes multiple interactions are defined for one pattern + # in that case they are all collected in this dictionary + patterns = defaultdict(dict) + # for each found pattern the resnames are collected; this is important + # because the same pattern may apply to residues with different name + resnames_for_patterns = defaultdict(dict) + link_atoms_for_patterns = defaultdict(list) + # as additional safe-gaurd against false links we also collect the edges + # that span the interaction by finding the shortest simple path between + # all atoms in patterns. Note that the atoms in patterns not always have + # to be directly bonded. For example, pairs are not directly bonded and + # can span multiple residues + #edges_for_patterns = defaultdict(list) + for inter_type in molecule.interactions: + #print("TYPE", inter_type) + for kdx, interaction in enumerate(molecule.interactions[inter_type]): + # extract resids and resname corresponding to interaction atoms + resids = np.array([molecule.nodes[atom]["resid"] for atom in interaction.atoms]) + resnames = [molecule.nodes[atom]["resname"] for atom in interaction.atoms] + # compute the resid offset to be used for the atom prefixes + min_resid = min(resids) + diff = resids - min_resid + pattern = tuple(set(list(zip(diff, resnames)))) + + # in this case all interactions are in a block and we skip + if np.sum(diff) == 0: + continue + + # we collect the edges corresponding to the simple paths between pairs of atoms + # in the interaction + mol_atoms_to_link_atoms, edges, resnames = _extract_edges_from_shortest_path(interaction.atoms, molecule, min_resid) + #print(kdx, resnames) + link_to_mol_atoms = {value:key for key, value in 
mol_atoms_to_link_atoms.items()} + link_atoms = [mol_atoms_to_link_atoms[atom] for atom in interaction.atoms] + link_inter = Interaction(atoms=link_atoms, + parameters=interaction.parameters, + meta={}) + #print("inter number", kdx) + # here we deal with filtering redundancy + if pattern in patterns and inter_type in patterns[pattern]: + #print(pattern) + # if pattern == ((0, 'PEO'), (1, 'PEO')): + # print(kdx, link_inter.atoms, patterns[pattern].get(inter_type, []), "\n") + + for other_inter in patterns[pattern].get(inter_type, []): + if other_inter.atoms == link_inter.atoms: + if other_inter.parameters == link_inter.parameters: + break + else: + patterns[pattern][inter_type].append(link_inter) + resnames_for_patterns[pattern].update(resnames) + link_atoms_for_patterns[pattern] += link_atoms + else: + patterns[pattern][inter_type] = [link_inter] + resnames_for_patterns[pattern].update(resnames) + #edges_for_patterns[pattern] += edges + link_atoms_for_patterns[pattern] += link_atoms + #print('resnames', resnames_for_patterns[pattern], '\n') +# for inter in patterns[list(patterns.keys())[0]]['angles']: +# print(inter) + # we make new links for each unique interaction per type + for pattern in patterns: + link = vermouth.molecule.Link() + link.add_nodes_from(set(link_atoms_for_patterns[pattern])) + #link.add_edges_from(edges_for_patterns[pattern]) + resnames = resnames_for_patterns[pattern] + # print(resnames) + nx.set_node_attributes(link, resnames, "resname") + + had_parameters = [] + for inter_type, inters in patterns[pattern].items(): + for idx, interaction in enumerate(inters): + #new_parameters = interaction.parameters + new_meta = interaction.meta + #new_atoms = interaction.atoms + # to account for the fact when multiple interactions with the same + # atom patterns need to be written to ff + new_meta.update({"version": idx}) + new_meta.update({"comment": "link"}) + had_parameters.append(interaction.parameters) + # map atoms to proper atomnames .. 
+ link.interactions[inter_type].append(interaction) + + links.append(link) + print(links) + return links + +def equalize_charges(molecule, target_charge=0): + """ + Make sure that the total charge of molecule is equal to + the target charge by substracting the differences split + over all atoms. + + Parameters + ---------- + molecule: :class:`vermouth.molecule.Molecule` + target_charge: float + the charge of the molecule + + Returns + ------- + molecule + the molecule with updated charge attribute + """ + total = nx.get_node_attributes(molecule, "charge") + diff = (sum(list(total.values())) - target_charge)/len(molecule.nodes) + for node in molecule.nodes: + charge = float(molecule.nodes[node]['charge']) - diff + molecule.nodes[node]['charge'] = charge + total = nx.get_node_attributes(molecule, "charge") + return molecule + +def handle_chirality(molecule, chiral_centers): + pass + +def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0): + """ + Main executable for itp to ff tool. 
+ """ + # read the target itp-file + top = Topology.from_gmx_topfile(itppath, name="test") + mol = top.molecules[0].molecule + mol = equalize_charges(mol, target_charge=charge) + + # read the target fragments and convert to graph + fragment_graphs = [] + for resname, smile in zip(resnames, fragment_smiles): + fragment_graph = pysmiles.read_smiles(smile) + nx.set_node_attributes(fragment_graph, resname, "resname") + fragment_graphs.append(fragment_graph) + + # identify and extract all unique fragments + unique_fragments = FragmentFinder(mol, prefix=term_prefix).extract_unique_fragments(fragment_graphs) + force_field = ForceField("new") + for name, fragment in unique_fragments.items(): + new_block = extract_block(mol, list(fragment.nodes), defines={}) + nx.set_node_attributes(new_block, 1, "resid") + new_block.nrexcl = mol.nrexcl + force_field.blocks[name] = new_block + + for node in mol.nodes: + if mol.nodes[node]['resid'] == 3: + print(mol.nodes[node]) + print("\n\n") + for node in mol.nodes: + if mol.nodes[node]['resid'] == 4: + print(mol.nodes[node]) + + force_field.links = extract_links(mol) + + with open(outpath, "w") as filehandle: + ForceFieldDirectiveWriter(forcefield=force_field, stream=filehandle).write() From c5770527104ff6dffa89a6e15cb970a3029c98c6 Mon Sep 17 00:00:00 2001 From: "f.grunewald" Date: Thu, 15 Jun 2023 15:33:19 +0200 Subject: [PATCH 02/82] imporve graph matching --- polyply/src/fragment_finder.py | 87 ++++++++++++++++++++++++++++++---- polyply/src/graph_utils.py | 1 + polyply/src/itp_to_ff.py | 18 +++---- 3 files changed, 88 insertions(+), 18 deletions(-) diff --git a/polyply/src/fragment_finder.py b/polyply/src/fragment_finder.py index cd1f4d7f..062ce602 100644 --- a/polyply/src/fragment_finder.py +++ b/polyply/src/fragment_finder.py @@ -15,6 +15,7 @@ import networkx as nx from vermouth.graph_utils import make_residue_graph from polyply.src.graph_utils import find_one_ismags_match +import matplotlib.pyplot as plt def _element_match(node1, 
node2): """ @@ -51,7 +52,7 @@ def __init__(self, molecule, prefix): self.assigned_atoms = [] self.molecule = molecule self.known_atom = None - self.match_keys = ['element', 'mass'] #, 'charge'] + self.match_keys = ['element', 'mass', 'degree'] #, 'charge'] self.masses_to_element = {16: "O", 12: "C", 32: "S", @@ -65,6 +66,7 @@ def __init__(self, molecule, prefix): for node in self.molecule.nodes: mass = round(self.molecule.nodes[node]["mass"]) self.molecule.nodes[node]["element"] = self.masses_to_element[mass] + self.molecule.nodes[node]["degree"] = self.molecule.degree(node) def _node_match(self, node1, node2): for attr in self.match_keys: @@ -72,6 +74,39 @@ def _node_match(self, node1, node2): return False return True + def make_res_graph(self): + self.res_graph = make_residue_graph(self.molecule) + + def pre_match(self, fragment_graph): + """ + Find one match of fragment graph in the molecule + and then extract degrees and atom-types for further + matching. This is a safety measure because even though + the fragment graph is subgraph isomorphic the underlying + itp parameters might not be. + """ + # find subgraph isomorphic matches to the target fragment + # based on the element only + GM = nx.isomorphism.GraphMatcher(self.molecule, + fragment_graph, + node_match=_element_match,) + one_match = next(GM.subgraph_isomorphisms_iter()) + for mol_atom, tempt_atom in one_match.items(): + for attr in self.match_keys: + fragment_graph.nodes[tempt_atom][attr] = self.molecule.nodes[mol_atom][attr] + return fragment_graph + + def is_connected_to_prev(self, current, prev): + """ + Check if the atoms in the lists current or + prev are connected. 
+ """ + for node in current: + for neigh_node in self.molecule.neighbors(node): + if neigh_node in prev: + return True + return False + def label_fragment_from_graph(self, fragment_graph): """ For the `self.molecule` label all atoms that match @@ -85,15 +120,19 @@ def label_fragment_from_graph(self, fragment_graph): graph describing the fragment; must have the element attribute """ + # pre-match one residue and extract the atomtypes and degrees + # this is needed to enforce symmetry in matching the other + # residues + fragment_graph = self.pre_match(fragment_graph) # find all isomorphic matches to the target fragments GM = nx.isomorphism.GraphMatcher(self.molecule, fragment_graph, - node_match=_element_match, + node_match=self._node_match, ) template_atoms = list(fragment_graph.nodes) # the below statement scales super duper extra poorly resname = list(nx.get_node_attributes(fragment_graph, "resname").values())[0] - raw_matchs = GM.subgraph_isomorphisms_iter() + raw_matchs = list(GM.subgraph_isomorphisms_iter()) # loop over all matchs and check if the atoms are already # assigned - symmetric matches must be skipped for current_match in raw_matchs: @@ -101,7 +140,19 @@ def label_fragment_from_graph(self, fragment_graph): # according to our tempalte molecule rev_current_match = {val: key for key, val in current_match.items()} atoms = [ rev_current_match[template_atom] for template_atom in template_atoms] - if frozenset(atoms) not in self.res_assigment and not any([atom in self.assigned_atoms for atom in atoms]): + if self.assigned_atoms: + connected = self.is_connected_to_prev(current_match.keys(), + self.assigned_atoms,) + else: + connected = True + + #print(connected, frozenset(atoms) not in self.res_assigment, not any([atom in self.assigned_atoms for atom in atoms])) + + if frozenset(atoms) not in self.res_assigment and \ + not any([atom in self.assigned_atoms for atom in atoms]) and \ + connected: + + # print(current_match.keys()) 
self.res_assigment.append(frozenset(atoms)) for idx, atom in enumerate(atoms): self.molecule.nodes[atom]["resid"] = self.resid @@ -110,7 +161,6 @@ def label_fragment_from_graph(self, fragment_graph): self.max_by_resid[self.resid] = idx self.known_atom = atom self.assigned_atoms.append(atom) - print(self.molecule.nodes[atom]["element"]) self.resid += 1 def label_fragments_from_graph(self, fragment_graphs): @@ -157,19 +207,25 @@ def extract_unique_fragments(self, fragment_graphs): list[nx.Graph] all unique fragment graphs """ + # nx.draw(self.molecule, with_labels=True, pos=nx.kamada_kawai_layout(self.molecule)) + # plt.show() # first we find and label all fragments in the molecule self.label_fragments_from_graph(fragment_graphs) + # labeldict = nx.get_node_attributes(self.molecule, "atomname") + # nx.draw(self.molecule, labels=labeldict, with_labels=True, pos=nx.kamada_kawai_layout(self.molecule)) + # plt.show() # then we assign all left-over atoms to the existing residues self.label_unmatched_atoms() + # make the residue graph + self.make_res_graph() # now we make the residue graph and find all unique residues unique_fragments = {} - res_graph = make_residue_graph(self.molecule) had_resnames = {} - for node in res_graph.nodes: - resname = res_graph.nodes[node]['resname'] + for node in self.res_graph.nodes: + resname = self.res_graph.nodes[node]['resname'] # this fragment is terminal located so we give it a special prefix - fragment = res_graph.nodes[node]['graph'] - if res_graph.degree(node) == 1: + fragment = self.res_graph.nodes[node]['graph'] + if self.res_graph.degree(node) == 1: resname = resname + self.ter_prefix nx.set_node_attributes(self.molecule, {node: resname for node in fragment.nodes} ,"resname") # here we extract the fragments and set appropiate residue names @@ -192,4 +248,15 @@ def extract_unique_fragments(self, fragment_graphs): had_resnames[resname] = 0 unique_fragments[resname] = fragment + print("--") + resid_col = {0: "r", 1: "g", 2:"b", 
3:"c", 4:"m", 5:"y", 6:"orange", 7:"pink"} + labeldict = nx.get_node_attributes(self.molecule, "atomname") + resids = nx.get_node_attributes(self.molecule, "resid") + colors = [resid_col[resid] for node, resid in resids.items()] + print(colors) + print(labeldict) + nx.draw(self.molecule, labels=labeldict, with_labels=True, pos=nx.kamada_kawai_layout(self.molecule), node_color=colors) + plt.show() + print("--") return unique_fragments + diff --git a/polyply/src/graph_utils.py b/polyply/src/graph_utils.py index 489ba118..1bced361 100644 --- a/polyply/src/graph_utils.py +++ b/polyply/src/graph_utils.py @@ -225,4 +225,5 @@ def find_one_ismags_match(graph1, graph2, node_match): mapping = next(raw_matches) return mapping except StopIteration: + raise IOError("no match_found") return None diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py index ef9c1ba9..9ba46c21 100644 --- a/polyply/src/itp_to_ff.py +++ b/polyply/src/itp_to_ff.py @@ -281,6 +281,13 @@ def equalize_charges(molecule, target_charge=0): def handle_chirality(molecule, chiral_centers): pass +def hcount(molecule, node): + hcounter = 0 + for node in molecule.neighbors(node): + if molecule.nodes[node]["element"] == "H": + hcounter+= 1 + return hcounter + def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0): """ Main executable for itp to ff tool. 
@@ -293,7 +300,7 @@ def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0 # read the target fragments and convert to graph fragment_graphs = [] for resname, smile in zip(resnames, fragment_smiles): - fragment_graph = pysmiles.read_smiles(smile) + fragment_graph = pysmiles.read_smiles(smile, explicit_hydrogen=True) nx.set_node_attributes(fragment_graph, resname, "resname") fragment_graphs.append(fragment_graph) @@ -306,13 +313,8 @@ def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0 new_block.nrexcl = mol.nrexcl force_field.blocks[name] = new_block - for node in mol.nodes: - if mol.nodes[node]['resid'] == 3: - print(mol.nodes[node]) - print("\n\n") - for node in mol.nodes: - if mol.nodes[node]['resid'] == 4: - print(mol.nodes[node]) +# for node in mol.nodes: +# print(mol.nodes[node]) force_field.links = extract_links(mol) From 7eff22a32b3d4bc95c8a5b41aaabb458e535f2e5 Mon Sep 17 00:00:00 2001 From: "f.grunewald" Date: Mon, 19 Jun 2023 11:32:46 +0200 Subject: [PATCH 03/82] fragment finder with prints --- polyply/src/fragment_finder.py | 98 ++++++++++++++++++++++++++++++---- 1 file changed, 89 insertions(+), 9 deletions(-) diff --git a/polyply/src/fragment_finder.py b/polyply/src/fragment_finder.py index 062ce602..53f9d9e1 100644 --- a/polyply/src/fragment_finder.py +++ b/polyply/src/fragment_finder.py @@ -22,7 +22,7 @@ def _element_match(node1, node2): Checks if the element attribute of two nodes is the same. - Returns: + Returns -------- bool """ @@ -30,9 +30,38 @@ def _element_match(node1, node2): class FragmentFinder(): """ - Find, label and extract unique fragments from a vermouth.molecule.Molecule. + This class enables finding and labelling of fragments + in the all-atom description of molecules. Fragments are + small networkx graphs. 
It makes a number of implicit + assumptions: - Wrire process HERE + - the molecule is connected and acyclic + - the residue graph of the molecule is linear + - the nodes by index increase with increasing resid order + - the graphs provided as fragment graphs follow the sequence + of residues. For example, given a polymer A5-B2-C3-A3 + residue sequence, fragments should be provided as a list + A,B,C,A. The length of the block does not matter. + + The algorithm loops over the fragments and finds a match + between a fragment and the molecule graph using a subgraph + isomorphism based on the element attribute. This match is + then used to set the degree attribute on the fragment. Next + all other subgraph isomorphisms are found under the condition + that each found match must connected to the previous residue. + Nodes are labelled with a resid and resname. This part is done + by the `self.label_fragment_from_graph` class method. + + Subsequently, the algorithm proceeds to merge all left-over + atoms to the residue they are connected with assining a resid + and resname from that residue. This procedure is done by + `self.label_unmatched_atoms`. + + Finally, the code goes over all residues and assigns a prefix to + all terminal residues. In addition residues with the same resname + are compared to each other using a subgraph isomorphism and if + they are not isomorphic as result of assigning left-over atoms, + the resname is appended by a number. 
""" def __init__(self, molecule, prefix): @@ -44,6 +73,28 @@ def __init__(self, molecule, prefix): Parameters ---------- molecule: :class:`vermouth.molecule.Molecule` + prefix: str + the prefix used to label termini + + Attributes + ---------- + max_by_resid: dict[int][int] + number of atoms by resid + ter_prefix: str + the terminal prefix + resid: int + highest resid + assigned_atoms: list[`abc.hashable`] + atoms assinged to residues + molecule: :class:`vermouth.molecule.Molecule` + the molecule to match against + known_atom: `abc.hashable` + any atom that has been matched to a fragment + match_keys: `list[str]` + molecule properties to use in matching the fragment + graphs in the second stage. + masses_to_elements: dict[int][str] + matches masses to elements """ self.max_by_resid = {} self.ter_prefix = prefix @@ -54,6 +105,7 @@ def __init__(self, molecule, prefix): self.known_atom = None self.match_keys = ['element', 'mass', 'degree'] #, 'charge'] self.masses_to_element = {16: "O", + 14: "N", 12: "C", 32: "S", 1: "H"} @@ -74,6 +126,7 @@ def _node_match(self, node1, node2): return False return True + # this could be a property?? def make_res_graph(self): self.res_graph = make_residue_graph(self.molecule) @@ -84,6 +137,11 @@ def pre_match(self, fragment_graph): matching. This is a safety measure because even though the fragment graph is subgraph isomorphic the underlying itp parameters might not be. + + Parameters + ----------- + fragment_graph: 'nx.Graph' + must have attributes element for each node """ # find subgraph isomorphic matches to the target fragment # based on the element only @@ -100,6 +158,13 @@ def is_connected_to_prev(self, current, prev): """ Check if the atoms in the lists current or prev are connected. 
+ + Parameters + ---------- + current: list[abc.hashable] + list of current nodes + prev: list[abc.hashable] + list of prev nodes """ for node in current: for neigh_node in self.molecule.neighbors(node): @@ -109,8 +174,8 @@ def is_connected_to_prev(self, current, prev): def label_fragment_from_graph(self, fragment_graph): """ - For the `self.molecule` label all atoms that match - the `fragment_graph` with a resid attribute and set + For the `self.molecule` label all atoms, that match + the `fragment_graph`, with a resid attribute and set the atom-name to the element name plus index relative to the atoms in the fragment. @@ -133,9 +198,12 @@ def label_fragment_from_graph(self, fragment_graph): # the below statement scales super duper extra poorly resname = list(nx.get_node_attributes(fragment_graph, "resname").values())[0] raw_matchs = list(GM.subgraph_isomorphisms_iter()) + print('\n', resname) # loop over all matchs and check if the atoms are already # assigned - symmetric matches must be skipped for current_match in raw_matchs: + if resname == "OH": + print(current_match) # the graph matcher can return the matchs in any order so we need to sort them # according to our tempalte molecule rev_current_match = {val: key for key, val in current_match.items()} @@ -172,6 +240,9 @@ def label_fragments_from_graph(self, fragment_graphs): fragment_graphs: list[nx.Graph] """ for fragment_graph in fragment_graphs: + labeldict = nx.get_node_attributes(fragment_graph, "element") + nx.draw(fragment_graph, labels=labeldict, with_labels=True, pos=nx.kamada_kawai_layout(fragment_graph)) + plt.show() self.label_fragment_from_graph(fragment_graph) def label_unmatched_atoms(self): @@ -207,8 +278,9 @@ def extract_unique_fragments(self, fragment_graphs): list[nx.Graph] all unique fragment graphs """ - # nx.draw(self.molecule, with_labels=True, pos=nx.kamada_kawai_layout(self.molecule)) - # plt.show() + labeldict = nx.get_node_attributes(self.molecule, "element") + nx.draw(self.molecule, 
labels=labeldict, with_labels=True, pos=nx.kamada_kawai_layout(self.molecule)) + plt.show() # first we find and label all fragments in the molecule self.label_fragments_from_graph(fragment_graphs) # labeldict = nx.get_node_attributes(self.molecule, "atomname") @@ -249,9 +321,17 @@ def extract_unique_fragments(self, fragment_graphs): unique_fragments[resname] = fragment print("--") - resid_col = {0: "r", 1: "g", 2:"b", 3:"c", 4:"m", 5:"y", 6:"orange", 7:"pink"} - labeldict = nx.get_node_attributes(self.molecule, "atomname") + resid_col = {} resids = nx.get_node_attributes(self.molecule, "resid") + one = True + for resid in set(resids.values()): + if one: + resid_col[resid] = 'tab:red' + one = False + else: + resid_col[resid] = 'tab:blue' + one = True + labeldict = nx.get_node_attributes(self.molecule, "atomname") colors = [resid_col[resid] for node, resid in resids.items()] print(colors) print(labeldict) From 95c4b87544ea989c090109d85f01c0ac1014bfae Mon Sep 17 00:00:00 2001 From: "f.grunewald" Date: Mon, 19 Jun 2023 18:33:42 +0200 Subject: [PATCH 04/82] add tests for fragment finder --- polyply/tests/test_fragment_finder.py | 262 ++++++++++++++++++++++++++ 1 file changed, 262 insertions(+) create mode 100644 polyply/tests/test_fragment_finder.py diff --git a/polyply/tests/test_fragment_finder.py b/polyply/tests/test_fragment_finder.py new file mode 100644 index 00000000..3e58f5c9 --- /dev/null +++ b/polyply/tests/test_fragment_finder.py @@ -0,0 +1,262 @@ +# Copyright 2020 University of Groningen +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +""" +Test the fragment finder for itp_to_ff. +""" + +import textwrap +import pytest +from pathlib import Path +import numpy as np +import networkx as nx +import vermouth.forcefield +import vermouth.molecule +from vermouth.gmx.itp_read import read_itp +from polyply import TEST_DATA +import polyply.src.meta_molecule +from polyply.src.meta_molecule import (MetaMolecule, Monomer) +import polyply +from collections import defaultdict +import pysmiles + +@pytest.mark.parametrize( + "node1, node2, expected", + [ + ({"element": "C"}, {"element": "C"}, True), + ({"element": "H"}, {"element": "O"}, False), + ({"element": "N"}, {"element": "N"}, True), + ({"element": "O"}, {"element": "S"}, False), + ], +) +def test_element_match(node1, node2, expected): + assert polyply.src.fragment_finder._element_match(node1, node2) == expected + +@pytest.mark.parametrize( + "match_keys, node1, node2, expected", + [ + (["element"], {"element": "C"}, {"element": "C"}, True), + (["element"], {"element": "H"}, {"element": "O"}, False), + (["element", "charge"], {"element": "N", "charge": 0}, {"element": "N", "charge": 1}, False), + (["element", "charge"], {"element": "O", "charge": -1}, {"element": "O", "charge": -1}, True), + ], +) +def test_node_match(match_keys, node1, node2, expected): + # molecule and terminal label don't matter + frag_finder = polyply.src.fragment_finder.FragmentFinder(None, "ter") + frag_finder.match_keys = match_keys + assert frag_finder._node_match(node1, node2) == expected + +def find_studs(mol): + """ + By element find all undersatisfied connections + at the all-atom level. 
+ """ + atom_degrees = {"H":1, + "C":4, + "O":2, + "N":3} + for node in mol.nodes: + ele = mol.nodes[node]['element'] + if mol.degree(node) != atom_degrees[ele]: + yield node + +def set_mass(mol): + masses = {"O": 16, "N":14,"C":12, + "S":32, "H":1} + + for atom in mol.nodes: + mol.nodes[atom]['mass'] = masses[mol.nodes[atom]['element']] + return mol + +def polymer_from_fragments(fragments, resnames, remove_resid=True): + """ + Given molecule fragments as smiles + combine them into different polymer + molecules. + """ + fragments_to_mol = [] + frag_mols = [] + frag_graph = pysmiles.read_smiles(fragments[0], explicit_hydrogen=True) + nx.set_node_attributes(frag_graph, 1, "resid") + nx.set_node_attributes(frag_graph, resnames[0], "resname") + frag_mols.append(frag_graph) + mol = vermouth.Molecule(frag_graph) + # terminals should have one stud anyways + prev_stud = next(find_studs(frag_graph)) + fragments_to_mol.append({node: node for node in mol.nodes}) + for resname, smile in zip(resnames[1:], fragments[1:]): + frag_graph = pysmiles.read_smiles(smile, explicit_hydrogen=True) + nx.set_node_attributes(frag_graph, resname, "resname") + frag_mols.append(frag_graph) + next_mol = vermouth.Molecule(frag_graph) + correspondance = mol.merge_molecule(next_mol) + fragments_to_mol.append(correspondance) + stud_iter = find_studs(frag_graph) + mol.add_edge(prev_stud, correspondance[next(stud_iter)]) + + try: + prev_stud = correspondance[next(stud_iter)] + except StopIteration: + # we're done molecule is complete + continue + mol = set_mass(mol) + if remove_resid: + nx.set_node_attributes(mol, {node: None for node in mol.nodes} ,"resid") + nx.set_node_attributes(mol, {node: None for node in mol.nodes} ,"resname") + return mol, frag_mols, fragments_to_mol + +@pytest.mark.parametrize( + "smiles, resnames", + [ + # completely defined molecule with two termini + (["[CH3]", "[CH2]O[CH2]", "[CH3]"], ["CH3", "PEO", "CH3"]), + # two different termini + (["[OH][CH2]", "[CH2]O[CH2]", 
"[CH3]"], ["OH", "PEO", "CH3"]), + # two different termini with the same repeat unit + (["[OH][CH2]", "[CH2]O[CH2]","[CH2]O[CH2]", "[CH3]"], ["OH", "PEO", "PEO", "CH3"]), + # sequence with two monomers and multiple "wrong" matchs + (["[CH3]", "[CH2][CH][CH][CH2]", "[CH2]O[CH2]", "[CH2][OH]"], ["CH3", "PBD", "PEO", "OH"]), + # sequence with two monomers, four repeats and multiple "wrong" matchs + (["[CH3]", "[CH2][CH][CH][CH2]", "[CH2][CH][CH][CH2]", "[CH2][CH][CH][CH2]", + "[CH2][CH][CH][CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", + "[CH2][OH]"], ["CH3", "PBE", "PBE", "PBE", "PBE", "PEO", "PEO", "PEO", "PEO", "OH"]), + # super symmtry - worst case scenario + (["[CH3]", "[CH2][CH2]", "[CH2][CH2]", "[CH2][CH2]","[CH2][CH2]", "[CH2][CH2]","[CH3]"], + ["CH3", "PE", "PE", "PE", "PE", "PE", "CH3"]), + ]) +def test_label_fragments(smiles, resnames): + molecule, frag_mols, fragments_in_mol = polymer_from_fragments(smiles, resnames) + frag_finder = polyply.src.fragment_finder.FragmentFinder(molecule, "ter") + unique_fragments = frag_finder.label_fragments_from_graph(frag_mols) + for resid, (resname, frag_to_mol) in enumerate(zip(resnames, fragments_in_mol), start=1): + for frag_node, mol_node in frag_to_mol.items(): + assert frag_finder.molecule.nodes[mol_node]['resname'] == resname + assert frag_finder.molecule.nodes[mol_node]['resid'] == resid + +@pytest.mark.parametrize( + "smiles, resnames, remove, new_name", + [ + # do not match termini + (["[CH3]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH3]"], + ["CH3", "PEO", "PEO", "PEO", "CH3"], + {1:2, 6:3}, + {1: "PEO", "4": "PEO"}, + ), + # have dangling atom in center + (["[CH3]", "[CH2][CH2]", "[CH2][CH2]", "[CH2]O[CH2]", "[CH2][CH2]","[CH2][CH2]", "[CH2][CH2]","[CH3]"], + ["CH3", "PE", "PE", "PEO", "PE", "PE", "PE", "CH3"], + {4:5}, + {4:"PE"}, + ), + ]) +def test_label_unmatched_atoms(smiles, resnames, remove, new_name): + molecule, frag_mols, fragments_in_mol = polymer_from_fragments(smiles, 
resnames, remove_resid=False) + nodes_to_label = {} + max_by_resid = {} + + for node in molecule.nodes: + resid = molecule.nodes[node]['resid'] + if resid in remove: + del molecule.nodes[node]['resid'] + del molecule.nodes[node]['resname'] + nodes_to_label[node] = resid + else: + if resid in max_by_resid: + known_atom = node + max_by_resid[resid] += 1 + else: + max_by_resid[resid] = 1 + + resids = nx.get_node_attributes(molecule, "resid") + # the frag finder removes resid attributes so we have to later reset them + frag_finder = polyply.src.fragment_finder.FragmentFinder(molecule, "ter") + nx.set_node_attributes(frag_finder.molecule, resids, "resid") + frag_finder.max_by_resid = max_by_resid + frag_finder.known_atom = known_atom + frag_finder.label_unmatched_atoms() + for node, old_id in nodes_to_label.items(): + assert frag_finder.molecule.nodes[node]['resid'] == remove[old_id] + assert frag_finder.molecule.nodes[node]['resname'] == new_name[old_id] + +@pytest.mark.parametrize( + "smiles, resnames, remove, uni_frags", + [ + # completely defined molecule with two termini + (["[CH3]", "[CH2]O[CH2]", "[CH3]"], + ["CH3", "PEO", "CH3"], + {}, + {"CH3ter": 0, "PEO": 1} + ), + # two different termini + (["[OH][CH2]", "[CH2]O[CH2]", "[CH3]"], + ["OH", "PEO", "CH3"], + {}, + {"OHter": 0, "PEO": 1, "CH3ter": 2} + ), + # sequence with two monomers, four repeats and multiple "wrong" matchs + (["[CH3]", "[CH2][CH][CH][CH2]", "[CH2][CH][CH][CH2]", "[CH2][CH][CH][CH2]", + "[CH2][CH][CH][CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", + "[CH2][OH]"], + ["CH3", "PBE", "PBE", "PBE", "PBE", "PEO", "PEO", "PEO", "PEO", "OH"], + {}, + {"CH3ter": 0, "PBE": 1, "PEO": 5, "OHter": 9} + ), + # super symmtry - worst case scenario + (["[CH3]", "[CH2][CH2]", "[CH2][CH2]", "[CH2][CH2]","[CH2][CH2]", "[CH2][CH2]","[CH3]"], + ["CH3", "PE", "PE", "PE", "PE", "PE", "CH3"], + {}, + {"CH3ter":0, "PE": 1} + ), + # do not match termini + (["[CH3]", "[CH2]O[CH2]", "[CH2]O[CH2]", 
"[CH2]O[CH2]", "[CH3]"], + ["CH3", "PEO", "PEO", "PEO", "CH3"], + {5: 4}, + {"CH3ter":0, "PEO": 1, "PEOter": (3, 4)}, + ), + # have dangling atom in center; this is a bit akward but essentially serves + # as a guard of having really shitty input + (["[CH3]", "[CH2][CH2]", "[CH2][CH2]", "[CH2]O[CH2]", "[CH2][CH2]","[CH2][CH2]", "[CH2][CH2]","[CH3]"], + ["CH3", "PE", "PE", "PEO", "PE", "PE", "PE", "CH3"], + {4: 3}, + {"CH3ter": 0, "PE": 1, "PEter": (2, 3, 4, 5, 6, 7)}, + ), + ]) +def test_extract_fragments(smiles, resnames, remove, uni_frags): + molecule, frag_mols, fragments_in_mol = polymer_from_fragments(smiles, resnames, remove_resid=True) + for node in molecule.nodes: + resid = molecule.nodes[node]['resid'] + if resid in remove: + del molecule.nodes[node]['resid'] + del molecule.nodes[node]['resname'] + + match_mols = [] + for idx, frag in enumerate(frag_mols): + if idx not in remove.values(): + match_mols.append(frag) + + frag_finder = polyply.src.fragment_finder.FragmentFinder(molecule, "ter") + fragments = frag_finder.extract_unique_fragments(match_mols) + frag_finder.match_keys = ['element', 'mass', 'resname'] + for resname, graph in fragments.items(): + if type(uni_frags[resname]) == tuple: + new_smiles = [smiles[idx] for idx in uni_frags[resname]] + new_resnames = [resnames[idx] for idx in uni_frags[resname]] + ref, _, _ = polymer_from_fragments(new_smiles, new_resnames) + nx.set_node_attributes(ref, resname, "resname") + else: + ref = frag_mols[uni_frags[resname]] + # because the terminii are not labelled yet in the fragment + # graphs used to make the match + nx.set_node_attributes(ref, resname, "resname") + assert nx.is_isomorphic(ref, graph, node_match=frag_finder._node_match) From ae2794c5bb796acdb9fa712972d51bafa6d2b77a Mon Sep 17 00:00:00 2001 From: "f.grunewald" Date: Tue, 20 Jun 2023 10:14:15 +0200 Subject: [PATCH 05/82] add test for 100% coverage --- polyply/tests/test_fragment_finder.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git 
a/polyply/tests/test_fragment_finder.py b/polyply/tests/test_fragment_finder.py index 3e58f5c9..e2b319c0 100644 --- a/polyply/tests/test_fragment_finder.py +++ b/polyply/tests/test_fragment_finder.py @@ -218,6 +218,12 @@ def test_label_unmatched_atoms(smiles, resnames, remove, new_name): {}, {"CH3ter":0, "PE": 1} ), + # different fragments with same resname + (["[CH3]O[CH2]", "[CH2]O[CH2]", "[CH3]"], + ["PEO", "PEO", "CH3"], + {3:2}, + {"PEOter": 0, "PEOter_1": (1,2)} + ), # do not match termini (["[CH3]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH3]"], ["CH3", "PEO", "PEO", "PEO", "CH3"], @@ -248,6 +254,7 @@ def test_extract_fragments(smiles, resnames, remove, uni_frags): frag_finder = polyply.src.fragment_finder.FragmentFinder(molecule, "ter") fragments = frag_finder.extract_unique_fragments(match_mols) frag_finder.match_keys = ['element', 'mass', 'resname'] + assert len(fragments) == len(uni_frags) for resname, graph in fragments.items(): if type(uni_frags[resname]) == tuple: new_smiles = [smiles[idx] for idx in uni_frags[resname]] From 101d2b7e8f90231e949c7256e09d57efef8348ce Mon Sep 17 00:00:00 2001 From: "f.grunewald" Date: Tue, 20 Jun 2023 10:15:06 +0200 Subject: [PATCH 06/82] refactor graph matchin post isomorph check --- polyply/src/fragment_finder.py | 114 +++++++++++++++++---------------- 1 file changed, 60 insertions(+), 54 deletions(-) diff --git a/polyply/src/fragment_finder.py b/polyply/src/fragment_finder.py index 53f9d9e1..6d8e67c5 100644 --- a/polyply/src/fragment_finder.py +++ b/polyply/src/fragment_finder.py @@ -15,7 +15,6 @@ import networkx as nx from vermouth.graph_utils import make_residue_graph from polyply.src.graph_utils import find_one_ismags_match -import matplotlib.pyplot as plt def _element_match(node1, node2): """ @@ -95,6 +94,8 @@ def __init__(self, molecule, prefix): graphs in the second stage. 
masses_to_elements: dict[int][str] matches masses to elements + res_graph: :class:`vermouth.molecule.Molecule` + residue graph of the molecule """ self.max_by_resid = {} self.ter_prefix = prefix @@ -109,18 +110,32 @@ def __init__(self, molecule, prefix): 12: "C", 32: "S", 1: "H"} + self.res_graph = None - # resids are not reliable so we set them all to None - nx.set_node_attributes(self.molecule, None, "resid") + if self.molecule: + # resids are not reliable so we set them all to None + nx.set_node_attributes(self.molecule, None, "resid") - # set the element attribute for each atom in the - # molecule - for node in self.molecule.nodes: - mass = round(self.molecule.nodes[node]["mass"]) - self.molecule.nodes[node]["element"] = self.masses_to_element[mass] - self.molecule.nodes[node]["degree"] = self.molecule.degree(node) + # set the element attribute for each atom in the + # molecule + for node in self.molecule.nodes: + mass = round(self.molecule.nodes[node]["mass"]) + self.molecule.nodes[node]["element"] = self.masses_to_element[mass] + self.molecule.nodes[node]["degree"] = self.molecule.degree(node) def _node_match(self, node1, node2): + """ + Check if two node dicts match. 
+ + Parameters + ---------- + node1: dict + node2: dict + + Returns + ------- + bool + """ for attr in self.match_keys: if node1[attr] != node2[attr]: return False @@ -142,18 +157,46 @@ def pre_match(self, fragment_graph): ----------- fragment_graph: 'nx.Graph' must have attributes element for each node + + Returns + ------- + 'nx.Graph' + the labelled fragment graph """ + template_atoms = list(fragment_graph.nodes) # find subgraph isomorphic matches to the target fragment # based on the element only GM = nx.isomorphism.GraphMatcher(self.molecule, fragment_graph, node_match=_element_match,) - one_match = next(GM.subgraph_isomorphisms_iter()) + + for one_match in GM.subgraph_isomorphisms_iter(): + rev_current_match = {val: key for key, val in one_match.items()} + atoms = [ rev_current_match[template_atom] for template_atom in template_atoms] + if self.is_valid_match(one_match, atoms)[0]: + break + for mol_atom, tempt_atom in one_match.items(): for attr in self.match_keys: fragment_graph.nodes[tempt_atom][attr] = self.molecule.nodes[mol_atom][attr] return fragment_graph + def is_valid_match(self, match, atoms): + """ + Check if the found isomorphism match is valid. 
+ """ + # is the match connected to the previous residue + if not self.is_connected_to_prev(match.keys(), self.assigned_atoms,): + return False, 1 + # check if atoms are already assigned + if frozenset(atoms) in self.res_assigment: + return False, 2 + # check if there is any partial overlap + if any([atom in self.assigned_atoms for atom in atoms]): + return False, 3 + + return True, 4 + def is_connected_to_prev(self, current, prev): """ Check if the atoms in the lists current or @@ -166,6 +209,10 @@ def is_connected_to_prev(self, current, prev): prev: list[abc.hashable] list of prev nodes """ + # no atoms have been assigned + if len(prev) == 0: + return True + for node in current: for neigh_node in self.molecule.neighbors(node): if neigh_node in prev: @@ -195,32 +242,16 @@ def label_fragment_from_graph(self, fragment_graph): node_match=self._node_match, ) template_atoms = list(fragment_graph.nodes) - # the below statement scales super duper extra poorly resname = list(nx.get_node_attributes(fragment_graph, "resname").values())[0] raw_matchs = list(GM.subgraph_isomorphisms_iter()) - print('\n', resname) # loop over all matchs and check if the atoms are already # assigned - symmetric matches must be skipped for current_match in raw_matchs: - if resname == "OH": - print(current_match) # the graph matcher can return the matchs in any order so we need to sort them # according to our tempalte molecule rev_current_match = {val: key for key, val in current_match.items()} atoms = [ rev_current_match[template_atom] for template_atom in template_atoms] - if self.assigned_atoms: - connected = self.is_connected_to_prev(current_match.keys(), - self.assigned_atoms,) - else: - connected = True - - #print(connected, frozenset(atoms) not in self.res_assigment, not any([atom in self.assigned_atoms for atom in atoms])) - - if frozenset(atoms) not in self.res_assigment and \ - not any([atom in self.assigned_atoms for atom in atoms]) and \ - connected: - - # print(current_match.keys()) 
+ if self.is_valid_match(current_match, atoms)[0]: self.res_assigment.append(frozenset(atoms)) for idx, atom in enumerate(atoms): self.molecule.nodes[atom]["resid"] = self.resid @@ -240,9 +271,6 @@ def label_fragments_from_graph(self, fragment_graphs): fragment_graphs: list[nx.Graph] """ for fragment_graph in fragment_graphs: - labeldict = nx.get_node_attributes(fragment_graph, "element") - nx.draw(fragment_graph, labels=labeldict, with_labels=True, pos=nx.kamada_kawai_layout(fragment_graph)) - plt.show() self.label_fragment_from_graph(fragment_graph) def label_unmatched_atoms(self): @@ -278,14 +306,8 @@ def extract_unique_fragments(self, fragment_graphs): list[nx.Graph] all unique fragment graphs """ - labeldict = nx.get_node_attributes(self.molecule, "element") - nx.draw(self.molecule, labels=labeldict, with_labels=True, pos=nx.kamada_kawai_layout(self.molecule)) - plt.show() # first we find and label all fragments in the molecule self.label_fragments_from_graph(fragment_graphs) - # labeldict = nx.get_node_attributes(self.molecule, "atomname") - # nx.draw(self.molecule, labels=labeldict, with_labels=True, pos=nx.kamada_kawai_layout(self.molecule)) - # plt.show() # then we assign all left-over atoms to the existing residues self.label_unmatched_atoms() # make the residue graph @@ -300,6 +322,7 @@ def extract_unique_fragments(self, fragment_graphs): if self.res_graph.degree(node) == 1: resname = resname + self.ter_prefix nx.set_node_attributes(self.molecule, {node: resname for node in fragment.nodes} ,"resname") + nx.set_node_attributes(fragment, {node: resname for node in fragment.nodes} ,"resname") # here we extract the fragments and set appropiate residue names for other_frag in unique_fragments.values(): if nx.is_isomorphic(fragment, other_frag, node_match=self._node_match): @@ -316,27 +339,10 @@ def extract_unique_fragments(self, fragment_graphs): if resname in unique_fragments: resname = resname + "_" + str(had_resnames[resname] + 1) 
nx.set_node_attributes(self.molecule, {node: resname for node in fragment.nodes} ,"resname") + nx.set_node_attributes(fragment, {node: resname for node in fragment.nodes} ,"resname") else: had_resnames[resname] = 0 unique_fragments[resname] = fragment - print("--") - resid_col = {} - resids = nx.get_node_attributes(self.molecule, "resid") - one = True - for resid in set(resids.values()): - if one: - resid_col[resid] = 'tab:red' - one = False - else: - resid_col[resid] = 'tab:blue' - one = True - labeldict = nx.get_node_attributes(self.molecule, "atomname") - colors = [resid_col[resid] for node, resid in resids.items()] - print(colors) - print(labeldict) - nx.draw(self.molecule, labels=labeldict, with_labels=True, pos=nx.kamada_kawai_layout(self.molecule), node_color=colors) - plt.show() - print("--") return unique_fragments From 6261186a91f3d716348c6dec6dce7902573fd0ac Mon Sep 17 00:00:00 2001 From: "f.grunewald" Date: Tue, 20 Jun 2023 13:23:28 +0200 Subject: [PATCH 07/82] add check on node naming --- polyply/src/fragment_finder.py | 12 +++--------- polyply/tests/test_fragment_finder.py | 9 ++++++++- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/polyply/src/fragment_finder.py b/polyply/src/fragment_finder.py index 6d8e67c5..3db65c9c 100644 --- a/polyply/src/fragment_finder.py +++ b/polyply/src/fragment_finder.py @@ -326,15 +326,7 @@ def extract_unique_fragments(self, fragment_graphs): # here we extract the fragments and set appropiate residue names for other_frag in unique_fragments.values(): if nx.is_isomorphic(fragment, other_frag, node_match=self._node_match): - # it can happen that two fragments are completely isomorphic but have different - # atom names because we don't know the order of atoms when looping over the molecule - # and setting the names. In this case we simply take the atom-names of the known - # fragment. Better ideas anyone? 
- mapping = find_one_ismags_match(fragment, other_frag, self._node_match) - if mapping: - for source, target in mapping.items(): - self.molecule.nodes[target]['atomname'] = self.molecule.nodes[source]['atomname'] - break + break else: if resname in unique_fragments: resname = resname + "_" + str(had_resnames[resname] + 1) @@ -344,5 +336,7 @@ def extract_unique_fragments(self, fragment_graphs): had_resnames[resname] = 0 unique_fragments[resname] = fragment + # remake the residue graph since some resnames have changed + self.make_res_graph() return unique_fragments diff --git a/polyply/tests/test_fragment_finder.py b/polyply/tests/test_fragment_finder.py index e2b319c0..59155e77 100644 --- a/polyply/tests/test_fragment_finder.py +++ b/polyply/tests/test_fragment_finder.py @@ -253,9 +253,9 @@ def test_extract_fragments(smiles, resnames, remove, uni_frags): frag_finder = polyply.src.fragment_finder.FragmentFinder(molecule, "ter") fragments = frag_finder.extract_unique_fragments(match_mols) - frag_finder.match_keys = ['element', 'mass', 'resname'] assert len(fragments) == len(uni_frags) for resname, graph in fragments.items(): + frag_finder.match_keys = ['element', 'mass', 'resname'] if type(uni_frags[resname]) == tuple: new_smiles = [smiles[idx] for idx in uni_frags[resname]] new_resnames = [resnames[idx] for idx in uni_frags[resname]] @@ -267,3 +267,10 @@ def test_extract_fragments(smiles, resnames, remove, uni_frags): # graphs used to make the match nx.set_node_attributes(ref, resname, "resname") assert nx.is_isomorphic(ref, graph, node_match=frag_finder._node_match) + # make sure all molecule nodes are named correctly + frag_finder.match_keys = ['atomname', 'resname'] + for node in frag_finder.res_graph: + resname_mol = frag_finder.res_graph.nodes[node]["resname"] + if resname == resname_mol: + target = frag_finder.res_graph.nodes[node]["graph"] + assert nx.is_isomorphic(target, graph, node_match=frag_finder._node_match) From a8ce5a13bf7fb9f8a99159cfd848ffe68e878064 
Mon Sep 17 00:00:00 2001 From: "f.grunewald" Date: Tue, 20 Jun 2023 13:27:56 +0200 Subject: [PATCH 08/82] add pysmiles to tests --- requirements-tests.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements-tests.txt b/requirements-tests.txt index 595a4902..03357910 100644 --- a/requirements-tests.txt +++ b/requirements-tests.txt @@ -4,3 +4,4 @@ pytest-cov pylint codecov tqdm +pysmiles From b8dfa7be105bdc2e419aa8551ac0a041fa67c03c Mon Sep 17 00:00:00 2001 From: "f.grunewald" Date: Tue, 20 Jun 2023 17:20:38 +0200 Subject: [PATCH 09/82] tests for ffoutput --- polyply/src/ffoutput.py | 89 +++++++++++++++++++++++++++++---- polyply/tests/test_ffoutput.py | 91 ++++++++++++++++++++++++++++++++++ 2 files changed, 169 insertions(+), 11 deletions(-) create mode 100644 polyply/tests/test_ffoutput.py diff --git a/polyply/src/ffoutput.py b/polyply/src/ffoutput.py index 8beb7a6e..a1ac7b89 100644 --- a/polyply/src/ffoutput.py +++ b/polyply/src/ffoutput.py @@ -11,6 +11,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import json +from vermouth.molecule import Choice + +def _choice_to_str(attr_dict): + """ + Makes a string out of a choice object. + """ + for attr in attr_dict: + if isinstance(attr_dict[attr], Choice): + attr_string = "|".join(attr_dict[attr].value) + attr_dict[attr] = attr_string + return attr_dict class ForceFieldDirectiveWriter(): """ @@ -21,7 +33,7 @@ class ForceFieldDirectiveWriter(): which does not offer the complete rich syntax of the ff file format. 
""" - def __init__(self, forcefield, stream): + def __init__(self, forcefield, stream, write_block_edges=True): """ Parameters ---------- @@ -36,6 +48,7 @@ def __init__(self, forcefield, stream): # these attributes have a specific order in the moleculetype section self.normal_order_block_atoms = ["atype", "resid", "resname", "atomname", "charge_group", "charge", "mass"] + self.write_block_edges = True def write(self): """ @@ -47,12 +60,22 @@ def write(self): self.stream.write(f"{name} {excl}\n") self.write_atoms_block(block.nodes(data=True)) self.write_interaction_dict(block.interactions) + if self.write_block_edges: + self.write_edges(block.edges) for link in self.forcefield.links: + if link.patterns: + nometa = True + else: + nometa = False self.write_link_header() - self.write_atoms_link(link.nodes(data=True)) + self.write_atoms_link(link.nodes(data=True), nometa) self.write_interaction_dict(link.interactions) self.write_edges(link.edges) + if link.non_edges: + self.write_nonedges(link.non_edges) + if link.patterns: + self.write_patterns(link.patterns) def write_interaction_dict(self, inter_dict): """ @@ -68,9 +91,14 @@ def write_interaction_dict(self, inter_dict): for inter_type in inter_dict: self.stream.write(f"[ {inter_type} ]\n") for interaction in inter_dict[inter_type]: - atom_string = " ".join(interaction.atoms) - param_string = " ".join(interaction.parameters) - meta_string = "{" + " ,".join([f"\"{key}\": \"{value}\"" for key, value in interaction.meta.items()]) + "}" + if inter_type not in ["virtual_sitesn", "virtual_sites1", "virtual_sites2", "virtual_sites3"]: + atom_string = " ".join(interaction.atoms) + param_string = " ".join(interaction.parameters) + else: + atom_string = " ".join(interaction.atoms) + " -- " + param_string = " ".join(interaction.parameters) + + meta_string = json.dumps(interaction.meta) line = atom_string + " " + param_string + " " + meta_string + "\n" self.stream.write(line) @@ -87,6 +115,24 @@ def write_edges(self, edges): for 
idx, jdx in edges: self.stream.write(f"{idx} {jdx}\n") + def write_nonedges(self, edges): + """ + Writes edges to `self.stream` into the edges directive. + + Parameters + ---------- + edges: abc.iteratable + pair-wise iteratable edge list + """ + self.stream.write("[ non-edges ]\n") + for idx, jdx in edges: + # for reasons the second edge is actually an attribute dict + kdx = jdx['atomname'] + write_attrs = {key: value for key, value in jdx.items() if key != "atomname"} + write_attrs = _choice_to_str(write_attrs) + attr_line = json.dumps(write_attrs) + self.stream.write(f"{idx} {kdx} {attr_line}\n") + def write_atoms_block(self, nodes): """ Writes the nodes/atoms of the block atomtype directive to `self.stream`. @@ -99,13 +145,14 @@ def write_atoms_block(self, nodes): pair-wise iteratable edge list """ self.stream.write("[ atoms ]\n") - for idx, (node, attrs) in enumerate(nodes): - idx += 1 - attr_line = " ".join([str(attrs[attr]) for attr in self.normal_order_block_atoms ]) + for idx, (node, attrs) in enumerate(nodes, start=1): + write_attrs = {attr: attrs[attr] for attr in self.normal_order_block_atoms if attr in attrs} + write_attrs = _choice_to_str(write_attrs) + attr_line = " ".join([str(value) for value in write_attrs.values()]) line = f"{idx} " + attr_line + "\n" self.stream.write(line) - def write_atoms_link(self, nodes): + def write_atoms_link(self, nodes, nometa=False): """ Writes the nodes/atoms of the link atomtype directive to `self.stream`. All attributes are written as json style dicts. 
@@ -118,8 +165,13 @@ def write_atoms_link(self, nodes): """ self.stream.write("[ atoms ]\n") for node_key, attributes in nodes: - attr_line = " {" + " ,".join([f"\"{key}\": \"{value}\"" for key, value in attributes.items()]) + "}" - line = str(node_key) + attr_line + "\n" + attributes = {key: value for key, value in attributes.items() if key != "order"} + attributes = _choice_to_str(attributes) + attr_line = " " + json.dumps(attributes) + if nometa: + line = str(node_key) + " { }\n" + else: + line = str(node_key) + attr_line + "\n" self.stream.write(line) def write_link_header(self): @@ -133,3 +185,18 @@ def write_link_header(self): resnames: `abc.itertable[str]` """ self.stream.write("[ link ]\n") + + def write_patterns(self, patterns): + """ + Write the patterns directive. + """ + self.stream.write("[ patterns ]\n") + for pattern in patterns: + line = "" + for tokens in pattern: + atom = tokens[0] + meta = {key: value for key, value in tokens[1].items() if key not in ["atomname", "order"]} + meta_line = json.dumps(_choice_to_str(meta)) + line = line + " " + atom + " " + meta_line + line = line + "\n" + self.stream.write(line) diff --git a/polyply/tests/test_ffoutput.py b/polyply/tests/test_ffoutput.py new file mode 100644 index 00000000..878d2325 --- /dev/null +++ b/polyply/tests/test_ffoutput.py @@ -0,0 +1,91 @@ +from pathlib import Path +import pytest +import vermouth +from vermouth.ffinput import read_ff +import polyply +from polyply.src.ffoutput import ForceFieldDirectiveWriter + +def _read_force_field(fpath): + """ + wrapper to read and return force-field + """ + force_field = vermouth.forcefield.ForceField("test") + with open(fpath, "r") as _file: + lines = _file.readlines() + read_ff(lines, force_field) + return force_field + +def equal_blocks(block1, block2): + """ + Need to overwrite since obviously + the force-fields cannot be the same. 
+ """ + return (block1.nrexcl == block2.nrexcl and + block1.same_nodes(block2) and + block1.same_edges(block2) and + block1.same_interactions(block2) and + block1.name == block2.name ) + +def compare_patterns(patterns1, patterns2): + """ + Patterns are evil so we also need a + special compare function. + """ + assert len(patterns1) == len(patterns2) + for pattern1, pattern2 in zip(patterns1, patterns2): + for entry1, entry2 in zip(pattern1, pattern2): + assert entry1[0] == entry2[0] + assert not vermouth.utils.are_different(entry1[1], + entry2[1]) + return True + +def equal_links(link1, link2): + """ + Needs to overwrite for the same reason + as for blocks. + """ + return (equal_blocks(link1, link2) + and link1.same_non_edges(link2) + and link1.removed_interactions == link2.removed_interactions + and link1.molecule_meta == link2.molecule_meta + and compare_patterns(link1.patterns, link2.patterns) + and set(link1.features) == set(link2.features) + ) + +def equal_ffs(ff1, ff2): + """ + Compare two forcefields. + """ + assert len(ff1.blocks) == len(ff2.blocks) + # compare blocks + for name, block in ff1.blocks.items(): + assert equal_blocks(block, ff2.blocks[name]) + + for link1, link2 in zip(ff1.links, ff2.links): + assert equal_links(link1, link2) + return True + +@pytest.mark.parametrize("libname", [ + '2016H66', + 'gromos53A6', + 'oplsaaLigParGen', + 'martini2', + 'parmbsc1', +]) +def test_ffoutput(tmp_path, libname): + """ + Check if we can write and reread our own ff-libraries. 
+ """ + tmp_path = "/coarse/fabian/current-projects/polymer_itp_builder/polyply_2.0/polyply/tests/test_data/tmp" + lib_path = Path(polyply.DATA_PATH) / libname + for idx, _file in enumerate(lib_path.iterdir()): + if _file.suffix == ".ff": + # read the forcefield + force_field = _read_force_field(_file) + # write the forcefield + tmp_file = Path(tmp_path) / (str(idx) + f"{libname}_new.ff") + with open(tmp_file, "w") as filehandle: + ForceFieldDirectiveWriter(forcefield=force_field, stream=filehandle).write() + # read the smae forcefield file + force_field_target = _read_force_field(tmp_file) + assert equal_ffs(force_field, force_field_target) From b3ea5ac6804ca5e176f0d32092b08117da6aa93e Mon Sep 17 00:00:00 2001 From: "f.grunewald" Date: Tue, 20 Jun 2023 17:21:26 +0200 Subject: [PATCH 10/82] use tmp-file for testing ffoutput --- polyply/tests/test_ffoutput.py | 1 - 1 file changed, 1 deletion(-) diff --git a/polyply/tests/test_ffoutput.py b/polyply/tests/test_ffoutput.py index 878d2325..c5855bd6 100644 --- a/polyply/tests/test_ffoutput.py +++ b/polyply/tests/test_ffoutput.py @@ -76,7 +76,6 @@ def test_ffoutput(tmp_path, libname): """ Check if we can write and reread our own ff-libraries. 
""" - tmp_path = "/coarse/fabian/current-projects/polymer_itp_builder/polyply_2.0/polyply/tests/test_data/tmp" lib_path = Path(polyply.DATA_PATH) / libname for idx, _file in enumerate(lib_path.iterdir()): if _file.suffix == ".ff": From 79c38fb884384112bc4fd81761aec588679147cb Mon Sep 17 00:00:00 2001 From: "f.grunewald" Date: Tue, 20 Jun 2023 17:44:48 +0200 Subject: [PATCH 11/82] modify extract block and use in itp_to_ff --- polyply/src/generate_templates.py | 11 ++++--- polyply/src/itp_to_ff.py | 53 +------------------------------ 2 files changed, 8 insertions(+), 56 deletions(-) diff --git a/polyply/src/generate_templates.py b/polyply/src/generate_templates.py index 4939353c..509663d7 100644 --- a/polyply/src/generate_templates.py +++ b/polyply/src/generate_templates.py @@ -235,7 +235,7 @@ def _relabel_interaction_atoms(interaction, mapping): new_interaction = interaction._replace(atoms=new_atoms) return new_interaction -def extract_block(molecule, resname, defines): +def extract_block(molecule, nodes, defines={}): """ Given a `vermouth.molecule` and a `resname` extract the information of a block from the @@ -245,7 +245,9 @@ def extract_block(molecule, resname, defines): Parameters ---------- molecule: :class:vermouth.molecule.Molecule - resname: str + nodes: abc.hashable + the nodes corresponding to the block to + extract defines: dict dict of type define: value @@ -253,8 +255,8 @@ def extract_block(molecule, resname, defines): ------- :class:vermouth.molecule.Block """ - nodes = find_atoms(molecule, "resname", resname) resid = molecule.nodes[nodes[0]]["resid"] + resname = molecule.nodes[nodes[0]]["resname"] block = vermouth.molecule.Block() # select all nodes with the same first resid and @@ -324,7 +326,8 @@ class variable. 
for resname in resnames: if resname not in self.templates: - block = extract_block(meta_molecule.molecule, resname, + nodes_from_block = find_atoms(meta_molecule.molecule, "resname", resname) + block = extract_block(meta_molecule.molecule, nodes_from_block, self.topology.defines) opt_counter = 0 diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py index 9ba46c21..249adb81 100644 --- a/polyply/src/itp_to_ff.py +++ b/polyply/src/itp_to_ff.py @@ -21,7 +21,7 @@ from vermouth.forcefield import ForceField from vermouth.molecule import Interaction from polyply.src.topology import Topology -from polyply.src.generate_templates import _relabel_interaction_atoms +from polyply.src.generate_templates import extract_block from polyply.src.fragment_finder import FragmentFinder from polyply.src.ffoutput import ForceFieldDirectiveWriter @@ -97,57 +97,6 @@ def _extract_edges_from_shortest_path(atoms, block, min_resid): resnames.update(zip(link_names, [ block.nodes[node]["resname"] for node in edge])) return final_atoms, edges, resnames -def extract_block(molecule, nodes, defines): - """ - Given a `vermouth.molecule` and a `resname` - extract the information of a block from the - molecule definition and replace all defines - if any are found. 
- - Parameters - ---------- - molecule: :class:vermouth.molecule.Molecule - resname: str - defines: dict - dict of type define: value - - Returns - ------- - :class:vermouth.molecule.Block - """ - resid = molecule.nodes[nodes[0]]["resid"] - block = vermouth.molecule.Block() - - # select all nodes with the same first resid and - # make sure the block node labels are atomnames - # also build a correspondance dict between node - # label in the molecule and in the block for - # relabeling the interactions - mapping = {} - for node in nodes: - attr_dict = molecule.nodes[node] - if attr_dict["resid"] == resid: - block.add_node(attr_dict["atomname"], **attr_dict) - mapping[node] = attr_dict["atomname"] - - for inter_type in molecule.interactions: - for interaction in molecule.interactions[inter_type]: - if all(atom in mapping for atom in interaction.atoms): - interaction = _relabel_interaction_atoms(interaction, mapping) - block.interactions[inter_type].append(interaction) - - for inter_type in ["bonds", "constraints", "virtual_sitesn", - "virtual_sites2", "virtual_sites3", "virtual_sites4"]: - block.make_edges_from_interaction_type(inter_type) - - if not nx.is_connected(block): - msg = ('\n Residue {} with id {} consistes of two disconnected parts. 
' - 'Make sure all atoms/particles in a residue are connected by bonds,' - ' constraints or virual-sites.') - raise IOError(msg.format(resname, resid)) - - return block - def extract_links(molecule): """ Given a molecule that has the resid and resname attributes From 77dfe16959a68c31aad2eb105884a2c61fc1bf7f Mon Sep 17 00:00:00 2001 From: "f.grunewald" Date: Tue, 20 Jun 2023 18:01:16 +0200 Subject: [PATCH 12/82] update test for generate templates accordingly --- polyply/tests/test_generate_templates.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/polyply/tests/test_generate_templates.py b/polyply/tests/test_generate_templates.py index ff4bd11f..6c21fc63 100644 --- a/polyply/tests/test_generate_templates.py +++ b/polyply/tests/test_generate_templates.py @@ -169,7 +169,8 @@ def test_extract_block(): polyply.src.polyply_parser.read_polyply(lines, ff) block = ff.blocks['test'] molecule = block.to_molecule() - new_block = extract_block(molecule, "GLY", {}) + nodes = find_atoms(molecule, "resname", "GLY") + new_block = extract_block(molecule, nodes=nodes, defines={}) for node in ff.blocks["GLY"]: atomname = ff.blocks["GLY"].nodes[node]["atomname"] assert ff.blocks["GLY"].nodes[node] == new_block.nodes[atomname] From 214f5f24a1645a44a51517f6ff6d3906d2e107fc Mon Sep 17 00:00:00 2001 From: "f.grunewald" Date: Wed, 21 Jun 2023 14:59:27 +0200 Subject: [PATCH 13/82] add isomorphism naming --- polyply/src/fragment_finder.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/polyply/src/fragment_finder.py b/polyply/src/fragment_finder.py index 3db65c9c..d806c054 100644 --- a/polyply/src/fragment_finder.py +++ b/polyply/src/fragment_finder.py @@ -326,7 +326,11 @@ def extract_unique_fragments(self, fragment_graphs): # here we extract the fragments and set appropiate residue names for other_frag in unique_fragments.values(): if nx.is_isomorphic(fragment, other_frag, node_match=self._node_match): - break + mapping = 
find_one_ismags_match(fragment, other_frag, self._node_match) + if mapping: + for source, target in mapping.items(): + self.molecule.nodes[target]['atomname'] = self.molecule.nodes[source]['atomname'] + break else: if resname in unique_fragments: resname = resname + "_" + str(had_resnames[resname] + 1) @@ -339,4 +343,3 @@ def extract_unique_fragments(self, fragment_graphs): # remake the residue graph since some resnames have changed self.make_res_graph() return unique_fragments - From ef700123ea0d2409c0cadb2fb56f02ee3796e7dd Mon Sep 17 00:00:00 2001 From: "f.grunewald" Date: Wed, 21 Jun 2023 14:59:51 +0200 Subject: [PATCH 14/82] properly check if interactions are equal --- polyply/src/itp_to_ff.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py index 249adb81..30f48251 100644 --- a/polyply/src/itp_to_ff.py +++ b/polyply/src/itp_to_ff.py @@ -24,6 +24,7 @@ from polyply.src.generate_templates import extract_block from polyply.src.fragment_finder import FragmentFinder from polyply.src.ffoutput import ForceFieldDirectiveWriter +from polyply.tests.test_lib_files import _interaction_equal def diffs_to_prefix(atoms, resid_diffs): """ @@ -160,9 +161,8 @@ def extract_links(molecule): # print(kdx, link_inter.atoms, patterns[pattern].get(inter_type, []), "\n") for other_inter in patterns[pattern].get(inter_type, []): - if other_inter.atoms == link_inter.atoms: - if other_inter.parameters == link_inter.parameters: - break + if _interaction_equal(other_inter, link_inter, inter_type): + break else: patterns[pattern][inter_type].append(link_inter) resnames_for_patterns[pattern].update(resnames) From 2410b0a9c3fc1f134c9207310ae4e65cae56bfc1 Mon Sep 17 00:00:00 2001 From: "f.grunewald" Date: Wed, 21 Jun 2023 16:55:31 +0200 Subject: [PATCH 15/82] read itp files --- polyply/src/itp_to_ff.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/polyply/src/itp_to_ff.py 
b/polyply/src/itp_to_ff.py index 30f48251..94214ce7 100644 --- a/polyply/src/itp_to_ff.py +++ b/polyply/src/itp_to_ff.py @@ -20,6 +20,7 @@ import vermouth from vermouth.forcefield import ForceField from vermouth.molecule import Interaction +from vermouth.gmx.itp_read import read_itp from polyply.src.topology import Topology from polyply.src.generate_templates import extract_block from polyply.src.fragment_finder import FragmentFinder @@ -241,10 +242,20 @@ def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0 """ Main executable for itp to ff tool. """ - # read the target itp-file - top = Topology.from_gmx_topfile(itppath, name="test") - mol = top.molecules[0].molecule - mol = equalize_charges(mol, target_charge=charge) + if itppath.suffix == ".top": + # read the topology file + top = Topology.from_gmx_topfile(itppath, name="test") + mol = top.molecules[0].molecule + mol = equalize_charges(mol, target_charge=charge) + + if itppath.suffix == ".itp": + with open(itppath, "r") as _file: + lines = _file.readlines() + force_field = ForceField("tmp") + read_itp(lines, force_field) + block = next(iter(force_field.blocks.values())) + mol = block.to_molecule() + mol.make_edges_from_interaction_type(type_="bonds") # read the target fragments and convert to graph fragment_graphs = [] From 450ebc4fee67799fb38a04b07c976452adb3d552 Mon Sep 17 00:00:00 2001 From: "f.grunewald" Date: Wed, 21 Jun 2023 16:55:54 +0200 Subject: [PATCH 16/82] draft round robin tests --- .../test_data/itp_to_ff/PEG_PBE/in_itp.itp | 573 ++++++++++++++++++ .../tests/test_data/itp_to_ff/PEG_PBE/ref.itp | 569 +++++++++++++++++ .../tests/test_data/itp_to_ff/PEG_PBE/seq.txt | 1 + .../test_data/itp_to_ff/PEO_OHter/in_itp.itp | 327 ++++++++++ .../test_data/itp_to_ff/PEO_OHter/ref.itp | 308 ++++++++++ .../test_data/itp_to_ff/PEO_OHter/seq.txt | 1 + polyply/tests/test_itp_to_ff.py | 97 +++ 7 files changed, 1876 insertions(+) create mode 100644 
polyply/tests/test_data/itp_to_ff/PEG_PBE/in_itp.itp create mode 100644 polyply/tests/test_data/itp_to_ff/PEG_PBE/ref.itp create mode 100644 polyply/tests/test_data/itp_to_ff/PEG_PBE/seq.txt create mode 100644 polyply/tests/test_data/itp_to_ff/PEO_OHter/in_itp.itp create mode 100644 polyply/tests/test_data/itp_to_ff/PEO_OHter/ref.itp create mode 100644 polyply/tests/test_data/itp_to_ff/PEO_OHter/seq.txt create mode 100644 polyply/tests/test_itp_to_ff.py diff --git a/polyply/tests/test_data/itp_to_ff/PEG_PBE/in_itp.itp b/polyply/tests/test_data/itp_to_ff/PEG_PBE/in_itp.itp new file mode 100644 index 00000000..4fb4521a --- /dev/null +++ b/polyply/tests/test_data/itp_to_ff/PEG_PBE/in_itp.itp @@ -0,0 +1,573 @@ + +[ moleculetype ] +; Name nrexcl +PBE_PEO 3 +[ atoms ] +; nr type resnr residue atom cgnr charge mass + 1 opls_800 1 UNK C00 1 -0.2328 12.0110 + 2 opls_801 1 UNK C01 1 -0.1006 12.0110 + 3 opls_802 1 UNK C02 1 -0.1838 12.0110 + 4 opls_803 1 UNK C03 1 -0.2559 12.0110 + 5 opls_804 1 UNK C04 1 -0.1654 12.0110 + 6 opls_805 1 UNK C05 1 -0.0974 12.0110 + 7 opls_806 1 UNK C06 1 -0.1786 12.0110 + 8 opls_807 1 UNK C07 1 -0.2529 12.0110 + 9 opls_808 1 UNK C08 1 -0.1651 12.0110 + 10 opls_809 1 UNK C09 1 -0.0962 12.0110 + 11 opls_810 1 UNK C0A 1 -0.1791 12.0110 + 12 opls_811 1 UNK C0B 1 -0.2540 12.0110 + 13 opls_812 1 UNK C0C 1 -0.1626 12.0110 + 14 opls_813 1 UNK C0D 1 -0.0981 12.0110 + 15 opls_814 1 UNK C0E 1 -0.1725 12.0110 + 16 opls_815 1 UNK C0F 1 0.0098 12.0110 + 17 opls_816 1 UNK O0G 1 -0.3851 15.9990 + 18 opls_817 1 UNK C0H 1 0.0156 12.0110 + 19 opls_818 1 UNK C0I 1 0.0130 12.0110 + 20 opls_819 1 UNK O0J 1 -0.3669 15.9990 + 21 opls_820 1 UNK C0K 1 0.0119 12.0110 + 22 opls_821 1 UNK C0M 1 0.0272 12.0110 + 23 opls_822 1 UNK O0N 1 -0.6013 15.9990 + 24 opls_823 1 UNK H0O 1 0.4144 1.0080 + 25 opls_824 1 UNK C0P 1 -0.1809 12.0110 + 26 opls_825 1 UNK C0Q 1 -0.2618 12.0110 + 27 opls_826 1 UNK H0R 1 0.0850 1.0080 + 28 opls_827 1 UNK H0S 1 0.0850 1.0080 + 29 opls_828 1 UNK H0T 
1 0.0850 1.0080 + 30 opls_829 1 UNK H0U 1 0.1144 1.0080 + 31 opls_830 1 UNK H0V 1 0.1385 1.0080 + 32 opls_831 1 UNK H0W 1 0.1264 1.0080 + 33 opls_832 1 UNK H0X 2 0.1264 1.0080 + 34 opls_833 1 UNK H0Y 2 0.0958 1.0080 + 35 opls_834 1 UNK H0Z 2 0.0958 1.0080 + 36 opls_835 1 UNK H10 2 0.1112 1.0080 + 37 opls_836 1 UNK H11 2 0.1395 1.0080 + 38 opls_837 1 UNK H12 2 0.1255 1.0080 + 39 opls_838 1 UNK H13 2 0.1255 1.0080 + 40 opls_839 1 UNK H14 2 0.0955 1.0080 + 41 opls_840 1 UNK H15 2 0.0955 1.0080 + 42 opls_841 1 UNK H16 2 0.1146 1.0080 + 43 opls_842 1 UNK H17 2 0.1385 1.0080 + 44 opls_843 1 UNK H18 2 0.1264 1.0080 + 45 opls_844 1 UNK H19 2 0.1264 1.0080 + 46 opls_845 1 UNK H1A 2 0.0969 1.0080 + 47 opls_846 1 UNK H1B 2 0.0969 1.0080 + 48 opls_847 1 UNK H1C 2 0.1149 1.0080 + 49 opls_848 1 UNK H1D 2 0.1074 1.0080 + 50 opls_849 1 UNK H1E 2 0.1074 1.0080 + 51 opls_850 1 UNK H1F 2 0.0768 1.0080 + 52 opls_851 1 UNK H1G 2 0.0768 1.0080 + 53 opls_852 1 UNK H1H 2 0.0868 1.0080 + 54 opls_853 1 UNK H1I 2 0.0868 1.0080 + 55 opls_854 1 UNK H1J 2 0.0841 1.0080 + 56 opls_855 1 UNK H1K 2 0.0841 1.0080 + 57 opls_856 1 UNK H1M 2 0.0840 1.0080 + 58 opls_857 1 UNK H1N 2 0.0840 1.0080 + 59 opls_858 1 UNK H1O 2 0.0812 1.0080 + 60 opls_859 1 UNK H1P 2 0.0812 1.0080 + 61 opls_860 1 UNK H1Q 2 0.1428 1.0080 + 62 opls_861 1 UNK H1R 2 0.1279 1.0080 + 63 opls_862 1 UNK H1S 2 0.1279 1.0080 +[ bonds ] + 2 1 1 0.1529 224262.400 + 3 2 1 0.1510 265265.600 + 4 3 1 0.1340 459403.200 + 5 2 1 0.1529 224262.400 + 6 5 1 0.1529 224262.400 + 7 6 1 0.1510 265265.600 + 8 7 1 0.1340 459403.200 + 9 6 1 0.1529 224262.400 + 10 9 1 0.1529 224262.400 + 11 10 1 0.1510 265265.600 + 12 11 1 0.1340 459403.200 + 13 10 1 0.1529 224262.400 + 14 13 1 0.1529 224262.400 + 15 14 1 0.1529 224262.400 + 16 15 1 0.1529 224262.400 + 17 16 1 0.1410 267776.000 + 18 17 1 0.1410 267776.000 + 19 18 1 0.1529 224262.400 + 20 19 1 0.1410 267776.000 + 21 20 1 0.1410 267776.000 + 22 21 1 0.1529 224262.400 + 23 22 1 0.1410 267776.000 + 24 23 1 
0.0945 462750.400 + 25 14 1 0.1510 265265.600 + 26 25 1 0.1340 459403.200 + 27 1 1 0.1090 284512.000 + 28 1 1 0.1090 284512.000 + 29 1 1 0.1090 284512.000 + 30 2 1 0.1090 284512.000 + 31 3 1 0.1080 284512.000 + 32 4 1 0.1080 284512.000 + 33 4 1 0.1080 284512.000 + 34 5 1 0.1090 284512.000 + 35 5 1 0.1090 284512.000 + 36 6 1 0.1090 284512.000 + 37 7 1 0.1080 284512.000 + 38 8 1 0.1080 284512.000 + 39 8 1 0.1080 284512.000 + 40 9 1 0.1090 284512.000 + 41 9 1 0.1090 284512.000 + 42 10 1 0.1090 284512.000 + 43 11 1 0.1080 284512.000 + 44 12 1 0.1080 284512.000 + 45 12 1 0.1080 284512.000 + 46 13 1 0.1090 284512.000 + 47 13 1 0.1090 284512.000 + 48 14 1 0.1090 284512.000 + 49 15 1 0.1090 284512.000 + 50 15 1 0.1090 284512.000 + 51 16 1 0.1090 284512.000 + 52 16 1 0.1090 284512.000 + 53 18 1 0.1090 284512.000 + 54 18 1 0.1090 284512.000 + 55 19 1 0.1090 284512.000 + 56 19 1 0.1090 284512.000 + 57 21 1 0.1090 284512.000 + 58 21 1 0.1090 284512.000 + 59 22 1 0.1090 284512.000 + 60 22 1 0.1090 284512.000 + 61 25 1 0.1080 284512.000 + 62 26 1 0.1080 284512.000 + 63 26 1 0.1080 284512.000 + +[ angles ] +; ai aj ak funct c0 c1 c2 c3 + 1 2 3 1 111.100 527.184 + 2 3 4 1 124.000 585.760 + 1 2 5 1 112.700 488.273 + 2 5 6 1 112.700 488.273 + 5 6 7 1 111.100 527.184 + 6 7 8 1 124.000 585.760 + 5 6 9 1 112.700 488.273 + 6 9 10 1 112.700 488.273 + 9 10 11 1 111.100 527.184 + 10 11 12 1 124.000 585.760 + 9 10 13 1 112.700 488.273 + 10 13 14 1 112.700 488.273 + 13 14 15 1 112.700 488.273 + 14 15 16 1 112.700 488.273 + 15 16 17 1 109.500 418.400 + 16 17 18 1 109.500 502.080 + 17 18 19 1 109.500 418.400 + 18 19 20 1 109.500 418.400 + 19 20 21 1 109.500 502.080 + 20 21 22 1 109.500 418.400 + 21 22 23 1 109.500 418.400 + 22 23 24 1 108.500 460.240 + 13 14 25 1 111.100 527.184 + 14 25 26 1 124.000 585.760 + 2 1 27 1 110.700 313.800 + 2 1 28 1 110.700 313.800 + 2 1 29 1 110.700 313.800 + 1 2 30 1 110.700 313.800 + 2 3 31 1 117.000 292.880 + 3 4 32 1 120.000 292.880 + 3 4 33 1 120.000 292.880 
+ 2 5 34 1 110.700 313.800 + 2 5 35 1 110.700 313.800 + 5 6 36 1 110.700 313.800 + 6 7 37 1 117.000 292.880 + 7 8 38 1 120.000 292.880 + 7 8 39 1 120.000 292.880 + 6 9 40 1 110.700 313.800 + 6 9 41 1 110.700 313.800 + 9 10 42 1 110.700 313.800 + 10 11 43 1 117.000 292.880 + 11 12 44 1 120.000 292.880 + 11 12 45 1 120.000 292.880 + 10 13 46 1 110.700 313.800 + 10 13 47 1 110.700 313.800 + 13 14 48 1 110.700 313.800 + 14 15 49 1 110.700 313.800 + 14 15 50 1 110.700 313.800 + 15 16 51 1 110.700 313.800 + 15 16 52 1 110.700 313.800 + 17 18 53 1 109.500 292.880 + 17 18 54 1 109.500 292.880 + 18 19 55 1 110.700 313.800 + 18 19 56 1 110.700 313.800 + 20 21 57 1 109.500 292.880 + 20 21 58 1 109.500 292.880 + 21 22 59 1 110.700 313.800 + 21 22 60 1 110.700 313.800 + 14 25 61 1 117.000 292.880 + 25 26 62 1 120.000 292.880 + 25 26 63 1 120.000 292.880 + 16 15 50 1 110.700 313.800 + 6 5 34 1 110.700 313.800 + 27 1 29 1 107.800 276.144 + 51 16 52 1 107.800 276.144 + 7 6 36 1 109.500 292.880 + 20 19 55 1 109.500 292.880 + 16 15 49 1 110.700 313.800 + 23 22 59 1 109.500 292.880 + 19 18 54 1 110.700 313.800 + 22 21 57 1 110.700 313.800 + 49 15 50 1 107.800 276.144 + 22 21 58 1 110.700 313.800 + 12 11 43 1 120.000 292.880 + 57 21 58 1 107.800 276.144 + 11 10 13 1 111.100 527.184 + 10 9 41 1 110.700 313.800 + 25 14 48 1 109.500 292.880 + 40 9 41 1 107.800 276.144 + 23 22 60 1 109.500 292.880 + 34 5 35 1 107.800 276.144 + 14 13 47 1 110.700 313.800 + 26 25 61 1 120.000 292.880 + 17 16 52 1 109.500 292.880 + 59 22 60 1 107.800 276.144 + 62 26 63 1 117.000 292.880 + 3 2 30 1 109.500 292.880 + 3 2 5 1 111.100 527.184 + 13 10 42 1 110.700 313.800 + 44 12 45 1 117.000 292.880 + 4 3 31 1 120.000 292.880 + 28 1 29 1 107.800 276.144 + 14 13 46 1 110.700 313.800 + 5 2 30 1 110.700 313.800 + 6 5 35 1 110.700 313.800 + 9 6 36 1 110.700 313.800 + 27 1 28 1 107.800 276.144 + 7 6 9 1 111.100 527.184 + 10 9 40 1 110.700 313.800 + 38 8 39 1 117.000 292.880 + 20 19 56 1 109.500 292.880 + 55 19 56 1 
107.800 276.144 + 19 18 53 1 110.700 313.800 + 46 13 47 1 107.800 276.144 + 8 7 37 1 120.000 292.880 + 11 10 42 1 109.500 292.880 + 15 14 48 1 110.700 313.800 + 15 14 25 1 111.100 527.184 + 53 18 54 1 107.800 276.144 + 17 16 51 1 109.500 292.880 + 32 4 33 1 117.000 292.880 + +[ dihedrals ] +; IMPROPER DIHEDRAL ANGLES +; ai aj ak al funct c0 c1 c2 c3 c4 c5 + 33 4 3 32 4 180.000 10.460 2 + 63 26 25 62 4 180.000 10.460 2 + 39 8 7 38 4 180.000 10.460 2 + 45 12 11 44 4 180.000 10.460 2 + 43 11 10 12 4 180.000 10.460 2 + 61 25 14 26 4 180.000 10.460 2 + 37 7 6 8 4 180.000 10.460 2 + 31 3 2 4 4 180.000 10.460 2 + +[ dihedrals ] +; PROPER DIHEDRAL ANGLES +; ai aj ak al funct c0 c1 c2 c3 c4 c5 + 4 3 2 1 3 0.527 -6.397 -1.695 7.565 -0.000 0.000 + 26 25 14 15 3 0.527 -6.397 -1.695 7.565 -0.000 0.000 + 26 25 14 13 3 0.527 -6.397 -1.695 7.565 -0.000 0.000 + 8 7 6 5 3 0.527 -6.397 -1.695 7.565 -0.000 0.000 + 12 11 10 9 3 0.527 -6.397 -1.695 7.565 -0.000 0.000 + 25 14 15 16 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 + 25 14 13 10 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 + 7 6 5 2 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 + 11 10 9 6 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 + 9 6 7 8 3 0.527 -6.397 -1.695 7.565 -0.000 0.000 + 5 2 3 4 3 0.527 -6.397 -1.695 7.565 -0.000 0.000 + 13 10 11 12 3 0.527 -6.397 -1.695 7.565 -0.000 0.000 + 10 9 6 7 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 + 14 13 10 11 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 + 6 5 2 3 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 + 6 5 2 1 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 + 10 9 6 5 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 + 16 15 14 13 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 + 15 14 13 10 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 + 14 13 10 9 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 + 13 10 9 6 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 + 9 6 5 2 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 + 19 18 17 16 3 1.715 2.845 1.046 -5.607 -0.000 0.000 + 22 21 20 19 3 1.715 2.845 1.046 -5.607 -0.000 0.000 + 21 20 19 18 3 1.715 2.845 
1.046 -5.607 -0.000 0.000 + 18 17 16 15 3 1.715 2.845 1.046 -5.607 -0.000 0.000 + 38 8 7 6 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 + 39 8 7 6 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 + 44 12 11 10 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 + 62 26 25 14 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 + 32 4 3 2 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 + 63 26 25 14 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 + 45 12 11 10 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 + 33 4 3 2 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 + 33 4 3 31 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 + 44 12 11 43 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 + 32 4 3 31 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 + 62 26 25 61 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 + 45 12 11 43 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 + 39 8 7 37 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 + 38 8 7 37 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 + 63 26 25 61 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 + 61 25 14 13 3 -33.472 0.000 33.472 -0.000 -0.000 0.000 + 43 11 10 9 3 -33.472 0.000 33.472 -0.000 -0.000 0.000 + 31 3 2 5 3 -33.472 0.000 33.472 -0.000 -0.000 0.000 + 61 25 14 15 3 -33.472 0.000 33.472 -0.000 -0.000 0.000 + 37 7 6 5 3 -33.472 0.000 33.472 -0.000 -0.000 0.000 + 43 11 10 13 3 -33.472 0.000 33.472 -0.000 -0.000 0.000 + 31 3 2 1 3 -33.472 0.000 33.472 -0.000 -0.000 0.000 + 37 7 6 9 3 -33.472 0.000 33.472 -0.000 -0.000 0.000 + 31 3 2 30 3 0.665 1.996 0.000 -2.661 -0.000 0.000 + 43 11 10 42 3 0.665 1.996 0.000 -2.661 -0.000 0.000 + 37 7 6 36 3 0.665 1.996 0.000 -2.661 -0.000 0.000 + 61 25 14 48 3 0.665 1.996 0.000 -2.661 -0.000 0.000 + 48 14 25 26 3 -0.778 -2.335 0.000 3.113 -0.000 0.000 + 42 10 11 12 3 -0.778 -2.335 0.000 3.113 -0.000 0.000 + 30 2 3 4 3 -0.778 -2.335 0.000 3.113 -0.000 0.000 + 36 6 7 8 3 -0.778 -2.335 0.000 3.113 -0.000 0.000 + 40 9 10 11 3 0.766 2.297 0.000 -3.063 -0.000 0.000 + 34 5 2 3 3 0.766 2.297 0.000 -3.063 -0.000 0.000 + 46 13 10 11 3 0.766 2.297 0.000 -3.063 -0.000 0.000 
+ 29 1 2 3 3 0.766 2.297 0.000 -3.063 -0.000 0.000 + 47 13 10 11 3 0.766 2.297 0.000 -3.063 -0.000 0.000 + 34 5 6 7 3 0.766 2.297 0.000 -3.063 -0.000 0.000 + 40 9 6 7 3 0.766 2.297 0.000 -3.063 -0.000 0.000 + 50 15 14 25 3 0.766 2.297 0.000 -3.063 -0.000 0.000 + 28 1 2 3 3 0.766 2.297 0.000 -3.063 -0.000 0.000 + 47 13 14 25 3 0.766 2.297 0.000 -3.063 -0.000 0.000 + 46 13 14 25 3 0.766 2.297 0.000 -3.063 -0.000 0.000 + 35 5 2 3 3 0.766 2.297 0.000 -3.063 -0.000 0.000 + 41 9 10 11 3 0.766 2.297 0.000 -3.063 -0.000 0.000 + 35 5 6 7 3 0.766 2.297 0.000 -3.063 -0.000 0.000 + 49 15 14 25 3 0.766 2.297 0.000 -3.063 -0.000 0.000 + 41 9 6 7 3 0.766 2.297 0.000 -3.063 -0.000 0.000 + 27 1 2 3 3 0.766 2.297 0.000 -3.063 -0.000 0.000 + 49 15 14 13 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 52 16 15 14 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 36 6 9 10 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 48 14 13 10 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 50 15 14 13 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 47 13 10 9 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 27 1 2 5 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 40 9 10 13 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 29 1 2 5 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 41 9 6 5 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 51 16 15 14 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 35 5 6 9 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 46 13 14 15 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 35 5 2 1 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 28 1 2 5 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 47 13 14 15 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 36 6 5 2 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 42 10 9 6 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 41 9 10 13 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 48 14 15 16 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 34 5 2 1 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 40 9 6 5 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 42 10 13 14 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 30 2 5 6 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 46 13 
10 9 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 34 5 6 9 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 48 14 13 47 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 46 13 10 42 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 35 5 2 30 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 42 10 9 40 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 30 2 1 28 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 51 16 15 49 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 30 2 1 29 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 56 19 18 54 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 36 6 5 34 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 60 22 21 57 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 60 22 21 58 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 56 19 18 53 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 42 10 9 41 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 52 16 15 50 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 55 19 18 53 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 52 16 15 49 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 49 15 14 48 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 48 14 13 46 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 50 15 14 48 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 55 19 18 54 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 59 22 21 57 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 51 16 15 50 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 47 13 10 42 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 40 9 6 36 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 30 2 1 27 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 36 6 5 35 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 59 22 21 58 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 41 9 6 36 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 34 5 2 30 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 58 21 22 23 3 0.979 2.937 0.000 -3.916 -0.000 0.000 + 57 21 22 23 3 0.979 2.937 0.000 -3.916 -0.000 0.000 + 53 18 19 20 3 0.979 2.937 0.000 -3.916 -0.000 0.000 + 56 19 18 17 3 0.979 2.937 0.000 -3.916 -0.000 0.000 + 49 15 16 17 3 0.979 2.937 0.000 -3.916 -0.000 0.000 + 59 22 21 20 3 0.979 2.937 0.000 -3.916 -0.000 0.000 + 54 18 19 20 3 0.979 2.937 0.000 
-3.916 -0.000 0.000 + 60 22 21 20 3 0.979 2.937 0.000 -3.916 -0.000 0.000 + 55 19 18 17 3 0.979 2.937 0.000 -3.916 -0.000 0.000 + 50 15 16 17 3 0.979 2.937 0.000 -3.916 -0.000 0.000 + 60 22 23 24 3 0.736 2.209 0.000 -2.946 -0.000 0.000 + 59 22 23 24 3 0.736 2.209 0.000 -2.946 -0.000 0.000 + 56 19 20 21 3 1.590 4.770 0.000 -6.360 -0.000 0.000 + 51 16 17 18 3 1.590 4.770 0.000 -6.360 -0.000 0.000 + 57 21 20 19 3 1.590 4.770 0.000 -6.360 -0.000 0.000 + 55 19 20 21 3 1.590 4.770 0.000 -6.360 -0.000 0.000 + 53 18 17 16 3 1.590 4.770 0.000 -6.360 -0.000 0.000 + 54 18 17 16 3 1.590 4.770 0.000 -6.360 -0.000 0.000 + 58 21 20 19 3 1.590 4.770 0.000 -6.360 -0.000 0.000 + 52 16 17 18 3 1.590 4.770 0.000 -6.360 -0.000 0.000 + 24 23 22 21 3 -0.444 3.833 0.728 -4.117 -0.000 0.000 + 23 22 21 20 3 9.035 -9.035 0.000 -0.000 -0.000 0.000 + 17 16 15 14 3 2.874 0.582 2.092 -5.548 -0.000 0.000 + 20 19 18 17 3 -1.151 1.151 0.000 -0.000 -0.000 0.000 + +[ pairs ] + 1 4 1 + 1 6 1 + 4 5 1 + 3 6 1 + 2 7 1 + 2 9 1 + 5 8 1 + 5 10 1 + 8 9 1 + 7 10 1 + 6 11 1 + 6 13 1 + 9 12 1 + 9 14 1 + 12 13 1 + 11 14 1 + 10 15 1 + 13 16 1 + 3 27 1 + 14 17 1 + 3 28 1 + 5 27 1 + 3 29 1 + 1 31 1 + 15 18 1 + 5 28 1 + 5 29 1 + 4 30 1 + 2 32 1 + 16 19 1 + 10 25 1 + 2 33 1 + 1 34 1 + 6 30 1 + 5 31 1 + 1 35 1 + 17 20 1 + 3 34 1 + 3 35 1 + 2 36 1 + 18 21 1 + 13 26 1 + 19 22 1 + 16 25 1 + 15 26 1 + 7 34 1 + 7 35 1 + 5 37 1 + 20 23 1 + 9 34 1 + 9 35 1 + 8 36 1 + 6 38 1 + 21 24 1 + 6 39 1 + 5 40 1 + 10 36 1 + 9 37 1 + 5 41 1 + 7 40 1 + 7 41 1 + 6 42 1 + 11 40 1 + 11 41 1 + 9 43 1 + 13 40 1 + 13 41 1 + 12 42 1 + 10 44 1 + 10 45 1 + 9 46 1 + 14 42 1 + 13 43 1 + 9 47 1 + 27 30 1 + 11 46 1 + 28 30 1 + 11 47 1 + 10 48 1 + 29 30 1 + 30 31 1 + 15 46 1 + 15 47 1 + 13 49 1 + 31 32 1 + 13 50 1 + 31 33 1 + 30 34 1 + 16 48 1 + 30 35 1 + 14 51 1 + 17 49 1 + 14 52 1 + 17 50 1 + 18 51 1 + 16 53 1 + 34 36 1 + 18 52 1 + 16 54 1 + 35 36 1 + 25 46 1 + 25 47 1 + 17 55 1 + 36 37 1 + 20 53 1 + 17 56 1 + 26 48 1 + 25 49 1 + 20 54 1 + 13 61 1 + 
37 38 1 + 25 50 1 + 37 39 1 + 36 40 1 + 21 55 1 + 19 57 1 + 15 61 1 + 14 62 1 + 36 41 1 + 21 56 1 + 19 58 1 + 14 63 1 + 20 59 1 + 23 57 1 + 20 60 1 + 23 58 1 + 40 42 1 + 41 42 1 + 24 59 1 + 24 60 1 + 42 43 1 + 43 44 1 + 43 45 1 + 42 46 1 + 42 47 1 + 46 48 1 + 47 48 1 + 48 49 1 + 48 50 1 + 49 51 1 + 50 51 1 + 49 52 1 + 50 52 1 + 53 55 1 + 54 55 1 + 53 56 1 + 48 61 1 + 54 56 1 + 57 59 1 + 58 59 1 + 57 60 1 + 58 60 1 + 61 62 1 + 61 63 1 + diff --git a/polyply/tests/test_data/itp_to_ff/PEG_PBE/ref.itp b/polyply/tests/test_data/itp_to_ff/PEG_PBE/ref.itp new file mode 100644 index 00000000..53941636 --- /dev/null +++ b/polyply/tests/test_data/itp_to_ff/PEG_PBE/ref.itp @@ -0,0 +1,569 @@ +; ../../bench.py + +; Please cite the following papers: + +[ moleculetype ] +new 3 + +[ atoms ] + 1 opls_800 1 CH3ter C0 1 -0.2327952380952381 12.011 + 2 opls_826 1 CH3ter H1 1 0.08500476190476192 1.008 + 3 opls_827 1 CH3ter H2 1 0.08500476190476192 1.008 + 4 opls_828 1 CH3ter H3 1 0.08500476190476192 1.008 + 5 opls_832 2 PBE H8 3 0.1264047619047619 1.008 + 6 opls_801 2 PBE C1 2 -0.10059523809523808 12.011 + 7 opls_802 2 PBE C2 2 -0.1837952380952381 12.011 + 8 opls_803 2 PBE C3 2 -0.2558952380952381 12.011 + 9 opls_804 2 PBE C0 2 -0.1653952380952381 12.011 +10 opls_833 2 PBE H4 3 0.0958047619047619 1.008 +11 opls_834 2 PBE H5 3 0.0958047619047619 1.008 +12 opls_829 2 PBE H6 2 0.11440476190476191 1.008 +13 opls_830 2 PBE H7 2 0.1385047619047619 1.008 +14 opls_831 2 PBE H9 2 0.1264047619047619 1.008 +15 opls_832 3 PBE H8 4 0.1264047619047619 1.008 +16 opls_801 3 PBE C1 3 -0.10059523809523808 12.011 +17 opls_802 3 PBE C2 3 -0.1837952380952381 12.011 +18 opls_803 3 PBE C3 3 -0.2558952380952381 12.011 +19 opls_804 3 PBE C0 3 -0.1653952380952381 12.011 +20 opls_833 3 PBE H4 4 0.0958047619047619 1.008 +21 opls_834 3 PBE H5 4 0.0958047619047619 1.008 +22 opls_829 3 PBE H6 3 0.11440476190476191 1.008 +23 opls_830 3 PBE H7 3 0.1385047619047619 1.008 +24 opls_831 3 PBE H9 3 0.1264047619047619 1.008 
+25 opls_832 4 PBE H8 5 0.1264047619047619 1.008 +26 opls_801 4 PBE C1 4 -0.10059523809523808 12.011 +27 opls_802 4 PBE C2 4 -0.1837952380952381 12.011 +28 opls_803 4 PBE C3 4 -0.2558952380952381 12.011 +29 opls_804 4 PBE C0 4 -0.1653952380952381 12.011 +30 opls_833 4 PBE H4 5 0.0958047619047619 1.008 +31 opls_834 4 PBE H5 5 0.0958047619047619 1.008 +32 opls_829 4 PBE H6 4 0.11440476190476191 1.008 +33 opls_830 4 PBE H7 4 0.1385047619047619 1.008 +34 opls_831 4 PBE H9 4 0.1264047619047619 1.008 +35 opls_832 5 PBE H8 6 0.1264047619047619 1.008 +36 opls_801 5 PBE C1 5 -0.10059523809523808 12.011 +37 opls_802 5 PBE C2 5 -0.1837952380952381 12.011 +38 opls_803 5 PBE C3 5 -0.2558952380952381 12.011 +39 opls_804 5 PBE C0 5 -0.1653952380952381 12.011 +40 opls_833 5 PBE H4 6 0.0958047619047619 1.008 +41 opls_834 5 PBE H5 6 0.0958047619047619 1.008 +42 opls_829 5 PBE H6 5 0.11440476190476191 1.008 +43 opls_830 5 PBE H7 5 0.1385047619047619 1.008 +44 opls_831 5 PBE H9 5 0.1264047619047619 1.008 +45 opls_815 6 PEO C0 6 0.009804761904761906 12.011 +46 opls_816 6 PEO O1 6 -0.3850952380952381 15.999 +47 opls_817 6 PEO C2 6 0.015604761904761906 12.011 +48 opls_850 6 PEO H3 7 0.0768047619047619 1.008 +49 opls_851 6 PEO H4 7 0.0768047619047619 1.008 +50 opls_852 6 PEO H5 7 0.08680476190476191 1.008 +51 opls_853 6 PEO H6 7 0.08680476190476191 1.008 +52 opls_858 7 PEOter H10 9 0.0812047619047619 1.008 +53 opls_818 7 PEOter C0 8 0.013004761904761906 12.011 +54 opls_819 7 PEOter O1 8 -0.3668952380952381 15.999 +55 opls_820 7 PEOter C2 8 0.011904761904761908 12.011 +56 opls_821 7 PEOter C7 8 0.027204761904761905 12.011 +57 opls_822 7 PEOter O8 8 -0.601295238095238 15.999 +58 opls_823 7 PEOter H9 8 0.4144047619047619 1.008 +59 opls_854 7 PEOter H3 9 0.0841047619047619 1.008 +60 opls_855 7 PEOter H4 9 0.0841047619047619 1.008 +61 opls_856 7 PEOter H5 9 0.08400476190476192 1.008 +62 opls_859 7 PEOter H11 9 0.0812047619047619 1.008 +63 opls_857 7 PEOter H6 9 0.08400476190476192 1.008 + +[ 
bonds ] + 2 1 1 0.1090 284512.000 + 3 1 1 0.1090 284512.000 + 4 1 1 0.1090 284512.000 + 7 6 1 0.1510 265265.600 + 8 7 1 0.1340 459403.200 + 9 6 1 0.1529 224262.400 +12 6 1 0.1090 284512.000 +13 7 1 0.1080 284512.000 +14 8 1 0.1080 284512.000 + 5 8 1 0.1080 284512.000 +10 9 1 0.1090 284512.000 +11 9 1 0.1090 284512.000 +17 16 1 0.1510 265265.600 +18 17 1 0.1340 459403.200 +19 16 1 0.1529 224262.400 +22 16 1 0.1090 284512.000 +23 17 1 0.1080 284512.000 +24 18 1 0.1080 284512.000 +15 18 1 0.1080 284512.000 +20 19 1 0.1090 284512.000 +21 19 1 0.1090 284512.000 +27 26 1 0.1510 265265.600 +28 27 1 0.1340 459403.200 +29 26 1 0.1529 224262.400 +32 26 1 0.1090 284512.000 +33 27 1 0.1080 284512.000 +34 28 1 0.1080 284512.000 +25 28 1 0.1080 284512.000 +30 29 1 0.1090 284512.000 +31 29 1 0.1090 284512.000 +37 36 1 0.1510 265265.600 +38 37 1 0.1340 459403.200 +39 36 1 0.1529 224262.400 +42 36 1 0.1090 284512.000 +43 37 1 0.1080 284512.000 +44 38 1 0.1080 284512.000 +35 38 1 0.1080 284512.000 +40 39 1 0.1090 284512.000 +41 39 1 0.1090 284512.000 +46 45 1 0.1410 267776.000 +47 46 1 0.1410 267776.000 +48 45 1 0.1090 284512.000 +49 45 1 0.1090 284512.000 +50 47 1 0.1090 284512.000 +51 47 1 0.1090 284512.000 +54 53 1 0.1410 267776.000 +55 54 1 0.1410 267776.000 +56 55 1 0.1529 224262.400 +57 56 1 0.1410 267776.000 +58 57 1 0.0945 462750.400 +59 53 1 0.1090 284512.000 +60 53 1 0.1090 284512.000 +61 55 1 0.1090 284512.000 +63 55 1 0.1090 284512.000 +52 56 1 0.1090 284512.000 +62 56 1 0.1090 284512.000 + 6 1 1 0.1529 224262.400 ; link +16 9 1 0.1529 224262.400 ; link +26 19 1 0.1529 224262.400 ; link +36 29 1 0.1529 224262.400 ; link +45 39 1 0.1529 224262.400 ; link +53 47 1 0.1529 224262.400 ; link + +[ pairs ] + 8 9 1 + 8 12 1 + 6 14 1 + 6 5 1 + 9 13 1 + 7 10 1 + 7 11 1 +12 13 1 +13 14 1 +13 5 1 +12 10 1 +12 11 1 +18 19 1 +18 22 1 +16 24 1 +16 15 1 +19 23 1 +17 20 1 +17 21 1 +22 23 1 +23 24 1 +23 15 1 +22 20 1 +22 21 1 +28 29 1 +28 32 1 +26 34 1 +26 25 1 +29 33 1 +27 30 1 +27 31 1 
+32 33 1 +33 34 1 +33 25 1 +32 30 1 +32 31 1 +38 39 1 +38 42 1 +36 44 1 +36 35 1 +39 43 1 +37 40 1 +37 41 1 +42 43 1 +43 44 1 +43 35 1 +42 40 1 +42 41 1 +47 48 1 +45 50 1 +47 49 1 +45 51 1 +53 56 1 +54 57 1 +55 58 1 +55 59 1 +53 61 1 +55 60 1 +53 63 1 +54 52 1 +57 61 1 +54 62 1 +57 63 1 +58 52 1 +58 62 1 +61 52 1 +63 52 1 +61 62 1 +63 62 1 + 1 8 1 ; link + 7 2 1 ; link + 7 3 1 ; link + 9 2 1 ; link + 7 4 1 ; link + 1 13 1 ; link + 9 3 1 ; link + 9 4 1 ; link + 1 10 1 ; link + 1 11 1 ; link + 2 12 1 ; link + 3 12 1 ; link + 4 12 1 ; link + 7 16 1 ; link + 6 17 1 ; link + 6 19 1 ; link + 9 18 1 ; link +16 12 1 ; link + 6 22 1 ; link +17 10 1 ; link +17 11 1 ; link + 9 23 1 ; link +19 10 1 ; link +19 11 1 ; link + 9 21 1 ; link + 9 20 1 ; link +10 22 1 ; link +11 22 1 ; link +17 26 1 ; link +16 27 1 ; link +16 29 1 ; link +19 28 1 ; link +26 22 1 ; link +16 32 1 ; link +27 20 1 ; link +27 21 1 ; link +19 33 1 ; link +29 20 1 ; link +29 21 1 ; link +19 31 1 ; link +19 30 1 ; link +20 32 1 ; link +21 32 1 ; link +27 36 1 ; link +26 37 1 ; link +26 39 1 ; link +29 38 1 ; link +36 32 1 ; link +26 42 1 ; link +37 30 1 ; link +37 31 1 ; link +29 43 1 ; link +39 30 1 ; link +39 31 1 ; link +29 41 1 ; link +29 40 1 ; link +30 42 1 ; link +31 42 1 ; link +36 46 1 ; link +39 47 1 ; link +45 37 1 ; link +45 42 1 ; link +36 48 1 ; link +46 40 1 ; link +36 49 1 ; link +46 41 1 ; link +40 48 1 ; link +41 48 1 ; link +40 49 1 ; link +41 49 1 ; link +45 53 1 ; link +46 54 1 ; link +47 55 1 ; link +46 59 1 ; link +54 50 1 ; link +46 60 1 ; link +54 51 1 ; link +50 59 1 ; link +51 59 1 ; link +50 60 1 ; link +51 60 1 ; link + 1 16 1 ; link + 9 26 1 ; link +19 36 1 ; link +29 45 1 ; link + +[ angles ] + 2 1 4 1 107.800 276.144 + 3 1 4 1 107.800 276.144 + 2 1 3 1 107.800 276.144 + 6 7 8 1 124.000 585.760 + 6 7 13 1 117.000 292.880 + 7 8 14 1 120.000 292.880 + 7 8 5 1 120.000 292.880 + 6 9 10 1 110.700 313.800 + 6 9 11 1 110.700 313.800 +10 9 11 1 107.800 276.144 + 7 6 12 1 109.500 
292.880 + 7 6 9 1 111.100 527.184 + 8 7 13 1 120.000 292.880 + 9 6 12 1 110.700 313.800 +14 8 5 1 117.000 292.880 +16 17 18 1 124.000 585.760 +16 17 23 1 117.000 292.880 +17 18 24 1 120.000 292.880 +17 18 15 1 120.000 292.880 +16 19 20 1 110.700 313.800 +16 19 21 1 110.700 313.800 +20 19 21 1 107.800 276.144 +17 16 22 1 109.500 292.880 +17 16 19 1 111.100 527.184 +18 17 23 1 120.000 292.880 +19 16 22 1 110.700 313.800 +24 18 15 1 117.000 292.880 +26 27 28 1 124.000 585.760 +26 27 33 1 117.000 292.880 +27 28 34 1 120.000 292.880 +27 28 25 1 120.000 292.880 +26 29 30 1 110.700 313.800 +26 29 31 1 110.700 313.800 +30 29 31 1 107.800 276.144 +27 26 32 1 109.500 292.880 +27 26 29 1 111.100 527.184 +28 27 33 1 120.000 292.880 +29 26 32 1 110.700 313.800 +34 28 25 1 117.000 292.880 +36 37 38 1 124.000 585.760 +36 37 43 1 117.000 292.880 +37 38 44 1 120.000 292.880 +37 38 35 1 120.000 292.880 +36 39 40 1 110.700 313.800 +36 39 41 1 110.700 313.800 +40 39 41 1 107.800 276.144 +37 36 42 1 109.500 292.880 +37 36 39 1 111.100 527.184 +38 37 43 1 120.000 292.880 +39 36 42 1 110.700 313.800 +44 38 35 1 117.000 292.880 +45 46 47 1 109.500 502.080 +46 47 50 1 109.500 292.880 +46 47 51 1 109.500 292.880 +48 45 49 1 107.800 276.144 +46 45 49 1 109.500 292.880 +50 47 51 1 107.800 276.144 +46 45 48 1 109.500 292.880 +53 54 55 1 109.500 502.080 +54 55 56 1 109.500 418.400 +55 56 57 1 109.500 418.400 +56 57 58 1 108.500 460.240 +54 55 61 1 109.500 292.880 +54 55 63 1 109.500 292.880 +55 56 52 1 110.700 313.800 +55 56 62 1 110.700 313.800 +54 53 59 1 109.500 292.880 +57 56 52 1 109.500 292.880 +56 55 61 1 110.700 313.800 +56 55 63 1 110.700 313.800 +61 55 63 1 107.800 276.144 +57 56 62 1 109.500 292.880 +52 56 62 1 107.800 276.144 +54 53 60 1 109.500 292.880 +59 53 60 1 107.800 276.144 + 1 6 7 1 111.100 527.184 ; link + 1 6 9 1 112.700 488.273 ; link + 6 1 2 1 110.700 313.800 ; link + 6 1 3 1 110.700 313.800 ; link + 6 1 4 1 110.700 313.800 ; link + 1 6 12 1 110.700 313.800 ; link + 6 9 
16 1 112.700 488.273 ; link + 9 16 17 1 111.100 527.184 ; link + 9 16 19 1 112.700 488.273 ; link + 9 16 22 1 110.700 313.800 ; link +16 9 10 1 110.700 313.800 ; link +16 9 11 1 110.700 313.800 ; link +16 19 26 1 112.700 488.273 ; link +19 26 27 1 111.100 527.184 ; link +19 26 29 1 112.700 488.273 ; link +19 26 32 1 110.700 313.800 ; link +26 19 20 1 110.700 313.800 ; link +26 19 21 1 110.700 313.800 ; link +26 29 36 1 112.700 488.273 ; link +29 36 37 1 111.100 527.184 ; link +29 36 39 1 112.700 488.273 ; link +29 36 42 1 110.700 313.800 ; link +36 29 30 1 110.700 313.800 ; link +36 29 31 1 110.700 313.800 ; link +36 39 45 1 112.700 488.273 ; link +39 45 46 1 109.500 418.400 ; link +39 45 48 1 110.700 313.800 ; link +39 45 49 1 110.700 313.800 ; link +45 39 41 1 110.700 313.800 ; link +45 39 40 1 110.700 313.800 ; link +46 47 53 1 109.500 418.400 ; link +47 53 54 1 109.500 418.400 ; link +47 53 59 1 110.700 313.800 ; link +47 53 60 1 110.700 313.800 ; link +53 47 51 1 110.700 313.800 ; link +53 47 50 1 110.700 313.800 ; link + +[ dihedrals ] + 5 8 7 14 4 180.000 10.460 2 +13 7 6 8 4 180.000 10.460 2 + 9 6 7 8 3 0.527 -6.397 -1.695 7.565 -0.000 0.000 +14 8 7 6 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 + 5 8 7 6 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 + 5 8 7 13 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 +14 8 7 13 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 +13 7 6 9 3 -33.472 0.000 33.472 -0.000 -0.000 0.000 +13 7 6 12 3 0.665 1.996 0.000 -2.661 -0.000 0.000 +12 6 7 8 3 -0.778 -2.335 0.000 3.113 -0.000 0.000 +10 9 6 7 3 0.766 2.297 0.000 -3.063 -0.000 0.000 +11 9 6 7 3 0.766 2.297 0.000 -3.063 -0.000 0.000 +11 9 6 12 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +10 9 6 12 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +15 18 17 24 4 180.000 10.460 2 +23 17 16 18 4 180.000 10.460 2 +19 16 17 18 3 0.527 -6.397 -1.695 7.565 -0.000 0.000 +24 18 17 16 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 +15 18 17 16 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 +15 18 17 23 3 58.576 0.000 
-58.576 -0.000 -0.000 0.000 +24 18 17 23 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 +23 17 16 19 3 -33.472 0.000 33.472 -0.000 -0.000 0.000 +23 17 16 22 3 0.665 1.996 0.000 -2.661 -0.000 0.000 +22 16 17 18 3 -0.778 -2.335 0.000 3.113 -0.000 0.000 +20 19 16 17 3 0.766 2.297 0.000 -3.063 -0.000 0.000 +21 19 16 17 3 0.766 2.297 0.000 -3.063 -0.000 0.000 +21 19 16 22 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +20 19 16 22 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +25 28 27 34 4 180.000 10.460 2 +33 27 26 28 4 180.000 10.460 2 +29 26 27 28 3 0.527 -6.397 -1.695 7.565 -0.000 0.000 +34 28 27 26 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 +25 28 27 26 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 +25 28 27 33 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 +34 28 27 33 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 +33 27 26 29 3 -33.472 0.000 33.472 -0.000 -0.000 0.000 +33 27 26 32 3 0.665 1.996 0.000 -2.661 -0.000 0.000 +32 26 27 28 3 -0.778 -2.335 0.000 3.113 -0.000 0.000 +30 29 26 27 3 0.766 2.297 0.000 -3.063 -0.000 0.000 +31 29 26 27 3 0.766 2.297 0.000 -3.063 -0.000 0.000 +31 29 26 32 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +30 29 26 32 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +35 38 37 44 4 180.000 10.460 2 +43 37 36 38 4 180.000 10.460 2 +39 36 37 38 3 0.527 -6.397 -1.695 7.565 -0.000 0.000 +44 38 37 36 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 +35 38 37 36 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 +35 38 37 43 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 +44 38 37 43 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 +43 37 36 39 3 -33.472 0.000 33.472 -0.000 -0.000 0.000 +43 37 36 42 3 0.665 1.996 0.000 -2.661 -0.000 0.000 +42 36 37 38 3 -0.778 -2.335 0.000 3.113 -0.000 0.000 +40 39 36 37 3 0.766 2.297 0.000 -3.063 -0.000 0.000 +41 39 36 37 3 0.766 2.297 0.000 -3.063 -0.000 0.000 +41 39 36 42 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +40 39 36 42 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +48 45 46 47 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +50 47 46 45 3 1.590 4.770 0.000 -6.360 -0.000 0.000 
+51 47 46 45 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +49 45 46 47 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +56 55 54 53 3 1.715 2.845 1.046 -5.607 -0.000 0.000 +62 56 55 61 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +62 56 55 63 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +52 56 55 61 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +52 56 55 63 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +63 55 56 57 3 0.979 2.937 0.000 -3.916 -0.000 0.000 +61 55 56 57 3 0.979 2.937 0.000 -3.916 -0.000 0.000 +52 56 55 54 3 0.979 2.937 0.000 -3.916 -0.000 0.000 +62 56 55 54 3 0.979 2.937 0.000 -3.916 -0.000 0.000 +62 56 57 58 3 0.736 2.209 0.000 -2.946 -0.000 0.000 +52 56 57 58 3 0.736 2.209 0.000 -2.946 -0.000 0.000 +60 53 54 55 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +61 55 54 53 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +59 53 54 55 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +63 55 54 53 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +58 57 56 55 3 -0.444 3.833 0.728 -4.117 -0.000 0.000 +57 56 55 54 3 9.035 -9.035 0.000 -0.000 -0.000 0.000 + 8 7 6 1 3 0.527 -6.397 -1.695 7.565 -0.000 0.000 ; link +13 7 6 1 3 -33.472 0.000 33.472 -0.000 -0.000 0.000 ; link + 4 1 6 7 3 0.766 2.297 0.000 -3.063 -0.000 0.000 ; link + 3 1 6 7 3 0.766 2.297 0.000 -3.063 -0.000 0.000 ; link + 2 1 6 7 3 0.766 2.297 0.000 -3.063 -0.000 0.000 ; link + 2 1 6 9 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link + 4 1 6 9 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +11 9 6 1 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link + 3 1 6 9 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +10 9 6 1 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +12 6 1 3 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +12 6 1 4 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +12 6 1 2 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +18 17 16 9 3 0.527 -6.397 -1.695 7.565 -0.000 0.000 ; link +17 16 9 6 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link +16 9 6 7 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link +19 16 9 6 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link +23 17 16 
9 3 -33.472 0.000 33.472 -0.000 -0.000 0.000 ; link +11 9 16 17 3 0.766 2.297 0.000 -3.063 -0.000 0.000 ; link +10 9 16 17 3 0.766 2.297 0.000 -3.063 -0.000 0.000 ; link +20 19 16 9 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +12 6 9 16 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +22 16 9 6 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +21 19 16 9 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +11 9 16 19 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +10 9 16 19 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +22 16 9 11 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +22 16 9 10 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +28 27 26 19 3 0.527 -6.397 -1.695 7.565 -0.000 0.000 ; link +27 26 19 16 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link +26 19 16 17 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link +29 26 19 16 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link +33 27 26 19 3 -33.472 0.000 33.472 -0.000 -0.000 0.000 ; link +21 19 26 27 3 0.766 2.297 0.000 -3.063 -0.000 0.000 ; link +20 19 26 27 3 0.766 2.297 0.000 -3.063 -0.000 0.000 ; link +30 29 26 19 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +22 16 19 26 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +32 26 19 16 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +31 29 26 19 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +21 19 26 29 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +20 19 26 29 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +32 26 19 21 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +32 26 19 20 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +38 37 36 29 3 0.527 -6.397 -1.695 7.565 -0.000 0.000 ; link +37 36 29 26 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link +36 29 26 27 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link +39 36 29 26 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link +43 37 36 29 3 -33.472 0.000 33.472 -0.000 -0.000 0.000 ; link +31 29 36 37 3 0.766 2.297 0.000 -3.063 -0.000 0.000 ; link +30 29 36 37 3 0.766 2.297 0.000 -3.063 -0.000 0.000 ; link +40 39 36 29 3 0.628 
1.883 0.000 -2.510 -0.000 0.000 ; link +32 26 29 36 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +42 36 29 26 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +41 39 36 29 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +31 29 36 39 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +30 29 36 39 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +42 36 29 31 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +42 36 29 30 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +37 36 39 45 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link +47 46 45 39 3 1.715 2.845 1.046 -5.607 -0.000 0.000 ; link +49 45 39 36 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +48 45 39 36 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +42 36 39 45 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +48 45 39 40 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +49 45 39 41 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +49 45 39 40 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +48 45 39 41 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +40 39 45 46 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link +41 39 45 46 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link +46 45 39 36 3 2.874 0.582 2.092 -5.548 -0.000 0.000 ; link +53 47 46 45 3 1.715 2.845 1.046 -5.607 -0.000 0.000 ; link +55 54 53 47 3 1.715 2.845 1.046 -5.607 -0.000 0.000 ; link +60 53 47 51 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +60 53 47 50 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +59 53 47 50 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +59 53 47 51 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +50 47 53 54 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link +60 53 47 46 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link +51 47 53 54 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link +59 53 47 46 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link +54 53 47 46 3 -1.151 1.151 0.000 -0.000 -0.000 0.000 ; link +16 9 6 1 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link +26 19 16 9 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link +36 29 26 19 3 2.301 -1.464 0.837 -1.674 
-0.000 0.000 ; link +45 39 36 29 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link + diff --git a/polyply/tests/test_data/itp_to_ff/PEG_PBE/seq.txt b/polyply/tests/test_data/itp_to_ff/PEG_PBE/seq.txt new file mode 100644 index 00000000..408d9986 --- /dev/null +++ b/polyply/tests/test_data/itp_to_ff/PEG_PBE/seq.txt @@ -0,0 +1 @@ +CH3ter PBE PBE PBE PBE PEO PEOter diff --git a/polyply/tests/test_data/itp_to_ff/PEO_OHter/in_itp.itp b/polyply/tests/test_data/itp_to_ff/PEO_OHter/in_itp.itp new file mode 100644 index 00000000..b8659bb2 --- /dev/null +++ b/polyply/tests/test_data/itp_to_ff/PEO_OHter/in_itp.itp @@ -0,0 +1,327 @@ +; /coarse/fabian/current-projects/polymer_itp_builder/vermouth_dev/venv_py38/bin/polyply gen_params -lib oplsaaLigParGen -seq OHter:1 PEO:4 OHter:1 -o test.itp + +; Please cite the following papers: +; Jorgensen, W L; Tirado-Rives, J; Proceedings of the National Academy of Sciences 2005; 10.1073/pnas.0408037102 +; Dodda, L S; Vilseck, J Z; Tirado-Rives, J; Jorgensen, W L; The Journal of Physical Chemistry B 2017; 10.1021/acs.jpcb.7b00272 +; Grunewald, F; Alessandri, R; Kroon, P C; Monticelli, L; Souza, P C; Marrink, S J; Nature Communications 2022; 10.1038/s41467-021-27627-4 +; Dodda, L S; Cabeza de Vaca, I; Tirado-Rives, J; Jorgensen, W L; Nucleic Acids Research 2017; 10.1093/nar/gkx312 + +[ moleculetype ] +polymer 3 + +[ atoms ] + 1 opls_154 1 OHter OA1 1 -0.6887 15.999 + 2 opls_135 1 OHter C2 2 0.107 12.011 + 3 opls_004 1 OHter HA3 3 0.4173 1.008 + 4 opls_140 1 OHter H4 4 0.0822 1.008 + 5 opls_140 1 OHter H5 5 0.0822 1.008 + 6 opls_135 2 PEO C01 6 0.0089 12.011 + 7 opls_179 2 PEO O02 7 -0.3846 15.999 + 8 opls_135 2 PEO C03 8 0.0089 12.011 + 9 opls_140 2 PEO H04 9 0.0917 1.008 +10 opls_140 2 PEO H05 10 0.0917 1.008 +11 opls_140 2 PEO H06 11 0.0917 1.008 +12 opls_140 2 PEO H07 12 0.0917 1.008 +13 opls_135 3 PEO C01 13 0.0089 12.011 +14 opls_179 3 PEO O02 14 -0.3846 15.999 +15 opls_135 3 PEO C03 15 0.0089 12.011 +16 opls_140 3 PEO H04 16 0.0917 
1.008 +17 opls_140 3 PEO H05 17 0.0917 1.008 +18 opls_140 3 PEO H06 18 0.0917 1.008 +19 opls_140 3 PEO H07 19 0.0917 1.008 +20 opls_135 4 PEO C01 20 0.0089 12.011 +21 opls_179 4 PEO O02 21 -0.3846 15.999 +22 opls_135 4 PEO C03 22 0.0089 12.011 +23 opls_140 4 PEO H04 23 0.0917 1.008 +24 opls_140 4 PEO H05 24 0.0917 1.008 +25 opls_140 4 PEO H06 25 0.0917 1.008 +26 opls_140 4 PEO H07 26 0.0917 1.008 +27 opls_135 5 PEO C01 27 0.0089 12.011 +28 opls_179 5 PEO O02 28 -0.3846 15.999 +29 opls_135 5 PEO C03 29 0.0089 12.011 +30 opls_140 5 PEO H04 30 0.0917 1.008 +31 opls_140 5 PEO H05 31 0.0917 1.008 +32 opls_140 5 PEO H06 32 0.0917 1.008 +33 opls_140 5 PEO H07 33 0.0917 1.008 +34 opls_154 6 OHter OA1 34 -0.6887 15.999 +35 opls_135 6 OHter C2 35 0.107 12.011 +36 opls_004 6 OHter HA3 36 0.4173 1.008 +37 opls_140 6 OHter H4 37 0.0822 1.008 +38 opls_140 6 OHter H5 38 0.0822 1.008 + +[ bonds ] + 2 1 1 0.1410 267776.000 + 3 1 1 0.0945 462750.400 + 4 2 1 0.1090 284512.000 + 5 2 1 0.1090 284512.000 + 7 6 1 0.1410 267776.000 + 8 7 1 0.1410 267776.000 + 9 6 1 0.1090 284512.000 +10 6 1 0.1090 284512.000 +11 8 1 0.1090 284512.000 +12 8 1 0.1090 284512.000 +14 13 1 0.1410 267776.000 +15 14 1 0.1410 267776.000 +16 13 1 0.1090 284512.000 +17 13 1 0.1090 284512.000 +18 15 1 0.1090 284512.000 +19 15 1 0.1090 284512.000 +21 20 1 0.1410 267776.000 +22 21 1 0.1410 267776.000 +23 20 1 0.1090 284512.000 +24 20 1 0.1090 284512.000 +25 22 1 0.1090 284512.000 +26 22 1 0.1090 284512.000 +28 27 1 0.1410 267776.000 +29 28 1 0.1410 267776.000 +30 27 1 0.1090 284512.000 +31 27 1 0.1090 284512.000 +32 29 1 0.1090 284512.000 +33 29 1 0.1090 284512.000 +35 34 1 0.1410 267776.000 +36 34 1 0.0945 462750.400 +37 35 1 0.1090 284512.000 +38 35 1 0.1090 284512.000 + +; connection +13 8 1 0.1529 224262.400 +20 15 1 0.1529 224262.400 +27 22 1 0.1529 224262.400 + +; termini + 6 2 1 0.1529 224262.400 ; OH-l-link +35 29 1 0.1529 224262.400 ; OH-r-link + +[ pairs ] + 3 4 1 + 3 5 1 + 8 9 1 + 6 11 1 + 8 10 1 + 6 12 1 
+15 16 1 +13 18 1 +15 17 1 +13 19 1 +22 23 1 +20 25 1 +22 24 1 +20 26 1 +29 30 1 +27 32 1 +29 31 1 +27 33 1 +36 37 1 +36 38 1 + +; connection + 6 13 1 + 7 14 1 + 8 15 1 + 7 16 1 +14 11 1 + 7 17 1 +14 12 1 +11 16 1 +12 16 1 +11 17 1 +12 17 1 +13 20 1 +14 21 1 +15 22 1 +14 23 1 +21 18 1 +14 24 1 +21 19 1 +18 23 1 +19 23 1 +18 24 1 +19 24 1 +20 27 1 +21 28 1 +22 29 1 +21 30 1 +28 25 1 +21 31 1 +28 26 1 +25 30 1 +26 30 1 +25 31 1 +26 31 1 + +; termini + 1 7 1 ; OH-l-link + 2 8 1 ; OH-l-link + 6 3 1 ; OH-l-link + 1 9 1 ; OH-l-link + 7 4 1 ; OH-l-link + 1 10 1 ; OH-l-link + 7 5 1 ; OH-l-link + 4 9 1 ; OH-l-link + 5 9 1 ; OH-l-link + 4 10 1 ; OH-l-link + 5 10 1 ; OH-l-link +27 35 1 ; OH-r-link +28 34 1 ; OH-r-link +28 37 1 ; OH-r-link +34 32 1 ; OH-r-link +28 38 1 ; OH-r-link +34 33 1 ; OH-r-link +29 36 1 ; OH-r-link +32 37 1 ; OH-r-link +33 37 1 ; OH-r-link +32 38 1 ; OH-r-link +33 38 1 ; OH-r-link + +[ angles ] + 2 1 3 1 108.500 460.240 + 1 2 4 1 109.500 292.880 + 1 2 5 1 109.500 292.880 + 4 2 5 1 107.800 276.144 + 6 7 8 1 109.500 502.080 + 7 8 11 1 109.500 292.880 + 7 8 12 1 109.500 292.880 +11 8 12 1 107.800 276.144 + 7 6 10 1 109.500 292.880 + 9 6 10 1 107.800 276.144 + 7 6 9 1 109.500 292.880 +13 14 15 1 109.500 502.080 +14 15 18 1 109.500 292.880 +14 15 19 1 109.500 292.880 +18 15 19 1 107.800 276.144 +14 13 17 1 109.500 292.880 +16 13 17 1 107.800 276.144 +14 13 16 1 109.500 292.880 +20 21 22 1 109.500 502.080 +21 22 25 1 109.500 292.880 +21 22 26 1 109.500 292.880 +25 22 26 1 107.800 276.144 +21 20 24 1 109.500 292.880 +23 20 24 1 107.800 276.144 +21 20 23 1 109.500 292.880 +27 28 29 1 109.500 502.080 +28 29 32 1 109.500 292.880 +28 29 33 1 109.500 292.880 +32 29 33 1 107.800 276.144 +28 27 31 1 109.500 292.880 +30 27 31 1 107.800 276.144 +28 27 30 1 109.500 292.880 +35 34 36 1 108.500 460.240 +34 35 37 1 109.500 292.880 +34 35 38 1 109.500 292.880 +37 35 38 1 107.800 276.144 + +; connection + 7 8 13 1 109.500 418.400 + 8 13 14 1 109.500 418.400 + 8 13 16 1 
110.700 313.800 + 8 13 17 1 110.700 313.800 +13 8 11 1 110.700 313.800 +13 8 12 1 110.700 313.800 +14 15 20 1 109.500 418.400 +15 20 21 1 109.500 418.400 +15 20 23 1 110.700 313.800 +15 20 24 1 110.700 313.800 +20 15 18 1 110.700 313.800 +20 15 19 1 110.700 313.800 +21 22 27 1 109.500 418.400 +22 27 28 1 109.500 418.400 +22 27 30 1 110.700 313.800 +22 27 31 1 110.700 313.800 +27 22 25 1 110.700 313.800 +27 22 26 1 110.700 313.800 + +; termini + 1 2 6 1 109.500 418.400 ; OH-l-link + 2 6 7 1 109.500 418.400 ; OH-l-link + 2 6 9 1 110.700 313.800 ; OH-l-link + 2 6 10 1 110.700 313.800 ; OH-l-link + 6 2 4 1 110.700 313.800 ; OH-l-link + 6 2 5 1 110.700 313.800 ; OH-l-link +28 29 35 1 109.500 418.400 ; OH-r-link +29 35 34 1 109.500 418.400 ; OH-r-link +29 35 37 1 110.700 313.800 ; OH-r-link +29 35 38 1 110.700 313.800 ; OH-r-link +35 29 32 1 110.700 313.800 ; OH-r-link +35 29 33 1 110.700 313.800 ; OH-r-link + +[ dihedrals ] + 5 2 1 3 3 0.736 2.209 0.000 -2.946 -0.000 0.000 + 4 2 1 3 3 0.736 2.209 0.000 -2.946 -0.000 0.000 + 9 6 7 8 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +12 8 7 6 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +10 6 7 8 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +11 8 7 6 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +16 13 14 15 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +19 15 14 13 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +17 13 14 15 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +18 15 14 13 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +23 20 21 22 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +26 22 21 20 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +24 20 21 22 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +25 22 21 20 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +30 27 28 29 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +33 29 28 27 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +31 27 28 29 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +32 29 28 27 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +38 35 34 36 3 0.736 2.209 0.000 -2.946 -0.000 0.000 +37 35 34 36 3 0.736 2.209 0.000 -2.946 -0.000 0.000 + +; connection +13 8 7 
6 3 1.715 2.845 1.046 -5.607 -0.000 0.000 +15 14 13 8 3 1.715 2.845 1.046 -5.607 -0.000 0.000 +17 13 8 12 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +17 13 8 11 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +16 13 8 11 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +16 13 8 12 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +17 13 8 7 3 0.979 2.937 0.000 -3.916 -0.000 0.000 +12 8 13 14 3 0.979 2.937 0.000 -3.916 -0.000 0.000 +11 8 13 14 3 0.979 2.937 0.000 -3.916 -0.000 0.000 +16 13 8 7 3 0.979 2.937 0.000 -3.916 -0.000 0.000 +14 13 8 7 3 -1.151 1.151 0.000 -0.000 -0.000 0.000 +20 15 14 13 3 1.715 2.845 1.046 -5.607 -0.000 0.000 +22 21 20 15 3 1.715 2.845 1.046 -5.607 -0.000 0.000 +24 20 15 19 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +24 20 15 18 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +23 20 15 18 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +23 20 15 19 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +24 20 15 14 3 0.979 2.937 0.000 -3.916 -0.000 0.000 +19 15 20 21 3 0.979 2.937 0.000 -3.916 -0.000 0.000 +18 15 20 21 3 0.979 2.937 0.000 -3.916 -0.000 0.000 +23 20 15 14 3 0.979 2.937 0.000 -3.916 -0.000 0.000 +21 20 15 14 3 -1.151 1.151 0.000 -0.000 -0.000 0.000 +27 22 21 20 3 1.715 2.845 1.046 -5.607 -0.000 0.000 +29 28 27 22 3 1.715 2.845 1.046 -5.607 -0.000 0.000 +31 27 22 26 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +31 27 22 25 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +30 27 22 25 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +30 27 22 26 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +31 27 22 21 3 0.979 2.937 0.000 -3.916 -0.000 0.000 +26 22 27 28 3 0.979 2.937 0.000 -3.916 -0.000 0.000 +25 22 27 28 3 0.979 2.937 0.000 -3.916 -0.000 0.000 +30 27 22 21 3 0.979 2.937 0.000 -3.916 -0.000 0.000 +28 27 22 21 3 -1.151 1.151 0.000 -0.000 -0.000 0.000 + +; termini + 8 7 6 2 3 1.715 2.845 1.046 -5.607 -0.000 0.000 ; OH-l-link +10 6 2 4 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; OH-l-link +10 6 2 5 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; OH-l-link + 9 6 2 5 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; OH-l-link + 9 6 2 4 
3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; OH-l-link +10 6 2 1 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; OH-l-link + 9 6 2 1 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; OH-l-link + 4 2 6 7 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; OH-l-link + 5 2 6 7 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; OH-l-link + 3 1 2 6 3 -0.444 3.833 0.728 -4.117 -0.000 0.000 ; OH-l-link + 7 6 2 1 3 9.035 -9.035 0.000 -0.000 -0.000 0.000 ; OH-l-link +35 29 28 27 3 1.715 2.845 1.046 -5.607 -0.000 0.000 ; OH-r-link +37 35 29 33 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; OH-r-link +38 35 29 33 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; OH-r-link +38 35 29 32 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; OH-r-link +37 35 29 32 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; OH-r-link +33 29 35 34 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; OH-r-link +32 29 35 34 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; OH-r-link +38 35 29 28 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; OH-r-link +37 35 29 28 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; OH-r-link +36 34 35 29 3 -0.444 3.833 0.728 -4.117 -0.000 0.000 ; OH-r-link +34 35 29 28 3 9.035 -9.035 0.000 -0.000 -0.000 0.000 ; OH-r-link diff --git a/polyply/tests/test_data/itp_to_ff/PEO_OHter/ref.itp b/polyply/tests/test_data/itp_to_ff/PEO_OHter/ref.itp new file mode 100644 index 00000000..a1962688 --- /dev/null +++ b/polyply/tests/test_data/itp_to_ff/PEO_OHter/ref.itp @@ -0,0 +1,308 @@ +; ../../bench.py + +; Please cite the following papers: + +[ moleculetype ] +new 3 + +[ atoms ] + 1 opls_154 1 OHter O1 1 -0.6887 15.999 + 2 opls_135 1 OHter C0 2 0.107 12.011 + 3 opls_004 1 OHter H4 3 0.4173 1.008 + 4 opls_140 1 OHter H3 4 0.0822 1.008 + 5 opls_140 1 OHter H2 5 0.0822 1.008 + 6 opls_135 2 PEO C0 11 0.008899999999999995 12.011 + 7 opls_179 2 PEO O1 12 -0.3846 15.999 + 8 opls_135 2 PEO C2 13 0.008899999999999995 12.011 + 9 opls_140 2 PEO H3 14 0.0917 1.008 +10 opls_140 2 PEO H4 15 0.0917 1.008 +11 opls_140 2 PEO H5 16 0.0917 1.008 +12 opls_140 2 PEO H6 17 0.0917 1.008 +13 
opls_135 3 PEO C0 23 0.008899999999999995 12.011 +14 opls_179 3 PEO O1 24 -0.3846 15.999 +15 opls_135 3 PEO C2 25 0.008899999999999995 12.011 +16 opls_140 3 PEO H3 26 0.0917 1.008 +17 opls_140 3 PEO H4 27 0.0917 1.008 +18 opls_140 3 PEO H5 28 0.0917 1.008 +19 opls_140 3 PEO H6 29 0.0917 1.008 +20 opls_135 4 PEO C0 35 0.008899999999999995 12.011 +21 opls_179 4 PEO O1 36 -0.3846 15.999 +22 opls_135 4 PEO C2 37 0.008899999999999995 12.011 +23 opls_140 4 PEO H3 38 0.0917 1.008 +24 opls_140 4 PEO H4 39 0.0917 1.008 +25 opls_140 4 PEO H5 40 0.0917 1.008 +26 opls_140 4 PEO H6 41 0.0917 1.008 +27 opls_135 5 PEO C0 47 0.008899999999999995 12.011 +28 opls_179 5 PEO O1 48 -0.3846 15.999 +29 opls_135 5 PEO C2 49 0.008899999999999995 12.011 +30 opls_140 5 PEO H3 50 0.0917 1.008 +31 opls_140 5 PEO H4 51 0.0917 1.008 +32 opls_140 5 PEO H5 52 0.0917 1.008 +33 opls_140 5 PEO H6 53 0.0917 1.008 +34 opls_154 6 OHter O1 54 -0.6887 15.999 +35 opls_135 6 OHter C0 55 0.107 12.011 +36 opls_004 6 OHter H4 56 0.4173 1.008 +37 opls_140 6 OHter H3 57 0.0822 1.008 +38 opls_140 6 OHter H2 58 0.0822 1.008 + +[ bonds ] + 2 1 1 0.1410 267776.000 + 3 1 1 0.0945 462750.400 + 4 2 1 0.1090 284512.000 + 5 2 1 0.1090 284512.000 + 7 6 1 0.1410 267776.000 + 8 7 1 0.1410 267776.000 + 9 6 1 0.1090 284512.000 +10 6 1 0.1090 284512.000 +11 8 1 0.1090 284512.000 +12 8 1 0.1090 284512.000 +14 13 1 0.1410 267776.000 +15 14 1 0.1410 267776.000 +16 13 1 0.1090 284512.000 +17 13 1 0.1090 284512.000 +18 15 1 0.1090 284512.000 +19 15 1 0.1090 284512.000 +21 20 1 0.1410 267776.000 +22 21 1 0.1410 267776.000 +23 20 1 0.1090 284512.000 +24 20 1 0.1090 284512.000 +25 22 1 0.1090 284512.000 +26 22 1 0.1090 284512.000 +28 27 1 0.1410 267776.000 +29 28 1 0.1410 267776.000 +30 27 1 0.1090 284512.000 +31 27 1 0.1090 284512.000 +32 29 1 0.1090 284512.000 +33 29 1 0.1090 284512.000 +35 34 1 0.1410 267776.000 +36 34 1 0.0945 462750.400 +37 35 1 0.1090 284512.000 +38 35 1 0.1090 284512.000 +13 8 1 0.1529 224262.400 ; link +20 15 
1 0.1529 224262.400 ; link +27 22 1 0.1529 224262.400 ; link + 6 2 1 0.1529 224262.400 ; link +35 29 1 0.1529 224262.400 ; link + +[ pairs ] + 3 4 1 + 3 5 1 + 8 9 1 + 6 11 1 + 8 10 1 + 6 12 1 +15 16 1 +13 18 1 +15 17 1 +13 19 1 +22 23 1 +20 25 1 +22 24 1 +20 26 1 +29 30 1 +27 32 1 +29 31 1 +27 33 1 +36 37 1 +36 38 1 + 6 13 1 ; link + 7 14 1 ; link + 8 15 1 ; link + 7 17 1 ; link +14 11 1 ; link + 7 16 1 ; link +14 12 1 ; link +11 17 1 ; link +12 17 1 ; link +11 16 1 ; link +12 16 1 ; link +13 20 1 ; link +14 21 1 ; link +15 22 1 ; link +14 24 1 ; link +21 18 1 ; link +14 23 1 ; link +21 19 1 ; link +18 24 1 ; link +19 24 1 ; link +18 23 1 ; link +19 23 1 ; link +20 27 1 ; link +21 28 1 ; link +22 29 1 ; link +21 31 1 ; link +28 25 1 ; link +21 30 1 ; link +28 26 1 ; link +25 31 1 ; link +26 31 1 ; link +25 30 1 ; link +26 30 1 ; link + 1 7 1 ; link + 2 8 1 ; link + 6 3 1 ; link + 1 9 1 ; link + 7 4 1 ; link + 1 10 1 ; link + 7 5 1 ; link + 4 9 1 ; link + 5 9 1 ; link + 4 10 1 ; link + 5 10 1 ; link +27 35 1 ; link +28 34 1 ; link +28 37 1 ; link +34 33 1 ; link +28 38 1 ; link +34 32 1 ; link +29 36 1 ; link +33 37 1 ; link +32 37 1 ; link +33 38 1 ; link +32 38 1 ; link + +[ angles ] + 2 1 3 1 108.500 460.240 + 1 2 4 1 109.500 292.880 + 1 2 5 1 109.500 292.880 + 4 2 5 1 107.800 276.144 + 6 7 8 1 109.500 502.080 + 7 8 11 1 109.500 292.880 + 7 8 12 1 109.500 292.880 +11 8 12 1 107.800 276.144 + 7 6 10 1 109.500 292.880 + 9 6 10 1 107.800 276.144 + 7 6 9 1 109.500 292.880 +13 14 15 1 109.500 502.080 +14 15 18 1 109.500 292.880 +14 15 19 1 109.500 292.880 +18 15 19 1 107.800 276.144 +14 13 17 1 109.500 292.880 +16 13 17 1 107.800 276.144 +14 13 16 1 109.500 292.880 +20 21 22 1 109.500 502.080 +21 22 25 1 109.500 292.880 +21 22 26 1 109.500 292.880 +25 22 26 1 107.800 276.144 +21 20 24 1 109.500 292.880 +23 20 24 1 107.800 276.144 +21 20 23 1 109.500 292.880 +27 28 29 1 109.500 502.080 +28 29 32 1 109.500 292.880 +28 29 33 1 109.500 292.880 +32 29 33 1 107.800 276.144 
+28 27 31 1 109.500 292.880 +30 27 31 1 107.800 276.144 +28 27 30 1 109.500 292.880 +35 34 36 1 108.500 460.240 +34 35 37 1 109.500 292.880 +34 35 38 1 109.500 292.880 +37 35 38 1 107.800 276.144 + 7 8 13 1 109.500 418.400 ; link + 8 13 14 1 109.500 418.400 ; link + 8 13 17 1 110.700 313.800 ; link + 8 13 16 1 110.700 313.800 ; link +13 8 11 1 110.700 313.800 ; link +13 8 12 1 110.700 313.800 ; link +14 15 20 1 109.500 418.400 ; link +15 20 21 1 109.500 418.400 ; link +15 20 24 1 110.700 313.800 ; link +15 20 23 1 110.700 313.800 ; link +20 15 18 1 110.700 313.800 ; link +20 15 19 1 110.700 313.800 ; link +21 22 27 1 109.500 418.400 ; link +22 27 28 1 109.500 418.400 ; link +22 27 31 1 110.700 313.800 ; link +22 27 30 1 110.700 313.800 ; link +27 22 25 1 110.700 313.800 ; link +27 22 26 1 110.700 313.800 ; link + 1 2 6 1 109.500 418.400 ; link + 2 6 7 1 109.500 418.400 ; link + 2 6 9 1 110.700 313.800 ; link + 2 6 10 1 110.700 313.800 ; link + 6 2 4 1 110.700 313.800 ; link + 6 2 5 1 110.700 313.800 ; link +28 29 35 1 109.500 418.400 ; link +29 35 34 1 109.500 418.400 ; link +29 35 37 1 110.700 313.800 ; link +29 35 38 1 110.700 313.800 ; link +35 29 33 1 110.700 313.800 ; link +35 29 32 1 110.700 313.800 ; link + +[ dihedrals ] + 5 2 1 3 3 0.736 2.209 0.000 -2.946 -0.000 0.000 + 4 2 1 3 3 0.736 2.209 0.000 -2.946 -0.000 0.000 + 9 6 7 8 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +12 8 7 6 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +10 6 7 8 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +11 8 7 6 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +16 13 14 15 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +19 15 14 13 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +17 13 14 15 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +18 15 14 13 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +23 20 21 22 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +26 22 21 20 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +24 20 21 22 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +25 22 21 20 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +30 27 28 29 3 1.590 4.770 
0.000 -6.360 -0.000 0.000 +33 29 28 27 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +31 27 28 29 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +32 29 28 27 3 1.590 4.770 0.000 -6.360 -0.000 0.000 +38 35 34 36 3 0.736 2.209 0.000 -2.946 -0.000 0.000 +37 35 34 36 3 0.736 2.209 0.000 -2.946 -0.000 0.000 +13 8 7 6 3 1.715 2.845 1.046 -5.607 -0.000 0.000 ; link +15 14 13 8 3 1.715 2.845 1.046 -5.607 -0.000 0.000 ; link +16 13 8 12 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +16 13 8 11 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +17 13 8 11 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +17 13 8 12 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +16 13 8 7 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link +12 8 13 14 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link +11 8 13 14 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link +17 13 8 7 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link +14 13 8 7 3 -1.151 1.151 0.000 -0.000 -0.000 0.000 ; link +20 15 14 13 3 1.715 2.845 1.046 -5.607 -0.000 0.000 ; link +22 21 20 15 3 1.715 2.845 1.046 -5.607 -0.000 0.000 ; link +23 20 15 19 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +23 20 15 18 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +24 20 15 18 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +24 20 15 19 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +23 20 15 14 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link +19 15 20 21 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link +18 15 20 21 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link +24 20 15 14 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link +21 20 15 14 3 -1.151 1.151 0.000 -0.000 -0.000 0.000 ; link +27 22 21 20 3 1.715 2.845 1.046 -5.607 -0.000 0.000 ; link +29 28 27 22 3 1.715 2.845 1.046 -5.607 -0.000 0.000 ; link +30 27 22 26 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +30 27 22 25 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +31 27 22 25 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +31 27 22 26 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +30 27 22 21 3 0.979 2.937 0.000 -3.916 
-0.000 0.000 ; link +26 22 27 28 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link +25 22 27 28 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link +31 27 22 21 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link +28 27 22 21 3 -1.151 1.151 0.000 -0.000 -0.000 0.000 ; link + 8 7 6 2 3 1.715 2.845 1.046 -5.607 -0.000 0.000 ; link +10 6 2 4 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +10 6 2 5 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link + 9 6 2 5 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link + 9 6 2 4 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +10 6 2 1 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link + 9 6 2 1 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link + 4 2 6 7 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link + 5 2 6 7 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link + 3 1 2 6 3 -0.444 3.833 0.728 -4.117 -0.000 0.000 ; link + 7 6 2 1 3 9.035 -9.035 0.000 -0.000 -0.000 0.000 ; link +35 29 28 27 3 1.715 2.845 1.046 -5.607 -0.000 0.000 ; link +37 35 29 32 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +38 35 29 32 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +38 35 29 33 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +37 35 29 33 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +32 29 35 34 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link +33 29 35 34 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link +38 35 29 28 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link +37 35 29 28 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link +36 34 35 29 3 -0.444 3.833 0.728 -4.117 -0.000 0.000 ; link +34 35 29 28 3 9.035 -9.035 0.000 -0.000 -0.000 0.000 ; link + diff --git a/polyply/tests/test_data/itp_to_ff/PEO_OHter/seq.txt b/polyply/tests/test_data/itp_to_ff/PEO_OHter/seq.txt new file mode 100644 index 00000000..31ad4f78 --- /dev/null +++ b/polyply/tests/test_data/itp_to_ff/PEO_OHter/seq.txt @@ -0,0 +1 @@ +OHter PEO PEO PEO PEO OHter diff --git a/polyply/tests/test_itp_to_ff.py b/polyply/tests/test_itp_to_ff.py new file mode 100644 index 00000000..588515d7 --- /dev/null +++ 
b/polyply/tests/test_itp_to_ff.py @@ -0,0 +1,97 @@ +# Copyright 2020 University of Groningen +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Integration tests for the itp_to_ff utility program. +""" +from pathlib import Path +import numpy as np +import pytest +from vermouth.molecule import Molecule, Interaction +from vermouth.forcefield import ForceField +from vermouth.gmx.itp_read import read_itp +import polyply +from polyply import itp_to_ff, gen_params +from polyply.src.graph_utils import find_one_ismags_match +from .test_ffoutput import (_read_force_field, equal_ffs) +from .test_lib_files import _interaction_equal + +def _mass_match(node1, node2): + return node1['mass'] == node2['mass'] + +def _read_itp(itppath): + with open(itppath, "r") as _file: + lines = _file.readlines() + force_field = ForceField("tmp") + read_itp(lines, force_field) + block = next(iter(force_field.blocks.values())) + mol = block.to_molecule() + mol.make_edges_from_interaction_type(type_="bonds") + return mol + +def itp_equal(ref_mol, new_mol): + """ + Leightweight itp comparison. 
+ """ + # new_node: ref_node + match = find_one_ismags_match(new_mol, ref_mol, _mass_match) + for node in new_mol.nodes: + # check if important attributes are the same + #assert new_mol.nodes[node]['atype'] == ref_mol.nodes[match[node]]['atype'] + # charge + assert np.isclose(new_mol.nodes[node]['charge'], + ref_mol.nodes[match[node]]['charge'], + atol=0.1) + + for inter_type in new_mol.interactions: + assert len(new_mol.interactions[inter_type]) == len(ref_mol.interactions[inter_type]) + for inter in new_mol.interactions[inter_type]: + new_atoms = [match[atom] for atom in inter.atoms] + new_inter = Interaction(atoms=new_atoms, + parameters=inter.parameters, + meta=inter.meta) + for other_inter in ref_mol.interactions[inter_type]: + if _interaction_equal(inter, other_inter, inter_type): + break + else: + assert False + return True + +@pytest.mark.parametrize("case, smiles, resnames, charge", [ + ("PEO_OHter", ["[OH][CH2]", "[CH2]O[CH2]", "[CH2][OH]"], ["OH", "PEO", "OH"], 0), + ("PEG_PBE", ["[CH3]", "[CH2][CH][CH][CH2]", "[CH2]O[CH2]"], ["CH3", "PBE", "PEO"], 0), +]) +def _test_ffoutput(tmp_path, case, smiles, resnames, charge): + """ + Call itp-to-ff and check if it generates the same force-field + as in the ref.ff file. 
+ """ + tmp_path = Path("/coarse/fabian/current-projects/polymer_itp_builder/polyply_2.0/polyply/tests/test_data/tmp") + tmp_file = Path(tmp_path) / "test.ff" + inpath = Path(polyply.TEST_DATA) / "itp_to_ff" / case + itp_to_ff(itppath=inpath/"in_itp.itp", + fragment_smiles=smiles, + resnames=resnames, + charge=charge, + term_prefix='ter', + outpath=tmp_file,) + # now generate an itp file with this ff-file + tmp_itp = tmp_path / "new.itp" + gen_params(inpath=[tmp_file], + seq_file=inpath/"seq.txt", + outpath=tmp_itp, name="new") + # read the itp-file and return a molecule + new_mol = _read_itp(tmp_itp) + ref_mol = _read_itp(inpath/"in_itp.itp") + # check if itps are the same + assert itp_equal(ref_mol, new_mol) From 888515bb1914a4602ba4ede9fda16b2fa0d21f86 Mon Sep 17 00:00:00 2001 From: "f.grunewald" Date: Wed, 21 Jun 2023 20:14:41 +0200 Subject: [PATCH 17/82] fix input types --- bin/polyply | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/polyply b/bin/polyply index 5a14457c..8ff25efa 100755 --- a/bin/polyply +++ b/bin/polyply @@ -234,7 +234,7 @@ def main(): # pylint: disable=too-many-locals,too-many-statements help='Enable debug logging output. 
Can be given ' 'multiple times.', default=0) - parser_itp_ff.add_argument('-i', dest="itppath") + parser_itp_ff.add_argument('-i', dest="itppath", type=Path) parser_itp_ff.add_argument('-sm', dest="fragment_smiles", nargs='*') parser_itp_ff.add_argument('-rn', dest="resnames", nargs='*') parser_itp_ff.add_argument('-tp',dest="term_prefix", default="ter") From 44cc8675e0a0b2c07b47a19de4b6654df70768aa Mon Sep 17 00:00:00 2001 From: "f.grunewald" Date: Mon, 26 Jun 2023 11:28:49 +0200 Subject: [PATCH 18/82] add test print --- polyply/src/itp_to_ff.py | 1 + 1 file changed, 1 insertion(+) diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py index 94214ce7..d21db023 100644 --- a/polyply/src/itp_to_ff.py +++ b/polyply/src/itp_to_ff.py @@ -200,6 +200,7 @@ def extract_links(molecule): link.interactions[inter_type].append(interaction) links.append(link) + print("--test--") print(links) return links From 32cd8f8fd0e5141dc10d4bdb1cdfd2b19af56bb6 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Wed, 22 Nov 2023 15:47:11 +0100 Subject: [PATCH 19/82] clean up output --- polyply/src/ffoutput.py | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/polyply/src/ffoutput.py b/polyply/src/ffoutput.py index a1ac7b89..0e06ea3f 100644 --- a/polyply/src/ffoutput.py +++ b/polyply/src/ffoutput.py @@ -57,6 +57,7 @@ def write(self): for name, block in self.forcefield.blocks.items(): self.stream.write("[ moleculetype ]\n") excl = str(block.nrexcl) + self.max_idx = max(len(node) for node in block.nodes) self.stream.write(f"{name} {excl}\n") self.write_atoms_block(block.nodes(data=True)) self.write_interaction_dict(block.interactions) @@ -68,6 +69,7 @@ def write(self): nometa = True else: nometa = False + self.max_idx = max(len(node) for node in link.nodes) self.write_link_header() self.write_atoms_link(link.nodes(data=True), nometa) self.write_interaction_dict(link.interactions) @@ -91,11 +93,13 @@ def write_interaction_dict(self, 
inter_dict): for inter_type in inter_dict: self.stream.write(f"[ {inter_type} ]\n") for interaction in inter_dict[inter_type]: + atoms = ['{atom:>{imax}}'.format(atom=atom, + imax=self.max_idx) for atom in interaction.atoms] if inter_type not in ["virtual_sitesn", "virtual_sites1", "virtual_sites2", "virtual_sites3"]: - atom_string = " ".join(interaction.atoms) + atom_string = " ".join(atoms) param_string = " ".join(interaction.parameters) else: - atom_string = " ".join(interaction.atoms) + " -- " + atom_string = " ".join(atoms) + " -- " param_string = " ".join(interaction.parameters) meta_string = json.dumps(interaction.meta) @@ -113,7 +117,10 @@ def write_edges(self, edges): """ self.stream.write("[ edges ]\n") for idx, jdx in edges: - self.stream.write(f"{idx} {jdx}\n") + line = "{idx:>{imax}} {jdx:>{imax}}\n".format(idx=idx, + jdx=jdx, + imax=self.max_idx) + self.stream.write(line) def write_nonedges(self, edges): """ @@ -145,12 +152,23 @@ def write_atoms_block(self, nodes): pair-wise iteratable edge list """ self.stream.write("[ atoms ]\n") + max_length = {'idx': len(str(len(nodes)))} + for attribute in self.normal_order_block_atoms: + max_length[attribute] = max(len(str(atom.get(attribute, ''))) + for _, atom in nodes) + for idx, (node, attrs) in enumerate(nodes, start=1): - write_attrs = {attr: attrs[attr] for attr in self.normal_order_block_atoms if attr in attrs} - write_attrs = _choice_to_str(write_attrs) - attr_line = " ".join([str(value) for value in write_attrs.values()]) - line = f"{idx} " + attr_line + "\n" - self.stream.write(line) + write_attrs = {attr: str(attrs[attr]) for attr in self.normal_order_block_atoms if attr in attrs} + self.stream.write('{idx:>{max_length[idx]}} ' + '{atype:<{max_length[atype]}} ' + '{resid:>{max_length[resid]}} ' + '{resname:<{max_length[resname]}} ' + '{atomname:<{max_length[atomname]}} ' + '{charge_group:>{max_length[charge_group]}} ' + '{charge:>{max_length[charge]}} ' + 
'{mass:>{max_length[mass]}}\n'.format(idx=idx, + max_length=max_length, + **write_attrs)) def write_atoms_link(self, nodes, nometa=False): """ From a8d1bb9e0b602e969d5cfa85761ab74f9cdb3c4c Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Wed, 22 Nov 2023 15:47:31 +0100 Subject: [PATCH 20/82] methods to deal with charges --- polyply/src/charges.py | 101 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 polyply/src/charges.py diff --git a/polyply/src/charges.py b/polyply/src/charges.py new file mode 100644 index 00000000..ff640d4a --- /dev/null +++ b/polyply/src/charges.py @@ -0,0 +1,101 @@ +import numpy as np +import networkx as nx +import scipy.optimize + +def set_charges(block, res_graph, name): + resnames = nx.get_node_attributes(res_graph, 'resname') + centrality = nx.betweenness_centrality(res_graph) + score = -1 + most_central_node = None + for node, resname in resnames.items(): + if resname == name and centrality[node] > score: + score = centrality[node] + most_central_node = node + charges_tmp = nx.get_node_attributes(res_graph.nodes[most_central_node]['graph'], 'charge') + atomnames = nx.get_node_attributes(res_graph.nodes[most_central_node]['graph'], 'atomname') + charges = {atomname: charges_tmp[node] for node, atomname in atomnames.items()} + for node in block.nodes: + block.nodes[node]['charge'] = charges[block.nodes[node]['atomname']] + return block + +def bond_dipoles(bonds, charges): + bond_dipo = np.zeros((len(bonds))) + for kdx, (idx, jdx) in enumerate(bonds.keys()): + lb = bonds[(idx, jdx)] + bond_dipo[kdx] = lb*(charges[idx] - charges[jdx]) + return bond_dipo + +def _get_bonds(block, topology=None): + bonds = {} + atoms = block.nodes + nodes_to_count = {node: count for count, node in enumerate(block.nodes)} + for idx, jdx in block.edges: + for bond in block.interactions['bonds']: + if tuple(bond.atoms) in [(idx, jdx), (jdx, idx)]: + try: + bonds[(nodes_to_count[idx], nodes_to_count[jdx])] = 
float(bond.parameters[1]) + except IndexError: + if topology: + batoms = (atoms[idx]['atype'], + atoms[jdx]['atype']) + if batoms in topology.types['bonds']: + params = topology.types['bonds'][batoms][0][0][1] + elif batoms[::-1] in topology.types['bonds']: + params = topology.types['bonds'][batoms[::-1]][0][0][1] + print(params) + bonds[(nodes_to_count[idx], nodes_to_count[jdx])] = float(params) + return bonds + +def equalize_charges(block, topology=None): + block.make_edges_from_interaction_type('bonds') + keys = nx.get_node_attributes(block, 'charge').keys() + charges = np.array(list(nx.get_node_attributes(block, 'charge').values())) + if np.isclose(charges.sum(), 0, atol=1*10**-6): + return block + + # we need to equalize the charge + bonds = _get_bonds(block, topology) + ref_dipoles = bond_dipoles(bonds, charges) + + # the loss consists of the deviation of the + # sum of charges from zero and the difference + # in the original bond dipole moments + def loss(arr): + arr.reshape(-1) + curr_dipoles = bond_dipoles(bonds, arr) + loss = np.abs(arr.sum()) + np.sum(np.square(ref_dipoles - curr_dipoles)) + return loss + + opt_results = scipy.optimize.minimize(loss, charges, method='L-BFGS-B', + options={'ftol': 0.001, 'maxiter': 100}) + balanced_charges = opt_results['x'] + nx.set_node_attributes(block, dict(zip(keys, balanced_charges)), 'charge') + return block + + +#def equalize_charges(molecule, target_charge=0): +# """ +# Make sure that the total charge of molecule is equal to +# the target charge by substracting the differences split +# over all atoms. 
+# +# Parameters +# ---------- +# molecule: :class:`vermouth.molecule.Molecule` +# target_charge: float +# the charge of the molecule +# +# Returns +# ------- +# molecule +# the molecule with updated charge attribute +# """ +# total = nx.get_node_attributes(molecule, "charge") +# diff = (sum(list(total.values())) - target_charge)/len(molecule.nodes) +# if np.isclose(diff, 0, atol=0.0001): +# return molecule +# for node in molecule.nodes: +# charge = float(molecule.nodes[node]['charge']) - diff +# molecule.nodes[node]['charge'] = charge +# total = nx.get_node_attributes(molecule, "charge") +# return molecule From 37bad71940aaa6ee77b4b46635c0c71e901d4df8 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Wed, 22 Nov 2023 15:47:40 +0100 Subject: [PATCH 21/82] methods to deal with charges --- polyply/src/fragment_finder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polyply/src/fragment_finder.py b/polyply/src/fragment_finder.py index d806c054..bde5316b 100644 --- a/polyply/src/fragment_finder.py +++ b/polyply/src/fragment_finder.py @@ -342,4 +342,4 @@ def extract_unique_fragments(self, fragment_graphs): # remake the residue graph since some resnames have changed self.make_res_graph() - return unique_fragments + return unique_fragments, self.res_graph From 74090983ed36d1756f1b6a29cdac51004fc47788 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Wed, 22 Nov 2023 15:47:47 +0100 Subject: [PATCH 22/82] methods to deal with charges --- polyply/src/itp_to_ff.py | 53 ++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py index d21db023..d8f6d0b0 100644 --- a/polyply/src/itp_to_ff.py +++ b/polyply/src/itp_to_ff.py @@ -25,6 +25,7 @@ from polyply.src.generate_templates import extract_block from polyply.src.fragment_finder import FragmentFinder from polyply.src.ffoutput import ForceFieldDirectiveWriter +from polyply.src.charges import 
equalize_charges from polyply.tests.test_lib_files import _interaction_equal def diffs_to_prefix(atoms, resid_diffs): @@ -200,35 +201,9 @@ def extract_links(molecule): link.interactions[inter_type].append(interaction) links.append(link) - print("--test--") - print(links) + #print(links) return links -def equalize_charges(molecule, target_charge=0): - """ - Make sure that the total charge of molecule is equal to - the target charge by substracting the differences split - over all atoms. - - Parameters - ---------- - molecule: :class:`vermouth.molecule.Molecule` - target_charge: float - the charge of the molecule - - Returns - ------- - molecule - the molecule with updated charge attribute - """ - total = nx.get_node_attributes(molecule, "charge") - diff = (sum(list(total.values())) - target_charge)/len(molecule.nodes) - for node in molecule.nodes: - charge = float(molecule.nodes[node]['charge']) - diff - molecule.nodes[node]['charge'] = charge - total = nx.get_node_attributes(molecule, "charge") - return molecule - def handle_chirality(molecule, chiral_centers): pass @@ -239,6 +214,22 @@ def hcount(molecule, node): hcounter+= 1 return hcounter +def set_charges(block, res_graph, name): + resnames = nx.get_node_attributes(res_graph, 'resname') + centrality = nx.betweenness_centrality(res_graph) + score = -1 + most_central_node = None + for node, resname in resnames.items(): + if resname == name and centrality[node] > score: + score = centrality[node] + most_central_node = node + charges_tmp = nx.get_node_attributes(res_graph.nodes[most_central_node]['graph'], 'charge') + atomnames = nx.get_node_attributes(res_graph.nodes[most_central_node]['graph'], 'atomname') + charges = {atomname: charges_tmp[node] for node, atomname in atomnames.items()} + for node in block.nodes: + block.nodes[node]['charge'] = charges[block.nodes[node]['atomname']] + return block + def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0): """ Main executable for itp to 
ff tool. @@ -247,7 +238,6 @@ def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0 # read the topology file top = Topology.from_gmx_topfile(itppath, name="test") mol = top.molecules[0].molecule - mol = equalize_charges(mol, target_charge=charge) if itppath.suffix == ".itp": with open(itppath, "r") as _file: @@ -266,18 +256,23 @@ def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0 fragment_graphs.append(fragment_graph) # identify and extract all unique fragments - unique_fragments = FragmentFinder(mol, prefix=term_prefix).extract_unique_fragments(fragment_graphs) + unique_fragments, res_graph = FragmentFinder(mol, prefix=term_prefix).extract_unique_fragments(fragment_graphs) force_field = ForceField("new") for name, fragment in unique_fragments.items(): new_block = extract_block(mol, list(fragment.nodes), defines={}) nx.set_node_attributes(new_block, 1, "resid") new_block.nrexcl = mol.nrexcl force_field.blocks[name] = new_block + set_charges(new_block, res_graph, name) + #print("here") + if itppath.suffix == ".top": + equalize_charges(new_block, top) # for node in mol.nodes: # print(mol.nodes[node]) force_field.links = extract_links(mol) + print("-----") with open(outpath, "w") as filehandle: ForceFieldDirectiveWriter(forcefield=force_field, stream=filehandle).write() From 362372ebe5ac391f9ea811565445db6db8010502 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Wed, 22 Nov 2023 15:50:40 +0100 Subject: [PATCH 23/82] adjust test --- polyply/tests/test_fragment_finder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polyply/tests/test_fragment_finder.py b/polyply/tests/test_fragment_finder.py index 59155e77..7fb1478c 100644 --- a/polyply/tests/test_fragment_finder.py +++ b/polyply/tests/test_fragment_finder.py @@ -252,7 +252,7 @@ def test_extract_fragments(smiles, resnames, remove, uni_frags): match_mols.append(frag) frag_finder = polyply.src.fragment_finder.FragmentFinder(molecule, 
"ter") - fragments = frag_finder.extract_unique_fragments(match_mols) + fragments, _ = frag_finder.extract_unique_fragments(match_mols) assert len(fragments) == len(uni_frags) for resname, graph in fragments.items(): frag_finder.match_keys = ['element', 'mass', 'resname'] From 4ed29798a32dd85f8cb9bd3ae4e66a2a1c46e2b8 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Wed, 22 Nov 2023 16:09:33 +0100 Subject: [PATCH 24/82] move extract block to molecule utils --- polyply/src/generate_templates.py | 76 +------ polyply/src/itp_to_ff.py | 218 +------------------ polyply/src/molecule_utils.py | 260 +++++++++++++++++++++++ polyply/tests/test_generate_templates.py | 6 +- 4 files changed, 267 insertions(+), 293 deletions(-) create mode 100644 polyply/src/molecule_utils.py diff --git a/polyply/src/generate_templates.py b/polyply/src/generate_templates.py index 509663d7..d5ccbe23 100644 --- a/polyply/src/generate_templates.py +++ b/polyply/src/generate_templates.py @@ -19,8 +19,8 @@ from .processor import Processor from .linalg_functions import (u_vect, center_of_geometry, radius_of_gyration) -from .topology import replace_defined_interaction from .linalg_functions import dih +from .molecule_utils import extract_block """ Processor generating coordinates for all residues of a meta_molecule matching those in the meta_molecule.molecule attribute. @@ -216,80 +216,6 @@ def map_from_CoG(coords): return out_vectors -def _relabel_interaction_atoms(interaction, mapping): - """ - Relables the atoms in interaction according to the - rules defined in mapping. 
- - Parameters - ---------- - interaction: `vermouth.molecule.Interaction` - mapping: `:class:dict` - - Returns - ------- - interaction: `vermouth.molecule.Interaction` - the new interaction with updated atoms - """ - new_atoms = [mapping[atom] for atom in interaction.atoms] - new_interaction = interaction._replace(atoms=new_atoms) - return new_interaction - -def extract_block(molecule, nodes, defines={}): - """ - Given a `vermouth.molecule` and a `resname` - extract the information of a block from the - molecule definition and replace all defines - if any are found. - - Parameters - ---------- - molecule: :class:vermouth.molecule.Molecule - nodes: abc.hashable - the nodes corresponding to the block to - extract - defines: dict - dict of type define: value - - Returns - ------- - :class:vermouth.molecule.Block - """ - resid = molecule.nodes[nodes[0]]["resid"] - resname = molecule.nodes[nodes[0]]["resname"] - block = vermouth.molecule.Block() - - # select all nodes with the same first resid and - # make sure the block node labels are atomnames - # also build a correspondance dict between node - # label in the molecule and in the block for - # relabeling the interactions - mapping = {} - for node in nodes: - attr_dict = molecule.nodes[node] - if attr_dict["resid"] == resid: - block.add_node(attr_dict["atomname"], **attr_dict) - mapping[node] = attr_dict["atomname"] - - for inter_type in molecule.interactions: - for interaction in molecule.interactions[inter_type]: - if all(atom in mapping for atom in interaction.atoms): - interaction = replace_defined_interaction(interaction, defines) - interaction = _relabel_interaction_atoms(interaction, mapping) - block.interactions[inter_type].append(interaction) - - for inter_type in ["bonds", "constraints", "virtual_sitesn", - "virtual_sites2", "virtual_sites3", "virtual_sites4"]: - block.make_edges_from_interaction_type(inter_type) - - if not nx.is_connected(block): - msg = ('\n Residue {} with id {} consistes of two 
disconnected parts. ' - 'Make sure all atoms/particles in a residue are connected by bonds,' - ' constraints or virual-sites.') - raise IOError(msg.format(resname, resid)) - - return block - class GenerateTemplates(Processor): """ This processor takes a a class:`polyply.src.MetaMolecule` and diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py index d8f6d0b0..dc03725c 100644 --- a/polyply/src/itp_to_ff.py +++ b/polyply/src/itp_to_ff.py @@ -11,234 +11,27 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -import itertools -from collections import defaultdict import numpy as np import networkx as nx import pysmiles import vermouth from vermouth.forcefield import ForceField -from vermouth.molecule import Interaction from vermouth.gmx.itp_read import read_itp from polyply.src.topology import Topology -from polyply.src.generate_templates import extract_block +from polyply.src.molecule_utils import extract_block, extract_links from polyply.src.fragment_finder import FragmentFinder from polyply.src.ffoutput import ForceFieldDirectiveWriter from polyply.src.charges import equalize_charges -from polyply.tests.test_lib_files import _interaction_equal - -def diffs_to_prefix(atoms, resid_diffs): - """ - Given a list of atoms and corresponding differences - between their resids, generate the offset prefix for - the atomnames according to the vermouth sepcific offset - language. - - The reference atom must have resid_diff value of 0. - Other atoms either get - or + signs - depending on their resid offset. 
- - Parameters - ---------- - atoms: abc.itertable[str] - resid_diff: abc.itertable[int] - the differences in resid with respeect to - the smallest/largest resid which is 0 - - Returns - ------- - abc.itertable - list with prefixed atom names - """ - prefixed_atoms = [] - for atom, diff in zip(atoms, resid_diffs): - if diff > 0: - prefix = "".join(["+" for i in range(0, diff)]) - else: - prefix = "".join(["-" for i in range(diff, 0)]) - prefixed_atoms.append(prefix + atom) - return prefixed_atoms - -def _extract_edges_from_shortest_path(atoms, block, min_resid): - """ - Given a list atoms generate a list of edges correspoding to - all edges required to connect all atoms by at least one - shortest path. Edges are retunred on atomname basis with - prefix relative to the `min_resid`. See diffs_to_prefix. - - Paramters: - ---------- - atoms: abc.itertable - the atoms to collect edges for - block: :class:`vermouth.molecule.Block` - the molecule which to servey for edges - min_resid: int - the resid to which the prefix indicate relative resid - distance - - Returns - ------- - list[tuple] - the edge list by atomname with prefix indicating relative - residue distance to min_resid - """ - edges = [] - had_edges = [] - final_atoms = {} - resnames = {} - for origin, target in itertools.combinations(atoms, r=2): - path = list(nx.shortest_simple_paths(block, source=origin, target=target))[0] - for edge in zip(path[:-1], path[1:]): - if edge not in had_edges: - resid_diffs = np.array([block.nodes[node]['resid'] for node in edge]) - min_resid - atom_names = [block.nodes[node]["atomname"] for node in edge] - link_names = diffs_to_prefix(atom_names, resid_diffs) - final_atoms.update(dict(zip(edge, link_names))) - edges.append(link_names) - had_edges.append(edge) - resnames.update(zip(link_names, [ block.nodes[node]["resname"] for node in edge])) - return final_atoms, edges, resnames - -def extract_links(molecule): - """ - Given a molecule that has the resid and resname attributes 
- correctly set, extract the interactions which span more than - a single residue and generate a link. - - Parameters - ---------- - molecule: :class:`vermouth.molecule.Molecule` - the molecule from which to extract interactions - - Returns - ------- - list[:class:`vermouth.molecule.Links`] - a list with a links found - """ - links = [] - # patterns are a sqeuence of atoms that define an interaction - # sometimes multiple interactions are defined for one pattern - # in that case they are all collected in this dictionary - patterns = defaultdict(dict) - # for each found pattern the resnames are collected; this is important - # because the same pattern may apply to residues with different name - resnames_for_patterns = defaultdict(dict) - link_atoms_for_patterns = defaultdict(list) - # as additional safe-gaurd against false links we also collect the edges - # that span the interaction by finding the shortest simple path between - # all atoms in patterns. Note that the atoms in patterns not always have - # to be directly bonded. 
For example, pairs are not directly bonded and - # can span multiple residues - #edges_for_patterns = defaultdict(list) - for inter_type in molecule.interactions: - #print("TYPE", inter_type) - for kdx, interaction in enumerate(molecule.interactions[inter_type]): - # extract resids and resname corresponding to interaction atoms - resids = np.array([molecule.nodes[atom]["resid"] for atom in interaction.atoms]) - resnames = [molecule.nodes[atom]["resname"] for atom in interaction.atoms] - # compute the resid offset to be used for the atom prefixes - min_resid = min(resids) - diff = resids - min_resid - pattern = tuple(set(list(zip(diff, resnames)))) - - # in this case all interactions are in a block and we skip - if np.sum(diff) == 0: - continue - - # we collect the edges corresponding to the simple paths between pairs of atoms - # in the interaction - mol_atoms_to_link_atoms, edges, resnames = _extract_edges_from_shortest_path(interaction.atoms, molecule, min_resid) - #print(kdx, resnames) - link_to_mol_atoms = {value:key for key, value in mol_atoms_to_link_atoms.items()} - link_atoms = [mol_atoms_to_link_atoms[atom] for atom in interaction.atoms] - link_inter = Interaction(atoms=link_atoms, - parameters=interaction.parameters, - meta={}) - #print("inter number", kdx) - # here we deal with filtering redundancy - if pattern in patterns and inter_type in patterns[pattern]: - #print(pattern) - # if pattern == ((0, 'PEO'), (1, 'PEO')): - # print(kdx, link_inter.atoms, patterns[pattern].get(inter_type, []), "\n") - - for other_inter in patterns[pattern].get(inter_type, []): - if _interaction_equal(other_inter, link_inter, inter_type): - break - else: - patterns[pattern][inter_type].append(link_inter) - resnames_for_patterns[pattern].update(resnames) - link_atoms_for_patterns[pattern] += link_atoms - else: - patterns[pattern][inter_type] = [link_inter] - resnames_for_patterns[pattern].update(resnames) - #edges_for_patterns[pattern] += edges - 
link_atoms_for_patterns[pattern] += link_atoms - #print('resnames', resnames_for_patterns[pattern], '\n') -# for inter in patterns[list(patterns.keys())[0]]['angles']: -# print(inter) - # we make new links for each unique interaction per type - for pattern in patterns: - link = vermouth.molecule.Link() - link.add_nodes_from(set(link_atoms_for_patterns[pattern])) - #link.add_edges_from(edges_for_patterns[pattern]) - resnames = resnames_for_patterns[pattern] - # print(resnames) - nx.set_node_attributes(link, resnames, "resname") - - had_parameters = [] - for inter_type, inters in patterns[pattern].items(): - for idx, interaction in enumerate(inters): - #new_parameters = interaction.parameters - new_meta = interaction.meta - #new_atoms = interaction.atoms - # to account for the fact when multiple interactions with the same - # atom patterns need to be written to ff - new_meta.update({"version": idx}) - new_meta.update({"comment": "link"}) - had_parameters.append(interaction.parameters) - # map atoms to proper atomnames .. 
- link.interactions[inter_type].append(interaction) - - links.append(link) - #print(links) - return links - -def handle_chirality(molecule, chiral_centers): - pass - -def hcount(molecule, node): - hcounter = 0 - for node in molecule.neighbors(node): - if molecule.nodes[node]["element"] == "H": - hcounter+= 1 - return hcounter - -def set_charges(block, res_graph, name): - resnames = nx.get_node_attributes(res_graph, 'resname') - centrality = nx.betweenness_centrality(res_graph) - score = -1 - most_central_node = None - for node, resname in resnames.items(): - if resname == name and centrality[node] > score: - score = centrality[node] - most_central_node = node - charges_tmp = nx.get_node_attributes(res_graph.nodes[most_central_node]['graph'], 'charge') - atomnames = nx.get_node_attributes(res_graph.nodes[most_central_node]['graph'], 'atomname') - charges = {atomname: charges_tmp[node] for node, atomname in atomnames.items()} - for node in block.nodes: - block.nodes[node]['charge'] = charges[block.nodes[node]['atomname']] - return block def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0): """ Main executable for itp to ff tool. 
""" + # read the topology file if itppath.suffix == ".top": - # read the topology file top = Topology.from_gmx_topfile(itppath, name="test") mol = top.molecules[0].molecule - + # read itp file if itppath.suffix == ".itp": with open(itppath, "r") as _file: lines = _file.readlines() @@ -264,15 +57,10 @@ def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0 new_block.nrexcl = mol.nrexcl force_field.blocks[name] = new_block set_charges(new_block, res_graph, name) - #print("here") if itppath.suffix == ".top": equalize_charges(new_block, top) -# for node in mol.nodes: -# print(mol.nodes[node]) - force_field.links = extract_links(mol) - print("-----") with open(outpath, "w") as filehandle: ForceFieldDirectiveWriter(forcefield=force_field, stream=filehandle).write() diff --git a/polyply/src/molecule_utils.py b/polyply/src/molecule_utils.py new file mode 100644 index 00000000..7da9ce43 --- /dev/null +++ b/polyply/src/molecule_utils.py @@ -0,0 +1,260 @@ +# Copyright 2022 University of Groningen +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import itertools +from collections import defaultdict +import numpy as np +import networkx as nx +import vermouth +from vermouth.molecule import Interaction +from polyply.tests.test_lib_files import _interaction_equal +from .topology import replace_defined_interaction + +def diffs_to_prefix(atoms, resid_diffs): + """ + Given a list of atoms and corresponding differences + between their resids, generate the offset prefix for + the atomnames according to the vermouth sepcific offset + language. + + The reference atom must have resid_diff value of 0. + Other atoms either get - or + signs + depending on their resid offset. + + Parameters + ---------- + atoms: abc.itertable[str] + resid_diff: abc.itertable[int] + the differences in resid with respeect to + the smallest/largest resid which is 0 + + Returns + ------- + abc.itertable + list with prefixed atom names + """ + prefixed_atoms = [] + for atom, diff in zip(atoms, resid_diffs): + if diff > 0: + prefix = "".join(["+" for i in range(0, diff)]) + else: + prefix = "".join(["-" for i in range(diff, 0)]) + prefixed_atoms.append(prefix + atom) + return prefixed_atoms + +def _extract_edges_from_shortest_path(atoms, block, min_resid): + """ + Given a list atoms generate a list of edges correspoding to + all edges required to connect all atoms by at least one + shortest path. Edges are retunred on atomname basis with + prefix relative to the `min_resid`. See diffs_to_prefix. 
+ + Paramters: + ---------- + atoms: abc.itertable + the atoms to collect edges for + block: :class:`vermouth.molecule.Block` + the molecule which to servey for edges + min_resid: int + the resid to which the prefix indicate relative resid + distance + + Returns + ------- + list[tuple] + the edge list by atomname with prefix indicating relative + residue distance to min_resid + """ + edges = [] + had_edges = [] + final_atoms = {} + resnames = {} + for origin, target in itertools.combinations(atoms, r=2): + path = list(nx.shortest_simple_paths(block, source=origin, target=target))[0] + for edge in zip(path[:-1], path[1:]): + if edge not in had_edges: + resid_diffs = np.array([block.nodes[node]['resid'] for node in edge]) - min_resid + atom_names = [block.nodes[node]["atomname"] for node in edge] + link_names = diffs_to_prefix(atom_names, resid_diffs) + final_atoms.update(dict(zip(edge, link_names))) + edges.append(link_names) + had_edges.append(edge) + resnames.update(zip(link_names, [ block.nodes[node]["resname"] for node in edge])) + return final_atoms, edges, resnames + + +def extract_links(molecule): + """ + Given a molecule that has the resid and resname attributes + correctly set, extract the interactions which span more than + a single residue and generate a link. 
+ + Parameters + ---------- + molecule: :class:`vermouth.molecule.Molecule` + the molecule from which to extract interactions + + Returns + ------- + list[:class:`vermouth.molecule.Links`] + a list with a links found + """ + links = [] + # patterns are a sqeuence of atoms that define an interaction + # sometimes multiple interactions are defined for one pattern + # in that case they are all collected in this dictionary + patterns = defaultdict(dict) + # for each found pattern the resnames are collected; this is important + # because the same pattern may apply to residues with different name + resnames_for_patterns = defaultdict(dict) + link_atoms_for_patterns = defaultdict(list) + # as additional safe-gaurd against false links we also collect the edges + # that span the interaction by finding the shortest simple path between + # all atoms in patterns. Note that the atoms in patterns not always have + # to be directly bonded. For example, pairs are not directly bonded and + # can span multiple residues + for inter_type in molecule.interactions: + for kdx, interaction in enumerate(molecule.interactions[inter_type]): + # extract resids and resname corresponding to interaction atoms + resids = np.array([molecule.nodes[atom]["resid"] for atom in interaction.atoms]) + resnames = [molecule.nodes[atom]["resname"] for atom in interaction.atoms] + # compute the resid offset to be used for the atom prefixes + min_resid = min(resids) + diff = resids - min_resid + pattern = tuple(set(list(zip(diff, resnames)))) + + # in this case all interactions are in a block and we skip + if np.sum(diff) == 0: + continue + + # we collect the edges corresponding to the simple paths between pairs of atoms + # in the interaction + mol_atoms_to_link_atoms, edges, resnames = _extract_edges_from_shortest_path(interaction.atoms, molecule, min_resid) + link_to_mol_atoms = {value:key for key, value in mol_atoms_to_link_atoms.items()} + link_atoms = [mol_atoms_to_link_atoms[atom] for atom in 
interaction.atoms] + link_inter = Interaction(atoms=link_atoms, + parameters=interaction.parameters, + meta={}) + + # here we deal with filtering redundancy + if pattern in patterns and inter_type in patterns[pattern]: + for other_inter in patterns[pattern].get(inter_type, []): + if _interaction_equal(other_inter, link_inter, inter_type): + break + else: + patterns[pattern][inter_type].append(link_inter) + resnames_for_patterns[pattern].update(resnames) + link_atoms_for_patterns[pattern] += link_atoms + else: + patterns[pattern][inter_type] = [link_inter] + resnames_for_patterns[pattern].update(resnames) + link_atoms_for_patterns[pattern] += link_atoms + + # we make new links for each unique interaction per type + for pattern in patterns: + link = vermouth.molecule.Link() + link.add_nodes_from(set(link_atoms_for_patterns[pattern])) + resnames = resnames_for_patterns[pattern] + nx.set_node_attributes(link, resnames, "resname") + + had_parameters = [] + for inter_type, inters in patterns[pattern].items(): + for idx, interaction in enumerate(inters): + #new_parameters = interaction.parameters + new_meta = interaction.meta + #new_atoms = interaction.atoms + # to account for the fact when multiple interactions with the same + # atom patterns need to be written to ff + new_meta.update({"version": idx}) + new_meta.update({"comment": "link"}) + had_parameters.append(interaction.parameters) + # map atoms to proper atomnames .. + link.interactions[inter_type].append(interaction) + links.append(link) + return links + + +def _relabel_interaction_atoms(interaction, mapping): + """ + Relables the atoms in interaction according to the + rules defined in mapping. 
+ + Parameters + ---------- + interaction: `vermouth.molecule.Interaction` + mapping: `:class:dict` + + Returns + ------- + interaction: `vermouth.molecule.Interaction` + the new interaction with updated atoms + """ + new_atoms = [mapping[atom] for atom in interaction.atoms] + new_interaction = interaction._replace(atoms=new_atoms) + return new_interaction + + +def extract_block(molecule, nodes, defines={}): + """ + Given a `vermouth.molecule` and a `resname` + extract the information of a block from the + molecule definition and replace all defines + if any are found. + + Parameters + ---------- + molecule: :class:vermouth.molecule.Molecule + nodes: abc.hashable + the nodes corresponding to the block to + extract + defines: dict + dict of type define: value + + Returns + ------- + :class:vermouth.molecule.Block + """ + resid = molecule.nodes[nodes[0]]["resid"] + resname = molecule.nodes[nodes[0]]["resname"] + block = vermouth.molecule.Block() + + # select all nodes with the same first resid and + # make sure the block node labels are atomnames + # also build a correspondance dict between node + # label in the molecule and in the block for + # relabeling the interactions + mapping = {} + for node in nodes: + attr_dict = molecule.nodes[node] + if attr_dict["resid"] == resid: + block.add_node(attr_dict["atomname"], **attr_dict) + mapping[node] = attr_dict["atomname"] + + for inter_type in molecule.interactions: + for interaction in molecule.interactions[inter_type]: + if all(atom in mapping for atom in interaction.atoms): + interaction = replace_defined_interaction(interaction, defines) + interaction = _relabel_interaction_atoms(interaction, mapping) + block.interactions[inter_type].append(interaction) + + for inter_type in ["bonds", "constraints", "virtual_sitesn", + "virtual_sites2", "virtual_sites3", "virtual_sites4"]: + block.make_edges_from_interaction_type(inter_type) + + if not nx.is_connected(block): + msg = ('\n Residue {} with id {} consistes of two 
disconnected parts. ' + 'Make sure all atoms/particles in a residue are connected by bonds,' + ' constraints or virual-sites.') + raise IOError(msg.format(resname, resid)) + + return block diff --git a/polyply/tests/test_generate_templates.py b/polyply/tests/test_generate_templates.py index 6c21fc63..1d852cb7 100644 --- a/polyply/tests/test_generate_templates.py +++ b/polyply/tests/test_generate_templates.py @@ -27,10 +27,10 @@ from polyply.src.linalg_functions import center_of_geometry from polyply.src.generate_templates import (find_atoms, _expand_inital_coords, - _relabel_interaction_atoms, compute_volume, map_from_CoG, - extract_block, GenerateTemplates, - find_interaction_involving) + GenerateTemplates, + find_interaction_involving) +from polyply.src.molecule_utils import (extract_block, _relabel_interaction_atoms) class TestGenTemps: From fa32f76906344638475a5b8da9f71774a1e6ef24 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Wed, 22 Nov 2023 16:22:24 +0100 Subject: [PATCH 25/82] small fix --- polyply/src/itp_to_ff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py index dc03725c..55bc8a7f 100644 --- a/polyply/src/itp_to_ff.py +++ b/polyply/src/itp_to_ff.py @@ -21,7 +21,7 @@ from polyply.src.molecule_utils import extract_block, extract_links from polyply.src.fragment_finder import FragmentFinder from polyply.src.ffoutput import ForceFieldDirectiveWriter -from polyply.src.charges import equalize_charges +from polyply.src.charges import equalize_charges, set_charges def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0): """ From 520780189f5f3878e1e2139d982ca7c9cdb3391b Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Thu, 23 Nov 2023 13:16:48 +0100 Subject: [PATCH 26/82] allow for charged residues and make pysmiles optional import --- polyply/src/charges.py | 5 +++-- polyply/src/itp_to_ff.py | 14 +++++++++++--- 2 files changed, 14 insertions(+), 5 
deletions(-) diff --git a/polyply/src/charges.py b/polyply/src/charges.py index ff640d4a..7672a8c8 100644 --- a/polyply/src/charges.py +++ b/polyply/src/charges.py @@ -46,7 +46,7 @@ def _get_bonds(block, topology=None): bonds[(nodes_to_count[idx], nodes_to_count[jdx])] = float(params) return bonds -def equalize_charges(block, topology=None): +def equalize_charges(block, topology=None, charge=0): block.make_edges_from_interaction_type('bonds') keys = nx.get_node_attributes(block, 'charge').keys() charges = np.array(list(nx.get_node_attributes(block, 'charge').values())) @@ -63,7 +63,8 @@ def equalize_charges(block, topology=None): def loss(arr): arr.reshape(-1) curr_dipoles = bond_dipoles(bonds, arr) - loss = np.abs(arr.sum()) + np.sum(np.square(ref_dipoles - curr_dipoles)) + crg_dev = np.abs(charge - arr.sum()) + loss = crg_dev + np.sum(np.square(ref_dipoles - curr_dipoles)) return loss opt_results = scipy.optimize.minimize(loss, charges, method='L-BFGS-B', diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py index 55bc8a7f..25a4a424 100644 --- a/polyply/src/itp_to_ff.py +++ b/polyply/src/itp_to_ff.py @@ -13,7 +13,10 @@ # limitations under the License. import numpy as np import networkx as nx -import pysmiles +try: + import pysmiles +except ImportError: + raise ImportError("To use polyply itp_to_ff you need to install pysmiles.") import vermouth from vermouth.forcefield import ForceField from vermouth.gmx.itp_read import read_itp @@ -23,10 +26,13 @@ from polyply.src.ffoutput import ForceFieldDirectiveWriter from polyply.src.charges import equalize_charges, set_charges -def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0): +def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charges=None): """ Main executable for itp to ff tool. 
""" + # what charges belong to which resname + if charges: + crg_dict = dict(zip(resnames, charges)) # read the topology file if itppath.suffix == ".top": top = Topology.from_gmx_topfile(itppath, name="test") @@ -58,7 +64,9 @@ def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0 force_field.blocks[name] = new_block set_charges(new_block, res_graph, name) if itppath.suffix == ".top": - equalize_charges(new_block, top) + base_resname = name.split(term_prefix)[0].split('_')[0] + print(base_resname) + equalize_charges(new_block, top, crg_dict[base_resname]) force_field.links = extract_links(mol) From 737b45ce15f9a17794b0bbf8ed498c7d0f9e1264 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Thu, 23 Nov 2023 13:21:00 +0100 Subject: [PATCH 27/82] make mass optional --- polyply/src/ffoutput.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/polyply/src/ffoutput.py b/polyply/src/ffoutput.py index 0e06ea3f..1db13586 100644 --- a/polyply/src/ffoutput.py +++ b/polyply/src/ffoutput.py @@ -159,16 +159,19 @@ def write_atoms_block(self, nodes): for idx, (node, attrs) in enumerate(nodes, start=1): write_attrs = {attr: str(attrs[attr]) for attr in self.normal_order_block_atoms if attr in attrs} - self.stream.write('{idx:>{max_length[idx]}} ' - '{atype:<{max_length[atype]}} ' - '{resid:>{max_length[resid]}} ' - '{resname:<{max_length[resname]}} ' - '{atomname:<{max_length[atomname]}} ' - '{charge_group:>{max_length[charge_group]}} ' - '{charge:>{max_length[charge]}} ' - '{mass:>{max_length[mass]}}\n'.format(idx=idx, - max_length=max_length, - **write_attrs)) + template = ('{idx:>{max_length[idx]}} ' + '{atype:<{max_length[atype]}} ' + '{resid:>{max_length[resid]}} ' + '{resname:<{max_length[resname]}} ' + '{atomname:<{max_length[atomname]}} ' + '{charge_group:>{max_length[charge_group]}} ' + '{charge:>{max_length[charge]}} ') + if 'mass' in write_attrs: + template += '{mass:>{max_length[mass]}}\n' + else: + 
template += '\n' + + self.stream.write(template.format(idx=idx, max_length=max_length, **write_attrs)) def write_atoms_link(self, nodes, nometa=False): """ From 9a1e800475cd5463e30088d0d84538ec521456ed Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Thu, 23 Nov 2023 13:33:36 +0100 Subject: [PATCH 28/82] add doc-strings and rename equalize_charge --- polyply/src/charges.py | 103 +++++++++++++++++++++++++++------------ polyply/src/itp_to_ff.py | 4 +- 2 files changed, 75 insertions(+), 32 deletions(-) diff --git a/polyply/src/charges.py b/polyply/src/charges.py index 7672a8c8..d53bae3d 100644 --- a/polyply/src/charges.py +++ b/polyply/src/charges.py @@ -3,6 +3,25 @@ import scipy.optimize def set_charges(block, res_graph, name): + """ + Set the charges of `block` by finding the most central + residue in res_graph that matches the residue `name` of + block. + + Parameters + ---------- + block: :class:`vermouth.molecule.Block` + block describing single residue + res_graph: nx.Graph + residue graph + name: str + residue name + + Returns + ------- + :class:`vermouth.molecule.Block` + the block with updated charges + """ resnames = nx.get_node_attributes(res_graph, 'resname') centrality = nx.betweenness_centrality(res_graph) score = -1 @@ -19,6 +38,23 @@ def set_charges(block, res_graph, name): return block def bond_dipoles(bonds, charges): + """ + Compute bond dipole moments from charges + and bondlengths. The charges array must + match the numeric bond dict keys. + + Parameters + ---------- + bonds: dict[tuple(int, int)][float] + the bond length indexed by atom indices + charges: np.array + array of charges + + Returns + ------- + np.array + the bond dipoles + """ bond_dipo = np.zeros((len(bonds))) for kdx, (idx, jdx) in enumerate(bonds.keys()): lb = bonds[(idx, jdx)] @@ -26,6 +62,20 @@ def bond_dipoles(bonds, charges): return bond_dipo def _get_bonds(block, topology=None): + """ + Extract a bond length dict from block. 
If topology + is given bond lengths may be looked up by type. + + Parameters + ---------- + block: :class:`vermouth.molecule.Block` + topology: :class:`polyply.src.topology.Topology` + + Returns + ------- + dict + a dict of edges and their bond length + """ bonds = {} atoms = block.nodes nodes_to_count = {node: count for count, node in enumerate(block.nodes)} @@ -42,11 +92,32 @@ def _get_bonds(block, topology=None): params = topology.types['bonds'][batoms][0][0][1] elif batoms[::-1] in topology.types['bonds']: params = topology.types['bonds'][batoms[::-1]][0][0][1] - print(params) bonds[(nodes_to_count[idx], nodes_to_count[jdx])] = float(params) return bonds -def equalize_charges(block, topology=None, charge=0): +def balance_charges(block, topology=None, charge=0): + """ + Given a block and a total charge for that block + balance the charge until the total charge of the + block is exactly the same as set. The balancing + takes also into account to retain the bond dipole + moments as closely as possible such that ideally + the electrostatics are as little influenced as + possible due to rescaling. A topology is only + needed if the force field uses bondtypes. + + Parameters + ---------- + block: :class:`vermouth.molecule.Block` + topology: :class:`polyply.src.topology.Topology` + charge: float + total charge of the residue + + Returns + ------- + :class:`vermouth.molecule.Block` + block with updated charges + """ block.make_edges_from_interaction_type('bonds') keys = nx.get_node_attributes(block, 'charge').keys() charges = np.array(list(nx.get_node_attributes(block, 'charge').values())) @@ -72,31 +143,3 @@ def loss(arr): balanced_charges = opt_results['x'] nx.set_node_attributes(block, dict(zip(keys, balanced_charges)), 'charge') return block - - -#def equalize_charges(molecule, target_charge=0): -# """ -# Make sure that the total charge of molecule is equal to -# the target charge by substracting the differences split -# over all atoms. 
-# -# Parameters -# ---------- -# molecule: :class:`vermouth.molecule.Molecule` -# target_charge: float -# the charge of the molecule -# -# Returns -# ------- -# molecule -# the molecule with updated charge attribute -# """ -# total = nx.get_node_attributes(molecule, "charge") -# diff = (sum(list(total.values())) - target_charge)/len(molecule.nodes) -# if np.isclose(diff, 0, atol=0.0001): -# return molecule -# for node in molecule.nodes: -# charge = float(molecule.nodes[node]['charge']) - diff -# molecule.nodes[node]['charge'] = charge -# total = nx.get_node_attributes(molecule, "charge") -# return molecule diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py index 25a4a424..76b8bf0d 100644 --- a/polyply/src/itp_to_ff.py +++ b/polyply/src/itp_to_ff.py @@ -24,7 +24,7 @@ from polyply.src.molecule_utils import extract_block, extract_links from polyply.src.fragment_finder import FragmentFinder from polyply.src.ffoutput import ForceFieldDirectiveWriter -from polyply.src.charges import equalize_charges, set_charges +from polyply.src.charges import balance_charges, set_charges def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charges=None): """ @@ -66,7 +66,7 @@ def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charges= if itppath.suffix == ".top": base_resname = name.split(term_prefix)[0].split('_')[0] print(base_resname) - equalize_charges(new_block, top, crg_dict[base_resname]) + balance_charges(new_block, top, crg_dict[base_resname]) force_field.links = extract_links(mol) From c9621a3396dc54a71392b0d9e8a2c06aa1931dd8 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Fri, 24 Nov 2023 10:38:10 +0100 Subject: [PATCH 29/82] remove print --- polyply/tests/test_lib_files.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/polyply/tests/test_lib_files.py b/polyply/tests/test_lib_files.py index 98d748eb..cb8f1b18 100644 --- a/polyply/tests/test_lib_files.py +++ b/polyply/tests/test_lib_files.py 
@@ -161,8 +161,8 @@ def _interaction_equal(interaction1, interaction2, inter_type): a1.reverse() if a1 == a2: return True - else: - print(a1, a2) + # else: + # print(a1, a2) elif inter_type in ["angles"]: return a1[1] == a2[1] and frozenset([a1[0], a1[2]]) == frozenset([a2[0], a2[2]]) From fdae3db7c55d9fe0de5b84299fadd7a2423998c6 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Fri, 24 Nov 2023 11:12:03 +0100 Subject: [PATCH 30/82] remove martini2 from ffoutput test as it fails on GH --- polyply/tests/test_ffoutput.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polyply/tests/test_ffoutput.py b/polyply/tests/test_ffoutput.py index c5855bd6..5b8ecaa7 100644 --- a/polyply/tests/test_ffoutput.py +++ b/polyply/tests/test_ffoutput.py @@ -69,7 +69,7 @@ def equal_ffs(ff1, ff2): '2016H66', 'gromos53A6', 'oplsaaLigParGen', - 'martini2', + # 'martini2', 'parmbsc1', ]) def test_ffoutput(tmp_path, libname): From e50e232ad3f2e0e2fb518ea687ccb626ec8f7707 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Fri, 24 Nov 2023 12:47:25 +0100 Subject: [PATCH 31/82] add test for extract links --- polyply/tests/test_molecule_utils.py | 77 ++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 polyply/tests/test_molecule_utils.py diff --git a/polyply/tests/test_molecule_utils.py b/polyply/tests/test_molecule_utils.py new file mode 100644 index 00000000..de15dc1d --- /dev/null +++ b/polyply/tests/test_molecule_utils.py @@ -0,0 +1,77 @@ +# Copyright 2022 University of Groningen +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +""" +Test the fragment finder for itp_to_ff. +""" +import pytest +from pathlib import Path +import networkx as nx +from vermouth.molecule import Interaction +from polyply.src.molecule_utils import extract_links +from .test_apply_links import example_meta_molecule + +@pytest.mark.parametrize('inters, expected',( + # simple bond spanning two residues + ({'bonds':[Interaction(atoms=(0, 1), parameters=['1', '0.33', '500'], meta={}), + Interaction(atoms=(1, 2), parameters=['1', '0.33', '500'], meta={}), + Interaction(atoms=(1, 4), parameters=['1', '0.30', '500'], meta={}), + Interaction(atoms=(4, 5), parameters=['1', '0.35', '500'], meta={}),]}, + {'bonds': [Interaction(atoms=['BB1', '+BB'], + parameters=['1', '0.30', '500'], + meta={'version': 0, 'comment': 'link'}), + ]}, + ), + # double version dihedral spanning two residues + ({'dihedrals':[Interaction(atoms=(0, 1, 4, 5), + parameters=['9', '120', '4', '1'], + meta={}), + Interaction(atoms=(0, 1, 4, 5), + parameters=['9', '120', '4', '2'], + meta={}), + Interaction(atoms=(0, 1, 2, 3), + parameters=['9', '120', '4', '2'], + meta={})] + }, + {'dihedrals': [Interaction(atoms=['BB', 'BB1', '+BB', '+BB1'], + parameters=['9', '120', '4', '1'], + meta={'version': 0, 'comment': 'link'}), + Interaction(atoms=['BB', 'BB1', '+BB', '+BB1'], + parameters=['9', '120', '4', '2'], + meta={'version': 1, 'comment': 'link'}),] + }, + ), + # 1-5 pairs spanning 3 residues + ({'pairs': [Interaction(atoms=(1, 9), + parameters=[1], + meta={})]}, + {'pairs': [Interaction(atoms=['BB1', '++BB'], + parameters=[1], + meta={'version': 0, 'comment': 'link'})] + }), +)) +def test_extract_links(example_meta_molecule, inters, expected): + mol = example_meta_molecule.molecule + mol.add_edges_from([(1, 4), (8, 9)]) + nx.set_node_attributes(mol, {0: "resA", 1: "resA", 2: "resA", 3: "resA", + 4: "resB", 5: "resB", 6: "resB", 7: "resB", 8: "resB", + 
9: "resA", 10: "resA", 11: "resA", 12: "resA"}, "resname") + nx.set_node_attributes(mol, {0: "BB", 1: "BB1", 2: "SC1", 3: "SC2", + 4: "BB", 5: "BB1", 6: "BB2", 7: "SC1", 8: "SC2", + 9: "BB", 10: "BB1", 11: "SC1", 12: "SC2"}, "atomname") + mol.interactions.update(inters) + link = extract_links(mol)[0] + for inter_type in expected: + assert expected[inter_type] == link.interactions[inter_type] + + From 6c94485f37cd7b32f82fe5007c3c168326ddfe9c Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Fri, 24 Nov 2023 12:57:04 +0100 Subject: [PATCH 32/82] add test for extract links with redundant interaction --- polyply/tests/test_molecule_utils.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/polyply/tests/test_molecule_utils.py b/polyply/tests/test_molecule_utils.py index de15dc1d..8af59cab 100644 --- a/polyply/tests/test_molecule_utils.py +++ b/polyply/tests/test_molecule_utils.py @@ -59,6 +59,18 @@ parameters=[1], meta={'version': 0, 'comment': 'link'})] }), + # redundant pair + ({'pairs': [Interaction(atoms=(1, 5), + parameters=[1], + meta={}), + Interaction(atoms=(5, 9), + parameters=[1], + meta={}), + ],}, + {'pairs': [Interaction(atoms=['BB1', '+BB1'], + parameters=[1], + meta={'version': 0, 'comment': 'link'})] + }), )) def test_extract_links(example_meta_molecule, inters, expected): mol = example_meta_molecule.molecule From e343211a48240126629079a938f4aab480e97f73 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Fri, 24 Nov 2023 13:56:49 +0100 Subject: [PATCH 33/82] test for charge balancing --- polyply/tests/test_charges.py | 51 +++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 polyply/tests/test_charges.py diff --git a/polyply/tests/test_charges.py b/polyply/tests/test_charges.py new file mode 100644 index 00000000..59b3c5ff --- /dev/null +++ b/polyply/tests/test_charges.py @@ -0,0 +1,51 @@ +# Copyright 2022 University of Groningen +# +# Licensed under the Apache License, Version 2.0 (the 
"License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Test the charge modification functions used in itp_to_ff. +""" +import textwrap +import pytest +from pathlib import Path +import networkx as nx +import vermouth +import polyply +from polyply.src.charges import balance_charges +@pytest.mark.parametrize('charges, target',( + ({0: 0.2, 1: -0.4, 2: 0.23, 3: 0.001}, + 0.0,), + ({0: 0.6, 1: -0.2, 2: 0.5, 3: 0.43}, + 0.5,), +)) +def test_balance_charges(charges, target): + lines = """ + [ moleculetype ] + test 1 + [ atoms ] + 1 P4 1 GLY BB 1 + 2 P3 1 GLY SC1 2 + 3 P2 1 ALA SC2 3 + 4 P2 1 ALA SC3 3 + [ bonds ] + 1 2 1 0.2 100 + 2 3 1 0.6 700 + 3 4 1 0.2 700 + """ + lines = textwrap.dedent(lines).splitlines() + ff = vermouth.forcefield.ForceField(name='test_ff') + polyply.src.polyply_parser.read_polyply(lines, ff) + block = ff.blocks['test'] + nx.set_node_attributes(block, charges, 'charge') + balance_charges(block, topology=None, charge=target, tol=10**-4, decimals=4) + new_charges = nx.get_node_attributes(block, 'charge') + assert pytest.approx(sum(new_charges.values()),abs=0.00001) == target From 7db6462697a7a1863106323546703c39fb081619 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Fri, 24 Nov 2023 14:05:06 +0100 Subject: [PATCH 34/82] test for charge balancing --- polyply/tests/test_charges.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/polyply/tests/test_charges.py b/polyply/tests/test_charges.py index 59b3c5ff..7f974478 100644 --- a/polyply/tests/test_charges.py +++ 
b/polyply/tests/test_charges.py @@ -26,6 +26,8 @@ 0.0,), ({0: 0.6, 1: -0.2, 2: 0.5, 3: 0.43}, 0.5,), + ({0: -0.633, 1: -0.532, 2: 0.512, 3: 0.0}, + -0.6,), )) def test_balance_charges(charges, target): lines = """ @@ -46,6 +48,6 @@ def test_balance_charges(charges, target): polyply.src.polyply_parser.read_polyply(lines, ff) block = ff.blocks['test'] nx.set_node_attributes(block, charges, 'charge') - balance_charges(block, topology=None, charge=target, tol=10**-4, decimals=4) + balance_charges(block, topology=None, charge=target, tol=10**-5, decimals=5) new_charges = nx.get_node_attributes(block, 'charge') - assert pytest.approx(sum(new_charges.values()),abs=0.00001) == target + assert pytest.approx(sum(new_charges.values()),abs=0.0001) == target From c9dadac5b1c40952b0aebe2617c1bc28697f4e55 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Fri, 24 Nov 2023 14:08:04 +0100 Subject: [PATCH 35/82] implement tolerances for charge balancing --- polyply/src/charges.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/polyply/src/charges.py b/polyply/src/charges.py index d53bae3d..cfd50235 100644 --- a/polyply/src/charges.py +++ b/polyply/src/charges.py @@ -95,7 +95,7 @@ def _get_bonds(block, topology=None): bonds[(nodes_to_count[idx], nodes_to_count[jdx])] = float(params) return bonds -def balance_charges(block, topology=None, charge=0): +def balance_charges(block, charge=0, tol=10**-5, decimals=5, topology=None): """ Given a block and a total charge for that block balance the charge until the total charge of the @@ -121,7 +121,7 @@ def balance_charges(block, topology=None, charge=0): block.make_edges_from_interaction_type('bonds') keys = nx.get_node_attributes(block, 'charge').keys() charges = np.array(list(nx.get_node_attributes(block, 'charge').values())) - if np.isclose(charges.sum(), 0, atol=1*10**-6): + if np.isclose(charges.sum(), 0, atol=tol): return block # we need to equalize the charge @@ -139,7 +139,7 @@ def loss(arr): return loss 
opt_results = scipy.optimize.minimize(loss, charges, method='L-BFGS-B', - options={'ftol': 0.001, 'maxiter': 100}) - balanced_charges = opt_results['x'] + options={'ftol': tol, 'maxiter': 100}) + balanced_charges = np.around(opt_results['x'], decimals) nx.set_node_attributes(block, dict(zip(keys, balanced_charges)), 'charge') return block From b6937354ef333553afed5838cf8fc6a42c033f75 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Fri, 24 Nov 2023 14:34:42 +0100 Subject: [PATCH 36/82] add integration tests itp_to_ff and adjust CLI --- bin/polyply | 4 +++- polyply/tests/test_itp_to_ff.py | 14 +++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/bin/polyply b/bin/polyply index 8ff25efa..eff60024 100755 --- a/bin/polyply +++ b/bin/polyply @@ -239,7 +239,9 @@ def main(): # pylint: disable=too-many-locals,too-many-statements parser_itp_ff.add_argument('-rn', dest="resnames", nargs='*') parser_itp_ff.add_argument('-tp',dest="term_prefix", default="ter") parser_itp_ff.add_argument('-o', dest="outpath", type=Path) - parser_itp_ff.add_argument('-c', dest="charge", type=float, default=0.0) + parser_itp_ff.add_argument('-c', dest="charges", type=float, nargs='*') + parser_itp_ff.add_argument('-tol', dest="tolerance", type=float, default=1e-5) + parser_itp_ff.add_argument('-d', dest="decimals", type=int, default=5) parser_itp_ff.set_defaults(func=itp_to_ff) diff --git a/polyply/tests/test_itp_to_ff.py b/polyply/tests/test_itp_to_ff.py index 588515d7..df97d73e 100644 --- a/polyply/tests/test_itp_to_ff.py +++ b/polyply/tests/test_itp_to_ff.py @@ -67,22 +67,22 @@ def itp_equal(ref_mol, new_mol): assert False return True -@pytest.mark.parametrize("case, smiles, resnames, charge", [ - ("PEO_OHter", ["[OH][CH2]", "[CH2]O[CH2]", "[CH2][OH]"], ["OH", "PEO", "OH"], 0), - ("PEG_PBE", ["[CH3]", "[CH2][CH][CH][CH2]", "[CH2]O[CH2]"], ["CH3", "PBE", "PEO"], 0), +@pytest.mark.parametrize("case, smiles, resnames, charges", [ + ("PEO_OHter", ["[OH][CH2]", 
"[CH2]O[CH2]", "[CH2][OH]"], ["OH", "PEO", "OH"], [0, 0, 0]), + ("PEG_PBE", ["[CH3]", "[CH2][CH][CH][CH2]", "[CH2]O[CH2]"], ["CH3", "PBE", "PEO"], [0, 0, 0]), ]) -def _test_ffoutput(tmp_path, case, smiles, resnames, charge): +def test_itp_to_ff(tmp_path, case, smiles, resnames, charges): """ Call itp-to-ff and check if it generates the same force-field as in the ref.ff file. """ - tmp_path = Path("/coarse/fabian/current-projects/polymer_itp_builder/polyply_2.0/polyply/tests/test_data/tmp") + tmp_path = Path("/Users/fabian/ProgramDev/polyply_1.0/polyply/tests/test_data/itp_to_ff/PEG_PBE/tmp") tmp_file = Path(tmp_path) / "test.ff" inpath = Path(polyply.TEST_DATA) / "itp_to_ff" / case itp_to_ff(itppath=inpath/"in_itp.itp", fragment_smiles=smiles, resnames=resnames, - charge=charge, + charges=charges, term_prefix='ter', outpath=tmp_file,) # now generate an itp file with this ff-file @@ -92,6 +92,6 @@ def _test_ffoutput(tmp_path, case, smiles, resnames, charge): outpath=tmp_itp, name="new") # read the itp-file and return a molecule new_mol = _read_itp(tmp_itp) - ref_mol = _read_itp(inpath/"in_itp.itp") + ref_mol = _read_itp(inpath/"ref.itp") # check if itps are the same assert itp_equal(ref_mol, new_mol) From 1c542cbcb68b1edf938a75650c232152aa8affd1 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Fri, 24 Nov 2023 14:35:14 +0100 Subject: [PATCH 37/82] fix bug in integration tests itp_to_ff --- polyply/tests/test_itp_to_ff.py | 1 - 1 file changed, 1 deletion(-) diff --git a/polyply/tests/test_itp_to_ff.py b/polyply/tests/test_itp_to_ff.py index df97d73e..ac727795 100644 --- a/polyply/tests/test_itp_to_ff.py +++ b/polyply/tests/test_itp_to_ff.py @@ -76,7 +76,6 @@ def test_itp_to_ff(tmp_path, case, smiles, resnames, charges): Call itp-to-ff and check if it generates the same force-field as in the ref.ff file. 
""" - tmp_path = Path("/Users/fabian/ProgramDev/polyply_1.0/polyply/tests/test_data/itp_to_ff/PEG_PBE/tmp") tmp_file = Path(tmp_path) / "test.ff" inpath = Path(polyply.TEST_DATA) / "itp_to_ff" / case itp_to_ff(itppath=inpath/"in_itp.itp", From aa0865d683fc1336d1d7f85798bb31768a77b8ad Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Fri, 24 Nov 2023 15:04:59 +0100 Subject: [PATCH 38/82] complex integration test itp_to_ff plus charged mol --- polyply/src/itp_to_ff.py | 4 +- .../tests/test_data/itp_to_ff/ACOL/in_itp.itp | 680 ++++++++++++++++++ .../tests/test_data/itp_to_ff/ACOL/ref.itp | 677 +++++++++++++++++ .../tests/test_data/itp_to_ff/ACOL/ref.top | 28 + .../tests/test_data/itp_to_ff/ACOL/seq.txt | 1 + polyply/tests/test_itp_to_ff.py | 5 + 6 files changed, 1394 insertions(+), 1 deletion(-) create mode 100644 polyply/tests/test_data/itp_to_ff/ACOL/in_itp.itp create mode 100644 polyply/tests/test_data/itp_to_ff/ACOL/ref.itp create mode 100644 polyply/tests/test_data/itp_to_ff/ACOL/ref.top create mode 100644 polyply/tests/test_data/itp_to_ff/ACOL/seq.txt diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py index 76b8bf0d..bd08e1bd 100644 --- a/polyply/src/itp_to_ff.py +++ b/polyply/src/itp_to_ff.py @@ -66,7 +66,9 @@ def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charges= if itppath.suffix == ".top": base_resname = name.split(term_prefix)[0].split('_')[0] print(base_resname) - balance_charges(new_block, top, crg_dict[base_resname]) + balance_charges(new_block, + topology=top, + charge=crg_dict[base_resname]) force_field.links = extract_links(mol) diff --git a/polyply/tests/test_data/itp_to_ff/ACOL/in_itp.itp b/polyply/tests/test_data/itp_to_ff/ACOL/in_itp.itp new file mode 100644 index 00000000..566a82c1 --- /dev/null +++ b/polyply/tests/test_data/itp_to_ff/ACOL/in_itp.itp @@ -0,0 +1,680 @@ +[ moleculetype ] +; Name nrexcl +ref 3 +[ atoms ] +; nr type resnr residue atom cgnr charge mass + 1 opls_800 1 UNL O00 1 -0.3942 
15.9990 + 2 opls_801 1 UNL C01 1 0.3911 12.0110 + 3 opls_802 1 UNL C02 1 -0.1501 12.0110 + 4 opls_803 1 UNL O03 1 -0.3449 15.9990 + 5 opls_804 1 UNL C04 1 -0.1595 12.0110 + 6 opls_805 1 UNL H05 1 0.1269 1.0080 + 7 opls_806 1 UNL H06 1 0.1269 1.0080 + 8 opls_807 1 UNL C07 1 -0.0916 12.0110 + 9 opls_808 1 UNL H08 1 0.1135 1.0080 + 10 opls_809 1 UNL H09 1 0.1135 1.0080 + 11 opls_810 1 UNL C0A 1 -0.1496 12.0110 + 12 opls_811 1 UNL C0B 1 0.3901 12.0110 + 13 opls_812 1 UNL H0C 1 0.1118 1.0080 + 14 opls_813 1 UNL C0D 1 -0.0920 12.0110 + 15 opls_814 1 UNL H0E 1 0.1149 1.0080 + 16 opls_815 1 UNL H0F 1 0.1149 1.0080 + 17 opls_816 1 UNL O0G 1 -0.3434 15.9990 + 18 opls_817 1 UNL O0H 1 -0.3876 15.9990 + 19 opls_818 1 UNL C0I 1 -0.1460 12.0110 + 20 opls_819 1 UNL C0J 1 0.3959 12.0110 + 21 opls_820 1 UNL H0K 1 0.1177 1.0080 + 22 opls_821 1 UNL C0M 1 -0.0273 12.0110 + 23 opls_822 1 UNL C0N 1 -0.0916 12.0110 + 24 opls_823 1 UNL H0O 1 0.1194 1.0080 + 25 opls_824 1 UNL H0P 1 0.1194 1.0080 + 26 opls_825 1 UNL O0Q 1 -0.3478 15.9990 + 27 opls_826 1 UNL O0R 1 -0.3336 15.9990 + 28 opls_827 1 UNL C0S 1 -0.1411 12.0110 + 29 opls_828 1 UNL C0T 1 0.3737 12.0110 + 30 opls_829 1 UNL H0U 1 0.1083 1.0080 + 31 opls_830 1 UNL C0V 1 0.0287 12.0110 + 32 opls_831 1 UNL C0W 1 -0.0926 12.0110 + 33 opls_832 1 UNL H0X 2 0.1142 1.0080 + 34 opls_833 1 UNL H0Y 2 0.1142 1.0080 + 35 opls_834 1 UNL O0Z 2 -0.3484 15.9990 + 36 opls_835 1 UNL O10 2 -0.3544 15.9990 + 37 opls_836 1 UNL C11 2 -0.1709 12.0110 + 38 opls_837 1 UNL H12 2 0.0965 1.0080 + 39 opls_838 1 UNL H13 2 0.0965 1.0080 + 40 opls_839 1 UNL C14 2 -0.2114 12.0110 + 41 opls_840 1 UNL C15 2 0.3799 12.0110 + 42 opls_841 1 UNL H16 2 0.1129 1.0080 + 43 opls_842 1 UNL C17 2 -0.0170 12.0110 + 44 opls_843 1 UNL H18 2 0.0946 1.0080 + 45 opls_844 1 UNL H19 2 0.0946 1.0080 + 46 opls_845 1 UNL H1A 2 0.0946 1.0080 + 47 opls_846 1 UNL O1B 2 -0.3369 15.9990 + 48 opls_847 1 UNL O1C 2 -0.3839 15.9990 + 49 opls_848 1 UNL H1D 2 0.0757 1.0080 + 50 opls_849 1 UNL H1E 2 
0.0757 1.0080 + 51 opls_850 1 UNL H1F 2 0.0757 1.0080 + 52 opls_851 1 UNL C1G 2 -0.0289 12.0110 + 53 opls_852 1 UNL H1H 2 0.0867 1.0080 + 54 opls_853 1 UNL H1I 2 0.0867 1.0080 + 55 opls_854 1 UNL H1J 2 0.0867 1.0080 + 56 opls_855 1 UNL N1K 2 0.1659 14.0070 + 57 opls_856 1 UNL H1M 2 0.1558 1.0080 + 58 opls_857 1 UNL H1N 2 0.1558 1.0080 + 59 opls_858 1 UNL C1O 2 -0.2247 12.0110 + 60 opls_859 1 UNL C1P 2 -0.2238 12.0110 + 61 opls_860 1 UNL C1Q 2 -0.2254 12.0110 + 62 opls_861 1 UNL H1R 2 0.1443 1.0080 + 63 opls_862 1 UNL H1S 2 0.1443 1.0080 + 64 opls_863 1 UNL H1T 2 0.1443 1.0080 + 65 opls_864 1 UNL H1U 2 0.1436 1.0080 + 66 opls_865 1 UNL H1V 3 0.1436 1.0080 + 67 opls_866 1 UNL H1W 3 0.1436 1.0080 + 68 opls_867 1 UNL H1X 3 0.1427 1.0080 + 69 opls_868 1 UNL H1Y 3 0.1427 1.0080 + 70 opls_869 1 UNL H1Z 3 0.1427 1.0080 + 71 opls_870 1 UNL H20 3 0.0844 1.0080 + 72 opls_871 1 UNL H21 3 0.0844 1.0080 + 73 opls_872 1 UNL H22 3 0.0844 1.0080 + 74 opls_873 1 UNL C23 3 -0.0241 12.0110 + 75 opls_874 1 UNL H24 3 0.0894 1.0080 + 76 opls_875 1 UNL H25 3 0.0894 1.0080 + 77 opls_876 1 UNL H26 3 0.0894 1.0080 +[ bonds ] + 2 1 1 0.1229 476976.000 + 3 2 1 0.1522 265265.600 + 4 2 1 0.1327 179075.200 + 5 3 1 0.1529 224262.400 + 6 3 1 0.1090 284512.000 + 7 3 1 0.1090 284512.000 + 8 5 1 0.1529 224262.400 + 9 5 1 0.1090 284512.000 + 10 5 1 0.1090 284512.000 + 11 8 1 0.1529 224262.400 + 12 8 1 0.1522 265265.600 + 13 8 1 0.1090 284512.000 + 14 11 1 0.1529 224262.400 + 15 11 1 0.1090 284512.000 + 16 11 1 0.1090 284512.000 + 17 12 1 0.1327 179075.200 + 18 12 1 0.1229 476976.000 + 19 14 1 0.1529 224262.400 + 20 14 1 0.1522 265265.600 + 21 14 1 0.1090 284512.000 + 22 17 1 0.1410 267776.000 + 23 19 1 0.1529 224262.400 + 24 19 1 0.1090 284512.000 + 25 19 1 0.1090 284512.000 + 26 20 1 0.1327 179075.200 + 27 20 1 0.1229 476976.000 + 28 23 1 0.1529 224262.400 + 29 23 1 0.1522 265265.600 + 30 23 1 0.1090 284512.000 + 31 26 1 0.1410 267776.000 + 32 28 1 0.1529 224262.400 + 33 28 1 0.1090 284512.000 + 34 28 
1 0.1090 284512.000 + 35 29 1 0.1327 179075.200 + 36 29 1 0.1229 476976.000 + 37 31 1 0.1529 224262.400 + 38 31 1 0.1090 284512.000 + 39 31 1 0.1090 284512.000 + 40 32 1 0.1529 224262.400 + 41 32 1 0.1522 265265.600 + 42 32 1 0.1090 284512.000 + 43 35 1 0.1410 267776.000 + 44 40 1 0.1090 284512.000 + 45 40 1 0.1090 284512.000 + 46 40 1 0.1090 284512.000 + 47 41 1 0.1327 179075.200 + 48 41 1 0.1229 476976.000 + 49 43 1 0.1090 284512.000 + 50 43 1 0.1090 284512.000 + 51 43 1 0.1090 284512.000 + 52 47 1 0.1410 267776.000 + 53 52 1 0.1090 284512.000 + 54 52 1 0.1090 284512.000 + 55 52 1 0.1090 284512.000 + 56 37 1 0.1471 307105.600 + 57 37 1 0.1090 284512.000 + 58 37 1 0.1090 284512.000 + 59 56 1 0.1471 307105.600 + 60 56 1 0.1471 307105.600 + 61 56 1 0.1471 307105.600 + 62 59 1 0.1090 284512.000 + 63 59 1 0.1090 284512.000 + 64 59 1 0.1090 284512.000 + 65 60 1 0.1090 284512.000 + 66 60 1 0.1090 284512.000 + 67 60 1 0.1090 284512.000 + 68 61 1 0.1090 284512.000 + 69 61 1 0.1090 284512.000 + 70 61 1 0.1090 284512.000 + 71 22 1 0.1090 284512.000 + 72 22 1 0.1090 284512.000 + 73 22 1 0.1090 284512.000 + 74 4 1 0.1410 267776.000 + 75 74 1 0.1090 284512.000 + 76 74 1 0.1090 284512.000 + 77 74 1 0.1090 284512.000 + +[ angles ] +; ai aj ak funct c0 c1 c2 c3 + 1 2 3 1 120.400 669.440 + 1 2 4 1 123.400 694.544 + 2 3 5 1 111.100 527.184 + 2 3 6 1 109.500 292.880 + 2 3 7 1 109.500 292.880 + 3 5 8 1 112.700 488.273 + 3 5 9 1 110.700 313.800 + 3 5 10 1 110.700 313.800 + 5 8 11 1 112.700 488.273 + 5 8 12 1 111.100 527.184 + 5 8 13 1 110.700 313.800 + 8 11 14 1 112.700 488.273 + 8 11 15 1 110.700 313.800 + 8 11 16 1 110.700 313.800 + 8 12 17 1 111.400 677.808 + 8 12 18 1 120.400 669.440 + 11 14 19 1 112.700 488.273 + 11 14 20 1 111.100 527.184 + 11 14 21 1 110.700 313.800 + 12 17 22 1 116.900 694.544 + 14 19 23 1 112.700 488.273 + 14 19 24 1 110.700 313.800 + 14 19 25 1 110.700 313.800 + 14 20 26 1 111.400 677.808 + 14 20 27 1 120.400 669.440 + 19 23 28 1 112.700 488.273 + 19 23 29 1 
111.100 527.184 + 19 23 30 1 110.700 313.800 + 20 26 31 1 116.900 694.544 + 23 28 32 1 112.700 488.273 + 23 28 33 1 110.700 313.800 + 23 28 34 1 110.700 313.800 + 23 29 35 1 111.400 677.808 + 23 29 36 1 120.400 669.440 + 26 31 37 1 109.500 418.400 + 26 31 38 1 109.500 292.880 + 26 31 39 1 109.500 292.880 + 28 32 40 1 112.700 488.273 + 28 32 41 1 111.100 527.184 + 28 32 42 1 110.700 313.800 + 29 35 43 1 116.900 694.544 + 32 40 44 1 110.700 313.800 + 32 40 45 1 110.700 313.800 + 32 40 46 1 110.700 313.800 + 32 41 47 1 111.400 677.808 + 32 41 48 1 120.400 669.440 + 35 43 49 1 109.500 292.880 + 35 43 50 1 109.500 292.880 + 35 43 51 1 109.500 292.880 + 41 47 52 1 116.900 694.544 + 47 52 53 1 109.500 292.880 + 47 52 54 1 109.500 292.880 + 47 52 55 1 109.500 292.880 + 31 37 56 1 111.200 669.440 + 31 37 57 1 110.700 313.800 + 31 37 58 1 110.700 313.800 + 37 56 59 1 113.000 418.400 + 37 56 60 1 113.000 418.400 + 37 56 61 1 113.000 418.400 + 56 59 62 1 109.500 292.880 + 56 59 63 1 109.500 292.880 + 56 59 64 1 109.500 292.880 + 56 60 65 1 109.500 292.880 + 56 60 66 1 109.500 292.880 + 56 60 67 1 109.500 292.880 + 56 61 68 1 109.500 292.880 + 56 61 69 1 109.500 292.880 + 56 61 70 1 109.500 292.880 + 17 22 71 1 109.500 292.880 + 17 22 72 1 109.500 292.880 + 17 22 73 1 109.500 292.880 + 2 4 74 1 116.900 694.544 + 4 74 75 1 109.500 292.880 + 4 74 76 1 109.500 292.880 + 4 74 77 1 109.500 292.880 + 49 43 50 1 107.800 276.144 + 23 19 25 1 110.700 313.800 + 45 40 46 1 107.800 276.144 + 54 52 55 1 107.800 276.144 + 28 23 30 1 110.700 313.800 + 65 60 66 1 107.800 276.144 + 62 59 64 1 107.800 276.144 + 41 32 42 1 109.500 292.880 + 75 74 76 1 107.800 276.144 + 37 31 39 1 110.700 313.800 + 59 56 60 1 113.000 418.400 + 14 11 16 1 110.700 313.800 + 44 40 45 1 107.800 276.144 + 26 20 27 1 123.400 694.544 + 56 37 57 1 109.500 292.880 + 76 74 77 1 107.800 276.144 + 32 28 34 1 110.700 313.800 + 37 31 38 1 110.700 313.800 + 29 23 30 1 109.500 292.880 + 32 28 33 1 110.700 313.800 + 23 19 24 1 
110.700 313.800 + 65 60 67 1 107.800 276.144 + 19 14 21 1 110.700 313.800 + 71 22 73 1 107.800 276.144 + 53 52 54 1 107.800 276.144 + 56 37 58 1 109.500 292.880 + 66 60 67 1 107.800 276.144 + 72 22 73 1 107.800 276.144 + 60 56 61 1 113.000 418.400 + 5 3 6 1 110.700 313.800 + 63 59 64 1 107.800 276.144 + 71 22 72 1 107.800 276.144 + 62 59 63 1 107.800 276.144 + 11 8 12 1 111.100 527.184 + 35 29 36 1 123.400 694.544 + 50 43 51 1 107.800 276.144 + 68 61 70 1 107.800 276.144 + 15 11 16 1 107.800 276.144 + 5 3 7 1 110.700 313.800 + 57 37 58 1 107.800 276.144 + 17 12 18 1 123.400 694.544 + 44 40 46 1 107.800 276.144 + 75 74 77 1 107.800 276.144 + 8 5 10 1 110.700 313.800 + 20 14 21 1 109.500 292.880 + 6 3 7 1 107.800 276.144 + 53 52 55 1 107.800 276.144 + 59 56 61 1 113.000 418.400 + 8 5 9 1 110.700 313.800 + 33 28 34 1 107.800 276.144 + 38 31 39 1 107.800 276.144 + 40 32 41 1 111.100 527.184 + 11 8 13 1 110.700 313.800 + 14 11 15 1 110.700 313.800 + 24 19 25 1 107.800 276.144 + 9 5 10 1 107.800 276.144 + 68 61 69 1 107.800 276.144 + 69 61 70 1 107.800 276.144 + 3 2 4 1 111.400 677.808 + 28 23 29 1 111.100 527.184 + 19 14 20 1 111.100 527.184 + 49 43 51 1 107.800 276.144 + 40 32 42 1 110.700 313.800 + 47 41 48 1 123.400 694.544 + 12 8 13 1 109.500 292.880 + +[ dihedrals ] +; IMPROPER DIHEDRAL ANGLES +; ai aj ak al funct c0 c1 c2 c3 c4 c5 + 18 12 8 17 4 180.000 43.932 2 + 27 20 14 26 4 180.000 43.932 2 + 48 41 32 47 4 180.000 43.932 2 + 36 29 23 35 4 180.000 43.932 2 + 4 2 1 3 4 180.000 43.932 2 + +[ dihedrals ] +; PROPER DIHEDRAL ANGLES +; ai aj ak al funct c0 c1 c2 c3 c4 c5 + 12 8 5 3 3 -4.960 6.286 1.310 -2.636 -0.000 0.000 + 29 23 19 14 3 -4.960 6.286 1.310 -2.636 -0.000 0.000 + 41 32 28 23 3 -4.960 6.286 1.310 -2.636 -0.000 0.000 + 20 14 11 8 3 -4.960 6.286 1.310 -2.636 -0.000 0.000 + 20 14 11 15 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 + 41 32 28 33 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 + 12 8 5 9 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 + 29 23 19 24 3 -0.209 
-0.628 0.000 0.837 -0.000 0.000 + 20 14 11 16 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 + 12 8 5 10 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 + 41 32 28 34 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 + 29 23 19 25 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 + 5 3 2 1 3 0.000 0.000 0.000 -0.000 -0.000 0.000 + 5 3 2 4 3 -1.157 -3.471 0.000 4.628 -0.000 0.000 + 14 11 8 12 3 -4.960 6.286 1.310 -2.636 -0.000 0.000 + 23 19 14 20 3 -4.960 6.286 1.310 -2.636 -0.000 0.000 + 32 28 23 29 3 -4.960 6.286 1.310 -2.636 -0.000 0.000 + 8 5 3 2 3 -4.960 6.286 1.310 -2.636 -0.000 0.000 + 28 23 19 14 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 + 23 19 14 11 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 + 11 8 5 3 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 + 32 28 23 19 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 + 14 11 8 5 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 + 19 14 11 8 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 + 40 32 28 23 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 + 28 23 19 25 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 40 32 28 33 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 8 5 3 7 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 23 19 14 21 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 40 32 28 34 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 28 23 19 24 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 32 28 23 30 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 8 5 3 6 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 11 8 5 9 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 14 11 8 13 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 11 8 5 10 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 19 14 11 15 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 19 14 11 16 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 37 31 26 20 3 -2.197 5.201 0.527 -3.531 -0.000 0.000 + 61 56 37 31 3 3.042 -1.351 0.519 -2.209 -0.000 0.000 + 59 56 37 31 3 3.042 -1.351 0.519 -2.209 -0.000 0.000 + 60 56 37 31 3 3.042 -1.351 0.519 -2.209 -0.000 0.000 + 61 56 37 57 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 60 56 37 58 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 59 56 37 57 3 0.632 1.895 0.000 
-2.527 -0.000 0.000 + 60 56 37 57 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 59 56 37 58 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 61 56 37 58 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 31 26 20 14 3 31.206 -9.768 -21.439 -0.000 -0.000 0.000 + 74 4 2 3 3 31.206 -9.768 -21.439 -0.000 -0.000 0.000 + 43 35 29 23 3 31.206 -9.768 -21.439 -0.000 -0.000 0.000 + 22 17 12 8 3 31.206 -9.768 -21.439 -0.000 -0.000 0.000 + 52 47 41 32 3 31.206 -9.768 -21.439 -0.000 -0.000 0.000 + 74 4 2 1 3 21.439 0.000 -21.439 -0.000 -0.000 0.000 + 22 17 12 18 3 21.439 0.000 -21.439 -0.000 -0.000 0.000 + 43 35 29 36 3 21.439 0.000 -21.439 -0.000 -0.000 0.000 + 31 26 20 27 3 21.439 0.000 -21.439 -0.000 -0.000 0.000 + 52 47 41 48 3 21.439 0.000 -21.439 -0.000 -0.000 0.000 + 7 3 2 1 3 0.000 0.000 0.000 -0.000 -0.000 0.000 + 6 3 2 1 3 0.000 0.000 0.000 -0.000 -0.000 0.000 + 7 3 2 4 3 0.276 0.828 0.000 -1.105 -0.000 0.000 + 6 3 2 4 3 0.276 0.828 0.000 -1.105 -0.000 0.000 + 46 40 32 41 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 + 10 5 3 2 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 + 15 11 8 12 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 + 44 40 32 41 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 + 45 40 32 41 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 + 34 28 23 29 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 + 24 19 14 20 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 + 9 5 3 2 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 + 25 19 14 20 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 + 16 11 8 12 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 + 33 28 23 29 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 + 34 28 23 19 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 13 8 5 3 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 15 11 8 5 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 45 40 32 28 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 24 19 14 11 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 16 11 8 5 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 42 32 28 23 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 33 28 23 19 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 46 40 32 28 3 0.628 1.883 
0.000 -2.510 -0.000 0.000 + 21 14 11 8 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 30 23 19 14 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 44 40 32 28 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 25 19 14 11 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 9 5 3 6 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 45 40 32 42 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 58 37 31 38 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 24 19 14 21 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 21 14 11 15 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 13 8 5 9 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 15 11 8 13 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 42 32 28 33 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 46 40 32 42 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 13 8 5 10 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 57 37 31 39 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 58 37 31 39 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 34 28 23 30 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 30 23 19 25 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 9 5 3 7 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 10 5 3 6 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 30 23 19 24 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 10 5 3 7 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 21 14 11 16 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 16 11 8 13 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 57 37 31 38 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 25 19 14 21 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 33 28 23 30 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 44 40 32 42 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 42 32 28 34 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 58 37 31 26 3 0.979 2.937 0.000 -3.916 -0.000 0.000 + 57 37 31 26 3 0.979 2.937 0.000 -3.916 -0.000 0.000 + 70 61 56 37 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 63 59 56 60 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 64 59 56 61 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 63 59 56 61 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 66 60 56 61 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 67 60 56 61 3 0.632 1.895 0.000 -2.527 -0.000 
0.000 + 65 60 56 61 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 67 60 56 59 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 64 59 56 37 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 63 59 56 37 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 68 61 56 59 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 70 61 56 60 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 62 59 56 61 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 69 61 56 59 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 65 60 56 59 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 69 61 56 60 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 70 61 56 59 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 68 61 56 60 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 66 60 56 37 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 66 60 56 59 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 62 59 56 37 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 64 59 56 60 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 69 61 56 37 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 68 61 56 37 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 65 60 56 37 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 67 60 56 37 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 62 59 56 60 3 0.632 1.895 0.000 -2.527 -0.000 0.000 + 49 43 35 29 3 0.414 1.243 0.000 -1.657 -0.000 0.000 + 75 74 4 2 3 0.414 1.243 0.000 -1.657 -0.000 0.000 + 71 22 17 12 3 0.414 1.243 0.000 -1.657 -0.000 0.000 + 39 31 26 20 3 0.414 1.243 0.000 -1.657 -0.000 0.000 + 50 43 35 29 3 0.414 1.243 0.000 -1.657 -0.000 0.000 + 38 31 26 20 3 0.414 1.243 0.000 -1.657 -0.000 0.000 + 72 22 17 12 3 0.414 1.243 0.000 -1.657 -0.000 0.000 + 73 22 17 12 3 0.414 1.243 0.000 -1.657 -0.000 0.000 + 77 74 4 2 3 0.414 1.243 0.000 -1.657 -0.000 0.000 + 54 52 47 41 3 0.414 1.243 0.000 -1.657 -0.000 0.000 + 55 52 47 41 3 0.414 1.243 0.000 -1.657 -0.000 0.000 + 53 52 47 41 3 0.414 1.243 0.000 -1.657 -0.000 0.000 + 76 74 4 2 3 0.414 1.243 0.000 -1.657 -0.000 0.000 + 51 43 35 29 3 0.414 1.243 0.000 -1.657 -0.000 0.000 + 56 37 31 38 3 0.803 2.410 0.000 -3.213 -0.000 0.000 + 56 37 31 39 3 0.803 2.410 0.000 -3.213 -0.000 0.000 + 
56 37 31 26 3 16.736 -16.736 0.000 -0.000 -0.000 0.000 + 36 29 23 28 3 0.000 0.000 0.000 -0.000 -0.000 0.000 + 36 29 23 19 3 0.000 0.000 0.000 -0.000 -0.000 0.000 + 48 41 32 28 3 0.000 0.000 0.000 -0.000 -0.000 0.000 + 27 20 14 11 3 0.000 0.000 0.000 -0.000 -0.000 0.000 + 18 12 8 11 3 0.000 0.000 0.000 -0.000 -0.000 0.000 + 48 41 32 40 3 0.000 0.000 0.000 -0.000 -0.000 0.000 + 18 12 8 5 3 0.000 0.000 0.000 -0.000 -0.000 0.000 + 27 20 14 19 3 0.000 0.000 0.000 -0.000 -0.000 0.000 + 27 20 14 21 3 0.000 0.000 0.000 -0.000 -0.000 0.000 + 36 29 23 30 3 0.000 0.000 0.000 -0.000 -0.000 0.000 + 18 12 8 13 3 0.000 0.000 0.000 -0.000 -0.000 0.000 + 48 41 32 42 3 0.000 0.000 0.000 -0.000 -0.000 0.000 + 17 12 8 5 3 -1.157 -3.471 0.000 4.628 -0.000 0.000 + 17 12 8 11 3 -1.157 -3.471 0.000 4.628 -0.000 0.000 + 26 20 14 11 3 -1.157 -3.471 0.000 4.628 -0.000 0.000 + 35 29 23 19 3 -1.157 -3.471 0.000 4.628 -0.000 0.000 + 47 41 32 28 3 -1.157 -3.471 0.000 4.628 -0.000 0.000 + 35 29 23 28 3 -1.157 -3.471 0.000 4.628 -0.000 0.000 + 26 20 14 19 3 -1.157 -3.471 0.000 4.628 -0.000 0.000 + 47 41 32 40 3 -1.157 -3.471 0.000 4.628 -0.000 0.000 + 17 12 8 13 3 0.276 0.828 0.000 -1.105 -0.000 0.000 + 47 41 32 42 3 0.276 0.828 0.000 -1.105 -0.000 0.000 + 35 29 23 30 3 0.276 0.828 0.000 -1.105 -0.000 0.000 + 26 20 14 21 3 0.276 0.828 0.000 -1.105 -0.000 0.000 + +[ pairs ] + 1 5 1 + 1 6 1 + 1 7 1 + 4 5 1 + 4 6 1 + 2 8 1 + 4 7 1 + 2 9 1 + 2 10 1 + 6 8 1 + 3 11 1 + 7 8 1 + 6 9 1 + 3 12 1 + 7 9 1 + 6 10 1 + 3 13 1 + 7 10 1 + 5 14 1 + 9 11 1 + 5 15 1 + 10 11 1 + 9 12 1 + 5 16 1 + 10 12 1 + 9 13 1 + 5 17 1 + 10 13 1 + 5 18 1 + 12 14 1 + 13 14 1 + 12 15 1 + 8 19 1 + 13 15 1 + 12 16 1 + 11 17 1 + 8 20 1 + 13 16 1 + 11 18 1 + 8 21 1 + 13 17 1 + 8 22 1 + 13 18 1 + 15 19 1 + 11 23 1 + 16 19 1 + 15 20 1 + 11 24 1 + 16 20 1 + 15 21 1 + 11 25 1 + 16 21 1 + 11 26 1 + 11 27 1 + 18 22 1 + 14 28 1 + 20 23 1 + 14 29 1 + 21 23 1 + 20 24 1 + 14 30 1 + 21 24 1 + 20 25 1 + 19 26 1 + 14 31 1 + 21 25 1 + 19 27 1 + 21 26 
1 + 21 27 1 + 19 32 1 + 24 28 1 + 19 33 1 + 25 28 1 + 24 29 1 + 19 34 1 + 25 29 1 + 24 30 1 + 19 35 1 + 25 30 1 + 19 36 1 + 20 37 1 + 27 31 1 + 20 38 1 + 20 39 1 + 29 32 1 + 30 32 1 + 29 33 1 + 30 33 1 + 29 34 1 + 28 35 1 + 23 40 1 + 30 34 1 + 28 36 1 + 23 41 1 + 30 35 1 + 23 42 1 + 30 36 1 + 23 43 1 + 28 44 1 + 33 40 1 + 28 45 1 + 34 40 1 + 33 41 1 + 28 46 1 + 34 41 1 + 33 42 1 + 28 47 1 + 1 74 1 + 34 42 1 + 28 48 1 + 3 74 1 + 2 75 1 + 29 49 1 + 2 76 1 + 36 43 1 + 29 50 1 + 2 77 1 + 29 51 1 + 26 56 1 + 26 57 1 + 12 71 1 + 32 52 1 + 26 58 1 + 12 72 1 + 41 44 1 + 12 73 1 + 42 44 1 + 41 45 1 + 42 45 1 + 41 46 1 + 40 47 1 + 42 46 1 + 40 48 1 + 42 47 1 + 42 48 1 + 31 59 1 + 31 60 1 + 31 61 1 + 41 53 1 + 38 56 1 + 41 54 1 + 39 56 1 + 38 57 1 + 41 55 1 + 39 57 1 + 38 58 1 + 39 58 1 + 37 62 1 + 48 52 1 + 37 63 1 + 37 64 1 + 37 65 1 + 37 66 1 + 37 67 1 + 37 68 1 + 37 69 1 + 37 70 1 + 57 59 1 + 58 59 1 + 57 60 1 + 58 60 1 + 57 61 1 + 58 61 1 + 60 62 1 + 61 62 1 + 60 63 1 + 61 63 1 + 60 64 1 + 59 65 1 + 61 64 1 + 59 66 1 + 61 65 1 + 59 67 1 + 61 66 1 + 59 68 1 + 61 67 1 + 60 68 1 + 59 69 1 + 60 69 1 + 59 70 1 + 60 70 1 + diff --git a/polyply/tests/test_data/itp_to_ff/ACOL/ref.itp b/polyply/tests/test_data/itp_to_ff/ACOL/ref.itp new file mode 100644 index 00000000..9aba902f --- /dev/null +++ b/polyply/tests/test_data/itp_to_ff/ACOL/ref.itp @@ -0,0 +1,677 @@ +; ../../bench.py + +; Please cite the following papers: + +[ moleculetype ] +new 3 + +[ atoms ] + 1 opls_800 1 Mter O3 1 -0.39899 15.999 + 2 opls_801 1 Mter C2 1 0.38641 12.011 + 3 opls_802 1 Mter C1 1 -0.15511 12.011 + 4 opls_803 1 Mter O4 1 -0.34963 15.999 + 5 opls_804 1 Mter C0 1 -0.16566 12.011 + 6 opls_805 1 Mter H8 1 0.12065 1.008 + 7 opls_806 1 Mter H12 1 0.12065 1.008 + 8 opls_808 1 Mter H6 1 0.10725 1.008 + 9 opls_809 1 Mter H7 1 0.1087 1.008 +10 opls_873 1 Mter C5 3 -0.02807 12.011 +11 opls_874 1 Mter H9 3 0.0846 1.008 +12 opls_875 1 Mter H10 3 0.0846 1.008 +13 opls_876 1 Mter H11 3 0.0846 1.008 +14 opls_870 2 M 
H9 6 0.08562 1.008 +15 opls_807 2 M C1 4 -0.09038 12.011 +16 opls_871 2 M H10 6 0.08562 1.008 +17 opls_872 2 M H11 6 0.08562 1.008 +18 opls_810 2 M C0 4 -0.14838 12.011 +19 opls_811 2 M C2 4 0.39132 12.011 +20 opls_812 2 M H8 4 0.11302 1.008 +21 opls_814 2 M H6 4 0.11612 1.008 +22 opls_815 2 M H7 4 0.11612 1.008 +23 opls_816 2 M O4 4 -0.34218 15.999 +24 opls_817 2 M O3 4 -0.38638 15.999 +25 opls_821 2 M C5 4 -0.02608 12.011 +26 opls_813 3 AOL C1 5 -0.09123 12.011 +27 opls_818 3 AOL C0 5 -0.14523 12.011 +28 opls_819 3 AOL C2 5 0.39667 12.011 +29 opls_820 3 AOL H13 5 0.11847 1.008 +30 opls_823 3 AOL H12 5 0.12017 1.008 +31 opls_824 3 AOL H11 5 0.12017 1.008 +32 opls_825 3 AOL O4 5 -0.34703 15.999 +33 opls_826 3 AOL O3 5 -0.33283 15.999 +34 opls_830 3 AOL C5 5 0.02947 12.011 +35 opls_836 3 AOL C6 6 -0.17013 12.011 +36 opls_837 3 AOL H14 6 0.09727 1.008 +37 opls_838 3 AOL H15 6 0.09727 1.008 +38 opls_855 3 AOL N7 6 0.16667 14.007 +39 opls_856 3 AOL H17 6 0.15657 1.008 +40 opls_857 3 AOL H16 6 0.15657 1.008 +41 opls_858 3 AOL C8 6 -0.22393 12.011 +42 opls_859 3 AOL C9 6 -0.22303 12.011 +43 opls_860 3 AOL C10 6 -0.22463 12.011 +44 opls_861 3 AOL H18 6 0.14507 1.008 +45 opls_862 3 AOL H19 6 0.14507 1.008 +46 opls_863 3 AOL H20 6 0.14507 1.008 +47 opls_864 3 AOL H21 6 0.14437 1.008 +48 opls_865 3 AOL H22 7 0.14437 1.008 +49 opls_866 3 AOL H23 7 0.14437 1.008 +50 opls_867 3 AOL H24 7 0.14347 1.008 +51 opls_868 3 AOL H25 7 0.14347 1.008 +52 opls_869 3 AOL H26 7 0.14347 1.008 +53 opls_870 4 M H9 10 0.08562 1.008 +54 opls_807 4 M C1 8 -0.09038 12.011 +55 opls_871 4 M H10 10 0.08562 1.008 +56 opls_872 4 M H11 10 0.08562 1.008 +57 opls_810 4 M C0 8 -0.14838 12.011 +58 opls_811 4 M C2 8 0.39132 12.011 +59 opls_812 4 M H8 8 0.11302 1.008 +60 opls_814 4 M H6 8 0.11612 1.008 +61 opls_815 4 M H7 8 0.11612 1.008 +62 opls_816 4 M O4 8 -0.34218 15.999 +63 opls_817 4 M O3 8 -0.38638 15.999 +64 opls_821 4 M C5 8 -0.02608 12.011 +65 opls_839 5 Mter_1 C0 10 -0.21009 12.011 +66 opls_840 5 
Mter_1 C2 10 0.38121 12.011 +67 opls_841 5 Mter_1 H8 10 0.11421 1.008 +68 opls_843 5 Mter_1 H6 10 0.09591 1.008 +69 opls_844 5 Mter_1 H7 10 0.09591 1.008 +70 opls_845 5 Mter_1 H12 10 0.09591 1.008 +71 opls_846 5 Mter_1 O4 10 -0.33559 15.999 +72 opls_847 5 Mter_1 O3 10 -0.38259 15.999 +73 opls_851 5 Mter_1 C5 10 -0.02759 12.011 +74 opls_852 5 Mter_1 H9 10 0.08801 1.008 +75 opls_853 5 Mter_1 H10 10 0.08801 1.008 +76 opls_854 5 Mter_1 H11 10 0.08801 1.008 +77 opls_831 5 Mter_1 C1 9 -0.09129 12.011 + +[ bonds ] + 2 1 1 0.1229 476976.000 + 3 2 1 0.1522 265265.600 + 4 2 1 0.1327 179075.200 + 5 3 1 0.1529 224262.400 + 6 3 1 0.1090 284512.000 + 7 3 1 0.1090 284512.000 + 8 5 1 0.1090 284512.000 + 9 5 1 0.1090 284512.000 +10 4 1 0.1410 267776.000 +11 10 1 0.1090 284512.000 +12 10 1 0.1090 284512.000 +13 10 1 0.1090 284512.000 +18 15 1 0.1529 224262.400 +19 15 1 0.1522 265265.600 +20 15 1 0.1090 284512.000 +21 18 1 0.1090 284512.000 +22 18 1 0.1090 284512.000 +23 19 1 0.1327 179075.200 +24 19 1 0.1229 476976.000 +25 23 1 0.1410 267776.000 +14 25 1 0.1090 284512.000 +16 25 1 0.1090 284512.000 +17 25 1 0.1090 284512.000 +27 26 1 0.1529 224262.400 +28 26 1 0.1522 265265.600 +29 26 1 0.1090 284512.000 +30 27 1 0.1090 284512.000 +31 27 1 0.1090 284512.000 +32 28 1 0.1327 179075.200 +33 28 1 0.1229 476976.000 +34 32 1 0.1410 267776.000 +35 34 1 0.1529 224262.400 +36 34 1 0.1090 284512.000 +37 34 1 0.1090 284512.000 +38 35 1 0.1471 307105.600 +39 35 1 0.1090 284512.000 +40 35 1 0.1090 284512.000 +41 38 1 0.1471 307105.600 +42 38 1 0.1471 307105.600 +43 38 1 0.1471 307105.600 +44 41 1 0.1090 284512.000 +45 41 1 0.1090 284512.000 +46 41 1 0.1090 284512.000 +47 42 1 0.1090 284512.000 +48 42 1 0.1090 284512.000 +49 42 1 0.1090 284512.000 +50 43 1 0.1090 284512.000 +51 43 1 0.1090 284512.000 +52 43 1 0.1090 284512.000 +57 54 1 0.1529 224262.400 +58 54 1 0.1522 265265.600 +59 54 1 0.1090 284512.000 +60 57 1 0.1090 284512.000 +61 57 1 0.1090 284512.000 +62 58 1 0.1327 179075.200 +63 58 1 
0.1229 476976.000 +64 62 1 0.1410 267776.000 +53 64 1 0.1090 284512.000 +55 64 1 0.1090 284512.000 +56 64 1 0.1090 284512.000 +65 77 1 0.1529 224262.400 +66 77 1 0.1522 265265.600 +67 77 1 0.1090 284512.000 +68 65 1 0.1090 284512.000 +69 65 1 0.1090 284512.000 +70 65 1 0.1090 284512.000 +71 66 1 0.1327 179075.200 +72 66 1 0.1229 476976.000 +73 71 1 0.1410 267776.000 +74 73 1 0.1090 284512.000 +75 73 1 0.1090 284512.000 +76 73 1 0.1090 284512.000 +15 5 1 0.1529 224262.400 ; link +26 18 1 0.1529 224262.400 ; link +54 27 1 0.1529 224262.400 ; link +77 57 1 0.1529 224262.400 ; link + +[ pairs ] + 1 5 1 + 1 6 1 + 1 7 1 + 4 5 1 + 4 6 1 + 4 7 1 + 2 8 1 + 2 9 1 + 6 8 1 + 7 8 1 + 6 9 1 + 7 9 1 + 1 10 1 + 3 10 1 + 2 11 1 + 2 12 1 + 2 13 1 +19 21 1 +20 21 1 +19 22 1 +18 23 1 +20 22 1 +18 24 1 +20 23 1 +15 25 1 +20 24 1 +24 25 1 +19 14 1 +19 16 1 +19 17 1 +28 30 1 +29 30 1 +28 31 1 +27 32 1 +26 34 1 +29 31 1 +27 33 1 +29 32 1 +29 33 1 +28 35 1 +33 34 1 +28 36 1 +28 37 1 +32 38 1 +32 39 1 +32 40 1 +34 41 1 +34 42 1 +34 43 1 +36 38 1 +37 38 1 +36 39 1 +37 39 1 +36 40 1 +37 40 1 +35 44 1 +35 45 1 +35 46 1 +35 47 1 +35 48 1 +35 49 1 +35 50 1 +35 51 1 +35 52 1 +39 41 1 +40 41 1 +39 42 1 +40 42 1 +39 43 1 +40 43 1 +42 44 1 +43 44 1 +42 45 1 +43 45 1 +42 46 1 +41 47 1 +43 46 1 +41 48 1 +43 47 1 +41 49 1 +43 48 1 +41 50 1 +43 49 1 +42 50 1 +41 51 1 +42 51 1 +41 52 1 +42 52 1 +58 60 1 +59 60 1 +58 61 1 +57 62 1 +59 61 1 +57 63 1 +59 62 1 +54 64 1 +59 63 1 +63 64 1 +58 53 1 +58 55 1 +58 56 1 +77 73 1 +66 68 1 +67 68 1 +66 69 1 +67 69 1 +66 70 1 +65 71 1 +67 70 1 +65 72 1 +67 71 1 +67 72 1 +66 74 1 +66 75 1 +66 76 1 +72 73 1 + 2 15 1 ; link + 6 15 1 ; link + 3 18 1 ; link + 7 15 1 ; link + 3 19 1 ; link + 3 20 1 ; link + 8 18 1 ; link + 5 21 1 ; link + 9 18 1 ; link + 8 19 1 ; link + 5 22 1 ; link + 9 19 1 ; link + 8 20 1 ; link + 5 23 1 ; link + 9 20 1 ; link + 5 24 1 ; link +19 26 1 ; link +20 26 1 ; link +15 27 1 ; link +15 28 1 ; link +15 29 1 ; link +21 27 1 ; link +22 27 1 ; link 
+21 28 1 ; link +18 30 1 ; link +22 28 1 ; link +21 29 1 ; link +18 31 1 ; link +22 29 1 ; link +18 32 1 ; link +18 33 1 ; link +26 57 1 ; link +28 54 1 ; link +26 58 1 ; link +29 54 1 ; link +26 59 1 ; link +30 57 1 ; link +27 60 1 ; link +31 57 1 ; link +30 58 1 ; link +27 61 1 ; link +31 58 1 ; link +30 59 1 ; link +27 62 1 ; link +31 59 1 ; link +27 63 1 ; link +58 77 1 ; link +59 77 1 ; link +54 65 1 ; link +54 66 1 ; link +54 67 1 ; link +57 68 1 ; link +60 65 1 ; link +57 69 1 ; link +61 65 1 ; link +60 66 1 ; link +57 70 1 ; link +61 66 1 ; link +60 67 1 ; link +57 71 1 ; link +61 67 1 ; link +57 72 1 ; link + 5 26 1 ; link +18 54 1 ; link +27 77 1 ; link + +[ angles ] + 1 2 3 1 120.400 669.440 + 1 2 4 1 123.400 694.544 + 2 3 5 1 111.100 527.184 + 2 3 6 1 109.500 292.880 + 2 3 7 1 109.500 292.880 + 3 5 8 1 110.700 313.800 + 3 5 9 1 110.700 313.800 + 2 4 10 1 116.900 694.544 + 4 10 11 1 109.500 292.880 + 4 10 12 1 109.500 292.880 + 4 10 13 1 109.500 292.880 +11 10 12 1 107.800 276.144 +12 10 13 1 107.800 276.144 + 5 3 6 1 110.700 313.800 + 5 3 7 1 110.700 313.800 +11 10 13 1 107.800 276.144 + 6 3 7 1 107.800 276.144 + 8 5 9 1 107.800 276.144 + 3 2 4 1 111.400 677.808 +15 18 21 1 110.700 313.800 +15 18 22 1 110.700 313.800 +15 19 23 1 111.400 677.808 +15 19 24 1 120.400 669.440 +19 23 25 1 116.900 694.544 +23 25 14 1 109.500 292.880 +23 25 16 1 109.500 292.880 +23 25 17 1 109.500 292.880 +14 25 17 1 107.800 276.144 +16 25 17 1 107.800 276.144 +14 25 16 1 107.800 276.144 +18 15 19 1 111.100 527.184 +21 18 22 1 107.800 276.144 +23 19 24 1 123.400 694.544 +18 15 20 1 110.700 313.800 +19 15 20 1 109.500 292.880 +26 27 30 1 110.700 313.800 +26 27 31 1 110.700 313.800 +26 28 32 1 111.400 677.808 +26 28 33 1 120.400 669.440 +28 32 34 1 116.900 694.544 +32 34 35 1 109.500 418.400 +32 34 36 1 109.500 292.880 +32 34 37 1 109.500 292.880 +34 35 38 1 111.200 669.440 +34 35 39 1 110.700 313.800 +34 35 40 1 110.700 313.800 +35 38 41 1 113.000 418.400 +35 38 42 1 113.000 
418.400 +35 38 43 1 113.000 418.400 +38 41 44 1 109.500 292.880 +38 41 45 1 109.500 292.880 +38 41 46 1 109.500 292.880 +38 42 47 1 109.500 292.880 +38 42 48 1 109.500 292.880 +38 42 49 1 109.500 292.880 +38 43 50 1 109.500 292.880 +38 43 51 1 109.500 292.880 +38 43 52 1 109.500 292.880 +47 42 48 1 107.800 276.144 +44 41 46 1 107.800 276.144 +35 34 37 1 110.700 313.800 +41 38 42 1 113.000 418.400 +32 28 33 1 123.400 694.544 +38 35 39 1 109.500 292.880 +35 34 36 1 110.700 313.800 +47 42 49 1 107.800 276.144 +27 26 29 1 110.700 313.800 +38 35 40 1 109.500 292.880 +48 42 49 1 107.800 276.144 +42 38 43 1 113.000 418.400 +45 41 46 1 107.800 276.144 +44 41 45 1 107.800 276.144 +50 43 52 1 107.800 276.144 +39 35 40 1 107.800 276.144 +28 26 29 1 109.500 292.880 +41 38 43 1 113.000 418.400 +36 34 37 1 107.800 276.144 +30 27 31 1 107.800 276.144 +50 43 51 1 107.800 276.144 +51 43 52 1 107.800 276.144 +27 26 28 1 111.100 527.184 +54 57 60 1 110.700 313.800 +54 57 61 1 110.700 313.800 +54 58 62 1 111.400 677.808 +54 58 63 1 120.400 669.440 +58 62 64 1 116.900 694.544 +62 64 53 1 109.500 292.880 +62 64 55 1 109.500 292.880 +62 64 56 1 109.500 292.880 +53 64 56 1 107.800 276.144 +55 64 56 1 107.800 276.144 +53 64 55 1 107.800 276.144 +57 54 58 1 111.100 527.184 +60 57 61 1 107.800 276.144 +62 58 63 1 123.400 694.544 +57 54 59 1 110.700 313.800 +58 54 59 1 109.500 292.880 +77 65 68 1 110.700 313.800 +77 65 69 1 110.700 313.800 +77 65 70 1 110.700 313.800 +77 66 71 1 111.400 677.808 +77 66 72 1 120.400 669.440 +66 71 73 1 116.900 694.544 +71 73 74 1 109.500 292.880 +71 73 75 1 109.500 292.880 +71 73 76 1 109.500 292.880 +69 65 70 1 107.800 276.144 +75 73 76 1 107.800 276.144 +66 77 67 1 109.500 292.880 +68 65 69 1 107.800 276.144 +74 73 75 1 107.800 276.144 +68 65 70 1 107.800 276.144 +74 73 76 1 107.800 276.144 +65 77 66 1 111.100 527.184 +65 77 67 1 110.700 313.800 +71 66 72 1 123.400 694.544 + 3 5 15 1 112.700 488.273 ; link + 5 15 18 1 112.700 488.273 ; link + 5 15 19 1 
111.100 527.184 ; link + 5 15 20 1 110.700 313.800 ; link +15 5 9 1 110.700 313.800 ; link +15 5 8 1 110.700 313.800 ; link +15 18 26 1 112.700 488.273 ; link +18 26 27 1 112.700 488.273 ; link +18 26 28 1 111.100 527.184 ; link +18 26 29 1 110.700 313.800 ; link +26 18 22 1 110.700 313.800 ; link +26 18 21 1 110.700 313.800 ; link +26 27 54 1 112.700 488.273 ; link +27 54 57 1 112.700 488.273 ; link +27 54 58 1 111.100 527.184 ; link +27 54 59 1 110.700 313.800 ; link +54 27 31 1 110.700 313.800 ; link +54 27 30 1 110.700 313.800 ; link +54 57 77 1 112.700 488.273 ; link +57 77 65 1 112.700 488.273 ; link +57 77 66 1 111.100 527.184 ; link +57 77 67 1 110.700 313.800 ; link +77 57 61 1 110.700 313.800 ; link +77 57 60 1 110.700 313.800 ; link + +[ dihedrals ] + 4 2 1 3 4 180.000 43.932 2 + 5 3 2 1 3 0.000 0.000 0.000 -0.000 -0.000 0.000 + 5 3 2 4 3 -1.157 -3.471 0.000 4.628 -0.000 0.000 +10 4 2 3 3 31.206 -9.768 -21.439 -0.000 -0.000 0.000 +10 4 2 1 3 21.439 0.000 -21.439 -0.000 -0.000 0.000 + 7 3 2 1 3 0.000 0.000 0.000 -0.000 -0.000 0.000 + 6 3 2 1 3 0.000 0.000 0.000 -0.000 -0.000 0.000 + 7 3 2 4 3 0.276 0.828 0.000 -1.105 -0.000 0.000 + 6 3 2 4 3 0.276 0.828 0.000 -1.105 -0.000 0.000 + 9 5 3 2 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 + 8 5 3 2 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 + 8 5 3 6 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 8 5 3 7 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 9 5 3 6 3 0.628 1.883 0.000 -2.510 -0.000 0.000 + 9 5 3 7 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +11 10 4 2 3 0.414 1.243 0.000 -1.657 -0.000 0.000 +13 10 4 2 3 0.414 1.243 0.000 -1.657 -0.000 0.000 +12 10 4 2 3 0.414 1.243 0.000 -1.657 -0.000 0.000 +24 19 15 23 4 180.000 43.932 2 +25 23 19 15 3 31.206 -9.768 -21.439 -0.000 -0.000 0.000 +25 23 19 24 3 21.439 0.000 -21.439 -0.000 -0.000 0.000 +21 18 15 19 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 +22 18 15 19 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 +21 18 15 20 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +22 18 15 20 3 0.628 1.883 
0.000 -2.510 -0.000 0.000 +14 25 23 19 3 0.414 1.243 0.000 -1.657 -0.000 0.000 +16 25 23 19 3 0.414 1.243 0.000 -1.657 -0.000 0.000 +17 25 23 19 3 0.414 1.243 0.000 -1.657 -0.000 0.000 +24 19 15 18 3 0.000 0.000 0.000 -0.000 -0.000 0.000 +24 19 15 20 3 0.000 0.000 0.000 -0.000 -0.000 0.000 +23 19 15 18 3 -1.157 -3.471 0.000 4.628 -0.000 0.000 +23 19 15 20 3 0.276 0.828 0.000 -1.105 -0.000 0.000 +33 28 26 32 4 180.000 43.932 2 +35 34 32 28 3 -2.197 5.201 0.527 -3.531 -0.000 0.000 +43 38 35 34 3 3.042 -1.351 0.519 -2.209 -0.000 0.000 +41 38 35 34 3 3.042 -1.351 0.519 -2.209 -0.000 0.000 +42 38 35 34 3 3.042 -1.351 0.519 -2.209 -0.000 0.000 +43 38 35 39 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +42 38 35 40 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +41 38 35 39 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +42 38 35 39 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +41 38 35 40 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +43 38 35 40 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +34 32 28 26 3 31.206 -9.768 -21.439 -0.000 -0.000 0.000 +34 32 28 33 3 21.439 0.000 -21.439 -0.000 -0.000 0.000 +30 27 26 28 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 +31 27 26 28 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 +40 35 34 36 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +30 27 26 29 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +39 35 34 37 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +40 35 34 37 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +39 35 34 36 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +31 27 26 29 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +40 35 34 32 3 0.979 2.937 0.000 -3.916 -0.000 0.000 +39 35 34 32 3 0.979 2.937 0.000 -3.916 -0.000 0.000 +52 43 38 35 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +45 41 38 42 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +46 41 38 43 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +45 41 38 43 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +48 42 38 43 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +49 42 38 43 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +47 42 38 43 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +49 42 38 41 3 
0.632 1.895 0.000 -2.527 -0.000 0.000 +46 41 38 35 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +45 41 38 35 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +50 43 38 41 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +52 43 38 42 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +44 41 38 43 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +51 43 38 41 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +47 42 38 41 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +51 43 38 42 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +52 43 38 41 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +50 43 38 42 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +48 42 38 35 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +48 42 38 41 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +44 41 38 35 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +46 41 38 42 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +51 43 38 35 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +50 43 38 35 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +47 42 38 35 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +49 42 38 35 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +44 41 38 42 3 0.632 1.895 0.000 -2.527 -0.000 0.000 +37 34 32 28 3 0.414 1.243 0.000 -1.657 -0.000 0.000 +36 34 32 28 3 0.414 1.243 0.000 -1.657 -0.000 0.000 +38 35 34 36 3 0.803 2.410 0.000 -3.213 -0.000 0.000 +38 35 34 37 3 0.803 2.410 0.000 -3.213 -0.000 0.000 +38 35 34 32 3 16.736 -16.736 0.000 -0.000 -0.000 0.000 +33 28 26 27 3 0.000 0.000 0.000 -0.000 -0.000 0.000 +33 28 26 29 3 0.000 0.000 0.000 -0.000 -0.000 0.000 +32 28 26 27 3 -1.157 -3.471 0.000 4.628 -0.000 0.000 +32 28 26 29 3 0.276 0.828 0.000 -1.105 -0.000 0.000 +63 58 54 62 4 180.000 43.932 2 +64 62 58 54 3 31.206 -9.768 -21.439 -0.000 -0.000 0.000 +64 62 58 63 3 21.439 0.000 -21.439 -0.000 -0.000 0.000 +60 57 54 58 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 +61 57 54 58 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 +60 57 54 59 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +61 57 54 59 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +53 64 62 58 3 0.414 1.243 0.000 -1.657 -0.000 0.000 +55 64 62 58 3 0.414 1.243 0.000 -1.657 -0.000 0.000 +56 64 
62 58 3 0.414 1.243 0.000 -1.657 -0.000 0.000 +63 58 54 57 3 0.000 0.000 0.000 -0.000 -0.000 0.000 +63 58 54 59 3 0.000 0.000 0.000 -0.000 -0.000 0.000 +62 58 54 57 3 -1.157 -3.471 0.000 4.628 -0.000 0.000 +62 58 54 59 3 0.276 0.828 0.000 -1.105 -0.000 0.000 +72 66 77 71 4 180.000 43.932 2 +73 71 66 77 3 31.206 -9.768 -21.439 -0.000 -0.000 0.000 +73 71 66 72 3 21.439 0.000 -21.439 -0.000 -0.000 0.000 +70 65 77 66 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 +68 65 77 66 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 +69 65 77 66 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 +69 65 77 67 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +70 65 77 67 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +68 65 77 67 3 0.628 1.883 0.000 -2.510 -0.000 0.000 +75 73 71 66 3 0.414 1.243 0.000 -1.657 -0.000 0.000 +76 73 71 66 3 0.414 1.243 0.000 -1.657 -0.000 0.000 +74 73 71 66 3 0.414 1.243 0.000 -1.657 -0.000 0.000 +72 66 77 65 3 0.000 0.000 0.000 -0.000 -0.000 0.000 +72 66 77 67 3 0.000 0.000 0.000 -0.000 -0.000 0.000 +71 66 77 65 3 -1.157 -3.471 0.000 4.628 -0.000 0.000 +71 66 77 67 3 0.276 0.828 0.000 -1.105 -0.000 0.000 +19 15 5 3 3 -4.960 6.286 1.310 -2.636 -0.000 0.000 ; link +19 15 5 8 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 ; link +19 15 5 9 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 ; link +15 5 3 2 3 -4.960 6.286 1.310 -2.636 -0.000 0.000 ; link +18 15 5 3 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link +15 5 3 7 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +15 5 3 6 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +18 15 5 8 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +18 15 5 9 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +20 15 5 3 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +21 18 15 5 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +22 18 15 5 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +20 15 5 8 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +20 15 5 9 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +24 19 15 5 3 0.000 0.000 0.000 -0.000 -0.000 0.000 ; link +23 19 15 5 3 -1.157 -3.471 
0.000 4.628 -0.000 0.000 ; link +28 26 18 15 3 -4.960 6.286 1.310 -2.636 -0.000 0.000 ; link +28 26 18 21 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 ; link +28 26 18 22 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 ; link +26 18 15 19 3 -4.960 6.286 1.310 -2.636 -0.000 0.000 ; link +27 26 18 15 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link +26 18 15 20 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +27 26 18 21 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +27 26 18 22 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +30 27 26 18 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +29 26 18 15 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +31 27 26 18 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +29 26 18 21 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +29 26 18 22 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +33 28 26 18 3 0.000 0.000 0.000 -0.000 -0.000 0.000 ; link +32 28 26 18 3 -1.157 -3.471 0.000 4.628 -0.000 0.000 ; link +58 54 27 26 3 -4.960 6.286 1.310 -2.636 -0.000 0.000 ; link +58 54 27 30 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 ; link +58 54 27 31 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 ; link +54 27 26 28 3 -4.960 6.286 1.310 -2.636 -0.000 0.000 ; link +57 54 27 26 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link +57 54 27 31 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +54 27 26 29 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +57 54 27 30 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +61 57 54 27 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +60 57 54 27 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +59 54 27 26 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +59 54 27 31 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +59 54 27 30 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +63 58 54 27 3 0.000 0.000 0.000 -0.000 -0.000 0.000 ; link +62 58 54 27 3 -1.157 -3.471 0.000 4.628 -0.000 0.000 ; link +66 77 57 54 3 -4.960 6.286 1.310 -2.636 -0.000 0.000 ; link +66 77 57 60 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 ; link +66 77 57 61 3 -0.209 -0.628 
0.000 0.837 -0.000 0.000 ; link +77 57 54 58 3 -4.960 6.286 1.310 -2.636 -0.000 0.000 ; link +65 77 57 54 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link +65 77 57 60 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +65 77 57 61 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +77 57 54 59 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +69 65 77 57 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +67 77 57 54 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +70 65 77 57 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +68 65 77 57 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +67 77 57 60 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +67 77 57 61 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link +72 66 77 57 3 0.000 0.000 0.000 -0.000 -0.000 0.000 ; link +71 66 77 57 3 -1.157 -3.471 0.000 4.628 -0.000 0.000 ; link +54 27 26 18 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link +77 57 54 27 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link +26 18 15 5 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link + diff --git a/polyply/tests/test_data/itp_to_ff/ACOL/ref.top b/polyply/tests/test_data/itp_to_ff/ACOL/ref.top new file mode 100644 index 00000000..f6d5e4e9 --- /dev/null +++ b/polyply/tests/test_data/itp_to_ff/ACOL/ref.top @@ -0,0 +1,28 @@ +#define _FF_OPLS +#define _FF_OPLSAA + +; This force field uses a format that requires Gromacs 3.1.4 or later. +; +; References for the OPLS-AA force field: +; +; W. L. Jorgensen, D. S. Maxwell, and J. Tirado-Rives, +; J. Am. Chem. Soc. 118, 11225-11236 (1996). +; W. L. Jorgensen and N. A. McDonald, Theochem 424, 145-155 (1998). +; W. L. Jorgensen and N. A. McDonald, J. Phys. Chem. B 102, 8049-8059 (1998). +; R. C. Rizzo and W. L. Jorgensen, J. Am. Chem. Soc. 121, 4827-4836 (1999). +; M. L. Price, D. Ostrovsky, and W. L. Jorgensen, J. Comp. Chem. (2001). +; E. K. Watkins and W. L. Jorgensen, J. Phys. Chem. A 105, 4118-4125 (2001). +; G. A. Kaminski, R.A. Friesner, J.Tirado-Rives and W.L. Jorgensen, J. Phys. Chem. B 105, 6474 (2001). 
+; + +[ defaults ] +; nbfunc comb-rule gen-pairs fudgeLJ fudgeQQ +1 3 yes 0.5 0.5 + +#include "ligpargen.itp" +#include "in_itp.itp" + +[system] +test +[molecules] +ref 1 diff --git a/polyply/tests/test_data/itp_to_ff/ACOL/seq.txt b/polyply/tests/test_data/itp_to_ff/ACOL/seq.txt new file mode 100644 index 00000000..1a088a04 --- /dev/null +++ b/polyply/tests/test_data/itp_to_ff/ACOL/seq.txt @@ -0,0 +1 @@ +Mter M AOL M Mter_1 diff --git a/polyply/tests/test_itp_to_ff.py b/polyply/tests/test_itp_to_ff.py index ac727795..db2a9984 100644 --- a/polyply/tests/test_itp_to_ff.py +++ b/polyply/tests/test_itp_to_ff.py @@ -70,6 +70,11 @@ def itp_equal(ref_mol, new_mol): @pytest.mark.parametrize("case, smiles, resnames, charges", [ ("PEO_OHter", ["[OH][CH2]", "[CH2]O[CH2]", "[CH2][OH]"], ["OH", "PEO", "OH"], [0, 0, 0]), ("PEG_PBE", ["[CH3]", "[CH2][CH][CH][CH2]", "[CH2]O[CH2]"], ["CH3", "PBE", "PEO"], [0, 0, 0]), + ("ACOL", ["[CH2][CH]C(=O)[O][CH3]","[CH2][CH]C(=O)[O][CH3]", + "[CH2][CH]C(=O)[O][CH2][CH2][N]([CH3])([CH3])([CH3])", + "[CH2][CH]C(=O)[O][CH3]", "[CH2][CH]C(=O)[O][CH3]"], + ["M", "M", "AOL", "M", "M"], + [0, 0, 1, 0, 0]), ]) def test_itp_to_ff(tmp_path, case, smiles, resnames, charges): """ From a39c3acb06614d99fa6181a702e62123f823561f Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Fri, 24 Nov 2023 15:17:25 +0100 Subject: [PATCH 39/82] use top file for ACOL test and fix bug in test --- .../test_data/itp_to_ff/ACOL/ligpargen.itp | 83 +++++++++++++++++++ polyply/tests/test_itp_to_ff.py | 14 ++-- 2 files changed, 91 insertions(+), 6 deletions(-) create mode 100644 polyply/tests/test_data/itp_to_ff/ACOL/ligpargen.itp diff --git a/polyply/tests/test_data/itp_to_ff/ACOL/ligpargen.itp b/polyply/tests/test_data/itp_to_ff/ACOL/ligpargen.itp new file mode 100644 index 00000000..dddc1fc4 --- /dev/null +++ b/polyply/tests/test_data/itp_to_ff/ACOL/ligpargen.itp @@ -0,0 +1,83 @@ + +; +; GENERATED BY LigParGen Server +; Jorgensen Lab @ Yale University +; +[ atomtypes ] + 
opls_846 O846 1 15.9990 0.000 A 2.90000E-01 5.85760E-01 + opls_835 O835 1 15.9990 0.000 A 2.96000E-01 8.78640E-01 + opls_839 C839 1 12.0110 0.000 A 3.50000E-01 2.76144E-01 + opls_867 H867 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_803 O803 1 15.9990 0.000 A 2.90000E-01 5.85760E-01 + opls_806 H806 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_864 H864 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_818 C818 1 12.0110 0.000 A 3.50000E-01 2.76144E-01 + opls_855 N855 1 14.0070 0.000 A 3.25000E-01 7.11280E-01 + opls_874 H874 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_843 H843 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_826 O826 1 15.9990 0.000 A 2.96000E-01 8.78640E-01 + opls_862 H862 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_827 C827 1 12.0110 0.000 A 3.50000E-01 2.76144E-01 + opls_849 H849 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_834 O834 1 15.9990 0.000 A 2.90000E-01 5.85760E-01 + opls_844 H844 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_802 C802 1 12.0110 0.000 A 3.50000E-01 2.76144E-01 + opls_815 H815 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_851 C851 1 12.0110 0.000 A 3.50000E-01 2.76144E-01 + opls_814 H814 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_825 O825 1 15.9990 0.000 A 2.90000E-01 5.85760E-01 + opls_808 H808 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_807 C807 1 12.0110 0.000 A 3.50000E-01 2.76144E-01 + opls_842 C842 1 12.0110 0.000 A 3.50000E-01 2.76144E-01 + opls_838 H838 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_876 H876 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_805 H805 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_804 C804 1 12.0110 0.000 A 3.50000E-01 2.76144E-01 + opls_824 H824 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_820 H820 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_801 C801 1 12.0110 0.000 A 3.55000E-01 2.92880E-01 + opls_837 H837 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_819 C819 1 12.0110 0.000 A 3.55000E-01 2.92880E-01 + opls_829 H829 1 1.0080 0.000 A 2.50000E-01 
1.25520E-01 + opls_822 C822 1 12.0110 0.000 A 3.50000E-01 2.76144E-01 + opls_832 H832 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_875 H875 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_848 H848 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_856 H856 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_800 O800 1 15.9990 0.000 A 2.96000E-01 8.78640E-01 + opls_823 H823 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_811 C811 1 12.0110 0.000 A 3.55000E-01 2.92880E-01 + opls_833 H833 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_813 C813 1 12.0110 0.000 A 3.50000E-01 2.76144E-01 + opls_816 O816 1 15.9990 0.000 A 2.90000E-01 5.85760E-01 + opls_869 H869 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_831 C831 1 12.0110 0.000 A 3.50000E-01 2.76144E-01 + opls_868 H868 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_841 H841 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_871 H871 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_821 C821 1 12.0110 0.000 A 3.50000E-01 2.76144E-01 + opls_810 C810 1 12.0110 0.000 A 3.50000E-01 2.76144E-01 + opls_861 H861 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_847 O847 1 15.9990 0.000 A 2.96000E-01 8.78640E-01 + opls_857 H857 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_852 H852 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_870 H870 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_866 H866 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_860 C860 1 12.0110 0.000 A 3.50000E-01 2.76144E-01 + opls_850 H850 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_817 O817 1 15.9990 0.000 A 2.96000E-01 8.78640E-01 + opls_853 H853 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_873 C873 1 12.0110 0.000 A 3.50000E-01 2.76144E-01 + opls_812 H812 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_858 C858 1 12.0110 0.000 A 3.50000E-01 2.76144E-01 + opls_865 H865 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_809 H809 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_859 C859 1 12.0110 0.000 A 3.50000E-01 2.76144E-01 + opls_830 C830 1 12.0110 0.000 A 
3.50000E-01 2.76144E-01 + opls_863 H863 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_828 C828 1 12.0110 0.000 A 3.55000E-01 2.92880E-01 + opls_836 C836 1 12.0110 0.000 A 3.50000E-01 2.76144E-01 + opls_845 H845 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_840 C840 1 12.0110 0.000 A 3.55000E-01 2.92880E-01 + opls_854 H854 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 + opls_872 H872 1 1.0080 0.000 A 2.50000E-01 1.25520E-01 diff --git a/polyply/tests/test_itp_to_ff.py b/polyply/tests/test_itp_to_ff.py index db2a9984..13afaf0a 100644 --- a/polyply/tests/test_itp_to_ff.py +++ b/polyply/tests/test_itp_to_ff.py @@ -67,23 +67,25 @@ def itp_equal(ref_mol, new_mol): assert False return True -@pytest.mark.parametrize("case, smiles, resnames, charges", [ - ("PEO_OHter", ["[OH][CH2]", "[CH2]O[CH2]", "[CH2][OH]"], ["OH", "PEO", "OH"], [0, 0, 0]), - ("PEG_PBE", ["[CH3]", "[CH2][CH][CH][CH2]", "[CH2]O[CH2]"], ["CH3", "PBE", "PEO"], [0, 0, 0]), - ("ACOL", ["[CH2][CH]C(=O)[O][CH3]","[CH2][CH]C(=O)[O][CH3]", +@pytest.mark.parametrize("case, fname, smiles, resnames, charges", [ + ("PEO_OHter", "in_itp.itp", ["[OH][CH2]", "[CH2]O[CH2]", "[CH2][OH]"], + ["OH", "PEO", "OH"], [0, 0, 0]), + ("PEG_PBE", "in_itp.itp", ["[CH3]", "[CH2][CH][CH][CH2]", "[CH2]O[CH2]"], + ["CH3", "PBE", "PEO"], [0, 0, 0]), + ("ACOL","ref.top", ["[CH2][CH]C(=O)[O][CH3]","[CH2][CH]C(=O)[O][CH3]", "[CH2][CH]C(=O)[O][CH2][CH2][N]([CH3])([CH3])([CH3])", "[CH2][CH]C(=O)[O][CH3]", "[CH2][CH]C(=O)[O][CH3]"], ["M", "M", "AOL", "M", "M"], [0, 0, 1, 0, 0]), ]) -def test_itp_to_ff(tmp_path, case, smiles, resnames, charges): +def test_itp_to_ff(tmp_path, case, fname, smiles, resnames, charges): """ Call itp-to-ff and check if it generates the same force-field as in the ref.ff file. 
""" tmp_file = Path(tmp_path) / "test.ff" inpath = Path(polyply.TEST_DATA) / "itp_to_ff" / case - itp_to_ff(itppath=inpath/"in_itp.itp", + itp_to_ff(itppath=inpath/fname, fragment_smiles=smiles, resnames=resnames, charges=charges, From 39f7ad4e9b5923c18ff8a529294cabe877b44a9c Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Thu, 28 Dec 2023 12:57:58 +0100 Subject: [PATCH 40/82] fix toplevel itp_to_ff parser --- bin/polyply | 2 -- 1 file changed, 2 deletions(-) diff --git a/bin/polyply b/bin/polyply index eff60024..1eb4d6f9 100755 --- a/bin/polyply +++ b/bin/polyply @@ -240,8 +240,6 @@ def main(): # pylint: disable=too-many-locals,too-many-statements parser_itp_ff.add_argument('-tp',dest="term_prefix", default="ter") parser_itp_ff.add_argument('-o', dest="outpath", type=Path) parser_itp_ff.add_argument('-c', dest="charges", type=float, nargs='*') - parser_itp_ff.add_argument('-tol', dest="tolerance", type=float, default=1e-5) - parser_itp_ff.add_argument('-d', dest="decimals", type=int, default=5) parser_itp_ff.set_defaults(func=itp_to_ff) From 8f80a99952a20665aab75aa708c0d294aacfabf2 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Mon, 15 Jan 2024 11:47:59 +0100 Subject: [PATCH 41/82] bigsmile_draft --- polyply/src/big_smiles.py | 93 +++++++++++++++ polyply/src/big_smiles_helper.py | 193 +++++++++++++++++++++++++++++++ polyply/src/fragment_finder.py | 30 ++++- polyply/src/new.py | 76 ++++++++++++ 4 files changed, 391 insertions(+), 1 deletion(-) create mode 100644 polyply/src/big_smiles.py create mode 100644 polyply/src/big_smiles_helper.py create mode 100644 polyply/src/new.py diff --git a/polyply/src/big_smiles.py b/polyply/src/big_smiles.py new file mode 100644 index 00000000..41e8535e --- /dev/null +++ b/polyply/src/big_smiles.py @@ -0,0 +1,93 @@ +# Copyright 2020 University of Groningen +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +def find_token_indices(line, target): + idxs = [idx for idx, token in enumerate(line) if token == target] + for idx in idxs: + yield idx + +def compatible(left, right): + if left == right: + return True + if left[0] == "<" and right[0] == ">": + if left[1:] == right[1:]: + return True + if left[0] == ">" and right[0] == "<": + if left[1:] == right[1:]: + return True + return False + +def find_compatible_pair(polymol, residue, bond_type="bond_type", eligible_nodes=None): + ref_nodes = nx.get_node_attributes(polymol, bond_type) + target_nodes = nx.get_node_attributes(residue, bond_type) + for ref_node in ref_nodes: + if eligible_nodes and\ + polymol.nodes[ref_node]['resid'] not in eligible_nodes: + continue + for target_node in target_nodes: + if compatible(ref_nodes[ref_node], + target_nodes[target_node]): + return ref_node, target_node + return None + +class BigSmileParser: + + def __init__(self): + self.molecule = + + def parse_stochastic_object(): + + +def read_simplified_big_smile_string(line): + + # split the different stochastic objects + line = line.strip() + # a stochastic object is enclosed in '{' and '}' + start_idx = next(find_token_indices(line, "{")) + stop_idx = next(find_token_indices(line, "}")) + stoch_line = line[start_idx+1:stop_idx] + # residues are separated by , and end + # groups by ; + if ';' in stoch_line: + residue_string, terminii_string = stoch_line.split(';') + else: + residue_string = stoch_line + terminii_string = None + # let's read the smile residue strings + residues = [] + count = 0 + for residue_string in 
residue_string.split(','): + # figure out if this is a named object + if residue_string[0] == "#": + jdx = next(find_token_indices(residue_string, "=")) + name = residue_string[:jdx] + residue_string = residue_string[jdx:] + else: + name = count + + mol_graph = read_smiles(residue_string) + residues.append((name, mol_graph)) + count += 1 + # let's read the terminal residue strings + end_groups = [] + if terminii_string: + for terminus_string in terminii_string.split(','): + mol_graph = read_smiles(terminus_string) + bond_types = nx.get_node_attributes(mol_graph, "bond_type") + nx.set_node_attributes(mol_graph, bond_types, "ter_bond_type") + end_groups.append(mol_graph) + return cls(dict(residues), end_groups) + + + diff --git a/polyply/src/big_smiles_helper.py b/polyply/src/big_smiles_helper.py new file mode 100644 index 00000000..ae546ffe --- /dev/null +++ b/polyply/src/big_smiles_helper.py @@ -0,0 +1,193 @@ +# Copyright 2020 University of Groningen +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +try: + import pysmiles +except ImportError: + msg = "The tool you are using requires pysmiles as dependcy." 
+ raise ImportError(msg) + +from pysmiles.read_smiles import _tokenize + +def find_anchor(mol, pre_mol, atom): + anchors = list(pre_mol.neighbors(atom)) + for anchor in anchors: + if anchor in mol.nodes: + return False, anchor + for anchor in nx.ego_graph(pre_mol, atom, radius=2).nodes: + if anchor in mol.nodes: + return True, anchor + raise RuntimeError + +def parse_atom(atom): + """ + Parses a SMILES atom token, and returns a dict with the information. + + Note + ---- + Can not deal with stereochemical information yet. This gets discarded. + + Parameters + ---------- + atom : str + The atom string to interpret. Looks something like one of the + following: "C", "c", "[13CH3-1:2]" + + Returns + ------- + dict + A dictionary containing at least 'element', 'aromatic', and 'charge'. If + present, will also contain 'hcount', 'isotope', and 'class'. + """ + defaults = {'charge': 0, 'hcount': 0, 'aromatic': False} + if atom.startswith('[') and any(mark in atom for mark in ['$', '>', '<']): + bond_type = atom[1:-1] + # we have a big smile bond anchor + defaults.update({"element": None, + "bond_type": bond_type}) + return defaults + + if atom.startswith('[') and '#' == atom[1]: + # this atom is a replacable place holder + defaults.update({"element": None, "replace": atom[2:-1]}) + return defaults + + if not atom.startswith('[') and not atom.endswith(']'): + if atom != '*': + # Don't specify hcount to signal we don't actually know anything + # about it + return {'element': atom.capitalize(), 'charge': 0, + 'aromatic': atom.islower()} + else: + return defaults.copy() + + match = ATOM_PATTERN.match(atom) + + if match is None: + raise ValueError('The atom {} is malformatted'.format(atom)) + + out = defaults.copy() + out.update({k: v for k, v in match.groupdict().items() if v is not None}) + + if out.get('element', 'X').islower(): + out['aromatic'] = True + + parse_helpers = { + 'isotope': int, + 'element': str.capitalize, + 'stereo': lambda x: x, + 'hcount': parse_hcount, + 
'charge': parse_charge, + 'class': int, + 'aromatic': lambda x: x, + } + + for attr, val_str in out.items(): + out[attr] = parse_helpers[attr](val_str) + + if out['element'] == '*': + del out['element'] + + if out.get('element') == 'H' and out.get('hcount', 0): + raise ValueError("A hydrogen atom can't have hydrogens") + + if 'stereo' in out: + LOGGER.warning('Atom "%s" contains stereochemical information that will be discarded.', atom) + + return out + +def big_smile_str_to_graph(smile_str): + """ + + """ + bond_to_order = {'-': 1, '=': 2, '#': 3, '$': 4, ':': 1.5, '.': 0} + pre_mol = nx.Graph() + anchor = None + idx = 0 + default_bond = 1 + next_bond = None + branches = [] + ring_nums = {} + for tokentype, token in _tokenize(smiles): + if tokentype == TokenType.ATOM: + pre_mol.add_node(idx, **parse_atom(token)) + if anchor is not None: + if next_bond is None: + next_bond = default_bond + if next_bond or zero_order_bonds: + pre_mol.add_edge(anchor, idx, order=next_bond) + next_bond = None + anchor = idx + idx += 1 + elif tokentype == TokenType.BRANCH_START: + branches.append(anchor) + elif tokentype == TokenType.BRANCH_END: + anchor = branches.pop() + elif tokentype == TokenType.BOND_TYPE: + if next_bond is not None: + raise ValueError('Previous bond (order {}) not used. ' + 'Overwritten by "{}"'.format(next_bond, token)) + next_bond = bond_to_order[token] + elif tokentype == TokenType.RING_NUM: + if token in ring_nums: + jdx, order = ring_nums[token] + if next_bond is None and order is None: + next_bond = default_bond + elif order is None: # Note that the check is needed, + next_bond = next_bond # But this could be pass. + elif next_bond is None: + next_bond = order + elif next_bond != order: # Both are not None + raise ValueError('Conflicting bond orders for ring ' + 'between indices {}'.format(token)) + # idx is the index of the *next* atom we're adding. So: -1. 
+ if pre_mol.has_edge(idx-1, jdx): + raise ValueError('Edge specified by marker {} already ' + 'exists'.format(token)) + if idx-1 == jdx: + raise ValueError('Marker {} specifies a bond between an ' + 'atom and itself'.format(token)) + if next_bond or zero_order_bonds: + pre_mol.add_edge(idx - 1, jdx, order=next_bond) + next_bond = None + del ring_nums[token] + else: + if idx == 0: + raise ValueError("Can't have a marker ({}) before an atom" + "".format(token)) + # idx is the index of the *next* atom we're adding. So: -1. + ring_nums[token] = (idx - 1, next_bond) + next_bond = None + elif tokentype == TokenType.EZSTEREO: + LOGGER.warning('E/Z stereochemical information, which is specified by "%s", will be discarded', token) + if ring_nums: + raise KeyError('Unmatched ring indices {}'.format(list(ring_nums.keys()))) + + return pre_mol + +def mol_graph_from_big_smile_graph(pre_mol): + # here we condense any BigSmilesBonding information + clean_nodes = [node for node in pre_mol.nodes(data=True) if 'bond_type' not in node[1]] + mol = nx.Graph() + mol.add_nodes_from(clean_nodes) + mol.add_edges_from([edge for edge in pre_mol.edges if edge[0] in mol.nodes and edge[1] in mol.nodes]) + for node in pre_mol.nodes: + if 'bond_type' in pre_mol.nodes[node]: + terminus, anchor = find_anchor(mol, pre_mol, node) + if terminus: + mol.nodes[anchor].update({"ter_bond_type": pre_mol.nodes[node]['bond_type'], + "ter_bond_probs": pre_mol.nodes[node]['bond_probs']}) + else: + mol.nodes[anchor].update({"bond_type": pre_mol.nodes[node]['bond_type'], + "bond_probs": pre_mol.nodes[node]['bond_probs']}) + return mol diff --git a/polyply/src/fragment_finder.py b/polyply/src/fragment_finder.py index bde5316b..060fbb44 100644 --- a/polyply/src/fragment_finder.py +++ b/polyply/src/fragment_finder.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- +import numpy as np import networkx as nx from vermouth.graph_utils import make_residue_graph from polyply.src.graph_utils import find_one_ismags_match @@ -123,6 +123,33 @@ def __init__(self, molecule, prefix): self.molecule.nodes[node]["element"] = self.masses_to_element[mass] self.molecule.nodes[node]["degree"] = self.molecule.degree(node) + def linearize_resids(self, unique_fragments): + resids = np.arange(0, len(self.res_graph)) + old_resids = {} + # find the first terminal + ter = self.ter_prefix + ter_nodes = [ node[0] for node in self.res_graph.nodes(data=True) if ter in node[1]['resname'] ] + print(ter_nodes[0]) + #assert 0 > len(ter_nodes) < 3 + path = nx.dfs_edges(self.res_graph, source=ter_nodes[0]) + old_resids = {self.res_graph.nodes[ter_nodes[0]]['resid']: resids[0]} + self.res_graph.nodes[ter_nodes[0]]['resid'] = resids[0] + for mol_node in self.res_graph.nodes[ter_nodes[0]]['graph'].nodes: + self.res_graph.nodes[ter_nodes[0]]['graph'].nodes[mol_node]['resid'] = resids[0] + self.molecule.nodes[mol_node]['resid'] = resids[0] + + for new_resid, (_, node) in zip(resids[1:], path): + print('node', node) + old_resids[self.res_graph.nodes[node]['resid']] = new_resid + self.res_graph.nodes[node]['resid'] = new_resid + for mol_node in self.res_graph.nodes[node]['graph'].nodes: + self.res_graph.nodes[node]['graph'].nodes[mol_node]['resid'] = new_resid + self.molecule.nodes[mol_node]['resid'] = new_resid + print(old_resids) + for fragment in unique_fragments.values(): + for node in fragment.nodes: + fragment.nodes[node]['resid'] = old_resids[fragment.nodes[node]['resid']] + def _node_match(self, node1, node2): """ Check if two node dicts match. 
@@ -342,4 +369,5 @@ def extract_unique_fragments(self, fragment_graphs): # remake the residue graph since some resnames have changed self.make_res_graph() + self.linearize_resids(unique_fragments) return unique_fragments, self.res_graph diff --git a/polyply/src/new.py b/polyply/src/new.py new file mode 100644 index 00000000..4ed025ec --- /dev/null +++ b/polyply/src/new.py @@ -0,0 +1,76 @@ +import re + +PATTERNS = {"bond_anchor": "\[\$.*?\]", + "place_holder": "\[\#.*?\]", + "annotation": "\|.*?\|", + "fragment": r'#(\w+)=((?:\[.*?\]|[^,\[\]]+)*)', + "seq_pattern": r'\{([^}]*)\}(?:\.\{([^}]*)\})?'} + +def read_big_smile(line): + res_graphs = [] + seq_str, patterns = re.findall(PATTERNS['seq_pattern'], line)[0] + fragments = dict(re.findall(PATTERNS['fragment'], patterns)) + for fragment in fragments: + res_graphs.append(read_smile_w_bondtypes(fragment_smile)) + + # now stitch together .. + # 1 segement the seq_str + # allocate any leftover atoms + # add the residues + targets = set() + for match in re.finditer(PATTERNS['place_holder'], seq_str): + targets.add(match.group(0)) + for target in targets: + seq_str = seq_str.replace(target, fragments[target[2:-1]]) + + return seq_str + +def read_smile_w_bondtypes(line): + smile = line + bonds=[] + # find all bond types and remove them from smile + for bond in re.finditer(PATTERNS['bond_anchor'], ex_str): + smile=smile.replace(bond.group(0), "") + bonds.append((bond.span(0), bond.group(0)[1:-1])) + + # read smile and make molecule + mol = read_smiles(smile) + pos_to_node = position_to_node(smile) + + # strip the first terminal anchor if there is any // + + # associate the bond atoms with the smile atoms + for bond in bonds: + # the bondtype contains the zero index so it + # referes to the first smile node + if bond[0][0] == 0: + mol.nodes[0]['bondtype'] = bond[1] + else: + anchor = find_anchor(smile, bond[0][0]) + mol.nodes[anchor]['bondtype'] = bond[1] + + return mol + + +def find_anchor(smile, start): + branch = False + 
sub_smile=smile[:start] + for idx, token in enumerate(sub_smile[::-1]): + if token == ")": + branch = True + continue + if token == "(" and branch: + branch = False + continue + if not branch: + return start-idx + raise IndexError + +def position_to_node(smile): + count=0 + pos_to_node={} + for idx, token in enumerate(smile): + if token not in ['[', ']', '$', '@', '(', ')']: + pos_to_node[idx] = count + count+=1 + return pos_to_node From 40b89af2fe29eda0c1d61123cdd4b2eb20318eb3 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Fri, 19 Jan 2024 10:44:59 +0100 Subject: [PATCH 42/82] infrastructure for big smile parsing --- polyply/src/big_smile_parsing.py | 222 +++++++++++++++++++++++++++++++ 1 file changed, 222 insertions(+) create mode 100644 polyply/src/big_smile_parsing.py diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py new file mode 100644 index 00000000..72e504e6 --- /dev/null +++ b/polyply/src/big_smile_parsing.py @@ -0,0 +1,222 @@ +import re +import pysmiles +import networkx as nx +from vermouth.forcefield import ForceField +from vermouth.molecule import Block +from polyply.src.meta_molecule import MetaMolecule + +PATTERNS = {"bond_anchor": "\[\$.*?\]", + "place_holder": "\[\#.*?\]", + "annotation": "\|.*?\|", + "fragment": r'#(\w+)=((?:\[.*?\]|[^,\[\]]+)*)', + "seq_pattern": r'\{([^}]*)\}(?:\.\{([^}]*)\})?'} + +def res_pattern_to_meta_mol(pattern): + """ + Generate a :class:`polyply.MetaMolecule` from a + pattern string describing a residue graph with the + simplified big-smile syntax. + + The syntax scheme consists of two curly braces + enclosing the residue graph sequence. It can contain + any enumeration of residues by writing them as if they + were smile atoms but the atomname is given by # + resname. + This input fomat can handle branching as well ,however, + macrocycles are currently not supported. + + General Pattern + '{' + [#resname_1][#resname_2]... 
+ '}' + + In addition to plain enumeration any residue may be + followed by a '|' and an integern number that + specifies how many times the given residue should + be added within a sequence. For example, a pentamer + of PEO can be written as: + + {[#PEO][#PEO][#PEO][#PEO][#PEO]} + + or + + {[#PEO]|5} + + The block syntax also applies to branches. Here the convetion + is that the complete branch including it's first anchoring + residue is repeated. For example, to generate a PMA-g-PEG + polymer the following syntax is permitted: + + {[#PMA]([#PEO][#PEO])|5} + + Parameters + ---------- + pattern: str + a string describing the meta-molecule + + Returns + ------- + :class:`polyply.MetaMolecule` + """ + meta_mol = MetaMolecule() + current = 0 + branch_anchor = 0 + prev_node = None + branching = False + for match in re.finditer(PATTERNS['place_holder'], pattern): + start, stop = match.span() + # new branch here + if pattern[start-1] == '(': + branching = True + branch_anchor = prev_node + recipie = [(meta_mol.nodes[prev_node]['resname'], 1)] + if stop < len(pattern) and pattern[stop] == '|': + n_mon = int(pattern[stop+1:pattern.find('[', stop)]) + else: + n_mon = 1 + + resname = match.group(0)[2:-1] + # collect all residues in branch + if branching: + recipie.append((resname, n_mon)) + + # add the new residue + connection = [] + for _ in range(0, n_mon): + if prev_node is not None: + connection = [(prev_node, current)] + meta_mol.add_monomer(current, + resname, + connection) + prev_node = current + current += 1 + + # terminate branch and jump back to anchor + if stop < len(pattern) and pattern[stop] == ')' and branching: + branching = False + prev_node = branch_anchor + # we have to multiply the branch n-times + if stop+1 < len(pattern) and pattern[stop+1] == "|": + for _ in range(0,int(pattern[stop+2:pattern.find('[', stop)])): + for bdx, (resname, n_mon) in enumerate(recipie): + if bdx == 0: + anchor = current + for _ in range(0, n_mon): + connection = [(prev_node, 
current)] + meta_mol.add_monomer(current, + resname, + connection) + prev_node = current + current += 1 + prev_node = anchor + return meta_mol + +def _big_smile_iter(smile): + for token in smile: + yield token + +def tokenize_big_smile(big_smile): + """ + Processes a BigSmile string by storing the + the BigSmile specific bonding descriptors + in a dict with refernce to the atom they + refer to. Furthermore, a cleaned smile + string is generated with the BigSmile + specific syntax removed. + + Parameters + ---------- + smile: str + a BigSmile smile string + + Returns + ------- + str + a canonical smile string + dict + a dict mapping bonding descriptors + to the nodes within the smile + """ + smile_iter = _big_smile_iter(big_smile) + bonding_descrpt = {} + smile = "" + node_count = 0 + prev_node = 0 + for token in smile_iter: + if token == '[': + peek = next(smile_iter) + if peek in ['$', '>', '<']: + bond_descrp = peek + peek = next(smile_iter) + while peek != ']': + bond_descrp += peek + peek = next(smile_iter) + bonding_descrpt[prev_node] = bond_descrp + else: + smile = smile + token + peek + prev_node = node_count + node_count += 1 + + elif token == '(': + anchor = prev_node + smile += token + elif token == ')': + prev_node = anchor + smile += token + else: + if token not in '@ . - = # $ : / \\ + - %': + prev_node = node_count + node_count += 1 + smile += token + return smile, bonding_descrpt + +def fragment_iter(fragment_str): + """ + Iterates over fragments defined in a BigSmile string. + Fragments are named residues that consist of a single + smile string together with the BigSmile specific bonding + descriptors. The function returns the resname of a named + fragment as well as a plain nx.Graph of the molecule + described by the smile. Bonding descriptors are annotated + as node attributes with the keyword bonding. 
+ + Parameters + ---------- + fragment_str: str + the string describing the fragments + + Yields + ------ + str, nx.Graph + """ + for fragment in fragment_str[1:-1].split(','): + delim = fragment.find('=', 0) + resname = fragment[1:delim] + big_smile = fragment[delim+1:] + smile, bonding_descrpt = tokenize_big_smile(big_smile) + mol_graph = pysmiles.read_smiles(smile) + atomnames = [str(node[0])+node[1]['element'] for node in mol_graph.nodes(data=True) ] + nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding') + nx.set_node_attributes(mol_graph, atomnames, 'atomname') + nx.set_node_attributes(mol_graph, resname, 'resname') + yield resname, mol_graph + +def force_field_from_fragments(fragment_str): + """ + Collects the fragments defined in a BigSmile string + as :class:`vermouth.molecule.Blocks` in a force-field + object. Bonding descriptors are annotated as node + attribtues. + + Parameters + ---------- + fragment_str: str + string using BigSmile fragment syntax + + Returns + ------- + :class:`vermouth.forcefield.ForceField` + """ + force_field = ForceField("big_smile_ff") + frag_iter = fragment_iter(fragment_str) + for resname, mol_graph in frag_iter: + mol_block = Block(mol_graph) + force_field.blocks[resname] = mol_block + return forxe_field From 05ed0456919ad1865925f45b07d2e95396053734 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Fri, 19 Jan 2024 10:47:06 +0100 Subject: [PATCH 43/82] optional dep. for pysmiles --- polyply/src/big_smile_parsing.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py index 72e504e6..2ad65a7b 100644 --- a/polyply/src/big_smile_parsing.py +++ b/polyply/src/big_smile_parsing.py @@ -1,5 +1,10 @@ import re -import pysmiles +try: + import pysmiles +except ImportError: + msg = ("You are using a functionality that requires " + "the pysmiles package. 
Use pip install pysmiles ") + raise ImportError(msg) import networkx as nx from vermouth.forcefield import ForceField from vermouth.molecule import Block From 82a2acc2bc0aa6a3e47eb9f51e4d4db06f4f0dbe Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Fri, 19 Jan 2024 10:50:13 +0100 Subject: [PATCH 44/82] add a processor that reads a big smile string and returns a full metamolecule including edges. --- polyply/src/big_smile_mol_processsor.py | 99 +++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 polyply/src/big_smile_mol_processsor.py diff --git a/polyply/src/big_smile_mol_processsor.py b/polyply/src/big_smile_mol_processsor.py new file mode 100644 index 00000000..8131e009 --- /dev/null +++ b/polyply/src/big_smile_mol_processsor.py @@ -0,0 +1,99 @@ +import networkx as nx +from polyply.src.big_smile_parsing import (res_pattern_to_meta_mol, + force_field_from_fragments) +from polyply.src.map_to_molecule import MapToMolecule + +def compatible(left, right): + """ + Check bonding descriptor compatibility according + to the BigSmiles syntax convetions. + + Parameters + ---------- + left: str + right: str + + Returns + ------- + bool + """ + if left == right: + return True + if left[0] == "<" and right[0] == ">": + if left[1:] == right[1:]: + return True + if left[0] == ">" and right[0] == "<": + if left[1:] == right[1:]: + return True + return False + +def generate_edge(source, target, bond_type="bonding"): + """ + Given a source and a target graph, which have bonding + descriptors stored as node attributes, find a pair of + matching descriptors and return the respective nodes. + The function also returns the bonding descriptors. If + no bonding descriptor is found an instance of LookupError + is raised. + + Parameters + ---------- + source: :class:`nx.Graph` + target: :class:`nx.Graph` + bond_type: `abc.hashable` + under which attribute are the bonding descriptors + stored. 
+ + Returns + ------- + ((abc.hashable, abc.hashable), (str, str)) + the nodes as well as bonding descriptors + + Raises + ------ + LookupError + if no match is found + """ + source_nodes = nx.get_node_attributes(source, bond_type) + target_nodes = nx.get_node_attributes(target, bond_type) + for source_node in source_nodes: + for target_node in target_nodes: + bond_source = source_nodes[source_node] + bond_target = target_nodes[target_node] + if compatible(bond_source, bond_target): + return ((source_node, target_node), (bond_source, bond_target)) + raise LookupError + +class DefBigSmileParser: + """ + Parse an a string instance of a defined BigSmile, + which describes a polymer molecule. + """ + + def __init__(self): + self.force_field = None + self.meta_molecule = None + self.molecule = None + + def edges_from_bonding_descrpt(self): + """ + Make edges according to the bonding descriptors stored + in the node attributes of meta_molecule residue graph. + If a bonding descriptor is consumed it is set to None, + however, the meta_molecule edge gets an attribute with the + bonding descriptors that formed the edge. 
+ """ + for prev_node, node in nx.dfs_edges(self.meta_molecule): + edge, bonding = generate_edge(self.meta_molecule.nodes[prev_node]['graph'], + self.meta_molecule.nodes[node]['graph']) + self.meta_molecule.nodes[prev_node]['graph'][edge[0]]['bonding'] = None + self.meta_molecule.nodes[prev_node]['graph'][edge[1]]['bonding'] = None + self.meta_molecule.molecule.add_edge(edge, bonding=bonding) + + def parse(self, big_smile_str): + res_pattern, residues = big_smile_str.split('.') + self.meta_molecule = res_pattern_to_meta_mol(res_pattern) + self.force_field = force_field_from_fragments(residues) + MapToMolecule(self.force_field).run_molecule(self.meta_molecule) + self.edges_from_bonding_descrpt() + return self.meta_molecule From 257b76b7665355d217dfb0b71249e64255096a35 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Sat, 20 Jan 2024 15:43:12 +0100 Subject: [PATCH 45/82] atest-big-smile parsing part I --- polyply/tests/test_big_smile_parsing.py | 64 +++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 polyply/tests/test_big_smile_parsing.py diff --git a/polyply/tests/test_big_smile_parsing.py b/polyply/tests/test_big_smile_parsing.py new file mode 100644 index 00000000..43045a83 --- /dev/null +++ b/polyply/tests/test_big_smile_parsing.py @@ -0,0 +1,64 @@ +import pytest +import networkx as nx +from polyply.src.big_smile_parsing import (res_pattern_to_meta_mol, + tokenize_big_smile) + +@pytest.mark.parametrize('smile, nodes, edges',( + # smiple linear seqeunce + ("{[#PMA][#PEO][#PMA]}", + ["PMA", "PEO", "PMA"], + [(0, 1), (1, 2)]), + # simple branched sequence + ("{[#PMA][#PMA]([#PEO][#PEO])[#PMA]}", + ["PMA", "PMA", "PEO", "PEO", "PMA"], + [(0, 1), (1, 2), (2, 3), (1, 4)]), + # simple sequence two branches + ("{[#PMA][#PMA][#PMA]([#PEO][#PEO])([#CH3])[#PMA]}", + ["PMA", "PMA", "PMA", "PEO", "PEO", "CH3", "PMA"], + [(0, 1), (1, 2), (2, 3), (3, 4), (2, 5), (2, 6)]), + # simple linear sequence with expansion + ("{[#PMA]|3}", + ["PMA", 
"PMA", "PMA"], + [(0, 1), (1, 2)]), + ## simple branched with expansion + #("{[#PMA]([#PEO]|3)|2}", + #["PMA", "PEO", "PEO", "PEO", + # "PMA", "PEO", "PEO", "PEO"], + #[(0, 1), (1, 2), (2, 3), + # (0, 4), (4, 5), (5, 6), (6, 7)] + # ) +)) +def test_res_pattern_to_meta_mol(smile, nodes, edges): + """ + Test that the meta-molecule is correctly reproduced + from the simplified smile string syntax. + """ + meta_mol = res_pattern_to_meta_mol(smile) + assert len(meta_mol.edges) == len(edges) + for edge in edges: + assert meta_mol.has_edge(*edge) + resnames = nx.get_node_attributes(meta_mol, 'resname') + assert nodes == list(resnames.values()) + +@pytest.mark.parametrize('big_smile, smile, bonding',( + # smiple symmetric bonding + ("[$]COC[$]", + "COC", + {0: '$', 2: '$'}), + # named different bonding descriptors + ("[$1]CCCC[$2]", + "CCCC", + {0: "$1", 3: "$2"}), + # bonding descript. after branch + ("C(COC[$1])[$2]CCC[$3]", + "C(COC)CCC", + {0: '$2', 3: '$1', 6: '$3'}), + # left rigth bonding desciptors + ("[>]COC[<]", + "COC", + {0: '>', 2: '<'}) +)) +def test_tokenize_big_smile(big_smile, smile, bonding): + new_smile, new_bonding = tokenize_big_smile(big_smile) + assert new_smile == smile + assert new_bonding == bonding From 061c8efc60931b2fff5ef4c866b0c3a952ebded9 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Mon, 22 Jan 2024 16:37:32 +0100 Subject: [PATCH 46/82] fix hcount for single atom; fix nexted branches --- polyply/src/big_smile_parsing.py | 54 +++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py index 2ad65a7b..ddb9bd2a 100644 --- a/polyply/src/big_smile_parsing.py +++ b/polyply/src/big_smile_parsing.py @@ -1,4 +1,5 @@ import re +import numpy as np try: import pysmiles except ImportError: @@ -16,6 +17,12 @@ "fragment": r'#(\w+)=((?:\[.*?\]|[^,\[\]]+)*)', "seq_pattern": r'\{([^}]*)\}(?:\.\{([^}]*)\})?'} +def _find_next_character(string, chars, 
start): + for idx, token in enumerate(string[start:]): + if token in chars: + return idx+start + return np.inf + def res_pattern_to_meta_mol(pattern): """ Generate a :class:`polyply.MetaMolecule` from a @@ -67,13 +74,15 @@ def res_pattern_to_meta_mol(pattern): branching = False for match in re.finditer(PATTERNS['place_holder'], pattern): start, stop = match.span() + print(pattern[start:stop]) # new branch here if pattern[start-1] == '(': branching = True branch_anchor = prev_node recipie = [(meta_mol.nodes[prev_node]['resname'], 1)] if stop < len(pattern) and pattern[stop] == '|': - n_mon = int(pattern[stop+1:pattern.find('[', stop)]) + eon = _find_next_character(pattern, ['[', ')', '(', '}'], stop) + n_mon = int(pattern[stop+1:eon]) else: n_mon = 1 @@ -94,12 +103,17 @@ def res_pattern_to_meta_mol(pattern): current += 1 # terminate branch and jump back to anchor - if stop < len(pattern) and pattern[stop] == ')' and branching: + branch_stop = _find_next_character(pattern, ['['], stop) >\ + _find_next_character(pattern, [')'], stop) + if stop <= len(pattern) and branch_stop and branching: branching = False prev_node = branch_anchor # we have to multiply the branch n-times - if stop+1 < len(pattern) and pattern[stop+1] == "|": - for _ in range(0,int(pattern[stop+2:pattern.find('[', stop)])): + eon_a = _find_next_character(pattern, [')'], stop) + if stop+1 < len(pattern) and pattern[eon_a+1] == "|": + eon_b = _find_next_character(pattern, ['[', ')', '(', '}'], eon_a+1) + # -1 because one branch has already been added at this point + for _ in range(0,int(pattern[eon_a+2:eon_b])-1): for bdx, (resname, n_mon) in enumerate(recipie): if bdx == 0: anchor = current @@ -166,12 +180,36 @@ def tokenize_big_smile(big_smile): prev_node = anchor smile += token else: - if token not in '@ . - = # $ : / \\ + - %': + if token not in '@ . 
- = # $ : / \\ + - %'\ + and not token.isdigit(): prev_node = node_count node_count += 1 smile += token return smile, bonding_descrpt +def _rebuild_h_atoms(mol_graph): + # special hack around to fix + # pysmiles bug for a single + # atom molecule; we assume that the + # hcount is just wrong and set it to + # the valance number minus bonds minus + # bonding connectors + if len(mol_graph.nodes) == 1: + ele = mol_graph.nodes[0]['element'] + # for N and P we assume the regular valency + hcount = pysmiles.smiles_helper.VALENCES[ele][0] + if mol_graph.nodes[0].get('bonding', False): + hcount -= 1 + mol_graph.nodes[0]['hcount'] = hcount + else: + for node in mol_graph.nodes: + if mol_graph.nodes[node].get('bonding', False): + hcount = mol_graph.nodes[node]['hcount'] + mol_graph.nodes[node]['hcount'] = hcount - 1 + + pysmiles.smiles_helper.add_explicit_hydrogens(mol_graph) + return mol_graph + def fragment_iter(fragment_str): """ Iterates over fragments defined in a BigSmile string. @@ -197,8 +235,10 @@ def fragment_iter(fragment_str): big_smile = fragment[delim+1:] smile, bonding_descrpt = tokenize_big_smile(big_smile) mol_graph = pysmiles.read_smiles(smile) - atomnames = [str(node[0])+node[1]['element'] for node in mol_graph.nodes(data=True) ] nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding') + # we need to rebuild hydrogen atoms now + _rebuild_h_atoms(mol_graph) + atomnames = {node[0]: node[1]['element']+str(node[0]) for node in mol_graph.nodes(data=True)} nx.set_node_attributes(mol_graph, atomnames, 'atomname') nx.set_node_attributes(mol_graph, resname, 'resname') yield resname, mol_graph @@ -224,4 +264,4 @@ def force_field_from_fragments(fragment_str): for resname, mol_graph in frag_iter: mol_block = Block(mol_graph) force_field.blocks[resname] = mol_block - return forxe_field + return force_field From 20e2e4917500ef8621a696f4395494a3dfd5a6e8 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Mon, 22 Jan 2024 16:37:58 +0100 Subject: [PATCH 47/82] tests 
for smile iter and test nested branches --- polyply/tests/test_big_smile_parsing.py | 71 ++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 8 deletions(-) diff --git a/polyply/tests/test_big_smile_parsing.py b/polyply/tests/test_big_smile_parsing.py index 43045a83..3265564c 100644 --- a/polyply/tests/test_big_smile_parsing.py +++ b/polyply/tests/test_big_smile_parsing.py @@ -1,7 +1,8 @@ import pytest import networkx as nx from polyply.src.big_smile_parsing import (res_pattern_to_meta_mol, - tokenize_big_smile) + tokenize_big_smile, + fragment_iter) @pytest.mark.parametrize('smile, nodes, edges',( # smiple linear seqeunce @@ -20,13 +21,20 @@ ("{[#PMA]|3}", ["PMA", "PMA", "PMA"], [(0, 1), (1, 2)]), - ## simple branched with expansion - #("{[#PMA]([#PEO]|3)|2}", - #["PMA", "PEO", "PEO", "PEO", - # "PMA", "PEO", "PEO", "PEO"], - #[(0, 1), (1, 2), (2, 3), - # (0, 4), (4, 5), (5, 6), (6, 7)] - # ) + # simple branch expension + ("{[#PMA]([#PEO][#PEO][#OHter])|2}", + ["PMA", "PEO", "PEO", "OHter", + "PMA", "PEO", "PEO", "OHter"], + [(0, 1), (1, 2), (2, 3), + (0, 4), (4, 5), (5, 6), (6, 7)] + ), + # nested branched with expansion + ("{[#PMA]([#PEO]|3)|2}", + ["PMA", "PEO", "PEO", "PEO", + "PMA", "PEO", "PEO", "PEO"], + [(0, 1), (1, 2), (2, 3), + (0, 4), (4, 5), (5, 6), (6, 7)] + ) )) def test_res_pattern_to_meta_mol(smile, nodes, edges): """ @@ -49,6 +57,10 @@ def test_res_pattern_to_meta_mol(smile, nodes, edges): ("[$1]CCCC[$2]", "CCCC", {0: "$1", 3: "$2"}), + # ring and bonding descriptors + ("[$1]CC[$2]C1CCCCC1", + "CCC1CCCCC1", + {0: "$1", 1: "$2"}), # bonding descript. 
after branch ("C(COC[$1])[$2]CCC[$3]", "C(COC)CCC", @@ -62,3 +74,46 @@ def test_tokenize_big_smile(big_smile, smile, bonding): new_smile, new_bonding = tokenize_big_smile(big_smile) assert new_smile == smile assert new_bonding == bonding + +@pytest.mark.parametrize('fragment_str, nodes, edges',( + # single fragment + ("{#PEO=[$]COC[$]}", + {"PEO": ((0, {"atomname": "C0", "resname": "PEO", "bonding": "$", "element": "C"}), + (1, {"atomname": "O1", "resname": "PEO", "element": "O"}), + (2, {"atomname": "C2", "resname": "PEO", "bonding": "$", "element": "C"}), + (3, {"atomname": "H3", "resname": "PEO", "element": "H"}), + (4, {"atomname": "H4", "resname": "PEO", "element": "H"}), + (5, {"atomname": "H5", "resname": "PEO", "element": "H"}), + (6, {"atomname": "H6", "resname": "PEO", "element": "H"}), + )}, + {"PEO": [(0, 1), (1, 2), (0, 3), (0, 4), (2, 5), (2, 6)]}), + # test NH3 terminal + ("{#AMM=N[$]}", + {"AMM": ((0, {"atomname": "N0", "resname": "AMM", "bonding": "$", "element": "N"}), + (1, {"atomname": "H1", "resname": "AMM", "element": "H"}), + (2, {"atomname": "H2", "resname": "AMM", "element": "H"}), + )}, + {"AMM": [(0, 1), (0, 2)]}), + # single fragment + 1 terminal (i.e. 
only 1 bonding descrpt + ("{#PEO=[$]COC[$],#OHter=[$][OH]}", + {"PEO": ((0, {"atomname": "C0", "resname": "PEO", "bonding": "$", "element": "C"}), + (1, {"atomname": "O1", "resname": "PEO", "element": "O"}), + (2, {"atomname": "C2", "resname": "PEO", "bonding": "$", "element": "C"}), + (3, {"atomname": "H3", "resname": "PEO", "element": "H"}), + (4, {"atomname": "H4", "resname": "PEO", "element": "H"}), + (5, {"atomname": "H5", "resname": "PEO", "element": "H"}), + (6, {"atomname": "H6", "resname": "PEO", "element": "H"}), + ), + "OHter": ((0, {"atomname": "O0", "resname": "OHter", "bonding": "$", "element": "O"}), + (1, {"atomname": "H1", "resname": "OHter", "element": "H"}))}, + {"PEO": [(0, 1), (1, 2), (0, 3), (0, 4), (2, 5), (2, 6)], + "OHter": [(0, 1)]}), +)) +def test_fragment_iter(fragment_str, nodes, edges): + for resname, mol_graph in fragment_iter(fragment_str): + assert len(mol_graph.nodes) == len(nodes[resname]) + for node, ref_node in zip(mol_graph.nodes(data=True), nodes[resname]): + assert node[0] == ref_node[0] + for key in ref_node[1]: + assert ref_node[1][key] == node[1][key] + assert sorted(mol_graph.edges) == sorted(edges[resname]) From f505129f98cdff951f9137a832cb604f44bce8f9 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Mon, 22 Jan 2024 16:40:17 +0100 Subject: [PATCH 48/82] add pysmiles to test requrm. 
--- requirements-tests.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements-tests.txt b/requirements-tests.txt index 595a4902..03357910 100644 --- a/requirements-tests.txt +++ b/requirements-tests.txt @@ -4,3 +4,4 @@ pytest-cov pylint codecov tqdm +pysmiles From 0c67ecc17c530c50aa98781fe7df0bb37324e983 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Mon, 22 Jan 2024 18:49:28 +0100 Subject: [PATCH 49/82] add tests for bonding descriptor evaluation --- polyply/tests/test_big_smile_mol_proc.py | 37 ++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 polyply/tests/test_big_smile_mol_proc.py diff --git a/polyply/tests/test_big_smile_mol_proc.py b/polyply/tests/test_big_smile_mol_proc.py new file mode 100644 index 00000000..7bcdf9f9 --- /dev/null +++ b/polyply/tests/test_big_smile_mol_proc.py @@ -0,0 +1,37 @@ +import pytest +import networkx as nx +from polyply.src.big_smile_mol_processor import (DefBigSmileParser, + generate_edge) + +@pytest.mark.parametrize('bonds_source, bonds_target, edge, btypes',( + # single bond source each + ({0: "$"}, + {3: "$"}, + (0, 3), + ('$', '$')), + # multiple sources one match + ({0: '$1', 2: '$2'}, + {1: '$2', 3: '$'}, + (2, 1), + ('$2', '$2')), + # left right selective bonding + ({0: '$', 1: '>', 3: '<'}, + {0: '>', 1: '$5'}, + (3, 0), + ('<', '>')), + # left right selective bonding + # with identifier + ({0: '$', 1: '>', 3: '<1'}, + {0: '>', 1: '$5', 2: '>1'}, + (3, 2), + ('<1', '>1')), + +)) +def test_generate_edge(bonds_source, bonds_target, edge, btypes): + source = nx.path_graph(5) + target = nx.path_graph(4) + nx.set_node_attributes(source, bonds_source, "bonding") + nx.set_node_attributes(target, bonds_target, "bonding") + new_edge, new_btypes = generate_edge(source, target, bond_type="bonding") + assert new_edge == edge + assert new_btypes == btypes From 52235c91887a87b1be7bea3d50902f9470e286e2 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Tue, 23 Jan 2024 18:57:46 +0100 
Subject: [PATCH 50/82] add tests for big smile molecule prc --- polyply/tests/test_big_smile_mol_proc.py | 66 ++++++++++++++++++++---- 1 file changed, 57 insertions(+), 9 deletions(-) diff --git a/polyply/tests/test_big_smile_mol_proc.py b/polyply/tests/test_big_smile_mol_proc.py index 7bcdf9f9..58667ed8 100644 --- a/polyply/tests/test_big_smile_mol_proc.py +++ b/polyply/tests/test_big_smile_mol_proc.py @@ -2,27 +2,32 @@ import networkx as nx from polyply.src.big_smile_mol_processor import (DefBigSmileParser, generate_edge) - +import matplotlib.pyplot as plt @pytest.mark.parametrize('bonds_source, bonds_target, edge, btypes',( # single bond source each - ({0: "$"}, - {3: "$"}, + ({0: ["$"]}, + {3: ["$"]}, + (0, 3), + ('$', '$')), + # include a None + ({0: ["$"], 1: []}, + {3: ["$"]}, (0, 3), ('$', '$')), # multiple sources one match - ({0: '$1', 2: '$2'}, - {1: '$2', 3: '$'}, + ({0: ['$1'], 2: ['$2']}, + {1: ['$2'], 3: ['$']}, (2, 1), ('$2', '$2')), # left right selective bonding - ({0: '$', 1: '>', 3: '<'}, - {0: '>', 1: '$5'}, + ({0: ['$'], 1: ['>'], 3: ['<']}, + {0: ['>'], 1: ['$5']}, (3, 0), ('<', '>')), # left right selective bonding # with identifier - ({0: '$', 1: '>', 3: '<1'}, - {0: '>', 1: '$5', 2: '>1'}, + ({0: ['$'], 1: ['>'], 3: ['<1']}, + {0: ['>'], 1: ['$5'], 2: ['>1']}, (3, 2), ('<1', '>1')), @@ -35,3 +40,46 @@ def test_generate_edge(bonds_source, bonds_target, edge, btypes): new_edge, new_btypes = generate_edge(source, target, bond_type="bonding") assert new_edge == edge assert new_btypes == btypes + + +@pytest.mark.parametrize('smile, ref_nodes, ref_edges',( + # smiple linear seqeunce + ("{[#OHter][#PEO]|2[#OHter]}.{#PEO=[$]COC[$],#OHter=[$][O]}", + # 0 1 2 3 4 5 6 7 8 + [('OHter', 'O H'), ('PEO', 'C O C H H H H'), + # 9 10 11 12 13 14 15 16 17 + ('PEO', 'C O C H H H H'), ('OHter', 'O H')], + [(0, 1), (0, 2), (2, 3), (3, 4), (2, 5), (2, 6), (4, 7), + (4, 8), (4, 9), (9, 10), (10, 11), (9, 12), (9, 13), + (11, 14), (11, 15), (11, 16), (16, 17)]), + 
# simple branched sequence + ("{[#Hter][#PE]([#PEO][#Hter])[#PE]([#PEO][#Hter])[#Hter]}.{#Hter=[$]H,#PE=[$]CC[$][$],#PEO=[$]COC[$]}", + [('Hter', 'H'), ('PE', 'C C H H H'), ('PEO', 'C O C H H H H'), ('Hter', 'H'), + ('PE', 'C C H H H'), ('PEO', 'C O C H H H H'), ('Hter', 'H'), ('Hter', 'H')], + [(0, 1), (1, 2), (1, 3), (1, 4), (2, 5), (2, 6), (2, 14), (6, 7), (6, 9), (6, 10), (7, 8), + (8, 11), (8, 12), (8, 13), (14, 15), (14, 16), (14, 17), (15, 18), (15, 19), (15, 27), + (19, 20), (19, 22), (19, 23), (20, 21), (21, 24), (21, 25), (21, 26)]), + # something with a ring + # 012 34567 + # 890123456 + ("{[#Hter][#PS]|2[#Hter]}.{#PS=[$]CC[$]c1ccccc1,#Hter=[$]H}", + [('Hter', 'H'), ('PS', 'C C C C C C C C H H H H H H H H'), + ('PS', 'C C C C C C C C H H H H H H H H'), ('Hter', 'H')], + [(0, 1), (1, 2), (1, 9), (1, 10), (2, 3), (2, 11), (2, 17), + (3, 4), (3, 8), (4, 5), (4, 12), (5, 6), (5, 13), (6, 7), + (6, 14), (7, 8), (7, 15), (8, 16), (17, 18), (17, 25), + (17, 26), (18, 19), (18, 27), (18, 33), (19, 20), (19, 24), + (20, 21), (20, 28), (21, 22), (21, 29), (22, 23), (22, 30), + (23, 24), (23, 31), (24, 32)]), + +)) +def test_def_big_smile_parser(smile, ref_nodes, ref_edges): + meta_mol = DefBigSmileParser().parse(smile) + for node, ref in zip(meta_mol.nodes, ref_nodes): + assert meta_mol.nodes[node]['resname'] == ref[0] + block_graph = meta_mol.nodes[node]['graph'] + elements = list(nx.get_node_attributes(block_graph, 'element').values()) + assert elements == ref[1].split() + #nx.draw_networkx(meta_mol.molecule, with_labels=True, labels=nx.get_node_attributes(meta_mol.molecule, 'element')) + #plt.show() + assert sorted(meta_mol.molecule.edges) == sorted(ref_edges) From 9a0a674fa685af88df1567d9051a0bbb308e80d4 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Tue, 23 Jan 2024 18:58:18 +0100 Subject: [PATCH 51/82] allow multiple bonding per atom; fix bugs --- polyply/src/big_smile_mol_processor.py | 117 +++++++++++++++++++++++++ polyply/src/big_smile_parsing.py 
| 22 +++-- 2 files changed, 132 insertions(+), 7 deletions(-) create mode 100644 polyply/src/big_smile_mol_processor.py diff --git a/polyply/src/big_smile_mol_processor.py b/polyply/src/big_smile_mol_processor.py new file mode 100644 index 00000000..8499e7e3 --- /dev/null +++ b/polyply/src/big_smile_mol_processor.py @@ -0,0 +1,117 @@ +import networkx as nx +from polyply.src.big_smile_parsing import (res_pattern_to_meta_mol, + force_field_from_fragments) +from polyply.src.map_to_molecule import MapToMolecule + +def compatible(left, right): + """ + Check bonding descriptor compatibility according + to the BigSmiles syntax convetions. + + Parameters + ---------- + left: str + right: str + + Returns + ------- + bool + """ + if left == right and left not in '> <': + return True + if left[0] == "<" and right[0] == ">": + if left[1:] == right[1:]: + return True + if left[0] == ">" and right[0] == "<": + if left[1:] == right[1:]: + return True + return False + +def generate_edge(source, target, bond_type="bonding"): + """ + Given a source and a target graph, which have bonding + descriptors stored as node attributes, find a pair of + matching descriptors and return the respective nodes. + The function also returns the bonding descriptors. If + no bonding descriptor is found an instance of LookupError + is raised. + + Parameters + ---------- + source: :class:`nx.Graph` + target: :class:`nx.Graph` + bond_type: `abc.hashable` + under which attribute are the bonding descriptors + stored. 
+ + Returns + ------- + ((abc.hashable, abc.hashable), (str, str)) + the nodes as well as bonding descriptors + + Raises + ------ + LookupError + if no match is found + """ + source_nodes = nx.get_node_attributes(source, bond_type) + target_nodes = nx.get_node_attributes(target, bond_type) + for source_node in source_nodes: + for target_node in target_nodes: + #print(source_node, target_node) + bond_sources = source_nodes[source_node] + bond_targets = target_nodes[target_node] + for bond_source in bond_sources: + for bond_target in bond_targets: + #print(bond_source, bond_target) + if compatible(bond_source, bond_target): + return ((source_node, target_node), (bond_source, bond_target)) + raise LookupError + +class DefBigSmileParser: + """ + Parse an a string instance of a defined BigSmile, + which describes a polymer molecule. + """ + + def __init__(self): + self.force_field = None + self.meta_molecule = None + self.molecule = None + + def edges_from_bonding_descrpt(self): + """ + Make edges according to the bonding descriptors stored + in the node attributes of meta_molecule residue graph. + If a bonding descriptor is consumed it is set to None, + however, the meta_molecule edge gets an attribute with the + bonding descriptors that formed the edge. 
+ """ + for prev_node, node in nx.dfs_edges(self.meta_molecule): + prev_graph = self.meta_molecule.nodes[prev_node]['graph'] + node_graph = self.meta_molecule.nodes[node]['graph'] + edge, bonding = generate_edge(prev_graph, + node_graph) + # this is a bit of a workaround because at this stage the + # bonding list is actually shared between all residues of + # of the same type; so we first make a copy then we replace + # the list sans used bonding descriptor + prev_bond_list = prev_graph.nodes[edge[0]]['bonding'].copy() + prev_bond_list.remove(bonding[0]) + prev_graph.nodes[edge[0]]['bonding'] = prev_bond_list + node_bond_list = node_graph.nodes[edge[1]]['bonding'].copy() + node_bond_list.remove(bonding[1]) + node_graph.nodes[edge[1]]['bonding'] = node_bond_list + self.meta_molecule.molecule.add_edge(edge[0], edge[1], bonding=bonding) + + def parse(self, big_smile_str): + res_pattern, residues = big_smile_str.split('.') + self.meta_molecule = res_pattern_to_meta_mol(res_pattern) + self.force_field = force_field_from_fragments(residues) + MapToMolecule(self.force_field).run_molecule(self.meta_molecule) + self.edges_from_bonding_descrpt() + return self.meta_molecule + +# ToDo +# - replace non consumed bonding descrpt by hydrogen +# - diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py index ddb9bd2a..fa6348cc 100644 --- a/polyply/src/big_smile_parsing.py +++ b/polyply/src/big_smile_parsing.py @@ -1,3 +1,4 @@ +from collections import defaultdict import re import numpy as np try: @@ -154,7 +155,7 @@ def tokenize_big_smile(big_smile): to the nodes within the smile """ smile_iter = _big_smile_iter(big_smile) - bonding_descrpt = {} + bonding_descrpt = defaultdict(list) smile = "" node_count = 0 prev_node = 0 @@ -167,7 +168,7 @@ def tokenize_big_smile(big_smile): while peek != ']': bond_descrp += peek peek = next(smile_iter) - bonding_descrpt[prev_node] = bond_descrp + bonding_descrpt[prev_node].append(bond_descrp) else: smile = smile + token + 
peek prev_node = node_count @@ -205,7 +206,7 @@ def _rebuild_h_atoms(mol_graph): for node in mol_graph.nodes: if mol_graph.nodes[node].get('bonding', False): hcount = mol_graph.nodes[node]['hcount'] - mol_graph.nodes[node]['hcount'] = hcount - 1 + mol_graph.nodes[node]['hcount'] = hcount - len(mol_graph.nodes[node]['bonding']) pysmiles.smiles_helper.add_explicit_hydrogens(mol_graph) return mol_graph @@ -234,10 +235,17 @@ def fragment_iter(fragment_str): resname = fragment[1:delim] big_smile = fragment[delim+1:] smile, bonding_descrpt = tokenize_big_smile(big_smile) - mol_graph = pysmiles.read_smiles(smile) - nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding') - # we need to rebuild hydrogen atoms now - _rebuild_h_atoms(mol_graph) + + if smile == "H": + mol_graph = nx.Graph() + mol_graph.add_node(0, element="H", bonding=bonding_descrpt[0]) + nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding') + else: + mol_graph = pysmiles.read_smiles(smile) + nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding') + # we need to rebuild hydrogen atoms now + _rebuild_h_atoms(mol_graph) + atomnames = {node[0]: node[1]['element']+str(node[0]) for node in mol_graph.nodes(data=True)} nx.set_node_attributes(mol_graph, atomnames, 'atomname') nx.set_node_attributes(mol_graph, resname, 'resname') From ceccc3d53fab73921c87bfd29a885fda7e284726 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Wed, 24 Jan 2024 10:59:53 +0100 Subject: [PATCH 52/82] remove mpl import --- polyply/tests/test_big_smile_mol_proc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polyply/tests/test_big_smile_mol_proc.py b/polyply/tests/test_big_smile_mol_proc.py index 58667ed8..6975b885 100644 --- a/polyply/tests/test_big_smile_mol_proc.py +++ b/polyply/tests/test_big_smile_mol_proc.py @@ -2,7 +2,7 @@ import networkx as nx from polyply.src.big_smile_mol_processor import (DefBigSmileParser, generate_edge) -import matplotlib.pyplot as plt +#import matplotlib.pyplot as 
plt @pytest.mark.parametrize('bonds_source, bonds_target, edge, btypes',( # single bond source each ({0: ["$"]}, From 158fd3734f321d2084d7b466297dfe4c0c851d30 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Wed, 24 Jan 2024 11:12:13 +0100 Subject: [PATCH 53/82] add changed tests for multiple bonding per atom --- polyply/tests/test_big_smile_parsing.py | 40 ++++++++++++++++++------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/polyply/tests/test_big_smile_parsing.py b/polyply/tests/test_big_smile_parsing.py index 3265564c..f7faf71a 100644 --- a/polyply/tests/test_big_smile_parsing.py +++ b/polyply/tests/test_big_smile_parsing.py @@ -52,23 +52,27 @@ def test_res_pattern_to_meta_mol(smile, nodes, edges): # smiple symmetric bonding ("[$]COC[$]", "COC", - {0: '$', 2: '$'}), + {0: ["$"], 2: ["$"]}), + # smiple symmetric bonding; multiple descript + ("[$]COC[$][$1]", + "COC", + {0: ["$"], 2: ["$", "$1"]}), # named different bonding descriptors ("[$1]CCCC[$2]", "CCCC", - {0: "$1", 3: "$2"}), + {0: ["$1"], 3: ["$2"]}), # ring and bonding descriptors ("[$1]CC[$2]C1CCCCC1", "CCC1CCCCC1", - {0: "$1", 1: "$2"}), + {0: ["$1"], 1: ["$2"]}), # bonding descript. 
after branch ("C(COC[$1])[$2]CCC[$3]", "C(COC)CCC", - {0: '$2', 3: '$1', 6: '$3'}), + {0: ["$2"], 3: ["$1"], 6: ["$3"]}), # left rigth bonding desciptors ("[>]COC[<]", "COC", - {0: '>', 2: '<'}) + {0: [">"], 2: ["<"]}) )) def test_tokenize_big_smile(big_smile, smile, bonding): new_smile, new_bonding = tokenize_big_smile(big_smile) @@ -78,9 +82,9 @@ def test_tokenize_big_smile(big_smile, smile, bonding): @pytest.mark.parametrize('fragment_str, nodes, edges',( # single fragment ("{#PEO=[$]COC[$]}", - {"PEO": ((0, {"atomname": "C0", "resname": "PEO", "bonding": "$", "element": "C"}), + {"PEO": ((0, {"atomname": "C0", "resname": "PEO", "bonding": ["$"], "element": "C"}), (1, {"atomname": "O1", "resname": "PEO", "element": "O"}), - (2, {"atomname": "C2", "resname": "PEO", "bonding": "$", "element": "C"}), + (2, {"atomname": "C2", "resname": "PEO", "bonding": ["$"], "element": "C"}), (3, {"atomname": "H3", "resname": "PEO", "element": "H"}), (4, {"atomname": "H4", "resname": "PEO", "element": "H"}), (5, {"atomname": "H5", "resname": "PEO", "element": "H"}), @@ -89,25 +93,39 @@ def test_tokenize_big_smile(big_smile, smile, bonding): {"PEO": [(0, 1), (1, 2), (0, 3), (0, 4), (2, 5), (2, 6)]}), # test NH3 terminal ("{#AMM=N[$]}", - {"AMM": ((0, {"atomname": "N0", "resname": "AMM", "bonding": "$", "element": "N"}), + {"AMM": ((0, {"atomname": "N0", "resname": "AMM", "bonding": ["$"], "element": "N"}), (1, {"atomname": "H1", "resname": "AMM", "element": "H"}), (2, {"atomname": "H2", "resname": "AMM", "element": "H"}), )}, {"AMM": [(0, 1), (0, 2)]}), # single fragment + 1 terminal (i.e. 
only 1 bonding descrpt ("{#PEO=[$]COC[$],#OHter=[$][OH]}", - {"PEO": ((0, {"atomname": "C0", "resname": "PEO", "bonding": "$", "element": "C"}), + {"PEO": ((0, {"atomname": "C0", "resname": "PEO", "bonding": ["$"], "element": "C"}), (1, {"atomname": "O1", "resname": "PEO", "element": "O"}), - (2, {"atomname": "C2", "resname": "PEO", "bonding": "$", "element": "C"}), + (2, {"atomname": "C2", "resname": "PEO", "bonding": ["$"], "element": "C"}), (3, {"atomname": "H3", "resname": "PEO", "element": "H"}), (4, {"atomname": "H4", "resname": "PEO", "element": "H"}), (5, {"atomname": "H5", "resname": "PEO", "element": "H"}), (6, {"atomname": "H6", "resname": "PEO", "element": "H"}), ), - "OHter": ((0, {"atomname": "O0", "resname": "OHter", "bonding": "$", "element": "O"}), + "OHter": ((0, {"atomname": "O0", "resname": "OHter", "bonding": ["$"], "element": "O"}), (1, {"atomname": "H1", "resname": "OHter", "element": "H"}))}, {"PEO": [(0, 1), (1, 2), (0, 3), (0, 4), (2, 5), (2, 6)], "OHter": [(0, 1)]}), + # single fragment + 1 terminal but multiple bond descritp. 
+ # this adjust the hydrogen count + ("{#PEO=[$]COC[$][$1],#OHter=[$][OH]}", + {"PEO": ((0, {"atomname": "C0", "resname": "PEO", "bonding": ["$"], "element": "C"}), + (1, {"atomname": "O1", "resname": "PEO", "element": "O"}), + (2, {"atomname": "C2", "resname": "PEO", "bonding": ["$", "$1"], "element": "C"}), + (3, {"atomname": "H3", "resname": "PEO", "element": "H"}), + (4, {"atomname": "H4", "resname": "PEO", "element": "H"}), + (5, {"atomname": "H5", "resname": "PEO", "element": "H"}), + ), + "OHter": ((0, {"atomname": "O0", "resname": "OHter", "bonding": ["$"], "element": "O"}), + (1, {"atomname": "H1", "resname": "OHter", "element": "H"}))}, + {"PEO": [(0, 1), (1, 2), (0, 3), (0, 4), (2, 5)], + "OHter": [(0, 1)]}), )) def test_fragment_iter(fragment_str, nodes, edges): for resname, mol_graph in fragment_iter(fragment_str): From 8f2887f5d2149e94330014a8b32f47d64caf1b3d Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Wed, 24 Jan 2024 12:13:41 +0100 Subject: [PATCH 54/82] delete old processor file --- polyply/src/big_smile_mol_processsor.py | 99 ------------------------- 1 file changed, 99 deletions(-) delete mode 100644 polyply/src/big_smile_mol_processsor.py diff --git a/polyply/src/big_smile_mol_processsor.py b/polyply/src/big_smile_mol_processsor.py deleted file mode 100644 index 8131e009..00000000 --- a/polyply/src/big_smile_mol_processsor.py +++ /dev/null @@ -1,99 +0,0 @@ -import networkx as nx -from polyply.src.big_smile_parsing import (res_pattern_to_meta_mol, - force_field_from_fragments) -from polyply.src.map_to_molecule import MapToMolecule - -def compatible(left, right): - """ - Check bonding descriptor compatibility according - to the BigSmiles syntax convetions. 
- - Parameters - ---------- - left: str - right: str - - Returns - ------- - bool - """ - if left == right: - return True - if left[0] == "<" and right[0] == ">": - if left[1:] == right[1:]: - return True - if left[0] == ">" and right[0] == "<": - if left[1:] == right[1:]: - return True - return False - -def generate_edge(source, target, bond_type="bonding"): - """ - Given a source and a target graph, which have bonding - descriptors stored as node attributes, find a pair of - matching descriptors and return the respective nodes. - The function also returns the bonding descriptors. If - no bonding descriptor is found an instance of LookupError - is raised. - - Parameters - ---------- - source: :class:`nx.Graph` - target: :class:`nx.Graph` - bond_type: `abc.hashable` - under which attribute are the bonding descriptors - stored. - - Returns - ------- - ((abc.hashable, abc.hashable), (str, str)) - the nodes as well as bonding descriptors - - Raises - ------ - LookupError - if no match is found - """ - source_nodes = nx.get_node_attributes(source, bond_type) - target_nodes = nx.get_node_attributes(target, bond_type) - for source_node in source_nodes: - for target_node in target_nodes: - bond_source = source_nodes[source_node] - bond_target = target_nodes[target_node] - if compatible(bond_source, bond_target): - return ((source_node, target_node), (bond_source, bond_target)) - raise LookupError - -class DefBigSmileParser: - """ - Parse an a string instance of a defined BigSmile, - which describes a polymer molecule. - """ - - def __init__(self): - self.force_field = None - self.meta_molecule = None - self.molecule = None - - def edges_from_bonding_descrpt(self): - """ - Make edges according to the bonding descriptors stored - in the node attributes of meta_molecule residue graph. - If a bonding descriptor is consumed it is set to None, - however, the meta_molecule edge gets an attribute with the - bonding descriptors that formed the edge. 
- """ - for prev_node, node in nx.dfs_edges(self.meta_molecule): - edge, bonding = generate_edge(self.meta_molecule.nodes[prev_node]['graph'], - self.meta_molecule.nodes[node]['graph']) - self.meta_molecule.nodes[prev_node]['graph'][edge[0]]['bonding'] = None - self.meta_molecule.nodes[prev_node]['graph'][edge[1]]['bonding'] = None - self.meta_molecule.molecule.add_edge(edge, bonding=bonding) - - def parse(self, big_smile_str): - res_pattern, residues = big_smile_str.split('.') - self.meta_molecule = res_pattern_to_meta_mol(res_pattern) - self.force_field = force_field_from_fragments(residues) - MapToMolecule(self.force_field).run_molecule(self.meta_molecule) - self.edges_from_bonding_descrpt() - return self.meta_molecule From 08021c2801ae8df9169c52bf2d2fc7238fa9516f Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Wed, 24 Jan 2024 16:03:48 +0100 Subject: [PATCH 55/82] have charge balancing for itps but raise error when bond length is missing --- polyply/src/charges.py | 4 ++++ polyply/src/itp_to_ff.py | 11 +++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/polyply/src/charges.py b/polyply/src/charges.py index cfd50235..bb7505fe 100644 --- a/polyply/src/charges.py +++ b/polyply/src/charges.py @@ -93,6 +93,10 @@ def _get_bonds(block, topology=None): elif batoms[::-1] in topology.types['bonds']: params = topology.types['bonds'][batoms[::-1]][0][0][1] bonds[(nodes_to_count[idx], nodes_to_count[jdx])] = float(params) + else: + msg = ("Cannot find bond lengths. 
If your force field uses bondtypes lile" + "Charmm you need to provide a topology file.") + raise ValueError(msg) return bonds def balance_charges(block, charge=0, tol=10**-5, decimals=5, topology=None): diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py index bd08e1bd..8bf0a659 100644 --- a/polyply/src/itp_to_ff.py +++ b/polyply/src/itp_to_ff.py @@ -39,6 +39,7 @@ def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charges= mol = top.molecules[0].molecule # read itp file if itppath.suffix == ".itp": + top = None with open(itppath, "r") as _file: lines = _file.readlines() force_field = ForceField("tmp") @@ -63,12 +64,10 @@ def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charges= new_block.nrexcl = mol.nrexcl force_field.blocks[name] = new_block set_charges(new_block, res_graph, name) - if itppath.suffix == ".top": - base_resname = name.split(term_prefix)[0].split('_')[0] - print(base_resname) - balance_charges(new_block, - topology=top, - charge=crg_dict[base_resname]) + base_resname = name.split(term_prefix)[0].split('_')[0] + balance_charges(new_block, + topology=top, + charge=crg_dict[base_resname]) force_field.links = extract_links(mol) From 681004f10e08629450ef6d8780b53766347369ae Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Thu, 29 Feb 2024 15:38:23 +0100 Subject: [PATCH 56/82] add closing bracket to special characters --- polyply/src/big_smile_parsing.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py index fa6348cc..6969a31c 100644 --- a/polyply/src/big_smile_parsing.py +++ b/polyply/src/big_smile_parsing.py @@ -75,7 +75,6 @@ def res_pattern_to_meta_mol(pattern): branching = False for match in re.finditer(PATTERNS['place_holder'], pattern): start, stop = match.span() - print(pattern[start:stop]) # new branch here if pattern[start-1] == '(': branching = True @@ -181,7 +180,7 @@ def 
tokenize_big_smile(big_smile): prev_node = anchor smile += token else: - if token not in '@ . - = # $ : / \\ + - %'\ + if token not in '] H @ . - = # $ : / \\ + - %'\ and not token.isdigit(): prev_node = node_count node_count += 1 From 353723958b54487558c45bd43cc8da12ee22c052 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Thu, 29 Feb 2024 15:38:49 +0100 Subject: [PATCH 57/82] only balance charges for blocks with at least 2 atoms --- polyply/src/charges.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/polyply/src/charges.py b/polyply/src/charges.py index bb7505fe..5a08a854 100644 --- a/polyply/src/charges.py +++ b/polyply/src/charges.py @@ -122,6 +122,9 @@ def balance_charges(block, charge=0, tol=10**-5, decimals=5, topology=None): :class:`vermouth.molecule.Block` block with updated charges """ + if len(block.nodes) < 2: + return block + block.make_edges_from_interaction_type('bonds') keys = nx.get_node_attributes(block, 'charge').keys() charges = np.array(list(nx.get_node_attributes(block, 'charge').values())) From 929b5d1a8a6b41525f9d0493f190814fd677ea73 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Thu, 29 Feb 2024 15:39:36 +0100 Subject: [PATCH 58/82] refactor fragment finder --- polyply/src/fragment_finder.py | 256 +++------------------------------ 1 file changed, 20 insertions(+), 236 deletions(-) diff --git a/polyply/src/fragment_finder.py b/polyply/src/fragment_finder.py index 060fbb44..dcf92c87 100644 --- a/polyply/src/fragment_finder.py +++ b/polyply/src/fragment_finder.py @@ -11,22 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import networkx as nx from vermouth.graph_utils import make_residue_graph from polyply.src.graph_utils import find_one_ismags_match -def _element_match(node1, node2): - """ - Checks if the element attribute of two nodes - is the same. 
- - Returns - -------- - bool - """ - return node1["element"] == node2["element"] - class FragmentFinder(): """ This class enables finding and labelling of fragments @@ -63,7 +51,7 @@ class FragmentFinder(): the resname is appended by a number. """ - def __init__(self, molecule, prefix): + def __init__(self, molecule): """ Initalize the fragment finder with a molecule, setting the resid attribute to None, and correctly assining elements @@ -97,14 +85,8 @@ def __init__(self, molecule, prefix): res_graph: :class:`vermouth.molecule.Molecule` residue graph of the molecule """ - self.max_by_resid = {} - self.ter_prefix = prefix - self.resid = 1 - self.res_assigment = [] - self.assigned_atoms = [] self.molecule = molecule - self.known_atom = None - self.match_keys = ['element', 'mass', 'degree'] #, 'charge'] + self.match_keys = ['element'] #, 'mass', 'degree'] #, 'charge'] self.masses_to_element = {16: "O", 14: "N", 12: "C", @@ -123,33 +105,6 @@ def __init__(self, molecule, prefix): self.molecule.nodes[node]["element"] = self.masses_to_element[mass] self.molecule.nodes[node]["degree"] = self.molecule.degree(node) - def linearize_resids(self, unique_fragments): - resids = np.arange(0, len(self.res_graph)) - old_resids = {} - # find the first terminal - ter = self.ter_prefix - ter_nodes = [ node[0] for node in self.res_graph.nodes(data=True) if ter in node[1]['resname'] ] - print(ter_nodes[0]) - #assert 0 > len(ter_nodes) < 3 - path = nx.dfs_edges(self.res_graph, source=ter_nodes[0]) - old_resids = {self.res_graph.nodes[ter_nodes[0]]['resid']: resids[0]} - self.res_graph.nodes[ter_nodes[0]]['resid'] = resids[0] - for mol_node in self.res_graph.nodes[ter_nodes[0]]['graph'].nodes: - self.res_graph.nodes[ter_nodes[0]]['graph'].nodes[mol_node]['resid'] = resids[0] - self.molecule.nodes[mol_node]['resid'] = resids[0] - - for new_resid, (_, node) in zip(resids[1:], path): - print('node', node) - old_resids[self.res_graph.nodes[node]['resid']] = new_resid - 
self.res_graph.nodes[node]['resid'] = new_resid - for mol_node in self.res_graph.nodes[node]['graph'].nodes: - self.res_graph.nodes[node]['graph'].nodes[mol_node]['resid'] = new_resid - self.molecule.nodes[mol_node]['resid'] = new_resid - print(old_resids) - for fragment in unique_fragments.values(): - for node in fragment.nodes: - fragment.nodes[node]['resid'] = old_resids[fragment.nodes[node]['resid']] - def _node_match(self, node1, node2): """ Check if two node dicts match. @@ -172,124 +127,7 @@ def _node_match(self, node1, node2): def make_res_graph(self): self.res_graph = make_residue_graph(self.molecule) - def pre_match(self, fragment_graph): - """ - Find one match of fragment graph in the molecule - and then extract degrees and atom-types for further - matching. This is a safety measure because even though - the fragment graph is subgraph isomorphic the underlying - itp parameters might not be. - - Parameters - ----------- - fragment_graph: 'nx.Graph' - must have attributes element for each node - - Returns - ------- - 'nx.Graph' - the labelled fragment graph - """ - template_atoms = list(fragment_graph.nodes) - # find subgraph isomorphic matches to the target fragment - # based on the element only - GM = nx.isomorphism.GraphMatcher(self.molecule, - fragment_graph, - node_match=_element_match,) - - for one_match in GM.subgraph_isomorphisms_iter(): - rev_current_match = {val: key for key, val in one_match.items()} - atoms = [ rev_current_match[template_atom] for template_atom in template_atoms] - if self.is_valid_match(one_match, atoms)[0]: - break - - for mol_atom, tempt_atom in one_match.items(): - for attr in self.match_keys: - fragment_graph.nodes[tempt_atom][attr] = self.molecule.nodes[mol_atom][attr] - return fragment_graph - - def is_valid_match(self, match, atoms): - """ - Check if the found isomorphism match is valid. 
- """ - # is the match connected to the previous residue - if not self.is_connected_to_prev(match.keys(), self.assigned_atoms,): - return False, 1 - # check if atoms are already assigned - if frozenset(atoms) in self.res_assigment: - return False, 2 - # check if there is any partial overlap - if any([atom in self.assigned_atoms for atom in atoms]): - return False, 3 - - return True, 4 - - def is_connected_to_prev(self, current, prev): - """ - Check if the atoms in the lists current or - prev are connected. - - Parameters - ---------- - current: list[abc.hashable] - list of current nodes - prev: list[abc.hashable] - list of prev nodes - """ - # no atoms have been assigned - if len(prev) == 0: - return True - - for node in current: - for neigh_node in self.molecule.neighbors(node): - if neigh_node in prev: - return True - return False - - def label_fragment_from_graph(self, fragment_graph): - """ - For the `self.molecule` label all atoms, that match - the `fragment_graph`, with a resid attribute and set - the atom-name to the element name plus index relative - to the atoms in the fragment. 
- - Parameters - ---------- - fragment_graph: nx.Graph - graph describing the fragment; must have the - element attribute - """ - # pre-match one residue and extract the atomtypes and degrees - # this is needed to enforce symmetry in matching the other - # residues - fragment_graph = self.pre_match(fragment_graph) - # find all isomorphic matches to the target fragments - GM = nx.isomorphism.GraphMatcher(self.molecule, - fragment_graph, - node_match=self._node_match, - ) - template_atoms = list(fragment_graph.nodes) - resname = list(nx.get_node_attributes(fragment_graph, "resname").values())[0] - raw_matchs = list(GM.subgraph_isomorphisms_iter()) - # loop over all matchs and check if the atoms are already - # assigned - symmetric matches must be skipped - for current_match in raw_matchs: - # the graph matcher can return the matchs in any order so we need to sort them - # according to our tempalte molecule - rev_current_match = {val: key for key, val in current_match.items()} - atoms = [ rev_current_match[template_atom] for template_atom in template_atoms] - if self.is_valid_match(current_match, atoms)[0]: - self.res_assigment.append(frozenset(atoms)) - for idx, atom in enumerate(atoms): - self.molecule.nodes[atom]["resid"] = self.resid - self.molecule.nodes[atom]["atomname"] = self.molecule.nodes[atom]["element"] + str(idx) - self.molecule.nodes[atom]["resname"] = resname - self.max_by_resid[self.resid] = idx - self.known_atom = atom - self.assigned_atoms.append(atom) - self.resid += 1 - - def label_fragments_from_graph(self, fragment_graphs): + def extract_unique_fragments(self, reference_graph): """ Call the label_fragment method for multiple fragments. 
@@ -297,77 +135,23 @@ def label_fragments_from_graph(self, fragment_graphs): ---------- fragment_graphs: list[nx.Graph] """ - for fragment_graph in fragment_graphs: - self.label_fragment_from_graph(fragment_graph) - - def label_unmatched_atoms(self): - """ - After all atoms have been assigned to target fragments using - the label_fragment method all left-over atoms are assigned to - the first fragment they are attached to. This method sets the - atom-name to the element name and element count and resid - attribute. - """ - for from_node, to_node in nx.dfs_edges(self.molecule, source=self.known_atom): - if not self.molecule.nodes[to_node]["resid"]: - resid = self.molecule.nodes[from_node]["resid"] - self.max_by_resid[resid] = self.max_by_resid[resid] + 1 - self.molecule.nodes[to_node]["resid"] = resid - self.molecule.nodes[to_node]["resname"] = self.molecule.nodes[from_node]["resname"] - self.molecule.nodes[to_node]["atomname"] = self.molecule.nodes[to_node]["element"] + str(self.max_by_resid[resid]) - - def extract_unique_fragments(self, fragment_graphs): - """ - Given a list of fragment-graphs assing all atoms to fragments and - generate new fragments by assinging the left-over atoms to the - connecting fragment. Fragments get a unique resid in the molecule. - Then make the residue graph and filter out all unique residues - and return them. 
- - Parameters - ---------- - fragment_graphs: list[nx.Graph] - - Returns - ------- - list[nx.Graph] - all unique fragment graphs - """ - # first we find and label all fragments in the molecule - self.label_fragments_from_graph(fragment_graphs) - # then we assign all left-over atoms to the existing residues - self.label_unmatched_atoms() - # make the residue graph + # find one correspondance + mapping = find_one_ismags_match(self.molecule, + reference_graph, + node_match=self._node_match) + # now assign the attributes from the reference graph to + # the target molecule + for target, ref in mapping.items(): + for attr in ['resname', 'resid', 'atomname']: + self.molecule.nodes[target][attr] = reference_graph.nodes[ref][attr] + + # now we make the residue graph and extract self.make_res_graph() - # now we make the residue graph and find all unique residues - unique_fragments = {} - had_resnames = {} - for node in self.res_graph.nodes: - resname = self.res_graph.nodes[node]['resname'] - # this fragment is terminal located so we give it a special prefix - fragment = self.res_graph.nodes[node]['graph'] - if self.res_graph.degree(node) == 1: - resname = resname + self.ter_prefix - nx.set_node_attributes(self.molecule, {node: resname for node in fragment.nodes} ,"resname") - nx.set_node_attributes(fragment, {node: resname for node in fragment.nodes} ,"resname") - # here we extract the fragments and set appropiate residue names - for other_frag in unique_fragments.values(): - if nx.is_isomorphic(fragment, other_frag, node_match=self._node_match): - mapping = find_one_ismags_match(fragment, other_frag, self._node_match) - if mapping: - for source, target in mapping.items(): - self.molecule.nodes[target]['atomname'] = self.molecule.nodes[source]['atomname'] - break - else: - if resname in unique_fragments: - resname = resname + "_" + str(had_resnames[resname] + 1) - nx.set_node_attributes(self.molecule, {node: resname for node in fragment.nodes} ,"resname") - 
nx.set_node_attributes(fragment, {node: resname for node in fragment.nodes} ,"resname") - else: - had_resnames[resname] = 0 - unique_fragments[resname] = fragment - # remake the residue graph since some resnames have changed - self.make_res_graph() - self.linearize_resids(unique_fragments) + # finally we simply collect one graph per restype + unique_fragments = {} + for res in self.res_graph: + resname = self.res_graph.nodes[res]['resname'] + if resname not in unique_fragments: + unique_fragments[resname] = self.res_graph.nodes[res]['graph'] return unique_fragments, self.res_graph From 87510bbfd5f4f414b42e6697c5884a712799a660 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Thu, 29 Feb 2024 15:43:39 +0100 Subject: [PATCH 59/82] refactor fragment itp_to_ff --- polyply/src/itp_to_ff.py | 60 ++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py index 8bf0a659..b39df391 100644 --- a/polyply/src/itp_to_ff.py +++ b/polyply/src/itp_to_ff.py @@ -11,13 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import numpy as np import networkx as nx -try: - import pysmiles -except ImportError: - raise ImportError("To use polyply itp_to_ff you need to install pysmiles.") -import vermouth from vermouth.forcefield import ForceField from vermouth.gmx.itp_read import read_itp from polyply.src.topology import Topology @@ -25,51 +19,57 @@ from polyply.src.fragment_finder import FragmentFinder from polyply.src.ffoutput import ForceFieldDirectiveWriter from polyply.src.charges import balance_charges, set_charges +from polyply.src.big_smile_mol_processor import DefBigSmileParser -def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charges=None): +def _read_itp_file(itppath): + """ + small wrapper for reading itps + """ + with open(itppath, "r") as _file: + lines = _file.readlines() + force_field = ForceField("tmp") + read_itp(lines, force_field) + block = next(iter(force_field.blocks.values())) + mol = block.to_molecule() + mol.make_edges_from_interaction_type(type_="bonds") + return mol + +def itp_to_ff(itppath, smile_str, outpath, res_charges=None): """ Main executable for itp to ff tool. 
""" # what charges belong to which resname - if charges: - crg_dict = dict(zip(resnames, charges)) + if res_charges: + crg_dict = dict(res_charges) + # read the topology file if itppath.suffix == ".top": top = Topology.from_gmx_topfile(itppath, name="test") - mol = top.molecules[0].molecule + target_mol = top.molecules[0].molecule # read itp file - if itppath.suffix == ".itp": + elif itppath.suffix == ".itp": top = None - with open(itppath, "r") as _file: - lines = _file.readlines() - force_field = ForceField("tmp") - read_itp(lines, force_field) - block = next(iter(force_field.blocks.values())) - mol = block.to_molecule() - mol.make_edges_from_interaction_type(type_="bonds") + target_mol = _read_itp_file(itppath) - # read the target fragments and convert to graph - fragment_graphs = [] - for resname, smile in zip(resnames, fragment_smiles): - fragment_graph = pysmiles.read_smiles(smile, explicit_hydrogen=True) - nx.set_node_attributes(fragment_graph, resname, "resname") - fragment_graphs.append(fragment_graph) + # read the big-smile representation + meta_mol = DefBigSmileParser().parse(smile_str) # identify and extract all unique fragments - unique_fragments, res_graph = FragmentFinder(mol, prefix=term_prefix).extract_unique_fragments(fragment_graphs) + unique_fragments, res_graph = FragmentFinder(target_mol).extract_unique_fragments(meta_mol.molecule) + + # extract the blocks with parameters force_field = ForceField("new") for name, fragment in unique_fragments.items(): - new_block = extract_block(mol, list(fragment.nodes), defines={}) + new_block = extract_block(target_mol, list(fragment.nodes), defines={}) nx.set_node_attributes(new_block, 1, "resid") - new_block.nrexcl = mol.nrexcl + new_block.nrexcl = target_mol.nrexcl force_field.blocks[name] = new_block set_charges(new_block, res_graph, name) - base_resname = name.split(term_prefix)[0].split('_')[0] balance_charges(new_block, topology=top, - charge=crg_dict[base_resname]) + charge=crg_dict[name]) - 
force_field.links = extract_links(mol) + force_field.links = extract_links(target_mol) with open(outpath, "w") as filehandle: ForceFieldDirectiveWriter(forcefield=force_field, stream=filehandle).write() From 05df2e56a67e89de471108b7142b1b7ba1dd5489 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Thu, 29 Feb 2024 15:44:09 +0100 Subject: [PATCH 60/82] change input for itp_to_ff to allow bigmsiles --- bin/polyply | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/bin/polyply b/bin/polyply index fb997c9c..3cb3d0bd 100755 --- a/bin/polyply +++ b/bin/polyply @@ -237,12 +237,10 @@ def main(): # pylint: disable=too-many-locals,too-many-statements help='Enable debug logging output. Can be given ' 'multiple times.', default=0) - parser_itp_ff.add_argument('-i', dest="itppath", type=Path) - parser_itp_ff.add_argument('-sm', dest="fragment_smiles", nargs='*') - parser_itp_ff.add_argument('-rn', dest="resnames", nargs='*') - parser_itp_ff.add_argument('-tp',dest="term_prefix", default="ter") + parser_itp_ff.add_argument('-i', dest="itppath", type=Path, required=True) + parser_itp_ff.add_argument('-s', dest="smile_str", required=True) parser_itp_ff.add_argument('-o', dest="outpath", type=Path) - parser_itp_ff.add_argument('-c', dest="charges", type=float, nargs='*') + parser_itp_ff.add_argument('-c', dest="res_charges", nargs='+', type=lambda s: s.split(':'),) parser_itp_ff.set_defaults(func=itp_to_ff) From 0ebfa6ac8b73e0f280d6ab901e89ee319a013da7 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Fri, 1 Mar 2024 17:55:40 +0100 Subject: [PATCH 61/82] take most central fragment --- polyply/src/fragment_finder.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/polyply/src/fragment_finder.py b/polyply/src/fragment_finder.py index dcf92c87..07849508 100644 --- a/polyply/src/fragment_finder.py +++ b/polyply/src/fragment_finder.py @@ -149,9 +149,13 @@ def extract_unique_fragments(self, reference_graph): self.make_res_graph() # 
finally we simply collect one graph per restype + # which are the most centrail (i.e. avoid ends) unique_fragments = {} + frag_centrality = {} + centrality = nx.betweenness_centrality(self.res_graph) for res in self.res_graph: resname = self.res_graph.nodes[res]['resname'] - if resname not in unique_fragments: + if resname not in unique_fragments or frag_centrality[resname] < centrality[res]: unique_fragments[resname] = self.res_graph.nodes[res]['graph'] + frag_centrality[resname] = centrality[res] return unique_fragments, self.res_graph From a7cd590fb70d0b136c84d8bf2e334a88307ce489 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Fri, 1 Mar 2024 17:56:00 +0100 Subject: [PATCH 62/82] add special links for terminal modifications --- polyply/src/itp_to_ff.py | 5 ++- polyply/src/molecule_utils.py | 71 ++++++++++++++++++++++++++++++++++- 2 files changed, 74 insertions(+), 2 deletions(-) diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py index b39df391..9ff02f47 100644 --- a/polyply/src/itp_to_ff.py +++ b/polyply/src/itp_to_ff.py @@ -15,7 +15,7 @@ from vermouth.forcefield import ForceField from vermouth.gmx.itp_read import read_itp from polyply.src.topology import Topology -from polyply.src.molecule_utils import extract_block, extract_links +from polyply.src.molecule_utils import extract_block, extract_links, find_termini_mods from polyply.src.fragment_finder import FragmentFinder from polyply.src.ffoutput import ForceFieldDirectiveWriter from polyply.src.charges import balance_charges, set_charges @@ -69,7 +69,10 @@ def itp_to_ff(itppath, smile_str, outpath, res_charges=None): topology=top, charge=crg_dict[name]) + # extract the regular links force_field.links = extract_links(target_mol) + # extract links that span the terminii + find_termini_mods(res_graph, target_mol, force_field) with open(outpath, "w") as filehandle: ForceFieldDirectiveWriter(forcefield=force_field, stream=filehandle).write() diff --git a/polyply/src/molecule_utils.py 
b/polyply/src/molecule_utils.py index 7da9ce43..a960ca85 100644 --- a/polyply/src/molecule_utils.py +++ b/polyply/src/molecule_utils.py @@ -19,6 +19,7 @@ from vermouth.molecule import Interaction from polyply.tests.test_lib_files import _interaction_equal from .topology import replace_defined_interaction +from .graph_utils import find_connecting_edges def diffs_to_prefix(atoms, resid_diffs): """ @@ -140,7 +141,7 @@ def extract_links(molecule): # we collect the edges corresponding to the simple paths between pairs of atoms # in the interaction mol_atoms_to_link_atoms, edges, resnames = _extract_edges_from_shortest_path(interaction.atoms, molecule, min_resid) - link_to_mol_atoms = {value:key for key, value in mol_atoms_to_link_atoms.items()} + #link_to_mol_atoms = {value:key for key, value in mol_atoms_to_link_atoms.items()} link_atoms = [mol_atoms_to_link_atoms[atom] for atom in interaction.atoms] link_inter = Interaction(atoms=link_atoms, parameters=interaction.parameters, @@ -258,3 +259,71 @@ def extract_block(molecule, nodes, defines={}): raise IOError(msg.format(resname, resid)) return block + +def find_termini_mods(meta_molecule, molecule, force_field): + """ + Terminii are a bit special in the sense that they are often + different from a repeat unit of the polymer in the polymer. 
+ """ + terminal_nodes = [ node for node in meta_molecule.nodes if meta_molecule.degree(node) == 1 ] + for meta_node in terminal_nodes: + # get the node that is next to the terminal; by definition + # it can only be one neighbor + neigh_node = next(nx.neighbors(meta_molecule, meta_node)) + + # some useful info + neigh_resname = meta_molecule.nodes[neigh_node]['resname'] + resids = [meta_molecule.nodes[neigh_node]['resid'], + meta_molecule.nodes[meta_node]['resid']] + ref_block = force_field.blocks[neigh_resname] + target_block = meta_molecule.nodes[neigh_node]['graph'] + + # find different properties + replace_dict = defaultdict(dict) + for node in target_block.nodes: + target_attrs = target_block.nodes[node] + ref_attrs = ref_block.nodes[target_attrs['atomname']] + for attr in ['atype', 'mass']: + if target_attrs[attr] != ref_attrs[attr]: + replace_dict[node][attr] = target_attrs[attr] + + # bonded interactions could be different too so we need to check them + overwrite_inters = defaultdict(list) + for inter_type in ref_block.interactions: + for ref_inter in ref_block.interactions[inter_type]: + for target_inter in target_block.interactions[inter_type]: + target_atoms = [target_block.nodes[atom]['atomname'] for atom in target_inter.atoms] + if target_atoms == ref_inter.atoms and\ + target_inter.parameters != ref_inter.parameters: + mol_atoms_to_link_atoms, edges, resnames = _extract_edges_from_shortest_path(target_inter.atoms, + molecule, + min(resids)) + #link_to_mol_atoms = {value:key for key, value in mol_atoms_to_link_atoms.items()} + link_atoms = [mol_atoms_to_link_atoms[atom] for atom in target_inter.atoms] + link_inter = Interaction(atoms=link_atoms, + parameters=target_inter.parameters, + meta={}) + overwrite_inters[inter_type].append(link_inter) + + # we make a link + mol_atoms = list(replace_dict.keys()) + list(meta_molecule.nodes[meta_node]['graph'].nodes) + link = vermouth.molecule.Link() + mol_to_link, edges, resnames = 
_extract_edges_from_shortest_path(mol_atoms, + molecule, + min(resids)) + link_atoms = mol_to_link.values() + link = vermouth.molecule.Link() + link.add_nodes_from(link_atoms) + for node in mol_atoms: + link.nodes[mol_to_link[node]]['resname'] = molecule.nodes[node]['resname'] + link.nodes[mol_to_link[node]]['replace'] = replace_dict[node] + + force_field.links.append(link) + for inter_type in overwrite_inters: + link.interactions[inter_type].append(overwrite_inters) + + edges = find_connecting_edges(meta_molecule, molecule, [meta_node, neigh_node]) + for ndx, jdx in edges: + link.add_edge(mol_to_link[ndx], mol_to_link[jdx]) + + return force_field From 8e0c2577373b92988f668a1966615d83f71424c6 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Sun, 3 Mar 2024 14:26:57 +0100 Subject: [PATCH 63/82] type the charges to float in itp to ff --- polyply/src/itp_to_ff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py index 9ff02f47..a26248d6 100644 --- a/polyply/src/itp_to_ff.py +++ b/polyply/src/itp_to_ff.py @@ -67,7 +67,7 @@ def itp_to_ff(itppath, smile_str, outpath, res_charges=None): set_charges(new_block, res_graph, name) balance_charges(new_block, topology=top, - charge=crg_dict[name]) + charge=float(crg_dict[name])) # extract the regular links force_field.links = extract_links(target_mol) From 7cb3b4cf32fc8eb52b42b4b8a795fafb3cf7faa6 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Sun, 3 Mar 2024 14:30:49 +0100 Subject: [PATCH 64/82] add H and ] as special characters in big smile parser --- polyply/src/big_smile_parsing.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py index fa6348cc..6969a31c 100644 --- a/polyply/src/big_smile_parsing.py +++ b/polyply/src/big_smile_parsing.py @@ -75,7 +75,6 @@ def res_pattern_to_meta_mol(pattern): branching = False for match in re.finditer(PATTERNS['place_holder'], 
pattern): start, stop = match.span() - print(pattern[start:stop]) # new branch here if pattern[start-1] == '(': branching = True @@ -181,7 +180,7 @@ def tokenize_big_smile(big_smile): prev_node = anchor smile += token else: - if token not in '@ . - = # $ : / \\ + - %'\ + if token not in '] H @ . - = # $ : / \\ + - %'\ and not token.isdigit(): prev_node = node_count node_count += 1 From 097ec842efefb5d4e5ca8b32d38365174f9af10a Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Sun, 3 Mar 2024 14:54:11 +0100 Subject: [PATCH 65/82] account for explicit hydrogen in the smiles string input --- polyply/src/big_smile_parsing.py | 11 +++++++-- polyply/tests/test_big_smile_parsing.py | 30 +++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py index 6969a31c..55d3a6aa 100644 --- a/polyply/src/big_smile_parsing.py +++ b/polyply/src/big_smile_parsing.py @@ -204,8 +204,15 @@ def _rebuild_h_atoms(mol_graph): else: for node in mol_graph.nodes: if mol_graph.nodes[node].get('bonding', False): - hcount = mol_graph.nodes[node]['hcount'] - mol_graph.nodes[node]['hcount'] = hcount - len(mol_graph.nodes[node]['bonding']) + # get the degree + ele = mol_graph.nodes[0]['element'] + # hcoung is the valance minus the degree minus + # the number of bonding descriptors + hcount = pysmiles.smiles_helper.VALENCES[ele][0] -\ + mol_graph.degree(node) -\ + len(mol_graph.nodes[node]['bonding']) + + mol_graph.nodes[node]['hcount'] = hcount pysmiles.smiles_helper.add_explicit_hydrogens(mol_graph) return mol_graph diff --git a/polyply/tests/test_big_smile_parsing.py b/polyply/tests/test_big_smile_parsing.py index f7faf71a..ba3f5f69 100644 --- a/polyply/tests/test_big_smile_parsing.py +++ b/polyply/tests/test_big_smile_parsing.py @@ -53,6 +53,10 @@ def test_res_pattern_to_meta_mol(smile, nodes, edges): ("[$]COC[$]", "COC", {0: ["$"], 2: ["$"]}), + # simple symmetric but with explicit hydrogen + 
("[$][CH2]O[CH2][$]", + "[CH2]O[CH2]", + {0: ["$"], 2: ["$"]}), # smiple symmetric bonding; multiple descript ("[$]COC[$][$1]", "COC", @@ -91,6 +95,17 @@ def test_tokenize_big_smile(big_smile, smile, bonding): (6, {"atomname": "H6", "resname": "PEO", "element": "H"}), )}, {"PEO": [(0, 1), (1, 2), (0, 3), (0, 4), (2, 5), (2, 6)]}), + # single fragment but with explicit hydrogen in smiles + ("{#PEO=[$][CH2]O[CH2][$]}", + {"PEO": ((0, {"atomname": "C0", "resname": "PEO", "bonding": ["$"], "element": "C"}), + (1, {"atomname": "O1", "resname": "PEO", "element": "O"}), + (2, {"atomname": "C2", "resname": "PEO", "bonding": ["$"], "element": "C"}), + (3, {"atomname": "H3", "resname": "PEO", "element": "H"}), + (4, {"atomname": "H4", "resname": "PEO", "element": "H"}), + (5, {"atomname": "H5", "resname": "PEO", "element": "H"}), + (6, {"atomname": "H6", "resname": "PEO", "element": "H"}), + )}, + {"PEO": [(0, 1), (1, 2), (0, 3), (0, 4), (2, 5), (2, 6)]}), # test NH3 terminal ("{#AMM=N[$]}", {"AMM": ((0, {"atomname": "N0", "resname": "AMM", "bonding": ["$"], "element": "N"}), @@ -126,6 +141,21 @@ def test_tokenize_big_smile(big_smile, smile, bonding): (1, {"atomname": "H1", "resname": "OHter", "element": "H"}))}, {"PEO": [(0, 1), (1, 2), (0, 3), (0, 4), (2, 5)], "OHter": [(0, 1)]}), + # single fragment + 1 terminal but multiple bond descritp. 
+ # but explicit hydrogen in the smiles string + ("{#PEO=[$][CH2]O[CH2][$][$1],#OHter=[$][OH]}", + {"PEO": ((0, {"atomname": "C0", "resname": "PEO", "bonding": ["$"], "element": "C"}), + (1, {"atomname": "O1", "resname": "PEO", "element": "O"}), + (2, {"atomname": "C2", "resname": "PEO", "bonding": ["$", "$1"], "element": "C"}), + (3, {"atomname": "H3", "resname": "PEO", "element": "H"}), + (4, {"atomname": "H4", "resname": "PEO", "element": "H"}), + (5, {"atomname": "H5", "resname": "PEO", "element": "H"}), + ), + "OHter": ((0, {"atomname": "O0", "resname": "OHter", "bonding": ["$"], "element": "O"}), + (1, {"atomname": "H1", "resname": "OHter", "element": "H"}))}, + {"PEO": [(0, 1), (1, 2), (0, 3), (0, 4), (2, 5)], + "OHter": [(0, 1)]}), + )) def test_fragment_iter(fragment_str, nodes, edges): for resname, mol_graph in fragment_iter(fragment_str): From 514ba1b2408da1bf592845719b05baeb4dc61d12 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Sun, 3 Mar 2024 15:57:12 +0100 Subject: [PATCH 66/82] test accounting for explicit hydrogen in the smiles string input --- polyply/src/big_smile_mol_processor.py | 25 ++++++++++++++++++++---- polyply/tests/test_big_smile_mol_proc.py | 13 ++++++++++-- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/polyply/src/big_smile_mol_processor.py b/polyply/src/big_smile_mol_processor.py index 8499e7e3..f474fe76 100644 --- a/polyply/src/big_smile_mol_processor.py +++ b/polyply/src/big_smile_mol_processor.py @@ -104,14 +104,31 @@ def edges_from_bonding_descrpt(self): node_graph.nodes[edge[1]]['bonding'] = node_bond_list self.meta_molecule.molecule.add_edge(edge[0], edge[1], bonding=bonding) + def replace_unconsumed_bonding_descrpt(self): + """ + We allow multiple bonding descriptors per atom, which + however, are not always consumed. In this case the left + over bonding descriptors are replaced by hydrogen atoms. 
+ """ + for node in self.meta_molecule.nodes: + graph = self.meta_molecule.nodes[node]['graph'] + bonding = nx.get_node_attributes(graph, "bonding") + for node, bondings in bonding.items(): + attrs = {attr: graph.nodes[node][attr] for attr in ['resname', 'resid']} + attrs['element'] = 'H' + for new_id in range(1, len(bondings)+1): + new_node = len(self.meta_molecule.molecule.nodes) + 1 + graph.add_edge(node, new_node) + attrs['atomname'] = "H" + str(new_id + len(graph.nodes)) + graph.nodes[new_node].update(attrs) + self.meta_molecule.molecule.add_edge(node, new_node) + self.meta_molecule.molecule.nodes[new_node].update(attrs) + def parse(self, big_smile_str): res_pattern, residues = big_smile_str.split('.') self.meta_molecule = res_pattern_to_meta_mol(res_pattern) self.force_field = force_field_from_fragments(residues) MapToMolecule(self.force_field).run_molecule(self.meta_molecule) self.edges_from_bonding_descrpt() + self.replace_unconsumed_bonding_descrpt() return self.meta_molecule - -# ToDo -# - replace non consumed bonding descrpt by hydrogen -# - diff --git a/polyply/tests/test_big_smile_mol_proc.py b/polyply/tests/test_big_smile_mol_proc.py index 6975b885..26e85ba6 100644 --- a/polyply/tests/test_big_smile_mol_proc.py +++ b/polyply/tests/test_big_smile_mol_proc.py @@ -52,6 +52,15 @@ def test_generate_edge(bonds_source, bonds_target, edge, btypes): [(0, 1), (0, 2), (2, 3), (3, 4), (2, 5), (2, 6), (4, 7), (4, 8), (4, 9), (9, 10), (10, 11), (9, 12), (9, 13), (11, 14), (11, 15), (11, 16), (16, 17)]), + # uncomsumed bonding IDs; note that this is not the same + # molecule as previous test case. 
Here one of the OH branches + # and replaces an CH2 group with CH-OH + ("{[#OHter][#PEO]|2[#OHter]}.{#PEO=[>][$1]COC[<],#OHter=[$1][O]}", + [('OHter', 'O H'), ('PEO', 'C O C H H H H'), + ('PEO', 'C O C H H H H'), ('OHter', 'O H')], + [(0, 1), (0, 2), (2, 3), (2, 5), (2, 10), (3, 4), + (4, 6), (4, 7), (4, 17), (8, 9), (8, 11), (8, 14), + (8, 18), (9, 10), (10, 12), (10, 13), (14, 15)]), # simple branched sequence ("{[#Hter][#PE]([#PEO][#Hter])[#PE]([#PEO][#Hter])[#Hter]}.{#Hter=[$]H,#PE=[$]CC[$][$],#PEO=[$]COC[$]}", [('Hter', 'H'), ('PE', 'C C H H H'), ('PEO', 'C O C H H H H'), ('Hter', 'H'), @@ -75,11 +84,11 @@ def test_generate_edge(bonds_source, bonds_target, edge, btypes): )) def test_def_big_smile_parser(smile, ref_nodes, ref_edges): meta_mol = DefBigSmileParser().parse(smile) +# nx.draw_networkx(meta_mol.molecule, with_labels=True, labels=nx.get_node_attributes(meta_mol.molecule, 'element')) +# plt.show() for node, ref in zip(meta_mol.nodes, ref_nodes): assert meta_mol.nodes[node]['resname'] == ref[0] block_graph = meta_mol.nodes[node]['graph'] elements = list(nx.get_node_attributes(block_graph, 'element').values()) assert elements == ref[1].split() - #nx.draw_networkx(meta_mol.molecule, with_labels=True, labels=nx.get_node_attributes(meta_mol.molecule, 'element')) - #plt.show() assert sorted(meta_mol.molecule.edges) == sorted(ref_edges) From 3e4a737aae34c8ce842c8a885782b4dd2750a87e Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Mon, 4 Mar 2024 13:12:45 +0100 Subject: [PATCH 67/82] read provided ff file and use these blocks instead of making new ones --- bin/polyply | 2 ++ polyply/src/big_smile_mol_processor.py | 4 ++-- polyply/src/big_smile_parsing.py | 10 ++++++---- polyply/src/itp_to_ff.py | 18 ++++++++++++++---- 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/bin/polyply b/bin/polyply index 3cb3d0bd..68395b21 100755 --- a/bin/polyply +++ b/bin/polyply @@ -241,6 +241,8 @@ def main(): # pylint: 
disable=too-many-locals,too-many-statements parser_itp_ff.add_argument('-s', dest="smile_str", required=True) parser_itp_ff.add_argument('-o', dest="outpath", type=Path) parser_itp_ff.add_argument('-c', dest="res_charges", nargs='+', type=lambda s: s.split(':'),) + parser_itp_ff.add_argument('-f', dest='inpath', type=Path, required=False, default=[], + help='Input file (ITP|FF)', nargs='*') parser_itp_ff.set_defaults(func=itp_to_ff) diff --git a/polyply/src/big_smile_mol_processor.py b/polyply/src/big_smile_mol_processor.py index 8499e7e3..cd899655 100644 --- a/polyply/src/big_smile_mol_processor.py +++ b/polyply/src/big_smile_mol_processor.py @@ -74,8 +74,8 @@ class DefBigSmileParser: which describes a polymer molecule. """ - def __init__(self): - self.force_field = None + def __init__(self, force_field): + self.force_field = force_field self.meta_molecule = None self.molecule = None diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py index 6969a31c..57972078 100644 --- a/polyply/src/big_smile_parsing.py +++ b/polyply/src/big_smile_parsing.py @@ -250,7 +250,7 @@ def fragment_iter(fragment_str): nx.set_node_attributes(mol_graph, resname, 'resname') yield resname, mol_graph -def force_field_from_fragments(fragment_str): +def force_field_from_fragments(fragment_str, force_field=None): """ Collects the fragments defined in a BigSmile string as :class:`vermouth.molecule.Blocks` in a force-field @@ -266,9 +266,11 @@ def force_field_from_fragments(fragment_str): ------- :class:`vermouth.forcefield.ForceField` """ - force_field = ForceField("big_smile_ff") + if force_field is None: + force_field = ForceField("big_smile_ff") frag_iter = fragment_iter(fragment_str) for resname, mol_graph in frag_iter: - mol_block = Block(mol_graph) - force_field.blocks[resname] = mol_block + if resname not in force_field.blocks: + mol_block = Block(mol_graph) + force_field.blocks[resname] = mol_block return force_field diff --git a/polyply/src/itp_to_ff.py 
b/polyply/src/itp_to_ff.py index a26248d6..7ffaec93 100644 --- a/polyply/src/itp_to_ff.py +++ b/polyply/src/itp_to_ff.py @@ -20,6 +20,7 @@ from polyply.src.ffoutput import ForceFieldDirectiveWriter from polyply.src.charges import balance_charges, set_charges from polyply.src.big_smile_mol_processor import DefBigSmileParser +from .load_library import load_ff_library def _read_itp_file(itppath): """ @@ -34,10 +35,17 @@ def _read_itp_file(itppath): mol.make_edges_from_interaction_type(type_="bonds") return mol -def itp_to_ff(itppath, smile_str, outpath, res_charges=None): +def itp_to_ff(itppath, smile_str, outpath, inpath=[], res_charges=None): """ Main executable for itp to ff tool. """ + # load FF files if given + if inpath: + force_field = load_ff_library("new", None, inpath) + # if none are given we create an empty ff + else: + force_field = ForceField("new") + # what charges belong to which resname if res_charges: crg_dict = dict(res_charges) @@ -52,14 +60,16 @@ def itp_to_ff(itppath, smile_str, outpath, res_charges=None): target_mol = _read_itp_file(itppath) # read the big-smile representation - meta_mol = DefBigSmileParser().parse(smile_str) + meta_mol = DefBigSmileParser(force_field).parse(smile_str) # identify and extract all unique fragments unique_fragments, res_graph = FragmentFinder(target_mol).extract_unique_fragments(meta_mol.molecule) # extract the blocks with parameters - force_field = ForceField("new") for name, fragment in unique_fragments.items(): + # don't overwrite existing blocks + if name in force_field.blocks: + continue new_block = extract_block(target_mol, list(fragment.nodes), defines={}) nx.set_node_attributes(new_block, 1, "resid") new_block.nrexcl = target_mol.nrexcl @@ -70,7 +80,7 @@ def itp_to_ff(itppath, smile_str, outpath, res_charges=None): charge=float(crg_dict[name])) # extract the regular links - force_field.links = extract_links(target_mol) + force_field.links.append(extract_links(target_mol)) # extract links that span the 
terminii find_termini_mods(res_graph, target_mol, force_field) From d97632d57a427479f1a77fe380acc41df97ef3d1 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Mon, 4 Mar 2024 15:35:24 +0100 Subject: [PATCH 68/82] adjust doc string --- polyply/src/big_smile_mol_processor.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/polyply/src/big_smile_mol_processor.py b/polyply/src/big_smile_mol_processor.py index f474fe76..b533e818 100644 --- a/polyply/src/big_smile_mol_processor.py +++ b/polyply/src/big_smile_mol_processor.py @@ -83,9 +83,10 @@ def edges_from_bonding_descrpt(self): """ Make edges according to the bonding descriptors stored in the node attributes of meta_molecule residue graph. - If a bonding descriptor is consumed it is set to None, + If a bonding descriptor is consumed it is removed from the list, however, the meta_molecule edge gets an attribute with the - bonding descriptors that formed the edge. + bonding descriptors that formed the edge. Later uncomsumed + bonding descriptors are replaced by hydrogen atoms. 
""" for prev_node, node in nx.dfs_edges(self.meta_molecule): prev_graph = self.meta_molecule.nodes[prev_node]['graph'] From b6acc737c2e7e0ec426bc5ac5fd3172feff1f4e3 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Mon, 4 Mar 2024 16:35:33 +0100 Subject: [PATCH 69/82] skip termini mods if none atoms are different --- polyply/src/molecule_utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/polyply/src/molecule_utils.py b/polyply/src/molecule_utils.py index a960ca85..f4249d74 100644 --- a/polyply/src/molecule_utils.py +++ b/polyply/src/molecule_utils.py @@ -286,6 +286,10 @@ def find_termini_mods(meta_molecule, molecule, force_field): for attr in ['atype', 'mass']: if target_attrs[attr] != ref_attrs[attr]: replace_dict[node][attr] = target_attrs[attr] + # a little dangerous but mostly ok; if there are no changes to + # the atoms we can continue + if len(replace_dict) == 0: + continue # bonded interactions could be different too so we need to check them overwrite_inters = defaultdict(list) From 9d9ee891c4ac8e56c99bf02722f298889c9dfc71 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Wed, 6 Mar 2024 17:52:18 +0100 Subject: [PATCH 70/82] redo hydrogen based on valency not based on how many bonding descriptors are leftover --- polyply/src/big_smile_mol_processor.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/polyply/src/big_smile_mol_processor.py b/polyply/src/big_smile_mol_processor.py index 461801ce..640c40e1 100644 --- a/polyply/src/big_smile_mol_processor.py +++ b/polyply/src/big_smile_mol_processor.py @@ -1,8 +1,12 @@ import networkx as nx +import pysmiles from polyply.src.big_smile_parsing import (res_pattern_to_meta_mol, force_field_from_fragments) from polyply.src.map_to_molecule import MapToMolecule +VALENCES = pysmiles.smiles_helper.VALENCES +VALENCES.update({"H":(1,)}) + def compatible(left, right): """ Check bonding descriptor compatibility according @@ -115,9 +119,12 @@ def 
replace_unconsumed_bonding_descrpt(self): graph = self.meta_molecule.nodes[node]['graph'] bonding = nx.get_node_attributes(graph, "bonding") for node, bondings in bonding.items(): + element = graph.nodes[node]['element'] + hcount = VALENCES[element][0] -\ + self.meta_molecule.molecule.degree(node) + 1 attrs = {attr: graph.nodes[node][attr] for attr in ['resname', 'resid']} attrs['element'] = 'H' - for new_id in range(1, len(bondings)+1): + for new_id in range(1, hcount): new_node = len(self.meta_molecule.molecule.nodes) + 1 graph.add_edge(node, new_node) attrs['atomname'] = "H" + str(new_id + len(graph.nodes)) From c4f16527532172f887689b86bc3659284ce33f97 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Wed, 6 Mar 2024 18:00:36 +0100 Subject: [PATCH 71/82] parse force-field in molprocessor, adjust hydrogen reconstruction --- polyply/src/big_smile_mol_processor.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/polyply/src/big_smile_mol_processor.py b/polyply/src/big_smile_mol_processor.py index b533e818..640c40e1 100644 --- a/polyply/src/big_smile_mol_processor.py +++ b/polyply/src/big_smile_mol_processor.py @@ -1,8 +1,12 @@ import networkx as nx +import pysmiles from polyply.src.big_smile_parsing import (res_pattern_to_meta_mol, force_field_from_fragments) from polyply.src.map_to_molecule import MapToMolecule +VALENCES = pysmiles.smiles_helper.VALENCES +VALENCES.update({"H":(1,)}) + def compatible(left, right): """ Check bonding descriptor compatibility according @@ -74,8 +78,8 @@ class DefBigSmileParser: which describes a polymer molecule. 
""" - def __init__(self): - self.force_field = None + def __init__(self, force_field): + self.force_field = force_field self.meta_molecule = None self.molecule = None @@ -115,9 +119,12 @@ def replace_unconsumed_bonding_descrpt(self): graph = self.meta_molecule.nodes[node]['graph'] bonding = nx.get_node_attributes(graph, "bonding") for node, bondings in bonding.items(): + element = graph.nodes[node]['element'] + hcount = VALENCES[element][0] -\ + self.meta_molecule.molecule.degree(node) + 1 attrs = {attr: graph.nodes[node][attr] for attr in ['resname', 'resid']} attrs['element'] = 'H' - for new_id in range(1, len(bondings)+1): + for new_id in range(1, hcount): new_node = len(self.meta_molecule.molecule.nodes) + 1 graph.add_edge(node, new_node) attrs['atomname'] = "H" + str(new_id + len(graph.nodes)) From 7a5dd1f74e76e8c15103a5ad93e9490d253403bf Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Wed, 6 Mar 2024 19:12:26 +0100 Subject: [PATCH 72/82] fix tests --- polyply/tests/test_big_smile_mol_proc.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/polyply/tests/test_big_smile_mol_proc.py b/polyply/tests/test_big_smile_mol_proc.py index 26e85ba6..28c5390d 100644 --- a/polyply/tests/test_big_smile_mol_proc.py +++ b/polyply/tests/test_big_smile_mol_proc.py @@ -1,5 +1,6 @@ import pytest import networkx as nx +from vermouth.forcefield import ForceField from polyply.src.big_smile_mol_processor import (DefBigSmileParser, generate_edge) #import matplotlib.pyplot as plt @@ -83,7 +84,8 @@ def test_generate_edge(bonds_source, bonds_target, edge, btypes): )) def test_def_big_smile_parser(smile, ref_nodes, ref_edges): - meta_mol = DefBigSmileParser().parse(smile) + ff = ForceField("new") + meta_mol = DefBigSmileParser(ff).parse(smile) # nx.draw_networkx(meta_mol.molecule, with_labels=True, labels=nx.get_node_attributes(meta_mol.molecule, 'element')) # plt.show() for node, ref in zip(meta_mol.nodes, ref_nodes): From 
2b9e7a9cdc414a3961efb802e667db74a7572a56 Mon Sep 17 00:00:00 2001 From: "Dr. Fabian Grunewald" <32294573+fgrunewald@users.noreply.github.com> Date: Wed, 6 Mar 2024 19:16:53 +0100 Subject: [PATCH 73/82] Apply suggestions from code review Co-authored-by: Peter C Kroon --- polyply/src/big_smile_mol_processor.py | 9 +++------ polyply/src/big_smile_parsing.py | 16 ++++++++-------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/polyply/src/big_smile_mol_processor.py b/polyply/src/big_smile_mol_processor.py index 640c40e1..365b61bc 100644 --- a/polyply/src/big_smile_mol_processor.py +++ b/polyply/src/big_smile_mol_processor.py @@ -23,12 +23,9 @@ def compatible(left, right): """ if left == right and left not in '> <': return True - if left[0] == "<" and right[0] == ">": - if left[1:] == right[1:]: - return True - if left[0] == ">" and right[0] == "<": - if left[1:] == right[1:]: - return True + l, r = left[0], right[0] + if (l, r) == ('<', '>') or (l, r) == ('>', '<'): + return left[1:] == right[1:] return False def generate_edge(source, target, bond_type="bonding"): diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py index 55d3a6aa..90e171a3 100644 --- a/polyply/src/big_smile_parsing.py +++ b/polyply/src/big_smile_parsing.py @@ -3,10 +3,10 @@ import numpy as np try: import pysmiles -except ImportError: +except ImportError as error: msg = ("You are using a functionality that requires " "the pysmiles package. Use pip install pysmiles ") - raise ImportError(msg) + raise ImportError(msg) from error import networkx as nx from vermouth.forcefield import ForceField from vermouth.molecule import Block @@ -41,7 +41,7 @@ def res_pattern_to_meta_mol(pattern): '{' + [#resname_1][#resname_2]... + '}' In addition to plain enumeration any residue may be - followed by a '|' and an integern number that + followed by a '|' and an integer number that specifies how many times the given residue should be added within a sequence. 
For example, a pentamer of PEO can be written as: @@ -52,10 +52,10 @@ def res_pattern_to_meta_mol(pattern): {[#PEO]|5} - The block syntax also applies to branches. Here the convetion + The block syntax also applies to branches. Here the convention is that the complete branch including it's first anchoring residue is repeated. For example, to generate a PMA-g-PEG - polymer the following syntax is permitted: + polymer containing 15 residues the following syntax is permitted: {[#PMA]([#PEO][#PEO])|5} @@ -79,7 +79,7 @@ def res_pattern_to_meta_mol(pattern): if pattern[start-1] == '(': branching = True branch_anchor = prev_node - recipie = [(meta_mol.nodes[prev_node]['resname'], 1)] + recipe = [(meta_mol.nodes[prev_node]['resname'], 1)] if stop < len(pattern) and pattern[stop] == '|': eon = _find_next_character(pattern, ['[', ')', '(', '}'], stop) n_mon = int(pattern[stop+1:eon]) @@ -89,7 +89,7 @@ def res_pattern_to_meta_mol(pattern): resname = match.group(0)[2:-1] # collect all residues in branch if branching: - recipie.append((resname, n_mon)) + recipe.append((resname, n_mon)) # add the new residue connection = [] @@ -135,7 +135,7 @@ def tokenize_big_smile(big_smile): """ Processes a BigSmile string by storing the the BigSmile specific bonding descriptors - in a dict with refernce to the atom they + in a dict with reference to the atom they refer to. Furthermore, a cleaned smile string is generated with the BigSmile specific syntax removed. 
From b6d891f6f32bbc60ed96fd30d75953f717d21117 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Thu, 7 Mar 2024 14:24:47 +0100 Subject: [PATCH 74/82] allow nested branch expansion --- polyply/src/big_smile_parsing.py | 81 ++++++++++++++++++++++++-------- 1 file changed, 62 insertions(+), 19 deletions(-) diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py index 90e171a3..e396a5e3 100644 --- a/polyply/src/big_smile_parsing.py +++ b/polyply/src/big_smile_parsing.py @@ -24,6 +24,22 @@ def _find_next_character(string, chars, start): return idx+start return np.inf +def _expand_branch(meta_mol, current, anchor, recipe): + prev_node = anchor + for bdx, (resname, n_mon) in enumerate(recipe): + if bdx == 0: + anchor = current + for _ in range(0, n_mon): + connection = [(prev_node, current)] + print(connection) + meta_mol.add_monomer(current, + resname, + connection) + prev_node = current + current += 1 + prev_node = anchor + return meta_mol, current, prev_node + def res_pattern_to_meta_mol(pattern): """ Generate a :class:`polyply.MetaMolecule` from a @@ -70,17 +86,30 @@ def res_pattern_to_meta_mol(pattern): """ meta_mol = MetaMolecule() current = 0 - branch_anchor = 0 + # stores one or more branch anchors; each next + # anchor belongs to a nested branch + branch_anchor = [] + # used for storing composition protocol for + # for branches; each entry is a list of + # branches from extending from the anchor + # point + recipes = defaultdict(list) + # the previous node prev_node = None + # do we have an open branch branching = False for match in re.finditer(PATTERNS['place_holder'], pattern): start, stop = match.span() # new branch here if pattern[start-1] == '(': branching = True - branch_anchor = prev_node - recipe = [(meta_mol.nodes[prev_node]['resname'], 1)] + branch_anchor.append(prev_node) + # the recipe for making the branch includes the anchor; which + # is hence the first atom in the list + if len(branch_anchor) == 1: + 
recipes[branch_anchor[-1]] = [(meta_mol.nodes[prev_node]['resname'], 1)] if stop < len(pattern) and pattern[stop] == '|': + # eon => end of next eon = _find_next_character(pattern, ['[', ')', '(', '}'], stop) n_mon = int(pattern[stop+1:eon]) else: @@ -89,7 +118,7 @@ def res_pattern_to_meta_mol(pattern): resname = match.group(0)[2:-1] # collect all residues in branch if branching: - recipe.append((resname, n_mon)) + recipes[branch_anchor[-1]].append((resname, n_mon)) # add the new residue connection = [] @@ -105,26 +134,40 @@ def res_pattern_to_meta_mol(pattern): # terminate branch and jump back to anchor branch_stop = _find_next_character(pattern, ['['], stop) >\ _find_next_character(pattern, [')'], stop) - if stop <= len(pattern) and branch_stop and branching: + + if stop <= len(pattern) and branch_stop: # and branching: branching = False - prev_node = branch_anchor + prev_node = branch_anchor.pop() + if branch_anchor: + branching = True # we have to multiply the branch n-times eon_a = _find_next_character(pattern, [')'], stop) if stop+1 < len(pattern) and pattern[eon_a+1] == "|": eon_b = _find_next_character(pattern, ['[', ')', '(', '}'], eon_a+1) - # -1 because one branch has already been added at this point - for _ in range(0,int(pattern[eon_a+2:eon_b])-1): - for bdx, (resname, n_mon) in enumerate(recipie): - if bdx == 0: - anchor = current - for _ in range(0, n_mon): - connection = [(prev_node, current)] - meta_mol.add_monomer(current, - resname, - connection) - prev_node = current - current += 1 - prev_node = anchor + # the outermost loop goes over how often a the branch has to be + # added to the existing sequence + for idx in range(0,int(pattern[eon_a+2:eon_b])-1): + prev_anchor = None + skip = 0 + for ref_anchor, recipe in list(recipes.items())[len(branch_anchor):]: + print("-->", recipe) + if prev_anchor: + offset = ref_anchor - prev_anchor + prev_node = prev_node + offset + #skip = 1 + print(prev_node) + meta_mol, current, prev_node = 
_expand_branch(meta_mol, + current=current, + anchor=prev_node, + recipe=recipe) #[skip:]) + if prev_anchor is None: + base_anchor = prev_node + prev_anchor = ref_anchor + print(base_anchor) + prev_node = base_anchor + # if all branches are done we need to reset the lists + # branch_anchor = [] + # recipes = defaultdict(list) return meta_mol def _big_smile_iter(smile): From a867329a82d2f4988e43839b84357032841534a4 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Thu, 7 Mar 2024 14:56:16 +0100 Subject: [PATCH 75/82] test branch expansion --- polyply/src/big_smile_parsing.py | 17 +++--- polyply/tests/test_big_smile_parsing.py | 69 +++++++++++++++++++++++-- 2 files changed, 73 insertions(+), 13 deletions(-) diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py index e396a5e3..d93265ec 100644 --- a/polyply/src/big_smile_parsing.py +++ b/polyply/src/big_smile_parsing.py @@ -31,7 +31,6 @@ def _expand_branch(meta_mol, current, anchor, recipe): anchor = current for _ in range(0, n_mon): connection = [(prev_node, current)] - print(connection) meta_mol.add_monomer(current, resname, connection) @@ -106,8 +105,8 @@ def res_pattern_to_meta_mol(pattern): branch_anchor.append(prev_node) # the recipe for making the branch includes the anchor; which # is hence the first atom in the list - if len(branch_anchor) == 1: - recipes[branch_anchor[-1]] = [(meta_mol.nodes[prev_node]['resname'], 1)] + #if len(branch_anchor) == 1: + recipes[branch_anchor[-1]] = [(meta_mol.nodes[prev_node]['resname'], 1)] if stop < len(pattern) and pattern[stop] == '|': # eon => end of next eon = _find_next_character(pattern, ['[', ')', '(', '}'], stop) @@ -150,24 +149,22 @@ def res_pattern_to_meta_mol(pattern): prev_anchor = None skip = 0 for ref_anchor, recipe in list(recipes.items())[len(branch_anchor):]: - print("-->", recipe) if prev_anchor: offset = ref_anchor - prev_anchor prev_node = prev_node + offset - #skip = 1 - print(prev_node) + skip = 1 meta_mol, current, 
prev_node = _expand_branch(meta_mol, current=current, anchor=prev_node, - recipe=recipe) #[skip:]) + recipe=recipe[skip:]) if prev_anchor is None: base_anchor = prev_node prev_anchor = ref_anchor - print(base_anchor) prev_node = base_anchor # if all branches are done we need to reset the lists - # branch_anchor = [] - # recipes = defaultdict(list) + # when all nested branches are completed + if len(branch_anchor) == 0: + recipes = defaultdict(list) return meta_mol def _big_smile_iter(smile): diff --git a/polyply/tests/test_big_smile_parsing.py b/polyply/tests/test_big_smile_parsing.py index ba3f5f69..5c1491b8 100644 --- a/polyply/tests/test_big_smile_parsing.py +++ b/polyply/tests/test_big_smile_parsing.py @@ -22,11 +22,13 @@ ["PMA", "PMA", "PMA"], [(0, 1), (1, 2)]), # simple branch expension - ("{[#PMA]([#PEO][#PEO][#OHter])|2}", + ("{[#PMA]([#PEO][#PEO][#OHter])|3}", ["PMA", "PEO", "PEO", "OHter", + "PMA", "PEO", "PEO", "OHter", "PMA", "PEO", "PEO", "OHter"], [(0, 1), (1, 2), (2, 3), - (0, 4), (4, 5), (5, 6), (6, 7)] + (0, 4), (4, 5), (5, 6), (6, 7), + (4, 8), (8, 9), (9, 10), (10, 11)] ), # nested branched with expansion ("{[#PMA]([#PEO]|3)|2}", @@ -34,7 +36,68 @@ "PMA", "PEO", "PEO", "PEO"], [(0, 1), (1, 2), (2, 3), (0, 4), (4, 5), (5, 6), (6, 7)] - ) + ), + # nested braching + # 0 1 2 3 4 5 6 + ("{[#PMA][#PMA]([#PEO][#PEO]([#OH])[#PEO])[#PMA]}", + ["PMA", "PMA", "PEO", "PEO", "OH", + "PEO", "PMA"], + [(0, 1), (1, 2), (2, 3), + (3, 4), (3, 5), (1, 6)] + ), + # nested braching plus expansion + # 0 1 2 3 4/5 6 7 + ("{[#PMA][#PMA]([#PEO][#PEO]([#OH]|2)[#PEO])[#PMA]}", + ["PMA", "PMA", "PEO", "PEO", "OH", "OH", + "PEO", "PMA"], + [(0, 1), (1, 2), (2, 3), + (3, 4), (4, 5), (3, 6), (1, 7)] + ), + # nested braching plus expansion incl. 
branch + # 0 1 2 3 4 5 + # 6 7 8 9 10 11 + ("{[#PMA][#PMA]([#PEO][#PEO]([#OH])[#PEO])|2[#PMA]}", + ["PMA", "PMA", "PEO", "PEO", "OH", "PEO", + "PMA", "PEO", "PEO", "PEO", "OH", "PMA"], + [(0, 1), (1, 2), (2, 3), + (3, 4), (3, 5), (1, 6), (6, 7), (7, 8), + (8, 9), (8, 10), (6, 11)] + ), + # nested braching plus expansion of nested branch + # here the nested branch is expended + # 0 - 1 - 10 + # | + # 2 + # | + # 3 {- 5 - 7 } - 9 -> the expanded fragment + # | | | + # 4 6 8 + ("{[#PMA][#PMA]([#PEO][#PQ]([#OH])|3[#PEO])[#PMA]}", + ["PMA", "PMA", "PEO", "PQ", "OH", + "PQ", "OH", "PQ", "OH", "PEO", "PMA"], + [(0, 1), (1, 2), (1, 10), + (2, 3), (3, 4), (3, 5), (5, 6), + (5, 7), (7, 8), (7, 9)] + ), + # nested braching plus expansion of nested branch + # here the nested branch is expended and a complete + # new branch is added + # 11 13 + # | | + # 0 - 1 - 10 - 12 + # | + # 2 + # | + # 3 {- 5 - 7 } - 9 -> the expanded fragment + # | | | + # 4 6 8 + ("{[#PMA][#PMA]([#PEO][#PQ]([#OH])|3[#PEO])[#PMA]([#CH3])|2}", + ["PMA", "PMA", "PEO", "PQ", "OH", + "PQ", "OH", "PQ", "OH", "PEO", "PMA", "CH3", "PMA", "CH3"], + [(0, 1), (1, 2), (1, 10), + (2, 3), (3, 4), (3, 5), (5, 6), + (5, 7), (7, 8), (7, 9), (10, 11), (10, 12), (12, 13)] + ), )) def test_res_pattern_to_meta_mol(smile, nodes, edges): """ From b6f5cc0d4a101948ca853cf1226afb598f6b96f3 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Thu, 7 Mar 2024 15:19:09 +0100 Subject: [PATCH 76/82] add comments all over residue expansion functions --- polyply/src/big_smile_parsing.py | 62 +++++++++++++++++++++++++++----- 1 file changed, 54 insertions(+), 8 deletions(-) diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py index d93265ec..b043ebd9 100644 --- a/polyply/src/big_smile_parsing.py +++ b/polyply/src/big_smile_parsing.py @@ -97,29 +97,42 @@ def res_pattern_to_meta_mol(pattern): prev_node = None # do we have an open branch branching = False + # each element in the for loop matches a pattern + # 
'[' + '#' + some alphanumeric name + ']' for match in re.finditer(PATTERNS['place_holder'], pattern): start, stop = match.span() - # new branch here + # we start a new branch when the residue is preceded by '(' + # as in ... ([#PEO] ... if pattern[start-1] == '(': branching = True branch_anchor.append(prev_node) # the recipe for making the branch includes the anchor; which - # is hence the first atom in the list - #if len(branch_anchor) == 1: + # is hence the first residue in the list recipes[branch_anchor[-1]] = [(meta_mol.nodes[prev_node]['resname'], 1)] + # here we check if the atom is followed by a expansion character '|' + # as in ... [#PEO]| if stop < len(pattern) and pattern[stop] == '|': # eon => end of next + # we find the next character that starts a new residue, ends a branch or + # ends the complete pattern eon = _find_next_character(pattern, ['[', ')', '(', '}'], stop) + # between the expansion character and the eon character + # is any number that correspnds to the number of times (i.e. monomers) + # that this atom should be added n_mon = int(pattern[stop+1:eon]) else: n_mon = 1 + # the resname starts at the second character and ends + # one before the last according to the above pattern resname = match.group(0)[2:-1] - # collect all residues in branch + # if this residue is part of a branch we store it in + # the recipe dict together with the anchor residue + # and expansion number if branching: recipes[branch_anchor[-1]].append((resname, n_mon)) - # add the new residue + # new we add new residue as often as required connection = [] for _ in range(0, n_mon): if prev_node is not None: @@ -130,36 +143,69 @@ def res_pattern_to_meta_mol(pattern): prev_node = current current += 1 - # terminate branch and jump back to anchor + # here we check if the residue considered before is the + # last residue of a branch (i.e. 
'...[#residue])' + # that is the case if the branch closure comes before + # any new atom begins branch_stop = _find_next_character(pattern, ['['], stop) >\ _find_next_character(pattern, [')'], stop) - if stop <= len(pattern) and branch_stop: # and branching: + # if the branch ends we reset the anchor + # and set branching False unless we are in + # a nested branch + if stop <= len(pattern) and branch_stop: branching = False prev_node = branch_anchor.pop() if branch_anchor: branching = True - # we have to multiply the branch n-times + #======================================== + # expansion for branches + #======================================== + # We need to know how often the branch has + # to be added so we first identify the branch + # terminal character ')' called eon_a. eon_a = _find_next_character(pattern, [')'], stop) + # Then we check if the expansion character + # is next. if stop+1 < len(pattern) and pattern[eon_a+1] == "|": + # If there is one we find the beginning + # of the next branch, residue or end of the string + # As before all characters inbetween are a number that + # is how often the branch is expanded. eon_b = _find_next_character(pattern, ['[', ')', '(', '}'], eon_a+1) # the outermost loop goes over how often a the branch has to be # added to the existing sequence for idx in range(0,int(pattern[eon_a+2:eon_b])-1): prev_anchor = None skip = 0 + # in principle each branch can contain any number of nested branches + # each branch is itself a recipe that has an anchor atom for ref_anchor, recipe in list(recipes.items())[len(branch_anchor):]: + # starting from the first nested branch we have to do some + # math to find the anchor atom relative to the first branch + # we also skip the first residue in recipe, which is the + # anchor residue. Only the outermost branch in an expansion + # is expanded including the anchor. This allows easy description + # of graft polymers. 
if prev_anchor: offset = ref_anchor - prev_anchor prev_node = prev_node + offset skip = 1 + # this function simply adds the residues of the paticular + # branch meta_mol, current, prev_node = _expand_branch(meta_mol, current=current, anchor=prev_node, recipe=recipe[skip:]) + # if this is the first branch we want to set the anchor + # as the base anchor to which we jump back after all nested + # branches have been added if prev_anchor is None: base_anchor = prev_node + # store the previous anchor so we can do the math for nested + # branches prev_anchor = ref_anchor + # all branches added; then go back to the base anchor prev_node = base_anchor # if all branches are done we need to reset the lists # when all nested branches are completed From f965e1d42c6b3afb41b690407c36aeee0a4e8493 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Thu, 7 Mar 2024 15:33:45 +0100 Subject: [PATCH 77/82] address comments --- polyply/src/big_smile_mol_processor.py | 8 ++++---- polyply/src/big_smile_parsing.py | 12 ++++-------- polyply/tests/test_big_smile_mol_proc.py | 2 +- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/polyply/src/big_smile_mol_processor.py b/polyply/src/big_smile_mol_processor.py index 365b61bc..e706217a 100644 --- a/polyply/src/big_smile_mol_processor.py +++ b/polyply/src/big_smile_mol_processor.py @@ -28,7 +28,7 @@ def compatible(left, right): return left[1:] == right[1:] return False -def generate_edge(source, target, bond_type="bonding"): +def generate_edge(source, target, bond_attribute="bonding"): """ Given a source and a target graph, which have bonding descriptors stored as node attributes, find a pair of @@ -41,7 +41,7 @@ def generate_edge(source, target, bond_type="bonding"): ---------- source: :class:`nx.Graph` target: :class:`nx.Graph` - bond_type: `abc.hashable` + bond_attribute: `abc.hashable` under which attribute are the bonding descriptors stored. 
@@ -55,8 +55,8 @@ def generate_edge(source, target, bond_type="bonding"): LookupError if no match is found """ - source_nodes = nx.get_node_attributes(source, bond_type) - target_nodes = nx.get_node_attributes(target, bond_type) + source_nodes = nx.get_node_attributes(source, bond_attribute) + target_nodes = nx.get_node_attributes(target, bond_attribute) for source_node in source_nodes: for target_node in target_nodes: #print(source_node, target_node) diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py index b043ebd9..d591eecd 100644 --- a/polyply/src/big_smile_parsing.py +++ b/polyply/src/big_smile_parsing.py @@ -213,10 +213,6 @@ def res_pattern_to_meta_mol(pattern): recipes = defaultdict(list) return meta_mol -def _big_smile_iter(smile): - for token in smile: - yield token - def tokenize_big_smile(big_smile): """ Processes a BigSmile string by storing the @@ -229,17 +225,17 @@ def tokenize_big_smile(big_smile): Parameters ---------- smile: str - a BigSmile smile string + a BigSmile smiles string Returns ------- str - a canonical smile string + a canonical smiles string dict a dict mapping bonding descriptors - to the nodes within the smile + to the nodes within the smiles string """ - smile_iter = _big_smile_iter(big_smile) + smile_iter = iter(big_smile) bonding_descrpt = defaultdict(list) smile = "" node_count = 0 diff --git a/polyply/tests/test_big_smile_mol_proc.py b/polyply/tests/test_big_smile_mol_proc.py index 28c5390d..c40f96bd 100644 --- a/polyply/tests/test_big_smile_mol_proc.py +++ b/polyply/tests/test_big_smile_mol_proc.py @@ -38,7 +38,7 @@ def test_generate_edge(bonds_source, bonds_target, edge, btypes): target = nx.path_graph(4) nx.set_node_attributes(source, bonds_source, "bonding") nx.set_node_attributes(target, bonds_target, "bonding") - new_edge, new_btypes = generate_edge(source, target, bond_type="bonding") + new_edge, new_btypes = generate_edge(source, target, bond_attribute="bonding") assert new_edge == edge 
assert new_btypes == btypes From 0335956072b84fcc8d59f2d9b6264917ce971879 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Thu, 7 Mar 2024 16:00:29 +0100 Subject: [PATCH 78/82] allow for ionic bonds with . syntax --- polyply/src/big_smile_mol_processor.py | 6 +++++- polyply/src/big_smile_parsing.py | 4 ++++ polyply/tests/test_big_smile_mol_proc.py | 10 ++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/polyply/src/big_smile_mol_processor.py b/polyply/src/big_smile_mol_processor.py index e706217a..1801a437 100644 --- a/polyply/src/big_smile_mol_processor.py +++ b/polyply/src/big_smile_mol_processor.py @@ -1,3 +1,4 @@ +import re import networkx as nx import pysmiles from polyply.src.big_smile_parsing import (res_pattern_to_meta_mol, @@ -130,10 +131,13 @@ def replace_unconsumed_bonding_descrpt(self): self.meta_molecule.molecule.nodes[new_node].update(attrs) def parse(self, big_smile_str): - res_pattern, residues = big_smile_str.split('.') + res_pattern, residues = re.findall(r"\{[^\}]+\}", big_smile_str) self.meta_molecule = res_pattern_to_meta_mol(res_pattern) self.force_field = force_field_from_fragments(residues) MapToMolecule(self.force_field).run_molecule(self.meta_molecule) self.edges_from_bonding_descrpt() self.replace_unconsumed_bonding_descrpt() return self.meta_molecule + +# ToDo +# - clean copying of bond-list attributes L100 diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py index d591eecd..ec136bea 100644 --- a/polyply/src/big_smile_parsing.py +++ b/polyply/src/big_smile_parsing.py @@ -361,3 +361,7 @@ def force_field_from_fragments(fragment_str): mol_block = Block(mol_graph) force_field.blocks[resname] = mol_block return force_field + +# ToDos +# - remove special case hydrogen line 327ff +# - check rebuild_h and clean up diff --git a/polyply/tests/test_big_smile_mol_proc.py b/polyply/tests/test_big_smile_mol_proc.py index c40f96bd..b6fe8e03 100644 --- a/polyply/tests/test_big_smile_mol_proc.py +++ 
b/polyply/tests/test_big_smile_mol_proc.py @@ -53,6 +53,16 @@ def test_generate_edge(bonds_source, bonds_target, edge, btypes): [(0, 1), (0, 2), (2, 3), (3, 4), (2, 5), (2, 6), (4, 7), (4, 8), (4, 9), (9, 10), (10, 11), (9, 12), (9, 13), (11, 14), (11, 15), (11, 16), (16, 17)]), + # smiple linear seqeunce with ionic bond + ("{[#OHter][#PEO]|2[#OHter]}.{#PEO=[$]COC[$],#OHter=[$][O].[Na+]}", + # 0 1 2 3 4 5 6 7 8 + [('OHter', 'O Na'), ('PEO', 'C O C H H H H'), + # 9 10 11 12 13 14 15 16 17 + ('PEO', 'C O C H H H H'), ('OHter', 'O Na')], + [(0, 1), (0, 2), (2, 3), (3, 4), (2, 5), (2, 6), (4, 7), + (4, 8), (4, 9), (9, 10), (10, 11), (9, 12), (9, 13), + (11, 14), (11, 15), (11, 16), (16, 17)]), + # uncomsumed bonding IDs; note that this is not the same # molecule as previous test case. Here one of the OH branches # and replaces an CH2 group with CH-OH From 47fef2382fa3a35d895ac0c0d2c852ab499a4274 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Thu, 7 Mar 2024 17:01:37 +0100 Subject: [PATCH 79/82] fix previous issue with link appending --- polyply/src/itp_to_ff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py index 7ffaec93..14437fe1 100644 --- a/polyply/src/itp_to_ff.py +++ b/polyply/src/itp_to_ff.py @@ -80,7 +80,7 @@ def itp_to_ff(itppath, smile_str, outpath, inpath=[], res_charges=None): charge=float(crg_dict[name])) # extract the regular links - force_field.links.append(extract_links(target_mol)) + force_field.links += extract_links(target_mol) # extract links that span the terminii find_termini_mods(res_graph, target_mol, force_field) From 7f7fe21c055d272e077ef566cf2b906c56917dc4 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Thu, 7 Mar 2024 18:02:49 +0100 Subject: [PATCH 80/82] update itp_to_ff tests --- .../tests/test_data/itp_to_ff/ACOL/seq.txt | 2 +- .../tests/test_data/itp_to_ff/PEG_PBE/ref.itp | 14 ++++-- .../tests/test_data/itp_to_ff/PEG_PBE/seq.txt | 2 +- 
polyply/tests/test_itp_to_ff.py | 45 ++++++++++++------- 4 files changed, 40 insertions(+), 23 deletions(-) diff --git a/polyply/tests/test_data/itp_to_ff/ACOL/seq.txt b/polyply/tests/test_data/itp_to_ff/ACOL/seq.txt index 1a088a04..db7ea3e6 100644 --- a/polyply/tests/test_data/itp_to_ff/ACOL/seq.txt +++ b/polyply/tests/test_data/itp_to_ff/ACOL/seq.txt @@ -1 +1 @@ -Mter M AOL M Mter_1 +ter1 PMA AOL PMA ter2 diff --git a/polyply/tests/test_data/itp_to_ff/PEG_PBE/ref.itp b/polyply/tests/test_data/itp_to_ff/PEG_PBE/ref.itp index 53941636..b878a1a1 100644 --- a/polyply/tests/test_data/itp_to_ff/PEG_PBE/ref.itp +++ b/polyply/tests/test_data/itp_to_ff/PEG_PBE/ref.itp @@ -289,6 +289,8 @@ new 3 9 26 1 ; link 19 36 1 ; link 29 45 1 ; link +; added manually +39 53 1 [ angles ] 2 1 4 1 107.800 276.144 @@ -404,7 +406,8 @@ new 3 53 47 50 1 110.700 313.800 ; link [ dihedrals ] - 5 8 7 14 4 180.000 10.460 2 +; 5 8 7 14 4 180.000 10.460 2 +14 8 7 5 4 180.000 10.460 2 13 7 6 8 4 180.000 10.460 2 9 6 7 8 3 0.527 -6.397 -1.695 7.565 -0.000 0.000 14 8 7 6 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 @@ -418,7 +421,8 @@ new 3 11 9 6 7 3 0.766 2.297 0.000 -3.063 -0.000 0.000 11 9 6 12 3 0.628 1.883 0.000 -2.510 -0.000 0.000 10 9 6 12 3 0.628 1.883 0.000 -2.510 -0.000 0.000 -15 18 17 24 4 180.000 10.460 2 +;15 18 17 24 4 180.000 10.460 2 +24 18 17 15 4 180.000 10.460 2 23 17 16 18 4 180.000 10.460 2 19 16 17 18 3 0.527 -6.397 -1.695 7.565 -0.000 0.000 24 18 17 16 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 @@ -432,7 +436,8 @@ new 3 21 19 16 17 3 0.766 2.297 0.000 -3.063 -0.000 0.000 21 19 16 22 3 0.628 1.883 0.000 -2.510 -0.000 0.000 20 19 16 22 3 0.628 1.883 0.000 -2.510 -0.000 0.000 -25 28 27 34 4 180.000 10.460 2 +;25 28 27 34 4 180.000 10.460 2 +34 28 27 25 4 180.000 10.460 2 33 27 26 28 4 180.000 10.460 2 29 26 27 28 3 0.527 -6.397 -1.695 7.565 -0.000 0.000 34 28 27 26 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 @@ -446,7 +451,8 @@ new 3 31 29 26 27 3 0.766 2.297 0.000 -3.063 -0.000 
0.000 31 29 26 32 3 0.628 1.883 0.000 -2.510 -0.000 0.000 30 29 26 32 3 0.628 1.883 0.000 -2.510 -0.000 0.000 -35 38 37 44 4 180.000 10.460 2 +;35 38 37 44 4 180.000 10.460 2 +44 38 37 35 4 180.000 10.460 2 43 37 36 38 4 180.000 10.460 2 39 36 37 38 3 0.527 -6.397 -1.695 7.565 -0.000 0.000 44 38 37 36 3 58.576 0.000 -58.576 -0.000 -0.000 0.000 diff --git a/polyply/tests/test_data/itp_to_ff/PEG_PBE/seq.txt b/polyply/tests/test_data/itp_to_ff/PEG_PBE/seq.txt index 408d9986..5225a4e5 100644 --- a/polyply/tests/test_data/itp_to_ff/PEG_PBE/seq.txt +++ b/polyply/tests/test_data/itp_to_ff/PEG_PBE/seq.txt @@ -1 +1 @@ -CH3ter PBE PBE PBE PBE PEO PEOter +CH3ter PBE PBE PBE PBE PEO PEO OHter diff --git a/polyply/tests/test_itp_to_ff.py b/polyply/tests/test_itp_to_ff.py index 13afaf0a..caa6f66a 100644 --- a/polyply/tests/test_itp_to_ff.py +++ b/polyply/tests/test_itp_to_ff.py @@ -54,31 +54,44 @@ def itp_equal(ref_mol, new_mol): atol=0.1) for inter_type in new_mol.interactions: + print(inter_type) + print(len(new_mol.interactions[inter_type]), len(ref_mol.interactions[inter_type])) assert len(new_mol.interactions[inter_type]) == len(ref_mol.interactions[inter_type]) for inter in new_mol.interactions[inter_type]: - new_atoms = [match[atom] for atom in inter.atoms] + new_atoms = tuple([match[atom] for atom in inter.atoms]) new_inter = Interaction(atoms=new_atoms, parameters=inter.parameters, meta=inter.meta) + print(new_inter) for other_inter in ref_mol.interactions[inter_type]: - if _interaction_equal(inter, other_inter, inter_type): + if _interaction_equal(new_inter, other_inter, inter_type): break else: + print("--") assert False return True -@pytest.mark.parametrize("case, fname, smiles, resnames, charges", [ - ("PEO_OHter", "in_itp.itp", ["[OH][CH2]", "[CH2]O[CH2]", "[CH2][OH]"], - ["OH", "PEO", "OH"], [0, 0, 0]), - ("PEG_PBE", "in_itp.itp", ["[CH3]", "[CH2][CH][CH][CH2]", "[CH2]O[CH2]"], - ["CH3", "PBE", "PEO"], [0, 0, 0]), - ("ACOL","ref.top", 
["[CH2][CH]C(=O)[O][CH3]","[CH2][CH]C(=O)[O][CH3]", - "[CH2][CH]C(=O)[O][CH2][CH2][N]([CH3])([CH3])([CH3])", - "[CH2][CH]C(=O)[O][CH3]", "[CH2][CH]C(=O)[O][CH3]"], - ["M", "M", "AOL", "M", "M"], - [0, 0, 1, 0, 0]), +@pytest.mark.parametrize("case, fname, bigsmile, charges", [ + # test case 1 PEO with OHtermini + ("PEO_OHter", + "in_itp.itp", + "{[#OHter][#PEO]|4[#OHter]}.{#PEO=[$]COC[$],#OHter=[$]CO}", + [("OHter", 0), ("PEO", 0)], + ), + # test case 2 PEO-PBE block copolymer with two termini + ("PEG_PBE", + "in_itp.itp", + "{[#CH3ter][#PBE]|4[#PEO]|2[#OHter]}.{#PEO=[>]COC[<],#OHter=[<]CO,#CH3ter=[>][CH3],#PBE=[>]CC[<]C=C}", + [("CH3ter", 0), ("PBE", 0), ("PEO", 0), ("OHter", 0)], + ), + # test case 3 complex sequence with charged ion in the center + ("ACOL", + "ref.top", + "{[#ter1][#PMA][#AOL][#PMA][#ter2]}.{#Hter=[>][<]H,#ter1=CC[<]C(=O)OC,#ter2=[>]CCC(=O)OC,#PMA=[>]CC[<]C(=O)OC,#AOL=[>]CC[<]C(=O)OCC[N+](C)(C)(C)}", + [("ter1", 0), ("PMA", 0), ("AOL", 1), ("ter2", 0)], + ) ]) -def test_itp_to_ff(tmp_path, case, fname, smiles, resnames, charges): +def test_itp_to_ff(tmp_path, case, fname, bigsmile, charges): """ Call itp-to-ff and check if it generates the same force-field as in the ref.ff file.
@@ -86,10 +99,8 @@ def test_itp_to_ff(tmp_path, case, fname, smiles, resnames, charges): tmp_file = Path(tmp_path) / "test.ff" inpath = Path(polyply.TEST_DATA) / "itp_to_ff" / case itp_to_ff(itppath=inpath/fname, - fragment_smiles=smiles, - resnames=resnames, - charges=charges, - term_prefix='ter', + smile_str=bigsmile, + res_charges=charges, outpath=tmp_file,) # now generate an itp file with this ff-file tmp_itp = tmp_path / "new.itp" From 726866371ce70bd9a50bbfdc655a29d657281084 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Thu, 7 Mar 2024 21:06:16 +0100 Subject: [PATCH 81/82] update tests for fragment finder --- polyply/tests/test_fragment_finder.py | 293 ++++++-------------------- 1 file changed, 61 insertions(+), 232 deletions(-) diff --git a/polyply/tests/test_fragment_finder.py b/polyply/tests/test_fragment_finder.py index 7fb1478c..77c60a29 100644 --- a/polyply/tests/test_fragment_finder.py +++ b/polyply/tests/test_fragment_finder.py @@ -14,33 +14,12 @@ """ Test the fragment finder for itp_to_ff. 
""" - -import textwrap +import random import pytest -from pathlib import Path -import numpy as np import networkx as nx -import vermouth.forcefield -import vermouth.molecule -from vermouth.gmx.itp_read import read_itp -from polyply import TEST_DATA -import polyply.src.meta_molecule -from polyply.src.meta_molecule import (MetaMolecule, Monomer) +from vermouth.forcefield import ForceField import polyply -from collections import defaultdict -import pysmiles - -@pytest.mark.parametrize( - "node1, node2, expected", - [ - ({"element": "C"}, {"element": "C"}, True), - ({"element": "H"}, {"element": "O"}, False), - ({"element": "N"}, {"element": "N"}, True), - ({"element": "O"}, {"element": "S"}, False), - ], -) -def test_element_match(node1, node2, expected): - assert polyply.src.fragment_finder._element_match(node1, node2) == expected +from polyply.src.big_smile_mol_processor import DefBigSmileParser @pytest.mark.parametrize( "match_keys, node1, node2, expected", @@ -53,224 +32,74 @@ def test_element_match(node1, node2, expected): ) def test_node_match(match_keys, node1, node2, expected): # molecule and terminal label don't matter - frag_finder = polyply.src.fragment_finder.FragmentFinder(None, "ter") + frag_finder = polyply.src.fragment_finder.FragmentFinder(None) frag_finder.match_keys = match_keys assert frag_finder._node_match(node1, node2) == expected -def find_studs(mol): - """ - By element find all undersatisfied connections - at the all-atom level. - """ - atom_degrees = {"H":1, - "C":4, - "O":2, - "N":3} - for node in mol.nodes: - ele = mol.nodes[node]['element'] - if mol.degree(node) != atom_degrees[ele]: - yield node - -def set_mass(mol): - masses = {"O": 16, "N":14,"C":12, - "S":32, "H":1} - - for atom in mol.nodes: - mol.nodes[atom]['mass'] = masses[mol.nodes[atom]['element']] - return mol - -def polymer_from_fragments(fragments, resnames, remove_resid=True): - """ - Given molecule fragments as smiles - combine them into different polymer - molecules. 
- """ - fragments_to_mol = [] - frag_mols = [] - frag_graph = pysmiles.read_smiles(fragments[0], explicit_hydrogen=True) - nx.set_node_attributes(frag_graph, 1, "resid") - nx.set_node_attributes(frag_graph, resnames[0], "resname") - frag_mols.append(frag_graph) - mol = vermouth.Molecule(frag_graph) - # terminals should have one stud anyways - prev_stud = next(find_studs(frag_graph)) - fragments_to_mol.append({node: node for node in mol.nodes}) - for resname, smile in zip(resnames[1:], fragments[1:]): - frag_graph = pysmiles.read_smiles(smile, explicit_hydrogen=True) - nx.set_node_attributes(frag_graph, resname, "resname") - frag_mols.append(frag_graph) - next_mol = vermouth.Molecule(frag_graph) - correspondance = mol.merge_molecule(next_mol) - fragments_to_mol.append(correspondance) - stud_iter = find_studs(frag_graph) - mol.add_edge(prev_stud, correspondance[next(stud_iter)]) - - try: - prev_stud = correspondance[next(stud_iter)] - except StopIteration: - # we're done molecule is complete - continue - mol = set_mass(mol) - if remove_resid: - nx.set_node_attributes(mol, {node: None for node in mol.nodes} ,"resid") - nx.set_node_attributes(mol, {node: None for node in mol.nodes} ,"resname") - return mol, frag_mols, fragments_to_mol +def _scramble_nodes(graph): + element_to_masses = {"O": 16, + "N": 14, + "C": 12, + "S": 32, + "H": 1} + # Get a list of all nodes in the original graph + nodes = list(graph.nodes()) + # Generate a randomized list of new node names/indices + randomized_nodes = nodes.copy() + random.shuffle(randomized_nodes) + # Create a mapping from old nodes to new nodes + node_mapping = {old_node: new_node for old_node, new_node in zip(nodes, randomized_nodes)} + # Generate a new graph by applying the mapping to the original graph + randomized_graph = nx.relabel_nodes(graph, node_mapping) + for node in randomized_graph.nodes: + for attr in ['resid', 'resname']: + del randomized_graph.nodes[node][attr] + ele = randomized_graph.nodes[node]['element'] + 
randomized_graph.nodes[node]['mass'] = element_to_masses[ele] + return randomized_graph @pytest.mark.parametrize( - "smiles, resnames", + "big_smile, resnames", [ - # completely defined molecule with two termini - (["[CH3]", "[CH2]O[CH2]", "[CH3]"], ["CH3", "PEO", "CH3"]), - # two different termini - (["[OH][CH2]", "[CH2]O[CH2]", "[CH3]"], ["OH", "PEO", "CH3"]), - # two different termini with the same repeat unit - (["[OH][CH2]", "[CH2]O[CH2]","[CH2]O[CH2]", "[CH3]"], ["OH", "PEO", "PEO", "CH3"]), - # sequence with two monomers and multiple "wrong" matchs - (["[CH3]", "[CH2][CH][CH][CH2]", "[CH2]O[CH2]", "[CH2][OH]"], ["CH3", "PBD", "PEO", "OH"]), - # sequence with two monomers, four repeats and multiple "wrong" matchs - (["[CH3]", "[CH2][CH][CH][CH2]", "[CH2][CH][CH][CH2]", "[CH2][CH][CH][CH2]", - "[CH2][CH][CH][CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", - "[CH2][OH]"], ["CH3", "PBE", "PBE", "PBE", "PBE", "PEO", "PEO", "PEO", "PEO", "OH"]), - # super symmtry - worst case scenario - (["[CH3]", "[CH2][CH2]", "[CH2][CH2]", "[CH2][CH2]","[CH2][CH2]", "[CH2][CH2]","[CH3]"], - ["CH3", "PE", "PE", "PE", "PE", "PE", "CH3"]), - ]) -def test_label_fragments(smiles, resnames): - molecule, frag_mols, fragments_in_mol = polymer_from_fragments(smiles, resnames) - frag_finder = polyply.src.fragment_finder.FragmentFinder(molecule, "ter") - unique_fragments = frag_finder.label_fragments_from_graph(frag_mols) - for resid, (resname, frag_to_mol) in enumerate(zip(resnames, fragments_in_mol), start=1): - for frag_node, mol_node in frag_to_mol.items(): - assert frag_finder.molecule.nodes[mol_node]['resname'] == resname - assert frag_finder.molecule.nodes[mol_node]['resid'] == resid - -@pytest.mark.parametrize( - "smiles, resnames, remove, new_name", - [ - # do not match termini - (["[CH3]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH3]"], - ["CH3", "PEO", "PEO", "PEO", "CH3"], - {1:2, 6:3}, - {1: "PEO", "4": "PEO"}, + # two residues no branches + 
("{[#CH3][#PEO]|4[#CH3]}.{#PEO=[$]COC[$],#CH3=[$]C}", + ["CH3", "PEO"], ), - # have dangling atom in center - (["[CH3]", "[CH2][CH2]", "[CH2][CH2]", "[CH2]O[CH2]", "[CH2][CH2]","[CH2][CH2]", "[CH2][CH2]","[CH3]"], - ["CH3", "PE", "PE", "PEO", "PE", "PE", "PE", "CH3"], - {4:5}, - {4:"PE"}, + # three residues no branches + ("{[#OH][#PEO]|4[#CH3]}.{#PEO=[$]COC[$],#CH3=[$]C,#OH=[$]O}", + ["CH3", "PEO", "OH"], ), + # simple branch expansion + ("{[#PMA]([#PEO][#PEO][#OH])|3}.{#PEO=[$]COC[$],#PMA=[>]CC[<]C(=O)OC[$],#OH=[$]O}", + ["PMA", "PEO", "OH"]), + # something with sulphur + ("{[#P3HT]|3}.{#P3HT=CCCCCCC1=C[$]SC[$]=C1}", + ["P3HT"]) ]) -def test_label_unmatched_atoms(smiles, resnames, remove, new_name): - molecule, frag_mols, fragments_in_mol = polymer_from_fragments(smiles, resnames, remove_resid=False) - nodes_to_label = {} - max_by_resid = {} - - for node in molecule.nodes: - resid = molecule.nodes[node]['resid'] - if resid in remove: - del molecule.nodes[node]['resid'] - del molecule.nodes[node]['resname'] - nodes_to_label[node] = resid - else: - if resid in max_by_resid: - known_atom = node - max_by_resid[resid] += 1 - else: - max_by_resid[resid] = 1 +def test_extract_fragments(big_smile, resnames): + ff = ForceField("new") + parser = DefBigSmileParser(ff) + meta = parser.parse(big_smile) + ff = parser.force_field + # strips resid, resname, and scrambles order + target_molecule = _scramble_nodes(meta.molecule) - resids = nx.get_node_attributes(molecule, "resid") - # the frag finder removes resid attributes so we have to later reset them - frag_finder = polyply.src.fragment_finder.FragmentFinder(molecule, "ter") - nx.set_node_attributes(frag_finder.molecule, resids, "resid") - frag_finder.max_by_resid = max_by_resid - frag_finder.known_atom = known_atom - frag_finder.label_unmatched_atoms() - for node, old_id in nodes_to_label.items(): - assert frag_finder.molecule.nodes[node]['resid'] == remove[old_id] - assert frag_finder.molecule.nodes[node]['resname'] == 
new_name[old_id] + # initialize the fragment finder + frag_finder = polyply.src.fragment_finder.FragmentFinder(target_molecule) + fragments, res_graph = frag_finder.extract_unique_fragments(meta.molecule) -@pytest.mark.parametrize( - "smiles, resnames, remove, uni_frags", - [ - # completely defined molecule with two termini - (["[CH3]", "[CH2]O[CH2]", "[CH3]"], - ["CH3", "PEO", "CH3"], - {}, - {"CH3ter": 0, "PEO": 1} - ), - # two different termini - (["[OH][CH2]", "[CH2]O[CH2]", "[CH3]"], - ["OH", "PEO", "CH3"], - {}, - {"OHter": 0, "PEO": 1, "CH3ter": 2} - ), - # sequence with two monomers, four repeats and multiple "wrong" matchs - (["[CH3]", "[CH2][CH][CH][CH2]", "[CH2][CH][CH][CH2]", "[CH2][CH][CH][CH2]", - "[CH2][CH][CH][CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", - "[CH2][OH]"], - ["CH3", "PBE", "PBE", "PBE", "PBE", "PEO", "PEO", "PEO", "PEO", "OH"], - {}, - {"CH3ter": 0, "PBE": 1, "PEO": 5, "OHter": 9} - ), - # super symmtry - worst case scenario - (["[CH3]", "[CH2][CH2]", "[CH2][CH2]", "[CH2][CH2]","[CH2][CH2]", "[CH2][CH2]","[CH3]"], - ["CH3", "PE", "PE", "PE", "PE", "PE", "CH3"], - {}, - {"CH3ter":0, "PE": 1} - ), - # different fragments with same resname - (["[CH3]O[CH2]", "[CH2]O[CH2]", "[CH3]"], - ["PEO", "PEO", "CH3"], - {3:2}, - {"PEOter": 0, "PEOter_1": (1,2)} - ), - # do not match termini - (["[CH3]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH3]"], - ["CH3", "PEO", "PEO", "PEO", "CH3"], - {5: 4}, - {"CH3ter":0, "PEO": 1, "PEOter": (3, 4)}, - ), - # have dangling atom in center; this is a bit akward but essentially serves - # as a guard of having really shitty input - (["[CH3]", "[CH2][CH2]", "[CH2][CH2]", "[CH2]O[CH2]", "[CH2][CH2]","[CH2][CH2]", "[CH2][CH2]","[CH3]"], - ["CH3", "PE", "PE", "PEO", "PE", "PE", "PE", "CH3"], - {4: 3}, - {"CH3ter": 0, "PE": 1, "PEter": (2, 3, 4, 5, 6, 7)}, - ), - ]) -def test_extract_fragments(smiles, resnames, remove, uni_frags): - molecule, frag_mols, fragments_in_mol = 
polymer_from_fragments(smiles, resnames, remove_resid=True) - for node in molecule.nodes: - resid = molecule.nodes[node]['resid'] - if resid in remove: - del molecule.nodes[node]['resid'] - del molecule.nodes[node]['resname'] + def _res_node_match(a, b): + return a['resname'] == b['resname'] - match_mols = [] - for idx, frag in enumerate(frag_mols): - if idx not in remove.values(): - match_mols.append(frag) + def _frag_node_match(a, b): + for attr in ['element', 'resname']: + if a[attr] != b[attr]: + return False + return True - frag_finder = polyply.src.fragment_finder.FragmentFinder(molecule, "ter") - fragments, _ = frag_finder.extract_unique_fragments(match_mols) - assert len(fragments) == len(uni_frags) - for resname, graph in fragments.items(): - frag_finder.match_keys = ['element', 'mass', 'resname'] - if type(uni_frags[resname]) == tuple: - new_smiles = [smiles[idx] for idx in uni_frags[resname]] - new_resnames = [resnames[idx] for idx in uni_frags[resname]] - ref, _, _ = polymer_from_fragments(new_smiles, new_resnames) - nx.set_node_attributes(ref, resname, "resname") - else: - ref = frag_mols[uni_frags[resname]] - # because the terminii are not labelled yet in the fragment - # graphs used to make the match - nx.set_node_attributes(ref, resname, "resname") - assert nx.is_isomorphic(ref, graph, node_match=frag_finder._node_match) - # make sure all molecule nodes are named correctly - frag_finder.match_keys = ['atomname', 'resname'] - for node in frag_finder.res_graph: - resname_mol = frag_finder.res_graph.nodes[node]["resname"] - if resname == resname_mol: - target = frag_finder.res_graph.nodes[node]["graph"] - assert nx.is_isomorphic(target, graph, node_match=frag_finder._node_match) + assert set(fragments.keys()) == set(resnames) + assert nx.is_isomorphic(res_graph, meta, node_match=_res_node_match) + for resname in resnames: + assert nx.is_isomorphic(fragments[resname], + ff.blocks[resname], + node_match=_frag_node_match) From 
15be6a6a25476dfccdb667a5c0070ca5d19d3ee2 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Thu, 7 Mar 2024 21:15:10 +0100 Subject: [PATCH 82/82] remove leftover files --- polyply/src/big_smiles.py | 93 --------------- polyply/src/big_smiles_helper.py | 193 ------------------------------- 2 files changed, 286 deletions(-) delete mode 100644 polyply/src/big_smiles.py delete mode 100644 polyply/src/big_smiles_helper.py diff --git a/polyply/src/big_smiles.py b/polyply/src/big_smiles.py deleted file mode 100644 index 41e8535e..00000000 --- a/polyply/src/big_smiles.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright 2020 University of Groningen -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -def find_token_indices(line, target): - idxs = [idx for idx, token in enumerate(line) if token == target] - for idx in idxs: - yield idx - -def compatible(left, right): - if left == right: - return True - if left[0] == "<" and right[0] == ">": - if left[1:] == right[1:]: - return True - if left[0] == ">" and right[0] == "<": - if left[1:] == right[1:]: - return True - return False - -def find_compatible_pair(polymol, residue, bond_type="bond_type", eligible_nodes=None): - ref_nodes = nx.get_node_attributes(polymol, bond_type) - target_nodes = nx.get_node_attributes(residue, bond_type) - for ref_node in ref_nodes: - if eligible_nodes and\ - polymol.nodes[ref_node]['resid'] not in eligible_nodes: - continue - for target_node in target_nodes: - if compatible(ref_nodes[ref_node], - target_nodes[target_node]): - return ref_node, target_node - return None - -class BigSmileParser: - - def __init__(self): - self.molecule = - - def parse_stochastic_object(): - - -def read_simplified_big_smile_string(line): - - # split the different stochastic objects - line = line.strip() - # a stochastic object is enclosed in '{' and '}' - start_idx = next(find_token_indices(line, "{")) - stop_idx = next(find_token_indices(line, "}")) - stoch_line = line[start_idx+1:stop_idx] - # residues are separated by , and end - # groups by ; - if ';' in stoch_line: - residue_string, terminii_string = stoch_line.split(';') - else: - residue_string = stoch_line - terminii_string = None - # let's read the smile residue strings - residues = [] - count = 0 - for residue_string in residue_string.split(','): - # figure out if this is a named object - if residue_string[0] == "#": - jdx = next(find_token_indices(residue_string, "=")) - name = residue_string[:jdx] - residue_string = residue_string[jdx:] - else: - name = count - - mol_graph = read_smiles(residue_string) - residues.append((name, mol_graph)) - count += 1 - # let's read the terminal residue strings - end_groups = [] - if terminii_string: - for 
terminus_string in terminii_string.split(','): - mol_graph = read_smiles(terminus_string) - bond_types = nx.get_node_attributes(mol_graph, "bond_type") - nx.set_node_attributes(mol_graph, bond_types, "ter_bond_type") - end_groups.append(mol_graph) - return cls(dict(residues), end_groups) - - - diff --git a/polyply/src/big_smiles_helper.py b/polyply/src/big_smiles_helper.py deleted file mode 100644 index ae546ffe..00000000 --- a/polyply/src/big_smiles_helper.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright 2020 University of Groningen -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -try: - import pysmiles -except ImportError: - msg = "The tool you are using requires pysmiles as dependcy." - raise ImportError(msg) - -from pysmiles.read_smiles import _tokenize - -def find_anchor(mol, pre_mol, atom): - anchors = list(pre_mol.neighbors(atom)) - for anchor in anchors: - if anchor in mol.nodes: - return False, anchor - for anchor in nx.ego_graph(pre_mol, atom, radius=2).nodes: - if anchor in mol.nodes: - return True, anchor - raise RuntimeError - -def parse_atom(atom): - """ - Parses a SMILES atom token, and returns a dict with the information. - - Note - ---- - Can not deal with stereochemical information yet. This gets discarded. - - Parameters - ---------- - atom : str - The atom string to interpret. Looks something like one of the - following: "C", "c", "[13CH3-1:2]" - - Returns - ------- - dict - A dictionary containing at least 'element', 'aromatic', and 'charge'. 
If - present, will also contain 'hcount', 'isotope', and 'class'. - """ - defaults = {'charge': 0, 'hcount': 0, 'aromatic': False} - if atom.startswith('[') and any(mark in atom for mark in ['$', '>', '<']): - bond_type = atom[1:-1] - # we have a big smile bond anchor - defaults.update({"element": None, - "bond_type": bond_type}) - return defaults - - if atom.startswith('[') and '#' == atom[1]: - # this atom is a replacable place holder - defaults.update({"element": None, "replace": atom[2:-1]}) - return defaults - - if not atom.startswith('[') and not atom.endswith(']'): - if atom != '*': - # Don't specify hcount to signal we don't actually know anything - # about it - return {'element': atom.capitalize(), 'charge': 0, - 'aromatic': atom.islower()} - else: - return defaults.copy() - - match = ATOM_PATTERN.match(atom) - - if match is None: - raise ValueError('The atom {} is malformatted'.format(atom)) - - out = defaults.copy() - out.update({k: v for k, v in match.groupdict().items() if v is not None}) - - if out.get('element', 'X').islower(): - out['aromatic'] = True - - parse_helpers = { - 'isotope': int, - 'element': str.capitalize, - 'stereo': lambda x: x, - 'hcount': parse_hcount, - 'charge': parse_charge, - 'class': int, - 'aromatic': lambda x: x, - } - - for attr, val_str in out.items(): - out[attr] = parse_helpers[attr](val_str) - - if out['element'] == '*': - del out['element'] - - if out.get('element') == 'H' and out.get('hcount', 0): - raise ValueError("A hydrogen atom can't have hydrogens") - - if 'stereo' in out: - LOGGER.warning('Atom "%s" contains stereochemical information that will be discarded.', atom) - - return out - -def big_smile_str_to_graph(smile_str): - """ - - """ - bond_to_order = {'-': 1, '=': 2, '#': 3, '$': 4, ':': 1.5, '.': 0} - pre_mol = nx.Graph() - anchor = None - idx = 0 - default_bond = 1 - next_bond = None - branches = [] - ring_nums = {} - for tokentype, token in _tokenize(smiles): - if tokentype == TokenType.ATOM: - 
pre_mol.add_node(idx, **parse_atom(token)) - if anchor is not None: - if next_bond is None: - next_bond = default_bond - if next_bond or zero_order_bonds: - pre_mol.add_edge(anchor, idx, order=next_bond) - next_bond = None - anchor = idx - idx += 1 - elif tokentype == TokenType.BRANCH_START: - branches.append(anchor) - elif tokentype == TokenType.BRANCH_END: - anchor = branches.pop() - elif tokentype == TokenType.BOND_TYPE: - if next_bond is not None: - raise ValueError('Previous bond (order {}) not used. ' - 'Overwritten by "{}"'.format(next_bond, token)) - next_bond = bond_to_order[token] - elif tokentype == TokenType.RING_NUM: - if token in ring_nums: - jdx, order = ring_nums[token] - if next_bond is None and order is None: - next_bond = default_bond - elif order is None: # Note that the check is needed, - next_bond = next_bond # But this could be pass. - elif next_bond is None: - next_bond = order - elif next_bond != order: # Both are not None - raise ValueError('Conflicting bond orders for ring ' - 'between indices {}'.format(token)) - # idx is the index of the *next* atom we're adding. So: -1. - if pre_mol.has_edge(idx-1, jdx): - raise ValueError('Edge specified by marker {} already ' - 'exists'.format(token)) - if idx-1 == jdx: - raise ValueError('Marker {} specifies a bond between an ' - 'atom and itself'.format(token)) - if next_bond or zero_order_bonds: - pre_mol.add_edge(idx - 1, jdx, order=next_bond) - next_bond = None - del ring_nums[token] - else: - if idx == 0: - raise ValueError("Can't have a marker ({}) before an atom" - "".format(token)) - # idx is the index of the *next* atom we're adding. So: -1. 
- ring_nums[token] = (idx - 1, next_bond) - next_bond = None - elif tokentype == TokenType.EZSTEREO: - LOGGER.warning('E/Z stereochemical information, which is specified by "%s", will be discarded', token) - if ring_nums: - raise KeyError('Unmatched ring indices {}'.format(list(ring_nums.keys()))) - - return pre_mol - -def mol_graph_from_big_smile_graph(pre_mol): - # here we condense any BigSmilesBonding information - clean_nodes = [node for node in pre_mol.nodes(data=True) if 'bond_type' not in node[1]] - mol = nx.Graph() - mol.add_nodes_from(clean_nodes) - mol.add_edges_from([edge for edge in pre_mol.edges if edge[0] in mol.nodes and edge[1] in mol.nodes]) - for node in pre_mol.nodes: - if 'bond_type' in pre_mol.nodes[node]: - terminus, anchor = find_anchor(mol, pre_mol, node) - if terminus: - mol.nodes[anchor].update({"ter_bond_type": pre_mol.nodes[node]['bond_type'], - "ter_bond_probs": pre_mol.nodes[node]['bond_probs']}) - else: - mol.nodes[anchor].update({"bond_type": pre_mol.nodes[node]['bond_type'], - "bond_probs": pre_mol.nodes[node]['bond_probs']}) - return mol