From 5968f834b718df2003f8aadb8b1cf2a142e89952 Mon Sep 17 00:00:00 2001
From: "f.grunewald" <f.grunewald@rug.nl>
Date: Tue, 13 Jun 2023 16:02:57 +0200
Subject: [PATCH 01/82] init draft itp to ff

---
 bin/polyply                        |  21 +-
 polyply/__init__.py                |   1 +
 polyply/src/ff_directive_writer.py |   2 +
 polyply/src/ffoutput.py            | 135 ++++++++++++
 polyply/src/fragment_finder.py     | 195 ++++++++++++++++++
 polyply/src/graph_utils.py         |  12 ++
 polyply/src/itp_to_ff.py           | 320 +++++++++++++++++++++++++++++
 7 files changed, 685 insertions(+), 1 deletion(-)
 create mode 100644 polyply/src/ff_directive_writer.py
 create mode 100644 polyply/src/ffoutput.py
 create mode 100644 polyply/src/fragment_finder.py
 create mode 100644 polyply/src/itp_to_ff.py

diff --git a/bin/polyply b/bin/polyply
index 3776c9e9..5a14457c 100755
--- a/bin/polyply
+++ b/bin/polyply
@@ -23,7 +23,7 @@ import argparse
 from pathlib import Path
 import numpy as np
 import polyply
-from polyply import (gen_itp, gen_coords, gen_seq, DATA_PATH)
+from polyply import (gen_itp, gen_coords, gen_seq, itp_to_ff, DATA_PATH)
 from polyply.src.load_library import load_ff_library
 from polyply.src.logging import LOGGER, LOGLEVELS
 
@@ -51,6 +51,7 @@ def main(): # pylint: disable=too-many-locals,too-many-statements
     parser_gen_itp = subparsers.add_parser('gen_params', aliases=['gen_itp'])
     parser_gen_coords = subparsers.add_parser('gen_coords')
     parser_gen_seq = subparsers.add_parser('gen_seq')
+    parser_itp_ff = subparsers.add_parser('itp_to_ff')
 
     # =============================================================================
     # Input Arguments for the itp generation tool
@@ -225,6 +226,24 @@ def main(): # pylint: disable=too-many-locals,too-many-statements
                            default=[])
     parser_gen_seq.set_defaults(func=gen_seq)
 
+    # =============================================================================
+    # Input Arguments for the itp to ff tool
+    # =============================================================================
+
+    parser_itp_ff.add_argument('-v', dest='verbosity', action='count',
+                                help='Enable debug logging output. Can be given '
+                                'multiple times.', default=0)
+
+    parser_itp_ff.add_argument('-i', dest="itppath")
+    parser_itp_ff.add_argument('-sm', dest="fragment_smiles", nargs='*')
+    parser_itp_ff.add_argument('-rn', dest="resnames", nargs='*')
+    parser_itp_ff.add_argument('-tp',dest="term_prefix", default="ter")
+    parser_itp_ff.add_argument('-o', dest="outpath", type=Path)
+    parser_itp_ff.add_argument('-c', dest="charge", type=float, default=0.0)
+
+    parser_itp_ff.set_defaults(func=itp_to_ff)
+
+
     # ============================================================================
     # Deal with queries of the polyply library
     # ============================================================================
diff --git a/polyply/__init__.py b/polyply/__init__.py
index 9ef7d384..2ae08752 100644
--- a/polyply/__init__.py
+++ b/polyply/__init__.py
@@ -49,3 +49,4 @@
 from .src.gen_itp import gen_itp, gen_params
 from .src.gen_coords import gen_coords
 from .src.gen_seq import gen_seq
+from .src.itp_to_ff import itp_to_ff
diff --git a/polyply/src/ff_directive_writer.py b/polyply/src/ff_directive_writer.py
new file mode 100644
index 00000000..139597f9
--- /dev/null
+++ b/polyply/src/ff_directive_writer.py
@@ -0,0 +1,2 @@
+
+
diff --git a/polyply/src/ffoutput.py b/polyply/src/ffoutput.py
new file mode 100644
index 00000000..8beb7a6e
--- /dev/null
+++ b/polyply/src/ffoutput.py
@@ -0,0 +1,135 @@
+# Copyright 2020 University of Groningen
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+class ForceFieldDirectiveWriter():
+    """
+    Write force-field files according to the
+    vermouth force-field definition.
+
+    Note that this is a lightweight writer
+    which does not offer the complete rich
+    syntax of the ff file format.
+    """
+    def __init__(self, forcefield, stream):
+        """
+        Parameters
+        ----------
+        forcefield: :class:`vermouth.forcefield.ForceField`
+            the force-field object to write
+
+        stream: file-like
+            the stream to which to write; must have a write method
+        """
+        self.forcefield = forcefield
+        self.stream = stream
+        # these attributes have a specific order in the moleculetype section
+        self.normal_order_block_atoms = ["atype", "resid", "resname",
+                                         "atomname", "charge_group", "charge", "mass"]
+
+    @staticmethod
+    def _to_json(attributes):
+        """
+        Return `attributes` as a JSON object string. Keys and values are
+        converted to `str`; `json.dumps` takes care of escaping quotes.
+        """
+        import json  # function-scope import; the module has no import section
+        stringified = {str(key): str(value) for key, value in attributes.items()}
+        return json.dumps(stringified, ensure_ascii=False)
+
+    def write(self):
+        """
+        Write all blocks and then all links of the force-field to `self.stream`.
+        """
+        for name, block in self.forcefield.blocks.items():
+            self.stream.write("[ moleculetype ]\n")
+            self.stream.write(f"{name} {block.nrexcl}\n")
+            self.write_atoms_block(block.nodes(data=True))
+            self.write_interaction_dict(block.interactions)
+
+        for link in self.forcefield.links:
+            self.write_link_header()
+            self.write_atoms_link(link.nodes(data=True))
+            self.write_interaction_dict(link.interactions)
+            self.write_edges(link.edges)
+
+    def write_interaction_dict(self, inter_dict):
+        """
+        Writes interactions to `self.stream`, with a new
+        interaction directive per type. Meta attributes
+        are kept and written as json parsable dicts.
+
+        Parameters
+        ----------
+        inter_dict: dict[str, list[vermouth.molecule.Interaction]]
+            the interaction dict to write
+        """
+        for inter_type, interactions in inter_dict.items():
+            self.stream.write(f"[ {inter_type} ]\n")
+            for interaction in interactions:
+                # str() first: atoms/parameters are not guaranteed to be strings
+                atom_string = " ".join(map(str, interaction.atoms))
+                param_string = " ".join(map(str, interaction.parameters))
+                meta_string = self._to_json(interaction.meta)
+                self.stream.write(f"{atom_string} {param_string} {meta_string}\n")
+
+    def write_edges(self, edges):
+        """
+        Writes edges to `self.stream` into the edges directive.
+
+        Parameters
+        ----------
+        edges: abc.iterable
+            pair-wise iterable edge list
+        """
+        self.stream.write("[ edges ]\n")
+        for idx, jdx in edges:
+            self.stream.write(f"{idx} {jdx}\n")
+
+    def write_atoms_block(self, nodes):
+        """
+        Writes the nodes/atoms of the block atoms directive to `self.stream`.
+        Atoms are numbered from one; the attributes follow in the order
+        given by `self.normal_order_block_atoms`.
+
+        Parameters
+        ----------
+        nodes: abc.iterable[tuple(abc.hashable, dict)]
+            node-key and attribute dict pairs, as returned by
+            networkx.nodes(data=True)
+        """
+        self.stream.write("[ atoms ]\n")
+        for idx, (_, attrs) in enumerate(nodes, start=1):
+            attr_line = " ".join(str(attrs[attr]) for attr in self.normal_order_block_atoms)
+            self.stream.write(f"{idx} {attr_line}\n")
+
+    def write_atoms_link(self, nodes):
+        """
+        Writes the nodes/atoms of the link atoms directive to `self.stream`.
+        All attributes are written as json style dicts.
+
+        Parameters
+        ----------
+        nodes: abc.iterable[tuple(abc.hashable, dict)]
+            list of nodes in form of a list with hashable node-key and dict
+            of attributes. The format is the same as returned by networkx.nodes(data=True)
+        """
+        self.stream.write("[ atoms ]\n")
+        for node_key, attributes in nodes:
+            self.stream.write(f"{node_key} {self._to_json(attributes)}\n")
+
+    def write_link_header(self):
+        """
+        Write the bare `[ link ]` directive header to `self.stream`.
+        """
+        self.stream.write("[ link ]\n")
diff --git a/polyply/src/fragment_finder.py b/polyply/src/fragment_finder.py
new file mode 100644
index 00000000..cd1f4d7f
--- /dev/null
+++ b/polyply/src/fragment_finder.py
@@ -0,0 +1,195 @@
+# Copyright 2020 University of Groningen
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import networkx as nx
+from vermouth.graph_utils import make_residue_graph
+from polyply.src.graph_utils import find_one_ismags_match
+
+def _element_match(node1, node2):
+    """
+    Node-match callback for graph matching.
+
+    Returns True when both node attribute dicts
+    hold the same "element" entry, False otherwise.
+    """
+    key = "element"
+    first, second = node1[key], node2[key]
+    return first == second
+
+class FragmentFinder():
+    """
+    Find, label and extract unique fragments from a vermouth.molecule.Molecule.
+
+    Matched atoms are labelled per fragment; left-over atoms join a neighbouring residue.
+    """
+
+    def __init__(self, molecule, prefix):
+        """
+        Set the resid of all atoms to None and assign elements based on
+        the rounded atomic masses (only O, C, S and H are known).
+
+        Parameters
+        ----------
+        molecule: :class:`vermouth.molecule.Molecule`
+        prefix: str; appended to the resname of terminal fragments
+        """
+        self.max_by_resid = {}
+        self.ter_prefix = prefix
+        self.resid = 1
+        self.res_assigment = []
+        self.assigned_atoms = []
+        self.molecule = molecule
+        self.known_atom = None
+        self.match_keys = ['element', 'mass'] #, 'charge']
+        self.masses_to_element = {16: "O",
+                                  12: "C",
+                                  32: "S",
+                                   1: "H"}
+
+        # resids are not reliable so we set them all to None
+        nx.set_node_attributes(self.molecule, None, "resid")
+
+        # set the element attribute for each atom in the
+        # molecule
+        for node in self.molecule.nodes:
+            mass = round(self.molecule.nodes[node]["mass"])
+            self.molecule.nodes[node]["element"] = self.masses_to_element[mass]
+
+    def _node_match(self, node1, node2):
+        for attr in self.match_keys:
+            if node1[attr] != node2[attr]:
+                return False
+        return True
+
+    def label_fragment_from_graph(self, fragment_graph):
+        """
+        For the `self.molecule` label all atoms that match
+        the `fragment_graph` with a resid attribute and set
+        the atom-name to the element name plus index relative
+        to the atoms in the fragment.
+
+        Parameters
+        ----------
+        fragment_graph: nx.Graph
+            graph describing the fragment; must have the
+            element and resname attributes
+        """
+        # find all subgraph isomorphic matches to the target fragment
+        GM = nx.isomorphism.GraphMatcher(self.molecule,
+                                         fragment_graph,
+                                         node_match=_element_match,
+                                        )
+        template_atoms = list(fragment_graph.nodes)
+        # the resname of the first template node is used for the whole fragment
+        resname = list(nx.get_node_attributes(fragment_graph, "resname").values())[0]
+        taken = set(self.assigned_atoms)
+        # symmetric matches cover already assigned atoms and must be skipped
+        for current_match in GM.subgraph_isomorphisms_iter():
+            # matches come in arbitrary order; sort them according to the template
+            rev_current_match = {val: key for key, val in current_match.items()}
+            atoms = [rev_current_match[template_atom] for template_atom in template_atoms]
+            if frozenset(atoms) in self.res_assigment or not taken.isdisjoint(atoms):
+                continue
+            self.res_assigment.append(frozenset(atoms))
+            taken.update(atoms)
+            for idx, atom in enumerate(atoms):
+                node_attrs = self.molecule.nodes[atom]
+                node_attrs["resid"] = self.resid
+                node_attrs["atomname"] = node_attrs["element"] + str(idx)
+                node_attrs["resname"] = resname
+                self.max_by_resid[self.resid] = idx
+                self.known_atom = atom
+                self.assigned_atoms.append(atom)
+            self.resid += 1
+
+    def label_fragments_from_graph(self, fragment_graphs):
+        """
+        Call the label_fragment_from_graph method for multiple fragments.
+
+        Parameters
+        ----------
+        fragment_graphs: list[nx.Graph]
+        """
+        for fragment_graph in fragment_graphs:
+            self.label_fragment_from_graph(fragment_graph)
+
+    def label_unmatched_atoms(self):
+        """
+        Assign every left-over atom to the residue of the atom it is reached
+        from in a depth-first search from the last labelled atom. Raises a
+        ValueError when no fragment was matched before.
+        """
+        if self.known_atom is None:
+            raise ValueError("No fragment matches the molecule; cannot label left-over atoms.")
+        nodes = self.molecule.nodes
+        for from_node, to_node in nx.dfs_edges(self.molecule, source=self.known_atom):
+            if nodes[to_node]["resid"] is None:
+                resid = nodes[from_node]["resid"]
+                self.max_by_resid[resid] += 1
+                nodes[to_node]["resid"] = resid
+                nodes[to_node]["resname"] = nodes[from_node]["resname"]
+                nodes[to_node]["atomname"] = nodes[to_node]["element"] + str(self.max_by_resid[resid])
+
+    def extract_unique_fragments(self, fragment_graphs):
+        """
+        Label all atoms with the target fragments, assign the left-over
+        atoms to the connecting fragment and return the unique residues
+        of the resulting residue graph.
+
+        Parameters
+        ----------
+        fragment_graphs: list[nx.Graph]
+
+        Returns
+        -------
+        dict[str, nx.Graph]
+            the unique fragment graphs by residue name
+        """
+        # label the target fragments first and the left-over atoms second
+        self.label_fragments_from_graph(fragment_graphs)
+        self.label_unmatched_atoms()
+        # now we make the residue graph and find all unique residues
+        unique_fragments = {}
+        had_resnames = {}
+        res_graph = make_residue_graph(self.molecule)
+        for res_node in res_graph.nodes:
+            resname = res_graph.nodes[res_node]['resname']
+            fragment = res_graph.nodes[res_node]['graph']
+            # this fragment is terminal located so we give it a special prefix
+            if res_graph.degree(res_node) == 1:
+                resname = resname + self.ter_prefix
+                nx.set_node_attributes(self.molecule, dict.fromkeys(fragment.nodes, resname), "resname")
+            for other_frag in unique_fragments.values():
+                if not nx.is_isomorphic(fragment, other_frag, node_match=self._node_match):
+                    continue
+                # isomorphic fragments can differ in atom names because the labelling
+                # order is arbitrary; the new fragment takes the names of the known one
+                mapping = find_one_ismags_match(fragment, other_frag, self._node_match)
+                if mapping:
+                    # keys are atoms of `fragment`, values atoms of `other_frag`
+                    for source, target in mapping.items():
+                        self.molecule.nodes[source]['atomname'] = self.molecule.nodes[target]['atomname']
+                    break
+            else:
+                base = resname
+                had_resnames.setdefault(base, 0)
+                # bump the counter until the name is unused, so nothing is overwritten
+                while resname in unique_fragments:
+                    had_resnames[base] += 1
+                    resname = base + "_" + str(had_resnames[base])
+                if resname != base:
+                    nx.set_node_attributes(self.molecule, dict.fromkeys(fragment.nodes, resname), "resname")
+                unique_fragments[resname] = fragment
+
+        return unique_fragments
diff --git a/polyply/src/graph_utils.py b/polyply/src/graph_utils.py
index b0300d3c..489ba118 100644
--- a/polyply/src/graph_utils.py
+++ b/polyply/src/graph_utils.py
@@ -214,3 +214,15 @@ def get_all_predecessors(graph, node, start_node=0):
     predecessors.reverse()
     return predecessors
 
+def find_one_ismags_match(graph1, graph2, node_match):
+    """
+    Return the first mapping found by the networkx
+    `GraphMatcher` for subgraphs of `graph1` isomorphic
+    to `graph2`, or None when there is no match.
+
+    Nodes are compared with the `node_match` callable;
+    mapping keys are nodes of `graph1`, values nodes of `graph2`.
+    """
+    matcher = nx.isomorphism.GraphMatcher(graph1, graph2, node_match=node_match)
+    # `next` with a default replaces catching StopIteration
+    return next(matcher.subgraph_isomorphisms_iter(), None)
diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py
new file mode 100644
index 00000000..ef9c1ba9
--- /dev/null
+++ b/polyply/src/itp_to_ff.py
@@ -0,0 +1,320 @@
+# Copyright 2020 University of Groningen
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import itertools
+from collections import defaultdict
+import numpy as np
+import networkx as nx
+import pysmiles
+import vermouth
+from vermouth.forcefield import ForceField
+from vermouth.molecule import Interaction
+from polyply.src.topology import Topology
+from polyply.src.generate_templates import _relabel_interaction_atoms
+from polyply.src.fragment_finder import FragmentFinder
+from polyply.src.ffoutput import ForceFieldDirectiveWriter
+
+def diffs_to_prefix(atoms, resid_diffs):
+    """
+    Given a list of atoms and corresponding differences
+    between their resids, generate the offset prefix for
+    the atomnames according to the vermouth specific offset
+    language.
+
+    The reference atom must have a resid_diff value of 0
+    and stays without prefix. All other atoms get one "+"
+    per positive and one "-" per negative unit of offset,
+    e.g. an offset of 2 turns "C1" into "++C1".
+
+    Parameters
+    ----------
+    atoms: abc.iterable[str]
+    resid_diffs: abc.iterable[int]
+        the differences in resid with respect to
+        the reference resid which is 0
+
+    Returns
+    -------
+    list[str]
+        list with prefixed atom names
+    """
+    signed_names = []
+    for name, offset in zip(atoms, resid_diffs):
+        # the sign picks the prefix character, which is
+        # then repeated once per unit of offset
+        sign = "+" if offset > 0 else "-"
+        signed_names.append(sign * abs(offset) + name)
+    return signed_names
+
+def _extract_edges_from_shortest_path(atoms, block, min_resid):
+    """
+    Given a list atoms generate a list of edges corresponding to
+    all edges required to connect all atoms by at least one
+    shortest path. Edges are returned on atomname basis with
+    prefix relative to the `min_resid`. See diffs_to_prefix.
+
+    Parameters
+    ----------
+    atoms: abc.iterable
+        the atoms to collect edges for
+    block: :class:`vermouth.molecule.Block`
+        the molecule which to survey for edges
+    min_resid: int
+        the resid the prefixes are relative to
+
+    Returns
+    -------
+    tuple[dict, list, dict]
+        prefixed atomnames by node key, edges by prefixed atomname
+        and resnames by prefixed atomname
+    """
+    edges = []
+    had_edges = set()
+    final_atoms = {}
+    resnames = {}
+    for origin, target in itertools.combinations(atoms, r=2):
+        # the first path is a shortest one; the other simple paths are never generated
+        path = next(nx.shortest_simple_paths(block, source=origin, target=target))
+        for edge in zip(path[:-1], path[1:]):
+            if edge not in had_edges:
+                had_edges.add(edge)
+                resid_diffs = np.array([block.nodes[node]['resid'] for node in edge]) - min_resid
+                atom_names = [block.nodes[node]["atomname"] for node in edge]
+                link_names = diffs_to_prefix(atom_names, resid_diffs)
+                final_atoms.update(dict(zip(edge, link_names)))
+                edges.append(link_names)
+                resnames.update(zip(link_names, [block.nodes[node]["resname"] for node in edge]))
+    return final_atoms, edges, resnames
+
+def extract_block(molecule, nodes, defines):
+    """
+    Given a `vermouth.molecule` and the `nodes` of a fragment
+    extract the atoms that share the resid of the first node
+    together with their interactions into a block. An IOError
+    is raised if the resulting block is not connected.
+
+    Parameters
+    ----------
+    molecule:  :class:vermouth.molecule.Molecule
+    nodes:     list of node keys of `molecule`
+    defines:   dict
+      dict of type define: value; not used at the moment
+
+    Returns
+    -------
+    :class:vermouth.molecule.Block
+    """
+    first_attrs = molecule.nodes[nodes[0]]
+    resid = first_attrs["resid"]
+    block = vermouth.molecule.Block()
+
+    # select all nodes with the same first resid; block nodes are
+    # labelled by atomname and `mapping` relates molecule node keys
+    # to block node keys for relabeling the interactions
+    mapping = {}
+    for node in nodes:
+        attr_dict = molecule.nodes[node]
+        if attr_dict["resid"] == resid:
+            block.add_node(attr_dict["atomname"], **attr_dict)
+            mapping[node] = attr_dict["atomname"]
+
+    for inter_type in molecule.interactions:
+        for interaction in molecule.interactions[inter_type]:
+            if all(atom in mapping for atom in interaction.atoms):
+                interaction = _relabel_interaction_atoms(interaction, mapping)
+                block.interactions[inter_type].append(interaction)
+
+    for inter_type in ["bonds", "constraints", "virtual_sitesn",
+                       "virtual_sites2", "virtual_sites3", "virtual_sites4"]:
+        block.make_edges_from_interaction_type(inter_type)
+
+    if not nx.is_connected(block):
+        # `resname` used to be an undefined name here; report the one of the first atom
+        msg = ('\n Residue {} with id {} consistes of two disconnected parts. '
+               'Make sure all atoms/particles in a residue are connected by bonds,'
+               ' constraints or virual-sites.')
+        raise IOError(msg.format(first_attrs.get("resname"), resid))
+
+    return block
+
+def _link_pattern(resid_diffs, resnames):
+    """
+    Return the pattern of an interaction, i.e. the (resid offset, resname)
+    pairs of its atoms, as a frozenset. Unlike a tuple made from a set this
+    key does not depend on the iteration order of the set.
+    """
+    return frozenset(zip((int(diff) for diff in resid_diffs), resnames))
+
+def _is_known_interaction(interaction, known_interactions):
+    """
+    Return True if `known_interactions` has an interaction with
+    the same atoms and the same parameters.
+    """
+    return any(other.atoms == interaction.atoms
+               and other.parameters == interaction.parameters
+               for other in known_interactions)
+
+def _make_link(link_atoms, resnames, interactions_by_type):
+    """
+    Generate a link from prefixed atomnames, their resnames
+    and the interactions collected for one pattern.
+
+    Parameters
+    ----------
+    link_atoms: list[str]
+        prefixed atomnames which become the nodes of the link
+    resnames: dict[str, str]
+        resnames by prefixed atomname
+    interactions_by_type: dict[str, list[vermouth.molecule.Interaction]]
+
+    Returns
+    -------
+    :class:`vermouth.molecule.Link`
+    """
+    link = vermouth.molecule.Link()
+    # dict.fromkeys removes duplicates but, unlike a set, keeps the order
+    link.add_nodes_from(dict.fromkeys(link_atoms))
+    nx.set_node_attributes(link, resnames, "resname")
+    for inter_type, inters in interactions_by_type.items():
+        for idx, interaction in enumerate(inters):
+            # the version distinguishes multiple interactions with the
+            # same atoms when they are written to the ff file
+            interaction.meta.update({"version": idx, "comment": "link"})
+            link.interactions[inter_type].append(interaction)
+    return link
+
+def extract_links(molecule):
+    """
+    Given a molecule that has the resid and resname attributes
+    correctly set, extract the interactions which span more than
+    a single residue and generate a link per pattern of residues.
+
+    Parameters
+    ----------
+    molecule: :class:`vermouth.molecule.Molecule`
+        the molecule from which to extract interactions
+
+    Returns
+    -------
+    list[:class:`vermouth.molecule.Link`]
+        a list with the links found
+    """
+    # sometimes multiple interactions are defined for one pattern
+    # in that case they are all collected in this dictionary by type
+    patterns = defaultdict(dict)
+    # for each found pattern the resnames are collected; this is important
+    # because the same pattern may apply to residues with different name
+    resnames_for_patterns = defaultdict(dict)
+    link_atoms_for_patterns = defaultdict(list)
+    for inter_type in molecule.interactions:
+        for interaction in molecule.interactions[inter_type]:
+            # extract resids and resname corresponding to interaction atoms
+            resids = np.array([molecule.nodes[atom]["resid"] for atom in interaction.atoms])
+            resnames = [molecule.nodes[atom]["resname"] for atom in interaction.atoms]
+            # compute the resid offset to be used for the atom prefixes
+            min_resid = min(resids)
+            diff = resids - min_resid
+            # in this case all interactions are in a block and we skip
+            if np.sum(diff) == 0:
+                continue
+
+            # the pattern groups interactions between the same kind of residues
+            pattern = _link_pattern(diff, resnames)
+            # the atoms along the shortest paths between the interaction
+            # atoms provide the prefixed atomnames and their resnames
+            link_names, _, link_resnames = _extract_edges_from_shortest_path(
+                interaction.atoms, molecule, min_resid)
+            link_atoms = [link_names[atom] for atom in interaction.atoms]
+            link_inter = Interaction(atoms=link_atoms,
+                                     parameters=interaction.parameters,
+                                     meta={})
+            # here we deal with filtering redundancy
+            known_inters = patterns[pattern].setdefault(inter_type, [])
+            if _is_known_interaction(link_inter, known_inters):
+                continue
+            known_inters.append(link_inter)
+            resnames_for_patterns[pattern].update(link_resnames)
+            link_atoms_for_patterns[pattern] += link_atoms
+
+    # we make one new link for each pattern
+    return [_make_link(link_atoms_for_patterns[pattern],
+                       resnames_for_patterns[pattern],
+                       inters_by_type)
+            for pattern, inters_by_type in patterns.items()]
+
+def equalize_charges(molecule, target_charge=0):
+    """
+    Shift the charge of all atoms by the same amount such that the
+    total charge of the molecule equals the target charge.
+
+    Parameters
+    ----------
+    molecule: :class:`vermouth.molecule.Molecule`
+    target_charge: float
+        the charge of the molecule
+
+    Returns
+    -------
+    molecule
+        the molecule with updated charge attribute
+    """
+    nodes = molecule.nodes
+    if len(nodes) == 0:  # nothing to shift; also avoids a division by zero
+        return molecule
+    charges = {node: float(nodes[node]["charge"]) for node in nodes}
+    diff = (sum(charges.values()) - target_charge) / len(charges)
+    for node, charge in charges.items():
+        nodes[node]["charge"] = charge - diff
+    return molecule
+
+def handle_chirality(molecule, chiral_centers):
+    """Placeholder: chirality handling is not implemented; returns None."""
+
+def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0):
+    """
+    Main executable for itp to ff tool. The first molecule of the itp-file
+    is split into the given fragments; unique fragments become blocks and
+    inter-residue interactions links of a force-field written to `outpath`.
+    Raises a ValueError if the number of SMILES and resnames differs.
+    """
+    fragment_smiles = fragment_smiles or []
+    resnames = resnames or []
+    if len(fragment_smiles) != len(resnames):
+        raise ValueError("Each fragment SMILES requires exactly one resname.")
+
+    # read the target itp-file
+    top = Topology.from_gmx_topfile(itppath, name="test")
+    mol = top.molecules[0].molecule
+    mol = equalize_charges(mol, target_charge=charge)
+
+    # read the target fragments and convert to graph
+    fragment_graphs = []
+    for resname, smile in zip(resnames, fragment_smiles):
+        fragment_graph = pysmiles.read_smiles(smile)
+        nx.set_node_attributes(fragment_graph, resname, "resname")
+        fragment_graphs.append(fragment_graph)
+
+    # identify and extract all unique fragments
+    unique_fragments = FragmentFinder(mol, prefix=term_prefix).extract_unique_fragments(fragment_graphs)
+    force_field = ForceField("new")
+    for name, fragment in unique_fragments.items():
+        new_block = extract_block(mol, list(fragment.nodes), defines={})
+        nx.set_node_attributes(new_block, 1, "resid")
+        new_block.nrexcl = mol.nrexcl
+        force_field.blocks[name] = new_block
+
+    force_field.links = extract_links(mol)
+
+    with open(outpath, "w") as filehandle:
+        ForceFieldDirectiveWriter(forcefield=force_field, stream=filehandle).write()

From c5770527104ff6dffa89a6e15cb970a3029c98c6 Mon Sep 17 00:00:00 2001
From: "f.grunewald" <f.grunewald@rug.nl>
Date: Thu, 15 Jun 2023 15:33:19 +0200
Subject: [PATCH 02/82] improve graph matching

---
 polyply/src/fragment_finder.py | 87 ++++++++++++++++++++++++++++++----
 polyply/src/graph_utils.py     |  1 +
 polyply/src/itp_to_ff.py       | 18 +++----
 3 files changed, 88 insertions(+), 18 deletions(-)

diff --git a/polyply/src/fragment_finder.py b/polyply/src/fragment_finder.py
index cd1f4d7f..062ce602 100644
--- a/polyply/src/fragment_finder.py
+++ b/polyply/src/fragment_finder.py
@@ -15,6 +15,7 @@
 import networkx as nx
 from vermouth.graph_utils import make_residue_graph
 from polyply.src.graph_utils import find_one_ismags_match
+import matplotlib.pyplot as plt
 
 def _element_match(node1, node2):
     """
@@ -51,7 +52,7 @@ def __init__(self, molecule, prefix):
         self.assigned_atoms = []
         self.molecule = molecule
         self.known_atom = None
-        self.match_keys = ['element', 'mass'] #, 'charge']
+        self.match_keys = ['element', 'mass', 'degree'] #, 'charge']
         self.masses_to_element = {16: "O",
                                   12: "C",
                                   32: "S",
@@ -65,6 +66,7 @@ def __init__(self, molecule, prefix):
         for node in self.molecule.nodes:
             mass = round(self.molecule.nodes[node]["mass"])
             self.molecule.nodes[node]["element"] = self.masses_to_element[mass]
+            self.molecule.nodes[node]["degree"] = self.molecule.degree(node)
 
     def _node_match(self, node1, node2):
         for attr in self.match_keys:
@@ -72,6 +74,39 @@ def _node_match(self, node1, node2):
                 return False
         return True
 
+    def make_res_graph(self):
+        self.res_graph = make_residue_graph(self.molecule)
+
+    def pre_match(self, fragment_graph):
+        """
+        Find one match of fragment graph in the molecule
+        and then extract degrees and atom-types for further
+        matching. This is a safety measure because even though
+        the fragment graph is subgraph isomorphic the underlying
+        itp parameters might not be.
+        """
+        # find subgraph isomorphic matches to the target fragment
+        # based on the element only
+        GM = nx.isomorphism.GraphMatcher(self.molecule,
+                                         fragment_graph,
+                                         node_match=_element_match,)
+        one_match = next(GM.subgraph_isomorphisms_iter())
+        for mol_atom, tempt_atom in one_match.items():
+            for attr in self.match_keys:
+                fragment_graph.nodes[tempt_atom][attr] = self.molecule.nodes[mol_atom][attr]
+        return fragment_graph
+
+    def is_connected_to_prev(self, current, prev):
+        """
+        Check if the atoms in the lists current or
+        prev are connected.
+        """
+        for node in current:
+            for neigh_node in self.molecule.neighbors(node):
+                if neigh_node in prev:
+                    return True
+        return False
+
     def label_fragment_from_graph(self, fragment_graph):
         """
         For the `self.molecule` label all atoms that match
@@ -85,15 +120,19 @@ def label_fragment_from_graph(self, fragment_graph):
             graph describing the fragment; must have the
             element attribute
         """
+        # pre-match one residue and extract the atomtypes and degrees
+        # this is needed to enforce symmetry in matching the other
+        # residues
+        fragment_graph = self.pre_match(fragment_graph)
         # find all isomorphic matches to the target fragments
         GM = nx.isomorphism.GraphMatcher(self.molecule,
                                          fragment_graph,
-                                         node_match=_element_match,
+                                         node_match=self._node_match,
                                         )
         template_atoms = list(fragment_graph.nodes)
         # the below statement scales super duper extra poorly
         resname = list(nx.get_node_attributes(fragment_graph, "resname").values())[0]
-        raw_matchs = GM.subgraph_isomorphisms_iter()
+        raw_matchs = list(GM.subgraph_isomorphisms_iter())
         # loop over all matchs and check if the atoms are already
         # assigned - symmetric matches must be skipped
         for current_match in raw_matchs:
@@ -101,7 +140,19 @@ def label_fragment_from_graph(self, fragment_graph):
             # according to our tempalte molecule
             rev_current_match = {val: key for key, val in current_match.items()}
             atoms = [ rev_current_match[template_atom] for template_atom in template_atoms]
-            if frozenset(atoms) not in self.res_assigment and not any([atom in self.assigned_atoms for atom in atoms]):
+            if self.assigned_atoms:
+                connected = self.is_connected_to_prev(current_match.keys(),
+                                                      self.assigned_atoms,)
+            else:
+                connected = True
+
+            #print(connected, frozenset(atoms) not in self.res_assigment, not any([atom in self.assigned_atoms for atom in atoms]))
+
+            if frozenset(atoms) not in self.res_assigment and \
+                not any([atom in self.assigned_atoms for atom in atoms]) and \
+                connected:
+
+              #  print(current_match.keys())
                 self.res_assigment.append(frozenset(atoms))
                 for idx, atom in enumerate(atoms):
                     self.molecule.nodes[atom]["resid"] = self.resid
@@ -110,7 +161,6 @@ def label_fragment_from_graph(self, fragment_graph):
                     self.max_by_resid[self.resid] = idx
                     self.known_atom = atom
                     self.assigned_atoms.append(atom)
-                    print(self.molecule.nodes[atom]["element"])
                 self.resid += 1
 
     def label_fragments_from_graph(self, fragment_graphs):
@@ -157,19 +207,25 @@ def extract_unique_fragments(self, fragment_graphs):
         list[nx.Graph]
             all unique fragment graphs
         """
+       # nx.draw(self.molecule, with_labels=True,  pos=nx.kamada_kawai_layout(self.molecule))
+       # plt.show()
         # first we find and label all fragments in the molecule
         self.label_fragments_from_graph(fragment_graphs)
+       # labeldict = nx.get_node_attributes(self.molecule, "atomname")
+       # nx.draw(self.molecule, labels=labeldict, with_labels=True,  pos=nx.kamada_kawai_layout(self.molecule))
+       # plt.show()
         # then we assign all left-over atoms to the existing residues
         self.label_unmatched_atoms()
+        # make the residue graph
+        self.make_res_graph()
         # now we make the residue graph and find all unique residues
         unique_fragments = {}
-        res_graph = make_residue_graph(self.molecule)
         had_resnames = {}
-        for node in res_graph.nodes:
-            resname = res_graph.nodes[node]['resname']
+        for node in self.res_graph.nodes:
+            resname = self.res_graph.nodes[node]['resname']
             # this fragment is terminal located so we give it a special prefix
-            fragment = res_graph.nodes[node]['graph']
-            if res_graph.degree(node) == 1:
+            fragment = self.res_graph.nodes[node]['graph']
+            if self.res_graph.degree(node) == 1:
                resname = resname + self.ter_prefix
                nx.set_node_attributes(self.molecule, {node: resname for node in fragment.nodes} ,"resname")
             # here we extract the fragments and set appropiate residue names
@@ -192,4 +248,15 @@ def extract_unique_fragments(self, fragment_graphs):
                     had_resnames[resname] = 0
                 unique_fragments[resname] = fragment
 
+        print("--")
+        resid_col = {0: "r", 1: "g", 2:"b", 3:"c", 4:"m", 5:"y", 6:"orange", 7:"pink"}
+        labeldict = nx.get_node_attributes(self.molecule, "atomname")
+        resids  = nx.get_node_attributes(self.molecule, "resid")
+        colors = [resid_col[resid] for node, resid in resids.items()]
+        print(colors)
+        print(labeldict)
+        nx.draw(self.molecule, labels=labeldict, with_labels=True,  pos=nx.kamada_kawai_layout(self.molecule), node_color=colors)
+        plt.show()
+        print("--")
         return unique_fragments
+
diff --git a/polyply/src/graph_utils.py b/polyply/src/graph_utils.py
index 489ba118..1bced361 100644
--- a/polyply/src/graph_utils.py
+++ b/polyply/src/graph_utils.py
@@ -225,4 +225,5 @@ def find_one_ismags_match(graph1, graph2, node_match):
         mapping = next(raw_matches)
         return mapping
     except StopIteration:
+        raise IOError("no match_found")
         return None
diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py
index ef9c1ba9..9ba46c21 100644
--- a/polyply/src/itp_to_ff.py
+++ b/polyply/src/itp_to_ff.py
@@ -281,6 +281,13 @@ def equalize_charges(molecule, target_charge=0):
 def handle_chirality(molecule, chiral_centers):
     pass
 
+def hcount(molecule, node):
+    """Return the number of neighbours of `node` whose element is "H"."""
+    return sum(
+        molecule.nodes[neighbour]["element"] == "H"
+        for neighbour in molecule.neighbors(node)
+    )
+
 def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0):
     """
     Main executable for itp to ff tool.
@@ -293,7 +300,7 @@ def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0
     # read the target fragments and convert to graph
     fragment_graphs = []
     for resname, smile in zip(resnames, fragment_smiles):
-        fragment_graph = pysmiles.read_smiles(smile)
+        fragment_graph = pysmiles.read_smiles(smile, explicit_hydrogen=True)
         nx.set_node_attributes(fragment_graph, resname, "resname")
         fragment_graphs.append(fragment_graph)
 
@@ -306,13 +313,8 @@ def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0
         new_block.nrexcl = mol.nrexcl
         force_field.blocks[name] = new_block
 
-    for node in mol.nodes:
-        if mol.nodes[node]['resid'] == 3:
-            print(mol.nodes[node])
-    print("\n\n")
-    for node in mol.nodes:
-        if mol.nodes[node]['resid'] == 4:
-            print(mol.nodes[node])
+#    for node in mol.nodes:
+#        print(mol.nodes[node])
 
     force_field.links = extract_links(mol)
 

From 7eff22a32b3d4bc95c8a5b41aaabb458e535f2e5 Mon Sep 17 00:00:00 2001
From: "f.grunewald" <f.grunewald@rug.nl>
Date: Mon, 19 Jun 2023 11:32:46 +0200
Subject: [PATCH 03/82] fragment finder with prints

---
 polyply/src/fragment_finder.py | 98 ++++++++++++++++++++++++++++++----
 1 file changed, 89 insertions(+), 9 deletions(-)

diff --git a/polyply/src/fragment_finder.py b/polyply/src/fragment_finder.py
index 062ce602..53f9d9e1 100644
--- a/polyply/src/fragment_finder.py
+++ b/polyply/src/fragment_finder.py
@@ -22,7 +22,7 @@ def _element_match(node1, node2):
     Checks if the element attribute of two nodes
     is the same.
 
-    Returns:
+    Returns
     --------
     bool
     """
@@ -30,9 +30,38 @@ def _element_match(node1, node2):
 
 class FragmentFinder():
     """
-    Find, label and extract unique fragments from a vermouth.molecule.Molecule.
+    This class enables finding and labelling of fragments
+    in the all-atom description of molecules. Fragments are
+    small networkx graphs. It makes a number of implicit
+    assumptions:
 
-    Wrire process HERE
+    - the molecule is connected and acyclic
+    - the residue graph of the molecule is linear
+    - the nodes by index increase with increasing resid order
+    - the graphs provided as fragment graphs follow the sequence
+      of residues. For example, given a polymer A5-B2-C3-A3
+      residue sequence, fragments should be provided as a list
+      A,B,C,A. The length of the block does not matter.
+
+    The algorithm loops over the fragments and finds a match
+    between a fragment and the molecule graph using a subgraph
+    isomorphism based on the element attribute. This match is
+    then used to set the degree attribute on the fragment. Next
+    all other subgraph isomorphisms are found under the condition
+    that each found match must be connected to the previous residue.
+    Nodes are labelled with a resid and resname. This part is done
+    by the `self.label_fragment_from_graph` class method.
+
+    Subsequently, the algorithm proceeds to merge all left-over
+    atoms to the residue they are connected with assigning a resid
+    and resname from that residue. This procedure is done by
+    `self.label_unmatched_atoms`.
+
+    Finally, the code goes over all residues and assigns a prefix to
+    all terminal residues. In addition residues with the same resname
+    are compared to each other using a subgraph isomorphism and if
+    they are not isomorphic as result of assigning left-over atoms,
+    the resname is appended by a number.
     """
 
     def __init__(self, molecule, prefix):
@@ -44,6 +73,28 @@ def __init__(self, molecule, prefix):
         Parameters
         ----------
         molecule: :class:`vermouth.molecule.Molecule`
+        prefix: str
+            the prefix used to label termini
+
+        Attributes
+        ----------
+        max_by_resid: dict[int][int]
+            number of atoms by resid
+        ter_prefix: str
+            the terminal prefix
+        resid: int
+            highest resid
+        assigned_atoms: list[`abc.hashable`]
+            atoms assigned to residues
+        molecule: :class:`vermouth.molecule.Molecule`
+            the molecule to match against
+        known_atom: `abc.hashable`
+            any atom that has been matched to a fragment
+        match_keys: `list[str]`
+            molecule properties to use in matching the fragment
+            graphs in the second stage.
+        masses_to_elements: dict[int][str]
+            matches masses to elements
         """
         self.max_by_resid = {}
         self.ter_prefix = prefix
@@ -54,6 +105,7 @@ def __init__(self, molecule, prefix):
         self.known_atom = None
         self.match_keys = ['element', 'mass', 'degree'] #, 'charge']
         self.masses_to_element = {16: "O",
+                                  14: "N",
                                   12: "C",
                                   32: "S",
                                    1: "H"}
@@ -74,6 +126,7 @@ def _node_match(self, node1, node2):
                 return False
         return True
 
+    # this could be a property??
     def make_res_graph(self):
         self.res_graph = make_residue_graph(self.molecule)
 
@@ -84,6 +137,11 @@ def pre_match(self, fragment_graph):
         matching. This is a safety measure because even though
         the fragment graph is subgraph isomorphic the underlying
         itp parameters might not be.
+
+        Parameters
+        -----------
+        fragment_graph: 'nx.Graph'
+            must have attributes element for each node
         """
         # find subgraph isomorphic matches to the target fragment
         # based on the element only
@@ -100,6 +158,13 @@ def is_connected_to_prev(self, current, prev):
         """
         Check if the atoms in the lists current or
         prev are connected.
+
+        Parameters
+        ----------
+        current: list[abc.hashable]
+            list of current nodes
+        prev: list[abc.hashable]
+            list of prev nodes
         """
         for node in current:
             for neigh_node in self.molecule.neighbors(node):
@@ -109,8 +174,8 @@ def is_connected_to_prev(self, current, prev):
 
     def label_fragment_from_graph(self, fragment_graph):
         """
-        For the `self.molecule` label all atoms that match
-        the `fragment_graph` with a resid attribute and set
+        For the `self.molecule` label all atoms, that match
+        the `fragment_graph`, with a resid attribute and set
         the atom-name to the element name plus index relative
         to the atoms in the fragment.
 
@@ -133,9 +198,12 @@ def label_fragment_from_graph(self, fragment_graph):
         # the below statement scales super duper extra poorly
         resname = list(nx.get_node_attributes(fragment_graph, "resname").values())[0]
         raw_matchs = list(GM.subgraph_isomorphisms_iter())
+        print('\n', resname)
         # loop over all matchs and check if the atoms are already
         # assigned - symmetric matches must be skipped
         for current_match in raw_matchs:
+            if resname == "OH":
+                print(current_match)
             # the graph matcher can return the matchs in any order so we need to sort them
             # according to our tempalte molecule
             rev_current_match = {val: key for key, val in current_match.items()}
@@ -172,6 +240,9 @@ def label_fragments_from_graph(self, fragment_graphs):
         fragment_graphs: list[nx.Graph]
         """
         for fragment_graph in fragment_graphs:
+            labeldict = nx.get_node_attributes(fragment_graph, "element")
+            nx.draw(fragment_graph, labels=labeldict, with_labels=True,  pos=nx.kamada_kawai_layout(fragment_graph))
+            plt.show()
             self.label_fragment_from_graph(fragment_graph)
 
     def label_unmatched_atoms(self):
@@ -207,8 +278,9 @@ def extract_unique_fragments(self, fragment_graphs):
         list[nx.Graph]
             all unique fragment graphs
         """
-       # nx.draw(self.molecule, with_labels=True,  pos=nx.kamada_kawai_layout(self.molecule))
-       # plt.show()
+        labeldict = nx.get_node_attributes(self.molecule, "element")
+        nx.draw(self.molecule, labels=labeldict, with_labels=True,  pos=nx.kamada_kawai_layout(self.molecule))
+        plt.show()
         # first we find and label all fragments in the molecule
         self.label_fragments_from_graph(fragment_graphs)
        # labeldict = nx.get_node_attributes(self.molecule, "atomname")
@@ -249,9 +321,17 @@ def extract_unique_fragments(self, fragment_graphs):
                 unique_fragments[resname] = fragment
 
         print("--")
-        resid_col = {0: "r", 1: "g", 2:"b", 3:"c", 4:"m", 5:"y", 6:"orange", 7:"pink"}
-        labeldict = nx.get_node_attributes(self.molecule, "atomname")
+        resid_col = {}
         resids  = nx.get_node_attributes(self.molecule, "resid")
+        one = True
+        for resid in set(resids.values()):
+            if one:
+                resid_col[resid] = 'tab:red'
+                one = False
+            else:
+                resid_col[resid] = 'tab:blue'
+                one = True
+        labeldict = nx.get_node_attributes(self.molecule, "atomname")
         colors = [resid_col[resid] for node, resid in resids.items()]
         print(colors)
         print(labeldict)

From 95c4b87544ea989c090109d85f01c0ac1014bfae Mon Sep 17 00:00:00 2001
From: "f.grunewald" <f.grunewald@rug.nl>
Date: Mon, 19 Jun 2023 18:33:42 +0200
Subject: [PATCH 04/82] add tests for fragment finder

---
 polyply/tests/test_fragment_finder.py | 262 ++++++++++++++++++++++++++
 1 file changed, 262 insertions(+)
 create mode 100644 polyply/tests/test_fragment_finder.py

diff --git a/polyply/tests/test_fragment_finder.py b/polyply/tests/test_fragment_finder.py
new file mode 100644
index 00000000..3e58f5c9
--- /dev/null
+++ b/polyply/tests/test_fragment_finder.py
@@ -0,0 +1,262 @@
+# Copyright 2020 University of Groningen
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Test the fragment finder for itp_to_ff.
+"""
+
+import textwrap
+import pytest
+from pathlib import Path
+import numpy as np
+import networkx as nx
+import vermouth.forcefield
+import vermouth.molecule
+from vermouth.gmx.itp_read import read_itp
+from polyply import TEST_DATA
+import polyply.src.meta_molecule
+from polyply.src.meta_molecule import (MetaMolecule, Monomer)
+import polyply
+from collections import defaultdict
+import pysmiles
+
+@pytest.mark.parametrize("node1, node2, expected", [
+    ({"element": "C"}, {"element": "C"}, True),
+    ({"element": "H"}, {"element": "O"}, False),
+    ({"element": "N"}, {"element": "N"}, True),
+    ({"element": "O"}, {"element": "S"}, False),
+])
+def test_element_match(node1, node2, expected):
+    """
+    Nodes match exactly when their element attributes are equal.
+    """
+    assert polyply.src.fragment_finder._element_match(node1, node2) == expected
+
+@pytest.mark.parametrize("match_keys, node1, node2, expected", [
+    (["element"], {"element": "C"}, {"element": "C"}, True),
+    (["element"], {"element": "H"}, {"element": "O"}, False),
+    (["element", "charge"], {"element": "N", "charge": 0}, {"element": "N", "charge": 1}, False),
+    (["element", "charge"], {"element": "O", "charge": -1}, {"element": "O", "charge": -1}, True),
+])
+def test_node_match(match_keys, node1, node2, expected):
+    """
+    Nodes match only if they agree on every attribute in `match_keys`.
+    """
+    # _node_match only reads match_keys, so no molecule is needed
+    finder = polyply.src.fragment_finder.FragmentFinder(None, "ter")
+    finder.match_keys = match_keys
+    assert finder._node_match(node1, node2) == expected
+
+def find_studs(mol):
+    """
+    Yield every node of `mol` whose degree is not
+    equal to the valence tabulated below for its
+    element; only H, C, O and N are tabulated.
+    """
+    full_valence = {"H": 1, "C": 4, "O": 2, "N": 3}
+    for atom in mol.nodes:
+        expected = full_valence[mol.nodes[atom]['element']]
+        # any mismatch counts, not only a too-low degree
+        if mol.degree(atom) == expected:
+            continue
+        yield atom
+
+def set_mass(mol):
+    """Set the integer 'mass' of every node from its 'element'; returns `mol`."""
+    mass_by_element = {"H": 1, "C": 12, "N": 14, "O": 16, "S": 32}
+    for atom in mol.nodes:
+        attrs = mol.nodes[atom]
+        attrs['mass'] = mass_by_element[attrs['element']]
+    return mol
+
+def polymer_from_fragments(fragments, resnames, remove_resid=True):
+    """
+    Chain the SMILES `fragments` into one vermouth molecule by bonding each
+    fragment's first stud to the stud kept from the previous fragment. Returns
+    (molecule, fragment graphs, per-fragment node -> molecule-node mappings).
+    """
+    fragments_to_mol = []
+    frag_mols = []
+    frag_graph = pysmiles.read_smiles(fragments[0], explicit_hydrogen=True)
+    nx.set_node_attributes(frag_graph, 1, "resid")
+    nx.set_node_attributes(frag_graph, resnames[0], "resname")
+    frag_mols.append(frag_graph)
+    mol = vermouth.Molecule(frag_graph)
+    # the first fragment is a terminus: only its first stud is used
+    prev_stud = next(find_studs(frag_graph))
+    fragments_to_mol.append({node: node for node in mol.nodes})
+    for resname, smile in zip(resnames[1:], fragments[1:]):
+        frag_graph = pysmiles.read_smiles(smile, explicit_hydrogen=True)
+        nx.set_node_attributes(frag_graph, resname, "resname")
+        frag_mols.append(frag_graph)
+        next_mol = vermouth.Molecule(frag_graph)
+        correspondance = mol.merge_molecule(next_mol)
+        fragments_to_mol.append(correspondance)
+        stud_iter = find_studs(frag_graph)
+        mol.add_edge(prev_stud, correspondance[next(stud_iter)])
+        # a second stud, if any, is where the next fragment gets attached
+        try:
+            prev_stud = correspondance[next(stud_iter)]
+        except StopIteration:
+            # no second stud: prev_stud stays as is (expected for the last fragment)
+            continue
+    mol = set_mass(mol)
+    if remove_resid:
+        nx.set_node_attributes(mol, {node: None for node in mol.nodes} ,"resid")
+        nx.set_node_attributes(mol, {node: None for node in mol.nodes} ,"resname")
+    return mol, frag_mols, fragments_to_mol
+
+@pytest.mark.parametrize("smiles, resnames", [
+     # completely defined molecule with two termini
+     (["[CH3]", "[CH2]O[CH2]", "[CH3]"], ["CH3", "PEO", "CH3"]),
+     # two different termini
+     (["[OH][CH2]", "[CH2]O[CH2]", "[CH3]"], ["OH", "PEO", "CH3"]),
+     # two different termini with the same repeat unit
+     (["[OH][CH2]", "[CH2]O[CH2]","[CH2]O[CH2]", "[CH3]"], ["OH", "PEO", "PEO", "CH3"]),
+     # sequence with two monomers and multiple "wrong" matches
+     (["[CH3]", "[CH2][CH][CH][CH2]", "[CH2]O[CH2]", "[CH2][OH]"], ["CH3", "PBD", "PEO", "OH"]),
+     # sequence with two monomers, four repeats and multiple "wrong" matches
+     (["[CH3]", "[CH2][CH][CH][CH2]", "[CH2][CH][CH][CH2]", "[CH2][CH][CH][CH2]",
+      "[CH2][CH][CH][CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]",
+      "[CH2][OH]"], ["CH3", "PBE", "PBE", "PBE", "PBE", "PEO", "PEO", "PEO", "PEO", "OH"]),
+     # super symmetry - worst case scenario
+     (["[CH3]", "[CH2][CH2]", "[CH2][CH2]", "[CH2][CH2]","[CH2][CH2]", "[CH2][CH2]","[CH3]"],
+      ["CH3", "PE", "PE", "PE", "PE", "PE", "CH3"]),
+    ])
+def test_label_fragments(smiles, resnames):
+    """Each atom must get the resname and 1-based resid of its source fragment."""
+    molecule, frag_mols, fragments_in_mol = polymer_from_fragments(smiles, resnames)
+    frag_finder = polyply.src.fragment_finder.FragmentFinder(molecule, "ter")
+    # the labels are written onto frag_finder.molecule; no return value is needed
+    frag_finder.label_fragments_from_graph(frag_mols)
+    for resid, (resname, frag_to_mol) in enumerate(zip(resnames, fragments_in_mol), start=1):
+        for mol_node in frag_to_mol.values():
+            assert frag_finder.molecule.nodes[mol_node]['resname'] == resname
+            assert frag_finder.molecule.nodes[mol_node]['resid'] == resid
+
+@pytest.mark.parametrize(
+    "smiles, resnames, remove, new_name",
+    [
+     # do not match termini; NOTE(review): remove key 6 has no new_name entry ("4" is a str) - confirm
+     (["[CH3]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH3]"],
+      ["CH3", "PEO", "PEO", "PEO", "CH3"],
+      {1:2, 6:3},
+      {1: "PEO", "4": "PEO"},
+     ),
+     # have dangling atom in center
+     (["[CH3]", "[CH2][CH2]", "[CH2][CH2]", "[CH2]O[CH2]", "[CH2][CH2]","[CH2][CH2]", "[CH2][CH2]","[CH3]"],
+      ["CH3", "PE", "PE", "PEO", "PE", "PE", "PE", "CH3"],
+      {4:5},
+      {4:"PE"},
+     ),
+    ])
+def test_label_unmatched_atoms(smiles, resnames, remove, new_name):
+    molecule, frag_mols, fragments_in_mol = polymer_from_fragments(smiles, resnames, remove_resid=False)
+    nodes_to_label = {}
+    max_by_resid = {}
+    # `remove` maps an old resid to the resid expected after relabelling
+    for node in molecule.nodes:
+        resid = molecule.nodes[node]['resid']
+        if resid in remove:
+            del molecule.nodes[node]['resid']
+            del molecule.nodes[node]['resname']
+            nodes_to_label[node] = resid
+        else:
+            if resid in max_by_resid:
+                known_atom = node
+                max_by_resid[resid] += 1
+            else:
+                max_by_resid[resid] = 1
+
+    resids = nx.get_node_attributes(molecule, "resid")
+    # the frag finder resets all resid attributes, so the saved ones are restored below
+    frag_finder = polyply.src.fragment_finder.FragmentFinder(molecule, "ter")
+    nx.set_node_attributes(frag_finder.molecule, resids, "resid")
+    frag_finder.max_by_resid = max_by_resid
+    frag_finder.known_atom = known_atom
+    frag_finder.label_unmatched_atoms()
+    for node, old_id in nodes_to_label.items():
+        assert frag_finder.molecule.nodes[node]['resid'] == remove[old_id]
+        assert frag_finder.molecule.nodes[node]['resname'] == new_name[old_id]
+
+@pytest.mark.parametrize(
+    "smiles, resnames, remove, uni_frags",
+    [
+     # completely defined molecule with two termini
+     (["[CH3]", "[CH2]O[CH2]", "[CH3]"],
+      ["CH3", "PEO", "CH3"],
+      {},
+      {"CH3ter": 0, "PEO": 1}
+     ),
+     # two different termini
+     (["[OH][CH2]", "[CH2]O[CH2]", "[CH3]"],
+      ["OH", "PEO", "CH3"],
+      {},
+      {"OHter": 0, "PEO": 1, "CH3ter": 2}
+     ),
+     # sequence with two monomers, four repeats and multiple "wrong" matches
+     (["[CH3]", "[CH2][CH][CH][CH2]", "[CH2][CH][CH][CH2]", "[CH2][CH][CH][CH2]",
+      "[CH2][CH][CH][CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]",
+      "[CH2][OH]"],
+      ["CH3", "PBE", "PBE", "PBE", "PBE", "PEO", "PEO", "PEO", "PEO", "OH"],
+      {},
+      {"CH3ter": 0, "PBE": 1, "PEO": 5, "OHter": 9}
+     ),
+     # super symmetry - worst case scenario
+     (["[CH3]", "[CH2][CH2]", "[CH2][CH2]", "[CH2][CH2]","[CH2][CH2]", "[CH2][CH2]","[CH3]"],
+      ["CH3", "PE", "PE", "PE", "PE", "PE", "CH3"],
+      {},
+      {"CH3ter":0, "PE": 1}
+     ),
+     # do not match termini
+     (["[CH3]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH3]"],
+      ["CH3", "PEO", "PEO", "PEO", "CH3"],
+      {5: 4},
+      {"CH3ter":0, "PEO": 1, "PEOter": (3, 4)},
+     ),
+     # have dangling atom in center; this is a bit awkward but essentially serves
+     # as a guard against malformed input
+     (["[CH3]", "[CH2][CH2]", "[CH2][CH2]", "[CH2]O[CH2]", "[CH2][CH2]","[CH2][CH2]", "[CH2][CH2]","[CH3]"],
+      ["CH3", "PE", "PE", "PEO", "PE", "PE", "PE", "CH3"],
+      {4: 3},
+      {"CH3ter": 0, "PE": 1, "PEter": (2, 3, 4, 5, 6, 7)},
+     ),
+    ])
+def test_extract_fragments(smiles, resnames, remove, uni_frags):
+    molecule, frag_mols, fragments_in_mol = polymer_from_fragments(smiles, resnames, remove_resid=True)
+    for node in molecule.nodes:
+        resid = molecule.nodes[node]['resid']
+        if resid in remove:
+            del molecule.nodes[node]['resid']
+            del molecule.nodes[node]['resname']
+    # NOTE(review): remove_resid=True leaves every resid None, so the loop above deletes nothing - confirm
+    match_mols = []
+    for idx, frag in enumerate(frag_mols):
+        if idx not in remove.values():
+            match_mols.append(frag)
+
+    frag_finder = polyply.src.fragment_finder.FragmentFinder(molecule, "ter")
+    fragments = frag_finder.extract_unique_fragments(match_mols)
+    frag_finder.match_keys = ['element', 'mass', 'resname']
+    for resname, graph in fragments.items():
+        if type(uni_frags[resname]) == tuple:
+           new_smiles = [smiles[idx] for idx in uni_frags[resname]]
+           new_resnames = [resnames[idx] for idx in uni_frags[resname]]
+           ref, _, _ = polymer_from_fragments(new_smiles, new_resnames)
+           nx.set_node_attributes(ref, resname, "resname")
+        else:
+            ref = frag_mols[uni_frags[resname]]
+        # because the termini are not labelled yet in the fragment
+        # graphs used to make the match
+        nx.set_node_attributes(ref, resname, "resname")
+        assert nx.is_isomorphic(ref, graph, node_match=frag_finder._node_match)

From ae2794c5bb796acdb9fa712972d51bafa6d2b77a Mon Sep 17 00:00:00 2001
From: "f.grunewald" <f.grunewald@rug.nl>
Date: Tue, 20 Jun 2023 10:14:15 +0200
Subject: [PATCH 05/82] add test for 100% coverage

---
 polyply/tests/test_fragment_finder.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/polyply/tests/test_fragment_finder.py b/polyply/tests/test_fragment_finder.py
index 3e58f5c9..e2b319c0 100644
--- a/polyply/tests/test_fragment_finder.py
+++ b/polyply/tests/test_fragment_finder.py
@@ -218,6 +218,12 @@ def test_label_unmatched_atoms(smiles, resnames, remove, new_name):
       {},
       {"CH3ter":0, "PE": 1}
      ),
+     # different fragments with same resname
+     (["[CH3]O[CH2]", "[CH2]O[CH2]", "[CH3]"],
+      ["PEO", "PEO", "CH3"],
+      {3:2},
+      {"PEOter": 0, "PEOter_1": (1,2)}
+     ),
      # do not match termini
      (["[CH3]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH3]"],
       ["CH3", "PEO", "PEO", "PEO", "CH3"],
@@ -248,6 +254,7 @@ def test_extract_fragments(smiles, resnames, remove, uni_frags):
     frag_finder = polyply.src.fragment_finder.FragmentFinder(molecule, "ter")
     fragments = frag_finder.extract_unique_fragments(match_mols)
     frag_finder.match_keys = ['element', 'mass', 'resname']
+    assert len(fragments) == len(uni_frags)
     for resname, graph in fragments.items():
         if type(uni_frags[resname]) == tuple:
            new_smiles = [smiles[idx] for idx in uni_frags[resname]]

From 101d2b7e8f90231e949c7256e09d57efef8348ce Mon Sep 17 00:00:00 2001
From: "f.grunewald" <f.grunewald@rug.nl>
Date: Tue, 20 Jun 2023 10:15:06 +0200
Subject: [PATCH 06/82] refactor graph matching post isomorph check

---
 polyply/src/fragment_finder.py | 114 +++++++++++++++++----------------
 1 file changed, 60 insertions(+), 54 deletions(-)

diff --git a/polyply/src/fragment_finder.py b/polyply/src/fragment_finder.py
index 53f9d9e1..6d8e67c5 100644
--- a/polyply/src/fragment_finder.py
+++ b/polyply/src/fragment_finder.py
@@ -15,7 +15,6 @@
 import networkx as nx
 from vermouth.graph_utils import make_residue_graph
 from polyply.src.graph_utils import find_one_ismags_match
-import matplotlib.pyplot as plt
 
 def _element_match(node1, node2):
     """
@@ -95,6 +94,8 @@ def __init__(self, molecule, prefix):
             graphs in the second stage.
         masses_to_elements: dict[int][str]
             matches masses to elements
+        res_graph: :class:`vermouth.molecule.Molecule`
+            residue graph of the molecule
         """
         self.max_by_resid = {}
         self.ter_prefix = prefix
@@ -109,18 +110,32 @@ def __init__(self, molecule, prefix):
                                   12: "C",
                                   32: "S",
                                    1: "H"}
+        self.res_graph = None
 
-        # resids are not reliable so we set them all to None
-        nx.set_node_attributes(self.molecule, None, "resid")
+        if self.molecule:
+            # resids are not reliable so we set them all to None
+            nx.set_node_attributes(self.molecule, None, "resid")
 
-        # set the element attribute for each atom in the
-        # molecule
-        for node in self.molecule.nodes:
-            mass = round(self.molecule.nodes[node]["mass"])
-            self.molecule.nodes[node]["element"] = self.masses_to_element[mass]
-            self.molecule.nodes[node]["degree"] = self.molecule.degree(node)
+            # set the element attribute for each atom in the
+            # molecule
+            for node in self.molecule.nodes:
+                mass = round(self.molecule.nodes[node]["mass"])
+                self.molecule.nodes[node]["element"] = self.masses_to_element[mass]
+                self.molecule.nodes[node]["degree"] = self.molecule.degree(node)
 
     def _node_match(self, node1, node2):
+        """
+        Check if two node dicts match.
+
+        Parameters
+        ----------
+        node1: dict
+        node2: dict
+
+        Returns
+        -------
+        bool
+        """
         for attr in self.match_keys:
             if node1[attr] != node2[attr]:
                 return False
@@ -142,18 +157,46 @@ def pre_match(self, fragment_graph):
         -----------
         fragment_graph: 'nx.Graph'
             must have attributes element for each node
+
+        Returns
+        -------
+        'nx.Graph'
+            the labelled fragment graph
         """
+        template_atoms = list(fragment_graph.nodes)
         # find subgraph isomorphic matches to the target fragment
         # based on the element only
         GM = nx.isomorphism.GraphMatcher(self.molecule,
                                          fragment_graph,
                                          node_match=_element_match,)
-        one_match = next(GM.subgraph_isomorphisms_iter())
+
+        for one_match in GM.subgraph_isomorphisms_iter():
+            rev_current_match = {val: key for key, val in one_match.items()}
+            atoms = [ rev_current_match[template_atom] for template_atom in template_atoms]
+            if self.is_valid_match(one_match, atoms)[0]:
+                break
+
         for mol_atom, tempt_atom in one_match.items():
             for attr in self.match_keys:
                 fragment_graph.nodes[tempt_atom][attr] = self.molecule.nodes[mol_atom][attr]
         return fragment_graph
 
+    def is_valid_match(self, match, atoms):
+        """
+        Check if a match is valid; returns (bool, code): code 1-3 = failed check, 4 = valid.
+        """
+        # is the match connected to the previous residue
+        if not self.is_connected_to_prev(match.keys(), self.assigned_atoms,):
+            return False, 1
+        # check if atoms are already assigned
+        if frozenset(atoms) in self.res_assigment:
+            return False, 2
+        # check if there is any partial overlap
+        if any([atom in self.assigned_atoms for atom in atoms]):
+            return False, 3
+
+        return True, 4
+
     def is_connected_to_prev(self, current, prev):
         """
         Check if the atoms in the lists current or
@@ -166,6 +209,10 @@ def is_connected_to_prev(self, current, prev):
         prev: list[abc.hashable]
             list of prev nodes
         """
+        # no atoms have been assigned
+        if len(prev) == 0:
+            return True
+
         for node in current:
             for neigh_node in self.molecule.neighbors(node):
                 if neigh_node in prev:
@@ -195,32 +242,16 @@ def label_fragment_from_graph(self, fragment_graph):
                                          node_match=self._node_match,
                                         )
         template_atoms = list(fragment_graph.nodes)
-        # the below statement scales super duper extra poorly
         resname = list(nx.get_node_attributes(fragment_graph, "resname").values())[0]
         raw_matchs = list(GM.subgraph_isomorphisms_iter())
-        print('\n', resname)
         # loop over all matchs and check if the atoms are already
         # assigned - symmetric matches must be skipped
         for current_match in raw_matchs:
-            if resname == "OH":
-                print(current_match)
             # the graph matcher can return the matchs in any order so we need to sort them
             # according to our tempalte molecule
             rev_current_match = {val: key for key, val in current_match.items()}
             atoms = [ rev_current_match[template_atom] for template_atom in template_atoms]
-            if self.assigned_atoms:
-                connected = self.is_connected_to_prev(current_match.keys(),
-                                                      self.assigned_atoms,)
-            else:
-                connected = True
-
-            #print(connected, frozenset(atoms) not in self.res_assigment, not any([atom in self.assigned_atoms for atom in atoms]))
-
-            if frozenset(atoms) not in self.res_assigment and \
-                not any([atom in self.assigned_atoms for atom in atoms]) and \
-                connected:
-
-              #  print(current_match.keys())
+            if self.is_valid_match(current_match, atoms)[0]:
                 self.res_assigment.append(frozenset(atoms))
                 for idx, atom in enumerate(atoms):
                     self.molecule.nodes[atom]["resid"] = self.resid
@@ -240,9 +271,6 @@ def label_fragments_from_graph(self, fragment_graphs):
         fragment_graphs: list[nx.Graph]
         """
         for fragment_graph in fragment_graphs:
-            labeldict = nx.get_node_attributes(fragment_graph, "element")
-            nx.draw(fragment_graph, labels=labeldict, with_labels=True,  pos=nx.kamada_kawai_layout(fragment_graph))
-            plt.show()
             self.label_fragment_from_graph(fragment_graph)
 
     def label_unmatched_atoms(self):
@@ -278,14 +306,8 @@ def extract_unique_fragments(self, fragment_graphs):
         list[nx.Graph]
             all unique fragment graphs
         """
-        labeldict = nx.get_node_attributes(self.molecule, "element")
-        nx.draw(self.molecule, labels=labeldict, with_labels=True,  pos=nx.kamada_kawai_layout(self.molecule))
-        plt.show()
         # first we find and label all fragments in the molecule
         self.label_fragments_from_graph(fragment_graphs)
-       # labeldict = nx.get_node_attributes(self.molecule, "atomname")
-       # nx.draw(self.molecule, labels=labeldict, with_labels=True,  pos=nx.kamada_kawai_layout(self.molecule))
-       # plt.show()
         # then we assign all left-over atoms to the existing residues
         self.label_unmatched_atoms()
         # make the residue graph
@@ -300,6 +322,7 @@ def extract_unique_fragments(self, fragment_graphs):
             if self.res_graph.degree(node) == 1:
                resname = resname + self.ter_prefix
                nx.set_node_attributes(self.molecule, {node: resname for node in fragment.nodes} ,"resname")
+               nx.set_node_attributes(fragment, {node: resname for node in fragment.nodes} ,"resname")
             # here we extract the fragments and set appropiate residue names
             for other_frag in unique_fragments.values():
                 if nx.is_isomorphic(fragment, other_frag, node_match=self._node_match):
@@ -316,27 +339,10 @@ def extract_unique_fragments(self, fragment_graphs):
                 if resname in unique_fragments:
                     resname = resname + "_" + str(had_resnames[resname] + 1)
                     nx.set_node_attributes(self.molecule, {node: resname for node in fragment.nodes} ,"resname")
+                    nx.set_node_attributes(fragment, {node: resname for node in fragment.nodes} ,"resname")
                 else:
                     had_resnames[resname] = 0
                 unique_fragments[resname] = fragment
 
-        print("--")
-        resid_col = {}
-        resids  = nx.get_node_attributes(self.molecule, "resid")
-        one = True
-        for resid in set(resids.values()):
-            if one:
-                resid_col[resid] = 'tab:red'
-                one = False
-            else:
-                resid_col[resid] = 'tab:blue'
-                one = True
-        labeldict = nx.get_node_attributes(self.molecule, "atomname")
-        colors = [resid_col[resid] for node, resid in resids.items()]
-        print(colors)
-        print(labeldict)
-        nx.draw(self.molecule, labels=labeldict, with_labels=True,  pos=nx.kamada_kawai_layout(self.molecule), node_color=colors)
-        plt.show()
-        print("--")
         return unique_fragments
 

From 6261186a91f3d716348c6dec6dce7902573fd0ac Mon Sep 17 00:00:00 2001
From: "f.grunewald" <f.grunewald@rug.nl>
Date: Tue, 20 Jun 2023 13:23:28 +0200
Subject: [PATCH 07/82] add check on node naming

---
 polyply/src/fragment_finder.py        | 12 +++---------
 polyply/tests/test_fragment_finder.py |  9 ++++++++-
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/polyply/src/fragment_finder.py b/polyply/src/fragment_finder.py
index 6d8e67c5..3db65c9c 100644
--- a/polyply/src/fragment_finder.py
+++ b/polyply/src/fragment_finder.py
@@ -326,15 +326,7 @@ def extract_unique_fragments(self, fragment_graphs):
             # here we extract the fragments and set appropiate residue names
             for other_frag in unique_fragments.values():
                 if nx.is_isomorphic(fragment, other_frag, node_match=self._node_match):
-                    # it can happen that two fragments are completely isomorphic but have different
-                    # atom names because we don't know the order of atoms when looping over the molecule
-                    # and setting the names. In this case we simply take the atom-names of the known
-                    # fragment. Better ideas anyone?
-                    mapping = find_one_ismags_match(fragment, other_frag, self._node_match)
-                    if mapping:
-                        for source, target in mapping.items():
-                            self.molecule.nodes[target]['atomname'] = self.molecule.nodes[source]['atomname']
-                        break
+                    break
             else:
                 if resname in unique_fragments:
                     resname = resname + "_" + str(had_resnames[resname] + 1)
@@ -344,5 +336,7 @@ def extract_unique_fragments(self, fragment_graphs):
                     had_resnames[resname] = 0
                 unique_fragments[resname] = fragment
 
+        # remake the residue graph since some resnames have changed
+        self.make_res_graph()
         return unique_fragments
 
diff --git a/polyply/tests/test_fragment_finder.py b/polyply/tests/test_fragment_finder.py
index e2b319c0..59155e77 100644
--- a/polyply/tests/test_fragment_finder.py
+++ b/polyply/tests/test_fragment_finder.py
@@ -253,9 +253,9 @@ def test_extract_fragments(smiles, resnames, remove, uni_frags):
 
     frag_finder = polyply.src.fragment_finder.FragmentFinder(molecule, "ter")
     fragments = frag_finder.extract_unique_fragments(match_mols)
-    frag_finder.match_keys = ['element', 'mass', 'resname']
     assert len(fragments) == len(uni_frags)
     for resname, graph in fragments.items():
+        frag_finder.match_keys = ['element', 'mass', 'resname']
         if type(uni_frags[resname]) == tuple:
            new_smiles = [smiles[idx] for idx in uni_frags[resname]]
            new_resnames = [resnames[idx] for idx in uni_frags[resname]]
@@ -267,3 +267,10 @@ def test_extract_fragments(smiles, resnames, remove, uni_frags):
         # graphs used to make the match
         nx.set_node_attributes(ref, resname, "resname")
         assert nx.is_isomorphic(ref, graph, node_match=frag_finder._node_match)
+        # make sure all molecule nodes are named correctly
+        frag_finder.match_keys = ['atomname', 'resname']
+        for node in frag_finder.res_graph:
+           resname_mol = frag_finder.res_graph.nodes[node]["resname"]
+           if resname == resname_mol:
+               target = frag_finder.res_graph.nodes[node]["graph"]
+               assert nx.is_isomorphic(target, graph, node_match=frag_finder._node_match)

From a8ce5a13bf7fb9f8a99159cfd848ffe68e878064 Mon Sep 17 00:00:00 2001
From: "f.grunewald" <f.grunewald@rug.nl>
Date: Tue, 20 Jun 2023 13:27:56 +0200
Subject: [PATCH 08/82] add pysmiles to tests

---
 requirements-tests.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements-tests.txt b/requirements-tests.txt
index 595a4902..03357910 100644
--- a/requirements-tests.txt
+++ b/requirements-tests.txt
@@ -4,3 +4,4 @@ pytest-cov
 pylint
 codecov
 tqdm
+pysmiles

From b8dfa7be105bdc2e419aa8551ac0a041fa67c03c Mon Sep 17 00:00:00 2001
From: "f.grunewald" <f.grunewald@rug.nl>
Date: Tue, 20 Jun 2023 17:20:38 +0200
Subject: [PATCH 09/82] tests for ffoutput

---
 polyply/src/ffoutput.py        | 89 +++++++++++++++++++++++++++++----
 polyply/tests/test_ffoutput.py | 91 ++++++++++++++++++++++++++++++++++
 2 files changed, 169 insertions(+), 11 deletions(-)
 create mode 100644 polyply/tests/test_ffoutput.py

diff --git a/polyply/src/ffoutput.py b/polyply/src/ffoutput.py
index 8beb7a6e..a1ac7b89 100644
--- a/polyply/src/ffoutput.py
+++ b/polyply/src/ffoutput.py
@@ -11,6 +11,18 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import json
+from vermouth.molecule import Choice
+
+def _choice_to_str(attr_dict):
+    """
+    Makes a string out of a choice object.
+    """
+    for attr in attr_dict:
+        if isinstance(attr_dict[attr], Choice):
+            attr_string = "|".join(attr_dict[attr].value)
+            attr_dict[attr] = attr_string
+    return attr_dict
 
 class ForceFieldDirectiveWriter():
     """
@@ -21,7 +33,7 @@ class ForceFieldDirectiveWriter():
     which does not offer the complete rich
     syntax of the ff file format.
     """
-    def __init__(self, forcefield, stream):
+    def __init__(self, forcefield, stream, write_block_edges=True):
         """
         Parameters
         ----------
@@ -36,6 +48,7 @@ def __init__(self, forcefield, stream):
         # these attributes have a specific order in the moleculetype section
         self.normal_order_block_atoms = ["atype", "resid", "resname",
                                          "atomname", "charge_group", "charge", "mass"]
+        self.write_block_edges = write_block_edges  # honour the constructor argument
 
     def write(self):
         """
@@ -47,12 +60,22 @@ def write(self):
             self.stream.write(f"{name} {excl}\n")
             self.write_atoms_block(block.nodes(data=True))
             self.write_interaction_dict(block.interactions)
+            if self.write_block_edges:
+                self.write_edges(block.edges)
 
         for link in self.forcefield.links:
+            if link.patterns:
+                nometa = True
+            else:
+                nometa = False
             self.write_link_header()
-            self.write_atoms_link(link.nodes(data=True))
+            self.write_atoms_link(link.nodes(data=True), nometa)
             self.write_interaction_dict(link.interactions)
             self.write_edges(link.edges)
+            if link.non_edges:
+                self.write_nonedges(link.non_edges)
+            if link.patterns:
+                self.write_patterns(link.patterns)
 
     def write_interaction_dict(self, inter_dict):
         """
@@ -68,9 +91,14 @@ def write_interaction_dict(self, inter_dict):
         for inter_type in inter_dict:
             self.stream.write(f"[ {inter_type} ]\n")
             for interaction in inter_dict[inter_type]:
-                atom_string = " ".join(interaction.atoms)
-                param_string = " ".join(interaction.parameters)
-                meta_string = "{" + " ,".join([f"\"{key}\": \"{value}\"" for key, value in interaction.meta.items()]) + "}"
+                if inter_type not in ["virtual_sitesn", "virtual_sites1", "virtual_sites2", "virtual_sites3"]:
+                    atom_string = " ".join(interaction.atoms)
+                    param_string = " ".join(interaction.parameters)
+                else:
+                    atom_string = " ".join(interaction.atoms) + " -- "
+                    param_string = " ".join(interaction.parameters)
+
+                meta_string = json.dumps(interaction.meta)
                 line = atom_string + " " + param_string + " " + meta_string + "\n"
                 self.stream.write(line)
 
@@ -87,6 +115,24 @@ def write_edges(self, edges):
         for idx, jdx in edges:
             self.stream.write(f"{idx} {jdx}\n")
 
+    def write_nonedges(self, edges):
+        """
+        Writes non-edges to `self.stream` into the non-edges directive.
+
+        Parameters
+        ----------
+        edges: abc.Iterable
+            pairs of (node key, attribute dict); the dict must contain 'atomname'
+        """
+        self.stream.write("[ non-edges ]\n")
+        for idx, jdx in edges:
+            # the second element of a non-edge is an attribute dict, not a node key
+            kdx = jdx['atomname']
+            write_attrs = {key: value for key, value in jdx.items() if key != "atomname"}
+            write_attrs = _choice_to_str(write_attrs)
+            attr_line = json.dumps(write_attrs)
+            self.stream.write(f"{idx} {kdx} {attr_line}\n")
+
     def write_atoms_block(self, nodes):
         """
         Writes the nodes/atoms of the block atomtype directive to `self.stream`.
@@ -99,13 +145,14 @@ def write_atoms_block(self, nodes):
             pair-wise iteratable edge list
         """
         self.stream.write("[ atoms ]\n")
-        for idx, (node, attrs) in enumerate(nodes):
-            idx += 1
-            attr_line = " ".join([str(attrs[attr]) for attr in self.normal_order_block_atoms ])
+        for idx, (node, attrs) in enumerate(nodes, start=1):
+            write_attrs = {attr: attrs[attr] for attr in self.normal_order_block_atoms if attr in attrs}
+            write_attrs = _choice_to_str(write_attrs)
+            attr_line = " ".join([str(value) for value in write_attrs.values()])
             line = f"{idx} " + attr_line + "\n"
             self.stream.write(line)
 
-    def write_atoms_link(self, nodes):
+    def write_atoms_link(self, nodes, nometa=False):
         """
         Writes the nodes/atoms of the link atomtype directive to `self.stream`.
         All attributes are written as json style dicts.
@@ -118,8 +165,13 @@ def write_atoms_link(self, nodes):
         """
         self.stream.write("[ atoms ]\n")
         for node_key, attributes  in nodes:
-            attr_line = " {" + " ,".join([f"\"{key}\": \"{value}\"" for key, value in attributes.items()]) + "}"
-            line = str(node_key) + attr_line + "\n"
+            attributes = {key: value for key, value in attributes.items() if key != "order"}
+            attributes = _choice_to_str(attributes)
+            attr_line = " " + json.dumps(attributes)
+            if nometa:
+                line = str(node_key) + " { }\n"
+            else:
+                line = str(node_key) + attr_line + "\n"
             self.stream.write(line)
 
     def write_link_header(self):
@@ -133,3 +185,18 @@ def write_link_header(self):
         resnames: `abc.itertable[str]`
         """
         self.stream.write("[ link ]\n")
+
+    def write_patterns(self, patterns):
+        """
+        Write the patterns directive.
+        """
+        self.stream.write("[ patterns ]\n")
+        for pattern in patterns:
+            line = ""
+            for tokens in pattern:
+                atom = tokens[0]
+                meta = {key: value for key, value in tokens[1].items() if key not in ["atomname", "order"]}
+                meta_line = json.dumps(_choice_to_str(meta))
+                line = line + " " + atom + " " + meta_line
+            line = line + "\n"
+            self.stream.write(line)
diff --git a/polyply/tests/test_ffoutput.py b/polyply/tests/test_ffoutput.py
new file mode 100644
index 00000000..878d2325
--- /dev/null
+++ b/polyply/tests/test_ffoutput.py
@@ -0,0 +1,91 @@
+from pathlib import Path
+import pytest
+import vermouth
+from vermouth.ffinput import read_ff
+import polyply
+from polyply.src.ffoutput import ForceFieldDirectiveWriter
+
+def _read_force_field(fpath):
+    """
+    wrapper to read and return force-field
+    """
+    force_field = vermouth.forcefield.ForceField("test")
+    with open(fpath, "r") as _file:
+        lines = _file.readlines()
+    read_ff(lines, force_field)
+    return force_field
+
+def equal_blocks(block1, block2):
+    """
+    Need to overwrite since obviously
+    the force-fields cannot be the same.
+    """
+    return (block1.nrexcl == block2.nrexcl and
+            block1.same_nodes(block2) and
+            block1.same_edges(block2) and
+            block1.same_interactions(block2) and
+            block1.name == block2.name )
+
+def compare_patterns(patterns1, patterns2):
+    """
+    Patterns are evil so we also need a
+    special compare function.
+    """
+    assert len(patterns1) == len(patterns2)
+    for pattern1, pattern2 in zip(patterns1, patterns2):
+        for entry1, entry2 in zip(pattern1, pattern2):
+            assert entry1[0] == entry2[0]
+            assert not vermouth.utils.are_different(entry1[1],
+                                                    entry2[1])
+    return True
+
+def equal_links(link1, link2):
+    """
+    Needs to overwrite for the same reason
+    as for blocks.
+    """
+    return (equal_blocks(link1, link2)
+           and link1.same_non_edges(link2)
+           and link1.removed_interactions == link2.removed_interactions
+           and link1.molecule_meta == link2.molecule_meta
+           and compare_patterns(link1.patterns, link2.patterns)
+           and set(link1.features) == set(link2.features)
+           )
+
+def equal_ffs(ff1, ff2):
+    """
+    Compare two forcefields.
+    """
+    assert len(ff1.blocks) == len(ff2.blocks)
+    # compare blocks
+    for name, block in ff1.blocks.items():
+        assert equal_blocks(block, ff2.blocks[name])
+
+    for link1, link2 in zip(ff1.links, ff2.links):
+        assert equal_links(link1, link2)
+    return True
+
+@pytest.mark.parametrize("libname", [
+     '2016H66',
+     'gromos53A6',
+     'oplsaaLigParGen',
+     'martini2',
+     'parmbsc1',
+])
+def test_ffoutput(tmp_path, libname):
+    """
+    Check if we can write and reread our own ff-libraries.
+    """
+    tmp_path = "/coarse/fabian/current-projects/polymer_itp_builder/polyply_2.0/polyply/tests/test_data/tmp"
+    lib_path = Path(polyply.DATA_PATH) / libname
+    for idx, _file in enumerate(lib_path.iterdir()):
+        if _file.suffix == ".ff":
+            # read the forcefield
+            force_field = _read_force_field(_file)
+            # write the forcefield
+            tmp_file = Path(tmp_path) / (str(idx) + f"{libname}_new.ff")
+            with open(tmp_file, "w") as filehandle:
+                ForceFieldDirectiveWriter(forcefield=force_field, stream=filehandle).write()
+            # re-read the force field file that was just written
+            force_field_target = _read_force_field(tmp_file)
+            assert equal_ffs(force_field, force_field_target)

From b3ea5ac6804ca5e176f0d32092b08117da6aa93e Mon Sep 17 00:00:00 2001
From: "f.grunewald" <f.grunewald@rug.nl>
Date: Tue, 20 Jun 2023 17:21:26 +0200
Subject: [PATCH 10/82] use tmp-file for testing ffoutput

---
 polyply/tests/test_ffoutput.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/polyply/tests/test_ffoutput.py b/polyply/tests/test_ffoutput.py
index 878d2325..c5855bd6 100644
--- a/polyply/tests/test_ffoutput.py
+++ b/polyply/tests/test_ffoutput.py
@@ -76,7 +76,6 @@ def test_ffoutput(tmp_path, libname):
     """
     Check if we can write and reread our own ff-libraries.
     """
-    tmp_path = "/coarse/fabian/current-projects/polymer_itp_builder/polyply_2.0/polyply/tests/test_data/tmp"
     lib_path = Path(polyply.DATA_PATH) / libname
     for idx, _file in enumerate(lib_path.iterdir()):
         if _file.suffix == ".ff":

From 79c38fb884384112bc4fd81761aec588679147cb Mon Sep 17 00:00:00 2001
From: "f.grunewald" <f.grunewald@rug.nl>
Date: Tue, 20 Jun 2023 17:44:48 +0200
Subject: [PATCH 11/82] modify extract block and use in itp_to_ff

---
 polyply/src/generate_templates.py | 11 ++++---
 polyply/src/itp_to_ff.py          | 53 +------------------------------
 2 files changed, 8 insertions(+), 56 deletions(-)

diff --git a/polyply/src/generate_templates.py b/polyply/src/generate_templates.py
index 4939353c..509663d7 100644
--- a/polyply/src/generate_templates.py
+++ b/polyply/src/generate_templates.py
@@ -235,7 +235,7 @@ def _relabel_interaction_atoms(interaction, mapping):
     new_interaction = interaction._replace(atoms=new_atoms)
     return new_interaction
 
-def extract_block(molecule, resname, defines):
+def extract_block(molecule, nodes, defines={}):
     """
     Given a `vermouth.molecule` and a `resname`
     extract the information of a block from the
@@ -245,7 +245,9 @@ def extract_block(molecule, resname, defines):
     Parameters
     ----------
     molecule:  :class:vermouth.molecule.Molecule
-    resname:   str
+    nodes: abc.hashable
+        the nodes corresponding to the block to
+        extract
     defines:   dict
       dict of type define: value
 
@@ -253,8 +255,8 @@ def extract_block(molecule, resname, defines):
     -------
     :class:vermouth.molecule.Block
     """
-    nodes = find_atoms(molecule, "resname", resname)
     resid = molecule.nodes[nodes[0]]["resid"]
+    resname = molecule.nodes[nodes[0]]["resname"]
     block = vermouth.molecule.Block()
 
     # select all nodes with the same first resid and
@@ -324,7 +326,8 @@ class variable.
 
         for resname in resnames:
             if resname not in self.templates:
-                block = extract_block(meta_molecule.molecule, resname,
+                nodes_from_block = find_atoms(meta_molecule.molecule, "resname", resname)
+                block = extract_block(meta_molecule.molecule, nodes_from_block,
                                       self.topology.defines)
 
                 opt_counter = 0
diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py
index 9ba46c21..249adb81 100644
--- a/polyply/src/itp_to_ff.py
+++ b/polyply/src/itp_to_ff.py
@@ -21,7 +21,7 @@
 from vermouth.forcefield import ForceField
 from vermouth.molecule import Interaction
 from polyply.src.topology import Topology
-from polyply.src.generate_templates import _relabel_interaction_atoms
+from polyply.src.generate_templates import extract_block
 from polyply.src.fragment_finder import FragmentFinder
 from polyply.src.ffoutput import ForceFieldDirectiveWriter
 
@@ -97,57 +97,6 @@ def _extract_edges_from_shortest_path(atoms, block, min_resid):
                 resnames.update(zip(link_names, [ block.nodes[node]["resname"] for node in edge]))
     return final_atoms, edges, resnames
 
-def extract_block(molecule, nodes, defines):
-    """
-    Given a `vermouth.molecule` and a `resname`
-    extract the information of a block from the
-    molecule definition and replace all defines
-    if any are found.
-
-    Parameters
-    ----------
-    molecule:  :class:vermouth.molecule.Molecule
-    resname:   str
-    defines:   dict
-      dict of type define: value
-
-    Returns
-    -------
-    :class:vermouth.molecule.Block
-    """
-    resid = molecule.nodes[nodes[0]]["resid"]
-    block = vermouth.molecule.Block()
-
-    # select all nodes with the same first resid and
-    # make sure the block node labels are atomnames
-    # also build a correspondance dict between node
-    # label in the molecule and in the block for
-    # relabeling the interactions
-    mapping = {}
-    for node in nodes:
-        attr_dict = molecule.nodes[node]
-        if attr_dict["resid"] == resid:
-            block.add_node(attr_dict["atomname"], **attr_dict)
-            mapping[node] = attr_dict["atomname"]
-
-    for inter_type in molecule.interactions:
-        for interaction in molecule.interactions[inter_type]:
-            if all(atom in mapping for atom in interaction.atoms):
-                interaction = _relabel_interaction_atoms(interaction, mapping)
-                block.interactions[inter_type].append(interaction)
-
-    for inter_type in ["bonds", "constraints", "virtual_sitesn",
-                       "virtual_sites2", "virtual_sites3", "virtual_sites4"]:
-        block.make_edges_from_interaction_type(inter_type)
-
-    if not nx.is_connected(block):
-        msg = ('\n Residue {} with id {} consistes of two disconnected parts. '
-               'Make sure all atoms/particles in a residue are connected by bonds,'
-               ' constraints or virual-sites.')
-        raise IOError(msg.format(resname, resid))
-
-    return block
-
 def extract_links(molecule):
     """
     Given a molecule that has the resid and resname attributes

From 77dfe16959a68c31aad2eb105884a2c61fc1bf7f Mon Sep 17 00:00:00 2001
From: "f.grunewald" <f.grunewald@rug.nl>
Date: Tue, 20 Jun 2023 18:01:16 +0200
Subject: [PATCH 12/82] update test for generate templates accordingly

---
 polyply/tests/test_generate_templates.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/polyply/tests/test_generate_templates.py b/polyply/tests/test_generate_templates.py
index ff4bd11f..6c21fc63 100644
--- a/polyply/tests/test_generate_templates.py
+++ b/polyply/tests/test_generate_templates.py
@@ -169,7 +169,8 @@ def test_extract_block():
          polyply.src.polyply_parser.read_polyply(lines, ff)
          block = ff.blocks['test']
          molecule = block.to_molecule()
-         new_block = extract_block(molecule, "GLY", {})
+         nodes = find_atoms(molecule, "resname", "GLY")
+         new_block = extract_block(molecule, nodes=nodes, defines={})
          for node in ff.blocks["GLY"]:
              atomname = ff.blocks["GLY"].nodes[node]["atomname"]
              assert ff.blocks["GLY"].nodes[node] == new_block.nodes[atomname]

From 214f5f24a1645a44a51517f6ff6d3906d2e107fc Mon Sep 17 00:00:00 2001
From: "f.grunewald" <f.grunewald@rug.nl>
Date: Wed, 21 Jun 2023 14:59:27 +0200
Subject: [PATCH 13/82] add isomorphism naming

---
 polyply/src/fragment_finder.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/polyply/src/fragment_finder.py b/polyply/src/fragment_finder.py
index 3db65c9c..d806c054 100644
--- a/polyply/src/fragment_finder.py
+++ b/polyply/src/fragment_finder.py
@@ -326,7 +326,11 @@ def extract_unique_fragments(self, fragment_graphs):
             # here we extract the fragments and set appropiate residue names
             for other_frag in unique_fragments.values():
                 if nx.is_isomorphic(fragment, other_frag, node_match=self._node_match):
-                    break
+                    mapping = find_one_ismags_match(fragment, other_frag, self._node_match)
+                    if mapping:
+                        for source, target in mapping.items():
+                            self.molecule.nodes[target]['atomname'] = self.molecule.nodes[source]['atomname']
+                        break
             else:
                 if resname in unique_fragments:
                     resname = resname + "_" + str(had_resnames[resname] + 1)
@@ -339,4 +343,3 @@ def extract_unique_fragments(self, fragment_graphs):
         # remake the residue graph since some resnames have changed
         self.make_res_graph()
         return unique_fragments
-

From ef700123ea0d2409c0cadb2fb56f02ee3796e7dd Mon Sep 17 00:00:00 2001
From: "f.grunewald" <f.grunewald@rug.nl>
Date: Wed, 21 Jun 2023 14:59:51 +0200
Subject: [PATCH 14/82] properly check if interactions are equal

---
 polyply/src/itp_to_ff.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py
index 249adb81..30f48251 100644
--- a/polyply/src/itp_to_ff.py
+++ b/polyply/src/itp_to_ff.py
@@ -24,6 +24,7 @@
 from polyply.src.generate_templates import extract_block
 from polyply.src.fragment_finder import FragmentFinder
 from polyply.src.ffoutput import ForceFieldDirectiveWriter
+from polyply.tests.test_lib_files import _interaction_equal 
 
 def diffs_to_prefix(atoms, resid_diffs):
     """
@@ -160,9 +161,8 @@ def extract_links(molecule):
            #         print(kdx, link_inter.atoms, patterns[pattern].get(inter_type, []), "\n")
 
                 for other_inter in patterns[pattern].get(inter_type, []):
-                    if other_inter.atoms == link_inter.atoms:
-                        if  other_inter.parameters == link_inter.parameters:
-                            break
+                    if _interaction_equal(other_inter, link_inter, inter_type):
+                        break
                 else:
                     patterns[pattern][inter_type].append(link_inter)
                     resnames_for_patterns[pattern].update(resnames)

From 2410b0a9c3fc1f134c9207310ae4e65cae56bfc1 Mon Sep 17 00:00:00 2001
From: "f.grunewald" <f.grunewald@rug.nl>
Date: Wed, 21 Jun 2023 16:55:31 +0200
Subject: [PATCH 15/82] read itp files

---
 polyply/src/itp_to_ff.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py
index 30f48251..94214ce7 100644
--- a/polyply/src/itp_to_ff.py
+++ b/polyply/src/itp_to_ff.py
@@ -20,6 +20,7 @@
 import vermouth
 from vermouth.forcefield import ForceField
 from vermouth.molecule import Interaction
+from vermouth.gmx.itp_read import read_itp
 from polyply.src.topology import Topology
 from polyply.src.generate_templates import extract_block
 from polyply.src.fragment_finder import FragmentFinder
@@ -241,10 +242,20 @@ def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0
     """
     Main executable for itp to ff tool.
     """
-    # read the target itp-file
-    top = Topology.from_gmx_topfile(itppath, name="test")
-    mol = top.molecules[0].molecule
-    mol = equalize_charges(mol, target_charge=charge)
+    if itppath.suffix == ".top":
+        # read the topology file
+        top = Topology.from_gmx_topfile(itppath, name="test")
+        mol = top.molecules[0].molecule
+        mol = equalize_charges(mol, target_charge=charge)
+
+    if itppath.suffix == ".itp":
+        with open(itppath, "r") as _file:
+            lines = _file.readlines()
+        force_field = ForceField("tmp")
+        read_itp(lines, force_field)
+        block = next(iter(force_field.blocks.values()))
+        mol = block.to_molecule()
+        mol.make_edges_from_interaction_type(type_="bonds")
 
     # read the target fragments and convert to graph
     fragment_graphs = []

From 450ebc4fee67799fb38a04b07c976452adb3d552 Mon Sep 17 00:00:00 2001
From: "f.grunewald" <f.grunewald@rug.nl>
Date: Wed, 21 Jun 2023 16:55:54 +0200
Subject: [PATCH 16/82] draft round robin tests

---
 .../test_data/itp_to_ff/PEG_PBE/in_itp.itp    | 573 ++++++++++++++++++
 .../tests/test_data/itp_to_ff/PEG_PBE/ref.itp | 569 +++++++++++++++++
 .../tests/test_data/itp_to_ff/PEG_PBE/seq.txt |   1 +
 .../test_data/itp_to_ff/PEO_OHter/in_itp.itp  | 327 ++++++++++
 .../test_data/itp_to_ff/PEO_OHter/ref.itp     | 308 ++++++++++
 .../test_data/itp_to_ff/PEO_OHter/seq.txt     |   1 +
 polyply/tests/test_itp_to_ff.py               |  97 +++
 7 files changed, 1876 insertions(+)
 create mode 100644 polyply/tests/test_data/itp_to_ff/PEG_PBE/in_itp.itp
 create mode 100644 polyply/tests/test_data/itp_to_ff/PEG_PBE/ref.itp
 create mode 100644 polyply/tests/test_data/itp_to_ff/PEG_PBE/seq.txt
 create mode 100644 polyply/tests/test_data/itp_to_ff/PEO_OHter/in_itp.itp
 create mode 100644 polyply/tests/test_data/itp_to_ff/PEO_OHter/ref.itp
 create mode 100644 polyply/tests/test_data/itp_to_ff/PEO_OHter/seq.txt
 create mode 100644 polyply/tests/test_itp_to_ff.py

diff --git a/polyply/tests/test_data/itp_to_ff/PEG_PBE/in_itp.itp b/polyply/tests/test_data/itp_to_ff/PEG_PBE/in_itp.itp
new file mode 100644
index 00000000..4fb4521a
--- /dev/null
+++ b/polyply/tests/test_data/itp_to_ff/PEG_PBE/in_itp.itp
@@ -0,0 +1,573 @@
+
+[ moleculetype ]
+; Name               nrexcl
+PBE_PEO                   3
+[ atoms ]
+;   nr       type  resnr residue  atom   cgnr     charge       mass  
+     1   opls_800      1    UNK   C00      1    -0.2328    12.0110 
+     2   opls_801      1    UNK   C01      1    -0.1006    12.0110 
+     3   opls_802      1    UNK   C02      1    -0.1838    12.0110 
+     4   opls_803      1    UNK   C03      1    -0.2559    12.0110 
+     5   opls_804      1    UNK   C04      1    -0.1654    12.0110 
+     6   opls_805      1    UNK   C05      1    -0.0974    12.0110 
+     7   opls_806      1    UNK   C06      1    -0.1786    12.0110 
+     8   opls_807      1    UNK   C07      1    -0.2529    12.0110 
+     9   opls_808      1    UNK   C08      1    -0.1651    12.0110 
+    10   opls_809      1    UNK   C09      1    -0.0962    12.0110 
+    11   opls_810      1    UNK   C0A      1    -0.1791    12.0110 
+    12   opls_811      1    UNK   C0B      1    -0.2540    12.0110 
+    13   opls_812      1    UNK   C0C      1    -0.1626    12.0110 
+    14   opls_813      1    UNK   C0D      1    -0.0981    12.0110 
+    15   opls_814      1    UNK   C0E      1    -0.1725    12.0110 
+    16   opls_815      1    UNK   C0F      1     0.0098    12.0110 
+    17   opls_816      1    UNK   O0G      1    -0.3851    15.9990 
+    18   opls_817      1    UNK   C0H      1     0.0156    12.0110 
+    19   opls_818      1    UNK   C0I      1     0.0130    12.0110 
+    20   opls_819      1    UNK   O0J      1    -0.3669    15.9990 
+    21   opls_820      1    UNK   C0K      1     0.0119    12.0110 
+    22   opls_821      1    UNK   C0M      1     0.0272    12.0110 
+    23   opls_822      1    UNK   O0N      1    -0.6013    15.9990 
+    24   opls_823      1    UNK   H0O      1     0.4144     1.0080 
+    25   opls_824      1    UNK   C0P      1    -0.1809    12.0110 
+    26   opls_825      1    UNK   C0Q      1    -0.2618    12.0110 
+    27   opls_826      1    UNK   H0R      1     0.0850     1.0080 
+    28   opls_827      1    UNK   H0S      1     0.0850     1.0080 
+    29   opls_828      1    UNK   H0T      1     0.0850     1.0080 
+    30   opls_829      1    UNK   H0U      1     0.1144     1.0080 
+    31   opls_830      1    UNK   H0V      1     0.1385     1.0080 
+    32   opls_831      1    UNK   H0W      1     0.1264     1.0080 
+    33   opls_832      1    UNK   H0X      2     0.1264     1.0080 
+    34   opls_833      1    UNK   H0Y      2     0.0958     1.0080 
+    35   opls_834      1    UNK   H0Z      2     0.0958     1.0080 
+    36   opls_835      1    UNK   H10      2     0.1112     1.0080 
+    37   opls_836      1    UNK   H11      2     0.1395     1.0080 
+    38   opls_837      1    UNK   H12      2     0.1255     1.0080 
+    39   opls_838      1    UNK   H13      2     0.1255     1.0080 
+    40   opls_839      1    UNK   H14      2     0.0955     1.0080 
+    41   opls_840      1    UNK   H15      2     0.0955     1.0080 
+    42   opls_841      1    UNK   H16      2     0.1146     1.0080 
+    43   opls_842      1    UNK   H17      2     0.1385     1.0080 
+    44   opls_843      1    UNK   H18      2     0.1264     1.0080 
+    45   opls_844      1    UNK   H19      2     0.1264     1.0080 
+    46   opls_845      1    UNK   H1A      2     0.0969     1.0080 
+    47   opls_846      1    UNK   H1B      2     0.0969     1.0080 
+    48   opls_847      1    UNK   H1C      2     0.1149     1.0080 
+    49   opls_848      1    UNK   H1D      2     0.1074     1.0080 
+    50   opls_849      1    UNK   H1E      2     0.1074     1.0080 
+    51   opls_850      1    UNK   H1F      2     0.0768     1.0080 
+    52   opls_851      1    UNK   H1G      2     0.0768     1.0080 
+    53   opls_852      1    UNK   H1H      2     0.0868     1.0080 
+    54   opls_853      1    UNK   H1I      2     0.0868     1.0080 
+    55   opls_854      1    UNK   H1J      2     0.0841     1.0080 
+    56   opls_855      1    UNK   H1K      2     0.0841     1.0080 
+    57   opls_856      1    UNK   H1M      2     0.0840     1.0080 
+    58   opls_857      1    UNK   H1N      2     0.0840     1.0080 
+    59   opls_858      1    UNK   H1O      2     0.0812     1.0080 
+    60   opls_859      1    UNK   H1P      2     0.0812     1.0080 
+    61   opls_860      1    UNK   H1Q      2     0.1428     1.0080 
+    62   opls_861      1    UNK   H1R      2     0.1279     1.0080 
+    63   opls_862      1    UNK   H1S      2     0.1279     1.0080 
+[ bonds ]
+    2     1     1      0.1529 224262.400
+    3     2     1      0.1510 265265.600
+    4     3     1      0.1340 459403.200
+    5     2     1      0.1529 224262.400
+    6     5     1      0.1529 224262.400
+    7     6     1      0.1510 265265.600
+    8     7     1      0.1340 459403.200
+    9     6     1      0.1529 224262.400
+   10     9     1      0.1529 224262.400
+   11    10     1      0.1510 265265.600
+   12    11     1      0.1340 459403.200
+   13    10     1      0.1529 224262.400
+   14    13     1      0.1529 224262.400
+   15    14     1      0.1529 224262.400
+   16    15     1      0.1529 224262.400
+   17    16     1      0.1410 267776.000
+   18    17     1      0.1410 267776.000
+   19    18     1      0.1529 224262.400
+   20    19     1      0.1410 267776.000
+   21    20     1      0.1410 267776.000
+   22    21     1      0.1529 224262.400
+   23    22     1      0.1410 267776.000
+   24    23     1      0.0945 462750.400
+   25    14     1      0.1510 265265.600
+   26    25     1      0.1340 459403.200
+   27     1     1      0.1090 284512.000
+   28     1     1      0.1090 284512.000
+   29     1     1      0.1090 284512.000
+   30     2     1      0.1090 284512.000
+   31     3     1      0.1080 284512.000
+   32     4     1      0.1080 284512.000
+   33     4     1      0.1080 284512.000
+   34     5     1      0.1090 284512.000
+   35     5     1      0.1090 284512.000
+   36     6     1      0.1090 284512.000
+   37     7     1      0.1080 284512.000
+   38     8     1      0.1080 284512.000
+   39     8     1      0.1080 284512.000
+   40     9     1      0.1090 284512.000
+   41     9     1      0.1090 284512.000
+   42    10     1      0.1090 284512.000
+   43    11     1      0.1080 284512.000
+   44    12     1      0.1080 284512.000
+   45    12     1      0.1080 284512.000
+   46    13     1      0.1090 284512.000
+   47    13     1      0.1090 284512.000
+   48    14     1      0.1090 284512.000
+   49    15     1      0.1090 284512.000
+   50    15     1      0.1090 284512.000
+   51    16     1      0.1090 284512.000
+   52    16     1      0.1090 284512.000
+   53    18     1      0.1090 284512.000
+   54    18     1      0.1090 284512.000
+   55    19     1      0.1090 284512.000
+   56    19     1      0.1090 284512.000
+   57    21     1      0.1090 284512.000
+   58    21     1      0.1090 284512.000
+   59    22     1      0.1090 284512.000
+   60    22     1      0.1090 284512.000
+   61    25     1      0.1080 284512.000
+   62    26     1      0.1080 284512.000
+   63    26     1      0.1080 284512.000
+
+[ angles ]
+;  ai    aj    ak funct            c0            c1            c2            c3 
+    1     2     3     1    111.100    527.184
+    2     3     4     1    124.000    585.760
+    1     2     5     1    112.700    488.273
+    2     5     6     1    112.700    488.273
+    5     6     7     1    111.100    527.184
+    6     7     8     1    124.000    585.760
+    5     6     9     1    112.700    488.273
+    6     9    10     1    112.700    488.273
+    9    10    11     1    111.100    527.184
+   10    11    12     1    124.000    585.760
+    9    10    13     1    112.700    488.273
+   10    13    14     1    112.700    488.273
+   13    14    15     1    112.700    488.273
+   14    15    16     1    112.700    488.273
+   15    16    17     1    109.500    418.400
+   16    17    18     1    109.500    502.080
+   17    18    19     1    109.500    418.400
+   18    19    20     1    109.500    418.400
+   19    20    21     1    109.500    502.080
+   20    21    22     1    109.500    418.400
+   21    22    23     1    109.500    418.400
+   22    23    24     1    108.500    460.240
+   13    14    25     1    111.100    527.184
+   14    25    26     1    124.000    585.760
+    2     1    27     1    110.700    313.800
+    2     1    28     1    110.700    313.800
+    2     1    29     1    110.700    313.800
+    1     2    30     1    110.700    313.800
+    2     3    31     1    117.000    292.880
+    3     4    32     1    120.000    292.880
+    3     4    33     1    120.000    292.880
+    2     5    34     1    110.700    313.800
+    2     5    35     1    110.700    313.800
+    5     6    36     1    110.700    313.800
+    6     7    37     1    117.000    292.880
+    7     8    38     1    120.000    292.880
+    7     8    39     1    120.000    292.880
+    6     9    40     1    110.700    313.800
+    6     9    41     1    110.700    313.800
+    9    10    42     1    110.700    313.800
+   10    11    43     1    117.000    292.880
+   11    12    44     1    120.000    292.880
+   11    12    45     1    120.000    292.880
+   10    13    46     1    110.700    313.800
+   10    13    47     1    110.700    313.800
+   13    14    48     1    110.700    313.800
+   14    15    49     1    110.700    313.800
+   14    15    50     1    110.700    313.800
+   15    16    51     1    110.700    313.800
+   15    16    52     1    110.700    313.800
+   17    18    53     1    109.500    292.880
+   17    18    54     1    109.500    292.880
+   18    19    55     1    110.700    313.800
+   18    19    56     1    110.700    313.800
+   20    21    57     1    109.500    292.880
+   20    21    58     1    109.500    292.880
+   21    22    59     1    110.700    313.800
+   21    22    60     1    110.700    313.800
+   14    25    61     1    117.000    292.880
+   25    26    62     1    120.000    292.880
+   25    26    63     1    120.000    292.880
+   16    15    50     1    110.700    313.800
+    6     5    34     1    110.700    313.800
+   27     1    29     1    107.800    276.144
+   51    16    52     1    107.800    276.144
+    7     6    36     1    109.500    292.880
+   20    19    55     1    109.500    292.880
+   16    15    49     1    110.700    313.800
+   23    22    59     1    109.500    292.880
+   19    18    54     1    110.700    313.800
+   22    21    57     1    110.700    313.800
+   49    15    50     1    107.800    276.144
+   22    21    58     1    110.700    313.800
+   12    11    43     1    120.000    292.880
+   57    21    58     1    107.800    276.144
+   11    10    13     1    111.100    527.184
+   10     9    41     1    110.700    313.800
+   25    14    48     1    109.500    292.880
+   40     9    41     1    107.800    276.144
+   23    22    60     1    109.500    292.880
+   34     5    35     1    107.800    276.144
+   14    13    47     1    110.700    313.800
+   26    25    61     1    120.000    292.880
+   17    16    52     1    109.500    292.880
+   59    22    60     1    107.800    276.144
+   62    26    63     1    117.000    292.880
+    3     2    30     1    109.500    292.880
+    3     2     5     1    111.100    527.184
+   13    10    42     1    110.700    313.800
+   44    12    45     1    117.000    292.880
+    4     3    31     1    120.000    292.880
+   28     1    29     1    107.800    276.144
+   14    13    46     1    110.700    313.800
+    5     2    30     1    110.700    313.800
+    6     5    35     1    110.700    313.800
+    9     6    36     1    110.700    313.800
+   27     1    28     1    107.800    276.144
+    7     6     9     1    111.100    527.184
+   10     9    40     1    110.700    313.800
+   38     8    39     1    117.000    292.880
+   20    19    56     1    109.500    292.880
+   55    19    56     1    107.800    276.144
+   19    18    53     1    110.700    313.800
+   46    13    47     1    107.800    276.144
+    8     7    37     1    120.000    292.880
+   11    10    42     1    109.500    292.880
+   15    14    48     1    110.700    313.800
+   15    14    25     1    111.100    527.184
+   53    18    54     1    107.800    276.144
+   17    16    51     1    109.500    292.880
+   32     4    33     1    117.000    292.880
+
+[ dihedrals ]
+; IMPROPER DIHEDRAL ANGLES 
+;  ai    aj    ak    al funct            c0            c1            c2            c3            c4            c5
+    33     4     3    32    4        180.000     10.460     2  
+    63    26    25    62    4        180.000     10.460     2  
+    39     8     7    38    4        180.000     10.460     2  
+    45    12    11    44    4        180.000     10.460     2  
+    43    11    10    12    4        180.000     10.460     2  
+    61    25    14    26    4        180.000     10.460     2  
+    37     7     6     8    4        180.000     10.460     2  
+    31     3     2     4    4        180.000     10.460     2  
+
+[ dihedrals ]
+; PROPER DIHEDRAL ANGLES
+;  ai    aj    ak    al funct            c0            c1            c2            c3            c4            c5
+    4    3    2    1        3       0.527  -6.397  -1.695   7.565  -0.000   0.000
+   26   25   14   15        3       0.527  -6.397  -1.695   7.565  -0.000   0.000
+   26   25   14   13        3       0.527  -6.397  -1.695   7.565  -0.000   0.000
+    8    7    6    5        3       0.527  -6.397  -1.695   7.565  -0.000   0.000
+   12   11   10    9        3       0.527  -6.397  -1.695   7.565  -0.000   0.000
+   25   14   15   16        3       2.301  -1.464   0.837  -1.674  -0.000   0.000
+   25   14   13   10        3       2.301  -1.464   0.837  -1.674  -0.000   0.000
+    7    6    5    2        3       2.301  -1.464   0.837  -1.674  -0.000   0.000
+   11   10    9    6        3       2.301  -1.464   0.837  -1.674  -0.000   0.000
+    9    6    7    8        3       0.527  -6.397  -1.695   7.565  -0.000   0.000
+    5    2    3    4        3       0.527  -6.397  -1.695   7.565  -0.000   0.000
+   13   10   11   12        3       0.527  -6.397  -1.695   7.565  -0.000   0.000
+   10    9    6    7        3       2.301  -1.464   0.837  -1.674  -0.000   0.000
+   14   13   10   11        3       2.301  -1.464   0.837  -1.674  -0.000   0.000
+    6    5    2    3        3       2.301  -1.464   0.837  -1.674  -0.000   0.000
+    6    5    2    1        3       2.301  -1.464   0.837  -1.674  -0.000   0.000
+   10    9    6    5        3       2.301  -1.464   0.837  -1.674  -0.000   0.000
+   16   15   14   13        3       2.301  -1.464   0.837  -1.674  -0.000   0.000
+   15   14   13   10        3       2.301  -1.464   0.837  -1.674  -0.000   0.000
+   14   13   10    9        3       2.301  -1.464   0.837  -1.674  -0.000   0.000
+   13   10    9    6        3       2.301  -1.464   0.837  -1.674  -0.000   0.000
+    9    6    5    2        3       2.301  -1.464   0.837  -1.674  -0.000   0.000
+   19   18   17   16        3       1.715   2.845   1.046  -5.607  -0.000   0.000
+   22   21   20   19        3       1.715   2.845   1.046  -5.607  -0.000   0.000
+   21   20   19   18        3       1.715   2.845   1.046  -5.607  -0.000   0.000
+   18   17   16   15        3       1.715   2.845   1.046  -5.607  -0.000   0.000
+   38    8    7    6        3      58.576   0.000  -58.576  -0.000  -0.000   0.000
+   39    8    7    6        3      58.576   0.000  -58.576  -0.000  -0.000   0.000
+   44   12   11   10        3      58.576   0.000  -58.576  -0.000  -0.000   0.000
+   62   26   25   14        3      58.576   0.000  -58.576  -0.000  -0.000   0.000
+   32    4    3    2        3      58.576   0.000  -58.576  -0.000  -0.000   0.000
+   63   26   25   14        3      58.576   0.000  -58.576  -0.000  -0.000   0.000
+   45   12   11   10        3      58.576   0.000  -58.576  -0.000  -0.000   0.000
+   33    4    3    2        3      58.576   0.000  -58.576  -0.000  -0.000   0.000
+   33    4    3   31        3      58.576   0.000  -58.576  -0.000  -0.000   0.000
+   44   12   11   43        3      58.576   0.000  -58.576  -0.000  -0.000   0.000
+   32    4    3   31        3      58.576   0.000  -58.576  -0.000  -0.000   0.000
+   62   26   25   61        3      58.576   0.000  -58.576  -0.000  -0.000   0.000
+   45   12   11   43        3      58.576   0.000  -58.576  -0.000  -0.000   0.000
+   39    8    7   37        3      58.576   0.000  -58.576  -0.000  -0.000   0.000
+   38    8    7   37        3      58.576   0.000  -58.576  -0.000  -0.000   0.000
+   63   26   25   61        3      58.576   0.000  -58.576  -0.000  -0.000   0.000
+   61   25   14   13        3      -33.472   0.000  33.472  -0.000  -0.000   0.000
+   43   11   10    9        3      -33.472   0.000  33.472  -0.000  -0.000   0.000
+   31    3    2    5        3      -33.472   0.000  33.472  -0.000  -0.000   0.000
+   61   25   14   15        3      -33.472   0.000  33.472  -0.000  -0.000   0.000
+   37    7    6    5        3      -33.472   0.000  33.472  -0.000  -0.000   0.000
+   43   11   10   13        3      -33.472   0.000  33.472  -0.000  -0.000   0.000
+   31    3    2    1        3      -33.472   0.000  33.472  -0.000  -0.000   0.000
+   37    7    6    9        3      -33.472   0.000  33.472  -0.000  -0.000   0.000
+   31    3    2   30        3       0.665   1.996   0.000  -2.661  -0.000   0.000
+   43   11   10   42        3       0.665   1.996   0.000  -2.661  -0.000   0.000
+   37    7    6   36        3       0.665   1.996   0.000  -2.661  -0.000   0.000
+   61   25   14   48        3       0.665   1.996   0.000  -2.661  -0.000   0.000
+   48   14   25   26        3      -0.778  -2.335   0.000   3.113  -0.000   0.000
+   42   10   11   12        3      -0.778  -2.335   0.000   3.113  -0.000   0.000
+   30    2    3    4        3      -0.778  -2.335   0.000   3.113  -0.000   0.000
+   36    6    7    8        3      -0.778  -2.335   0.000   3.113  -0.000   0.000
+   40    9   10   11        3       0.766   2.297   0.000  -3.063  -0.000   0.000
+   34    5    2    3        3       0.766   2.297   0.000  -3.063  -0.000   0.000
+   46   13   10   11        3       0.766   2.297   0.000  -3.063  -0.000   0.000
+   29    1    2    3        3       0.766   2.297   0.000  -3.063  -0.000   0.000
+   47   13   10   11        3       0.766   2.297   0.000  -3.063  -0.000   0.000
+   34    5    6    7        3       0.766   2.297   0.000  -3.063  -0.000   0.000
+   40    9    6    7        3       0.766   2.297   0.000  -3.063  -0.000   0.000
+   50   15   14   25        3       0.766   2.297   0.000  -3.063  -0.000   0.000
+   28    1    2    3        3       0.766   2.297   0.000  -3.063  -0.000   0.000
+   47   13   14   25        3       0.766   2.297   0.000  -3.063  -0.000   0.000
+   46   13   14   25        3       0.766   2.297   0.000  -3.063  -0.000   0.000
+   35    5    2    3        3       0.766   2.297   0.000  -3.063  -0.000   0.000
+   41    9   10   11        3       0.766   2.297   0.000  -3.063  -0.000   0.000
+   35    5    6    7        3       0.766   2.297   0.000  -3.063  -0.000   0.000
+   49   15   14   25        3       0.766   2.297   0.000  -3.063  -0.000   0.000
+   41    9    6    7        3       0.766   2.297   0.000  -3.063  -0.000   0.000
+   27    1    2    3        3       0.766   2.297   0.000  -3.063  -0.000   0.000
+   49   15   14   13        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   52   16   15   14        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   36    6    9   10        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   48   14   13   10        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   50   15   14   13        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   47   13   10    9        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   27    1    2    5        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   40    9   10   13        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   29    1    2    5        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   41    9    6    5        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   51   16   15   14        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   35    5    6    9        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   46   13   14   15        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   35    5    2    1        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   28    1    2    5        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   47   13   14   15        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   36    6    5    2        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   42   10    9    6        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   41    9   10   13        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   48   14   15   16        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   34    5    2    1        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   40    9    6    5        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   42   10   13   14        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   30    2    5    6        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   46   13   10    9        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   34    5    6    9        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   48   14   13   47        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   46   13   10   42        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   35    5    2   30        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   42   10    9   40        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   30    2    1   28        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   51   16   15   49        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   30    2    1   29        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   56   19   18   54        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   36    6    5   34        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   60   22   21   57        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   60   22   21   58        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   56   19   18   53        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   42   10    9   41        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   52   16   15   50        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   55   19   18   53        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   52   16   15   49        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   49   15   14   48        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   48   14   13   46        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   50   15   14   48        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   55   19   18   54        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   59   22   21   57        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   51   16   15   50        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   47   13   10   42        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   40    9    6   36        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   30    2    1   27        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   36    6    5   35        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   59   22   21   58        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   41    9    6   36        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   34    5    2   30        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   58   21   22   23        3       0.979   2.937   0.000  -3.916  -0.000   0.000
+   57   21   22   23        3       0.979   2.937   0.000  -3.916  -0.000   0.000
+   53   18   19   20        3       0.979   2.937   0.000  -3.916  -0.000   0.000
+   56   19   18   17        3       0.979   2.937   0.000  -3.916  -0.000   0.000
+   49   15   16   17        3       0.979   2.937   0.000  -3.916  -0.000   0.000
+   59   22   21   20        3       0.979   2.937   0.000  -3.916  -0.000   0.000
+   54   18   19   20        3       0.979   2.937   0.000  -3.916  -0.000   0.000
+   60   22   21   20        3       0.979   2.937   0.000  -3.916  -0.000   0.000
+   55   19   18   17        3       0.979   2.937   0.000  -3.916  -0.000   0.000
+   50   15   16   17        3       0.979   2.937   0.000  -3.916  -0.000   0.000
+   60   22   23   24        3       0.736   2.209   0.000  -2.946  -0.000   0.000
+   59   22   23   24        3       0.736   2.209   0.000  -2.946  -0.000   0.000
+   56   19   20   21        3       1.590   4.770   0.000  -6.360  -0.000   0.000
+   51   16   17   18        3       1.590   4.770   0.000  -6.360  -0.000   0.000
+   57   21   20   19        3       1.590   4.770   0.000  -6.360  -0.000   0.000
+   55   19   20   21        3       1.590   4.770   0.000  -6.360  -0.000   0.000
+   53   18   17   16        3       1.590   4.770   0.000  -6.360  -0.000   0.000
+   54   18   17   16        3       1.590   4.770   0.000  -6.360  -0.000   0.000
+   58   21   20   19        3       1.590   4.770   0.000  -6.360  -0.000   0.000
+   52   16   17   18        3       1.590   4.770   0.000  -6.360  -0.000   0.000
+   24   23   22   21        3      -0.444   3.833   0.728  -4.117  -0.000   0.000
+   23   22   21   20        3       9.035  -9.035   0.000  -0.000  -0.000   0.000
+   17   16   15   14        3       2.874   0.582   2.092  -5.548  -0.000   0.000
+   20   19   18   17        3      -1.151   1.151   0.000  -0.000  -0.000   0.000
+
+[ pairs ]
+     1     4    1
+     1     6    1
+     4     5    1
+     3     6    1
+     2     7    1
+     2     9    1
+     5     8    1
+     5    10    1
+     8     9    1
+     7    10    1
+     6    11    1
+     6    13    1
+     9    12    1
+     9    14    1
+    12    13    1
+    11    14    1
+    10    15    1
+    13    16    1
+     3    27    1
+    14    17    1
+     3    28    1
+     5    27    1
+     3    29    1
+     1    31    1
+    15    18    1
+     5    28    1
+     5    29    1
+     4    30    1
+     2    32    1
+    16    19    1
+    10    25    1
+     2    33    1
+     1    34    1
+     6    30    1
+     5    31    1
+     1    35    1
+    17    20    1
+     3    34    1
+     3    35    1
+     2    36    1
+    18    21    1
+    13    26    1
+    19    22    1
+    16    25    1
+    15    26    1
+     7    34    1
+     7    35    1
+     5    37    1
+    20    23    1
+     9    34    1
+     9    35    1
+     8    36    1
+     6    38    1
+    21    24    1
+     6    39    1
+     5    40    1
+    10    36    1
+     9    37    1
+     5    41    1
+     7    40    1
+     7    41    1
+     6    42    1
+    11    40    1
+    11    41    1
+     9    43    1
+    13    40    1
+    13    41    1
+    12    42    1
+    10    44    1
+    10    45    1
+     9    46    1
+    14    42    1
+    13    43    1
+     9    47    1
+    27    30    1
+    11    46    1
+    28    30    1
+    11    47    1
+    10    48    1
+    29    30    1
+    30    31    1
+    15    46    1
+    15    47    1
+    13    49    1
+    31    32    1
+    13    50    1
+    31    33    1
+    30    34    1
+    16    48    1
+    30    35    1
+    14    51    1
+    17    49    1
+    14    52    1
+    17    50    1
+    18    51    1
+    16    53    1
+    34    36    1
+    18    52    1
+    16    54    1
+    35    36    1
+    25    46    1
+    25    47    1
+    17    55    1
+    36    37    1
+    20    53    1
+    17    56    1
+    26    48    1
+    25    49    1
+    20    54    1
+    13    61    1
+    37    38    1
+    25    50    1
+    37    39    1
+    36    40    1
+    21    55    1
+    19    57    1
+    15    61    1
+    14    62    1
+    36    41    1
+    21    56    1
+    19    58    1
+    14    63    1
+    20    59    1
+    23    57    1
+    20    60    1
+    23    58    1
+    40    42    1
+    41    42    1
+    24    59    1
+    24    60    1
+    42    43    1
+    43    44    1
+    43    45    1
+    42    46    1
+    42    47    1
+    46    48    1
+    47    48    1
+    48    49    1
+    48    50    1
+    49    51    1
+    50    51    1
+    49    52    1
+    50    52    1
+    53    55    1
+    54    55    1
+    53    56    1
+    48    61    1
+    54    56    1
+    57    59    1
+    58    59    1
+    57    60    1
+    58    60    1
+    61    62    1
+    61    63    1
+
diff --git a/polyply/tests/test_data/itp_to_ff/PEG_PBE/ref.itp b/polyply/tests/test_data/itp_to_ff/PEG_PBE/ref.itp
new file mode 100644
index 00000000..53941636
--- /dev/null
+++ b/polyply/tests/test_data/itp_to_ff/PEG_PBE/ref.itp
@@ -0,0 +1,569 @@
+; ../../bench.py
+
+; Please cite the following papers:
+
+[ moleculetype ]
+new 3
+
+[ atoms ]
+ 1 opls_800 1 CH3ter C0  1  -0.2327952380952381 12.011
+ 2 opls_826 1 CH3ter H1  1  0.08500476190476192  1.008
+ 3 opls_827 1 CH3ter H2  1  0.08500476190476192  1.008
+ 4 opls_828 1 CH3ter H3  1  0.08500476190476192  1.008
+ 5 opls_832 2 PBE    H8  3   0.1264047619047619  1.008
+ 6 opls_801 2 PBE    C1  2 -0.10059523809523808 12.011
+ 7 opls_802 2 PBE    C2  2  -0.1837952380952381 12.011
+ 8 opls_803 2 PBE    C3  2  -0.2558952380952381 12.011
+ 9 opls_804 2 PBE    C0  2  -0.1653952380952381 12.011
+10 opls_833 2 PBE    H4  3   0.0958047619047619  1.008
+11 opls_834 2 PBE    H5  3   0.0958047619047619  1.008
+12 opls_829 2 PBE    H6  2  0.11440476190476191  1.008
+13 opls_830 2 PBE    H7  2   0.1385047619047619  1.008
+14 opls_831 2 PBE    H9  2   0.1264047619047619  1.008
+15 opls_832 3 PBE    H8  4   0.1264047619047619  1.008
+16 opls_801 3 PBE    C1  3 -0.10059523809523808 12.011
+17 opls_802 3 PBE    C2  3  -0.1837952380952381 12.011
+18 opls_803 3 PBE    C3  3  -0.2558952380952381 12.011
+19 opls_804 3 PBE    C0  3  -0.1653952380952381 12.011
+20 opls_833 3 PBE    H4  4   0.0958047619047619  1.008
+21 opls_834 3 PBE    H5  4   0.0958047619047619  1.008
+22 opls_829 3 PBE    H6  3  0.11440476190476191  1.008
+23 opls_830 3 PBE    H7  3   0.1385047619047619  1.008
+24 opls_831 3 PBE    H9  3   0.1264047619047619  1.008
+25 opls_832 4 PBE    H8  5   0.1264047619047619  1.008
+26 opls_801 4 PBE    C1  4 -0.10059523809523808 12.011
+27 opls_802 4 PBE    C2  4  -0.1837952380952381 12.011
+28 opls_803 4 PBE    C3  4  -0.2558952380952381 12.011
+29 opls_804 4 PBE    C0  4  -0.1653952380952381 12.011
+30 opls_833 4 PBE    H4  5   0.0958047619047619  1.008
+31 opls_834 4 PBE    H5  5   0.0958047619047619  1.008
+32 opls_829 4 PBE    H6  4  0.11440476190476191  1.008
+33 opls_830 4 PBE    H7  4   0.1385047619047619  1.008
+34 opls_831 4 PBE    H9  4   0.1264047619047619  1.008
+35 opls_832 5 PBE    H8  6   0.1264047619047619  1.008
+36 opls_801 5 PBE    C1  5 -0.10059523809523808 12.011
+37 opls_802 5 PBE    C2  5  -0.1837952380952381 12.011
+38 opls_803 5 PBE    C3  5  -0.2558952380952381 12.011
+39 opls_804 5 PBE    C0  5  -0.1653952380952381 12.011
+40 opls_833 5 PBE    H4  6   0.0958047619047619  1.008
+41 opls_834 5 PBE    H5  6   0.0958047619047619  1.008
+42 opls_829 5 PBE    H6  5  0.11440476190476191  1.008
+43 opls_830 5 PBE    H7  5   0.1385047619047619  1.008
+44 opls_831 5 PBE    H9  5   0.1264047619047619  1.008
+45 opls_815 6 PEO    C0  6 0.009804761904761906 12.011
+46 opls_816 6 PEO    O1  6  -0.3850952380952381 15.999
+47 opls_817 6 PEO    C2  6 0.015604761904761906 12.011
+48 opls_850 6 PEO    H3  7   0.0768047619047619  1.008
+49 opls_851 6 PEO    H4  7   0.0768047619047619  1.008
+50 opls_852 6 PEO    H5  7  0.08680476190476191  1.008
+51 opls_853 6 PEO    H6  7  0.08680476190476191  1.008
+52 opls_858 7 PEOter H10 9   0.0812047619047619  1.008
+53 opls_818 7 PEOter C0  8 0.013004761904761906 12.011
+54 opls_819 7 PEOter O1  8  -0.3668952380952381 15.999
+55 opls_820 7 PEOter C2  8 0.011904761904761908 12.011
+56 opls_821 7 PEOter C7  8 0.027204761904761905 12.011
+57 opls_822 7 PEOter O8  8   -0.601295238095238 15.999
+58 opls_823 7 PEOter H9  8   0.4144047619047619  1.008
+59 opls_854 7 PEOter H3  9   0.0841047619047619  1.008
+60 opls_855 7 PEOter H4  9   0.0841047619047619  1.008
+61 opls_856 7 PEOter H5  9  0.08400476190476192  1.008
+62 opls_859 7 PEOter H11 9   0.0812047619047619  1.008
+63 opls_857 7 PEOter H6  9  0.08400476190476192  1.008
+
+[ bonds ]
+ 2  1 1 0.1090 284512.000
+ 3  1 1 0.1090 284512.000
+ 4  1 1 0.1090 284512.000
+ 7  6 1 0.1510 265265.600
+ 8  7 1 0.1340 459403.200
+ 9  6 1 0.1529 224262.400
+12  6 1 0.1090 284512.000
+13  7 1 0.1080 284512.000
+14  8 1 0.1080 284512.000
+ 5  8 1 0.1080 284512.000
+10  9 1 0.1090 284512.000
+11  9 1 0.1090 284512.000
+17 16 1 0.1510 265265.600
+18 17 1 0.1340 459403.200
+19 16 1 0.1529 224262.400
+22 16 1 0.1090 284512.000
+23 17 1 0.1080 284512.000
+24 18 1 0.1080 284512.000
+15 18 1 0.1080 284512.000
+20 19 1 0.1090 284512.000
+21 19 1 0.1090 284512.000
+27 26 1 0.1510 265265.600
+28 27 1 0.1340 459403.200
+29 26 1 0.1529 224262.400
+32 26 1 0.1090 284512.000
+33 27 1 0.1080 284512.000
+34 28 1 0.1080 284512.000
+25 28 1 0.1080 284512.000
+30 29 1 0.1090 284512.000
+31 29 1 0.1090 284512.000
+37 36 1 0.1510 265265.600
+38 37 1 0.1340 459403.200
+39 36 1 0.1529 224262.400
+42 36 1 0.1090 284512.000
+43 37 1 0.1080 284512.000
+44 38 1 0.1080 284512.000
+35 38 1 0.1080 284512.000
+40 39 1 0.1090 284512.000
+41 39 1 0.1090 284512.000
+46 45 1 0.1410 267776.000
+47 46 1 0.1410 267776.000
+48 45 1 0.1090 284512.000
+49 45 1 0.1090 284512.000
+50 47 1 0.1090 284512.000
+51 47 1 0.1090 284512.000
+54 53 1 0.1410 267776.000
+55 54 1 0.1410 267776.000
+56 55 1 0.1529 224262.400
+57 56 1 0.1410 267776.000
+58 57 1 0.0945 462750.400
+59 53 1 0.1090 284512.000
+60 53 1 0.1090 284512.000
+61 55 1 0.1090 284512.000
+63 55 1 0.1090 284512.000
+52 56 1 0.1090 284512.000
+62 56 1 0.1090 284512.000
+ 6  1 1 0.1529 224262.400 ; link
+16  9 1 0.1529 224262.400 ; link
+26 19 1 0.1529 224262.400 ; link
+36 29 1 0.1529 224262.400 ; link
+45 39 1 0.1529 224262.400 ; link
+53 47 1 0.1529 224262.400 ; link
+
+[ pairs ]
+ 8  9 1
+ 8 12 1
+ 6 14 1
+ 6  5 1
+ 9 13 1
+ 7 10 1
+ 7 11 1
+12 13 1
+13 14 1
+13  5 1
+12 10 1
+12 11 1
+18 19 1
+18 22 1
+16 24 1
+16 15 1
+19 23 1
+17 20 1
+17 21 1
+22 23 1
+23 24 1
+23 15 1
+22 20 1
+22 21 1
+28 29 1
+28 32 1
+26 34 1
+26 25 1
+29 33 1
+27 30 1
+27 31 1
+32 33 1
+33 34 1
+33 25 1
+32 30 1
+32 31 1
+38 39 1
+38 42 1
+36 44 1
+36 35 1
+39 43 1
+37 40 1
+37 41 1
+42 43 1
+43 44 1
+43 35 1
+42 40 1
+42 41 1
+47 48 1
+45 50 1
+47 49 1
+45 51 1
+53 56 1
+54 57 1
+55 58 1
+55 59 1
+53 61 1
+55 60 1
+53 63 1
+54 52 1
+57 61 1
+54 62 1
+57 63 1
+58 52 1
+58 62 1
+61 52 1
+63 52 1
+61 62 1
+63 62 1
+ 1  8 1 ; link
+ 7  2 1 ; link
+ 7  3 1 ; link
+ 9  2 1 ; link
+ 7  4 1 ; link
+ 1 13 1 ; link
+ 9  3 1 ; link
+ 9  4 1 ; link
+ 1 10 1 ; link
+ 1 11 1 ; link
+ 2 12 1 ; link
+ 3 12 1 ; link
+ 4 12 1 ; link
+ 7 16 1 ; link
+ 6 17 1 ; link
+ 6 19 1 ; link
+ 9 18 1 ; link
+16 12 1 ; link
+ 6 22 1 ; link
+17 10 1 ; link
+17 11 1 ; link
+ 9 23 1 ; link
+19 10 1 ; link
+19 11 1 ; link
+ 9 21 1 ; link
+ 9 20 1 ; link
+10 22 1 ; link
+11 22 1 ; link
+17 26 1 ; link
+16 27 1 ; link
+16 29 1 ; link
+19 28 1 ; link
+26 22 1 ; link
+16 32 1 ; link
+27 20 1 ; link
+27 21 1 ; link
+19 33 1 ; link
+29 20 1 ; link
+29 21 1 ; link
+19 31 1 ; link
+19 30 1 ; link
+20 32 1 ; link
+21 32 1 ; link
+27 36 1 ; link
+26 37 1 ; link
+26 39 1 ; link
+29 38 1 ; link
+36 32 1 ; link
+26 42 1 ; link
+37 30 1 ; link
+37 31 1 ; link
+29 43 1 ; link
+39 30 1 ; link
+39 31 1 ; link
+29 41 1 ; link
+29 40 1 ; link
+30 42 1 ; link
+31 42 1 ; link
+36 46 1 ; link
+39 47 1 ; link
+45 37 1 ; link
+45 42 1 ; link
+36 48 1 ; link
+46 40 1 ; link
+36 49 1 ; link
+46 41 1 ; link
+40 48 1 ; link
+41 48 1 ; link
+40 49 1 ; link
+41 49 1 ; link
+45 53 1 ; link
+46 54 1 ; link
+47 55 1 ; link
+46 59 1 ; link
+54 50 1 ; link
+46 60 1 ; link
+54 51 1 ; link
+50 59 1 ; link
+51 59 1 ; link
+50 60 1 ; link
+51 60 1 ; link
+ 1 16 1 ; link
+ 9 26 1 ; link
+19 36 1 ; link
+29 45 1 ; link
+
+[ angles ]
+ 2  1  4 1 107.800 276.144
+ 3  1  4 1 107.800 276.144
+ 2  1  3 1 107.800 276.144
+ 6  7  8 1 124.000 585.760
+ 6  7 13 1 117.000 292.880
+ 7  8 14 1 120.000 292.880
+ 7  8  5 1 120.000 292.880
+ 6  9 10 1 110.700 313.800
+ 6  9 11 1 110.700 313.800
+10  9 11 1 107.800 276.144
+ 7  6 12 1 109.500 292.880
+ 7  6  9 1 111.100 527.184
+ 8  7 13 1 120.000 292.880
+ 9  6 12 1 110.700 313.800
+14  8  5 1 117.000 292.880
+16 17 18 1 124.000 585.760
+16 17 23 1 117.000 292.880
+17 18 24 1 120.000 292.880
+17 18 15 1 120.000 292.880
+16 19 20 1 110.700 313.800
+16 19 21 1 110.700 313.800
+20 19 21 1 107.800 276.144
+17 16 22 1 109.500 292.880
+17 16 19 1 111.100 527.184
+18 17 23 1 120.000 292.880
+19 16 22 1 110.700 313.800
+24 18 15 1 117.000 292.880
+26 27 28 1 124.000 585.760
+26 27 33 1 117.000 292.880
+27 28 34 1 120.000 292.880
+27 28 25 1 120.000 292.880
+26 29 30 1 110.700 313.800
+26 29 31 1 110.700 313.800
+30 29 31 1 107.800 276.144
+27 26 32 1 109.500 292.880
+27 26 29 1 111.100 527.184
+28 27 33 1 120.000 292.880
+29 26 32 1 110.700 313.800
+34 28 25 1 117.000 292.880
+36 37 38 1 124.000 585.760
+36 37 43 1 117.000 292.880
+37 38 44 1 120.000 292.880
+37 38 35 1 120.000 292.880
+36 39 40 1 110.700 313.800
+36 39 41 1 110.700 313.800
+40 39 41 1 107.800 276.144
+37 36 42 1 109.500 292.880
+37 36 39 1 111.100 527.184
+38 37 43 1 120.000 292.880
+39 36 42 1 110.700 313.800
+44 38 35 1 117.000 292.880
+45 46 47 1 109.500 502.080
+46 47 50 1 109.500 292.880
+46 47 51 1 109.500 292.880
+48 45 49 1 107.800 276.144
+46 45 49 1 109.500 292.880
+50 47 51 1 107.800 276.144
+46 45 48 1 109.500 292.880
+53 54 55 1 109.500 502.080
+54 55 56 1 109.500 418.400
+55 56 57 1 109.500 418.400
+56 57 58 1 108.500 460.240
+54 55 61 1 109.500 292.880
+54 55 63 1 109.500 292.880
+55 56 52 1 110.700 313.800
+55 56 62 1 110.700 313.800
+54 53 59 1 109.500 292.880
+57 56 52 1 109.500 292.880
+56 55 61 1 110.700 313.800
+56 55 63 1 110.700 313.800
+61 55 63 1 107.800 276.144
+57 56 62 1 109.500 292.880
+52 56 62 1 107.800 276.144
+54 53 60 1 109.500 292.880
+59 53 60 1 107.800 276.144
+ 1  6  7 1 111.100 527.184 ; link
+ 1  6  9 1 112.700 488.273 ; link
+ 6  1  2 1 110.700 313.800 ; link
+ 6  1  3 1 110.700 313.800 ; link
+ 6  1  4 1 110.700 313.800 ; link
+ 1  6 12 1 110.700 313.800 ; link
+ 6  9 16 1 112.700 488.273 ; link
+ 9 16 17 1 111.100 527.184 ; link
+ 9 16 19 1 112.700 488.273 ; link
+ 9 16 22 1 110.700 313.800 ; link
+16  9 10 1 110.700 313.800 ; link
+16  9 11 1 110.700 313.800 ; link
+16 19 26 1 112.700 488.273 ; link
+19 26 27 1 111.100 527.184 ; link
+19 26 29 1 112.700 488.273 ; link
+19 26 32 1 110.700 313.800 ; link
+26 19 20 1 110.700 313.800 ; link
+26 19 21 1 110.700 313.800 ; link
+26 29 36 1 112.700 488.273 ; link
+29 36 37 1 111.100 527.184 ; link
+29 36 39 1 112.700 488.273 ; link
+29 36 42 1 110.700 313.800 ; link
+36 29 30 1 110.700 313.800 ; link
+36 29 31 1 110.700 313.800 ; link
+36 39 45 1 112.700 488.273 ; link
+39 45 46 1 109.500 418.400 ; link
+39 45 48 1 110.700 313.800 ; link
+39 45 49 1 110.700 313.800 ; link
+45 39 41 1 110.700 313.800 ; link
+45 39 40 1 110.700 313.800 ; link
+46 47 53 1 109.500 418.400 ; link
+47 53 54 1 109.500 418.400 ; link
+47 53 59 1 110.700 313.800 ; link
+47 53 60 1 110.700 313.800 ; link
+53 47 51 1 110.700 313.800 ; link
+53 47 50 1 110.700 313.800 ; link
+
+[ dihedrals ]
+ 5  8  7 14 4 180.000 10.460 2
+13  7  6  8 4 180.000 10.460 2
+ 9  6  7  8 3 0.527 -6.397 -1.695 7.565 -0.000 0.000
+14  8  7  6 3 58.576 0.000 -58.576 -0.000 -0.000 0.000
+ 5  8  7  6 3 58.576 0.000 -58.576 -0.000 -0.000 0.000
+ 5  8  7 13 3 58.576 0.000 -58.576 -0.000 -0.000 0.000
+14  8  7 13 3 58.576 0.000 -58.576 -0.000 -0.000 0.000
+13  7  6  9 3 -33.472 0.000 33.472 -0.000 -0.000 0.000
+13  7  6 12 3 0.665 1.996 0.000 -2.661 -0.000 0.000
+12  6  7  8 3 -0.778 -2.335 0.000 3.113 -0.000 0.000
+10  9  6  7 3 0.766 2.297 0.000 -3.063 -0.000 0.000
+11  9  6  7 3 0.766 2.297 0.000 -3.063 -0.000 0.000
+11  9  6 12 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+10  9  6 12 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+15 18 17 24 4 180.000 10.460 2
+23 17 16 18 4 180.000 10.460 2
+19 16 17 18 3 0.527 -6.397 -1.695 7.565 -0.000 0.000
+24 18 17 16 3 58.576 0.000 -58.576 -0.000 -0.000 0.000
+15 18 17 16 3 58.576 0.000 -58.576 -0.000 -0.000 0.000
+15 18 17 23 3 58.576 0.000 -58.576 -0.000 -0.000 0.000
+24 18 17 23 3 58.576 0.000 -58.576 -0.000 -0.000 0.000
+23 17 16 19 3 -33.472 0.000 33.472 -0.000 -0.000 0.000
+23 17 16 22 3 0.665 1.996 0.000 -2.661 -0.000 0.000
+22 16 17 18 3 -0.778 -2.335 0.000 3.113 -0.000 0.000
+20 19 16 17 3 0.766 2.297 0.000 -3.063 -0.000 0.000
+21 19 16 17 3 0.766 2.297 0.000 -3.063 -0.000 0.000
+21 19 16 22 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+20 19 16 22 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+25 28 27 34 4 180.000 10.460 2
+33 27 26 28 4 180.000 10.460 2
+29 26 27 28 3 0.527 -6.397 -1.695 7.565 -0.000 0.000
+34 28 27 26 3 58.576 0.000 -58.576 -0.000 -0.000 0.000
+25 28 27 26 3 58.576 0.000 -58.576 -0.000 -0.000 0.000
+25 28 27 33 3 58.576 0.000 -58.576 -0.000 -0.000 0.000
+34 28 27 33 3 58.576 0.000 -58.576 -0.000 -0.000 0.000
+33 27 26 29 3 -33.472 0.000 33.472 -0.000 -0.000 0.000
+33 27 26 32 3 0.665 1.996 0.000 -2.661 -0.000 0.000
+32 26 27 28 3 -0.778 -2.335 0.000 3.113 -0.000 0.000
+30 29 26 27 3 0.766 2.297 0.000 -3.063 -0.000 0.000
+31 29 26 27 3 0.766 2.297 0.000 -3.063 -0.000 0.000
+31 29 26 32 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+30 29 26 32 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+35 38 37 44 4 180.000 10.460 2
+43 37 36 38 4 180.000 10.460 2
+39 36 37 38 3 0.527 -6.397 -1.695 7.565 -0.000 0.000
+44 38 37 36 3 58.576 0.000 -58.576 -0.000 -0.000 0.000
+35 38 37 36 3 58.576 0.000 -58.576 -0.000 -0.000 0.000
+35 38 37 43 3 58.576 0.000 -58.576 -0.000 -0.000 0.000
+44 38 37 43 3 58.576 0.000 -58.576 -0.000 -0.000 0.000
+43 37 36 39 3 -33.472 0.000 33.472 -0.000 -0.000 0.000
+43 37 36 42 3 0.665 1.996 0.000 -2.661 -0.000 0.000
+42 36 37 38 3 -0.778 -2.335 0.000 3.113 -0.000 0.000
+40 39 36 37 3 0.766 2.297 0.000 -3.063 -0.000 0.000
+41 39 36 37 3 0.766 2.297 0.000 -3.063 -0.000 0.000
+41 39 36 42 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+40 39 36 42 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+48 45 46 47 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+50 47 46 45 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+51 47 46 45 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+49 45 46 47 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+56 55 54 53 3 1.715 2.845 1.046 -5.607 -0.000 0.000
+62 56 55 61 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+62 56 55 63 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+52 56 55 61 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+52 56 55 63 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+63 55 56 57 3 0.979 2.937 0.000 -3.916 -0.000 0.000
+61 55 56 57 3 0.979 2.937 0.000 -3.916 -0.000 0.000
+52 56 55 54 3 0.979 2.937 0.000 -3.916 -0.000 0.000
+62 56 55 54 3 0.979 2.937 0.000 -3.916 -0.000 0.000
+62 56 57 58 3 0.736 2.209 0.000 -2.946 -0.000 0.000
+52 56 57 58 3 0.736 2.209 0.000 -2.946 -0.000 0.000
+60 53 54 55 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+61 55 54 53 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+59 53 54 55 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+63 55 54 53 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+58 57 56 55 3 -0.444 3.833 0.728 -4.117 -0.000 0.000
+57 56 55 54 3 9.035 -9.035 0.000 -0.000 -0.000 0.000
+ 8  7  6  1 3 0.527 -6.397 -1.695 7.565 -0.000 0.000 ; link
+13  7  6  1 3 -33.472 0.000 33.472 -0.000 -0.000 0.000 ; link
+ 4  1  6  7 3 0.766 2.297 0.000 -3.063 -0.000 0.000 ; link
+ 3  1  6  7 3 0.766 2.297 0.000 -3.063 -0.000 0.000 ; link
+ 2  1  6  7 3 0.766 2.297 0.000 -3.063 -0.000 0.000 ; link
+ 2  1  6  9 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+ 4  1  6  9 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+11  9  6  1 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+ 3  1  6  9 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+10  9  6  1 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+12  6  1  3 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+12  6  1  4 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+12  6  1  2 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+18 17 16  9 3 0.527 -6.397 -1.695 7.565 -0.000 0.000 ; link
+17 16  9  6 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link
+16  9  6  7 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link
+19 16  9  6 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link
+23 17 16  9 3 -33.472 0.000 33.472 -0.000 -0.000 0.000 ; link
+11  9 16 17 3 0.766 2.297 0.000 -3.063 -0.000 0.000 ; link
+10  9 16 17 3 0.766 2.297 0.000 -3.063 -0.000 0.000 ; link
+20 19 16  9 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+12  6  9 16 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+22 16  9  6 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+21 19 16  9 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+11  9 16 19 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+10  9 16 19 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+22 16  9 11 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+22 16  9 10 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+28 27 26 19 3 0.527 -6.397 -1.695 7.565 -0.000 0.000 ; link
+27 26 19 16 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link
+26 19 16 17 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link
+29 26 19 16 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link
+33 27 26 19 3 -33.472 0.000 33.472 -0.000 -0.000 0.000 ; link
+21 19 26 27 3 0.766 2.297 0.000 -3.063 -0.000 0.000 ; link
+20 19 26 27 3 0.766 2.297 0.000 -3.063 -0.000 0.000 ; link
+30 29 26 19 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+22 16 19 26 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+32 26 19 16 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+31 29 26 19 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+21 19 26 29 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+20 19 26 29 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+32 26 19 21 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+32 26 19 20 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+38 37 36 29 3 0.527 -6.397 -1.695 7.565 -0.000 0.000 ; link
+37 36 29 26 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link
+36 29 26 27 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link
+39 36 29 26 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link
+43 37 36 29 3 -33.472 0.000 33.472 -0.000 -0.000 0.000 ; link
+31 29 36 37 3 0.766 2.297 0.000 -3.063 -0.000 0.000 ; link
+30 29 36 37 3 0.766 2.297 0.000 -3.063 -0.000 0.000 ; link
+40 39 36 29 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+32 26 29 36 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+42 36 29 26 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+41 39 36 29 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+31 29 36 39 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+30 29 36 39 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+42 36 29 31 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+42 36 29 30 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+37 36 39 45 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link
+47 46 45 39 3 1.715 2.845 1.046 -5.607 -0.000 0.000 ; link
+49 45 39 36 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+48 45 39 36 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+42 36 39 45 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+48 45 39 40 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+49 45 39 41 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+49 45 39 40 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+48 45 39 41 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+40 39 45 46 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link
+41 39 45 46 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link
+46 45 39 36 3 2.874 0.582 2.092 -5.548 -0.000 0.000 ; link
+53 47 46 45 3 1.715 2.845 1.046 -5.607 -0.000 0.000 ; link
+55 54 53 47 3 1.715 2.845 1.046 -5.607 -0.000 0.000 ; link
+60 53 47 51 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+60 53 47 50 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+59 53 47 50 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+59 53 47 51 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+50 47 53 54 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link
+60 53 47 46 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link
+51 47 53 54 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link
+59 53 47 46 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link
+54 53 47 46 3 -1.151 1.151 0.000 -0.000 -0.000 0.000 ; link
+16  9  6  1 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link
+26 19 16  9 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link
+36 29 26 19 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link
+45 39 36 29 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link
+
diff --git a/polyply/tests/test_data/itp_to_ff/PEG_PBE/seq.txt b/polyply/tests/test_data/itp_to_ff/PEG_PBE/seq.txt
new file mode 100644
index 00000000..408d9986
--- /dev/null
+++ b/polyply/tests/test_data/itp_to_ff/PEG_PBE/seq.txt
@@ -0,0 +1 @@
+CH3ter PBE PBE PBE PBE PEO PEOter
diff --git a/polyply/tests/test_data/itp_to_ff/PEO_OHter/in_itp.itp b/polyply/tests/test_data/itp_to_ff/PEO_OHter/in_itp.itp
new file mode 100644
index 00000000..b8659bb2
--- /dev/null
+++ b/polyply/tests/test_data/itp_to_ff/PEO_OHter/in_itp.itp
@@ -0,0 +1,327 @@
+; /coarse/fabian/current-projects/polymer_itp_builder/vermouth_dev/venv_py38/bin/polyply gen_params -lib oplsaaLigParGen -seq OHter:1 PEO:4 OHter:1 -o test.itp
+
+; Please cite the following papers:
+; Jorgensen, W L; Tirado-Rives, J;  Proceedings of the National Academy of Sciences 2005; 10.1073/pnas.0408037102
+; Dodda, L S; Vilseck, J Z; Tirado-Rives, J; Jorgensen, W L;  The Journal of Physical Chemistry B 2017; 10.1021/acs.jpcb.7b00272
+; Grunewald, F; Alessandri, R; Kroon, P C; Monticelli, L; Souza, P C; Marrink, S J;  Nature Communications 2022; 10.1038/s41467-021-27627-4
+; Dodda, L S; Cabeza de Vaca, I; Tirado-Rives, J; Jorgensen, W L;  Nucleic Acids Research 2017; 10.1093/nar/gkx312
+
+[ moleculetype ]
+polymer 3
+
+[ atoms ]
+ 1 opls_154 1 OHter OA1  1 -0.6887 15.999
+ 2 opls_135 1 OHter C2   2   0.107 12.011
+ 3 opls_004 1 OHter HA3  3  0.4173  1.008
+ 4 opls_140 1 OHter H4   4  0.0822  1.008
+ 5 opls_140 1 OHter H5   5  0.0822  1.008
+ 6 opls_135 2 PEO   C01  6  0.0089 12.011
+ 7 opls_179 2 PEO   O02  7 -0.3846 15.999
+ 8 opls_135 2 PEO   C03  8  0.0089 12.011
+ 9 opls_140 2 PEO   H04  9  0.0917  1.008
+10 opls_140 2 PEO   H05 10  0.0917  1.008
+11 opls_140 2 PEO   H06 11  0.0917  1.008
+12 opls_140 2 PEO   H07 12  0.0917  1.008
+13 opls_135 3 PEO   C01 13  0.0089 12.011
+14 opls_179 3 PEO   O02 14 -0.3846 15.999
+15 opls_135 3 PEO   C03 15  0.0089 12.011
+16 opls_140 3 PEO   H04 16  0.0917  1.008
+17 opls_140 3 PEO   H05 17  0.0917  1.008
+18 opls_140 3 PEO   H06 18  0.0917  1.008
+19 opls_140 3 PEO   H07 19  0.0917  1.008
+20 opls_135 4 PEO   C01 20  0.0089 12.011
+21 opls_179 4 PEO   O02 21 -0.3846 15.999
+22 opls_135 4 PEO   C03 22  0.0089 12.011
+23 opls_140 4 PEO   H04 23  0.0917  1.008
+24 opls_140 4 PEO   H05 24  0.0917  1.008
+25 opls_140 4 PEO   H06 25  0.0917  1.008
+26 opls_140 4 PEO   H07 26  0.0917  1.008
+27 opls_135 5 PEO   C01 27  0.0089 12.011
+28 opls_179 5 PEO   O02 28 -0.3846 15.999
+29 opls_135 5 PEO   C03 29  0.0089 12.011
+30 opls_140 5 PEO   H04 30  0.0917  1.008
+31 opls_140 5 PEO   H05 31  0.0917  1.008
+32 opls_140 5 PEO   H06 32  0.0917  1.008
+33 opls_140 5 PEO   H07 33  0.0917  1.008
+34 opls_154 6 OHter OA1 34 -0.6887 15.999
+35 opls_135 6 OHter C2  35   0.107 12.011
+36 opls_004 6 OHter HA3 36  0.4173  1.008
+37 opls_140 6 OHter H4  37  0.0822  1.008
+38 opls_140 6 OHter H5  38  0.0822  1.008
+
+[ bonds ]
+ 2  1 1 0.1410 267776.000
+ 3  1 1 0.0945 462750.400
+ 4  2 1 0.1090 284512.000
+ 5  2 1 0.1090 284512.000
+ 7  6 1 0.1410 267776.000
+ 8  7 1 0.1410 267776.000
+ 9  6 1 0.1090 284512.000
+10  6 1 0.1090 284512.000
+11  8 1 0.1090 284512.000
+12  8 1 0.1090 284512.000
+14 13 1 0.1410 267776.000
+15 14 1 0.1410 267776.000
+16 13 1 0.1090 284512.000
+17 13 1 0.1090 284512.000
+18 15 1 0.1090 284512.000
+19 15 1 0.1090 284512.000
+21 20 1 0.1410 267776.000
+22 21 1 0.1410 267776.000
+23 20 1 0.1090 284512.000
+24 20 1 0.1090 284512.000
+25 22 1 0.1090 284512.000
+26 22 1 0.1090 284512.000
+28 27 1 0.1410 267776.000
+29 28 1 0.1410 267776.000
+30 27 1 0.1090 284512.000
+31 27 1 0.1090 284512.000
+32 29 1 0.1090 284512.000
+33 29 1 0.1090 284512.000
+35 34 1 0.1410 267776.000
+36 34 1 0.0945 462750.400
+37 35 1 0.1090 284512.000
+38 35 1 0.1090 284512.000
+
+; connection
+13  8 1 0.1529 224262.400
+20 15 1 0.1529 224262.400
+27 22 1 0.1529 224262.400
+
+; termini
+ 6  2 1 0.1529 224262.400 ; OH-l-link
+35 29 1 0.1529 224262.400 ; OH-r-link
+
+[ pairs ]
+ 3  4 1
+ 3  5 1
+ 8  9 1
+ 6 11 1
+ 8 10 1
+ 6 12 1
+15 16 1
+13 18 1
+15 17 1
+13 19 1
+22 23 1
+20 25 1
+22 24 1
+20 26 1
+29 30 1
+27 32 1
+29 31 1
+27 33 1
+36 37 1
+36 38 1
+
+; connection
+ 6 13 1
+ 7 14 1
+ 8 15 1
+ 7 16 1
+14 11 1
+ 7 17 1
+14 12 1
+11 16 1
+12 16 1
+11 17 1
+12 17 1
+13 20 1
+14 21 1
+15 22 1
+14 23 1
+21 18 1
+14 24 1
+21 19 1
+18 23 1
+19 23 1
+18 24 1
+19 24 1
+20 27 1
+21 28 1
+22 29 1
+21 30 1
+28 25 1
+21 31 1
+28 26 1
+25 30 1
+26 30 1
+25 31 1
+26 31 1
+
+; termini
+ 1  7 1 ; OH-l-link
+ 2  8 1 ; OH-l-link
+ 6  3 1 ; OH-l-link
+ 1  9 1 ; OH-l-link
+ 7  4 1 ; OH-l-link
+ 1 10 1 ; OH-l-link
+ 7  5 1 ; OH-l-link
+ 4  9 1 ; OH-l-link
+ 5  9 1 ; OH-l-link
+ 4 10 1 ; OH-l-link
+ 5 10 1 ; OH-l-link
+27 35 1 ; OH-r-link
+28 34 1 ; OH-r-link
+28 37 1 ; OH-r-link
+34 32 1 ; OH-r-link
+28 38 1 ; OH-r-link
+34 33 1 ; OH-r-link
+29 36 1 ; OH-r-link
+32 37 1 ; OH-r-link
+33 37 1 ; OH-r-link
+32 38 1 ; OH-r-link
+33 38 1 ; OH-r-link
+
+[ angles ]
+ 2  1  3 1 108.500 460.240
+ 1  2  4 1 109.500 292.880
+ 1  2  5 1 109.500 292.880
+ 4  2  5 1 107.800 276.144
+ 6  7  8 1 109.500 502.080
+ 7  8 11 1 109.500 292.880
+ 7  8 12 1 109.500 292.880
+11  8 12 1 107.800 276.144
+ 7  6 10 1 109.500 292.880
+ 9  6 10 1 107.800 276.144
+ 7  6  9 1 109.500 292.880
+13 14 15 1 109.500 502.080
+14 15 18 1 109.500 292.880
+14 15 19 1 109.500 292.880
+18 15 19 1 107.800 276.144
+14 13 17 1 109.500 292.880
+16 13 17 1 107.800 276.144
+14 13 16 1 109.500 292.880
+20 21 22 1 109.500 502.080
+21 22 25 1 109.500 292.880
+21 22 26 1 109.500 292.880
+25 22 26 1 107.800 276.144
+21 20 24 1 109.500 292.880
+23 20 24 1 107.800 276.144
+21 20 23 1 109.500 292.880
+27 28 29 1 109.500 502.080
+28 29 32 1 109.500 292.880
+28 29 33 1 109.500 292.880
+32 29 33 1 107.800 276.144
+28 27 31 1 109.500 292.880
+30 27 31 1 107.800 276.144
+28 27 30 1 109.500 292.880
+35 34 36 1 108.500 460.240
+34 35 37 1 109.500 292.880
+34 35 38 1 109.500 292.880
+37 35 38 1 107.800 276.144
+
+; connection
+ 7  8 13 1 109.500 418.400
+ 8 13 14 1 109.500 418.400
+ 8 13 16 1 110.700 313.800
+ 8 13 17 1 110.700 313.800
+13  8 11 1 110.700 313.800
+13  8 12 1 110.700 313.800
+14 15 20 1 109.500 418.400
+15 20 21 1 109.500 418.400
+15 20 23 1 110.700 313.800
+15 20 24 1 110.700 313.800
+20 15 18 1 110.700 313.800
+20 15 19 1 110.700 313.800
+21 22 27 1 109.500 418.400
+22 27 28 1 109.500 418.400
+22 27 30 1 110.700 313.800
+22 27 31 1 110.700 313.800
+27 22 25 1 110.700 313.800
+27 22 26 1 110.700 313.800
+
+; termini
+ 1  2  6 1 109.500 418.400 ; OH-l-link
+ 2  6  7 1 109.500 418.400 ; OH-l-link
+ 2  6  9 1 110.700 313.800 ; OH-l-link
+ 2  6 10 1 110.700 313.800 ; OH-l-link
+ 6  2  4 1 110.700 313.800 ; OH-l-link
+ 6  2  5 1 110.700 313.800 ; OH-l-link
+28 29 35 1 109.500 418.400 ; OH-r-link
+29 35 34 1 109.500 418.400 ; OH-r-link
+29 35 37 1 110.700 313.800 ; OH-r-link
+29 35 38 1 110.700 313.800 ; OH-r-link
+35 29 32 1 110.700 313.800 ; OH-r-link
+35 29 33 1 110.700 313.800 ; OH-r-link
+
+[ dihedrals ]
+ 5  2  1  3 3 0.736 2.209 0.000 -2.946 -0.000 0.000
+ 4  2  1  3 3 0.736 2.209 0.000 -2.946 -0.000 0.000
+ 9  6  7  8 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+12  8  7  6 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+10  6  7  8 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+11  8  7  6 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+16 13 14 15 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+19 15 14 13 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+17 13 14 15 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+18 15 14 13 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+23 20 21 22 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+26 22 21 20 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+24 20 21 22 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+25 22 21 20 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+30 27 28 29 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+33 29 28 27 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+31 27 28 29 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+32 29 28 27 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+38 35 34 36 3 0.736 2.209 0.000 -2.946 -0.000 0.000
+37 35 34 36 3 0.736 2.209 0.000 -2.946 -0.000 0.000
+
+; connection
+13  8  7  6 3 1.715 2.845 1.046 -5.607 -0.000 0.000
+15 14 13  8 3 1.715 2.845 1.046 -5.607 -0.000 0.000
+17 13  8 12 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+17 13  8 11 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+16 13  8 11 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+16 13  8 12 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+17 13  8  7 3 0.979 2.937 0.000 -3.916 -0.000 0.000
+12  8 13 14 3 0.979 2.937 0.000 -3.916 -0.000 0.000
+11  8 13 14 3 0.979 2.937 0.000 -3.916 -0.000 0.000
+16 13  8  7 3 0.979 2.937 0.000 -3.916 -0.000 0.000
+14 13  8  7 3 -1.151 1.151 0.000 -0.000 -0.000 0.000
+20 15 14 13 3 1.715 2.845 1.046 -5.607 -0.000 0.000
+22 21 20 15 3 1.715 2.845 1.046 -5.607 -0.000 0.000
+24 20 15 19 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+24 20 15 18 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+23 20 15 18 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+23 20 15 19 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+24 20 15 14 3 0.979 2.937 0.000 -3.916 -0.000 0.000
+19 15 20 21 3 0.979 2.937 0.000 -3.916 -0.000 0.000
+18 15 20 21 3 0.979 2.937 0.000 -3.916 -0.000 0.000
+23 20 15 14 3 0.979 2.937 0.000 -3.916 -0.000 0.000
+21 20 15 14 3 -1.151 1.151 0.000 -0.000 -0.000 0.000
+27 22 21 20 3 1.715 2.845 1.046 -5.607 -0.000 0.000
+29 28 27 22 3 1.715 2.845 1.046 -5.607 -0.000 0.000
+31 27 22 26 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+31 27 22 25 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+30 27 22 25 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+30 27 22 26 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+31 27 22 21 3 0.979 2.937 0.000 -3.916 -0.000 0.000
+26 22 27 28 3 0.979 2.937 0.000 -3.916 -0.000 0.000
+25 22 27 28 3 0.979 2.937 0.000 -3.916 -0.000 0.000
+30 27 22 21 3 0.979 2.937 0.000 -3.916 -0.000 0.000
+28 27 22 21 3 -1.151 1.151 0.000 -0.000 -0.000 0.000
+
+; termini
+ 8  7  6  2 3 1.715 2.845 1.046 -5.607 -0.000 0.000 ; OH-l-link
+10  6  2  4 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; OH-l-link
+10  6  2  5 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; OH-l-link
+ 9  6  2  5 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; OH-l-link
+ 9  6  2  4 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; OH-l-link
+10  6  2  1 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; OH-l-link
+ 9  6  2  1 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; OH-l-link
+ 4  2  6  7 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; OH-l-link
+ 5  2  6  7 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; OH-l-link
+ 3  1  2  6 3 -0.444 3.833 0.728 -4.117 -0.000 0.000 ; OH-l-link
+ 7  6  2  1 3 9.035 -9.035 0.000 -0.000 -0.000 0.000 ; OH-l-link
+35 29 28 27 3 1.715 2.845 1.046 -5.607 -0.000 0.000 ; OH-r-link
+37 35 29 33 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; OH-r-link
+38 35 29 33 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; OH-r-link
+38 35 29 32 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; OH-r-link
+37 35 29 32 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; OH-r-link
+33 29 35 34 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; OH-r-link
+32 29 35 34 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; OH-r-link
+38 35 29 28 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; OH-r-link
+37 35 29 28 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; OH-r-link
+36 34 35 29 3 -0.444 3.833 0.728 -4.117 -0.000 0.000 ; OH-r-link
+34 35 29 28 3 9.035 -9.035 0.000 -0.000 -0.000 0.000 ; OH-r-link
diff --git a/polyply/tests/test_data/itp_to_ff/PEO_OHter/ref.itp b/polyply/tests/test_data/itp_to_ff/PEO_OHter/ref.itp
new file mode 100644
index 00000000..a1962688
--- /dev/null
+++ b/polyply/tests/test_data/itp_to_ff/PEO_OHter/ref.itp
@@ -0,0 +1,308 @@
+; ../../bench.py
+
+; Please cite the following papers:
+
+[ moleculetype ]
+new 3
+
+[ atoms ]
+ 1 opls_154 1 OHter O1  1              -0.6887 15.999
+ 2 opls_135 1 OHter C0  2                0.107 12.011
+ 3 opls_004 1 OHter H4  3               0.4173  1.008
+ 4 opls_140 1 OHter H3  4               0.0822  1.008
+ 5 opls_140 1 OHter H2  5               0.0822  1.008
+ 6 opls_135 2 PEO   C0 11 0.008899999999999995 12.011
+ 7 opls_179 2 PEO   O1 12              -0.3846 15.999
+ 8 opls_135 2 PEO   C2 13 0.008899999999999995 12.011
+ 9 opls_140 2 PEO   H3 14               0.0917  1.008
+10 opls_140 2 PEO   H4 15               0.0917  1.008
+11 opls_140 2 PEO   H5 16               0.0917  1.008
+12 opls_140 2 PEO   H6 17               0.0917  1.008
+13 opls_135 3 PEO   C0 23 0.008899999999999995 12.011
+14 opls_179 3 PEO   O1 24              -0.3846 15.999
+15 opls_135 3 PEO   C2 25 0.008899999999999995 12.011
+16 opls_140 3 PEO   H3 26               0.0917  1.008
+17 opls_140 3 PEO   H4 27               0.0917  1.008
+18 opls_140 3 PEO   H5 28               0.0917  1.008
+19 opls_140 3 PEO   H6 29               0.0917  1.008
+20 opls_135 4 PEO   C0 35 0.008899999999999995 12.011
+21 opls_179 4 PEO   O1 36              -0.3846 15.999
+22 opls_135 4 PEO   C2 37 0.008899999999999995 12.011
+23 opls_140 4 PEO   H3 38               0.0917  1.008
+24 opls_140 4 PEO   H4 39               0.0917  1.008
+25 opls_140 4 PEO   H5 40               0.0917  1.008
+26 opls_140 4 PEO   H6 41               0.0917  1.008
+27 opls_135 5 PEO   C0 47 0.008899999999999995 12.011
+28 opls_179 5 PEO   O1 48              -0.3846 15.999
+29 opls_135 5 PEO   C2 49 0.008899999999999995 12.011
+30 opls_140 5 PEO   H3 50               0.0917  1.008
+31 opls_140 5 PEO   H4 51               0.0917  1.008
+32 opls_140 5 PEO   H5 52               0.0917  1.008
+33 opls_140 5 PEO   H6 53               0.0917  1.008
+34 opls_154 6 OHter O1 54              -0.6887 15.999
+35 opls_135 6 OHter C0 55                0.107 12.011
+36 opls_004 6 OHter H4 56               0.4173  1.008
+37 opls_140 6 OHter H3 57               0.0822  1.008
+38 opls_140 6 OHter H2 58               0.0822  1.008
+
+[ bonds ]
+ 2  1 1 0.1410 267776.000
+ 3  1 1 0.0945 462750.400
+ 4  2 1 0.1090 284512.000
+ 5  2 1 0.1090 284512.000
+ 7  6 1 0.1410 267776.000
+ 8  7 1 0.1410 267776.000
+ 9  6 1 0.1090 284512.000
+10  6 1 0.1090 284512.000
+11  8 1 0.1090 284512.000
+12  8 1 0.1090 284512.000
+14 13 1 0.1410 267776.000
+15 14 1 0.1410 267776.000
+16 13 1 0.1090 284512.000
+17 13 1 0.1090 284512.000
+18 15 1 0.1090 284512.000
+19 15 1 0.1090 284512.000
+21 20 1 0.1410 267776.000
+22 21 1 0.1410 267776.000
+23 20 1 0.1090 284512.000
+24 20 1 0.1090 284512.000
+25 22 1 0.1090 284512.000
+26 22 1 0.1090 284512.000
+28 27 1 0.1410 267776.000
+29 28 1 0.1410 267776.000
+30 27 1 0.1090 284512.000
+31 27 1 0.1090 284512.000
+32 29 1 0.1090 284512.000
+33 29 1 0.1090 284512.000
+35 34 1 0.1410 267776.000
+36 34 1 0.0945 462750.400
+37 35 1 0.1090 284512.000
+38 35 1 0.1090 284512.000
+13  8 1 0.1529 224262.400 ; link
+20 15 1 0.1529 224262.400 ; link
+27 22 1 0.1529 224262.400 ; link
+ 6  2 1 0.1529 224262.400 ; link
+35 29 1 0.1529 224262.400 ; link
+
+[ pairs ]
+ 3  4 1
+ 3  5 1
+ 8  9 1
+ 6 11 1
+ 8 10 1
+ 6 12 1
+15 16 1
+13 18 1
+15 17 1
+13 19 1
+22 23 1
+20 25 1
+22 24 1
+20 26 1
+29 30 1
+27 32 1
+29 31 1
+27 33 1
+36 37 1
+36 38 1
+ 6 13 1 ; link
+ 7 14 1 ; link
+ 8 15 1 ; link
+ 7 17 1 ; link
+14 11 1 ; link
+ 7 16 1 ; link
+14 12 1 ; link
+11 17 1 ; link
+12 17 1 ; link
+11 16 1 ; link
+12 16 1 ; link
+13 20 1 ; link
+14 21 1 ; link
+15 22 1 ; link
+14 24 1 ; link
+21 18 1 ; link
+14 23 1 ; link
+21 19 1 ; link
+18 24 1 ; link
+19 24 1 ; link
+18 23 1 ; link
+19 23 1 ; link
+20 27 1 ; link
+21 28 1 ; link
+22 29 1 ; link
+21 31 1 ; link
+28 25 1 ; link
+21 30 1 ; link
+28 26 1 ; link
+25 31 1 ; link
+26 31 1 ; link
+25 30 1 ; link
+26 30 1 ; link
+ 1  7 1 ; link
+ 2  8 1 ; link
+ 6  3 1 ; link
+ 1  9 1 ; link
+ 7  4 1 ; link
+ 1 10 1 ; link
+ 7  5 1 ; link
+ 4  9 1 ; link
+ 5  9 1 ; link
+ 4 10 1 ; link
+ 5 10 1 ; link
+27 35 1 ; link
+28 34 1 ; link
+28 37 1 ; link
+34 33 1 ; link
+28 38 1 ; link
+34 32 1 ; link
+29 36 1 ; link
+33 37 1 ; link
+32 37 1 ; link
+33 38 1 ; link
+32 38 1 ; link
+
+[ angles ]
+ 2  1  3 1 108.500 460.240
+ 1  2  4 1 109.500 292.880
+ 1  2  5 1 109.500 292.880
+ 4  2  5 1 107.800 276.144
+ 6  7  8 1 109.500 502.080
+ 7  8 11 1 109.500 292.880
+ 7  8 12 1 109.500 292.880
+11  8 12 1 107.800 276.144
+ 7  6 10 1 109.500 292.880
+ 9  6 10 1 107.800 276.144
+ 7  6  9 1 109.500 292.880
+13 14 15 1 109.500 502.080
+14 15 18 1 109.500 292.880
+14 15 19 1 109.500 292.880
+18 15 19 1 107.800 276.144
+14 13 17 1 109.500 292.880
+16 13 17 1 107.800 276.144
+14 13 16 1 109.500 292.880
+20 21 22 1 109.500 502.080
+21 22 25 1 109.500 292.880
+21 22 26 1 109.500 292.880
+25 22 26 1 107.800 276.144
+21 20 24 1 109.500 292.880
+23 20 24 1 107.800 276.144
+21 20 23 1 109.500 292.880
+27 28 29 1 109.500 502.080
+28 29 32 1 109.500 292.880
+28 29 33 1 109.500 292.880
+32 29 33 1 107.800 276.144
+28 27 31 1 109.500 292.880
+30 27 31 1 107.800 276.144
+28 27 30 1 109.500 292.880
+35 34 36 1 108.500 460.240
+34 35 37 1 109.500 292.880
+34 35 38 1 109.500 292.880
+37 35 38 1 107.800 276.144
+ 7  8 13 1 109.500 418.400 ; link
+ 8 13 14 1 109.500 418.400 ; link
+ 8 13 17 1 110.700 313.800 ; link
+ 8 13 16 1 110.700 313.800 ; link
+13  8 11 1 110.700 313.800 ; link
+13  8 12 1 110.700 313.800 ; link
+14 15 20 1 109.500 418.400 ; link
+15 20 21 1 109.500 418.400 ; link
+15 20 24 1 110.700 313.800 ; link
+15 20 23 1 110.700 313.800 ; link
+20 15 18 1 110.700 313.800 ; link
+20 15 19 1 110.700 313.800 ; link
+21 22 27 1 109.500 418.400 ; link
+22 27 28 1 109.500 418.400 ; link
+22 27 31 1 110.700 313.800 ; link
+22 27 30 1 110.700 313.800 ; link
+27 22 25 1 110.700 313.800 ; link
+27 22 26 1 110.700 313.800 ; link
+ 1  2  6 1 109.500 418.400 ; link
+ 2  6  7 1 109.500 418.400 ; link
+ 2  6  9 1 110.700 313.800 ; link
+ 2  6 10 1 110.700 313.800 ; link
+ 6  2  4 1 110.700 313.800 ; link
+ 6  2  5 1 110.700 313.800 ; link
+28 29 35 1 109.500 418.400 ; link
+29 35 34 1 109.500 418.400 ; link
+29 35 37 1 110.700 313.800 ; link
+29 35 38 1 110.700 313.800 ; link
+35 29 33 1 110.700 313.800 ; link
+35 29 32 1 110.700 313.800 ; link
+
+[ dihedrals ]
+ 5  2  1  3 3 0.736 2.209 0.000 -2.946 -0.000 0.000
+ 4  2  1  3 3 0.736 2.209 0.000 -2.946 -0.000 0.000
+ 9  6  7  8 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+12  8  7  6 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+10  6  7  8 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+11  8  7  6 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+16 13 14 15 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+19 15 14 13 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+17 13 14 15 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+18 15 14 13 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+23 20 21 22 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+26 22 21 20 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+24 20 21 22 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+25 22 21 20 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+30 27 28 29 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+33 29 28 27 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+31 27 28 29 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+32 29 28 27 3 1.590 4.770 0.000 -6.360 -0.000 0.000
+38 35 34 36 3 0.736 2.209 0.000 -2.946 -0.000 0.000
+37 35 34 36 3 0.736 2.209 0.000 -2.946 -0.000 0.000
+13  8  7  6 3 1.715 2.845 1.046 -5.607 -0.000 0.000 ; link
+15 14 13  8 3 1.715 2.845 1.046 -5.607 -0.000 0.000 ; link
+16 13  8 12 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+16 13  8 11 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+17 13  8 11 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+17 13  8 12 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+16 13  8  7 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link
+12  8 13 14 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link
+11  8 13 14 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link
+17 13  8  7 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link
+14 13  8  7 3 -1.151 1.151 0.000 -0.000 -0.000 0.000 ; link
+20 15 14 13 3 1.715 2.845 1.046 -5.607 -0.000 0.000 ; link
+22 21 20 15 3 1.715 2.845 1.046 -5.607 -0.000 0.000 ; link
+23 20 15 19 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+23 20 15 18 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+24 20 15 18 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+24 20 15 19 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+23 20 15 14 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link
+19 15 20 21 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link
+18 15 20 21 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link
+24 20 15 14 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link
+21 20 15 14 3 -1.151 1.151 0.000 -0.000 -0.000 0.000 ; link
+27 22 21 20 3 1.715 2.845 1.046 -5.607 -0.000 0.000 ; link
+29 28 27 22 3 1.715 2.845 1.046 -5.607 -0.000 0.000 ; link
+30 27 22 26 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+30 27 22 25 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+31 27 22 25 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+31 27 22 26 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+30 27 22 21 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link
+26 22 27 28 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link
+25 22 27 28 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link
+31 27 22 21 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link
+28 27 22 21 3 -1.151 1.151 0.000 -0.000 -0.000 0.000 ; link
+ 8  7  6  2 3 1.715 2.845 1.046 -5.607 -0.000 0.000 ; link
+10  6  2  4 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+10  6  2  5 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+ 9  6  2  5 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+ 9  6  2  4 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+10  6  2  1 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link
+ 9  6  2  1 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link
+ 4  2  6  7 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link
+ 5  2  6  7 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link
+ 3  1  2  6 3 -0.444 3.833 0.728 -4.117 -0.000 0.000 ; link
+ 7  6  2  1 3 9.035 -9.035 0.000 -0.000 -0.000 0.000 ; link
+35 29 28 27 3 1.715 2.845 1.046 -5.607 -0.000 0.000 ; link
+37 35 29 32 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+38 35 29 32 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+38 35 29 33 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+37 35 29 33 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+32 29 35 34 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link
+33 29 35 34 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link
+38 35 29 28 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link
+37 35 29 28 3 0.979 2.937 0.000 -3.916 -0.000 0.000 ; link
+36 34 35 29 3 -0.444 3.833 0.728 -4.117 -0.000 0.000 ; link
+34 35 29 28 3 9.035 -9.035 0.000 -0.000 -0.000 0.000 ; link
+
diff --git a/polyply/tests/test_data/itp_to_ff/PEO_OHter/seq.txt b/polyply/tests/test_data/itp_to_ff/PEO_OHter/seq.txt
new file mode 100644
index 00000000..31ad4f78
--- /dev/null
+++ b/polyply/tests/test_data/itp_to_ff/PEO_OHter/seq.txt
@@ -0,0 +1 @@
+OHter PEO PEO PEO PEO OHter
diff --git a/polyply/tests/test_itp_to_ff.py b/polyply/tests/test_itp_to_ff.py
new file mode 100644
index 00000000..588515d7
--- /dev/null
+++ b/polyply/tests/test_itp_to_ff.py
@@ -0,0 +1,97 @@
+# Copyright 2020 University of Groningen
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Integration tests for the itp_to_ff utility program.
+"""
+from pathlib import Path
+import numpy as np
+import pytest
+from vermouth.molecule import Molecule, Interaction
+from vermouth.forcefield import ForceField
+from vermouth.gmx.itp_read import read_itp
+import polyply
+from polyply import itp_to_ff, gen_params
+from polyply.src.graph_utils import find_one_ismags_match
+from .test_ffoutput import (_read_force_field, equal_ffs)
+from .test_lib_files import _interaction_equal 
+
+def _mass_match(node1, node2):  # node-match callback: two atoms match when their masses are equal
+    return node1['mass'] == node2['mass']
+
+def _read_itp(itppath):
+    """Read the itp file and return its first block as molecule with bond edges."""
+    scratch_ff = ForceField("tmp")
+    with open(itppath, "r") as handle:
+        itp_lines = handle.readlines()
+    read_itp(itp_lines, scratch_ff)
+    molecule = next(iter(scratch_ff.blocks.values())).to_molecule()
+    molecule.make_edges_from_interaction_type(type_="bonds")
+    return molecule
+
+def itp_equal(ref_mol, new_mol):
+    """
+    Lightweight itp comparison; returns True or raises AssertionError.
+    """
+    # match maps new_node -> ref_node (atoms are paired by equal mass)
+    match = find_one_ismags_match(new_mol, ref_mol, _mass_match)
+    for node in new_mol.nodes:
+        # check if important attributes are the same
+        #assert new_mol.nodes[node]['atype'] == ref_mol.nodes[match[node]]['atype']
+        # charge
+        assert np.isclose(new_mol.nodes[node]['charge'],
+                          ref_mol.nodes[match[node]]['charge'],
+                          atol=0.1)
+
+    for inter_type in new_mol.interactions:
+        assert len(new_mol.interactions[inter_type]) == len(ref_mol.interactions[inter_type])
+        for inter in new_mol.interactions[inter_type]:
+            # express the interaction in reference atom indices before comparing
+            new_inter = Interaction(atoms=[match[atom] for atom in inter.atoms],
+                                    parameters=inter.parameters,
+                                    meta=inter.meta)
+            for other_inter in ref_mol.interactions[inter_type]:
+                if _interaction_equal(new_inter, other_inter, inter_type):
+                    break
+            else:
+                assert False, f"no reference match for {inter_type}: {new_inter}"
+    return True
+
+@pytest.mark.parametrize("case, smiles, resnames, charge", [
+    ("PEO_OHter", ["[OH][CH2]", "[CH2]O[CH2]", "[CH2][OH]"], ["OH", "PEO", "OH"], 0),
+    ("PEG_PBE", ["[CH3]", "[CH2][CH][CH][CH2]", "[CH2]O[CH2]"], ["CH3", "PBE", "PEO"], 0),
+])
+def _test_ffoutput(tmp_path, case, smiles, resnames, charge):
+    """
+    Call itp-to-ff, rebuild an itp from the generated force-field with
+    gen_params and check that it is equivalent to the input itp.
+    """
+    # all output goes into the per-test directory provided by the tmp_path fixture
+    tmp_file = Path(tmp_path) / "test.ff"
+    inpath = Path(polyply.TEST_DATA) / "itp_to_ff" / case
+    itp_to_ff(itppath=inpath/"in_itp.itp",
+              fragment_smiles=smiles,
+              resnames=resnames,
+              charge=charge,
+              term_prefix='ter',
+              outpath=tmp_file,)
+    # now generate an itp file with this ff-file
+    tmp_itp = tmp_path / "new.itp"
+    gen_params(inpath=[tmp_file],
+               seq_file=inpath/"seq.txt",
+               outpath=tmp_itp, name="new")
+    # read the itp-file and return a molecule
+    new_mol = _read_itp(tmp_itp)
+    ref_mol = _read_itp(inpath/"in_itp.itp")
+    # check if itps are the same
+    assert itp_equal(ref_mol, new_mol)

From 888515bb1914a4602ba4ede9fda16b2fa0d21f86 Mon Sep 17 00:00:00 2001
From: "f.grunewald" <f.grunewald@rug.nl>
Date: Wed, 21 Jun 2023 20:14:41 +0200
Subject: [PATCH 17/82] fix input types

---
 bin/polyply | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/polyply b/bin/polyply
index 5a14457c..8ff25efa 100755
--- a/bin/polyply
+++ b/bin/polyply
@@ -234,7 +234,7 @@ def main(): # pylint: disable=too-many-locals,too-many-statements
                                 help='Enable debug logging output. Can be given '
                                 'multiple times.', default=0)
 
-    parser_itp_ff.add_argument('-i', dest="itppath")
+    parser_itp_ff.add_argument('-i', dest="itppath", type=Path)
     parser_itp_ff.add_argument('-sm', dest="fragment_smiles", nargs='*')
     parser_itp_ff.add_argument('-rn', dest="resnames", nargs='*')
     parser_itp_ff.add_argument('-tp',dest="term_prefix", default="ter")

From 44cc8675e0a0b2c07b47a19de4b6654df70768aa Mon Sep 17 00:00:00 2001
From: "f.grunewald" <f.grunewald@rug.nl>
Date: Mon, 26 Jun 2023 11:28:49 +0200
Subject: [PATCH 18/82] add test print

---
 polyply/src/itp_to_ff.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py
index 94214ce7..d21db023 100644
--- a/polyply/src/itp_to_ff.py
+++ b/polyply/src/itp_to_ff.py
@@ -200,6 +200,7 @@ def extract_links(molecule):
                 link.interactions[inter_type].append(interaction)
 
         links.append(link)
+    print("--test--")
     print(links)
     return links
 

From 32cd8f8fd0e5141dc10d4bdb1cdfd2b19af56bb6 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Wed, 22 Nov 2023 15:47:11 +0100
Subject: [PATCH 19/82] clean up output

---
 polyply/src/ffoutput.py | 34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/polyply/src/ffoutput.py b/polyply/src/ffoutput.py
index a1ac7b89..0e06ea3f 100644
--- a/polyply/src/ffoutput.py
+++ b/polyply/src/ffoutput.py
@@ -57,6 +57,7 @@ def write(self):
         for name, block in self.forcefield.blocks.items():
             self.stream.write("[ moleculetype ]\n")
             excl = str(block.nrexcl)
+            self.max_idx = max(len(node) for node in block.nodes)
             self.stream.write(f"{name} {excl}\n")
             self.write_atoms_block(block.nodes(data=True))
             self.write_interaction_dict(block.interactions)
@@ -68,6 +69,7 @@ def write(self):
                 nometa = True
             else:
                 nometa = False
+            self.max_idx = max(len(node) for node in link.nodes)
             self.write_link_header()
             self.write_atoms_link(link.nodes(data=True), nometa)
             self.write_interaction_dict(link.interactions)
@@ -91,11 +93,13 @@ def write_interaction_dict(self, inter_dict):
         for inter_type in inter_dict:
             self.stream.write(f"[ {inter_type} ]\n")
             for interaction in inter_dict[inter_type]:
+                atoms = ['{atom:>{imax}}'.format(atom=atom,
+                                                 imax=self.max_idx) for atom in interaction.atoms]
                 if inter_type not in ["virtual_sitesn", "virtual_sites1", "virtual_sites2", "virtual_sites3"]:
-                    atom_string = " ".join(interaction.atoms)
+                    atom_string = " ".join(atoms)
                     param_string = " ".join(interaction.parameters)
                 else:
-                    atom_string = " ".join(interaction.atoms) + " -- "
+                    atom_string = " ".join(atoms) + " -- "
                     param_string = " ".join(interaction.parameters)
 
                 meta_string = json.dumps(interaction.meta)
@@ -113,7 +117,10 @@ def write_edges(self, edges):
         """
         self.stream.write("[ edges ]\n")
         for idx, jdx in edges:
-            self.stream.write(f"{idx} {jdx}\n")
+            line = "{idx:>{imax}} {jdx:>{imax}}\n".format(idx=idx,
+                                                          jdx=jdx,
+                                                          imax=self.max_idx)
+            self.stream.write(line)
 
     def write_nonedges(self, edges):
         """
@@ -145,12 +152,23 @@ def write_atoms_block(self, nodes):
             pair-wise iteratable edge list
         """
         self.stream.write("[ atoms ]\n")
+        max_length = {'idx': len(str(len(nodes)))}
+        for attribute in self.normal_order_block_atoms:
+            max_length[attribute] = max(len(str(atom.get(attribute, '')))
+                                        for _, atom in nodes)
+
         for idx, (node, attrs) in enumerate(nodes, start=1):
-            write_attrs = {attr: attrs[attr] for attr in self.normal_order_block_atoms if attr in attrs}
-            write_attrs = _choice_to_str(write_attrs)
-            attr_line = " ".join([str(value) for value in write_attrs.values()])
-            line = f"{idx} " + attr_line + "\n"
-            self.stream.write(line)
+            write_attrs = {attr: str(attrs[attr]) for attr in self.normal_order_block_atoms if attr in attrs}
+            self.stream.write('{idx:>{max_length[idx]}} '
+                              '{atype:<{max_length[atype]}} '
+                              '{resid:>{max_length[resid]}} '
+                              '{resname:<{max_length[resname]}} '
+                              '{atomname:<{max_length[atomname]}} '
+                              '{charge_group:>{max_length[charge_group]}} '
+                              '{charge:>{max_length[charge]}} '
+                              '{mass:>{max_length[mass]}}\n'.format(idx=idx,
+                                                                    max_length=max_length,
+                                                                    **write_attrs))
 
     def write_atoms_link(self, nodes, nometa=False):
         """

From a8d1bb9e0b602e969d5cfa85761ab74f9cdb3c4c Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Wed, 22 Nov 2023 15:47:31 +0100
Subject: [PATCH 20/82] methods to deal with charges

---
 polyply/src/charges.py | 101 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 101 insertions(+)
 create mode 100644 polyply/src/charges.py

diff --git a/polyply/src/charges.py b/polyply/src/charges.py
new file mode 100644
index 00000000..ff640d4a
--- /dev/null
+++ b/polyply/src/charges.py
@@ -0,0 +1,101 @@
+import numpy as np
+import networkx as nx
+import scipy.optimize
+
+def set_charges(block, res_graph, name):
+    """
+    Give every atom of `block` the charge of the equally named atom in the
+    residue called `name` with the highest betweenness centrality in `res_graph`.
+    """
+    resnames = nx.get_node_attributes(res_graph, 'resname')
+    centrality = nx.betweenness_centrality(res_graph)
+    candidates = (node for node, resname in resnames.items() if resname == name)
+    central = max(candidates, key=centrality.get, default=None)
+    ref_graph = res_graph.nodes[central]['graph']
+    charge_of = {attrs['atomname']: attrs['charge']
+                 for _, attrs in ref_graph.nodes(data=True) if 'atomname' in attrs}
+    for _, attrs in block.nodes(data=True):
+        attrs['charge'] = charge_of[attrs['atomname']]
+    return block
+
+def bond_dipoles(bonds, charges):
+    """Return length * (q_i - q_j) for every (i, j) bond as a float array."""
+    dipoles = np.zeros(len(bonds))
+    for slot, ((idx, jdx), length) in enumerate(bonds.items()):
+        dipoles[slot] = length * (charges[idx] - charges[jdx])
+    return dipoles
+
+def _get_bonds(block, topology=None):
+    """
+    Map node-position pairs of bonded atoms to bond lengths, falling back to the
+    bond types of `topology`; bonds without a resolvable length are skipped.
+    """
+    bonds = {}
+    nodes_to_count = {node: count for count, node in enumerate(block.nodes)}
+    for idx, jdx in block.edges:
+        for bond in block.interactions['bonds']:
+            if tuple(bond.atoms) not in [(idx, jdx), (jdx, idx)]:
+                continue
+            params = bond.parameters[1:2]
+            if not params and topology:
+                batoms = (block.nodes[idx]['atype'], block.nodes[jdx]['atype'])
+                btypes = topology.types['bonds']
+                key = batoms if batoms in btypes else batoms[::-1]
+                params = [btypes[key][0][0][1]] if key in btypes else []
+            if params:
+                bonds[(nodes_to_count[idx], nodes_to_count[jdx])] = float(params[0])
+    return bonds
+
+def equalize_charges(block, topology=None):
+    """
+    Adjust the charges of `block` towards a zero net charge while keeping
+    the bond dipoles close to their original values. A block that is
+    already neutral is returned unchanged; charges are updated in place.
+    """
+    block.make_edges_from_interaction_type('bonds')
+    charge_dict = nx.get_node_attributes(block, 'charge')
+    charges = np.array(list(charge_dict.values()))
+    if np.isclose(charges.sum(), 0, atol=1e-6):
+        return block
+
+    bonds = _get_bonds(block, topology)
+    ref_dipoles = bond_dipoles(bonds, charges)
+
+    def loss(arr):
+        # net charge plus squared deviation from the reference bond dipoles
+        dipole_diff = ref_dipoles - bond_dipoles(bonds, arr)
+        return np.abs(arr.sum()) + np.sum(np.square(dipole_diff))
+
+    opt_results = scipy.optimize.minimize(loss, charges, method='L-BFGS-B',
+                                          options={'ftol': 0.001, 'maxiter': 100})
+    new_charges = dict(zip(charge_dict, opt_results['x']))
+    nx.set_node_attributes(block, new_charges, 'charge')
+    return block
+
+
+#def equalize_charges(molecule, target_charge=0):
+#    """
+#    Make sure that the total charge of molecule is equal to
+#    the target charge by subtracting the differences split
+#    over all atoms.
+#
+#    Parameters
+#    ----------
+#    molecule: :class:`vermouth.molecule.Molecule`
+#    target_charge: float
+#        the charge of the molecule
+#
+#    Returns
+#    -------
+#    molecule
+#        the molecule with updated charge attribute
+#    """
+#    total = nx.get_node_attributes(molecule, "charge")
+#    diff = (sum(list(total.values())) - target_charge)/len(molecule.nodes)
+#    if np.isclose(diff, 0, atol=0.0001):
+#        return molecule
+#    for node in molecule.nodes:
+#        charge = float(molecule.nodes[node]['charge']) - diff
+#        molecule.nodes[node]['charge'] = charge
+#    total = nx.get_node_attributes(molecule, "charge")
+#    return molecule

From 37bad71940aaa6ee77b4b46635c0c71e901d4df8 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Wed, 22 Nov 2023 15:47:40 +0100
Subject: [PATCH 21/82] methods to deal with charges

---
 polyply/src/fragment_finder.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/polyply/src/fragment_finder.py b/polyply/src/fragment_finder.py
index d806c054..bde5316b 100644
--- a/polyply/src/fragment_finder.py
+++ b/polyply/src/fragment_finder.py
@@ -342,4 +342,4 @@ def extract_unique_fragments(self, fragment_graphs):
 
         # remake the residue graph since some resnames have changed
         self.make_res_graph()
-        return unique_fragments
+        return unique_fragments, self.res_graph

From 74090983ed36d1756f1b6a29cdac51004fc47788 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Wed, 22 Nov 2023 15:47:47 +0100
Subject: [PATCH 22/82] methods to deal with charges

---
 polyply/src/itp_to_ff.py | 53 ++++++++++++++++++----------------------
 1 file changed, 24 insertions(+), 29 deletions(-)

diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py
index d21db023..d8f6d0b0 100644
--- a/polyply/src/itp_to_ff.py
+++ b/polyply/src/itp_to_ff.py
@@ -25,6 +25,7 @@
 from polyply.src.generate_templates import extract_block
 from polyply.src.fragment_finder import FragmentFinder
 from polyply.src.ffoutput import ForceFieldDirectiveWriter
+from polyply.src.charges import equalize_charges
 from polyply.tests.test_lib_files import _interaction_equal 
 
 def diffs_to_prefix(atoms, resid_diffs):
@@ -200,35 +201,9 @@ def extract_links(molecule):
                 link.interactions[inter_type].append(interaction)
 
         links.append(link)
-    print("--test--")
-    print(links)
+    #print(links)
     return links
 
-def equalize_charges(molecule, target_charge=0):
-    """
-    Make sure that the total charge of molecule is equal to
-    the target charge by substracting the differences split
-    over all atoms.
-
-    Parameters
-    ----------
-    molecule: :class:`vermouth.molecule.Molecule`
-    target_charge: float
-        the charge of the molecule
-
-    Returns
-    -------
-    molecule
-        the molecule with updated charge attribute
-    """
-    total = nx.get_node_attributes(molecule, "charge")
-    diff = (sum(list(total.values())) - target_charge)/len(molecule.nodes)
-    for node in molecule.nodes:
-        charge = float(molecule.nodes[node]['charge']) - diff
-        molecule.nodes[node]['charge'] = charge
-    total = nx.get_node_attributes(molecule, "charge")
-    return molecule
-
 def handle_chirality(molecule, chiral_centers):
     pass
 
@@ -239,6 +214,22 @@ def hcount(molecule, node):
             hcounter+= 1
     return hcounter
 
+def set_charges(block, res_graph, name):  # NOTE(review): same body as polyply.src.charges.set_charges
+    resnames = nx.get_node_attributes(res_graph, 'resname')
+    centrality = nx.betweenness_centrality(res_graph)
+    score = -1
+    most_central_node = None
+    for node, resname in resnames.items():  # pick the residue called `name` with the highest centrality
+        if resname == name and centrality[node] > score:
+            score = centrality[node]
+            most_central_node = node
+    charges_tmp = nx.get_node_attributes(res_graph.nodes[most_central_node]['graph'], 'charge')
+    atomnames = nx.get_node_attributes(res_graph.nodes[most_central_node]['graph'], 'atomname')
+    charges = {atomname: charges_tmp[node] for node, atomname in atomnames.items()}  # atomname -> charge
+    for node in block.nodes:  # copy the charges onto the block, matching atoms by atomname
+        block.nodes[node]['charge'] = charges[block.nodes[node]['atomname']]
+    return block
+
 def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0):
     """
     Main executable for itp to ff tool.
@@ -247,7 +238,6 @@ def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0
         # read the topology file
         top = Topology.from_gmx_topfile(itppath, name="test")
         mol = top.molecules[0].molecule
-        mol = equalize_charges(mol, target_charge=charge)
 
     if itppath.suffix == ".itp":
         with open(itppath, "r") as _file:
@@ -266,18 +256,23 @@ def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0
         fragment_graphs.append(fragment_graph)
 
     # identify and extract all unique fragments
-    unique_fragments = FragmentFinder(mol, prefix=term_prefix).extract_unique_fragments(fragment_graphs)
+    unique_fragments, res_graph = FragmentFinder(mol, prefix=term_prefix).extract_unique_fragments(fragment_graphs)
     force_field = ForceField("new")
     for name, fragment in unique_fragments.items():
         new_block = extract_block(mol, list(fragment.nodes), defines={})
         nx.set_node_attributes(new_block, 1, "resid")
         new_block.nrexcl = mol.nrexcl
         force_field.blocks[name] = new_block
+        set_charges(new_block, res_graph, name)
+        #print("here")
+        if itppath.suffix == ".top":
+            equalize_charges(new_block, top)
 
 #    for node in mol.nodes:
 #        print(mol.nodes[node])
 
     force_field.links = extract_links(mol)
 
+    print("-----")
     with open(outpath, "w") as filehandle:
         ForceFieldDirectiveWriter(forcefield=force_field, stream=filehandle).write()

From 362372ebe5ac391f9ea811565445db6db8010502 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Wed, 22 Nov 2023 15:50:40 +0100
Subject: [PATCH 23/82] adjust test

---
 polyply/tests/test_fragment_finder.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/polyply/tests/test_fragment_finder.py b/polyply/tests/test_fragment_finder.py
index 59155e77..7fb1478c 100644
--- a/polyply/tests/test_fragment_finder.py
+++ b/polyply/tests/test_fragment_finder.py
@@ -252,7 +252,7 @@ def test_extract_fragments(smiles, resnames, remove, uni_frags):
             match_mols.append(frag)
 
     frag_finder = polyply.src.fragment_finder.FragmentFinder(molecule, "ter")
-    fragments = frag_finder.extract_unique_fragments(match_mols)
+    fragments, _ = frag_finder.extract_unique_fragments(match_mols)
     assert len(fragments) == len(uni_frags)
     for resname, graph in fragments.items():
         frag_finder.match_keys = ['element', 'mass', 'resname']

From 4ed29798a32dd85f8cb9bd3ae4e66a2a1c46e2b8 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Wed, 22 Nov 2023 16:09:33 +0100
Subject: [PATCH 24/82] move extract block to molecule utils

---
 polyply/src/generate_templates.py        |  76 +------
 polyply/src/itp_to_ff.py                 | 218 +------------------
 polyply/src/molecule_utils.py            | 260 +++++++++++++++++++++++
 polyply/tests/test_generate_templates.py |   6 +-
 4 files changed, 267 insertions(+), 293 deletions(-)
 create mode 100644 polyply/src/molecule_utils.py

diff --git a/polyply/src/generate_templates.py b/polyply/src/generate_templates.py
index 509663d7..d5ccbe23 100644
--- a/polyply/src/generate_templates.py
+++ b/polyply/src/generate_templates.py
@@ -19,8 +19,8 @@
 from .processor import Processor
 from .linalg_functions import (u_vect, center_of_geometry,
                                radius_of_gyration)
-from .topology import replace_defined_interaction
 from .linalg_functions import dih
+from .molecule_utils import extract_block
 """
 Processor generating coordinates for all residues of a meta_molecule
 matching those in the meta_molecule.molecule attribute.
@@ -216,80 +216,6 @@ def map_from_CoG(coords):
 
     return out_vectors
 
-def _relabel_interaction_atoms(interaction, mapping):
-    """
-    Relables the atoms in interaction according to the
-    rules defined in mapping.
-
-    Parameters
-    ----------
-    interaction: `vermouth.molecule.Interaction`
-    mapping: `:class:dict`
-
-    Returns
-    -------
-    interaction: `vermouth.molecule.Interaction`
-        the new interaction with updated atoms
-    """
-    new_atoms = [mapping[atom] for atom in interaction.atoms]
-    new_interaction = interaction._replace(atoms=new_atoms)
-    return new_interaction
-
-def extract_block(molecule, nodes, defines={}):
-    """
-    Given a `vermouth.molecule` and a `resname`
-    extract the information of a block from the
-    molecule definition and replace all defines
-    if any are found.
-
-    Parameters
-    ----------
-    molecule:  :class:vermouth.molecule.Molecule
-    nodes: abc.hashable
-        the nodes corresponding to the block to
-        extract
-    defines:   dict
-      dict of type define: value
-
-    Returns
-    -------
-    :class:vermouth.molecule.Block
-    """
-    resid = molecule.nodes[nodes[0]]["resid"]
-    resname = molecule.nodes[nodes[0]]["resname"]
-    block = vermouth.molecule.Block()
-
-    # select all nodes with the same first resid and
-    # make sure the block node labels are atomnames
-    # also build a correspondance dict between node
-    # label in the molecule and in the block for
-    # relabeling the interactions
-    mapping = {}
-    for node in nodes:
-        attr_dict = molecule.nodes[node]
-        if attr_dict["resid"] == resid:
-            block.add_node(attr_dict["atomname"], **attr_dict)
-            mapping[node] = attr_dict["atomname"]
-
-    for inter_type in molecule.interactions:
-        for interaction in molecule.interactions[inter_type]:
-            if all(atom in mapping for atom in interaction.atoms):
-                interaction = replace_defined_interaction(interaction, defines)
-                interaction = _relabel_interaction_atoms(interaction, mapping)
-                block.interactions[inter_type].append(interaction)
-
-    for inter_type in ["bonds", "constraints", "virtual_sitesn",
-                       "virtual_sites2", "virtual_sites3", "virtual_sites4"]:
-        block.make_edges_from_interaction_type(inter_type)
-
-    if not nx.is_connected(block):
-        msg = ('\n Residue {} with id {} consistes of two disconnected parts. '
-               'Make sure all atoms/particles in a residue are connected by bonds,'
-               ' constraints or virual-sites.')
-        raise IOError(msg.format(resname, resid))
-
-    return block
-
 class GenerateTemplates(Processor):
     """
     This processor takes a a class:`polyply.src.MetaMolecule` and
diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py
index d8f6d0b0..dc03725c 100644
--- a/polyply/src/itp_to_ff.py
+++ b/polyply/src/itp_to_ff.py
@@ -11,234 +11,27 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-import itertools
-from collections import defaultdict
 import numpy as np
 import networkx as nx
 import pysmiles
 import vermouth
 from vermouth.forcefield import ForceField
-from vermouth.molecule import Interaction
 from vermouth.gmx.itp_read import read_itp
 from polyply.src.topology import Topology
-from polyply.src.generate_templates import extract_block
+from polyply.src.molecule_utils import extract_block, extract_links
 from polyply.src.fragment_finder import FragmentFinder
 from polyply.src.ffoutput import ForceFieldDirectiveWriter
 from polyply.src.charges import equalize_charges
-from polyply.tests.test_lib_files import _interaction_equal 
-
-def diffs_to_prefix(atoms, resid_diffs):
-    """
-    Given a list of atoms and corresponding differences
-    between their resids, generate the offset prefix for
-    the atomnames according to the vermouth sepcific offset
-    language.
-
-    The reference atom must have resid_diff value of 0.
-    Other atoms either get - or + signs
-    depending on their resid offset.
-
-    Parameters
-    ----------
-    atoms: abc.itertable[str]
-    resid_diff: abc.itertable[int]
-        the differences in resid with respeect to
-        the smallest/largest resid which is 0
-
-    Returns
-    -------
-    abc.itertable
-        list with prefixed atom names
-    """
-    prefixed_atoms = []
-    for atom, diff in zip(atoms, resid_diffs):
-        if diff > 0:
-            prefix = "".join(["+" for i in range(0, diff)])
-        else:
-            prefix = "".join(["-" for i in range(diff, 0)])
-        prefixed_atoms.append(prefix + atom)
-    return prefixed_atoms
-
-def _extract_edges_from_shortest_path(atoms, block, min_resid):
-    """
-    Given a list atoms generate a list of edges correspoding to
-    all edges required to connect all atoms by at least one
-    shortest path. Edges are retunred on atomname basis with
-    prefix relative to the `min_resid`. See diffs_to_prefix.
-
-    Paramters:
-    ----------
-    atoms: abc.itertable
-        the atoms to collect edges for
-    block: :class:`vermouth.molecule.Block`
-        the molecule which to servey for edges
-    min_resid: int
-        the resid to which the prefix indicate relative resid
-        distance
-
-    Returns
-    -------
-    list[tuple]
-        the edge list by atomname with prefix indicating relative
-        residue distance to min_resid
-    """
-    edges = []
-    had_edges = []
-    final_atoms = {}
-    resnames = {}
-    for origin, target in itertools.combinations(atoms, r=2):
-        path = list(nx.shortest_simple_paths(block, source=origin, target=target))[0]
-        for edge in zip(path[:-1], path[1:]):
-            if edge not in had_edges:
-                resid_diffs = np.array([block.nodes[node]['resid'] for node in edge]) - min_resid
-                atom_names = [block.nodes[node]["atomname"] for node in edge]
-                link_names = diffs_to_prefix(atom_names, resid_diffs)
-                final_atoms.update(dict(zip(edge, link_names)))
-                edges.append(link_names)
-                had_edges.append(edge)
-                resnames.update(zip(link_names, [ block.nodes[node]["resname"] for node in edge]))
-    return final_atoms, edges, resnames
-
-def extract_links(molecule):
-    """
-    Given a molecule that has the resid and resname attributes
-    correctly set, extract the interactions which span more than
-    a single residue and generate a link.
-
-    Parameters
-    ----------
-    molecule: :class:`vermouth.molecule.Molecule`
-        the molecule from which to extract interactions
-
-    Returns
-    -------
-    list[:class:`vermouth.molecule.Links`]
-        a list with a links found
-    """
-    links = []
-    # patterns are a sqeuence of atoms that define an interaction
-    # sometimes multiple interactions are defined for one pattern
-    # in that case they are all collected in this dictionary
-    patterns = defaultdict(dict)
-    # for each found pattern the resnames are collected; this is important
-    # because the same pattern may apply to residues with different name
-    resnames_for_patterns = defaultdict(dict)
-    link_atoms_for_patterns = defaultdict(list)
-    # as additional safe-gaurd against false links we also collect the edges
-    # that span the interaction by finding the shortest simple path between
-    # all atoms in patterns. Note that the atoms in patterns not always have
-    # to be directly bonded. For example, pairs are not directly bonded and
-    # can span multiple residues
-    #edges_for_patterns = defaultdict(list)
-    for inter_type in molecule.interactions:
-        #print("TYPE", inter_type)
-        for kdx, interaction in enumerate(molecule.interactions[inter_type]):
-            # extract resids and resname corresponding to interaction atoms
-            resids = np.array([molecule.nodes[atom]["resid"] for atom in interaction.atoms])
-            resnames = [molecule.nodes[atom]["resname"] for atom in interaction.atoms]
-            # compute the resid offset to be used for the atom prefixes
-            min_resid = min(resids)
-            diff = resids - min_resid
-            pattern = tuple(set(list(zip(diff, resnames))))
-
-            # in this case all interactions are in a block and we skip
-            if np.sum(diff) == 0:
-                continue
-
-            # we collect the edges corresponding to the simple paths between pairs of atoms
-            # in the interaction
-            mol_atoms_to_link_atoms, edges, resnames = _extract_edges_from_shortest_path(interaction.atoms, molecule, min_resid)
-            #print(kdx, resnames)
-            link_to_mol_atoms = {value:key for key, value in mol_atoms_to_link_atoms.items()}
-            link_atoms =  [mol_atoms_to_link_atoms[atom] for atom in interaction.atoms]
-            link_inter = Interaction(atoms=link_atoms,
-                                     parameters=interaction.parameters,
-                                     meta={})
-            #print("inter number", kdx)
-            # here we deal with filtering redundancy
-            if pattern in patterns and inter_type in patterns[pattern]:
-                #print(pattern)
-           #     if pattern == ((0, 'PEO'), (1, 'PEO')):
-           #         print(kdx, link_inter.atoms, patterns[pattern].get(inter_type, []), "\n")
-
-                for other_inter in patterns[pattern].get(inter_type, []):
-                    if _interaction_equal(other_inter, link_inter, inter_type):
-                        break
-                else:
-                    patterns[pattern][inter_type].append(link_inter)
-                    resnames_for_patterns[pattern].update(resnames)
-                    link_atoms_for_patterns[pattern] += link_atoms
-            else:
-                patterns[pattern][inter_type] = [link_inter]
-                resnames_for_patterns[pattern].update(resnames)
-                #edges_for_patterns[pattern] += edges
-                link_atoms_for_patterns[pattern] += link_atoms
-            #print('resnames', resnames_for_patterns[pattern], '\n')
-#    for inter in patterns[list(patterns.keys())[0]]['angles']:
-#        print(inter)
-    # we make new links for each unique interaction per type
-    for pattern in patterns:
-        link = vermouth.molecule.Link()
-        link.add_nodes_from(set(link_atoms_for_patterns[pattern]))
-        #link.add_edges_from(edges_for_patterns[pattern])
-        resnames = resnames_for_patterns[pattern]
-     #   print(resnames)
-        nx.set_node_attributes(link, resnames, "resname")
-
-        had_parameters = []
-        for inter_type, inters in patterns[pattern].items():
-            for idx, interaction in enumerate(inters):
-                #new_parameters = interaction.parameters
-                new_meta = interaction.meta
-                #new_atoms = interaction.atoms
-                # to account for the fact when multiple interactions with the same
-                # atom patterns need to be written to ff
-                new_meta.update({"version": idx})
-                new_meta.update({"comment": "link"})
-                had_parameters.append(interaction.parameters)
-                # map atoms to proper atomnames ..
-                link.interactions[inter_type].append(interaction)
-
-        links.append(link)
-    #print(links)
-    return links
-
-def handle_chirality(molecule, chiral_centers):
-    pass
-
-def hcount(molecule, node):
-    hcounter = 0
-    for node in molecule.neighbors(node):
-        if molecule.nodes[node]["element"] == "H":
-            hcounter+= 1
-    return hcounter
-
-def set_charges(block, res_graph, name):
-    resnames = nx.get_node_attributes(res_graph, 'resname')
-    centrality = nx.betweenness_centrality(res_graph)
-    score = -1
-    most_central_node = None
-    for node, resname in resnames.items():
-        if resname == name and centrality[node] > score:
-            score = centrality[node]
-            most_central_node = node
-    charges_tmp = nx.get_node_attributes(res_graph.nodes[most_central_node]['graph'], 'charge')
-    atomnames = nx.get_node_attributes(res_graph.nodes[most_central_node]['graph'], 'atomname')
-    charges = {atomname: charges_tmp[node] for node, atomname in atomnames.items()}
-    for node in block.nodes:
-        block.nodes[node]['charge'] = charges[block.nodes[node]['atomname']]
-    return block
 
 def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0):
     """
     Main executable for itp to ff tool.
     """
+    # read the topology file
     if itppath.suffix == ".top":
-        # read the topology file
         top = Topology.from_gmx_topfile(itppath, name="test")
         mol = top.molecules[0].molecule
-
+    # read itp file
     if itppath.suffix == ".itp":
         with open(itppath, "r") as _file:
             lines = _file.readlines()
@@ -264,15 +57,10 @@ def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0
         new_block.nrexcl = mol.nrexcl
         force_field.blocks[name] = new_block
         set_charges(new_block, res_graph, name)
-        #print("here")
         if itppath.suffix == ".top":
             equalize_charges(new_block, top)
 
-#    for node in mol.nodes:
-#        print(mol.nodes[node])
-
     force_field.links = extract_links(mol)
 
-    print("-----")
     with open(outpath, "w") as filehandle:
         ForceFieldDirectiveWriter(forcefield=force_field, stream=filehandle).write()
diff --git a/polyply/src/molecule_utils.py b/polyply/src/molecule_utils.py
new file mode 100644
index 00000000..7da9ce43
--- /dev/null
+++ b/polyply/src/molecule_utils.py
@@ -0,0 +1,260 @@
+# Copyright 2022 University of Groningen
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import itertools
+from collections import defaultdict
+import numpy as np
+import networkx as nx
+import vermouth
+from vermouth.molecule import Interaction
+from polyply.tests.test_lib_files import _interaction_equal
+from .topology import replace_defined_interaction
+
+def diffs_to_prefix(atoms, resid_diffs):
+    """
+    Given a list of atoms and corresponding differences
+    between their resids, generate the offset prefix for
+    the atomnames according to the vermouth specific offset
+    language.
+
+    The reference atom must have resid_diff value of 0.
+    Other atoms either get - or + signs
+    depending on their resid offset.
+
+    Parameters
+    ----------
+    atoms: abc.Iterable[str]
+    resid_diffs: abc.Iterable[int]
+        the differences in resid with respect to
+        the smallest/largest resid which is 0
+
+    Returns
+    -------
+    list[str]
+        list with prefixed atom names
+    """
+    prefixed_atoms = []
+    for atom, diff in zip(atoms, resid_diffs):
+        if diff > 0:
+            prefix = "".join(["+" for i in range(0, diff)])
+        else:
+            prefix = "".join(["-" for i in range(diff, 0)])
+        prefixed_atoms.append(prefix + atom)
+    return prefixed_atoms
+
+def _extract_edges_from_shortest_path(atoms, block, min_resid):
+    """
+    Given a list of atoms generate a list of edges corresponding to
+    all edges required to connect all atoms by at least one
+    shortest path. Edges are returned on atomname basis with
+    prefix relative to the `min_resid`. See diffs_to_prefix.
+
+    Parameters
+    ----------
+    atoms: abc.Iterable
+        the atoms to collect edges for
+    block: :class:`vermouth.molecule.Block`
+        the molecule to survey for edges
+    min_resid: int
+        the resid to which the prefix indicate relative resid
+        distance
+
+    Returns
+    -------
+    tuple[dict, list, dict]
+        mapping of nodes to prefixed atomnames, the prefixed edge list,
+        and a mapping of prefixed atomnames to resnames
+    """
+    edges = []
+    had_edges = []
+    final_atoms = {}
+    resnames = {}
+    for origin, target in itertools.combinations(atoms, r=2):
+        path = list(nx.shortest_simple_paths(block, source=origin, target=target))[0]
+        for edge in zip(path[:-1], path[1:]):
+            if edge not in had_edges:
+                resid_diffs = np.array([block.nodes[node]['resid'] for node in edge]) - min_resid
+                atom_names = [block.nodes[node]["atomname"] for node in edge]
+                link_names = diffs_to_prefix(atom_names, resid_diffs)
+                final_atoms.update(dict(zip(edge, link_names)))
+                edges.append(link_names)
+                had_edges.append(edge)
+                resnames.update(zip(link_names, [ block.nodes[node]["resname"] for node in edge]))
+    return final_atoms, edges, resnames
+
+
+def extract_links(molecule):
+    """
+    Given a molecule that has the resid and resname attributes
+    correctly set, extract the interactions which span more than
+    a single residue and generate a link.
+
+    Parameters
+    ----------
+    molecule: :class:`vermouth.molecule.Molecule`
+        the molecule from which to extract interactions
+
+    Returns
+    -------
+    list[:class:`vermouth.molecule.Link`]
+        a list with the links found
+    """
+    links = []
+    # patterns are a sequence of atoms that define an interaction
+    # sometimes multiple interactions are defined for one pattern
+    # in that case they are all collected in this dictionary
+    patterns = defaultdict(dict)
+    # for each found pattern the resnames are collected; this is important
+    # because the same pattern may apply to residues with different name
+    resnames_for_patterns = defaultdict(dict)
+    link_atoms_for_patterns = defaultdict(list)
+    # as an additional safeguard against false links we also collect the edges
+    # that span the interaction by finding the shortest simple path between
+    # all atoms in patterns. Note that the atoms in patterns not always have
+    # to be directly bonded. For example, pairs are not directly bonded and
+    # can span multiple residues
+    for inter_type in molecule.interactions:
+        for kdx, interaction in enumerate(molecule.interactions[inter_type]):
+            # extract resids and resname corresponding to interaction atoms
+            resids = np.array([molecule.nodes[atom]["resid"] for atom in interaction.atoms])
+            resnames = [molecule.nodes[atom]["resname"] for atom in interaction.atoms]
+            # compute the resid offset to be used for the atom prefixes
+            min_resid = min(resids)
+            diff = resids - min_resid
+            pattern = tuple(set(list(zip(diff, resnames))))
+
+            # in this case all interactions are in a block and we skip
+            if np.sum(diff) == 0:
+                continue
+
+            # we collect the edges corresponding to the simple paths between pairs of atoms
+            # in the interaction
+            mol_atoms_to_link_atoms, edges, resnames = _extract_edges_from_shortest_path(interaction.atoms, molecule, min_resid)
+            link_to_mol_atoms = {value:key for key, value in mol_atoms_to_link_atoms.items()}
+            link_atoms =  [mol_atoms_to_link_atoms[atom] for atom in interaction.atoms]
+            link_inter = Interaction(atoms=link_atoms,
+                                     parameters=interaction.parameters,
+                                     meta={})
+
+            # here we deal with filtering redundancy
+            if pattern in patterns and inter_type in patterns[pattern]:
+                for other_inter in patterns[pattern].get(inter_type, []):
+                    if _interaction_equal(other_inter, link_inter, inter_type):
+                        break
+                else:
+                    patterns[pattern][inter_type].append(link_inter)
+                    resnames_for_patterns[pattern].update(resnames)
+                    link_atoms_for_patterns[pattern] += link_atoms
+            else:
+                patterns[pattern][inter_type] = [link_inter]
+                resnames_for_patterns[pattern].update(resnames)
+                link_atoms_for_patterns[pattern] += link_atoms
+
+    # we make new links for each unique interaction per type
+    for pattern in patterns:
+        link = vermouth.molecule.Link()
+        link.add_nodes_from(set(link_atoms_for_patterns[pattern]))
+        resnames = resnames_for_patterns[pattern]
+        nx.set_node_attributes(link, resnames, "resname")
+
+        had_parameters = []
+        for inter_type, inters in patterns[pattern].items():
+            for idx, interaction in enumerate(inters):
+                #new_parameters = interaction.parameters
+                new_meta = interaction.meta
+                #new_atoms = interaction.atoms
+                # to account for the fact when multiple interactions with the same
+                # atom patterns need to be written to ff
+                new_meta.update({"version": idx})
+                new_meta.update({"comment": "link"})
+                had_parameters.append(interaction.parameters)
+                # map atoms to proper atomnames ..
+                link.interactions[inter_type].append(interaction)
+        links.append(link)
+    return links
+
+
+def _relabel_interaction_atoms(interaction, mapping):
+    """
+    Relabels the atoms in interaction according to the
+    rules defined in mapping.
+
+    Parameters
+    ----------
+    interaction: `vermouth.molecule.Interaction`
+    mapping: `:class:dict`
+
+    Returns
+    -------
+    interaction: `vermouth.molecule.Interaction`
+        the new interaction with updated atoms
+    """
+    new_atoms = [mapping[atom] for atom in interaction.atoms]
+    new_interaction = interaction._replace(atoms=new_atoms)
+    return new_interaction
+
+
+def extract_block(molecule, nodes, defines={}):
+    """
+    Given a `vermouth.molecule` and a list of `nodes`
+    extract the information of a block from the
+    molecule definition and replace all defines
+    if any are found.
+
+    Parameters
+    ----------
+    molecule:  :class:vermouth.molecule.Molecule
+    nodes: abc.hashable
+        the nodes corresponding to the block to
+        extract
+    defines:   dict
+      dict of type define: value
+
+    Returns
+    -------
+    :class:vermouth.molecule.Block
+    """
+    resid = molecule.nodes[nodes[0]]["resid"]
+    resname = molecule.nodes[nodes[0]]["resname"]
+    block = vermouth.molecule.Block()
+
+    # select all nodes with the same first resid and
+    # make sure the block node labels are atomnames
+    # also build a correspondence dict between node
+    # label in the molecule and in the block for
+    # relabeling the interactions
+    mapping = {}
+    for node in nodes:
+        attr_dict = molecule.nodes[node]
+        if attr_dict["resid"] == resid:
+            block.add_node(attr_dict["atomname"], **attr_dict)
+            mapping[node] = attr_dict["atomname"]
+
+    for inter_type in molecule.interactions:
+        for interaction in molecule.interactions[inter_type]:
+            if all(atom in mapping for atom in interaction.atoms):
+                interaction = replace_defined_interaction(interaction, defines)
+                interaction = _relabel_interaction_atoms(interaction, mapping)
+                block.interactions[inter_type].append(interaction)
+
+    for inter_type in ["bonds", "constraints", "virtual_sitesn",
+                       "virtual_sites2", "virtual_sites3", "virtual_sites4"]:
+        block.make_edges_from_interaction_type(inter_type)
+
+    if not nx.is_connected(block):
+        msg = ('\n Residue {} with id {} consistes of two disconnected parts. '
+               'Make sure all atoms/particles in a residue are connected by bonds,'
+               ' constraints or virual-sites.')
+        raise IOError(msg.format(resname, resid))
+
+    return block
diff --git a/polyply/tests/test_generate_templates.py b/polyply/tests/test_generate_templates.py
index 6c21fc63..1d852cb7 100644
--- a/polyply/tests/test_generate_templates.py
+++ b/polyply/tests/test_generate_templates.py
@@ -27,10 +27,10 @@
 from polyply.src.linalg_functions import center_of_geometry
 from polyply.src.generate_templates import (find_atoms,
                                             _expand_inital_coords,
-                                            _relabel_interaction_atoms,
                                             compute_volume, map_from_CoG,
-                                            extract_block, GenerateTemplates,
-					    find_interaction_involving)
+                                            GenerateTemplates,
+					                        find_interaction_involving)
+from polyply.src.molecule_utils import (extract_block, _relabel_interaction_atoms)
 
 class TestGenTemps:
 

From fa32f76906344638475a5b8da9f71774a1e6ef24 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Wed, 22 Nov 2023 16:22:24 +0100
Subject: [PATCH 25/82] small fix

---
 polyply/src/itp_to_ff.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py
index dc03725c..55bc8a7f 100644
--- a/polyply/src/itp_to_ff.py
+++ b/polyply/src/itp_to_ff.py
@@ -21,7 +21,7 @@
 from polyply.src.molecule_utils import extract_block, extract_links
 from polyply.src.fragment_finder import FragmentFinder
 from polyply.src.ffoutput import ForceFieldDirectiveWriter
-from polyply.src.charges import equalize_charges
+from polyply.src.charges import equalize_charges, set_charges
 
 def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0):
     """

From 520780189f5f3878e1e2139d982ca7c9cdb3391b Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Thu, 23 Nov 2023 13:16:48 +0100
Subject: [PATCH 26/82] allow for charged residues and make pysmiles optional
 import

---
 polyply/src/charges.py   |  5 +++--
 polyply/src/itp_to_ff.py | 14 +++++++++++---
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/polyply/src/charges.py b/polyply/src/charges.py
index ff640d4a..7672a8c8 100644
--- a/polyply/src/charges.py
+++ b/polyply/src/charges.py
@@ -46,7 +46,7 @@ def _get_bonds(block, topology=None):
                         bonds[(nodes_to_count[idx], nodes_to_count[jdx])] = float(params)
     return bonds
 
-def equalize_charges(block, topology=None):
+def equalize_charges(block, topology=None, charge=0):
     block.make_edges_from_interaction_type('bonds')
     keys = nx.get_node_attributes(block, 'charge').keys()
     charges = np.array(list(nx.get_node_attributes(block, 'charge').values()))
@@ -63,7 +63,8 @@ def equalize_charges(block, topology=None):
     def loss(arr):
         arr.reshape(-1)
         curr_dipoles = bond_dipoles(bonds, arr)
-        loss = np.abs(arr.sum()) + np.sum(np.square(ref_dipoles -  curr_dipoles))
+        crg_dev = np.abs(charge - arr.sum())
+        loss = crg_dev + np.sum(np.square(ref_dipoles -  curr_dipoles))
         return loss
 
     opt_results = scipy.optimize.minimize(loss, charges, method='L-BFGS-B',
diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py
index 55bc8a7f..25a4a424 100644
--- a/polyply/src/itp_to_ff.py
+++ b/polyply/src/itp_to_ff.py
@@ -13,7 +13,10 @@
 # limitations under the License.
 import numpy as np
 import networkx as nx
-import pysmiles
+try:
+    import pysmiles
+except ImportError:
+    raise ImportError("To use polyply itp_to_ff you need to install pysmiles.")
 import vermouth
 from vermouth.forcefield import ForceField
 from vermouth.gmx.itp_read import read_itp
@@ -23,10 +26,13 @@
 from polyply.src.ffoutput import ForceFieldDirectiveWriter
 from polyply.src.charges import equalize_charges, set_charges
 
-def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0):
+def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charges=None):
     """
     Main executable for itp to ff tool.
     """
+    # what charges belong to which resname
+    if charges:
+        crg_dict = dict(zip(resnames, charges))
     # read the topology file
     if itppath.suffix == ".top":
         top = Topology.from_gmx_topfile(itppath, name="test")
@@ -58,7 +64,9 @@ def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0
         force_field.blocks[name] = new_block
         set_charges(new_block, res_graph, name)
         if itppath.suffix == ".top":
-            equalize_charges(new_block, top)
+            base_resname = name.split(term_prefix)[0].split('_')[0]
+            print(base_resname)
+            equalize_charges(new_block, top, crg_dict[base_resname])
 
     force_field.links = extract_links(mol)
 

From 737b45ce15f9a17794b0bbf8ed498c7d0f9e1264 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Thu, 23 Nov 2023 13:21:00 +0100
Subject: [PATCH 27/82] make mass optional

---
 polyply/src/ffoutput.py | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/polyply/src/ffoutput.py b/polyply/src/ffoutput.py
index 0e06ea3f..1db13586 100644
--- a/polyply/src/ffoutput.py
+++ b/polyply/src/ffoutput.py
@@ -159,16 +159,19 @@ def write_atoms_block(self, nodes):
 
         for idx, (node, attrs) in enumerate(nodes, start=1):
             write_attrs = {attr: str(attrs[attr]) for attr in self.normal_order_block_atoms if attr in attrs}
-            self.stream.write('{idx:>{max_length[idx]}} '
-                              '{atype:<{max_length[atype]}} '
-                              '{resid:>{max_length[resid]}} '
-                              '{resname:<{max_length[resname]}} '
-                              '{atomname:<{max_length[atomname]}} '
-                              '{charge_group:>{max_length[charge_group]}} '
-                              '{charge:>{max_length[charge]}} '
-                              '{mass:>{max_length[mass]}}\n'.format(idx=idx,
-                                                                    max_length=max_length,
-                                                                    **write_attrs))
+            template = ('{idx:>{max_length[idx]}} '
+                        '{atype:<{max_length[atype]}} '
+                        '{resid:>{max_length[resid]}} '
+                        '{resname:<{max_length[resname]}} '
+                        '{atomname:<{max_length[atomname]}} '
+                        '{charge_group:>{max_length[charge_group]}} '
+                        '{charge:>{max_length[charge]}} ')
+            if 'mass' in write_attrs:
+                template += '{mass:>{max_length[mass]}}\n'
+            else:
+                template += '\n'
+
+            self.stream.write(template.format(idx=idx, max_length=max_length, **write_attrs))
 
     def write_atoms_link(self, nodes, nometa=False):
         """

From 9a1e800475cd5463e30088d0d84538ec521456ed Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Thu, 23 Nov 2023 13:33:36 +0100
Subject: [PATCH 28/82] add doc-strings and rename equalize_charge

---
 polyply/src/charges.py   | 103 +++++++++++++++++++++++++++------------
 polyply/src/itp_to_ff.py |   4 +-
 2 files changed, 75 insertions(+), 32 deletions(-)

diff --git a/polyply/src/charges.py b/polyply/src/charges.py
index 7672a8c8..d53bae3d 100644
--- a/polyply/src/charges.py
+++ b/polyply/src/charges.py
@@ -3,6 +3,25 @@
 import scipy.optimize
 
 def set_charges(block, res_graph, name):
+    """
+    Set the charges of `block` by finding the most central
+    residue in res_graph that matches the residue `name` of
+    block.
+
+    Parameters
+    ----------
+    block: :class:`vermouth.molecule.Block`
+        block describing single residue
+    res_graph: nx.Graph
+        residue graph
+    name: str
+        residue name
+
+    Returns
+    -------
+    :class:`vermouth.molecule.Block`
+        the block with updated charges
+    """
     resnames = nx.get_node_attributes(res_graph, 'resname')
     centrality = nx.betweenness_centrality(res_graph)
     score = -1
@@ -19,6 +38,23 @@ def set_charges(block, res_graph, name):
     return block
 
 def bond_dipoles(bonds, charges):
+    """
+    Compute bond dipole moments from charges
+    and bond lengths. The charges array must
+    match the numeric bond dict keys.
+
+    Parameters
+    ----------
+    bonds: dict[tuple(int, int)][float]
+        the bond length indexed by atom indices
+    charges: np.array
+        array of charges
+
+    Returns
+    -------
+    np.array
+        the bond dipoles
+    """
     bond_dipo = np.zeros((len(bonds)))
     for kdx, (idx, jdx) in enumerate(bonds.keys()):
         lb = bonds[(idx, jdx)]
@@ -26,6 +62,20 @@ def bond_dipoles(bonds, charges):
     return bond_dipo
 
 def _get_bonds(block, topology=None):
+    """
+    Extract a bond length dict from block. If topology
+    is given bond lengths may be looked up by type.
+
+    Parameters
+    ----------
+    block: :class:`vermouth.molecule.Block`
+    topology: :class:`polyply.src.topology.Topology`
+
+    Returns
+    -------
+    dict
+        a dict of edges and their bond length
+    """
     bonds = {}
     atoms = block.nodes
     nodes_to_count = {node: count for count, node in enumerate(block.nodes)}
@@ -42,11 +92,32 @@ def _get_bonds(block, topology=None):
                             params = topology.types['bonds'][batoms][0][0][1]
                         elif batoms[::-1] in topology.types['bonds']:
                             params = topology.types['bonds'][batoms[::-1]][0][0][1]
-                        print(params)
                         bonds[(nodes_to_count[idx], nodes_to_count[jdx])] = float(params)
     return bonds
 
-def equalize_charges(block, topology=None, charge=0):
+def balance_charges(block, topology=None, charge=0):
+    """
+    Given a block and a total charge for that block
+    balance the charge until the total charge of the
+    block is exactly the requested charge. The balancing
+    also tries to retain the bond dipole moments as
+    closely as possible, so that ideally the
+    electrostatics are influenced as little as
+    possible by the rescaling. A topology is only
+    needed if the force field uses bondtypes.
+
+    Parameters
+    ----------
+    block: :class:`vermouth.molecule.Block`
+    topology: :class:`polyply.src.topology.Topology`
+    charge: float
+        total charge of the residue
+
+    Returns
+    -------
+    :class:`vermouth.molecule.Block`
+        block with updated charges
+    """
     block.make_edges_from_interaction_type('bonds')
     keys = nx.get_node_attributes(block, 'charge').keys()
     charges = np.array(list(nx.get_node_attributes(block, 'charge').values()))
@@ -72,31 +143,3 @@ def loss(arr):
     balanced_charges = opt_results['x']
     nx.set_node_attributes(block, dict(zip(keys, balanced_charges)), 'charge')
     return block
-
-
-#def equalize_charges(molecule, target_charge=0):
-#    """
-#    Make sure that the total charge of molecule is equal to
-#    the target charge by substracting the differences split
-#    over all atoms.
-#
-#    Parameters
-#    ----------
-#    molecule: :class:`vermouth.molecule.Molecule`
-#    target_charge: float
-#        the charge of the molecule
-#
-#    Returns
-#    -------
-#    molecule
-#        the molecule with updated charge attribute
-#    """
-#    total = nx.get_node_attributes(molecule, "charge")
-#    diff = (sum(list(total.values())) - target_charge)/len(molecule.nodes)
-#    if np.isclose(diff, 0, atol=0.0001):
-#        return molecule
-#    for node in molecule.nodes:
-#        charge = float(molecule.nodes[node]['charge']) - diff
-#        molecule.nodes[node]['charge'] = charge
-#    total = nx.get_node_attributes(molecule, "charge")
-#    return molecule
diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py
index 25a4a424..76b8bf0d 100644
--- a/polyply/src/itp_to_ff.py
+++ b/polyply/src/itp_to_ff.py
@@ -24,7 +24,7 @@
 from polyply.src.molecule_utils import extract_block, extract_links
 from polyply.src.fragment_finder import FragmentFinder
 from polyply.src.ffoutput import ForceFieldDirectiveWriter
-from polyply.src.charges import equalize_charges, set_charges
+from polyply.src.charges import balance_charges, set_charges
 
 def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charges=None):
     """
@@ -66,7 +66,7 @@ def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charges=
         if itppath.suffix == ".top":
             base_resname = name.split(term_prefix)[0].split('_')[0]
             print(base_resname)
-            equalize_charges(new_block, top, crg_dict[base_resname])
+            balance_charges(new_block, top, crg_dict[base_resname])
 
     force_field.links = extract_links(mol)
 

From c9621a3396dc54a71392b0d9e8a2c06aa1931dd8 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Fri, 24 Nov 2023 10:38:10 +0100
Subject: [PATCH 29/82] remove print

---
 polyply/tests/test_lib_files.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/polyply/tests/test_lib_files.py b/polyply/tests/test_lib_files.py
index 98d748eb..cb8f1b18 100644
--- a/polyply/tests/test_lib_files.py
+++ b/polyply/tests/test_lib_files.py
@@ -161,8 +161,8 @@ def _interaction_equal(interaction1, interaction2, inter_type):
         a1.reverse()
         if a1 == a2:
             return True
-        else:
-            print(a1, a2)
+       # else:
+       #     print(a1, a2)
 
     elif inter_type in ["angles"]:
         return a1[1] == a2[1] and frozenset([a1[0], a1[2]]) == frozenset([a2[0], a2[2]])

From fdae3db7c55d9fe0de5b84299fadd7a2423998c6 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Fri, 24 Nov 2023 11:12:03 +0100
Subject: [PATCH 30/82] remove martini2 from ffoutput test as it fails on GH

---
 polyply/tests/test_ffoutput.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/polyply/tests/test_ffoutput.py b/polyply/tests/test_ffoutput.py
index c5855bd6..5b8ecaa7 100644
--- a/polyply/tests/test_ffoutput.py
+++ b/polyply/tests/test_ffoutput.py
@@ -69,7 +69,7 @@ def equal_ffs(ff1, ff2):
      '2016H66',
      'gromos53A6',
      'oplsaaLigParGen',
-     'martini2',
+ #    'martini2',
      'parmbsc1',
 ])
 def test_ffoutput(tmp_path, libname):

From e50e232ad3f2e0e2fb518ea687ccb626ec8f7707 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Fri, 24 Nov 2023 12:47:25 +0100
Subject: [PATCH 31/82] add test for extract links

---
 polyply/tests/test_molecule_utils.py | 77 ++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 polyply/tests/test_molecule_utils.py

diff --git a/polyply/tests/test_molecule_utils.py b/polyply/tests/test_molecule_utils.py
new file mode 100644
index 00000000..de15dc1d
--- /dev/null
+++ b/polyply/tests/test_molecule_utils.py
@@ -0,0 +1,77 @@
+# Copyright 2022 University of Groningen
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Test the link extraction in molecule_utils used by itp_to_ff.
+"""
+import pytest
+from pathlib import Path
+import networkx as nx
+from vermouth.molecule import Interaction
+from polyply.src.molecule_utils import extract_links
+from .test_apply_links import example_meta_molecule
+
+@pytest.mark.parametrize('inters, expected',(
+    # simple bond spanning two residues
+    ({'bonds':[Interaction(atoms=(0, 1), parameters=['1', '0.33', '500'], meta={}),
+               Interaction(atoms=(1, 2), parameters=['1', '0.33', '500'], meta={}),
+               Interaction(atoms=(1, 4), parameters=['1', '0.30', '500'], meta={}),
+               Interaction(atoms=(4, 5), parameters=['1', '0.35', '500'], meta={}),]},
+     {'bonds': [Interaction(atoms=['BB1', '+BB'],
+                            parameters=['1', '0.30', '500'],
+                            meta={'version': 0, 'comment': 'link'}),
+               ]},
+    ),
+    # double version dihedral spanning two residues
+    ({'dihedrals':[Interaction(atoms=(0, 1, 4, 5),
+                               parameters=['9', '120', '4', '1'],
+                               meta={}),
+                   Interaction(atoms=(0, 1, 4, 5),
+                               parameters=['9', '120', '4', '2'],
+                               meta={}),
+                   Interaction(atoms=(0, 1, 2, 3),
+                               parameters=['9', '120', '4', '2'],
+                               meta={})]
+     },
+     {'dihedrals': [Interaction(atoms=['BB', 'BB1', '+BB', '+BB1'],
+                                parameters=['9', '120', '4', '1'],
+                                meta={'version': 0, 'comment': 'link'}),
+                    Interaction(atoms=['BB', 'BB1', '+BB', '+BB1'],
+                                parameters=['9', '120', '4', '2'],
+                                meta={'version': 1, 'comment': 'link'}),]
+     },
+    ),
+    # 1-5 pairs spanning 3 residues
+    ({'pairs': [Interaction(atoms=(1, 9),
+                            parameters=[1],
+                            meta={})]},
+    {'pairs': [Interaction(atoms=['BB1', '++BB'],
+                           parameters=[1],
+                           meta={'version': 0, 'comment': 'link'})]
+    }),
+))
+def test_extract_links(example_meta_molecule, inters, expected):
+    mol = example_meta_molecule.molecule
+    mol.add_edges_from([(1, 4), (8, 9)])
+    nx.set_node_attributes(mol, {0: "resA", 1: "resA", 2: "resA", 3: "resA",
+                                 4: "resB", 5: "resB", 6: "resB", 7: "resB", 8: "resB",
+                                 9: "resA", 10: "resA", 11: "resA", 12: "resA"}, "resname")
+    nx.set_node_attributes(mol, {0: "BB", 1: "BB1", 2: "SC1", 3: "SC2",
+                                 4: "BB", 5: "BB1", 6: "BB2", 7: "SC1", 8: "SC2",
+                                 9: "BB", 10: "BB1", 11: "SC1", 12: "SC2"}, "atomname")
+    mol.interactions.update(inters)
+    link = extract_links(mol)[0]
+    for inter_type in expected:
+        assert expected[inter_type] == link.interactions[inter_type]
+
+

From 6c94485f37cd7b32f82fe5007c3c168326ddfe9c Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Fri, 24 Nov 2023 12:57:04 +0100
Subject: [PATCH 32/82] add test for extract links with redundant interaction

---
 polyply/tests/test_molecule_utils.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/polyply/tests/test_molecule_utils.py b/polyply/tests/test_molecule_utils.py
index de15dc1d..8af59cab 100644
--- a/polyply/tests/test_molecule_utils.py
+++ b/polyply/tests/test_molecule_utils.py
@@ -59,6 +59,18 @@
                            parameters=[1],
                            meta={'version': 0, 'comment': 'link'})]
     }),
+    # redundant pair
+    ({'pairs': [Interaction(atoms=(1, 5),
+                            parameters=[1],
+                            meta={}),
+                Interaction(atoms=(5, 9),
+                            parameters=[1],
+                            meta={}),
+               ],},
+    {'pairs': [Interaction(atoms=['BB1', '+BB1'],
+                           parameters=[1],
+                           meta={'version': 0, 'comment': 'link'})]
+    }),
 ))
 def test_extract_links(example_meta_molecule, inters, expected):
     mol = example_meta_molecule.molecule

From e343211a48240126629079a938f4aab480e97f73 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Fri, 24 Nov 2023 13:56:49 +0100
Subject: [PATCH 33/82] test for charge balancing

---
 polyply/tests/test_charges.py | 51 +++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 polyply/tests/test_charges.py

diff --git a/polyply/tests/test_charges.py b/polyply/tests/test_charges.py
new file mode 100644
index 00000000..59b3c5ff
--- /dev/null
+++ b/polyply/tests/test_charges.py
@@ -0,0 +1,51 @@
+# Copyright 2022 University of Groningen
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Test the charge modification functions used in itp_to_ff.
+"""
+import textwrap
+import pytest
+from pathlib import Path
+import networkx as nx
+import vermouth
+import polyply
+from polyply.src.charges import balance_charges
+@pytest.mark.parametrize('charges, target',(
+    ({0: 0.2, 1: -0.4, 2: 0.23, 3: 0.001},
+     0.0,),
+    ({0: 0.6, 1: -0.2, 2: 0.5, 3: 0.43},
+     0.5,),
+))
+def test_balance_charges(charges, target):
+    lines = """
+    [ moleculetype ]
+    test 1
+    [ atoms ]
+    1 P4 1 GLY BB  1
+    2 P3 1 GLY SC1 2
+    3 P2 1 ALA SC2 3
+    4 P2 1 ALA SC3 3
+    [ bonds ]
+    1 2 1 0.2 100
+    2 3 1 0.6 700
+    3 4 1 0.2 700
+    """
+    lines = textwrap.dedent(lines).splitlines()
+    ff = vermouth.forcefield.ForceField(name='test_ff')
+    polyply.src.polyply_parser.read_polyply(lines, ff)
+    block = ff.blocks['test']
+    nx.set_node_attributes(block, charges, 'charge')
+    balance_charges(block, topology=None, charge=target, tol=10**-4, decimals=4)
+    new_charges = nx.get_node_attributes(block, 'charge')
+    assert pytest.approx(sum(new_charges.values()),abs=0.00001) == target

From 7db6462697a7a1863106323546703c39fb081619 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Fri, 24 Nov 2023 14:05:06 +0100
Subject: [PATCH 34/82] test for charge balancing

---
 polyply/tests/test_charges.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/polyply/tests/test_charges.py b/polyply/tests/test_charges.py
index 59b3c5ff..7f974478 100644
--- a/polyply/tests/test_charges.py
+++ b/polyply/tests/test_charges.py
@@ -26,6 +26,8 @@
      0.0,),
     ({0: 0.6, 1: -0.2, 2: 0.5, 3: 0.43},
      0.5,),
+    ({0: -0.633, 1: -0.532, 2: 0.512, 3: 0.0},
+     -0.6,),
 ))
 def test_balance_charges(charges, target):
     lines = """
@@ -46,6 +48,6 @@ def test_balance_charges(charges, target):
     polyply.src.polyply_parser.read_polyply(lines, ff)
     block = ff.blocks['test']
     nx.set_node_attributes(block, charges, 'charge')
-    balance_charges(block, topology=None, charge=target, tol=10**-4, decimals=4)
+    balance_charges(block, topology=None, charge=target, tol=10**-5, decimals=5)
     new_charges = nx.get_node_attributes(block, 'charge')
-    assert pytest.approx(sum(new_charges.values()),abs=0.00001) == target
+    assert pytest.approx(sum(new_charges.values()),abs=0.0001) == target

From c9dadac5b1c40952b0aebe2617c1bc28697f4e55 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Fri, 24 Nov 2023 14:08:04 +0100
Subject: [PATCH 35/82] implement tolerances for charge balancing

---
 polyply/src/charges.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/polyply/src/charges.py b/polyply/src/charges.py
index d53bae3d..cfd50235 100644
--- a/polyply/src/charges.py
+++ b/polyply/src/charges.py
@@ -95,7 +95,7 @@ def _get_bonds(block, topology=None):
                         bonds[(nodes_to_count[idx], nodes_to_count[jdx])] = float(params)
     return bonds
 
-def balance_charges(block, topology=None, charge=0):
+def balance_charges(block, charge=0, tol=10**-5, decimals=5, topology=None):
     """
     Given a block and a total charge for that block
     balance the charge until the total charge of the
@@ -121,7 +121,7 @@ def balance_charges(block, topology=None, charge=0):
     block.make_edges_from_interaction_type('bonds')
     keys = nx.get_node_attributes(block, 'charge').keys()
     charges = np.array(list(nx.get_node_attributes(block, 'charge').values()))
-    if np.isclose(charges.sum(), 0, atol=1*10**-6):
+    if np.isclose(charges.sum(), 0, atol=tol):
         return block
 
     # we need to equalize the charge
@@ -139,7 +139,7 @@ def loss(arr):
         return loss
 
     opt_results = scipy.optimize.minimize(loss, charges, method='L-BFGS-B',
-                                          options={'ftol': 0.001, 'maxiter': 100})
-    balanced_charges = opt_results['x']
+                                          options={'ftol': tol, 'maxiter': 100})
+    balanced_charges = np.around(opt_results['x'], decimals)
     nx.set_node_attributes(block, dict(zip(keys, balanced_charges)), 'charge')
     return block

From b6937354ef333553afed5838cf8fc6a42c033f75 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Fri, 24 Nov 2023 14:34:42 +0100
Subject: [PATCH 36/82] add integration tests itp_to_ff and adjust CLI

---
 bin/polyply                     |  4 +++-
 polyply/tests/test_itp_to_ff.py | 14 +++++++-------
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/bin/polyply b/bin/polyply
index 8ff25efa..eff60024 100755
--- a/bin/polyply
+++ b/bin/polyply
@@ -239,7 +239,9 @@ def main(): # pylint: disable=too-many-locals,too-many-statements
     parser_itp_ff.add_argument('-rn', dest="resnames", nargs='*')
     parser_itp_ff.add_argument('-tp',dest="term_prefix", default="ter")
     parser_itp_ff.add_argument('-o', dest="outpath", type=Path)
-    parser_itp_ff.add_argument('-c', dest="charge", type=float, default=0.0)
+    parser_itp_ff.add_argument('-c', dest="charges", type=float, nargs='*')
+    parser_itp_ff.add_argument('-tol', dest="tolerance", type=float, default=1e-5)
+    parser_itp_ff.add_argument('-d', dest="decimals", type=int, default=5)
 
     parser_itp_ff.set_defaults(func=itp_to_ff)
 
diff --git a/polyply/tests/test_itp_to_ff.py b/polyply/tests/test_itp_to_ff.py
index 588515d7..df97d73e 100644
--- a/polyply/tests/test_itp_to_ff.py
+++ b/polyply/tests/test_itp_to_ff.py
@@ -67,22 +67,22 @@ def itp_equal(ref_mol, new_mol):
                 assert False
     return True
 
-@pytest.mark.parametrize("case, smiles, resnames, charge", [
-    ("PEO_OHter", ["[OH][CH2]", "[CH2]O[CH2]", "[CH2][OH]"], ["OH", "PEO", "OH"], 0),
-    ("PEG_PBE", ["[CH3]", "[CH2][CH][CH][CH2]", "[CH2]O[CH2]"], ["CH3", "PBE", "PEO"], 0),
+@pytest.mark.parametrize("case, smiles, resnames, charges", [
+    ("PEO_OHter", ["[OH][CH2]", "[CH2]O[CH2]", "[CH2][OH]"], ["OH", "PEO", "OH"], [0, 0, 0]),
+    ("PEG_PBE", ["[CH3]", "[CH2][CH][CH][CH2]", "[CH2]O[CH2]"], ["CH3", "PBE", "PEO"], [0, 0, 0]),
 ])
-def _test_ffoutput(tmp_path, case, smiles, resnames, charge):
+def test_itp_to_ff(tmp_path, case, smiles, resnames, charges):
     """
     Call itp-to-ff and check if it generates the same force-field
     as in the ref.ff file.
     """
-    tmp_path = Path("/coarse/fabian/current-projects/polymer_itp_builder/polyply_2.0/polyply/tests/test_data/tmp")
+    tmp_path = Path("/Users/fabian/ProgramDev/polyply_1.0/polyply/tests/test_data/itp_to_ff/PEG_PBE/tmp")
     tmp_file = Path(tmp_path) / "test.ff"
     inpath = Path(polyply.TEST_DATA) / "itp_to_ff" / case
     itp_to_ff(itppath=inpath/"in_itp.itp",
               fragment_smiles=smiles,
               resnames=resnames,
-              charge=charge,
+              charges=charges,
               term_prefix='ter',
               outpath=tmp_file,)
     # now generate an itp file with this ff-file
@@ -92,6 +92,6 @@ def _test_ffoutput(tmp_path, case, smiles, resnames, charge):
                outpath=tmp_itp, name="new")
     # read the itp-file and return a molecule
     new_mol = _read_itp(tmp_itp)
-    ref_mol = _read_itp(inpath/"in_itp.itp")
+    ref_mol = _read_itp(inpath/"ref.itp")
     # check if itps are the same
     assert itp_equal(ref_mol, new_mol)

From 1c542cbcb68b1edf938a75650c232152aa8affd1 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Fri, 24 Nov 2023 14:35:14 +0100
Subject: [PATCH 37/82] fix bug in integration tests itp_to_ff

---
 polyply/tests/test_itp_to_ff.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/polyply/tests/test_itp_to_ff.py b/polyply/tests/test_itp_to_ff.py
index df97d73e..ac727795 100644
--- a/polyply/tests/test_itp_to_ff.py
+++ b/polyply/tests/test_itp_to_ff.py
@@ -76,7 +76,6 @@ def test_itp_to_ff(tmp_path, case, smiles, resnames, charges):
     Call itp-to-ff and check if it generates the same force-field
     as in the ref.ff file.
     """
-    tmp_path = Path("/Users/fabian/ProgramDev/polyply_1.0/polyply/tests/test_data/itp_to_ff/PEG_PBE/tmp")
     tmp_file = Path(tmp_path) / "test.ff"
     inpath = Path(polyply.TEST_DATA) / "itp_to_ff" / case
     itp_to_ff(itppath=inpath/"in_itp.itp",

From aa0865d683fc1336d1d7f85798bb31768a77b8ad Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Fri, 24 Nov 2023 15:04:59 +0100
Subject: [PATCH 38/82] complex integration test itp_to_ff plus charged mol

---
 polyply/src/itp_to_ff.py                      |   4 +-
 .../tests/test_data/itp_to_ff/ACOL/in_itp.itp | 680 ++++++++++++++++++
 .../tests/test_data/itp_to_ff/ACOL/ref.itp    | 677 +++++++++++++++++
 .../tests/test_data/itp_to_ff/ACOL/ref.top    |  28 +
 .../tests/test_data/itp_to_ff/ACOL/seq.txt    |   1 +
 polyply/tests/test_itp_to_ff.py               |   5 +
 6 files changed, 1394 insertions(+), 1 deletion(-)
 create mode 100644 polyply/tests/test_data/itp_to_ff/ACOL/in_itp.itp
 create mode 100644 polyply/tests/test_data/itp_to_ff/ACOL/ref.itp
 create mode 100644 polyply/tests/test_data/itp_to_ff/ACOL/ref.top
 create mode 100644 polyply/tests/test_data/itp_to_ff/ACOL/seq.txt

diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py
index 76b8bf0d..bd08e1bd 100644
--- a/polyply/src/itp_to_ff.py
+++ b/polyply/src/itp_to_ff.py
@@ -66,7 +66,9 @@ def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charges=
         if itppath.suffix == ".top":
             base_resname = name.split(term_prefix)[0].split('_')[0]
             print(base_resname)
-            balance_charges(new_block, top, crg_dict[base_resname])
+            balance_charges(new_block,
+                            topology=top,
+                            charge=crg_dict[base_resname])
 
     force_field.links = extract_links(mol)
 
diff --git a/polyply/tests/test_data/itp_to_ff/ACOL/in_itp.itp b/polyply/tests/test_data/itp_to_ff/ACOL/in_itp.itp
new file mode 100644
index 00000000..566a82c1
--- /dev/null
+++ b/polyply/tests/test_data/itp_to_ff/ACOL/in_itp.itp
@@ -0,0 +1,680 @@
+[ moleculetype ]
+; Name               nrexcl
+ref                   3
+[ atoms ]
+;   nr       type  resnr residue  atom   cgnr     charge       mass  
+     1   opls_800      1    UNL   O00      1    -0.3942    15.9990 
+     2   opls_801      1    UNL   C01      1     0.3911    12.0110 
+     3   opls_802      1    UNL   C02      1    -0.1501    12.0110 
+     4   opls_803      1    UNL   O03      1    -0.3449    15.9990 
+     5   opls_804      1    UNL   C04      1    -0.1595    12.0110 
+     6   opls_805      1    UNL   H05      1     0.1269     1.0080 
+     7   opls_806      1    UNL   H06      1     0.1269     1.0080 
+     8   opls_807      1    UNL   C07      1    -0.0916    12.0110 
+     9   opls_808      1    UNL   H08      1     0.1135     1.0080 
+    10   opls_809      1    UNL   H09      1     0.1135     1.0080 
+    11   opls_810      1    UNL   C0A      1    -0.1496    12.0110 
+    12   opls_811      1    UNL   C0B      1     0.3901    12.0110 
+    13   opls_812      1    UNL   H0C      1     0.1118     1.0080 
+    14   opls_813      1    UNL   C0D      1    -0.0920    12.0110 
+    15   opls_814      1    UNL   H0E      1     0.1149     1.0080 
+    16   opls_815      1    UNL   H0F      1     0.1149     1.0080 
+    17   opls_816      1    UNL   O0G      1    -0.3434    15.9990 
+    18   opls_817      1    UNL   O0H      1    -0.3876    15.9990 
+    19   opls_818      1    UNL   C0I      1    -0.1460    12.0110 
+    20   opls_819      1    UNL   C0J      1     0.3959    12.0110 
+    21   opls_820      1    UNL   H0K      1     0.1177     1.0080 
+    22   opls_821      1    UNL   C0M      1    -0.0273    12.0110 
+    23   opls_822      1    UNL   C0N      1    -0.0916    12.0110 
+    24   opls_823      1    UNL   H0O      1     0.1194     1.0080 
+    25   opls_824      1    UNL   H0P      1     0.1194     1.0080 
+    26   opls_825      1    UNL   O0Q      1    -0.3478    15.9990 
+    27   opls_826      1    UNL   O0R      1    -0.3336    15.9990 
+    28   opls_827      1    UNL   C0S      1    -0.1411    12.0110 
+    29   opls_828      1    UNL   C0T      1     0.3737    12.0110 
+    30   opls_829      1    UNL   H0U      1     0.1083     1.0080 
+    31   opls_830      1    UNL   C0V      1     0.0287    12.0110 
+    32   opls_831      1    UNL   C0W      1    -0.0926    12.0110 
+    33   opls_832      1    UNL   H0X      2     0.1142     1.0080 
+    34   opls_833      1    UNL   H0Y      2     0.1142     1.0080 
+    35   opls_834      1    UNL   O0Z      2    -0.3484    15.9990 
+    36   opls_835      1    UNL   O10      2    -0.3544    15.9990 
+    37   opls_836      1    UNL   C11      2    -0.1709    12.0110 
+    38   opls_837      1    UNL   H12      2     0.0965     1.0080 
+    39   opls_838      1    UNL   H13      2     0.0965     1.0080 
+    40   opls_839      1    UNL   C14      2    -0.2114    12.0110 
+    41   opls_840      1    UNL   C15      2     0.3799    12.0110 
+    42   opls_841      1    UNL   H16      2     0.1129     1.0080 
+    43   opls_842      1    UNL   C17      2    -0.0170    12.0110 
+    44   opls_843      1    UNL   H18      2     0.0946     1.0080 
+    45   opls_844      1    UNL   H19      2     0.0946     1.0080 
+    46   opls_845      1    UNL   H1A      2     0.0946     1.0080 
+    47   opls_846      1    UNL   O1B      2    -0.3369    15.9990 
+    48   opls_847      1    UNL   O1C      2    -0.3839    15.9990 
+    49   opls_848      1    UNL   H1D      2     0.0757     1.0080 
+    50   opls_849      1    UNL   H1E      2     0.0757     1.0080 
+    51   opls_850      1    UNL   H1F      2     0.0757     1.0080 
+    52   opls_851      1    UNL   C1G      2    -0.0289    12.0110 
+    53   opls_852      1    UNL   H1H      2     0.0867     1.0080 
+    54   opls_853      1    UNL   H1I      2     0.0867     1.0080 
+    55   opls_854      1    UNL   H1J      2     0.0867     1.0080 
+    56   opls_855      1    UNL   N1K      2     0.1659    14.0070 
+    57   opls_856      1    UNL   H1M      2     0.1558     1.0080 
+    58   opls_857      1    UNL   H1N      2     0.1558     1.0080 
+    59   opls_858      1    UNL   C1O      2    -0.2247    12.0110 
+    60   opls_859      1    UNL   C1P      2    -0.2238    12.0110 
+    61   opls_860      1    UNL   C1Q      2    -0.2254    12.0110 
+    62   opls_861      1    UNL   H1R      2     0.1443     1.0080 
+    63   opls_862      1    UNL   H1S      2     0.1443     1.0080 
+    64   opls_863      1    UNL   H1T      2     0.1443     1.0080 
+    65   opls_864      1    UNL   H1U      2     0.1436     1.0080 
+    66   opls_865      1    UNL   H1V      3     0.1436     1.0080 
+    67   opls_866      1    UNL   H1W      3     0.1436     1.0080 
+    68   opls_867      1    UNL   H1X      3     0.1427     1.0080 
+    69   opls_868      1    UNL   H1Y      3     0.1427     1.0080 
+    70   opls_869      1    UNL   H1Z      3     0.1427     1.0080 
+    71   opls_870      1    UNL   H20      3     0.0844     1.0080 
+    72   opls_871      1    UNL   H21      3     0.0844     1.0080 
+    73   opls_872      1    UNL   H22      3     0.0844     1.0080 
+    74   opls_873      1    UNL   C23      3    -0.0241    12.0110 
+    75   opls_874      1    UNL   H24      3     0.0894     1.0080 
+    76   opls_875      1    UNL   H25      3     0.0894     1.0080 
+    77   opls_876      1    UNL   H26      3     0.0894     1.0080 
+[ bonds ]
+    2     1     1      0.1229 476976.000
+    3     2     1      0.1522 265265.600
+    4     2     1      0.1327 179075.200
+    5     3     1      0.1529 224262.400
+    6     3     1      0.1090 284512.000
+    7     3     1      0.1090 284512.000
+    8     5     1      0.1529 224262.400
+    9     5     1      0.1090 284512.000
+   10     5     1      0.1090 284512.000
+   11     8     1      0.1529 224262.400
+   12     8     1      0.1522 265265.600
+   13     8     1      0.1090 284512.000
+   14    11     1      0.1529 224262.400
+   15    11     1      0.1090 284512.000
+   16    11     1      0.1090 284512.000
+   17    12     1      0.1327 179075.200
+   18    12     1      0.1229 476976.000
+   19    14     1      0.1529 224262.400
+   20    14     1      0.1522 265265.600
+   21    14     1      0.1090 284512.000
+   22    17     1      0.1410 267776.000
+   23    19     1      0.1529 224262.400
+   24    19     1      0.1090 284512.000
+   25    19     1      0.1090 284512.000
+   26    20     1      0.1327 179075.200
+   27    20     1      0.1229 476976.000
+   28    23     1      0.1529 224262.400
+   29    23     1      0.1522 265265.600
+   30    23     1      0.1090 284512.000
+   31    26     1      0.1410 267776.000
+   32    28     1      0.1529 224262.400
+   33    28     1      0.1090 284512.000
+   34    28     1      0.1090 284512.000
+   35    29     1      0.1327 179075.200
+   36    29     1      0.1229 476976.000
+   37    31     1      0.1529 224262.400
+   38    31     1      0.1090 284512.000
+   39    31     1      0.1090 284512.000
+   40    32     1      0.1529 224262.400
+   41    32     1      0.1522 265265.600
+   42    32     1      0.1090 284512.000
+   43    35     1      0.1410 267776.000
+   44    40     1      0.1090 284512.000
+   45    40     1      0.1090 284512.000
+   46    40     1      0.1090 284512.000
+   47    41     1      0.1327 179075.200
+   48    41     1      0.1229 476976.000
+   49    43     1      0.1090 284512.000
+   50    43     1      0.1090 284512.000
+   51    43     1      0.1090 284512.000
+   52    47     1      0.1410 267776.000
+   53    52     1      0.1090 284512.000
+   54    52     1      0.1090 284512.000
+   55    52     1      0.1090 284512.000
+   56    37     1      0.1471 307105.600
+   57    37     1      0.1090 284512.000
+   58    37     1      0.1090 284512.000
+   59    56     1      0.1471 307105.600
+   60    56     1      0.1471 307105.600
+   61    56     1      0.1471 307105.600
+   62    59     1      0.1090 284512.000
+   63    59     1      0.1090 284512.000
+   64    59     1      0.1090 284512.000
+   65    60     1      0.1090 284512.000
+   66    60     1      0.1090 284512.000
+   67    60     1      0.1090 284512.000
+   68    61     1      0.1090 284512.000
+   69    61     1      0.1090 284512.000
+   70    61     1      0.1090 284512.000
+   71    22     1      0.1090 284512.000
+   72    22     1      0.1090 284512.000
+   73    22     1      0.1090 284512.000
+   74     4     1      0.1410 267776.000
+   75    74     1      0.1090 284512.000
+   76    74     1      0.1090 284512.000
+   77    74     1      0.1090 284512.000
+
+[ angles ]
+;  ai    aj    ak funct            c0            c1            c2            c3 
+    1     2     3     1    120.400    669.440
+    1     2     4     1    123.400    694.544
+    2     3     5     1    111.100    527.184
+    2     3     6     1    109.500    292.880
+    2     3     7     1    109.500    292.880
+    3     5     8     1    112.700    488.273
+    3     5     9     1    110.700    313.800
+    3     5    10     1    110.700    313.800
+    5     8    11     1    112.700    488.273
+    5     8    12     1    111.100    527.184
+    5     8    13     1    110.700    313.800
+    8    11    14     1    112.700    488.273
+    8    11    15     1    110.700    313.800
+    8    11    16     1    110.700    313.800
+    8    12    17     1    111.400    677.808
+    8    12    18     1    120.400    669.440
+   11    14    19     1    112.700    488.273
+   11    14    20     1    111.100    527.184
+   11    14    21     1    110.700    313.800
+   12    17    22     1    116.900    694.544
+   14    19    23     1    112.700    488.273
+   14    19    24     1    110.700    313.800
+   14    19    25     1    110.700    313.800
+   14    20    26     1    111.400    677.808
+   14    20    27     1    120.400    669.440
+   19    23    28     1    112.700    488.273
+   19    23    29     1    111.100    527.184
+   19    23    30     1    110.700    313.800
+   20    26    31     1    116.900    694.544
+   23    28    32     1    112.700    488.273
+   23    28    33     1    110.700    313.800
+   23    28    34     1    110.700    313.800
+   23    29    35     1    111.400    677.808
+   23    29    36     1    120.400    669.440
+   26    31    37     1    109.500    418.400
+   26    31    38     1    109.500    292.880
+   26    31    39     1    109.500    292.880
+   28    32    40     1    112.700    488.273
+   28    32    41     1    111.100    527.184
+   28    32    42     1    110.700    313.800
+   29    35    43     1    116.900    694.544
+   32    40    44     1    110.700    313.800
+   32    40    45     1    110.700    313.800
+   32    40    46     1    110.700    313.800
+   32    41    47     1    111.400    677.808
+   32    41    48     1    120.400    669.440
+   35    43    49     1    109.500    292.880
+   35    43    50     1    109.500    292.880
+   35    43    51     1    109.500    292.880
+   41    47    52     1    116.900    694.544
+   47    52    53     1    109.500    292.880
+   47    52    54     1    109.500    292.880
+   47    52    55     1    109.500    292.880
+   31    37    56     1    111.200    669.440
+   31    37    57     1    110.700    313.800
+   31    37    58     1    110.700    313.800
+   37    56    59     1    113.000    418.400
+   37    56    60     1    113.000    418.400
+   37    56    61     1    113.000    418.400
+   56    59    62     1    109.500    292.880
+   56    59    63     1    109.500    292.880
+   56    59    64     1    109.500    292.880
+   56    60    65     1    109.500    292.880
+   56    60    66     1    109.500    292.880
+   56    60    67     1    109.500    292.880
+   56    61    68     1    109.500    292.880
+   56    61    69     1    109.500    292.880
+   56    61    70     1    109.500    292.880
+   17    22    71     1    109.500    292.880
+   17    22    72     1    109.500    292.880
+   17    22    73     1    109.500    292.880
+    2     4    74     1    116.900    694.544
+    4    74    75     1    109.500    292.880
+    4    74    76     1    109.500    292.880
+    4    74    77     1    109.500    292.880
+   49    43    50     1    107.800    276.144
+   23    19    25     1    110.700    313.800
+   45    40    46     1    107.800    276.144
+   54    52    55     1    107.800    276.144
+   28    23    30     1    110.700    313.800
+   65    60    66     1    107.800    276.144
+   62    59    64     1    107.800    276.144
+   41    32    42     1    109.500    292.880
+   75    74    76     1    107.800    276.144
+   37    31    39     1    110.700    313.800
+   59    56    60     1    113.000    418.400
+   14    11    16     1    110.700    313.800
+   44    40    45     1    107.800    276.144
+   26    20    27     1    123.400    694.544
+   56    37    57     1    109.500    292.880
+   76    74    77     1    107.800    276.144
+   32    28    34     1    110.700    313.800
+   37    31    38     1    110.700    313.800
+   29    23    30     1    109.500    292.880
+   32    28    33     1    110.700    313.800
+   23    19    24     1    110.700    313.800
+   65    60    67     1    107.800    276.144
+   19    14    21     1    110.700    313.800
+   71    22    73     1    107.800    276.144
+   53    52    54     1    107.800    276.144
+   56    37    58     1    109.500    292.880
+   66    60    67     1    107.800    276.144
+   72    22    73     1    107.800    276.144
+   60    56    61     1    113.000    418.400
+    5     3     6     1    110.700    313.800
+   63    59    64     1    107.800    276.144
+   71    22    72     1    107.800    276.144
+   62    59    63     1    107.800    276.144
+   11     8    12     1    111.100    527.184
+   35    29    36     1    123.400    694.544
+   50    43    51     1    107.800    276.144
+   68    61    70     1    107.800    276.144
+   15    11    16     1    107.800    276.144
+    5     3     7     1    110.700    313.800
+   57    37    58     1    107.800    276.144
+   17    12    18     1    123.400    694.544
+   44    40    46     1    107.800    276.144
+   75    74    77     1    107.800    276.144
+    8     5    10     1    110.700    313.800
+   20    14    21     1    109.500    292.880
+    6     3     7     1    107.800    276.144
+   53    52    55     1    107.800    276.144
+   59    56    61     1    113.000    418.400
+    8     5     9     1    110.700    313.800
+   33    28    34     1    107.800    276.144
+   38    31    39     1    107.800    276.144
+   40    32    41     1    111.100    527.184
+   11     8    13     1    110.700    313.800
+   14    11    15     1    110.700    313.800
+   24    19    25     1    107.800    276.144
+    9     5    10     1    107.800    276.144
+   68    61    69     1    107.800    276.144
+   69    61    70     1    107.800    276.144
+    3     2     4     1    111.400    677.808
+   28    23    29     1    111.100    527.184
+   19    14    20     1    111.100    527.184
+   49    43    51     1    107.800    276.144
+   40    32    42     1    110.700    313.800
+   47    41    48     1    123.400    694.544
+   12     8    13     1    109.500    292.880
+
+[ dihedrals ]
+; IMPROPER DIHEDRAL ANGLES 
+;  ai    aj    ak    al funct            c0            c1            c2            c3            c4            c5
+    18    12     8    17    4        180.000     43.932     2  
+    27    20    14    26    4        180.000     43.932     2  
+    48    41    32    47    4        180.000     43.932     2  
+    36    29    23    35    4        180.000     43.932     2  
+     4     2     1     3    4        180.000     43.932     2  
+
+[ dihedrals ]
+; PROPER DIHEDRAL ANGLES
+;  ai    aj    ak    al funct            c0            c1            c2            c3            c4            c5
+   12    8    5    3        3      -4.960   6.286   1.310  -2.636  -0.000   0.000
+   29   23   19   14        3      -4.960   6.286   1.310  -2.636  -0.000   0.000
+   41   32   28   23        3      -4.960   6.286   1.310  -2.636  -0.000   0.000
+   20   14   11    8        3      -4.960   6.286   1.310  -2.636  -0.000   0.000
+   20   14   11   15        3      -0.209  -0.628   0.000   0.837  -0.000   0.000
+   41   32   28   33        3      -0.209  -0.628   0.000   0.837  -0.000   0.000
+   12    8    5    9        3      -0.209  -0.628   0.000   0.837  -0.000   0.000
+   29   23   19   24        3      -0.209  -0.628   0.000   0.837  -0.000   0.000
+   20   14   11   16        3      -0.209  -0.628   0.000   0.837  -0.000   0.000
+   12    8    5   10        3      -0.209  -0.628   0.000   0.837  -0.000   0.000
+   41   32   28   34        3      -0.209  -0.628   0.000   0.837  -0.000   0.000
+   29   23   19   25        3      -0.209  -0.628   0.000   0.837  -0.000   0.000
+    5    3    2    1        3       0.000   0.000   0.000  -0.000  -0.000   0.000
+    5    3    2    4        3      -1.157  -3.471   0.000   4.628  -0.000   0.000
+   14   11    8   12        3      -4.960   6.286   1.310  -2.636  -0.000   0.000
+   23   19   14   20        3      -4.960   6.286   1.310  -2.636  -0.000   0.000
+   32   28   23   29        3      -4.960   6.286   1.310  -2.636  -0.000   0.000
+    8    5    3    2        3      -4.960   6.286   1.310  -2.636  -0.000   0.000
+   28   23   19   14        3       2.301  -1.464   0.837  -1.674  -0.000   0.000
+   23   19   14   11        3       2.301  -1.464   0.837  -1.674  -0.000   0.000
+   11    8    5    3        3       2.301  -1.464   0.837  -1.674  -0.000   0.000
+   32   28   23   19        3       2.301  -1.464   0.837  -1.674  -0.000   0.000
+   14   11    8    5        3       2.301  -1.464   0.837  -1.674  -0.000   0.000
+   19   14   11    8        3       2.301  -1.464   0.837  -1.674  -0.000   0.000
+   40   32   28   23        3       2.301  -1.464   0.837  -1.674  -0.000   0.000
+   28   23   19   25        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   40   32   28   33        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+    8    5    3    7        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   23   19   14   21        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   40   32   28   34        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   28   23   19   24        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   32   28   23   30        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+    8    5    3    6        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   11    8    5    9        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   14   11    8   13        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   11    8    5   10        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   19   14   11   15        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   19   14   11   16        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   37   31   26   20        3      -2.197   5.201   0.527  -3.531  -0.000   0.000
+   61   56   37   31        3       3.042  -1.351   0.519  -2.209  -0.000   0.000
+   59   56   37   31        3       3.042  -1.351   0.519  -2.209  -0.000   0.000
+   60   56   37   31        3       3.042  -1.351   0.519  -2.209  -0.000   0.000
+   61   56   37   57        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   60   56   37   58        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   59   56   37   57        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   60   56   37   57        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   59   56   37   58        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   61   56   37   58        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   31   26   20   14        3      31.206  -9.768  -21.439  -0.000  -0.000   0.000
+   74    4    2    3        3      31.206  -9.768  -21.439  -0.000  -0.000   0.000
+   43   35   29   23        3      31.206  -9.768  -21.439  -0.000  -0.000   0.000
+   22   17   12    8        3      31.206  -9.768  -21.439  -0.000  -0.000   0.000
+   52   47   41   32        3      31.206  -9.768  -21.439  -0.000  -0.000   0.000
+   74    4    2    1        3      21.439   0.000  -21.439  -0.000  -0.000   0.000
+   22   17   12   18        3      21.439   0.000  -21.439  -0.000  -0.000   0.000
+   43   35   29   36        3      21.439   0.000  -21.439  -0.000  -0.000   0.000
+   31   26   20   27        3      21.439   0.000  -21.439  -0.000  -0.000   0.000
+   52   47   41   48        3      21.439   0.000  -21.439  -0.000  -0.000   0.000
+    7    3    2    1        3       0.000   0.000   0.000  -0.000  -0.000   0.000
+    6    3    2    1        3       0.000   0.000   0.000  -0.000  -0.000   0.000
+    7    3    2    4        3       0.276   0.828   0.000  -1.105  -0.000   0.000
+    6    3    2    4        3       0.276   0.828   0.000  -1.105  -0.000   0.000
+   46   40   32   41        3      -0.209  -0.628   0.000   0.837  -0.000   0.000
+   10    5    3    2        3      -0.209  -0.628   0.000   0.837  -0.000   0.000
+   15   11    8   12        3      -0.209  -0.628   0.000   0.837  -0.000   0.000
+   44   40   32   41        3      -0.209  -0.628   0.000   0.837  -0.000   0.000
+   45   40   32   41        3      -0.209  -0.628   0.000   0.837  -0.000   0.000
+   34   28   23   29        3      -0.209  -0.628   0.000   0.837  -0.000   0.000
+   24   19   14   20        3      -0.209  -0.628   0.000   0.837  -0.000   0.000
+    9    5    3    2        3      -0.209  -0.628   0.000   0.837  -0.000   0.000
+   25   19   14   20        3      -0.209  -0.628   0.000   0.837  -0.000   0.000
+   16   11    8   12        3      -0.209  -0.628   0.000   0.837  -0.000   0.000
+   33   28   23   29        3      -0.209  -0.628   0.000   0.837  -0.000   0.000
+   34   28   23   19        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   13    8    5    3        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   15   11    8    5        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   45   40   32   28        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   24   19   14   11        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   16   11    8    5        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   42   32   28   23        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   33   28   23   19        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   46   40   32   28        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   21   14   11    8        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   30   23   19   14        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   44   40   32   28        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   25   19   14   11        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+    9    5    3    6        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   45   40   32   42        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   58   37   31   38        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   24   19   14   21        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   21   14   11   15        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   13    8    5    9        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   15   11    8   13        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   42   32   28   33        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   46   40   32   42        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   13    8    5   10        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   57   37   31   39        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   58   37   31   39        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   34   28   23   30        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   30   23   19   25        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+    9    5    3    7        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   10    5    3    6        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   30   23   19   24        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   10    5    3    7        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   21   14   11   16        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   16   11    8   13        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   57   37   31   38        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   25   19   14   21        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   33   28   23   30        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   44   40   32   42        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   42   32   28   34        3       0.628   1.883   0.000  -2.510  -0.000   0.000
+   58   37   31   26        3       0.979   2.937   0.000  -3.916  -0.000   0.000
+   57   37   31   26        3       0.979   2.937   0.000  -3.916  -0.000   0.000
+   70   61   56   37        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   63   59   56   60        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   64   59   56   61        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   63   59   56   61        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   66   60   56   61        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   67   60   56   61        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   65   60   56   61        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   67   60   56   59        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   64   59   56   37        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   63   59   56   37        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   68   61   56   59        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   70   61   56   60        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   62   59   56   61        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   69   61   56   59        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   65   60   56   59        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   69   61   56   60        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   70   61   56   59        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   68   61   56   60        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   66   60   56   37        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   66   60   56   59        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   62   59   56   37        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   64   59   56   60        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   69   61   56   37        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   68   61   56   37        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   65   60   56   37        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   67   60   56   37        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   62   59   56   60        3       0.632   1.895   0.000  -2.527  -0.000   0.000
+   49   43   35   29        3       0.414   1.243   0.000  -1.657  -0.000   0.000
+   75   74    4    2        3       0.414   1.243   0.000  -1.657  -0.000   0.000
+   71   22   17   12        3       0.414   1.243   0.000  -1.657  -0.000   0.000
+   39   31   26   20        3       0.414   1.243   0.000  -1.657  -0.000   0.000
+   50   43   35   29        3       0.414   1.243   0.000  -1.657  -0.000   0.000
+   38   31   26   20        3       0.414   1.243   0.000  -1.657  -0.000   0.000
+   72   22   17   12        3       0.414   1.243   0.000  -1.657  -0.000   0.000
+   73   22   17   12        3       0.414   1.243   0.000  -1.657  -0.000   0.000
+   77   74    4    2        3       0.414   1.243   0.000  -1.657  -0.000   0.000
+   54   52   47   41        3       0.414   1.243   0.000  -1.657  -0.000   0.000
+   55   52   47   41        3       0.414   1.243   0.000  -1.657  -0.000   0.000
+   53   52   47   41        3       0.414   1.243   0.000  -1.657  -0.000   0.000
+   76   74    4    2        3       0.414   1.243   0.000  -1.657  -0.000   0.000
+   51   43   35   29        3       0.414   1.243   0.000  -1.657  -0.000   0.000
+   56   37   31   38        3       0.803   2.410   0.000  -3.213  -0.000   0.000
+   56   37   31   39        3       0.803   2.410   0.000  -3.213  -0.000   0.000
+   56   37   31   26        3      16.736  -16.736   0.000  -0.000  -0.000   0.000
+   36   29   23   28        3       0.000   0.000   0.000  -0.000  -0.000   0.000
+   36   29   23   19        3       0.000   0.000   0.000  -0.000  -0.000   0.000
+   48   41   32   28        3       0.000   0.000   0.000  -0.000  -0.000   0.000
+   27   20   14   11        3       0.000   0.000   0.000  -0.000  -0.000   0.000
+   18   12    8   11        3       0.000   0.000   0.000  -0.000  -0.000   0.000
+   48   41   32   40        3       0.000   0.000   0.000  -0.000  -0.000   0.000
+   18   12    8    5        3       0.000   0.000   0.000  -0.000  -0.000   0.000
+   27   20   14   19        3       0.000   0.000   0.000  -0.000  -0.000   0.000
+   27   20   14   21        3       0.000   0.000   0.000  -0.000  -0.000   0.000
+   36   29   23   30        3       0.000   0.000   0.000  -0.000  -0.000   0.000
+   18   12    8   13        3       0.000   0.000   0.000  -0.000  -0.000   0.000
+   48   41   32   42        3       0.000   0.000   0.000  -0.000  -0.000   0.000
+   17   12    8    5        3      -1.157  -3.471   0.000   4.628  -0.000   0.000
+   17   12    8   11        3      -1.157  -3.471   0.000   4.628  -0.000   0.000
+   26   20   14   11        3      -1.157  -3.471   0.000   4.628  -0.000   0.000
+   35   29   23   19        3      -1.157  -3.471   0.000   4.628  -0.000   0.000
+   47   41   32   28        3      -1.157  -3.471   0.000   4.628  -0.000   0.000
+   35   29   23   28        3      -1.157  -3.471   0.000   4.628  -0.000   0.000
+   26   20   14   19        3      -1.157  -3.471   0.000   4.628  -0.000   0.000
+   47   41   32   40        3      -1.157  -3.471   0.000   4.628  -0.000   0.000
+   17   12    8   13        3       0.276   0.828   0.000  -1.105  -0.000   0.000
+   47   41   32   42        3       0.276   0.828   0.000  -1.105  -0.000   0.000
+   35   29   23   30        3       0.276   0.828   0.000  -1.105  -0.000   0.000
+   26   20   14   21        3       0.276   0.828   0.000  -1.105  -0.000   0.000
+
+[ pairs ]
+     1     5    1
+     1     6    1
+     1     7    1
+     4     5    1
+     4     6    1
+     2     8    1
+     4     7    1
+     2     9    1
+     2    10    1
+     6     8    1
+     3    11    1
+     7     8    1
+     6     9    1
+     3    12    1
+     7     9    1
+     6    10    1
+     3    13    1
+     7    10    1
+     5    14    1
+     9    11    1
+     5    15    1
+    10    11    1
+     9    12    1
+     5    16    1
+    10    12    1
+     9    13    1
+     5    17    1
+    10    13    1
+     5    18    1
+    12    14    1
+    13    14    1
+    12    15    1
+     8    19    1
+    13    15    1
+    12    16    1
+    11    17    1
+     8    20    1
+    13    16    1
+    11    18    1
+     8    21    1
+    13    17    1
+     8    22    1
+    13    18    1
+    15    19    1
+    11    23    1
+    16    19    1
+    15    20    1
+    11    24    1
+    16    20    1
+    15    21    1
+    11    25    1
+    16    21    1
+    11    26    1
+    11    27    1
+    18    22    1
+    14    28    1
+    20    23    1
+    14    29    1
+    21    23    1
+    20    24    1
+    14    30    1
+    21    24    1
+    20    25    1
+    19    26    1
+    14    31    1
+    21    25    1
+    19    27    1
+    21    26    1
+    21    27    1
+    19    32    1
+    24    28    1
+    19    33    1
+    25    28    1
+    24    29    1
+    19    34    1
+    25    29    1
+    24    30    1
+    19    35    1
+    25    30    1
+    19    36    1
+    20    37    1
+    27    31    1
+    20    38    1
+    20    39    1
+    29    32    1
+    30    32    1
+    29    33    1
+    30    33    1
+    29    34    1
+    28    35    1
+    23    40    1
+    30    34    1
+    28    36    1
+    23    41    1
+    30    35    1
+    23    42    1
+    30    36    1
+    23    43    1
+    28    44    1
+    33    40    1
+    28    45    1
+    34    40    1
+    33    41    1
+    28    46    1
+    34    41    1
+    33    42    1
+    28    47    1
+     1    74    1
+    34    42    1
+    28    48    1
+     3    74    1
+     2    75    1
+    29    49    1
+     2    76    1
+    36    43    1
+    29    50    1
+     2    77    1
+    29    51    1
+    26    56    1
+    26    57    1
+    12    71    1
+    32    52    1
+    26    58    1
+    12    72    1
+    41    44    1
+    12    73    1
+    42    44    1
+    41    45    1
+    42    45    1
+    41    46    1
+    40    47    1
+    42    46    1
+    40    48    1
+    42    47    1
+    42    48    1
+    31    59    1
+    31    60    1
+    31    61    1
+    41    53    1
+    38    56    1
+    41    54    1
+    39    56    1
+    38    57    1
+    41    55    1
+    39    57    1
+    38    58    1
+    39    58    1
+    37    62    1
+    48    52    1
+    37    63    1
+    37    64    1
+    37    65    1
+    37    66    1
+    37    67    1
+    37    68    1
+    37    69    1
+    37    70    1
+    57    59    1
+    58    59    1
+    57    60    1
+    58    60    1
+    57    61    1
+    58    61    1
+    60    62    1
+    61    62    1
+    60    63    1
+    61    63    1
+    60    64    1
+    59    65    1
+    61    64    1
+    59    66    1
+    61    65    1
+    59    67    1
+    61    66    1
+    59    68    1
+    61    67    1
+    60    68    1
+    59    69    1
+    60    69    1
+    59    70    1
+    60    70    1
+
diff --git a/polyply/tests/test_data/itp_to_ff/ACOL/ref.itp b/polyply/tests/test_data/itp_to_ff/ACOL/ref.itp
new file mode 100644
index 00000000..9aba902f
--- /dev/null
+++ b/polyply/tests/test_data/itp_to_ff/ACOL/ref.itp
@@ -0,0 +1,677 @@
+; ../../bench.py
+
+; Please cite the following papers:
+
+[ moleculetype ]
+new 3
+
+[ atoms ]
+ 1 opls_800 1 Mter   O3   1 -0.39899 15.999
+ 2 opls_801 1 Mter   C2   1  0.38641 12.011
+ 3 opls_802 1 Mter   C1   1 -0.15511 12.011
+ 4 opls_803 1 Mter   O4   1 -0.34963 15.999
+ 5 opls_804 1 Mter   C0   1 -0.16566 12.011
+ 6 opls_805 1 Mter   H8   1  0.12065  1.008
+ 7 opls_806 1 Mter   H12  1  0.12065  1.008
+ 8 opls_808 1 Mter   H6   1  0.10725  1.008
+ 9 opls_809 1 Mter   H7   1   0.1087  1.008
+10 opls_873 1 Mter   C5   3 -0.02807 12.011
+11 opls_874 1 Mter   H9   3   0.0846  1.008
+12 opls_875 1 Mter   H10  3   0.0846  1.008
+13 opls_876 1 Mter   H11  3   0.0846  1.008
+14 opls_870 2 M      H9   6  0.08562  1.008
+15 opls_807 2 M      C1   4 -0.09038 12.011
+16 opls_871 2 M      H10  6  0.08562  1.008
+17 opls_872 2 M      H11  6  0.08562  1.008
+18 opls_810 2 M      C0   4 -0.14838 12.011
+19 opls_811 2 M      C2   4  0.39132 12.011
+20 opls_812 2 M      H8   4  0.11302  1.008
+21 opls_814 2 M      H6   4  0.11612  1.008
+22 opls_815 2 M      H7   4  0.11612  1.008
+23 opls_816 2 M      O4   4 -0.34218 15.999
+24 opls_817 2 M      O3   4 -0.38638 15.999
+25 opls_821 2 M      C5   4 -0.02608 12.011
+26 opls_813 3 AOL    C1   5 -0.09123 12.011
+27 opls_818 3 AOL    C0   5 -0.14523 12.011
+28 opls_819 3 AOL    C2   5  0.39667 12.011
+29 opls_820 3 AOL    H13  5  0.11847  1.008
+30 opls_823 3 AOL    H12  5  0.12017  1.008
+31 opls_824 3 AOL    H11  5  0.12017  1.008
+32 opls_825 3 AOL    O4   5 -0.34703 15.999
+33 opls_826 3 AOL    O3   5 -0.33283 15.999
+34 opls_830 3 AOL    C5   5  0.02947 12.011
+35 opls_836 3 AOL    C6   6 -0.17013 12.011
+36 opls_837 3 AOL    H14  6  0.09727  1.008
+37 opls_838 3 AOL    H15  6  0.09727  1.008
+38 opls_855 3 AOL    N7   6  0.16667 14.007
+39 opls_856 3 AOL    H17  6  0.15657  1.008
+40 opls_857 3 AOL    H16  6  0.15657  1.008
+41 opls_858 3 AOL    C8   6 -0.22393 12.011
+42 opls_859 3 AOL    C9   6 -0.22303 12.011
+43 opls_860 3 AOL    C10  6 -0.22463 12.011
+44 opls_861 3 AOL    H18  6  0.14507  1.008
+45 opls_862 3 AOL    H19  6  0.14507  1.008
+46 opls_863 3 AOL    H20  6  0.14507  1.008
+47 opls_864 3 AOL    H21  6  0.14437  1.008
+48 opls_865 3 AOL    H22  7  0.14437  1.008
+49 opls_866 3 AOL    H23  7  0.14437  1.008
+50 opls_867 3 AOL    H24  7  0.14347  1.008
+51 opls_868 3 AOL    H25  7  0.14347  1.008
+52 opls_869 3 AOL    H26  7  0.14347  1.008
+53 opls_870 4 M      H9  10  0.08562  1.008
+54 opls_807 4 M      C1   8 -0.09038 12.011
+55 opls_871 4 M      H10 10  0.08562  1.008
+56 opls_872 4 M      H11 10  0.08562  1.008
+57 opls_810 4 M      C0   8 -0.14838 12.011
+58 opls_811 4 M      C2   8  0.39132 12.011
+59 opls_812 4 M      H8   8  0.11302  1.008
+60 opls_814 4 M      H6   8  0.11612  1.008
+61 opls_815 4 M      H7   8  0.11612  1.008
+62 opls_816 4 M      O4   8 -0.34218 15.999
+63 opls_817 4 M      O3   8 -0.38638 15.999
+64 opls_821 4 M      C5   8 -0.02608 12.011
+65 opls_839 5 Mter_1 C0  10 -0.21009 12.011
+66 opls_840 5 Mter_1 C2  10  0.38121 12.011
+67 opls_841 5 Mter_1 H8  10  0.11421  1.008
+68 opls_843 5 Mter_1 H6  10  0.09591  1.008
+69 opls_844 5 Mter_1 H7  10  0.09591  1.008
+70 opls_845 5 Mter_1 H12 10  0.09591  1.008
+71 opls_846 5 Mter_1 O4  10 -0.33559 15.999
+72 opls_847 5 Mter_1 O3  10 -0.38259 15.999
+73 opls_851 5 Mter_1 C5  10 -0.02759 12.011
+74 opls_852 5 Mter_1 H9  10  0.08801  1.008
+75 opls_853 5 Mter_1 H10 10  0.08801  1.008
+76 opls_854 5 Mter_1 H11 10  0.08801  1.008
+77 opls_831 5 Mter_1 C1   9 -0.09129 12.011
+
+[ bonds ]
+ 2  1 1 0.1229 476976.000
+ 3  2 1 0.1522 265265.600
+ 4  2 1 0.1327 179075.200
+ 5  3 1 0.1529 224262.400
+ 6  3 1 0.1090 284512.000
+ 7  3 1 0.1090 284512.000
+ 8  5 1 0.1090 284512.000
+ 9  5 1 0.1090 284512.000
+10  4 1 0.1410 267776.000
+11 10 1 0.1090 284512.000
+12 10 1 0.1090 284512.000
+13 10 1 0.1090 284512.000
+18 15 1 0.1529 224262.400
+19 15 1 0.1522 265265.600
+20 15 1 0.1090 284512.000
+21 18 1 0.1090 284512.000
+22 18 1 0.1090 284512.000
+23 19 1 0.1327 179075.200
+24 19 1 0.1229 476976.000
+25 23 1 0.1410 267776.000
+14 25 1 0.1090 284512.000
+16 25 1 0.1090 284512.000
+17 25 1 0.1090 284512.000
+27 26 1 0.1529 224262.400
+28 26 1 0.1522 265265.600
+29 26 1 0.1090 284512.000
+30 27 1 0.1090 284512.000
+31 27 1 0.1090 284512.000
+32 28 1 0.1327 179075.200
+33 28 1 0.1229 476976.000
+34 32 1 0.1410 267776.000
+35 34 1 0.1529 224262.400
+36 34 1 0.1090 284512.000
+37 34 1 0.1090 284512.000
+38 35 1 0.1471 307105.600
+39 35 1 0.1090 284512.000
+40 35 1 0.1090 284512.000
+41 38 1 0.1471 307105.600
+42 38 1 0.1471 307105.600
+43 38 1 0.1471 307105.600
+44 41 1 0.1090 284512.000
+45 41 1 0.1090 284512.000
+46 41 1 0.1090 284512.000
+47 42 1 0.1090 284512.000
+48 42 1 0.1090 284512.000
+49 42 1 0.1090 284512.000
+50 43 1 0.1090 284512.000
+51 43 1 0.1090 284512.000
+52 43 1 0.1090 284512.000
+57 54 1 0.1529 224262.400
+58 54 1 0.1522 265265.600
+59 54 1 0.1090 284512.000
+60 57 1 0.1090 284512.000
+61 57 1 0.1090 284512.000
+62 58 1 0.1327 179075.200
+63 58 1 0.1229 476976.000
+64 62 1 0.1410 267776.000
+53 64 1 0.1090 284512.000
+55 64 1 0.1090 284512.000
+56 64 1 0.1090 284512.000
+65 77 1 0.1529 224262.400
+66 77 1 0.1522 265265.600
+67 77 1 0.1090 284512.000
+68 65 1 0.1090 284512.000
+69 65 1 0.1090 284512.000
+70 65 1 0.1090 284512.000
+71 66 1 0.1327 179075.200
+72 66 1 0.1229 476976.000
+73 71 1 0.1410 267776.000
+74 73 1 0.1090 284512.000
+75 73 1 0.1090 284512.000
+76 73 1 0.1090 284512.000
+15  5 1 0.1529 224262.400 ; link
+26 18 1 0.1529 224262.400 ; link
+54 27 1 0.1529 224262.400 ; link
+77 57 1 0.1529 224262.400 ; link
+
+[ pairs ]
+ 1  5 1
+ 1  6 1
+ 1  7 1
+ 4  5 1
+ 4  6 1
+ 4  7 1
+ 2  8 1
+ 2  9 1
+ 6  8 1
+ 7  8 1
+ 6  9 1
+ 7  9 1
+ 1 10 1
+ 3 10 1
+ 2 11 1
+ 2 12 1
+ 2 13 1
+19 21 1
+20 21 1
+19 22 1
+18 23 1
+20 22 1
+18 24 1
+20 23 1
+15 25 1
+20 24 1
+24 25 1
+19 14 1
+19 16 1
+19 17 1
+28 30 1
+29 30 1
+28 31 1
+27 32 1
+26 34 1
+29 31 1
+27 33 1
+29 32 1
+29 33 1
+28 35 1
+33 34 1
+28 36 1
+28 37 1
+32 38 1
+32 39 1
+32 40 1
+34 41 1
+34 42 1
+34 43 1
+36 38 1
+37 38 1
+36 39 1
+37 39 1
+36 40 1
+37 40 1
+35 44 1
+35 45 1
+35 46 1
+35 47 1
+35 48 1
+35 49 1
+35 50 1
+35 51 1
+35 52 1
+39 41 1
+40 41 1
+39 42 1
+40 42 1
+39 43 1
+40 43 1
+42 44 1
+43 44 1
+42 45 1
+43 45 1
+42 46 1
+41 47 1
+43 46 1
+41 48 1
+43 47 1
+41 49 1
+43 48 1
+41 50 1
+43 49 1
+42 50 1
+41 51 1
+42 51 1
+41 52 1
+42 52 1
+58 60 1
+59 60 1
+58 61 1
+57 62 1
+59 61 1
+57 63 1
+59 62 1
+54 64 1
+59 63 1
+63 64 1
+58 53 1
+58 55 1
+58 56 1
+77 73 1
+66 68 1
+67 68 1
+66 69 1
+67 69 1
+66 70 1
+65 71 1
+67 70 1
+65 72 1
+67 71 1
+67 72 1
+66 74 1
+66 75 1
+66 76 1
+72 73 1
+ 2 15 1 ; link
+ 6 15 1 ; link
+ 3 18 1 ; link
+ 7 15 1 ; link
+ 3 19 1 ; link
+ 3 20 1 ; link
+ 8 18 1 ; link
+ 5 21 1 ; link
+ 9 18 1 ; link
+ 8 19 1 ; link
+ 5 22 1 ; link
+ 9 19 1 ; link
+ 8 20 1 ; link
+ 5 23 1 ; link
+ 9 20 1 ; link
+ 5 24 1 ; link
+19 26 1 ; link
+20 26 1 ; link
+15 27 1 ; link
+15 28 1 ; link
+15 29 1 ; link
+21 27 1 ; link
+22 27 1 ; link
+21 28 1 ; link
+18 30 1 ; link
+22 28 1 ; link
+21 29 1 ; link
+18 31 1 ; link
+22 29 1 ; link
+18 32 1 ; link
+18 33 1 ; link
+26 57 1 ; link
+28 54 1 ; link
+26 58 1 ; link
+29 54 1 ; link
+26 59 1 ; link
+30 57 1 ; link
+27 60 1 ; link
+31 57 1 ; link
+30 58 1 ; link
+27 61 1 ; link
+31 58 1 ; link
+30 59 1 ; link
+27 62 1 ; link
+31 59 1 ; link
+27 63 1 ; link
+58 77 1 ; link
+59 77 1 ; link
+54 65 1 ; link
+54 66 1 ; link
+54 67 1 ; link
+57 68 1 ; link
+60 65 1 ; link
+57 69 1 ; link
+61 65 1 ; link
+60 66 1 ; link
+57 70 1 ; link
+61 66 1 ; link
+60 67 1 ; link
+57 71 1 ; link
+61 67 1 ; link
+57 72 1 ; link
+ 5 26 1 ; link
+18 54 1 ; link
+27 77 1 ; link
+
+[ angles ]
+ 1  2  3 1 120.400 669.440
+ 1  2  4 1 123.400 694.544
+ 2  3  5 1 111.100 527.184
+ 2  3  6 1 109.500 292.880
+ 2  3  7 1 109.500 292.880
+ 3  5  8 1 110.700 313.800
+ 3  5  9 1 110.700 313.800
+ 2  4 10 1 116.900 694.544
+ 4 10 11 1 109.500 292.880
+ 4 10 12 1 109.500 292.880
+ 4 10 13 1 109.500 292.880
+11 10 12 1 107.800 276.144
+12 10 13 1 107.800 276.144
+ 5  3  6 1 110.700 313.800
+ 5  3  7 1 110.700 313.800
+11 10 13 1 107.800 276.144
+ 6  3  7 1 107.800 276.144
+ 8  5  9 1 107.800 276.144
+ 3  2  4 1 111.400 677.808
+15 18 21 1 110.700 313.800
+15 18 22 1 110.700 313.800
+15 19 23 1 111.400 677.808
+15 19 24 1 120.400 669.440
+19 23 25 1 116.900 694.544
+23 25 14 1 109.500 292.880
+23 25 16 1 109.500 292.880
+23 25 17 1 109.500 292.880
+14 25 17 1 107.800 276.144
+16 25 17 1 107.800 276.144
+14 25 16 1 107.800 276.144
+18 15 19 1 111.100 527.184
+21 18 22 1 107.800 276.144
+23 19 24 1 123.400 694.544
+18 15 20 1 110.700 313.800
+19 15 20 1 109.500 292.880
+26 27 30 1 110.700 313.800
+26 27 31 1 110.700 313.800
+26 28 32 1 111.400 677.808
+26 28 33 1 120.400 669.440
+28 32 34 1 116.900 694.544
+32 34 35 1 109.500 418.400
+32 34 36 1 109.500 292.880
+32 34 37 1 109.500 292.880
+34 35 38 1 111.200 669.440
+34 35 39 1 110.700 313.800
+34 35 40 1 110.700 313.800
+35 38 41 1 113.000 418.400
+35 38 42 1 113.000 418.400
+35 38 43 1 113.000 418.400
+38 41 44 1 109.500 292.880
+38 41 45 1 109.500 292.880
+38 41 46 1 109.500 292.880
+38 42 47 1 109.500 292.880
+38 42 48 1 109.500 292.880
+38 42 49 1 109.500 292.880
+38 43 50 1 109.500 292.880
+38 43 51 1 109.500 292.880
+38 43 52 1 109.500 292.880
+47 42 48 1 107.800 276.144
+44 41 46 1 107.800 276.144
+35 34 37 1 110.700 313.800
+41 38 42 1 113.000 418.400
+32 28 33 1 123.400 694.544
+38 35 39 1 109.500 292.880
+35 34 36 1 110.700 313.800
+47 42 49 1 107.800 276.144
+27 26 29 1 110.700 313.800
+38 35 40 1 109.500 292.880
+48 42 49 1 107.800 276.144
+42 38 43 1 113.000 418.400
+45 41 46 1 107.800 276.144
+44 41 45 1 107.800 276.144
+50 43 52 1 107.800 276.144
+39 35 40 1 107.800 276.144
+28 26 29 1 109.500 292.880
+41 38 43 1 113.000 418.400
+36 34 37 1 107.800 276.144
+30 27 31 1 107.800 276.144
+50 43 51 1 107.800 276.144
+51 43 52 1 107.800 276.144
+27 26 28 1 111.100 527.184
+54 57 60 1 110.700 313.800
+54 57 61 1 110.700 313.800
+54 58 62 1 111.400 677.808
+54 58 63 1 120.400 669.440
+58 62 64 1 116.900 694.544
+62 64 53 1 109.500 292.880
+62 64 55 1 109.500 292.880
+62 64 56 1 109.500 292.880
+53 64 56 1 107.800 276.144
+55 64 56 1 107.800 276.144
+53 64 55 1 107.800 276.144
+57 54 58 1 111.100 527.184
+60 57 61 1 107.800 276.144
+62 58 63 1 123.400 694.544
+57 54 59 1 110.700 313.800
+58 54 59 1 109.500 292.880
+77 65 68 1 110.700 313.800
+77 65 69 1 110.700 313.800
+77 65 70 1 110.700 313.800
+77 66 71 1 111.400 677.808
+77 66 72 1 120.400 669.440
+66 71 73 1 116.900 694.544
+71 73 74 1 109.500 292.880
+71 73 75 1 109.500 292.880
+71 73 76 1 109.500 292.880
+69 65 70 1 107.800 276.144
+75 73 76 1 107.800 276.144
+66 77 67 1 109.500 292.880
+68 65 69 1 107.800 276.144
+74 73 75 1 107.800 276.144
+68 65 70 1 107.800 276.144
+74 73 76 1 107.800 276.144
+65 77 66 1 111.100 527.184
+65 77 67 1 110.700 313.800
+71 66 72 1 123.400 694.544
+ 3  5 15 1 112.700 488.273 ; link
+ 5 15 18 1 112.700 488.273 ; link
+ 5 15 19 1 111.100 527.184 ; link
+ 5 15 20 1 110.700 313.800 ; link
+15  5  9 1 110.700 313.800 ; link
+15  5  8 1 110.700 313.800 ; link
+15 18 26 1 112.700 488.273 ; link
+18 26 27 1 112.700 488.273 ; link
+18 26 28 1 111.100 527.184 ; link
+18 26 29 1 110.700 313.800 ; link
+26 18 22 1 110.700 313.800 ; link
+26 18 21 1 110.700 313.800 ; link
+26 27 54 1 112.700 488.273 ; link
+27 54 57 1 112.700 488.273 ; link
+27 54 58 1 111.100 527.184 ; link
+27 54 59 1 110.700 313.800 ; link
+54 27 31 1 110.700 313.800 ; link
+54 27 30 1 110.700 313.800 ; link
+54 57 77 1 112.700 488.273 ; link
+57 77 65 1 112.700 488.273 ; link
+57 77 66 1 111.100 527.184 ; link
+57 77 67 1 110.700 313.800 ; link
+77 57 61 1 110.700 313.800 ; link
+77 57 60 1 110.700 313.800 ; link
+
+[ dihedrals ]
+ 4  2  1  3 4 180.000 43.932 2
+ 5  3  2  1 3 0.000 0.000 0.000 -0.000 -0.000 0.000
+ 5  3  2  4 3 -1.157 -3.471 0.000 4.628 -0.000 0.000
+10  4  2  3 3 31.206 -9.768 -21.439 -0.000 -0.000 0.000
+10  4  2  1 3 21.439 0.000 -21.439 -0.000 -0.000 0.000
+ 7  3  2  1 3 0.000 0.000 0.000 -0.000 -0.000 0.000
+ 6  3  2  1 3 0.000 0.000 0.000 -0.000 -0.000 0.000
+ 7  3  2  4 3 0.276 0.828 0.000 -1.105 -0.000 0.000
+ 6  3  2  4 3 0.276 0.828 0.000 -1.105 -0.000 0.000
+ 9  5  3  2 3 -0.209 -0.628 0.000 0.837 -0.000 0.000
+ 8  5  3  2 3 -0.209 -0.628 0.000 0.837 -0.000 0.000
+ 8  5  3  6 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+ 8  5  3  7 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+ 9  5  3  6 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+ 9  5  3  7 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+11 10  4  2 3 0.414 1.243 0.000 -1.657 -0.000 0.000
+13 10  4  2 3 0.414 1.243 0.000 -1.657 -0.000 0.000
+12 10  4  2 3 0.414 1.243 0.000 -1.657 -0.000 0.000
+24 19 15 23 4 180.000 43.932 2
+25 23 19 15 3 31.206 -9.768 -21.439 -0.000 -0.000 0.000
+25 23 19 24 3 21.439 0.000 -21.439 -0.000 -0.000 0.000
+21 18 15 19 3 -0.209 -0.628 0.000 0.837 -0.000 0.000
+22 18 15 19 3 -0.209 -0.628 0.000 0.837 -0.000 0.000
+21 18 15 20 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+22 18 15 20 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+14 25 23 19 3 0.414 1.243 0.000 -1.657 -0.000 0.000
+16 25 23 19 3 0.414 1.243 0.000 -1.657 -0.000 0.000
+17 25 23 19 3 0.414 1.243 0.000 -1.657 -0.000 0.000
+24 19 15 18 3 0.000 0.000 0.000 -0.000 -0.000 0.000
+24 19 15 20 3 0.000 0.000 0.000 -0.000 -0.000 0.000
+23 19 15 18 3 -1.157 -3.471 0.000 4.628 -0.000 0.000
+23 19 15 20 3 0.276 0.828 0.000 -1.105 -0.000 0.000
+33 28 26 32 4 180.000 43.932 2
+35 34 32 28 3 -2.197 5.201 0.527 -3.531 -0.000 0.000
+43 38 35 34 3 3.042 -1.351 0.519 -2.209 -0.000 0.000
+41 38 35 34 3 3.042 -1.351 0.519 -2.209 -0.000 0.000
+42 38 35 34 3 3.042 -1.351 0.519 -2.209 -0.000 0.000
+43 38 35 39 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+42 38 35 40 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+41 38 35 39 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+42 38 35 39 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+41 38 35 40 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+43 38 35 40 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+34 32 28 26 3 31.206 -9.768 -21.439 -0.000 -0.000 0.000
+34 32 28 33 3 21.439 0.000 -21.439 -0.000 -0.000 0.000
+30 27 26 28 3 -0.209 -0.628 0.000 0.837 -0.000 0.000
+31 27 26 28 3 -0.209 -0.628 0.000 0.837 -0.000 0.000
+40 35 34 36 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+30 27 26 29 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+39 35 34 37 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+40 35 34 37 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+39 35 34 36 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+31 27 26 29 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+40 35 34 32 3 0.979 2.937 0.000 -3.916 -0.000 0.000
+39 35 34 32 3 0.979 2.937 0.000 -3.916 -0.000 0.000
+52 43 38 35 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+45 41 38 42 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+46 41 38 43 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+45 41 38 43 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+48 42 38 43 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+49 42 38 43 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+47 42 38 43 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+49 42 38 41 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+46 41 38 35 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+45 41 38 35 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+50 43 38 41 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+52 43 38 42 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+44 41 38 43 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+51 43 38 41 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+47 42 38 41 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+51 43 38 42 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+52 43 38 41 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+50 43 38 42 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+48 42 38 35 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+48 42 38 41 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+44 41 38 35 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+46 41 38 42 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+51 43 38 35 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+50 43 38 35 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+47 42 38 35 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+49 42 38 35 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+44 41 38 42 3 0.632 1.895 0.000 -2.527 -0.000 0.000
+37 34 32 28 3 0.414 1.243 0.000 -1.657 -0.000 0.000
+36 34 32 28 3 0.414 1.243 0.000 -1.657 -0.000 0.000
+38 35 34 36 3 0.803 2.410 0.000 -3.213 -0.000 0.000
+38 35 34 37 3 0.803 2.410 0.000 -3.213 -0.000 0.000
+38 35 34 32 3 16.736 -16.736 0.000 -0.000 -0.000 0.000
+33 28 26 27 3 0.000 0.000 0.000 -0.000 -0.000 0.000
+33 28 26 29 3 0.000 0.000 0.000 -0.000 -0.000 0.000
+32 28 26 27 3 -1.157 -3.471 0.000 4.628 -0.000 0.000
+32 28 26 29 3 0.276 0.828 0.000 -1.105 -0.000 0.000
+63 58 54 62 4 180.000 43.932 2
+64 62 58 54 3 31.206 -9.768 -21.439 -0.000 -0.000 0.000
+64 62 58 63 3 21.439 0.000 -21.439 -0.000 -0.000 0.000
+60 57 54 58 3 -0.209 -0.628 0.000 0.837 -0.000 0.000
+61 57 54 58 3 -0.209 -0.628 0.000 0.837 -0.000 0.000
+60 57 54 59 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+61 57 54 59 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+53 64 62 58 3 0.414 1.243 0.000 -1.657 -0.000 0.000
+55 64 62 58 3 0.414 1.243 0.000 -1.657 -0.000 0.000
+56 64 62 58 3 0.414 1.243 0.000 -1.657 -0.000 0.000
+63 58 54 57 3 0.000 0.000 0.000 -0.000 -0.000 0.000
+63 58 54 59 3 0.000 0.000 0.000 -0.000 -0.000 0.000
+62 58 54 57 3 -1.157 -3.471 0.000 4.628 -0.000 0.000
+62 58 54 59 3 0.276 0.828 0.000 -1.105 -0.000 0.000
+72 66 77 71 4 180.000 43.932 2
+73 71 66 77 3 31.206 -9.768 -21.439 -0.000 -0.000 0.000
+73 71 66 72 3 21.439 0.000 -21.439 -0.000 -0.000 0.000
+70 65 77 66 3 -0.209 -0.628 0.000 0.837 -0.000 0.000
+68 65 77 66 3 -0.209 -0.628 0.000 0.837 -0.000 0.000
+69 65 77 66 3 -0.209 -0.628 0.000 0.837 -0.000 0.000
+69 65 77 67 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+70 65 77 67 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+68 65 77 67 3 0.628 1.883 0.000 -2.510 -0.000 0.000
+75 73 71 66 3 0.414 1.243 0.000 -1.657 -0.000 0.000
+76 73 71 66 3 0.414 1.243 0.000 -1.657 -0.000 0.000
+74 73 71 66 3 0.414 1.243 0.000 -1.657 -0.000 0.000
+72 66 77 65 3 0.000 0.000 0.000 -0.000 -0.000 0.000
+72 66 77 67 3 0.000 0.000 0.000 -0.000 -0.000 0.000
+71 66 77 65 3 -1.157 -3.471 0.000 4.628 -0.000 0.000
+71 66 77 67 3 0.276 0.828 0.000 -1.105 -0.000 0.000
+19 15  5  3 3 -4.960 6.286 1.310 -2.636 -0.000 0.000 ; link
+19 15  5  8 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 ; link
+19 15  5  9 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 ; link
+15  5  3  2 3 -4.960 6.286 1.310 -2.636 -0.000 0.000 ; link
+18 15  5  3 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link
+15  5  3  7 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+15  5  3  6 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+18 15  5  8 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+18 15  5  9 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+20 15  5  3 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+21 18 15  5 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+22 18 15  5 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+20 15  5  8 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+20 15  5  9 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+24 19 15  5 3 0.000 0.000 0.000 -0.000 -0.000 0.000 ; link
+23 19 15  5 3 -1.157 -3.471 0.000 4.628 -0.000 0.000 ; link
+28 26 18 15 3 -4.960 6.286 1.310 -2.636 -0.000 0.000 ; link
+28 26 18 21 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 ; link
+28 26 18 22 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 ; link
+26 18 15 19 3 -4.960 6.286 1.310 -2.636 -0.000 0.000 ; link
+27 26 18 15 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link
+26 18 15 20 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+27 26 18 21 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+27 26 18 22 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+30 27 26 18 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+29 26 18 15 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+31 27 26 18 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+29 26 18 21 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+29 26 18 22 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+33 28 26 18 3 0.000 0.000 0.000 -0.000 -0.000 0.000 ; link
+32 28 26 18 3 -1.157 -3.471 0.000 4.628 -0.000 0.000 ; link
+58 54 27 26 3 -4.960 6.286 1.310 -2.636 -0.000 0.000 ; link
+58 54 27 30 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 ; link
+58 54 27 31 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 ; link
+54 27 26 28 3 -4.960 6.286 1.310 -2.636 -0.000 0.000 ; link
+57 54 27 26 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link
+57 54 27 31 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+54 27 26 29 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+57 54 27 30 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+61 57 54 27 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+60 57 54 27 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+59 54 27 26 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+59 54 27 31 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+59 54 27 30 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+63 58 54 27 3 0.000 0.000 0.000 -0.000 -0.000 0.000 ; link
+62 58 54 27 3 -1.157 -3.471 0.000 4.628 -0.000 0.000 ; link
+66 77 57 54 3 -4.960 6.286 1.310 -2.636 -0.000 0.000 ; link
+66 77 57 60 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 ; link
+66 77 57 61 3 -0.209 -0.628 0.000 0.837 -0.000 0.000 ; link
+77 57 54 58 3 -4.960 6.286 1.310 -2.636 -0.000 0.000 ; link
+65 77 57 54 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link
+65 77 57 60 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+65 77 57 61 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+77 57 54 59 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+69 65 77 57 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+67 77 57 54 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+70 65 77 57 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+68 65 77 57 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+67 77 57 60 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+67 77 57 61 3 0.628 1.883 0.000 -2.510 -0.000 0.000 ; link
+72 66 77 57 3 0.000 0.000 0.000 -0.000 -0.000 0.000 ; link
+71 66 77 57 3 -1.157 -3.471 0.000 4.628 -0.000 0.000 ; link
+54 27 26 18 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link
+77 57 54 27 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link
+26 18 15  5 3 2.301 -1.464 0.837 -1.674 -0.000 0.000 ; link
+
diff --git a/polyply/tests/test_data/itp_to_ff/ACOL/ref.top b/polyply/tests/test_data/itp_to_ff/ACOL/ref.top
new file mode 100644
index 00000000..f6d5e4e9
--- /dev/null
+++ b/polyply/tests/test_data/itp_to_ff/ACOL/ref.top
@@ -0,0 +1,28 @@
+#define _FF_OPLS
+#define _FF_OPLSAA
+
+; This force field uses a format that requires Gromacs 3.1.4 or later.
+;
+; References for the OPLS-AA force field: 
+;
+; W. L. Jorgensen, D. S. Maxwell, and J. Tirado-Rives,
+; J. Am. Chem. Soc. 118, 11225-11236 (1996).
+; W. L. Jorgensen and N. A. McDonald, Theochem 424, 145-155 (1998).
+; W. L. Jorgensen and N. A. McDonald, J. Phys. Chem. B 102, 8049-8059 (1998).
+; R. C. Rizzo and W. L. Jorgensen, J. Am. Chem. Soc. 121, 4827-4836 (1999).
+; M. L. Price, D. Ostrovsky, and W. L. Jorgensen, J. Comp. Chem. (2001).
+; E. K. Watkins and W. L. Jorgensen, J. Phys. Chem. A 105, 4118-4125 (2001).
+; G. A. Kaminski, R.A. Friesner, J.Tirado-Rives and W.L. Jorgensen, J. Phys. Chem. B 105, 6474 (2001).
+;
+
+[ defaults ]
+; nbfunc	comb-rule	gen-pairs	fudgeLJ	fudgeQQ
+1		3		yes		0.5	0.5
+
+#include "ligpargen.itp"
+#include "in_itp.itp"
+
+[system]
+test
+[molecules]
+ref 1
diff --git a/polyply/tests/test_data/itp_to_ff/ACOL/seq.txt b/polyply/tests/test_data/itp_to_ff/ACOL/seq.txt
new file mode 100644
index 00000000..1a088a04
--- /dev/null
+++ b/polyply/tests/test_data/itp_to_ff/ACOL/seq.txt
@@ -0,0 +1 @@
+Mter M AOL M Mter_1
diff --git a/polyply/tests/test_itp_to_ff.py b/polyply/tests/test_itp_to_ff.py
index ac727795..db2a9984 100644
--- a/polyply/tests/test_itp_to_ff.py
+++ b/polyply/tests/test_itp_to_ff.py
@@ -70,6 +70,11 @@ def itp_equal(ref_mol, new_mol):
 @pytest.mark.parametrize("case, smiles, resnames, charges", [
     ("PEO_OHter", ["[OH][CH2]", "[CH2]O[CH2]", "[CH2][OH]"], ["OH", "PEO", "OH"], [0, 0, 0]),
     ("PEG_PBE", ["[CH3]", "[CH2][CH][CH][CH2]", "[CH2]O[CH2]"], ["CH3", "PBE", "PEO"], [0, 0, 0]),
+    ("ACOL", ["[CH2][CH]C(=O)[O][CH3]","[CH2][CH]C(=O)[O][CH3]",
+              "[CH2][CH]C(=O)[O][CH2][CH2][N]([CH3])([CH3])([CH3])",
+              "[CH2][CH]C(=O)[O][CH3]", "[CH2][CH]C(=O)[O][CH3]"],
+             ["M", "M", "AOL", "M", "M"],
+             [0, 0, 1, 0, 0]),
 ])
 def test_itp_to_ff(tmp_path, case, smiles, resnames, charges):
     """

From a39c3acb06614d99fa6181a702e62123f823561f Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Fri, 24 Nov 2023 15:17:25 +0100
Subject: [PATCH 39/82] use top file for ACOL test and fix bug in test

---
 .../test_data/itp_to_ff/ACOL/ligpargen.itp    | 83 +++++++++++++++++++
 polyply/tests/test_itp_to_ff.py               | 14 ++--
 2 files changed, 91 insertions(+), 6 deletions(-)
 create mode 100644 polyply/tests/test_data/itp_to_ff/ACOL/ligpargen.itp

diff --git a/polyply/tests/test_data/itp_to_ff/ACOL/ligpargen.itp b/polyply/tests/test_data/itp_to_ff/ACOL/ligpargen.itp
new file mode 100644
index 00000000..dddc1fc4
--- /dev/null
+++ b/polyply/tests/test_data/itp_to_ff/ACOL/ligpargen.itp
@@ -0,0 +1,83 @@
+
+;
+; GENERATED BY LigParGen Server
+; Jorgensen Lab @ Yale University 
+;
+[ atomtypes ]
+  opls_846  O846  1 15.9990     0.000    A    2.90000E-01   5.85760E-01
+  opls_835  O835  1 15.9990     0.000    A    2.96000E-01   8.78640E-01
+  opls_839  C839  1 12.0110     0.000    A    3.50000E-01   2.76144E-01
+  opls_867  H867  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_803  O803  1 15.9990     0.000    A    2.90000E-01   5.85760E-01
+  opls_806  H806  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_864  H864  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_818  C818  1 12.0110     0.000    A    3.50000E-01   2.76144E-01
+  opls_855  N855  1 14.0070     0.000    A    3.25000E-01   7.11280E-01
+  opls_874  H874  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_843  H843  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_826  O826  1 15.9990     0.000    A    2.96000E-01   8.78640E-01
+  opls_862  H862  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_827  C827  1 12.0110     0.000    A    3.50000E-01   2.76144E-01
+  opls_849  H849  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_834  O834  1 15.9990     0.000    A    2.90000E-01   5.85760E-01
+  opls_844  H844  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_802  C802  1 12.0110     0.000    A    3.50000E-01   2.76144E-01
+  opls_815  H815  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_851  C851  1 12.0110     0.000    A    3.50000E-01   2.76144E-01
+  opls_814  H814  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_825  O825  1 15.9990     0.000    A    2.90000E-01   5.85760E-01
+  opls_808  H808  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_807  C807  1 12.0110     0.000    A    3.50000E-01   2.76144E-01
+  opls_842  C842  1 12.0110     0.000    A    3.50000E-01   2.76144E-01
+  opls_838  H838  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_876  H876  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_805  H805  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_804  C804  1 12.0110     0.000    A    3.50000E-01   2.76144E-01
+  opls_824  H824  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_820  H820  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_801  C801  1 12.0110     0.000    A    3.55000E-01   2.92880E-01
+  opls_837  H837  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_819  C819  1 12.0110     0.000    A    3.55000E-01   2.92880E-01
+  opls_829  H829  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_822  C822  1 12.0110     0.000    A    3.50000E-01   2.76144E-01
+  opls_832  H832  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_875  H875  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_848  H848  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_856  H856  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_800  O800  1 15.9990     0.000    A    2.96000E-01   8.78640E-01
+  opls_823  H823  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_811  C811  1 12.0110     0.000    A    3.55000E-01   2.92880E-01
+  opls_833  H833  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_813  C813  1 12.0110     0.000    A    3.50000E-01   2.76144E-01
+  opls_816  O816  1 15.9990     0.000    A    2.90000E-01   5.85760E-01
+  opls_869  H869  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_831  C831  1 12.0110     0.000    A    3.50000E-01   2.76144E-01
+  opls_868  H868  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_841  H841  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_871  H871  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_821  C821  1 12.0110     0.000    A    3.50000E-01   2.76144E-01
+  opls_810  C810  1 12.0110     0.000    A    3.50000E-01   2.76144E-01
+  opls_861  H861  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_847  O847  1 15.9990     0.000    A    2.96000E-01   8.78640E-01
+  opls_857  H857  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_852  H852  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_870  H870  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_866  H866  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_860  C860  1 12.0110     0.000    A    3.50000E-01   2.76144E-01
+  opls_850  H850  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_817  O817  1 15.9990     0.000    A    2.96000E-01   8.78640E-01
+  opls_853  H853  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_873  C873  1 12.0110     0.000    A    3.50000E-01   2.76144E-01
+  opls_812  H812  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_858  C858  1 12.0110     0.000    A    3.50000E-01   2.76144E-01
+  opls_865  H865  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_809  H809  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_859  C859  1 12.0110     0.000    A    3.50000E-01   2.76144E-01
+  opls_830  C830  1 12.0110     0.000    A    3.50000E-01   2.76144E-01
+  opls_863  H863  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_828  C828  1 12.0110     0.000    A    3.55000E-01   2.92880E-01
+  opls_836  C836  1 12.0110     0.000    A    3.50000E-01   2.76144E-01
+  opls_845  H845  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_840  C840  1 12.0110     0.000    A    3.55000E-01   2.92880E-01
+  opls_854  H854  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
+  opls_872  H872  1  1.0080     0.000    A    2.50000E-01   1.25520E-01
diff --git a/polyply/tests/test_itp_to_ff.py b/polyply/tests/test_itp_to_ff.py
index db2a9984..13afaf0a 100644
--- a/polyply/tests/test_itp_to_ff.py
+++ b/polyply/tests/test_itp_to_ff.py
@@ -67,23 +67,25 @@ def itp_equal(ref_mol, new_mol):
                 assert False
     return True
 
-@pytest.mark.parametrize("case, smiles, resnames, charges", [
-    ("PEO_OHter", ["[OH][CH2]", "[CH2]O[CH2]", "[CH2][OH]"], ["OH", "PEO", "OH"], [0, 0, 0]),
-    ("PEG_PBE", ["[CH3]", "[CH2][CH][CH][CH2]", "[CH2]O[CH2]"], ["CH3", "PBE", "PEO"], [0, 0, 0]),
-    ("ACOL", ["[CH2][CH]C(=O)[O][CH3]","[CH2][CH]C(=O)[O][CH3]",
+@pytest.mark.parametrize("case, fname, smiles, resnames, charges", [
+    ("PEO_OHter", "in_itp.itp", ["[OH][CH2]", "[CH2]O[CH2]", "[CH2][OH]"],
+    ["OH", "PEO", "OH"], [0, 0, 0]),
+    ("PEG_PBE", "in_itp.itp", ["[CH3]", "[CH2][CH][CH][CH2]", "[CH2]O[CH2]"],
+    ["CH3", "PBE", "PEO"], [0, 0, 0]),
+    ("ACOL","ref.top", ["[CH2][CH]C(=O)[O][CH3]","[CH2][CH]C(=O)[O][CH3]",
               "[CH2][CH]C(=O)[O][CH2][CH2][N]([CH3])([CH3])([CH3])",
               "[CH2][CH]C(=O)[O][CH3]", "[CH2][CH]C(=O)[O][CH3]"],
              ["M", "M", "AOL", "M", "M"],
              [0, 0, 1, 0, 0]),
 ])
-def test_itp_to_ff(tmp_path, case, smiles, resnames, charges):
+def test_itp_to_ff(tmp_path, case, fname, smiles, resnames, charges):
     """
     Call itp-to-ff and check if it generates the same force-field
     as in the ref.ff file.
     """
     tmp_file = Path(tmp_path) / "test.ff"
     inpath = Path(polyply.TEST_DATA) / "itp_to_ff" / case
-    itp_to_ff(itppath=inpath/"in_itp.itp",
+    itp_to_ff(itppath=inpath/fname,
               fragment_smiles=smiles,
               resnames=resnames,
               charges=charges,

From 39f7ad4e9b5923c18ff8a529294cabe877b44a9c Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Thu, 28 Dec 2023 12:57:58 +0100
Subject: [PATCH 40/82] fix toplevel itp_to_ff parser

---
 bin/polyply | 2 --
 1 file changed, 2 deletions(-)

diff --git a/bin/polyply b/bin/polyply
index eff60024..1eb4d6f9 100755
--- a/bin/polyply
+++ b/bin/polyply
@@ -240,8 +240,6 @@ def main(): # pylint: disable=too-many-locals,too-many-statements
     parser_itp_ff.add_argument('-tp',dest="term_prefix", default="ter")
     parser_itp_ff.add_argument('-o', dest="outpath", type=Path)
     parser_itp_ff.add_argument('-c', dest="charges", type=float, nargs='*')
-    parser_itp_ff.add_argument('-tol', dest="tolerance", type=float, default=1e-5)
-    parser_itp_ff.add_argument('-d', dest="decimals", type=int, default=5)
 
     parser_itp_ff.set_defaults(func=itp_to_ff)
 

From 8f80a99952a20665aab75aa708c0d294aacfabf2 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Mon, 15 Jan 2024 11:47:59 +0100
Subject: [PATCH 41/82] bigsmile_draft

---
 polyply/src/big_smiles.py        |  93 +++++++++++++++
 polyply/src/big_smiles_helper.py | 193 +++++++++++++++++++++++++++++++
 polyply/src/fragment_finder.py   |  30 ++++-
 polyply/src/new.py               |  76 ++++++++++++
 4 files changed, 391 insertions(+), 1 deletion(-)
 create mode 100644 polyply/src/big_smiles.py
 create mode 100644 polyply/src/big_smiles_helper.py
 create mode 100644 polyply/src/new.py

diff --git a/polyply/src/big_smiles.py b/polyply/src/big_smiles.py
new file mode 100644
index 00000000..41e8535e
--- /dev/null
+++ b/polyply/src/big_smiles.py
@@ -0,0 +1,93 @@
+# Copyright 2020 University of Groningen
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+def find_token_indices(line, target):
+    idxs = [idx for idx, token in enumerate(line) if token == target]
+    for idx in idxs:
+        yield idx
+
+def compatible(left, right):
+    if left == right:
+        return True
+    if left[0] == "<" and right[0] == ">":
+        if left[1:] == right[1:]:
+            return True
+    if left[0] == ">" and right[0] == "<":
+        if left[1:] == right[1:]:
+            return True
+    return False
+
+def find_compatible_pair(polymol, residue, bond_type="bond_type", eligible_nodes=None):
+    ref_nodes = nx.get_node_attributes(polymol, bond_type)
+    target_nodes = nx.get_node_attributes(residue, bond_type)
+    for ref_node in ref_nodes:
+        if eligible_nodes and\
+           polymol.nodes[ref_node]['resid'] not in eligible_nodes:
+            continue
+        for target_node in target_nodes:
+            if compatible(ref_nodes[ref_node],
+                          target_nodes[target_node]):
+                return ref_node, target_node
+    return None
+
+class BigSmileParser:
+
+    def __init__(self):
+        self.molecule =
+
+    def parse_stochastic_object():
+
+
+def read_simplified_big_smile_string(line):
+
+    # split the different stochastic objects
+    line = line.strip()
+    # a stochastic object is enclosed in '{' and '}'
+    start_idx = next(find_token_indices(line, "{"))
+    stop_idx = next(find_token_indices(line, "}"))
+    stoch_line = line[start_idx+1:stop_idx]
+    # residues are separated by , and end
+    # groups by ;
+    if ';' in stoch_line:
+        residue_string, terminii_string = stoch_line.split(';')
+    else:
+        residue_string = stoch_line
+        terminii_string = None
+    # let's read the smile residue strings
+    residues = []
+    count = 0
+    for residue_string in residue_string.split(','):
+        # figure out if this is a named object
+        if residue_string[0] == "#":
+            jdx = next(find_token_indices(residue_string, "="))
+            name = residue_string[:jdx]
+            residue_string = residue_string[jdx:]
+        else:
+            name = count
+
+        mol_graph = read_smiles(residue_string)
+        residues.append((name, mol_graph))
+        count += 1
+    # let's read the terminal residue strings
+    end_groups = []
+    if terminii_string:
+        for terminus_string in terminii_string.split(','):
+            mol_graph = read_smiles(terminus_string)
+            bond_types = nx.get_node_attributes(mol_graph, "bond_type")
+            nx.set_node_attributes(mol_graph, bond_types, "ter_bond_type")
+            end_groups.append(mol_graph)
+    return cls(dict(residues), end_groups)
+
+
+
diff --git a/polyply/src/big_smiles_helper.py b/polyply/src/big_smiles_helper.py
new file mode 100644
index 00000000..ae546ffe
--- /dev/null
+++ b/polyply/src/big_smiles_helper.py
@@ -0,0 +1,193 @@
+# Copyright 2020 University of Groningen
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+try:
+    import pysmiles
+except ImportError:
+    msg = "The tool you are using requires pysmiles as dependcy."
+    raise ImportError(msg)
+
+from pysmiles.read_smiles import _tokenize
+
+def find_anchor(mol, pre_mol, atom):
+    anchors = list(pre_mol.neighbors(atom))
+    for anchor in anchors:
+        if anchor in mol.nodes:
+            return False, anchor
+    for anchor in nx.ego_graph(pre_mol, atom, radius=2).nodes:
+        if anchor in mol.nodes:
+            return True, anchor
+    raise RuntimeError
+
+def parse_atom(atom):
+    """
+    Parses a SMILES atom token, and returns a dict with the information.
+
+    Note
+    ----
+    Can not deal with stereochemical information yet. This gets discarded.
+
+    Parameters
+    ----------
+    atom : str
+        The atom string to interpret. Looks something like one of the
+        following: "C", "c", "[13CH3-1:2]"
+
+    Returns
+    -------
+    dict
+        A dictionary containing at least 'element', 'aromatic', and 'charge'. If
+        present, will also contain 'hcount', 'isotope', and 'class'.
+    """
+    defaults = {'charge': 0, 'hcount': 0, 'aromatic': False}
+    if atom.startswith('[') and any(mark in atom for mark in ['$', '>', '<']):
+        bond_type = atom[1:-1]
+        # we have a big smile bond anchor
+        defaults.update({"element": None,
+                         "bond_type": bond_type})
+        return defaults
+
+    if atom.startswith('[') and '#' == atom[1]:
+        # this atom is a replaceable placeholder
+        defaults.update({"element": None, "replace": atom[2:-1]})
+        return defaults
+
+    if not atom.startswith('[') and not atom.endswith(']'):
+        if atom != '*':
+            # Don't specify hcount to signal we don't actually know anything
+            # about it
+            return {'element': atom.capitalize(), 'charge': 0,
+                    'aromatic': atom.islower()}
+        else:
+            return defaults.copy()
+
+    match = ATOM_PATTERN.match(atom)
+
+    if match is None:
+        raise ValueError('The atom {} is malformatted'.format(atom))
+
+    out = defaults.copy()
+    out.update({k: v for k, v in match.groupdict().items() if v is not None})
+
+    if out.get('element', 'X').islower():
+        out['aromatic'] = True
+
+    parse_helpers = {
+        'isotope': int,
+        'element': str.capitalize,
+        'stereo': lambda x: x,
+        'hcount': parse_hcount,
+        'charge': parse_charge,
+        'class': int,
+        'aromatic': lambda x: x,
+    }
+
+    for attr, val_str in out.items():
+        out[attr] = parse_helpers[attr](val_str)
+
+    if out['element'] == '*':
+        del out['element']
+
+    if out.get('element') == 'H' and out.get('hcount', 0):
+        raise ValueError("A hydrogen atom can't have hydrogens")
+
+    if 'stereo' in out:
+        LOGGER.warning('Atom "%s" contains stereochemical information that will be discarded.', atom)
+
+    return out
+
+def big_smile_str_to_graph(smile_str):
+    """
+    
+    """
+    bond_to_order = {'-': 1, '=': 2, '#': 3, '$': 4, ':': 1.5, '.': 0}
+    pre_mol = nx.Graph()
+    anchor = None
+    idx = 0
+    default_bond = 1
+    next_bond = None
+    branches = []
+    ring_nums = {}
+    for tokentype, token in _tokenize(smiles):
+        if tokentype == TokenType.ATOM:
+            pre_mol.add_node(idx, **parse_atom(token))
+            if anchor is not None:
+                if next_bond is None:
+                    next_bond = default_bond
+                if next_bond or zero_order_bonds:
+                    pre_mol.add_edge(anchor, idx, order=next_bond)
+                next_bond = None
+            anchor = idx
+            idx += 1
+        elif tokentype == TokenType.BRANCH_START:
+            branches.append(anchor)
+        elif tokentype == TokenType.BRANCH_END:
+            anchor = branches.pop()
+        elif tokentype == TokenType.BOND_TYPE:
+            if next_bond is not None:
+                raise ValueError('Previous bond (order {}) not used. '
+                                 'Overwritten by "{}"'.format(next_bond, token))
+            next_bond = bond_to_order[token]
+        elif tokentype == TokenType.RING_NUM:
+            if token in ring_nums:
+                jdx, order = ring_nums[token]
+                if next_bond is None and order is None:
+                    next_bond = default_bond
+                elif order is None:  # Note that the check is needed,
+                    next_bond = next_bond  # But this could be pass.
+                elif next_bond is None:
+                    next_bond = order
+                elif next_bond != order:  # Both are not None
+                    raise ValueError('Conflicting bond orders for ring '
+                                     'between indices {}'.format(token))
+                # idx is the index of the *next* atom we're adding. So: -1.
+                if pre_mol.has_edge(idx-1, jdx):
+                    raise ValueError('Edge specified by marker {} already '
+                                     'exists'.format(token))
+                if idx-1 == jdx:
+                    raise ValueError('Marker {} specifies a bond between an '
+                                     'atom and itself'.format(token))
+                if next_bond or zero_order_bonds:
+                    pre_mol.add_edge(idx - 1, jdx, order=next_bond)
+                next_bond = None
+                del ring_nums[token]
+            else:
+                if idx == 0:
+                    raise ValueError("Can't have a marker ({}) before an atom"
+                                     "".format(token))
+                # idx is the index of the *next* atom we're adding. So: -1.
+                ring_nums[token] = (idx - 1, next_bond)
+                next_bond = None
+        elif tokentype == TokenType.EZSTEREO:
+            LOGGER.warning('E/Z stereochemical information, which is specified by "%s", will be discarded', token)
+    if ring_nums:
+        raise KeyError('Unmatched ring indices {}'.format(list(ring_nums.keys())))
+
+    return pre_mol
+
+def mol_graph_from_big_smile_graph(pre_mol):
+    # here we condense any BigSmilesBonding information
+    clean_nodes = [node for node in pre_mol.nodes(data=True) if 'bond_type' not in node[1]]
+    mol = nx.Graph()
+    mol.add_nodes_from(clean_nodes)
+    mol.add_edges_from([edge for edge in pre_mol.edges if edge[0] in mol.nodes and edge[1] in mol.nodes])
+    for node in pre_mol.nodes:
+        if 'bond_type' in pre_mol.nodes[node]:
+            terminus, anchor = find_anchor(mol, pre_mol, node)
+            if terminus:
+                mol.nodes[anchor].update({"ter_bond_type": pre_mol.nodes[node]['bond_type'],
+                                          "ter_bond_probs": pre_mol.nodes[node]['bond_probs']})
+            else:
+                mol.nodes[anchor].update({"bond_type": pre_mol.nodes[node]['bond_type'],
+                                          "bond_probs": pre_mol.nodes[node]['bond_probs']})
+    return mol
diff --git a/polyply/src/fragment_finder.py b/polyply/src/fragment_finder.py
index bde5316b..060fbb44 100644
--- a/polyply/src/fragment_finder.py
+++ b/polyply/src/fragment_finder.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import numpy as np
 import networkx as nx
 from vermouth.graph_utils import make_residue_graph
 from polyply.src.graph_utils import find_one_ismags_match
@@ -123,6 +123,33 @@ def __init__(self, molecule, prefix):
                 self.molecule.nodes[node]["element"] = self.masses_to_element[mass]
                 self.molecule.nodes[node]["degree"] = self.molecule.degree(node)
 
+    def linearize_resids(self, unique_fragments):
+        resids = np.arange(0, len(self.res_graph))
+        old_resids = {}
+        # find the first terminal
+        ter = self.ter_prefix
+        ter_nodes = [ node[0] for node in self.res_graph.nodes(data=True) if ter in node[1]['resname'] ]
+        print(ter_nodes[0])
+        #assert 0 > len(ter_nodes) < 3
+        path = nx.dfs_edges(self.res_graph, source=ter_nodes[0])
+        old_resids = {self.res_graph.nodes[ter_nodes[0]]['resid']: resids[0]}
+        self.res_graph.nodes[ter_nodes[0]]['resid'] = resids[0]
+        for mol_node in self.res_graph.nodes[ter_nodes[0]]['graph'].nodes:
+            self.res_graph.nodes[ter_nodes[0]]['graph'].nodes[mol_node]['resid'] = resids[0]
+            self.molecule.nodes[mol_node]['resid'] = resids[0]
+
+        for new_resid, (_, node) in zip(resids[1:], path):
+            print('node', node)
+            old_resids[self.res_graph.nodes[node]['resid']] = new_resid
+            self.res_graph.nodes[node]['resid'] = new_resid
+            for mol_node in self.res_graph.nodes[node]['graph'].nodes:
+                self.res_graph.nodes[node]['graph'].nodes[mol_node]['resid'] = new_resid
+                self.molecule.nodes[mol_node]['resid'] = new_resid
+        print(old_resids)
+        for fragment in unique_fragments.values():
+            for node in fragment.nodes:
+                fragment.nodes[node]['resid'] = old_resids[fragment.nodes[node]['resid']]
+
     def _node_match(self, node1, node2):
         """
         Check if two node dicts match.
@@ -342,4 +369,5 @@ def extract_unique_fragments(self, fragment_graphs):
 
         # remake the residue graph since some resnames have changed
         self.make_res_graph()
+        self.linearize_resids(unique_fragments)
         return unique_fragments, self.res_graph
diff --git a/polyply/src/new.py b/polyply/src/new.py
new file mode 100644
index 00000000..4ed025ec
--- /dev/null
+++ b/polyply/src/new.py
@@ -0,0 +1,76 @@
+import re
+
+PATTERNS = {"bond_anchor": "\[\$.*?\]",
+            "place_holder": "\[\#.*?\]",
+            "annotation": "\|.*?\|",
+            "fragment": r'#(\w+)=((?:\[.*?\]|[^,\[\]]+)*)',
+            "seq_pattern": r'\{([^}]*)\}(?:\.\{([^}]*)\})?'}
+
+def read_big_smile(line):
+    res_graphs = []
+    seq_str, patterns = re.findall(PATTERNS['seq_pattern'], line)[0]
+    fragments = dict(re.findall(PATTERNS['fragment'], patterns))
+    for fragment in fragments:
+        res_graphs.append(read_smile_w_bondtypes(fragment_smile))
+
+    # now stitch together ..
+    # 1 segment the seq_str
+    # allocate any leftover atoms
+    # add the residues
+    targets = set()
+    for match in re.finditer(PATTERNS['place_holder'], seq_str):
+       targets.add(match.group(0))
+    for target in targets:
+       seq_str = seq_str.replace(target, fragments[target[2:-1]])
+       
+    return seq_str
+
+def read_smile_w_bondtypes(line):
+    smile = line
+    bonds=[]
+    # find all bond types and remove them from smile
+    for bond in re.finditer(PATTERNS['bond_anchor'], ex_str):
+        smile=smile.replace(bond.group(0), "")
+        bonds.append((bond.span(0), bond.group(0)[1:-1]))
+
+    # read smile and make molecule
+    mol = read_smiles(smile)
+    pos_to_node = position_to_node(smile)
+
+    # strip the first terminal anchor if there is any //
+
+    # associate the bond atoms with the smile atoms
+    for bond in bonds:
+        # the bondtype contains the zero index so it
+        # refers to the first smile node
+        if bond[0][0] == 0:
+            mol.nodes[0]['bondtype'] = bond[1]
+        else:
+            anchor = find_anchor(smile, bond[0][0])
+            mol.nodes[anchor]['bondtype'] = bond[1]
+
+    return mol
+
+
+def find_anchor(smile, start):
+    branch = False
+    sub_smile=smile[:start]
+    for idx, token in enumerate(sub_smile[::-1]):
+        if token == ")":
+            branch = True
+            continue
+        if token == "(" and branch:
+            branch = False
+            continue
+        if not branch:
+            return start-idx
+    raise IndexError
+
+def position_to_node(smile):
+    count=0
+    pos_to_node={}
+    for idx, token in enumerate(smile):
+        if token not in ['[', ']', '$', '@', '(', ')']:
+            pos_to_node[idx] = count
+            count+=1
+    return pos_to_node

From 40b89af2fe29eda0c1d61123cdd4b2eb20318eb3 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Fri, 19 Jan 2024 10:44:59 +0100
Subject: [PATCH 42/82] infrastructure for big smile parsing

---
 polyply/src/big_smile_parsing.py | 222 +++++++++++++++++++++++++++++++
 1 file changed, 222 insertions(+)
 create mode 100644 polyply/src/big_smile_parsing.py

diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py
new file mode 100644
index 00000000..72e504e6
--- /dev/null
+++ b/polyply/src/big_smile_parsing.py
@@ -0,0 +1,222 @@
+import re
+import pysmiles
+import networkx as nx
+from vermouth.forcefield import ForceField
+from vermouth.molecule import Block
+from polyply.src.meta_molecule import MetaMolecule
+
+PATTERNS = {"bond_anchor": "\[\$.*?\]",
+            "place_holder": "\[\#.*?\]",
+            "annotation": "\|.*?\|",
+            "fragment": r'#(\w+)=((?:\[.*?\]|[^,\[\]]+)*)',
+            "seq_pattern": r'\{([^}]*)\}(?:\.\{([^}]*)\})?'}
+
+def res_pattern_to_meta_mol(pattern):
+    """
+    Generate a :class:`polyply.MetaMolecule` from a
+    pattern string describing a residue graph with the
+    simplified big-smile syntax.
+
+    The syntax scheme consists of two curly braces
+    enclosing the residue graph sequence. It can contain
+    any enumeration of residues by writing them as if they
+    were smile atoms but the atomname is given by # + resname.
+    This input format can handle branching as well; however,
+    macrocycles are currently not supported.
+
+    General Pattern
+    '{' + [#resname_1][#resname_2]... + '}'
+
+    In addition to plain enumeration any residue may be
+    followed by a '|' and an integer number that
+    specifies how many times the given residue should
+    be added within a sequence. For example, a pentamer
+    of PEO can be written as:
+
+    {[#PEO][#PEO][#PEO][#PEO][#PEO]}
+
+    or
+
+    {[#PEO]|5}
+
+    The block syntax also applies to branches. Here the convention
+    is that the complete branch including its first anchoring
+    residue is repeated. For example, to generate a PMA-g-PEG
+    polymer the following syntax is permitted:
+
+    {[#PMA]([#PEO][#PEO])|5}
+
+    Parameters
+    ----------
+    pattern: str
+        a string describing the meta-molecule
+
+    Returns
+    -------
+    :class:`polyply.MetaMolecule`
+    """
+    meta_mol = MetaMolecule()
+    current = 0
+    branch_anchor = 0
+    prev_node = None
+    branching = False
+    for match in re.finditer(PATTERNS['place_holder'], pattern):
+        start, stop = match.span()
+        # new branch here
+        if pattern[start-1] == '(':
+            branching = True
+            branch_anchor = prev_node
+            recipie = [(meta_mol.nodes[prev_node]['resname'], 1)]
+        if stop < len(pattern) and pattern[stop] == '|':
+            n_mon = int(pattern[stop+1:pattern.find('[', stop)])
+        else:
+            n_mon = 1
+
+        resname = match.group(0)[2:-1]
+        # collect all residues in branch
+        if branching:
+            recipie.append((resname, n_mon))
+
+        # add the new residue
+        connection = []
+        for _ in range(0, n_mon):
+            if prev_node is not None:
+                connection = [(prev_node, current)]
+            meta_mol.add_monomer(current,
+                                 resname,
+                                 connection)
+            prev_node = current
+            current += 1
+
+        # terminate branch and jump back to anchor
+        if stop < len(pattern) and pattern[stop] == ')' and branching:
+            branching = False
+            prev_node = branch_anchor
+            # we have to multiply the branch n-times
+            if stop+1 < len(pattern) and pattern[stop+1] == "|":
+                for _ in range(0,int(pattern[stop+2:pattern.find('[', stop)])):
+                    for bdx, (resname, n_mon) in enumerate(recipie):
+                        if bdx == 0:
+                            anchor = current
+                        for _ in range(0, n_mon):
+                            connection = [(prev_node, current)]
+                            meta_mol.add_monomer(current,
+                                                 resname,
+                                                 connection)
+                            prev_node = current
+                            current += 1
+                    prev_node = anchor
+    return meta_mol
+
+def _big_smile_iter(smile):
+    for token in smile:
+        yield token
+
+def tokenize_big_smile(big_smile):
+    """
+    Processes a BigSmile string by storing
+    the BigSmile specific bonding descriptors
+    in a dict with reference to the atom they
+    refer to. Furthermore, a cleaned smile
+    string is generated with the BigSmile
+    specific syntax removed.
+
+    Parameters
+    ----------
+    smile: str
+        a BigSmile smile string
+
+    Returns
+    -------
+    str
+        a canonical smile string
+    dict
+        a dict mapping bonding descriptors
+        to the nodes within the smile
+    """
+    smile_iter = _big_smile_iter(big_smile)
+    bonding_descrpt = {}
+    smile = ""
+    node_count = 0
+    prev_node = 0
+    for token in smile_iter:
+        if token == '[':
+            peek = next(smile_iter)
+            if peek in ['$', '>', '<']:
+                bond_descrp = peek
+                peek = next(smile_iter)
+                while peek != ']':
+                    bond_descrp += peek
+                    peek = next(smile_iter)
+                bonding_descrpt[prev_node] = bond_descrp
+            else:
+                smile = smile + token + peek
+                prev_node = node_count
+                node_count += 1
+
+        elif token == '(':
+            anchor = prev_node
+            smile += token
+        elif token == ')':
+            prev_node = anchor
+            smile += token
+        else:
+            if token not in '@ . - = # $ : / \\ + - %':
+                prev_node = node_count
+                node_count += 1
+            smile += token
+    return smile, bonding_descrpt
+
+def fragment_iter(fragment_str):
+    """
+    Iterates over fragments defined in a BigSmile string.
+    Fragments are named residues that consist of a single
+    smile string together with the BigSmile specific bonding
+    descriptors. The function returns the resname of a named
+    fragment as well as a plain nx.Graph of the molecule
+    described by the smile. Bonding descriptors are annotated
+    as node attributes with the keyword bonding.
+
+    Parameters
+    ----------
+    fragment_str: str
+        the string describing the fragments
+
+    Yields
+    ------
+    str, nx.Graph
+    """
+    for fragment in fragment_str[1:-1].split(','):
+        delim = fragment.find('=', 0)
+        resname = fragment[1:delim]
+        big_smile = fragment[delim+1:]
+        smile, bonding_descrpt = tokenize_big_smile(big_smile)
+        mol_graph = pysmiles.read_smiles(smile)
+        atomnames = [str(node[0])+node[1]['element'] for node in mol_graph.nodes(data=True) ]
+        nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding')
+        nx.set_node_attributes(mol_graph, atomnames, 'atomname')
+        nx.set_node_attributes(mol_graph, resname, 'resname')
+        yield resname, mol_graph
+
+def force_field_from_fragments(fragment_str):
+    """
+    Collects the fragments defined in a BigSmile string
+    as :class:`vermouth.molecule.Blocks` in a force-field
+    object. Bonding descriptors are annotated as node
+    attributes.
+
+    Parameters
+    ----------
+    fragment_str: str
+        string using BigSmile fragment syntax
+
+    Returns
+    -------
+    :class:`vermouth.forcefield.ForceField`
+    """
+    force_field = ForceField("big_smile_ff")
+    frag_iter = fragment_iter(fragment_str)
+    for resname, mol_graph in frag_iter:
+        mol_block = Block(mol_graph)
+        force_field.blocks[resname] = mol_block
+    return forxe_field

From 05ed0456919ad1865925f45b07d2e95396053734 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Fri, 19 Jan 2024 10:47:06 +0100
Subject: [PATCH 43/82] optional dep. for pysmiles

---
 polyply/src/big_smile_parsing.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py
index 72e504e6..2ad65a7b 100644
--- a/polyply/src/big_smile_parsing.py
+++ b/polyply/src/big_smile_parsing.py
@@ -1,5 +1,10 @@
 import re
-import pysmiles
+try:
+    import pysmiles
+except ImportError:
+    msg = ("You are using a functionality that requires "
+           "the pysmiles package. Use pip install pysmiles ")
+    raise ImportError(msg)
 import networkx as nx
 from vermouth.forcefield import ForceField
 from vermouth.molecule import Block

From 82a2acc2bc0aa6a3e47eb9f51e4d4db06f4f0dbe Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Fri, 19 Jan 2024 10:50:13 +0100
Subject: [PATCH 44/82] add a processor that reads a big smile string and
 returns a full metamolecule including edges.

---
 polyply/src/big_smile_mol_processsor.py | 99 +++++++++++++++++++++++++
 1 file changed, 99 insertions(+)
 create mode 100644 polyply/src/big_smile_mol_processsor.py

diff --git a/polyply/src/big_smile_mol_processsor.py b/polyply/src/big_smile_mol_processsor.py
new file mode 100644
index 00000000..8131e009
--- /dev/null
+++ b/polyply/src/big_smile_mol_processsor.py
@@ -0,0 +1,99 @@
+import networkx as nx
+from polyply.src.big_smile_parsing import (res_pattern_to_meta_mol,
+                                           force_field_from_fragments)
+from polyply.src.map_to_molecule import MapToMolecule
+
+def compatible(left, right):
+    """
+    Check bonding descriptor compatibility according
+    to the BigSmiles syntax conventions.
+
+    Parameters
+    ----------
+    left: str
+    right: str
+
+    Returns
+    -------
+    bool
+    """
+    if left == right:
+        return True
+    if left[0] == "<" and right[0] == ">":
+        if left[1:] == right[1:]:
+            return True
+    if left[0] == ">" and right[0] == "<":
+        if left[1:] == right[1:]:
+            return True
+    return False
+
+def generate_edge(source, target, bond_type="bonding"):
+    """
+    Given a source and a target graph, which have bonding
+    descriptors stored as node attributes, find a pair of
+    matching descriptors and return the respective nodes.
+    The function also returns the bonding descriptors. If
+    no bonding descriptor is found an instance of LookupError
+    is raised.
+
+    Parameters
+    ----------
+    source: :class:`nx.Graph`
+    target: :class:`nx.Graph`
+    bond_type: `abc.hashable`
+        under which attribute are the bonding descriptors
+        stored.
+
+    Returns
+    -------
+    ((abc.hashable, abc.hashable), (str, str))
+        the nodes as well as bonding descriptors
+
+    Raises
+    ------
+    LookupError
+        if no match is found
+    """
+    source_nodes = nx.get_node_attributes(source, bond_type)
+    target_nodes = nx.get_node_attributes(target, bond_type)
+    for source_node in source_nodes:
+        for target_node in target_nodes:
+            bond_source = source_nodes[source_node]
+            bond_target = target_nodes[target_node]
+            if compatible(bond_source, bond_target):
+                return ((source_node, target_node), (bond_source, bond_target))
+    raise LookupError
+
+class DefBigSmileParser:
+    """
+    Parse a string instance of a defined BigSmile,
+    which describes a polymer molecule.
+    """
+
+    def __init__(self):
+        self.force_field = None
+        self.meta_molecule = None
+        self.molecule = None
+
+    def edges_from_bonding_descrpt(self):
+        """
+        Make edges according to the bonding descriptors stored
+        in the node attributes of meta_molecule residue graph.
+        If a bonding descriptor is consumed it is set to None,
+        however, the meta_molecule edge gets an attribute with the
+        bonding descriptors that formed the edge.
+        """
+        for prev_node, node in nx.dfs_edges(self.meta_molecule):
+            edge, bonding = generate_edge(self.meta_molecule.nodes[prev_node]['graph'],
+                                          self.meta_molecule.nodes[node]['graph'])
+            self.meta_molecule.nodes[prev_node]['graph'][edge[0]]['bonding'] = None
+            self.meta_molecule.nodes[prev_node]['graph'][edge[1]]['bonding'] = None
+            self.meta_molecule.molecule.add_edge(edge, bonding=bonding)
+
+    def parse(self, big_smile_str):
+        res_pattern, residues = big_smile_str.split('.')
+        self.meta_molecule = res_pattern_to_meta_mol(res_pattern)
+        self.force_field = force_field_from_fragments(residues)
+        MapToMolecule(self.force_field).run_molecule(self.meta_molecule)
+        self.edges_from_bonding_descrpt()
+        return self.meta_molecule

From 257b76b7665355d217dfb0b71249e64255096a35 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Sat, 20 Jan 2024 15:43:12 +0100
Subject: [PATCH 45/82] atest-big-smile parsing part I

---
 polyply/tests/test_big_smile_parsing.py | 64 +++++++++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 polyply/tests/test_big_smile_parsing.py

diff --git a/polyply/tests/test_big_smile_parsing.py b/polyply/tests/test_big_smile_parsing.py
new file mode 100644
index 00000000..43045a83
--- /dev/null
+++ b/polyply/tests/test_big_smile_parsing.py
@@ -0,0 +1,64 @@
+import pytest
+import networkx as nx
+from polyply.src.big_smile_parsing import (res_pattern_to_meta_mol,
+                                           tokenize_big_smile)
+
+@pytest.mark.parametrize('smile, nodes, edges',(
+                        # smiple linear seqeunce
+                        ("{[#PMA][#PEO][#PMA]}",
+                        ["PMA", "PEO", "PMA"],
+                        [(0, 1), (1, 2)]),
+                        # simple branched sequence
+                        ("{[#PMA][#PMA]([#PEO][#PEO])[#PMA]}",
+                        ["PMA", "PMA", "PEO", "PEO", "PMA"],
+                        [(0, 1), (1, 2), (2, 3), (1, 4)]),
+                        # simple sequence two branches
+                        ("{[#PMA][#PMA][#PMA]([#PEO][#PEO])([#CH3])[#PMA]}",
+                        ["PMA", "PMA", "PMA", "PEO", "PEO", "CH3", "PMA"],
+                        [(0, 1), (1, 2), (2, 3), (3, 4), (2, 5), (2, 6)]),
+                        # simple linear sequence with expansion
+                        ("{[#PMA]|3}",
+                        ["PMA", "PMA", "PMA"],
+                        [(0, 1), (1, 2)]),
+                       ## simple branched with expansion
+                       #("{[#PMA]([#PEO]|3)|2}",
+                       #["PMA", "PEO", "PEO", "PEO",
+                       # "PMA", "PEO", "PEO", "PEO"],
+                       #[(0, 1), (1, 2), (2, 3),
+                       # (0, 4), (4, 5), (5, 6), (6, 7)]
+                       # )
+))
+def test_res_pattern_to_meta_mol(smile, nodes, edges):
+    """
+    Test that the meta-molecule is correctly reproduced
+    from the simplified smile string syntax.
+    """
+    meta_mol = res_pattern_to_meta_mol(smile)
+    assert len(meta_mol.edges) == len(edges)
+    for edge in edges:
+        assert meta_mol.has_edge(*edge)
+    resnames = nx.get_node_attributes(meta_mol, 'resname')
+    assert nodes == list(resnames.values())
+
+@pytest.mark.parametrize('big_smile, smile, bonding',(
+                        # smiple symmetric bonding
+                        ("[$]COC[$]",
+                         "COC",
+                        {0: '$', 2: '$'}),
+                        # named different bonding descriptors
+                        ("[$1]CCCC[$2]",
+                         "CCCC",
+                        {0: "$1", 3: "$2"}),
+                        # bonding descript. after branch
+                        ("C(COC[$1])[$2]CCC[$3]",
+                         "C(COC)CCC",
+                        {0: '$2', 3: '$1', 6: '$3'}),
+                        # left rigth bonding desciptors
+                        ("[>]COC[<]",
+                        "COC",
+                        {0: '>', 2: '<'})
+))
+def test_tokenize_big_smile(big_smile, smile, bonding):
+    new_smile, new_bonding = tokenize_big_smile(big_smile)
+    assert new_smile == smile
+    assert new_bonding == bonding

From 061c8efc60931b2fff5ef4c866b0c3a952ebded9 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Mon, 22 Jan 2024 16:37:32 +0100
Subject: [PATCH 46/82] fix hcount for single atom; fix nested branches

---
 polyply/src/big_smile_parsing.py | 54 +++++++++++++++++++++++++++-----
 1 file changed, 47 insertions(+), 7 deletions(-)

diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py
index 2ad65a7b..ddb9bd2a 100644
--- a/polyply/src/big_smile_parsing.py
+++ b/polyply/src/big_smile_parsing.py
@@ -1,4 +1,5 @@
 import re
+import numpy as np
 try:
     import pysmiles
 except ImportError:
@@ -16,6 +17,12 @@
             "fragment": r'#(\w+)=((?:\[.*?\]|[^,\[\]]+)*)',
             "seq_pattern": r'\{([^}]*)\}(?:\.\{([^}]*)\})?'}
 
+def _find_next_character(string, chars, start):
+    for idx, token in enumerate(string[start:]):
+        if token in chars:
+            return idx+start
+    return np.inf
+
 def res_pattern_to_meta_mol(pattern):
     """
     Generate a :class:`polyply.MetaMolecule` from a
@@ -67,13 +74,15 @@ def res_pattern_to_meta_mol(pattern):
     branching = False
     for match in re.finditer(PATTERNS['place_holder'], pattern):
         start, stop = match.span()
+        print(pattern[start:stop])
         # new branch here
         if pattern[start-1] == '(':
             branching = True
             branch_anchor = prev_node
             recipie = [(meta_mol.nodes[prev_node]['resname'], 1)]
         if stop < len(pattern) and pattern[stop] == '|':
-            n_mon = int(pattern[stop+1:pattern.find('[', stop)])
+            eon = _find_next_character(pattern, ['[', ')', '(', '}'], stop)
+            n_mon = int(pattern[stop+1:eon])
         else:
             n_mon = 1
 
@@ -94,12 +103,17 @@ def res_pattern_to_meta_mol(pattern):
             current += 1
 
         # terminate branch and jump back to anchor
-        if stop < len(pattern) and pattern[stop] == ')' and branching:
+        branch_stop = _find_next_character(pattern, ['['], stop) >\
+                      _find_next_character(pattern, [')'], stop)
+        if stop <= len(pattern) and branch_stop and branching:
             branching = False
             prev_node = branch_anchor
             # we have to multiply the branch n-times
-            if stop+1 < len(pattern) and pattern[stop+1] == "|":
-                for _ in range(0,int(pattern[stop+2:pattern.find('[', stop)])):
+            eon_a = _find_next_character(pattern, [')'], stop)
+            if stop+1 < len(pattern) and pattern[eon_a+1] == "|":
+                eon_b = _find_next_character(pattern, ['[', ')', '(', '}'], eon_a+1)
+                # -1 because one branch has already been added at this point
+                for _ in range(0,int(pattern[eon_a+2:eon_b])-1):
                     for bdx, (resname, n_mon) in enumerate(recipie):
                         if bdx == 0:
                             anchor = current
@@ -166,12 +180,36 @@ def tokenize_big_smile(big_smile):
             prev_node = anchor
             smile += token
         else:
-            if token not in '@ . - = # $ : / \\ + - %':
+            if token not in '@ . - = # $ : / \\ + - %'\
+                and not token.isdigit():
                 prev_node = node_count
                 node_count += 1
             smile += token
     return smile, bonding_descrpt
 
+def _rebuild_h_atoms(mol_graph):
+    # special hack around to fix
+    # pysmiles bug for a single
+    # atom molecule; we assume that the
+    # hcount is just wrong and set it to
+    # the valance number minus bonds minus
+    # bonding connectors
+    if len(mol_graph.nodes) == 1:
+        ele = mol_graph.nodes[0]['element']
+        # for N and P we assume the regular valency
+        hcount = pysmiles.smiles_helper.VALENCES[ele][0]
+        if mol_graph.nodes[0].get('bonding', False):
+            hcount -= 1
+        mol_graph.nodes[0]['hcount'] = hcount
+    else:
+        for node in mol_graph.nodes:
+            if mol_graph.nodes[node].get('bonding', False):
+                hcount = mol_graph.nodes[node]['hcount']
+                mol_graph.nodes[node]['hcount'] = hcount - 1
+
+    pysmiles.smiles_helper.add_explicit_hydrogens(mol_graph)
+    return mol_graph
+
 def fragment_iter(fragment_str):
     """
     Iterates over fragments defined in a BigSmile string.
@@ -197,8 +235,10 @@ def fragment_iter(fragment_str):
         big_smile = fragment[delim+1:]
         smile, bonding_descrpt = tokenize_big_smile(big_smile)
         mol_graph = pysmiles.read_smiles(smile)
-        atomnames = [str(node[0])+node[1]['element'] for node in mol_graph.nodes(data=True) ]
         nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding')
+        # we need to rebuild hydrogen atoms now
+        _rebuild_h_atoms(mol_graph)
+        atomnames = {node[0]: node[1]['element']+str(node[0]) for node in mol_graph.nodes(data=True)}
         nx.set_node_attributes(mol_graph, atomnames, 'atomname')
         nx.set_node_attributes(mol_graph, resname, 'resname')
         yield resname, mol_graph
@@ -224,4 +264,4 @@ def force_field_from_fragments(fragment_str):
     for resname, mol_graph in frag_iter:
         mol_block = Block(mol_graph)
         force_field.blocks[resname] = mol_block
-    return forxe_field
+    return force_field

From 20e2e4917500ef8621a696f4395494a3dfd5a6e8 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Mon, 22 Jan 2024 16:37:58 +0100
Subject: [PATCH 47/82] tests for smile iter and test nested branches

---
 polyply/tests/test_big_smile_parsing.py | 71 ++++++++++++++++++++++---
 1 file changed, 63 insertions(+), 8 deletions(-)

diff --git a/polyply/tests/test_big_smile_parsing.py b/polyply/tests/test_big_smile_parsing.py
index 43045a83..3265564c 100644
--- a/polyply/tests/test_big_smile_parsing.py
+++ b/polyply/tests/test_big_smile_parsing.py
@@ -1,7 +1,8 @@
 import pytest
 import networkx as nx
 from polyply.src.big_smile_parsing import (res_pattern_to_meta_mol,
-                                           tokenize_big_smile)
+                                           tokenize_big_smile,
+                                           fragment_iter)
 
 @pytest.mark.parametrize('smile, nodes, edges',(
                         # smiple linear seqeunce
@@ -20,13 +21,20 @@
                         ("{[#PMA]|3}",
                         ["PMA", "PMA", "PMA"],
                         [(0, 1), (1, 2)]),
-                       ## simple branched with expansion
-                       #("{[#PMA]([#PEO]|3)|2}",
-                       #["PMA", "PEO", "PEO", "PEO",
-                       # "PMA", "PEO", "PEO", "PEO"],
-                       #[(0, 1), (1, 2), (2, 3),
-                       # (0, 4), (4, 5), (5, 6), (6, 7)]
-                       # )
+                        # simple branch expension
+                        ("{[#PMA]([#PEO][#PEO][#OHter])|2}",
+                        ["PMA", "PEO", "PEO", "OHter",
+                         "PMA", "PEO", "PEO", "OHter"],
+                        [(0, 1), (1, 2), (2, 3),
+                         (0, 4), (4, 5), (5, 6), (6, 7)]
+                         ),
+                        # nested branched with expansion
+                        ("{[#PMA]([#PEO]|3)|2}",
+                        ["PMA", "PEO", "PEO", "PEO",
+                         "PMA", "PEO", "PEO", "PEO"],
+                        [(0, 1), (1, 2), (2, 3),
+                         (0, 4), (4, 5), (5, 6), (6, 7)]
+                         )
 ))
 def test_res_pattern_to_meta_mol(smile, nodes, edges):
     """
@@ -49,6 +57,10 @@ def test_res_pattern_to_meta_mol(smile, nodes, edges):
                         ("[$1]CCCC[$2]",
                          "CCCC",
                         {0: "$1", 3: "$2"}),
+                        # ring and bonding descriptors
+                        ("[$1]CC[$2]C1CCCCC1",
+                         "CCC1CCCCC1",
+                        {0: "$1", 1: "$2"}),
                         # bonding descript. after branch
                         ("C(COC[$1])[$2]CCC[$3]",
                          "C(COC)CCC",
@@ -62,3 +74,46 @@ def test_tokenize_big_smile(big_smile, smile, bonding):
     new_smile, new_bonding = tokenize_big_smile(big_smile)
     assert new_smile == smile
     assert new_bonding == bonding
+
+@pytest.mark.parametrize('fragment_str, nodes, edges',(
+                        # single fragment
+                        ("{#PEO=[$]COC[$]}",
+                        {"PEO": ((0, {"atomname": "C0", "resname": "PEO", "bonding": "$", "element": "C"}),
+                                 (1, {"atomname": "O1", "resname": "PEO", "element": "O"}),
+                                 (2, {"atomname": "C2", "resname": "PEO", "bonding": "$", "element": "C"}),
+                                 (3, {"atomname": "H3", "resname": "PEO", "element": "H"}),
+                                 (4, {"atomname": "H4", "resname": "PEO", "element": "H"}),
+                                 (5, {"atomname": "H5", "resname": "PEO", "element": "H"}),
+                                 (6, {"atomname": "H6", "resname": "PEO", "element": "H"}),
+                                )},
+                        {"PEO": [(0, 1), (1, 2), (0, 3), (0, 4), (2, 5), (2, 6)]}),
+                        # test NH3 terminal
+                        ("{#AMM=N[$]}",
+                        {"AMM": ((0, {"atomname": "N0", "resname": "AMM", "bonding": "$", "element": "N"}),
+                                 (1, {"atomname": "H1", "resname": "AMM", "element": "H"}),
+                                 (2, {"atomname": "H2", "resname": "AMM", "element": "H"}),
+                                )},
+                        {"AMM": [(0, 1), (0, 2)]}),
+                        # single fragment + 1 terminal (i.e. only 1 bonding descrpt
+                        ("{#PEO=[$]COC[$],#OHter=[$][OH]}",
+                        {"PEO": ((0, {"atomname": "C0", "resname": "PEO", "bonding": "$", "element": "C"}),
+                                 (1, {"atomname": "O1", "resname": "PEO", "element": "O"}),
+                                 (2, {"atomname": "C2", "resname": "PEO", "bonding": "$", "element": "C"}),
+                                 (3, {"atomname": "H3", "resname": "PEO", "element": "H"}),
+                                 (4, {"atomname": "H4", "resname": "PEO", "element": "H"}),
+                                 (5, {"atomname": "H5", "resname": "PEO", "element": "H"}),
+                                 (6, {"atomname": "H6", "resname": "PEO", "element": "H"}),
+                                 ),
+                         "OHter": ((0, {"atomname": "O0", "resname": "OHter", "bonding": "$", "element": "O"}),
+                                   (1, {"atomname": "H1", "resname": "OHter", "element": "H"}))},
+                        {"PEO": [(0, 1), (1, 2), (0, 3), (0, 4), (2, 5), (2, 6)],
+                         "OHter": [(0, 1)]}),
+))
+def test_fragment_iter(fragment_str, nodes, edges):
+    for resname, mol_graph in fragment_iter(fragment_str):
+        assert len(mol_graph.nodes) == len(nodes[resname])
+        for node, ref_node in zip(mol_graph.nodes(data=True), nodes[resname]):
+           assert node[0] == ref_node[0]
+           for key in ref_node[1]:
+                assert ref_node[1][key] == node[1][key]
+        assert sorted(mol_graph.edges) == sorted(edges[resname])

From f505129f98cdff951f9137a832cb604f44bce8f9 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Mon, 22 Jan 2024 16:40:17 +0100
Subject: [PATCH 48/82] add pysmiles to test requirements

---
 requirements-tests.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements-tests.txt b/requirements-tests.txt
index 595a4902..03357910 100644
--- a/requirements-tests.txt
+++ b/requirements-tests.txt
@@ -4,3 +4,4 @@ pytest-cov
 pylint
 codecov
 tqdm
+pysmiles

From 0c67ecc17c530c50aa98781fe7df0bb37324e983 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Mon, 22 Jan 2024 18:49:28 +0100
Subject: [PATCH 49/82] add tests for bonding descriptor evaluation

---
 polyply/tests/test_big_smile_mol_proc.py | 37 ++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 polyply/tests/test_big_smile_mol_proc.py

diff --git a/polyply/tests/test_big_smile_mol_proc.py b/polyply/tests/test_big_smile_mol_proc.py
new file mode 100644
index 00000000..7bcdf9f9
--- /dev/null
+++ b/polyply/tests/test_big_smile_mol_proc.py
@@ -0,0 +1,37 @@
+import pytest
+import networkx as nx
+from polyply.src.big_smile_mol_processor import (DefBigSmileParser,
+                                                 generate_edge)
+
+@pytest.mark.parametrize('bonds_source, bonds_target, edge, btypes',(
+                        # single bond source each
+                        ({0: "$"},
+                         {3: "$"},
+                         (0, 3),
+                         ('$', '$')),
+                        # multiple sources one match
+                        ({0: '$1', 2: '$2'},
+                         {1: '$2', 3: '$'},
+                         (2, 1),
+                         ('$2', '$2')),
+                        # left right selective bonding
+                        ({0: '$', 1: '>', 3: '<'},
+                         {0: '>', 1: '$5'},
+                         (3, 0),
+                         ('<', '>')),
+                        # left right selective bonding
+                        # with identifier
+                        ({0: '$', 1: '>', 3: '<1'},
+                         {0: '>', 1: '$5', 2: '>1'},
+                         (3, 2),
+                         ('<1', '>1')),
+
+))
+def test_generate_edge(bonds_source, bonds_target, edge, btypes):
+    source = nx.path_graph(5)
+    target = nx.path_graph(4)
+    nx.set_node_attributes(source, bonds_source, "bonding")
+    nx.set_node_attributes(target, bonds_target, "bonding")
+    new_edge, new_btypes = generate_edge(source, target, bond_type="bonding")
+    assert new_edge == edge
+    assert new_btypes == btypes

From 52235c91887a87b1be7bea3d50902f9470e286e2 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Tue, 23 Jan 2024 18:57:46 +0100
Subject: [PATCH 50/82] add tests for big smile molecule processor

---
 polyply/tests/test_big_smile_mol_proc.py | 66 ++++++++++++++++++++----
 1 file changed, 57 insertions(+), 9 deletions(-)

diff --git a/polyply/tests/test_big_smile_mol_proc.py b/polyply/tests/test_big_smile_mol_proc.py
index 7bcdf9f9..58667ed8 100644
--- a/polyply/tests/test_big_smile_mol_proc.py
+++ b/polyply/tests/test_big_smile_mol_proc.py
@@ -2,27 +2,32 @@
 import networkx as nx
 from polyply.src.big_smile_mol_processor import (DefBigSmileParser,
                                                  generate_edge)
-
+import matplotlib.pyplot as plt
 @pytest.mark.parametrize('bonds_source, bonds_target, edge, btypes',(
                         # single bond source each
-                        ({0: "$"},
-                         {3: "$"},
+                        ({0: ["$"]},
+                         {3: ["$"]},
+                         (0, 3),
+                         ('$', '$')),
+                        # include a None
+                        ({0: ["$"], 1: []},
+                         {3: ["$"]},
                          (0, 3),
                          ('$', '$')),
                         # multiple sources one match
-                        ({0: '$1', 2: '$2'},
-                         {1: '$2', 3: '$'},
+                        ({0: ['$1'], 2: ['$2']},
+                         {1: ['$2'], 3: ['$']},
                          (2, 1),
                          ('$2', '$2')),
                         # left right selective bonding
-                        ({0: '$', 1: '>', 3: '<'},
-                         {0: '>', 1: '$5'},
+                        ({0: ['$'], 1: ['>'], 3: ['<']},
+                         {0: ['>'], 1: ['$5']},
                          (3, 0),
                          ('<', '>')),
                         # left right selective bonding
                         # with identifier
-                        ({0: '$', 1: '>', 3: '<1'},
-                         {0: '>', 1: '$5', 2: '>1'},
+                        ({0: ['$'], 1: ['>'], 3: ['<1']},
+                         {0: ['>'], 1: ['$5'], 2: ['>1']},
                          (3, 2),
                          ('<1', '>1')),
 
@@ -35,3 +40,46 @@ def test_generate_edge(bonds_source, bonds_target, edge, btypes):
     new_edge, new_btypes = generate_edge(source, target, bond_type="bonding")
     assert new_edge == edge
     assert new_btypes == btypes
+
+
+@pytest.mark.parametrize('smile, ref_nodes, ref_edges',(
+                        # smiple linear seqeunce
+                        ("{[#OHter][#PEO]|2[#OHter]}.{#PEO=[$]COC[$],#OHter=[$][O]}",
+                        #           0 1             2 3 4 5 6 7 8
+                        [('OHter', 'O H'), ('PEO', 'C O C H H H H'),
+                        #        9 10 11 12 13 14 15         16 17
+                         ('PEO', 'C O C H H H H'), ('OHter', 'O H')],
+                        [(0, 1), (0, 2), (2, 3), (3, 4), (2, 5), (2, 6), (4, 7),
+                         (4, 8), (4, 9), (9, 10), (10, 11), (9, 12), (9, 13),
+                         (11, 14), (11, 15), (11, 16), (16, 17)]),
+                        # simple branched sequence
+                        ("{[#Hter][#PE]([#PEO][#Hter])[#PE]([#PEO][#Hter])[#Hter]}.{#Hter=[$]H,#PE=[$]CC[$][$],#PEO=[$]COC[$]}",
+                        [('Hter', 'H'), ('PE', 'C C H H H'), ('PEO', 'C O C H H H H'), ('Hter', 'H'),
+                         ('PE', 'C C H H H'), ('PEO', 'C O C H H H H'), ('Hter', 'H'), ('Hter', 'H')],
+                        [(0, 1), (1, 2), (1, 3), (1, 4), (2, 5), (2, 6), (2, 14), (6, 7), (6, 9), (6, 10), (7, 8),
+                         (8, 11), (8, 12), (8, 13), (14, 15), (14, 16), (14, 17), (15, 18), (15, 19), (15, 27),
+                         (19, 20), (19, 22), (19, 23), (20, 21), (21, 24), (21, 25), (21, 26)]),
+                        # something with a ring
+                        #            012 34567
+                        #            890123456
+                        ("{[#Hter][#PS]|2[#Hter]}.{#PS=[$]CC[$]c1ccccc1,#Hter=[$]H}",
+                        [('Hter', 'H'), ('PS', 'C C C C C C C C H H H H H H H H'),
+                         ('PS', 'C C C C C C C C H H H H H H H H'), ('Hter', 'H')],
+                        [(0, 1), (1, 2), (1, 9), (1, 10), (2, 3), (2, 11), (2, 17),
+                         (3, 4), (3, 8), (4, 5), (4, 12), (5, 6), (5, 13), (6, 7),
+                         (6, 14), (7, 8), (7, 15), (8, 16), (17, 18), (17, 25),
+                         (17, 26), (18, 19), (18, 27), (18, 33), (19, 20), (19, 24),
+                         (20, 21), (20, 28), (21, 22), (21, 29), (22, 23), (22, 30),
+                         (23, 24), (23, 31), (24, 32)]),
+
+))
+def test_def_big_smile_parser(smile, ref_nodes, ref_edges):
+    meta_mol = DefBigSmileParser().parse(smile)
+    for node, ref in zip(meta_mol.nodes, ref_nodes):
+        assert meta_mol.nodes[node]['resname'] ==  ref[0]
+        block_graph = meta_mol.nodes[node]['graph']
+        elements = list(nx.get_node_attributes(block_graph, 'element').values())
+        assert elements == ref[1].split()
+    #nx.draw_networkx(meta_mol.molecule, with_labels=True, labels=nx.get_node_attributes(meta_mol.molecule, 'element'))
+    #plt.show()
+    assert sorted(meta_mol.molecule.edges) == sorted(ref_edges)

From 9a0a674fa685af88df1567d9051a0bbb308e80d4 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Tue, 23 Jan 2024 18:58:18 +0100
Subject: [PATCH 51/82] allow multiple bonding per atom; fix bugs

---
 polyply/src/big_smile_mol_processor.py | 117 +++++++++++++++++++++++++
 polyply/src/big_smile_parsing.py       |  22 +++--
 2 files changed, 132 insertions(+), 7 deletions(-)
 create mode 100644 polyply/src/big_smile_mol_processor.py

diff --git a/polyply/src/big_smile_mol_processor.py b/polyply/src/big_smile_mol_processor.py
new file mode 100644
index 00000000..8499e7e3
--- /dev/null
+++ b/polyply/src/big_smile_mol_processor.py
@@ -0,0 +1,117 @@
+import networkx as nx
+from polyply.src.big_smile_parsing import (res_pattern_to_meta_mol,
+                                           force_field_from_fragments)
+from polyply.src.map_to_molecule import MapToMolecule
+
+def compatible(left, right):
+    """
+    Check bonding descriptor compatibility according
+    to the BigSmiles syntax convetions.
+
+    Parameters
+    ----------
+    left: str
+    right: str
+
+    Returns
+    -------
+    bool
+    """
+    if left == right and left not in '> <':
+        return True
+    if left[0] == "<" and right[0] == ">":
+        if left[1:] == right[1:]:
+            return True
+    if left[0] == ">" and right[0] == "<":
+        if left[1:] == right[1:]:
+            return True
+    return False
+
+def generate_edge(source, target, bond_type="bonding"):
+    """
+    Given a source and a target graph, which have bonding
+    descriptors stored as node attributes, find a pair of
+    matching descriptors and return the respective nodes.
+    The function also returns the bonding descriptors. If
+    no bonding descriptor is found an instance of LookupError
+    is raised.
+
+    Parameters
+    ----------
+    source: :class:`nx.Graph`
+    target: :class:`nx.Graph`
+    bond_type: `abc.hashable`
+        under which attribute are the bonding descriptors
+        stored.
+
+    Returns
+    -------
+    ((abc.hashable, abc.hashable), (str, str))
+        the nodes as well as bonding descriptors
+
+    Raises
+    ------
+    LookupError
+        if no match is found
+    """
+    source_nodes = nx.get_node_attributes(source, bond_type)
+    target_nodes = nx.get_node_attributes(target, bond_type)
+    for source_node in source_nodes:
+        for target_node in target_nodes:
+            #print(source_node, target_node)
+            bond_sources = source_nodes[source_node]
+            bond_targets = target_nodes[target_node]
+            for bond_source in bond_sources:
+                for bond_target in bond_targets:
+                    #print(bond_source, bond_target)
+                    if compatible(bond_source, bond_target):
+                        return ((source_node, target_node), (bond_source, bond_target))
+    raise LookupError
+
+class DefBigSmileParser:
+    """
+    Parse an a string instance of a defined BigSmile,
+    which describes a polymer molecule.
+    """
+
+    def __init__(self):
+        self.force_field = None
+        self.meta_molecule = None
+        self.molecule = None
+
+    def edges_from_bonding_descrpt(self):
+        """
+        Make edges according to the bonding descriptors stored
+        in the node attributes of meta_molecule residue graph.
+        If a bonding descriptor is consumed it is set to None,
+        however, the meta_molecule edge gets an attribute with the
+        bonding descriptors that formed the edge.
+        """
+        for prev_node, node in nx.dfs_edges(self.meta_molecule):
+            prev_graph = self.meta_molecule.nodes[prev_node]['graph']
+            node_graph = self.meta_molecule.nodes[node]['graph']
+            edge, bonding = generate_edge(prev_graph,
+                                          node_graph)
+            # this is a bit of a workaround because at this stage the
+            # bonding list is actually shared between all residues of
+            # of the same type; so we first make a copy then we replace
+            # the list sans used bonding descriptor
+            prev_bond_list = prev_graph.nodes[edge[0]]['bonding'].copy()
+            prev_bond_list.remove(bonding[0])
+            prev_graph.nodes[edge[0]]['bonding'] = prev_bond_list
+            node_bond_list = node_graph.nodes[edge[1]]['bonding'].copy()
+            node_bond_list.remove(bonding[1])
+            node_graph.nodes[edge[1]]['bonding'] = node_bond_list
+            self.meta_molecule.molecule.add_edge(edge[0], edge[1], bonding=bonding)
+
+    def parse(self, big_smile_str):
+        res_pattern, residues = big_smile_str.split('.')
+        self.meta_molecule = res_pattern_to_meta_mol(res_pattern)
+        self.force_field = force_field_from_fragments(residues)
+        MapToMolecule(self.force_field).run_molecule(self.meta_molecule)
+        self.edges_from_bonding_descrpt()
+        return self.meta_molecule
+
+# ToDo
+# - replace non consumed bonding descrpt by hydrogen
+# - 
diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py
index ddb9bd2a..fa6348cc 100644
--- a/polyply/src/big_smile_parsing.py
+++ b/polyply/src/big_smile_parsing.py
@@ -1,3 +1,4 @@
+from collections import defaultdict
 import re
 import numpy as np
 try:
@@ -154,7 +155,7 @@ def tokenize_big_smile(big_smile):
         to the nodes within the smile
     """
     smile_iter = _big_smile_iter(big_smile)
-    bonding_descrpt = {}
+    bonding_descrpt = defaultdict(list)
     smile = ""
     node_count = 0
     prev_node = 0
@@ -167,7 +168,7 @@ def tokenize_big_smile(big_smile):
                 while peek != ']':
                     bond_descrp += peek
                     peek = next(smile_iter)
-                bonding_descrpt[prev_node] = bond_descrp
+                bonding_descrpt[prev_node].append(bond_descrp)
             else:
                 smile = smile + token + peek
                 prev_node = node_count
@@ -205,7 +206,7 @@ def _rebuild_h_atoms(mol_graph):
         for node in mol_graph.nodes:
             if mol_graph.nodes[node].get('bonding', False):
                 hcount = mol_graph.nodes[node]['hcount']
-                mol_graph.nodes[node]['hcount'] = hcount - 1
+                mol_graph.nodes[node]['hcount'] = hcount - len(mol_graph.nodes[node]['bonding'])
 
     pysmiles.smiles_helper.add_explicit_hydrogens(mol_graph)
     return mol_graph
@@ -234,10 +235,17 @@ def fragment_iter(fragment_str):
         resname = fragment[1:delim]
         big_smile = fragment[delim+1:]
         smile, bonding_descrpt = tokenize_big_smile(big_smile)
-        mol_graph = pysmiles.read_smiles(smile)
-        nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding')
-        # we need to rebuild hydrogen atoms now
-        _rebuild_h_atoms(mol_graph)
+
+        if smile == "H":
+            mol_graph = nx.Graph()
+            mol_graph.add_node(0, element="H", bonding=bonding_descrpt[0])
+            nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding')
+        else:
+            mol_graph = pysmiles.read_smiles(smile)
+            nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding')
+            # we need to rebuild hydrogen atoms now
+            _rebuild_h_atoms(mol_graph)
+
         atomnames = {node[0]: node[1]['element']+str(node[0]) for node in mol_graph.nodes(data=True)}
         nx.set_node_attributes(mol_graph, atomnames, 'atomname')
         nx.set_node_attributes(mol_graph, resname, 'resname')

From ceccc3d53fab73921c87bfd29a885fda7e284726 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Wed, 24 Jan 2024 10:59:53 +0100
Subject: [PATCH 52/82] remove mpl import

---
 polyply/tests/test_big_smile_mol_proc.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/polyply/tests/test_big_smile_mol_proc.py b/polyply/tests/test_big_smile_mol_proc.py
index 58667ed8..6975b885 100644
--- a/polyply/tests/test_big_smile_mol_proc.py
+++ b/polyply/tests/test_big_smile_mol_proc.py
@@ -2,7 +2,7 @@
 import networkx as nx
 from polyply.src.big_smile_mol_processor import (DefBigSmileParser,
                                                  generate_edge)
-import matplotlib.pyplot as plt
+#import matplotlib.pyplot as plt
 @pytest.mark.parametrize('bonds_source, bonds_target, edge, btypes',(
                         # single bond source each
                         ({0: ["$"]},

From 158fd3734f321d2084d7b466297dfe4c0c851d30 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Wed, 24 Jan 2024 11:12:13 +0100
Subject: [PATCH 53/82] add changed tests for multiple bonding per atom

---
 polyply/tests/test_big_smile_parsing.py | 40 ++++++++++++++++++-------
 1 file changed, 29 insertions(+), 11 deletions(-)

diff --git a/polyply/tests/test_big_smile_parsing.py b/polyply/tests/test_big_smile_parsing.py
index 3265564c..f7faf71a 100644
--- a/polyply/tests/test_big_smile_parsing.py
+++ b/polyply/tests/test_big_smile_parsing.py
@@ -52,23 +52,27 @@ def test_res_pattern_to_meta_mol(smile, nodes, edges):
                         # smiple symmetric bonding
                         ("[$]COC[$]",
                          "COC",
-                        {0: '$', 2: '$'}),
+                        {0: ["$"], 2: ["$"]}),
+                        # smiple symmetric bonding; multiple descript
+                        ("[$]COC[$][$1]",
+                         "COC",
+                        {0: ["$"], 2: ["$", "$1"]}),
                         # named different bonding descriptors
                         ("[$1]CCCC[$2]",
                          "CCCC",
-                        {0: "$1", 3: "$2"}),
+                        {0: ["$1"], 3: ["$2"]}),
                         # ring and bonding descriptors
                         ("[$1]CC[$2]C1CCCCC1",
                          "CCC1CCCCC1",
-                        {0: "$1", 1: "$2"}),
+                        {0: ["$1"], 1: ["$2"]}),
                         # bonding descript. after branch
                         ("C(COC[$1])[$2]CCC[$3]",
                          "C(COC)CCC",
-                        {0: '$2', 3: '$1', 6: '$3'}),
+                        {0: ["$2"], 3: ["$1"], 6: ["$3"]}),
                         # left rigth bonding desciptors
                         ("[>]COC[<]",
                         "COC",
-                        {0: '>', 2: '<'})
+                        {0: [">"], 2: ["<"]})
 ))
 def test_tokenize_big_smile(big_smile, smile, bonding):
     new_smile, new_bonding = tokenize_big_smile(big_smile)
@@ -78,9 +82,9 @@ def test_tokenize_big_smile(big_smile, smile, bonding):
 @pytest.mark.parametrize('fragment_str, nodes, edges',(
                         # single fragment
                         ("{#PEO=[$]COC[$]}",
-                        {"PEO": ((0, {"atomname": "C0", "resname": "PEO", "bonding": "$", "element": "C"}),
+                        {"PEO": ((0, {"atomname": "C0", "resname": "PEO", "bonding": ["$"], "element": "C"}),
                                  (1, {"atomname": "O1", "resname": "PEO", "element": "O"}),
-                                 (2, {"atomname": "C2", "resname": "PEO", "bonding": "$", "element": "C"}),
+                                 (2, {"atomname": "C2", "resname": "PEO", "bonding": ["$"], "element": "C"}),
                                  (3, {"atomname": "H3", "resname": "PEO", "element": "H"}),
                                  (4, {"atomname": "H4", "resname": "PEO", "element": "H"}),
                                  (5, {"atomname": "H5", "resname": "PEO", "element": "H"}),
@@ -89,25 +93,39 @@ def test_tokenize_big_smile(big_smile, smile, bonding):
                         {"PEO": [(0, 1), (1, 2), (0, 3), (0, 4), (2, 5), (2, 6)]}),
                         # test NH3 terminal
                         ("{#AMM=N[$]}",
-                        {"AMM": ((0, {"atomname": "N0", "resname": "AMM", "bonding": "$", "element": "N"}),
+                        {"AMM": ((0, {"atomname": "N0", "resname": "AMM", "bonding": ["$"], "element": "N"}),
                                  (1, {"atomname": "H1", "resname": "AMM", "element": "H"}),
                                  (2, {"atomname": "H2", "resname": "AMM", "element": "H"}),
                                 )},
                         {"AMM": [(0, 1), (0, 2)]}),
                         # single fragment + 1 terminal (i.e. only 1 bonding descrpt
                         ("{#PEO=[$]COC[$],#OHter=[$][OH]}",
-                        {"PEO": ((0, {"atomname": "C0", "resname": "PEO", "bonding": "$", "element": "C"}),
+                        {"PEO": ((0, {"atomname": "C0", "resname": "PEO", "bonding": ["$"], "element": "C"}),
                                  (1, {"atomname": "O1", "resname": "PEO", "element": "O"}),
-                                 (2, {"atomname": "C2", "resname": "PEO", "bonding": "$", "element": "C"}),
+                                 (2, {"atomname": "C2", "resname": "PEO", "bonding": ["$"], "element": "C"}),
                                  (3, {"atomname": "H3", "resname": "PEO", "element": "H"}),
                                  (4, {"atomname": "H4", "resname": "PEO", "element": "H"}),
                                  (5, {"atomname": "H5", "resname": "PEO", "element": "H"}),
                                  (6, {"atomname": "H6", "resname": "PEO", "element": "H"}),
                                  ),
-                         "OHter": ((0, {"atomname": "O0", "resname": "OHter", "bonding": "$", "element": "O"}),
+                         "OHter": ((0, {"atomname": "O0", "resname": "OHter", "bonding": ["$"], "element": "O"}),
                                    (1, {"atomname": "H1", "resname": "OHter", "element": "H"}))},
                         {"PEO": [(0, 1), (1, 2), (0, 3), (0, 4), (2, 5), (2, 6)],
                          "OHter": [(0, 1)]}),
+                        # single fragment + 1 terminal but multiple bond descritp.
+                        # this adjust the hydrogen count
+                        ("{#PEO=[$]COC[$][$1],#OHter=[$][OH]}",
+                        {"PEO": ((0, {"atomname": "C0", "resname": "PEO", "bonding": ["$"], "element": "C"}),
+                                 (1, {"atomname": "O1", "resname": "PEO", "element": "O"}),
+                                 (2, {"atomname": "C2", "resname": "PEO", "bonding": ["$", "$1"], "element": "C"}),
+                                 (3, {"atomname": "H3", "resname": "PEO", "element": "H"}),
+                                 (4, {"atomname": "H4", "resname": "PEO", "element": "H"}),
+                                 (5, {"atomname": "H5", "resname": "PEO", "element": "H"}),
+                                 ),
+                         "OHter": ((0, {"atomname": "O0", "resname": "OHter", "bonding": ["$"], "element": "O"}),
+                                   (1, {"atomname": "H1", "resname": "OHter", "element": "H"}))},
+                        {"PEO": [(0, 1), (1, 2), (0, 3), (0, 4), (2, 5)],
+                         "OHter": [(0, 1)]}),
 ))
 def test_fragment_iter(fragment_str, nodes, edges):
     for resname, mol_graph in fragment_iter(fragment_str):

From 8f2887f5d2149e94330014a8b32f47d64caf1b3d Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Wed, 24 Jan 2024 12:13:41 +0100
Subject: [PATCH 54/82] delete old processor file

---
 polyply/src/big_smile_mol_processsor.py | 99 -------------------------
 1 file changed, 99 deletions(-)
 delete mode 100644 polyply/src/big_smile_mol_processsor.py

diff --git a/polyply/src/big_smile_mol_processsor.py b/polyply/src/big_smile_mol_processsor.py
deleted file mode 100644
index 8131e009..00000000
--- a/polyply/src/big_smile_mol_processsor.py
+++ /dev/null
@@ -1,99 +0,0 @@
-import networkx as nx
-from polyply.src.big_smile_parsing import (res_pattern_to_meta_mol,
-                                           force_field_from_fragments)
-from polyply.src.map_to_molecule import MapToMolecule
-
-def compatible(left, right):
-    """
-    Check bonding descriptor compatibility according
-    to the BigSmiles syntax convetions.
-
-    Parameters
-    ----------
-    left: str
-    right: str
-
-    Returns
-    -------
-    bool
-    """
-    if left == right:
-        return True
-    if left[0] == "<" and right[0] == ">":
-        if left[1:] == right[1:]:
-            return True
-    if left[0] == ">" and right[0] == "<":
-        if left[1:] == right[1:]:
-            return True
-    return False
-
-def generate_edge(source, target, bond_type="bonding"):
-    """
-    Given a source and a target graph, which have bonding
-    descriptors stored as node attributes, find a pair of
-    matching descriptors and return the respective nodes.
-    The function also returns the bonding descriptors. If
-    no bonding descriptor is found an instance of LookupError
-    is raised.
-
-    Parameters
-    ----------
-    source: :class:`nx.Graph`
-    target: :class:`nx.Graph`
-    bond_type: `abc.hashable`
-        under which attribute are the bonding descriptors
-        stored.
-
-    Returns
-    -------
-    ((abc.hashable, abc.hashable), (str, str))
-        the nodes as well as bonding descriptors
-
-    Raises
-    ------
-    LookupError
-        if no match is found
-    """
-    source_nodes = nx.get_node_attributes(source, bond_type)
-    target_nodes = nx.get_node_attributes(target, bond_type)
-    for source_node in source_nodes:
-        for target_node in target_nodes:
-            bond_source = source_nodes[source_node]
-            bond_target = target_nodes[target_node]
-            if compatible(bond_source, bond_target):
-                return ((source_node, target_node), (bond_source, bond_target))
-    raise LookupError
-
-class DefBigSmileParser:
-    """
-    Parse an a string instance of a defined BigSmile,
-    which describes a polymer molecule.
-    """
-
-    def __init__(self):
-        self.force_field = None
-        self.meta_molecule = None
-        self.molecule = None
-
-    def edges_from_bonding_descrpt(self):
-        """
-        Make edges according to the bonding descriptors stored
-        in the node attributes of meta_molecule residue graph.
-        If a bonding descriptor is consumed it is set to None,
-        however, the meta_molecule edge gets an attribute with the
-        bonding descriptors that formed the edge.
-        """
-        for prev_node, node in nx.dfs_edges(self.meta_molecule):
-            edge, bonding = generate_edge(self.meta_molecule.nodes[prev_node]['graph'],
-                                          self.meta_molecule.nodes[node]['graph'])
-            self.meta_molecule.nodes[prev_node]['graph'][edge[0]]['bonding'] = None
-            self.meta_molecule.nodes[prev_node]['graph'][edge[1]]['bonding'] = None
-            self.meta_molecule.molecule.add_edge(edge, bonding=bonding)
-
-    def parse(self, big_smile_str):
-        res_pattern, residues = big_smile_str.split('.')
-        self.meta_molecule = res_pattern_to_meta_mol(res_pattern)
-        self.force_field = force_field_from_fragments(residues)
-        MapToMolecule(self.force_field).run_molecule(self.meta_molecule)
-        self.edges_from_bonding_descrpt()
-        return self.meta_molecule

From 08021c2801ae8df9169c52bf2d2fc7238fa9516f Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Wed, 24 Jan 2024 16:03:48 +0100
Subject: [PATCH 55/82] have charge balancing for itps but raise error when
 bond length is missing

---
 polyply/src/charges.py   |  4 ++++
 polyply/src/itp_to_ff.py | 11 +++++------
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/polyply/src/charges.py b/polyply/src/charges.py
index cfd50235..bb7505fe 100644
--- a/polyply/src/charges.py
+++ b/polyply/src/charges.py
@@ -93,6 +93,10 @@ def _get_bonds(block, topology=None):
                         elif batoms[::-1] in topology.types['bonds']:
                             params = topology.types['bonds'][batoms[::-1]][0][0][1]
                         bonds[(nodes_to_count[idx], nodes_to_count[jdx])] = float(params)
+                    else:
+                        msg = ("Cannot find bond lengths. If your force field uses bondtypes lile"
+                               "Charmm you need to provide a topology file.")
+                        raise ValueError(msg)
     return bonds
 
 def balance_charges(block, charge=0, tol=10**-5, decimals=5, topology=None):
diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py
index bd08e1bd..8bf0a659 100644
--- a/polyply/src/itp_to_ff.py
+++ b/polyply/src/itp_to_ff.py
@@ -39,6 +39,7 @@ def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charges=
         mol = top.molecules[0].molecule
     # read itp file
     if itppath.suffix == ".itp":
+        top = None
         with open(itppath, "r") as _file:
             lines = _file.readlines()
         force_field = ForceField("tmp")
@@ -63,12 +64,10 @@ def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charges=
         new_block.nrexcl = mol.nrexcl
         force_field.blocks[name] = new_block
         set_charges(new_block, res_graph, name)
-        if itppath.suffix == ".top":
-            base_resname = name.split(term_prefix)[0].split('_')[0]
-            print(base_resname)
-            balance_charges(new_block,
-                            topology=top,
-                            charge=crg_dict[base_resname])
+        base_resname = name.split(term_prefix)[0].split('_')[0]
+        balance_charges(new_block,
+                        topology=top,
+                        charge=crg_dict[base_resname])
 
     force_field.links = extract_links(mol)
 

From 681004f10e08629450ef6d8780b53766347369ae Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Thu, 29 Feb 2024 15:38:23 +0100
Subject: [PATCH 56/82] add closing bracket to special characters

---
 polyply/src/big_smile_parsing.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py
index fa6348cc..6969a31c 100644
--- a/polyply/src/big_smile_parsing.py
+++ b/polyply/src/big_smile_parsing.py
@@ -75,7 +75,6 @@ def res_pattern_to_meta_mol(pattern):
     branching = False
     for match in re.finditer(PATTERNS['place_holder'], pattern):
         start, stop = match.span()
-        print(pattern[start:stop])
         # new branch here
         if pattern[start-1] == '(':
             branching = True
@@ -181,7 +180,7 @@ def tokenize_big_smile(big_smile):
             prev_node = anchor
             smile += token
         else:
-            if token not in '@ . - = # $ : / \\ + - %'\
+            if token not in '] H @ . - = # $ : / \\ + - %'\
                 and not token.isdigit():
                 prev_node = node_count
                 node_count += 1

From 353723958b54487558c45bd43cc8da12ee22c052 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Thu, 29 Feb 2024 15:38:49 +0100
Subject: [PATCH 57/82] only balance charges for blocks with at least 2 atoms

---
 polyply/src/charges.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/polyply/src/charges.py b/polyply/src/charges.py
index bb7505fe..5a08a854 100644
--- a/polyply/src/charges.py
+++ b/polyply/src/charges.py
@@ -122,6 +122,9 @@ def balance_charges(block, charge=0, tol=10**-5, decimals=5, topology=None):
     :class:`vermouth.molecule.Block`
         block with updated charges
     """
+    if len(block.nodes) < 2:
+        return block
+
     block.make_edges_from_interaction_type('bonds')
     keys = nx.get_node_attributes(block, 'charge').keys()
     charges = np.array(list(nx.get_node_attributes(block, 'charge').values()))

From 929b5d1a8a6b41525f9d0493f190814fd677ea73 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Thu, 29 Feb 2024 15:39:36 +0100
Subject: [PATCH 58/82] refactor fragment finder

---
 polyply/src/fragment_finder.py | 256 +++------------------------------
 1 file changed, 20 insertions(+), 236 deletions(-)

diff --git a/polyply/src/fragment_finder.py b/polyply/src/fragment_finder.py
index 060fbb44..dcf92c87 100644
--- a/polyply/src/fragment_finder.py
+++ b/polyply/src/fragment_finder.py
@@ -11,22 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import numpy as np
 import networkx as nx
 from vermouth.graph_utils import make_residue_graph
 from polyply.src.graph_utils import find_one_ismags_match
 
-def _element_match(node1, node2):
-    """
-    Checks if the element attribute of two nodes
-    is the same.
-
-    Returns
-    --------
-    bool
-    """
-    return node1["element"] == node2["element"]
-
 class FragmentFinder():
     """
     This class enables finding and labelling of fragments
@@ -63,7 +51,7 @@ class FragmentFinder():
     the resname is appended by a number.
     """
 
-    def __init__(self, molecule, prefix):
+    def __init__(self, molecule):
         """
         Initalize the fragment finder with a molecule, setting the
         resid attribute to None, and correctly assining elements
@@ -97,14 +85,8 @@ def __init__(self, molecule, prefix):
         res_graph: :class:`vermouth.molecule.Molecule`
             residue graph of the molecule
         """
-        self.max_by_resid = {}
-        self.ter_prefix = prefix
-        self.resid = 1
-        self.res_assigment = []
-        self.assigned_atoms = []
         self.molecule = molecule
-        self.known_atom = None
-        self.match_keys = ['element', 'mass', 'degree'] #, 'charge']
+        self.match_keys = ['element'] #, 'mass', 'degree'] #, 'charge']
         self.masses_to_element = {16: "O",
                                   14: "N",
                                   12: "C",
@@ -123,33 +105,6 @@ def __init__(self, molecule, prefix):
                 self.molecule.nodes[node]["element"] = self.masses_to_element[mass]
                 self.molecule.nodes[node]["degree"] = self.molecule.degree(node)
 
-    def linearize_resids(self, unique_fragments):
-        resids = np.arange(0, len(self.res_graph))
-        old_resids = {}
-        # find the first terminal
-        ter = self.ter_prefix
-        ter_nodes = [ node[0] for node in self.res_graph.nodes(data=True) if ter in node[1]['resname'] ]
-        print(ter_nodes[0])
-        #assert 0 > len(ter_nodes) < 3
-        path = nx.dfs_edges(self.res_graph, source=ter_nodes[0])
-        old_resids = {self.res_graph.nodes[ter_nodes[0]]['resid']: resids[0]}
-        self.res_graph.nodes[ter_nodes[0]]['resid'] = resids[0]
-        for mol_node in self.res_graph.nodes[ter_nodes[0]]['graph'].nodes:
-            self.res_graph.nodes[ter_nodes[0]]['graph'].nodes[mol_node]['resid'] = resids[0]
-            self.molecule.nodes[mol_node]['resid'] = resids[0]
-
-        for new_resid, (_, node) in zip(resids[1:], path):
-            print('node', node)
-            old_resids[self.res_graph.nodes[node]['resid']] = new_resid
-            self.res_graph.nodes[node]['resid'] = new_resid
-            for mol_node in self.res_graph.nodes[node]['graph'].nodes:
-                self.res_graph.nodes[node]['graph'].nodes[mol_node]['resid'] = new_resid
-                self.molecule.nodes[mol_node]['resid'] = new_resid
-        print(old_resids)
-        for fragment in unique_fragments.values():
-            for node in fragment.nodes:
-                fragment.nodes[node]['resid'] = old_resids[fragment.nodes[node]['resid']]
-
     def _node_match(self, node1, node2):
         """
         Check if two node dicts match.
@@ -172,124 +127,7 @@ def _node_match(self, node1, node2):
     def make_res_graph(self):
         self.res_graph = make_residue_graph(self.molecule)
 
-    def pre_match(self, fragment_graph):
-        """
-        Find one match of fragment graph in the molecule
-        and then extract degrees and atom-types for further
-        matching. This is a safety measure because even though
-        the fragment graph is subgraph isomorphic the underlying
-        itp parameters might not be.
-
-        Parameters
-        -----------
-        fragment_graph: 'nx.Graph'
-            must have attributes element for each node
-
-        Returns
-        -------
-        'nx.Graph'
-            the labelled fragment graph
-        """
-        template_atoms = list(fragment_graph.nodes)
-        # find subgraph isomorphic matches to the target fragment
-        # based on the element only
-        GM = nx.isomorphism.GraphMatcher(self.molecule,
-                                         fragment_graph,
-                                         node_match=_element_match,)
-
-        for one_match in GM.subgraph_isomorphisms_iter():
-            rev_current_match = {val: key for key, val in one_match.items()}
-            atoms = [ rev_current_match[template_atom] for template_atom in template_atoms]
-            if self.is_valid_match(one_match, atoms)[0]:
-                break
-
-        for mol_atom, tempt_atom in one_match.items():
-            for attr in self.match_keys:
-                fragment_graph.nodes[tempt_atom][attr] = self.molecule.nodes[mol_atom][attr]
-        return fragment_graph
-
-    def is_valid_match(self, match, atoms):
-        """
-        Check if the found isomorphism match is valid.
-        """
-        # is the match connected to the previous residue
-        if not self.is_connected_to_prev(match.keys(), self.assigned_atoms,):
-            return False, 1
-        # check if atoms are already assigned
-        if frozenset(atoms) in self.res_assigment:
-            return False, 2
-        # check if there is any partial overlap
-        if any([atom in self.assigned_atoms for atom in atoms]):
-            return False, 3
-
-        return True, 4
-
-    def is_connected_to_prev(self, current, prev):
-        """
-        Check if the atoms in the lists current or
-        prev are connected.
-
-        Parameters
-        ----------
-        current: list[abc.hashable]
-            list of current nodes
-        prev: list[abc.hashable]
-            list of prev nodes
-        """
-        # no atoms have been assigned
-        if len(prev) == 0:
-            return True
-
-        for node in current:
-            for neigh_node in self.molecule.neighbors(node):
-                if neigh_node in prev:
-                    return True
-        return False
-
-    def label_fragment_from_graph(self, fragment_graph):
-        """
-        For the `self.molecule` label all atoms, that match
-        the `fragment_graph`, with a resid attribute and set
-        the atom-name to the element name plus index relative
-        to the atoms in the fragment.
-
-        Parameters
-        ----------
-        fragment_graph: nx.Graph
-            graph describing the fragment; must have the
-            element attribute
-        """
-        # pre-match one residue and extract the atomtypes and degrees
-        # this is needed to enforce symmetry in matching the other
-        # residues
-        fragment_graph = self.pre_match(fragment_graph)
-        # find all isomorphic matches to the target fragments
-        GM = nx.isomorphism.GraphMatcher(self.molecule,
-                                         fragment_graph,
-                                         node_match=self._node_match,
-                                        )
-        template_atoms = list(fragment_graph.nodes)
-        resname = list(nx.get_node_attributes(fragment_graph, "resname").values())[0]
-        raw_matchs = list(GM.subgraph_isomorphisms_iter())
-        # loop over all matchs and check if the atoms are already
-        # assigned - symmetric matches must be skipped
-        for current_match in raw_matchs:
-            # the graph matcher can return the matchs in any order so we need to sort them
-            # according to our tempalte molecule
-            rev_current_match = {val: key for key, val in current_match.items()}
-            atoms = [ rev_current_match[template_atom] for template_atom in template_atoms]
-            if self.is_valid_match(current_match, atoms)[0]:
-                self.res_assigment.append(frozenset(atoms))
-                for idx, atom in enumerate(atoms):
-                    self.molecule.nodes[atom]["resid"] = self.resid
-                    self.molecule.nodes[atom]["atomname"] = self.molecule.nodes[atom]["element"] + str(idx)
-                    self.molecule.nodes[atom]["resname"] = resname
-                    self.max_by_resid[self.resid] = idx
-                    self.known_atom = atom
-                    self.assigned_atoms.append(atom)
-                self.resid += 1
-
-    def label_fragments_from_graph(self, fragment_graphs):
+    def extract_unique_fragments(self, reference_graph):
         """
         Call the label_fragment method for multiple fragments.
 
@@ -297,77 +135,23 @@ def label_fragments_from_graph(self, fragment_graphs):
         ----------
         fragment_graphs: list[nx.Graph]
         """
-        for fragment_graph in fragment_graphs:
-            self.label_fragment_from_graph(fragment_graph)
-
-    def label_unmatched_atoms(self):
-        """
-        After all atoms have been assigned to target fragments using
-        the label_fragment method all left-over atoms are assigned to
-        the first fragment they are attached to. This method sets the
-        atom-name to the element name and element count and resid
-        attribute.
-        """
-        for from_node, to_node in nx.dfs_edges(self.molecule, source=self.known_atom):
-            if not self.molecule.nodes[to_node]["resid"]:
-                resid = self.molecule.nodes[from_node]["resid"]
-                self.max_by_resid[resid] = self.max_by_resid[resid] + 1
-                self.molecule.nodes[to_node]["resid"] = resid
-                self.molecule.nodes[to_node]["resname"] = self.molecule.nodes[from_node]["resname"]
-                self.molecule.nodes[to_node]["atomname"] = self.molecule.nodes[to_node]["element"] + str(self.max_by_resid[resid])
-
-    def extract_unique_fragments(self, fragment_graphs):
-        """
-        Given a list of fragment-graphs assing all atoms to fragments and
-        generate new fragments by assinging the left-over atoms to the
-        connecting fragment. Fragments get a unique resid in the molecule.
-        Then make the residue graph and filter out all unique residues
-        and return them.
-
-        Parameters
-        ----------
-        fragment_graphs: list[nx.Graph]
-
-        Returns
-        -------
-        list[nx.Graph]
-            all unique fragment graphs
-        """
-        # first we find and label all fragments in the molecule
-        self.label_fragments_from_graph(fragment_graphs)
-        # then we assign all left-over atoms to the existing residues
-        self.label_unmatched_atoms()
-        # make the residue graph
+        # find one correspondance
+        mapping = find_one_ismags_match(self.molecule,
+                                        reference_graph,
+                                        node_match=self._node_match)
+        # now assign the attributes from the reference graph to
+        # the target molecule
+        for target, ref in mapping.items():
+            for attr in ['resname', 'resid', 'atomname']:
+                self.molecule.nodes[target][attr] = reference_graph.nodes[ref][attr]
+
+        # now we make the residue graph and extract
         self.make_res_graph()
-        # now we make the residue graph and find all unique residues
-        unique_fragments = {}
-        had_resnames = {}
-        for node in self.res_graph.nodes:
-            resname = self.res_graph.nodes[node]['resname']
-            # this fragment is terminal located so we give it a special prefix
-            fragment = self.res_graph.nodes[node]['graph']
-            if self.res_graph.degree(node) == 1:
-               resname = resname + self.ter_prefix
-               nx.set_node_attributes(self.molecule, {node: resname for node in fragment.nodes} ,"resname")
-               nx.set_node_attributes(fragment, {node: resname for node in fragment.nodes} ,"resname")
-            # here we extract the fragments and set appropiate residue names
-            for other_frag in unique_fragments.values():
-                if nx.is_isomorphic(fragment, other_frag, node_match=self._node_match):
-                    mapping = find_one_ismags_match(fragment, other_frag, self._node_match)
-                    if mapping:
-                        for source, target in mapping.items():
-                            self.molecule.nodes[target]['atomname'] = self.molecule.nodes[source]['atomname']
-                        break
-            else:
-                if resname in unique_fragments:
-                    resname = resname + "_" + str(had_resnames[resname] + 1)
-                    nx.set_node_attributes(self.molecule, {node: resname for node in fragment.nodes} ,"resname")
-                    nx.set_node_attributes(fragment, {node: resname for node in fragment.nodes} ,"resname")
-                else:
-                    had_resnames[resname] = 0
-                unique_fragments[resname] = fragment
 
-        # remake the residue graph since some resnames have changed
-        self.make_res_graph()
-        self.linearize_resids(unique_fragments)
+        # finally we simply collect one graph per restype
+        unique_fragments = {}
+        for res in self.res_graph:
+            resname = self.res_graph.nodes[res]['resname']
+            if resname not in unique_fragments:
+                unique_fragments[resname] = self.res_graph.nodes[res]['graph']
         return unique_fragments, self.res_graph

From 87510bbfd5f4f414b42e6697c5884a712799a660 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Thu, 29 Feb 2024 15:43:39 +0100
Subject: [PATCH 59/82] refactor fragment itp_to_ff

---
 polyply/src/itp_to_ff.py | 60 ++++++++++++++++++++--------------------
 1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py
index 8bf0a659..b39df391 100644
--- a/polyply/src/itp_to_ff.py
+++ b/polyply/src/itp_to_ff.py
@@ -11,13 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import numpy as np
 import networkx as nx
-try:
-    import pysmiles
-except ImportError:
-    raise ImportError("To use polyply itp_to_ff you need to install pysmiles.")
-import vermouth
 from vermouth.forcefield import ForceField
 from vermouth.gmx.itp_read import read_itp
 from polyply.src.topology import Topology
@@ -25,51 +19,57 @@
 from polyply.src.fragment_finder import FragmentFinder
 from polyply.src.ffoutput import ForceFieldDirectiveWriter
 from polyply.src.charges import balance_charges, set_charges
+from polyply.src.big_smile_mol_processor import DefBigSmileParser
 
-def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charges=None):
+def _read_itp_file(itppath):
+    """
+    small wrapper for reading itps
+    """
+    with open(itppath, "r") as _file:
+        lines = _file.readlines()
+    force_field = ForceField("tmp")
+    read_itp(lines, force_field)
+    block = next(iter(force_field.blocks.values()))
+    mol = block.to_molecule()
+    mol.make_edges_from_interaction_type(type_="bonds")
+    return mol
+
+def itp_to_ff(itppath, smile_str, outpath, res_charges=None):
     """
     Main executable for itp to ff tool.
     """
     # what charges belong to which resname
-    if charges:
-        crg_dict = dict(zip(resnames, charges))
+    if res_charges:
+        crg_dict = dict(res_charges)
+
     # read the topology file
     if itppath.suffix == ".top":
         top = Topology.from_gmx_topfile(itppath, name="test")
-        mol = top.molecules[0].molecule
+        target_mol = top.molecules[0].molecule
     # read itp file
-    if itppath.suffix == ".itp":
+    elif itppath.suffix == ".itp":
         top = None
-        with open(itppath, "r") as _file:
-            lines = _file.readlines()
-        force_field = ForceField("tmp")
-        read_itp(lines, force_field)
-        block = next(iter(force_field.blocks.values()))
-        mol = block.to_molecule()
-        mol.make_edges_from_interaction_type(type_="bonds")
+        target_mol = _read_itp_file(itppath)
 
-    # read the target fragments and convert to graph
-    fragment_graphs = []
-    for resname, smile in zip(resnames, fragment_smiles):
-        fragment_graph = pysmiles.read_smiles(smile, explicit_hydrogen=True)
-        nx.set_node_attributes(fragment_graph, resname, "resname")
-        fragment_graphs.append(fragment_graph)
+    # read the big-smile representation
+    meta_mol = DefBigSmileParser().parse(smile_str)
 
     # identify and extract all unique fragments
-    unique_fragments, res_graph = FragmentFinder(mol, prefix=term_prefix).extract_unique_fragments(fragment_graphs)
+    unique_fragments, res_graph = FragmentFinder(target_mol).extract_unique_fragments(meta_mol.molecule)
+
+    # extract the blocks with parameters
     force_field = ForceField("new")
     for name, fragment in unique_fragments.items():
-        new_block = extract_block(mol, list(fragment.nodes), defines={})
+        new_block = extract_block(target_mol, list(fragment.nodes), defines={})
         nx.set_node_attributes(new_block, 1, "resid")
-        new_block.nrexcl = mol.nrexcl
+        new_block.nrexcl = target_mol.nrexcl
         force_field.blocks[name] = new_block
         set_charges(new_block, res_graph, name)
-        base_resname = name.split(term_prefix)[0].split('_')[0]
         balance_charges(new_block,
                         topology=top,
-                        charge=crg_dict[base_resname])
+                        charge=crg_dict[name])
 
-    force_field.links = extract_links(mol)
+    force_field.links = extract_links(target_mol)
 
     with open(outpath, "w") as filehandle:
         ForceFieldDirectiveWriter(forcefield=force_field, stream=filehandle).write()

From 05df2e56a67e89de471108b7142b1b7ba1dd5489 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Thu, 29 Feb 2024 15:44:09 +0100
Subject: [PATCH 60/82] change input for itp_to_ff to allow bigsmiles

---
 bin/polyply | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/bin/polyply b/bin/polyply
index fb997c9c..3cb3d0bd 100755
--- a/bin/polyply
+++ b/bin/polyply
@@ -237,12 +237,10 @@ def main(): # pylint: disable=too-many-locals,too-many-statements
                                 help='Enable debug logging output. Can be given '
                                 'multiple times.', default=0)
 
-    parser_itp_ff.add_argument('-i', dest="itppath", type=Path)
-    parser_itp_ff.add_argument('-sm', dest="fragment_smiles", nargs='*')
-    parser_itp_ff.add_argument('-rn', dest="resnames", nargs='*')
-    parser_itp_ff.add_argument('-tp',dest="term_prefix", default="ter")
+    parser_itp_ff.add_argument('-i', dest="itppath", type=Path, required=True)
+    parser_itp_ff.add_argument('-s', dest="smile_str", required=True)
     parser_itp_ff.add_argument('-o', dest="outpath", type=Path)
-    parser_itp_ff.add_argument('-c', dest="charges", type=float, nargs='*')
+    parser_itp_ff.add_argument('-c', dest="res_charges",  nargs='+', type=lambda s: s.split(':'),)
 
     parser_itp_ff.set_defaults(func=itp_to_ff)
 

From 0ebfa6ac8b73e0f280d6ab901e89ee319a013da7 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Fri, 1 Mar 2024 17:55:40 +0100
Subject: [PATCH 61/82] take most central fragment

---
 polyply/src/fragment_finder.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/polyply/src/fragment_finder.py b/polyply/src/fragment_finder.py
index dcf92c87..07849508 100644
--- a/polyply/src/fragment_finder.py
+++ b/polyply/src/fragment_finder.py
@@ -149,9 +149,13 @@ def extract_unique_fragments(self, reference_graph):
         self.make_res_graph()
 
         # finally we simply collect one graph per restype
+        # which is the most central (i.e. avoid ends)
         unique_fragments = {}
+        frag_centrality = {}
+        centrality = nx.betweenness_centrality(self.res_graph)
         for res in self.res_graph:
             resname = self.res_graph.nodes[res]['resname']
-            if resname not in unique_fragments:
+            if resname not in unique_fragments or frag_centrality[resname] < centrality[res]:
                 unique_fragments[resname] = self.res_graph.nodes[res]['graph']
+                frag_centrality[resname] = centrality[res]
         return unique_fragments, self.res_graph

From a7cd590fb70d0b136c84d8bf2e334a88307ce489 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Fri, 1 Mar 2024 17:56:00 +0100
Subject: [PATCH 62/82] add special links for terminal modifications

---
 polyply/src/itp_to_ff.py      |  5 ++-
 polyply/src/molecule_utils.py | 71 ++++++++++++++++++++++++++++++++++-
 2 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py
index b39df391..9ff02f47 100644
--- a/polyply/src/itp_to_ff.py
+++ b/polyply/src/itp_to_ff.py
@@ -15,7 +15,7 @@
 from vermouth.forcefield import ForceField
 from vermouth.gmx.itp_read import read_itp
 from polyply.src.topology import Topology
-from polyply.src.molecule_utils import extract_block, extract_links
+from polyply.src.molecule_utils import extract_block, extract_links, find_termini_mods
 from polyply.src.fragment_finder import FragmentFinder
 from polyply.src.ffoutput import ForceFieldDirectiveWriter
 from polyply.src.charges import balance_charges, set_charges
@@ -69,7 +69,10 @@ def itp_to_ff(itppath, smile_str, outpath, res_charges=None):
                         topology=top,
                         charge=crg_dict[name])
 
+    # extract the regular links
     force_field.links = extract_links(target_mol)
+    # extract links that span the terminii
+    find_termini_mods(res_graph, target_mol, force_field)
 
     with open(outpath, "w") as filehandle:
         ForceFieldDirectiveWriter(forcefield=force_field, stream=filehandle).write()
diff --git a/polyply/src/molecule_utils.py b/polyply/src/molecule_utils.py
index 7da9ce43..a960ca85 100644
--- a/polyply/src/molecule_utils.py
+++ b/polyply/src/molecule_utils.py
@@ -19,6 +19,7 @@
 from vermouth.molecule import Interaction
 from polyply.tests.test_lib_files import _interaction_equal
 from .topology import replace_defined_interaction
+from .graph_utils import find_connecting_edges
 
 def diffs_to_prefix(atoms, resid_diffs):
     """
@@ -140,7 +141,7 @@ def extract_links(molecule):
             # we collect the edges corresponding to the simple paths between pairs of atoms
             # in the interaction
             mol_atoms_to_link_atoms, edges, resnames = _extract_edges_from_shortest_path(interaction.atoms, molecule, min_resid)
-            link_to_mol_atoms = {value:key for key, value in mol_atoms_to_link_atoms.items()}
+            #link_to_mol_atoms = {value:key for key, value in mol_atoms_to_link_atoms.items()}
             link_atoms =  [mol_atoms_to_link_atoms[atom] for atom in interaction.atoms]
             link_inter = Interaction(atoms=link_atoms,
                                      parameters=interaction.parameters,
@@ -258,3 +259,71 @@ def extract_block(molecule, nodes, defines={}):
         raise IOError(msg.format(resname, resid))
 
     return block
+
+def find_termini_mods(meta_molecule, molecule, force_field):
+    """
+    Terminal residues often differ from the repeat unit of the polymer.
+    For each terminal residue (degree 1 in `meta_molecule`) the neighboring
+    residue is compared to the block of the same resname in `force_field`;
+    atom types, masses and bonded parameters that differ are put into a
+    link that is appended to `force_field.links`.
+
+    Parameters
+    ----------
+    meta_molecule: residue graph; nodes need `resname`, `resid`, `graph`
+    molecule: molecule that the atoms of the residue graphs refer to
+    force_field: holds the reference blocks; modified in place and returned
+    """
+    terminal_nodes = [ node for node in meta_molecule.nodes if meta_molecule.degree(node) == 1 ]
+    for meta_node in terminal_nodes:
+        # a terminal has degree 1, so it has exactly one neighbor
+        neigh_node = next(nx.neighbors(meta_molecule, meta_node))
+
+        # some useful info
+        neigh_resname = meta_molecule.nodes[neigh_node]['resname']
+        resids = [meta_molecule.nodes[neigh_node]['resid'], meta_molecule.nodes[meta_node]['resid']]
+        ref_block = force_field.blocks[neigh_resname]
+        target_block = meta_molecule.nodes[neigh_node]['graph']
+
+        # find different properties
+        replace_dict = defaultdict(dict)
+        for node in target_block.nodes:
+            target_attrs = target_block.nodes[node]
+            ref_attrs = ref_block.nodes[target_attrs['atomname']]
+            for attr in ['atype', 'mass']:
+                if target_attrs[attr] != ref_attrs[attr]:
+                    replace_dict[node][attr] = target_attrs[attr]
+
+        # bonded interactions could be different too so we need to check them
+        overwrite_inters = defaultdict(list)
+        for inter_type in ref_block.interactions:
+            for ref_inter in ref_block.interactions[inter_type]:
+                for target_inter in target_block.interactions[inter_type]:
+                    target_atoms = [target_block.nodes[atom]['atomname'] for atom in target_inter.atoms]
+                    if target_atoms == ref_inter.atoms and\
+                    target_inter.parameters != ref_inter.parameters:
+                        mol_to_link_atoms, _, _ = _extract_edges_from_shortest_path(
+                            target_inter.atoms, molecule, min(resids))
+                        link_atoms = [mol_to_link_atoms[atom] for atom in target_inter.atoms]
+                        link_inter = Interaction(atoms=link_atoms, parameters=target_inter.parameters, meta={})
+                        overwrite_inters[inter_type].append(link_inter)
+
+        # we make a link
+        mol_atoms = list(replace_dict.keys()) + list(meta_molecule.nodes[meta_node]['graph'].nodes)
+        mol_to_link, _, _ = _extract_edges_from_shortest_path(mol_atoms, molecule, min(resids))
+        link = vermouth.molecule.Link()
+        link.add_nodes_from(mol_to_link.values())
+        for node in mol_atoms:
+            link.nodes[mol_to_link[node]]['resname'] = molecule.nodes[node]['resname']
+            link.nodes[mol_to_link[node]]['replace'] = replace_dict[node]
+
+        force_field.links.append(link)
+        # add the collected interactions themselves, not the whole dict
+        for inter_type, inters in overwrite_inters.items():
+            link.interactions[inter_type].extend(inters)
+
+        edges = find_connecting_edges(meta_molecule, molecule, [meta_node, neigh_node])
+        for ndx, jdx in edges:
+            link.add_edge(mol_to_link[ndx], mol_to_link[jdx])
+
+    return force_field

From 8e0c2577373b92988f668a1966615d83f71424c6 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Sun, 3 Mar 2024 14:26:57 +0100
Subject: [PATCH 63/82] type the charges to float in itp to ff

---
 polyply/src/itp_to_ff.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py
index 9ff02f47..a26248d6 100644
--- a/polyply/src/itp_to_ff.py
+++ b/polyply/src/itp_to_ff.py
@@ -67,7 +67,7 @@ def itp_to_ff(itppath, smile_str, outpath, res_charges=None):
         set_charges(new_block, res_graph, name)
         balance_charges(new_block,
                         topology=top,
-                        charge=crg_dict[name])
+                        charge=float(crg_dict[name]))
 
     # extract the regular links
     force_field.links = extract_links(target_mol)

From 7cb3b4cf32fc8eb52b42b4b8a795fafb3cf7faa6 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Sun, 3 Mar 2024 14:30:49 +0100
Subject: [PATCH 64/82] add H and ] as special characters in big smile parser

---
 polyply/src/big_smile_parsing.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py
index fa6348cc..6969a31c 100644
--- a/polyply/src/big_smile_parsing.py
+++ b/polyply/src/big_smile_parsing.py
@@ -75,7 +75,6 @@ def res_pattern_to_meta_mol(pattern):
     branching = False
     for match in re.finditer(PATTERNS['place_holder'], pattern):
         start, stop = match.span()
-        print(pattern[start:stop])
         # new branch here
         if pattern[start-1] == '(':
             branching = True
@@ -181,7 +180,7 @@ def tokenize_big_smile(big_smile):
             prev_node = anchor
             smile += token
         else:
-            if token not in '@ . - = # $ : / \\ + - %'\
+            if token not in '] H @ . - = # $ : / \\ + - %'\
                 and not token.isdigit():
                 prev_node = node_count
                 node_count += 1

From 097ec842efefb5d4e5ca8b32d38365174f9af10a Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Sun, 3 Mar 2024 14:54:11 +0100
Subject: [PATCH 65/82] account for explicit hydrogen in the smiles string
 input

---
 polyply/src/big_smile_parsing.py        | 11 +++++++--
 polyply/tests/test_big_smile_parsing.py | 30 +++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py
index 6969a31c..55d3a6aa 100644
--- a/polyply/src/big_smile_parsing.py
+++ b/polyply/src/big_smile_parsing.py
@@ -204,8 +204,15 @@ def _rebuild_h_atoms(mol_graph):
     else:
         for node in mol_graph.nodes:
             if mol_graph.nodes[node].get('bonding', False):
-                hcount = mol_graph.nodes[node]['hcount']
-                mol_graph.nodes[node]['hcount'] = hcount - len(mol_graph.nodes[node]['bonding'])
+                # get the degree
+                ele = mol_graph.nodes[0]['element']
+                # hcount is the valence minus the degree minus
+                # the number of bonding descriptors
+                hcount = pysmiles.smiles_helper.VALENCES[ele][0] -\
+                         mol_graph.degree(node) -\
+                         len(mol_graph.nodes[node]['bonding'])
+
+                mol_graph.nodes[node]['hcount'] = hcount
 
     pysmiles.smiles_helper.add_explicit_hydrogens(mol_graph)
     return mol_graph
diff --git a/polyply/tests/test_big_smile_parsing.py b/polyply/tests/test_big_smile_parsing.py
index f7faf71a..ba3f5f69 100644
--- a/polyply/tests/test_big_smile_parsing.py
+++ b/polyply/tests/test_big_smile_parsing.py
@@ -53,6 +53,10 @@ def test_res_pattern_to_meta_mol(smile, nodes, edges):
                         ("[$]COC[$]",
                          "COC",
                         {0: ["$"], 2: ["$"]}),
+                        # simple symmetric but with explicit hydrogen
+                        ("[$][CH2]O[CH2][$]",
+                         "[CH2]O[CH2]",
+                        {0: ["$"], 2: ["$"]}),
                         # smiple symmetric bonding; multiple descript
                         ("[$]COC[$][$1]",
                          "COC",
@@ -91,6 +95,17 @@ def test_tokenize_big_smile(big_smile, smile, bonding):
                                  (6, {"atomname": "H6", "resname": "PEO", "element": "H"}),
                                 )},
                         {"PEO": [(0, 1), (1, 2), (0, 3), (0, 4), (2, 5), (2, 6)]}),
+                        # single fragment but with explicit hydrogen in smiles
+                        ("{#PEO=[$][CH2]O[CH2][$]}",
+                        {"PEO": ((0, {"atomname": "C0", "resname": "PEO", "bonding": ["$"], "element": "C"}),
+                                 (1, {"atomname": "O1", "resname": "PEO", "element": "O"}),
+                                 (2, {"atomname": "C2", "resname": "PEO", "bonding": ["$"], "element": "C"}),
+                                 (3, {"atomname": "H3", "resname": "PEO", "element": "H"}),
+                                 (4, {"atomname": "H4", "resname": "PEO", "element": "H"}),
+                                 (5, {"atomname": "H5", "resname": "PEO", "element": "H"}),
+                                 (6, {"atomname": "H6", "resname": "PEO", "element": "H"}),
+                                )},
+                        {"PEO": [(0, 1), (1, 2), (0, 3), (0, 4), (2, 5), (2, 6)]}),
                         # test NH3 terminal
                         ("{#AMM=N[$]}",
                         {"AMM": ((0, {"atomname": "N0", "resname": "AMM", "bonding": ["$"], "element": "N"}),
@@ -126,6 +141,21 @@ def test_tokenize_big_smile(big_smile, smile, bonding):
                                    (1, {"atomname": "H1", "resname": "OHter", "element": "H"}))},
                         {"PEO": [(0, 1), (1, 2), (0, 3), (0, 4), (2, 5)],
                          "OHter": [(0, 1)]}),
+                        # single fragment + 1 terminal but multiple bond descritp.
+                        # but explicit hydrogen in the smiles string
+                        ("{#PEO=[$][CH2]O[CH2][$][$1],#OHter=[$][OH]}",
+                        {"PEO": ((0, {"atomname": "C0", "resname": "PEO", "bonding": ["$"], "element": "C"}),
+                                 (1, {"atomname": "O1", "resname": "PEO", "element": "O"}),
+                                 (2, {"atomname": "C2", "resname": "PEO", "bonding": ["$", "$1"], "element": "C"}),
+                                 (3, {"atomname": "H3", "resname": "PEO", "element": "H"}),
+                                 (4, {"atomname": "H4", "resname": "PEO", "element": "H"}),
+                                 (5, {"atomname": "H5", "resname": "PEO", "element": "H"}),
+                                 ),
+                         "OHter": ((0, {"atomname": "O0", "resname": "OHter", "bonding": ["$"], "element": "O"}),
+                                   (1, {"atomname": "H1", "resname": "OHter", "element": "H"}))},
+                        {"PEO": [(0, 1), (1, 2), (0, 3), (0, 4), (2, 5)],
+                         "OHter": [(0, 1)]}),
+
 ))
 def test_fragment_iter(fragment_str, nodes, edges):
     for resname, mol_graph in fragment_iter(fragment_str):

From 514ba1b2408da1bf592845719b05baeb4dc61d12 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Sun, 3 Mar 2024 15:57:12 +0100
Subject: [PATCH 66/82] test accounting for explicit hydrogen in the smiles
 string input

---
 polyply/src/big_smile_mol_processor.py   | 25 ++++++++++++++++++++----
 polyply/tests/test_big_smile_mol_proc.py | 13 ++++++++++--
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/polyply/src/big_smile_mol_processor.py b/polyply/src/big_smile_mol_processor.py
index 8499e7e3..f474fe76 100644
--- a/polyply/src/big_smile_mol_processor.py
+++ b/polyply/src/big_smile_mol_processor.py
@@ -104,14 +104,31 @@ def edges_from_bonding_descrpt(self):
             node_graph.nodes[edge[1]]['bonding'] = node_bond_list
             self.meta_molecule.molecule.add_edge(edge[0], edge[1], bonding=bonding)
 
+    def replace_unconsumed_bonding_descrpt(self):
+        """
+        Multiple bonding descriptors are allowed per atom, but not all
+        of them are always consumed. Each leftover descriptor is replaced
+        by a hydrogen atom in both the residue graph and the molecule.
+        """
+        for node in self.meta_molecule.nodes:
+            graph = self.meta_molecule.nodes[node]['graph']
+            bonding = nx.get_node_attributes(graph, "bonding")
+            for node, bondings in bonding.items():
+                attrs = {attr: graph.nodes[node][attr] for attr in ['resname', 'resid']}
+                attrs['element'] = 'H'
+                for new_id in range(1, len(bondings)+1):
+                    new_node = len(self.meta_molecule.molecule.nodes) + 1
+                    graph.add_edge(node, new_node)
+                    attrs['atomname'] = "H" + str(new_id + len(graph.nodes))
+                    graph.nodes[new_node].update(attrs)
+                    self.meta_molecule.molecule.add_edge(node, new_node)
+                    self.meta_molecule.molecule.nodes[new_node].update(attrs)
+
     def parse(self, big_smile_str):
         res_pattern, residues = big_smile_str.split('.')
         self.meta_molecule = res_pattern_to_meta_mol(res_pattern)
         self.force_field = force_field_from_fragments(residues)
         MapToMolecule(self.force_field).run_molecule(self.meta_molecule)
         self.edges_from_bonding_descrpt()
+        self.replace_unconsumed_bonding_descrpt()
         return self.meta_molecule
-
-# ToDo
-# - replace non consumed bonding descrpt by hydrogen
-# - 
diff --git a/polyply/tests/test_big_smile_mol_proc.py b/polyply/tests/test_big_smile_mol_proc.py
index 6975b885..26e85ba6 100644
--- a/polyply/tests/test_big_smile_mol_proc.py
+++ b/polyply/tests/test_big_smile_mol_proc.py
@@ -52,6 +52,15 @@ def test_generate_edge(bonds_source, bonds_target, edge, btypes):
                         [(0, 1), (0, 2), (2, 3), (3, 4), (2, 5), (2, 6), (4, 7),
                          (4, 8), (4, 9), (9, 10), (10, 11), (9, 12), (9, 13),
                          (11, 14), (11, 15), (11, 16), (16, 17)]),
+                        # unconsumed bonding IDs; note that this is not the same
+                        # molecule as in the previous test case. Here one of the OH
+                        # groups branches off and replaces a CH2 group with CH-OH
+                        ("{[#OHter][#PEO]|2[#OHter]}.{#PEO=[>][$1]COC[<],#OHter=[$1][O]}",
+                        [('OHter', 'O H'), ('PEO', 'C O C H H H H'),
+                         ('PEO', 'C O C H H H H'), ('OHter', 'O H')],
+                        [(0, 1), (0, 2), (2, 3), (2, 5), (2, 10), (3, 4),
+                         (4, 6), (4, 7), (4, 17), (8, 9), (8, 11), (8, 14),
+                         (8, 18), (9, 10), (10, 12), (10, 13), (14, 15)]),
                         # simple branched sequence
                         ("{[#Hter][#PE]([#PEO][#Hter])[#PE]([#PEO][#Hter])[#Hter]}.{#Hter=[$]H,#PE=[$]CC[$][$],#PEO=[$]COC[$]}",
                         [('Hter', 'H'), ('PE', 'C C H H H'), ('PEO', 'C O C H H H H'), ('Hter', 'H'),
@@ -75,11 +84,11 @@ def test_generate_edge(bonds_source, bonds_target, edge, btypes):
 ))
 def test_def_big_smile_parser(smile, ref_nodes, ref_edges):
     meta_mol = DefBigSmileParser().parse(smile)
+#    nx.draw_networkx(meta_mol.molecule, with_labels=True, labels=nx.get_node_attributes(meta_mol.molecule, 'element'))
+#    plt.show()
     for node, ref in zip(meta_mol.nodes, ref_nodes):
         assert meta_mol.nodes[node]['resname'] ==  ref[0]
         block_graph = meta_mol.nodes[node]['graph']
         elements = list(nx.get_node_attributes(block_graph, 'element').values())
         assert elements == ref[1].split()
-    #nx.draw_networkx(meta_mol.molecule, with_labels=True, labels=nx.get_node_attributes(meta_mol.molecule, 'element'))
-    #plt.show()
     assert sorted(meta_mol.molecule.edges) == sorted(ref_edges)

From 3e4a737aae34c8ce842c8a885782b4dd2750a87e Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Mon, 4 Mar 2024 13:12:45 +0100
Subject: [PATCH 67/82] read provided ff file and use these blocks instead of
 making new ones

---
 bin/polyply                            |  2 ++
 polyply/src/big_smile_mol_processor.py |  4 ++--
 polyply/src/big_smile_parsing.py       | 10 ++++++----
 polyply/src/itp_to_ff.py               | 18 ++++++++++++++----
 4 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/bin/polyply b/bin/polyply
index 3cb3d0bd..68395b21 100755
--- a/bin/polyply
+++ b/bin/polyply
@@ -241,6 +241,8 @@ def main(): # pylint: disable=too-many-locals,too-many-statements
     parser_itp_ff.add_argument('-s', dest="smile_str", required=True)
     parser_itp_ff.add_argument('-o', dest="outpath", type=Path)
     parser_itp_ff.add_argument('-c', dest="res_charges",  nargs='+', type=lambda s: s.split(':'),)
+    parser_itp_ff.add_argument('-f', dest='inpath', type=Path, required=False, default=[],
+                                     help='Input file (ITP|FF)', nargs='*')
 
     parser_itp_ff.set_defaults(func=itp_to_ff)
 
diff --git a/polyply/src/big_smile_mol_processor.py b/polyply/src/big_smile_mol_processor.py
index 8499e7e3..cd899655 100644
--- a/polyply/src/big_smile_mol_processor.py
+++ b/polyply/src/big_smile_mol_processor.py
@@ -74,8 +74,8 @@ class DefBigSmileParser:
     which describes a polymer molecule.
     """
 
-    def __init__(self):
-        self.force_field = None
+    def __init__(self, force_field):
+        self.force_field = force_field
         self.meta_molecule = None
         self.molecule = None
 
diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py
index 6969a31c..57972078 100644
--- a/polyply/src/big_smile_parsing.py
+++ b/polyply/src/big_smile_parsing.py
@@ -250,7 +250,7 @@ def fragment_iter(fragment_str):
         nx.set_node_attributes(mol_graph, resname, 'resname')
         yield resname, mol_graph
 
-def force_field_from_fragments(fragment_str):
+def force_field_from_fragments(fragment_str, force_field=None):
     """
     Collects the fragments defined in a BigSmile string
     as :class:`vermouth.molecule.Blocks` in a force-field
@@ -266,9 +266,11 @@ def force_field_from_fragments(fragment_str):
     -------
     :class:`vermouth.forcefield.ForceField`
     """
-    force_field = ForceField("big_smile_ff")
+    if force_field is None:
+        force_field = ForceField("big_smile_ff")
     frag_iter = fragment_iter(fragment_str)
     for resname, mol_graph in frag_iter:
-        mol_block = Block(mol_graph)
-        force_field.blocks[resname] = mol_block
+        if resname not in force_field.blocks:
+            mol_block = Block(mol_graph)
+            force_field.blocks[resname] = mol_block
     return force_field
diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py
index a26248d6..7ffaec93 100644
--- a/polyply/src/itp_to_ff.py
+++ b/polyply/src/itp_to_ff.py
@@ -20,6 +20,7 @@
 from polyply.src.ffoutput import ForceFieldDirectiveWriter
 from polyply.src.charges import balance_charges, set_charges
 from polyply.src.big_smile_mol_processor import DefBigSmileParser
+from .load_library import load_ff_library
 
 def _read_itp_file(itppath):
     """
@@ -34,10 +35,17 @@ def _read_itp_file(itppath):
     mol.make_edges_from_interaction_type(type_="bonds")
     return mol
 
-def itp_to_ff(itppath, smile_str, outpath, res_charges=None):
+def itp_to_ff(itppath, smile_str, outpath, inpath=[], res_charges=None):
     """
     Main executable for itp to ff tool.
     """
+    # load FF files if given
+    if inpath:
+        force_field = load_ff_library("new", None, inpath)
+    # if none are given we create an empty ff
+    else:
+        force_field = ForceField("new")
+
     # what charges belong to which resname
     if res_charges:
         crg_dict = dict(res_charges)
@@ -52,14 +60,16 @@ def itp_to_ff(itppath, smile_str, outpath, res_charges=None):
         target_mol = _read_itp_file(itppath)
 
     # read the big-smile representation
-    meta_mol = DefBigSmileParser().parse(smile_str)
+    meta_mol = DefBigSmileParser(force_field).parse(smile_str)
 
     # identify and extract all unique fragments
     unique_fragments, res_graph = FragmentFinder(target_mol).extract_unique_fragments(meta_mol.molecule)
 
     # extract the blocks with parameters
-    force_field = ForceField("new")
     for name, fragment in unique_fragments.items():
+        # don't overwrite existing blocks
+        if name in force_field.blocks:
+            continue
         new_block = extract_block(target_mol, list(fragment.nodes), defines={})
         nx.set_node_attributes(new_block, 1, "resid")
         new_block.nrexcl = target_mol.nrexcl
@@ -70,7 +80,7 @@ def itp_to_ff(itppath, smile_str, outpath, res_charges=None):
                         charge=float(crg_dict[name]))
 
     # extract the regular links
-    force_field.links = extract_links(target_mol)
+    force_field.links.append(extract_links(target_mol))
     # extract links that span the terminii
     find_termini_mods(res_graph, target_mol, force_field)
 

From d97632d57a427479f1a77fe380acc41df97ef3d1 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Mon, 4 Mar 2024 15:35:24 +0100
Subject: [PATCH 68/82] adjust doc string

---
 polyply/src/big_smile_mol_processor.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/polyply/src/big_smile_mol_processor.py b/polyply/src/big_smile_mol_processor.py
index f474fe76..b533e818 100644
--- a/polyply/src/big_smile_mol_processor.py
+++ b/polyply/src/big_smile_mol_processor.py
@@ -83,9 +83,10 @@ def edges_from_bonding_descrpt(self):
         """
         Make edges according to the bonding descriptors stored
         in the node attributes of meta_molecule residue graph.
-        If a bonding descriptor is consumed it is set to None,
+        If a bonding descriptor is consumed it is removed from the list,
         however, the meta_molecule edge gets an attribute with the
-        bonding descriptors that formed the edge.
+        bonding descriptors that formed the edge. Later unconsumed
+        bonding descriptors are replaced by hydrogen atoms.
         """
         for prev_node, node in nx.dfs_edges(self.meta_molecule):
             prev_graph = self.meta_molecule.nodes[prev_node]['graph']

From b6acc737c2e7e0ec426bc5ac5fd3172feff1f4e3 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Mon, 4 Mar 2024 16:35:33 +0100
Subject: [PATCH 69/82] skip termini mods if no atoms are different

---
 polyply/src/molecule_utils.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/polyply/src/molecule_utils.py b/polyply/src/molecule_utils.py
index a960ca85..f4249d74 100644
--- a/polyply/src/molecule_utils.py
+++ b/polyply/src/molecule_utils.py
@@ -286,6 +286,10 @@ def find_termini_mods(meta_molecule, molecule, force_field):
             for attr in ['atype', 'mass']:
                 if target_attrs[attr] != ref_attrs[attr]:
                     replace_dict[node][attr] = target_attrs[attr]
+        # a little dangerous but mostly ok; if there are no changes to
+        # the atoms we can continue
+        if len(replace_dict) == 0:
+            continue
 
         # bonded interactions could be different too so we need to check them
         overwrite_inters = defaultdict(list)

From 9d9ee891c4ac8e56c99bf02722f298889c9dfc71 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Wed, 6 Mar 2024 17:52:18 +0100
Subject: [PATCH 70/82] redo hydrogen based on valency not based on how many
 bonding descriptors are leftover

---
 polyply/src/big_smile_mol_processor.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/polyply/src/big_smile_mol_processor.py b/polyply/src/big_smile_mol_processor.py
index 461801ce..640c40e1 100644
--- a/polyply/src/big_smile_mol_processor.py
+++ b/polyply/src/big_smile_mol_processor.py
@@ -1,8 +1,12 @@
 import networkx as nx
+import pysmiles
 from polyply.src.big_smile_parsing import (res_pattern_to_meta_mol,
                                            force_field_from_fragments)
 from polyply.src.map_to_molecule import MapToMolecule
 
+VALENCES = pysmiles.smiles_helper.VALENCES
+VALENCES.update({"H":(1,)})
+
 def compatible(left, right):
     """
     Check bonding descriptor compatibility according
@@ -115,9 +119,12 @@ def replace_unconsumed_bonding_descrpt(self):
             graph = self.meta_molecule.nodes[node]['graph']
             bonding = nx.get_node_attributes(graph, "bonding")
             for node, bondings in bonding.items():
+                element = graph.nodes[node]['element']
+                hcount = VALENCES[element][0] -\
+                         self.meta_molecule.molecule.degree(node) + 1
                 attrs = {attr: graph.nodes[node][attr] for attr in ['resname', 'resid']}
                 attrs['element'] = 'H'
-                for new_id in range(1, len(bondings)+1):
+                for new_id in range(1, hcount):
                     new_node = len(self.meta_molecule.molecule.nodes) + 1
                     graph.add_edge(node, new_node)
                     attrs['atomname'] = "H" + str(new_id + len(graph.nodes))

From c4f16527532172f887689b86bc3659284ce33f97 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Wed, 6 Mar 2024 18:00:36 +0100
Subject: [PATCH 71/82] parse force-field in molprocessor, adjust hydrogen
 reconstruction

---
 polyply/src/big_smile_mol_processor.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/polyply/src/big_smile_mol_processor.py b/polyply/src/big_smile_mol_processor.py
index b533e818..640c40e1 100644
--- a/polyply/src/big_smile_mol_processor.py
+++ b/polyply/src/big_smile_mol_processor.py
@@ -1,8 +1,12 @@
 import networkx as nx
+import pysmiles
 from polyply.src.big_smile_parsing import (res_pattern_to_meta_mol,
                                            force_field_from_fragments)
 from polyply.src.map_to_molecule import MapToMolecule
 
+VALENCES = pysmiles.smiles_helper.VALENCES
+VALENCES.update({"H":(1,)})
+
 def compatible(left, right):
     """
     Check bonding descriptor compatibility according
@@ -74,8 +78,8 @@ class DefBigSmileParser:
     which describes a polymer molecule.
     """
 
-    def __init__(self):
-        self.force_field = None
+    def __init__(self, force_field):
+        self.force_field = force_field
         self.meta_molecule = None
         self.molecule = None
 
@@ -115,9 +119,12 @@ def replace_unconsumed_bonding_descrpt(self):
             graph = self.meta_molecule.nodes[node]['graph']
             bonding = nx.get_node_attributes(graph, "bonding")
             for node, bondings in bonding.items():
+                element = graph.nodes[node]['element']
+                hcount = VALENCES[element][0] -\
+                         self.meta_molecule.molecule.degree(node) + 1
                 attrs = {attr: graph.nodes[node][attr] for attr in ['resname', 'resid']}
                 attrs['element'] = 'H'
-                for new_id in range(1, len(bondings)+1):
+                for new_id in range(1, hcount):
                     new_node = len(self.meta_molecule.molecule.nodes) + 1
                     graph.add_edge(node, new_node)
                     attrs['atomname'] = "H" + str(new_id + len(graph.nodes))

From 7a5dd1f74e76e8c15103a5ad93e9490d253403bf Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Wed, 6 Mar 2024 19:12:26 +0100
Subject: [PATCH 72/82] fix tests

---
 polyply/tests/test_big_smile_mol_proc.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/polyply/tests/test_big_smile_mol_proc.py b/polyply/tests/test_big_smile_mol_proc.py
index 26e85ba6..28c5390d 100644
--- a/polyply/tests/test_big_smile_mol_proc.py
+++ b/polyply/tests/test_big_smile_mol_proc.py
@@ -1,5 +1,6 @@
 import pytest
 import networkx as nx
+from vermouth.forcefield import ForceField
 from polyply.src.big_smile_mol_processor import (DefBigSmileParser,
                                                  generate_edge)
 #import matplotlib.pyplot as plt
@@ -83,7 +84,8 @@ def test_generate_edge(bonds_source, bonds_target, edge, btypes):
 
 ))
 def test_def_big_smile_parser(smile, ref_nodes, ref_edges):
-    meta_mol = DefBigSmileParser().parse(smile)
+    ff = ForceField("new")
+    meta_mol = DefBigSmileParser(ff).parse(smile)
 #    nx.draw_networkx(meta_mol.molecule, with_labels=True, labels=nx.get_node_attributes(meta_mol.molecule, 'element'))
 #    plt.show()
     for node, ref in zip(meta_mol.nodes, ref_nodes):

From 2b9e7a9cdc414a3961efb802e667db74a7572a56 Mon Sep 17 00:00:00 2001
From: "Dr. Fabian Grunewald" <32294573+fgrunewald@users.noreply.github.com>
Date: Wed, 6 Mar 2024 19:16:53 +0100
Subject: [PATCH 73/82] Apply suggestions from code review

Co-authored-by: Peter C Kroon <pckroon@users.noreply.github.com>
---
 polyply/src/big_smile_mol_processor.py |  9 +++------
 polyply/src/big_smile_parsing.py       | 16 ++++++++--------
 2 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/polyply/src/big_smile_mol_processor.py b/polyply/src/big_smile_mol_processor.py
index 640c40e1..365b61bc 100644
--- a/polyply/src/big_smile_mol_processor.py
+++ b/polyply/src/big_smile_mol_processor.py
@@ -23,12 +23,9 @@ def compatible(left, right):
     """
     if left == right and left not in '> <':
         return True
-    if left[0] == "<" and right[0] == ">":
-        if left[1:] == right[1:]:
-            return True
-    if left[0] == ">" and right[0] == "<":
-        if left[1:] == right[1:]:
-            return True
+    l, r = left[0], right[0]
+    if (l, r) == ('<', '>') or (l, r) == ('>', '<'):
+        return left[1:] == right[1:]
     return False
 
 def generate_edge(source, target, bond_type="bonding"):
diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py
index 55d3a6aa..90e171a3 100644
--- a/polyply/src/big_smile_parsing.py
+++ b/polyply/src/big_smile_parsing.py
@@ -3,10 +3,10 @@
 import numpy as np
 try:
     import pysmiles
-except ImportError:
+except ImportError as error:
     msg = ("You are using a functionality that requires "
            "the pysmiles package. Use pip install pysmiles ")
-    raise ImportError(msg)
+    raise ImportError(msg) from error
 import networkx as nx
 from vermouth.forcefield import ForceField
 from vermouth.molecule import Block
@@ -41,7 +41,7 @@ def res_pattern_to_meta_mol(pattern):
     '{' + [#resname_1][#resname_2]... + '}'
 
     In addition to plain enumeration any residue may be
-    followed by a '|' and an integern number that
+    followed by a '|' and an integer number that
     specifies how many times the given residue should
     be added within a sequence. For example, a pentamer
     of PEO can be written as:
@@ -52,10 +52,10 @@ def res_pattern_to_meta_mol(pattern):
 
     {[#PEO]|5}
 
-    The block syntax also applies to branches. Here the convetion
+    The block syntax also applies to branches. Here the convention
     is that the complete branch including it's first anchoring
     residue is repeated. For example, to generate a PMA-g-PEG
-    polymer the following syntax is permitted:
+    polymer containing 15 residues the following syntax is permitted:
 
     {[#PMA]([#PEO][#PEO])|5}
 
@@ -79,7 +79,7 @@ def res_pattern_to_meta_mol(pattern):
         if pattern[start-1] == '(':
             branching = True
             branch_anchor = prev_node
-            recipie = [(meta_mol.nodes[prev_node]['resname'], 1)]
+            recipe = [(meta_mol.nodes[prev_node]['resname'], 1)]
         if stop < len(pattern) and pattern[stop] == '|':
             eon = _find_next_character(pattern, ['[', ')', '(', '}'], stop)
             n_mon = int(pattern[stop+1:eon])
@@ -89,7 +89,7 @@ def res_pattern_to_meta_mol(pattern):
         resname = match.group(0)[2:-1]
         # collect all residues in branch
         if branching:
-            recipie.append((resname, n_mon))
+            recipe.append((resname, n_mon))
 
         # add the new residue
         connection = []
@@ -135,7 +135,7 @@ def tokenize_big_smile(big_smile):
     """
     Processes a BigSmile string by storing the
     the BigSmile specific bonding descriptors
-    in a dict with refernce to the atom they
+    in a dict with reference to the atom they
     refer to. Furthermore, a cleaned smile
     string is generated with the BigSmile
     specific syntax removed.

From b6d891f6f32bbc60ed96fd30d75953f717d21117 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Thu, 7 Mar 2024 14:24:47 +0100
Subject: [PATCH 74/82] allow nested branch expansion

---
 polyply/src/big_smile_parsing.py | 81 ++++++++++++++++++++++++--------
 1 file changed, 62 insertions(+), 19 deletions(-)

diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py
index 90e171a3..e396a5e3 100644
--- a/polyply/src/big_smile_parsing.py
+++ b/polyply/src/big_smile_parsing.py
@@ -24,6 +24,22 @@ def _find_next_character(string, chars, start):
             return idx+start
     return np.inf
 
+def _expand_branch(meta_mol, current, anchor, recipe):
+    prev_node = anchor
+    for bdx, (resname, n_mon) in enumerate(recipe):
+        if bdx == 0:
+            anchor = current
+        for _ in range(0, n_mon):
+            connection = [(prev_node, current)]
+            print(connection)
+            meta_mol.add_monomer(current,
+                                 resname,
+                                 connection)
+            prev_node = current
+            current += 1
+    prev_node = anchor
+    return meta_mol, current, prev_node
+
 def res_pattern_to_meta_mol(pattern):
     """
     Generate a :class:`polyply.MetaMolecule` from a
@@ -70,17 +86,30 @@ def res_pattern_to_meta_mol(pattern):
     """
     meta_mol = MetaMolecule()
     current = 0
-    branch_anchor = 0
+    # stores one or more branch anchors; each next
+    # anchor belongs to a nested branch
+    branch_anchor = []
+    # used for storing the composition protocol
+    # for branches; each entry is a list of
+    # (resname, count) pairs of the branch at that
+    # anchor point
+    recipes = defaultdict(list)
+    # the previous node
     prev_node = None
+    # do we have an open branch
     branching = False
     for match in re.finditer(PATTERNS['place_holder'], pattern):
         start, stop = match.span()
         # new branch here
         if pattern[start-1] == '(':
             branching = True
-            branch_anchor = prev_node
-            recipe = [(meta_mol.nodes[prev_node]['resname'], 1)]
+            branch_anchor.append(prev_node)
+            # the recipe for making the branch includes the anchor; which
+            # is hence the first atom in the list
+            if len(branch_anchor) == 1:
+                recipes[branch_anchor[-1]] = [(meta_mol.nodes[prev_node]['resname'], 1)]
         if stop < len(pattern) and pattern[stop] == '|':
+            # eon => end of next
             eon = _find_next_character(pattern, ['[', ')', '(', '}'], stop)
             n_mon = int(pattern[stop+1:eon])
         else:
@@ -89,7 +118,7 @@ def res_pattern_to_meta_mol(pattern):
         resname = match.group(0)[2:-1]
         # collect all residues in branch
         if branching:
-            recipe.append((resname, n_mon))
+            recipes[branch_anchor[-1]].append((resname, n_mon))
 
         # add the new residue
         connection = []
@@ -105,26 +134,40 @@ def res_pattern_to_meta_mol(pattern):
         # terminate branch and jump back to anchor
         branch_stop = _find_next_character(pattern, ['['], stop) >\
                       _find_next_character(pattern, [')'], stop)
-        if stop <= len(pattern) and branch_stop and branching:
+
+        if stop <= len(pattern) and branch_stop: # and branching:
             branching = False
-            prev_node = branch_anchor
+            prev_node = branch_anchor.pop()
+            if branch_anchor:
+                branching = True
             # we have to multiply the branch n-times
             eon_a = _find_next_character(pattern, [')'], stop)
             if stop+1 < len(pattern) and pattern[eon_a+1] == "|":
                 eon_b = _find_next_character(pattern, ['[', ')', '(', '}'], eon_a+1)
-                # -1 because one branch has already been added at this point
-                for _ in range(0,int(pattern[eon_a+2:eon_b])-1):
-                    for bdx, (resname, n_mon) in enumerate(recipie):
-                        if bdx == 0:
-                            anchor = current
-                        for _ in range(0, n_mon):
-                            connection = [(prev_node, current)]
-                            meta_mol.add_monomer(current,
-                                                 resname,
-                                                 connection)
-                            prev_node = current
-                            current += 1
-                    prev_node = anchor
+                # the outermost loop goes over how often a the branch has to be
+                # added to the existing sequence
+                for idx in range(0,int(pattern[eon_a+2:eon_b])-1):
+                    prev_anchor = None
+                    skip = 0
+                    for ref_anchor, recipe in list(recipes.items())[len(branch_anchor):]:
+                        print("-->", recipe)
+                        if prev_anchor:
+                            offset = ref_anchor - prev_anchor
+                            prev_node = prev_node + offset
+                            #skip = 1
+                        print(prev_node)
+                        meta_mol, current, prev_node = _expand_branch(meta_mol,
+                                                                      current=current,
+                                                                      anchor=prev_node,
+                                                                      recipe=recipe) #[skip:])
+                        if prev_anchor is None:
+                            base_anchor = prev_node
+                        prev_anchor = ref_anchor
+                print(base_anchor)
+                prev_node = base_anchor
+            # if all branches are done we need to reset the lists
+         #   branch_anchor = []
+         #   recipes = defaultdict(list)
     return meta_mol
 
 def _big_smile_iter(smile):

From a867329a82d2f4988e43839b84357032841534a4 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Thu, 7 Mar 2024 14:56:16 +0100
Subject: [PATCH 75/82] test branch expansion

---
 polyply/src/big_smile_parsing.py        | 17 +++---
 polyply/tests/test_big_smile_parsing.py | 69 +++++++++++++++++++++++--
 2 files changed, 73 insertions(+), 13 deletions(-)

diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py
index e396a5e3..d93265ec 100644
--- a/polyply/src/big_smile_parsing.py
+++ b/polyply/src/big_smile_parsing.py
@@ -31,7 +31,6 @@ def _expand_branch(meta_mol, current, anchor, recipe):
             anchor = current
         for _ in range(0, n_mon):
             connection = [(prev_node, current)]
-            print(connection)
             meta_mol.add_monomer(current,
                                  resname,
                                  connection)
@@ -106,8 +105,8 @@ def res_pattern_to_meta_mol(pattern):
             branch_anchor.append(prev_node)
             # the recipe for making the branch includes the anchor; which
             # is hence the first atom in the list
-            if len(branch_anchor) == 1:
-                recipes[branch_anchor[-1]] = [(meta_mol.nodes[prev_node]['resname'], 1)]
+            #if len(branch_anchor) == 1:
+            recipes[branch_anchor[-1]] = [(meta_mol.nodes[prev_node]['resname'], 1)]
         if stop < len(pattern) and pattern[stop] == '|':
             # eon => end of next
             eon = _find_next_character(pattern, ['[', ')', '(', '}'], stop)
@@ -150,24 +149,22 @@ def res_pattern_to_meta_mol(pattern):
                     prev_anchor = None
                     skip = 0
                     for ref_anchor, recipe in list(recipes.items())[len(branch_anchor):]:
-                        print("-->", recipe)
                         if prev_anchor:
                             offset = ref_anchor - prev_anchor
                             prev_node = prev_node + offset
-                            #skip = 1
-                        print(prev_node)
+                            skip = 1
                         meta_mol, current, prev_node = _expand_branch(meta_mol,
                                                                       current=current,
                                                                       anchor=prev_node,
-                                                                      recipe=recipe) #[skip:])
+                                                                      recipe=recipe[skip:])
                         if prev_anchor is None:
                             base_anchor = prev_node
                         prev_anchor = ref_anchor
-                print(base_anchor)
                 prev_node = base_anchor
             # if all branches are done we need to reset the lists
-         #   branch_anchor = []
-         #   recipes = defaultdict(list)
+            # when all nested branches are completed
+            if len(branch_anchor) == 0:
+                recipes = defaultdict(list)
     return meta_mol
 
 def _big_smile_iter(smile):
diff --git a/polyply/tests/test_big_smile_parsing.py b/polyply/tests/test_big_smile_parsing.py
index ba3f5f69..5c1491b8 100644
--- a/polyply/tests/test_big_smile_parsing.py
+++ b/polyply/tests/test_big_smile_parsing.py
@@ -22,11 +22,13 @@
                         ["PMA", "PMA", "PMA"],
                         [(0, 1), (1, 2)]),
                         # simple branch expension
-                        ("{[#PMA]([#PEO][#PEO][#OHter])|2}",
+                        ("{[#PMA]([#PEO][#PEO][#OHter])|3}",
                         ["PMA", "PEO", "PEO", "OHter",
+                         "PMA", "PEO", "PEO", "OHter",
                          "PMA", "PEO", "PEO", "OHter"],
                         [(0, 1), (1, 2), (2, 3),
-                         (0, 4), (4, 5), (5, 6), (6, 7)]
+                         (0, 4), (4, 5), (5, 6), (6, 7),
+                         (4, 8), (8, 9), (9, 10), (10, 11)]
                          ),
                         # nested branched with expansion
                         ("{[#PMA]([#PEO]|3)|2}",
@@ -34,7 +36,68 @@
                          "PMA", "PEO", "PEO", "PEO"],
                         [(0, 1), (1, 2), (2, 3),
                          (0, 4), (4, 5), (5, 6), (6, 7)]
-                         )
+                         ),
+                        # nested branching
+                        #     0     1      2    3      4      5    6
+                        ("{[#PMA][#PMA]([#PEO][#PEO]([#OH])[#PEO])[#PMA]}",
+                        ["PMA", "PMA", "PEO", "PEO", "OH",
+                         "PEO", "PMA"],
+                        [(0, 1), (1, 2), (2, 3),
+                         (3, 4), (3, 5), (1, 6)]
+                         ),
+                        # nested branching plus expansion
+                        #     0     1      2    3      4/5      6     7
+                        ("{[#PMA][#PMA]([#PEO][#PEO]([#OH]|2)[#PEO])[#PMA]}",
+                        ["PMA", "PMA", "PEO", "PEO", "OH", "OH",
+                         "PEO", "PMA"],
+                        [(0, 1), (1, 2), (2, 3),
+                         (3, 4), (4, 5), (3, 6), (1, 7)]
+                         ),
+                        # nested branching plus expansion incl. branch
+                        #     0     1      2    3      4      5
+                        #           6      7    8      9      10      11
+                        ("{[#PMA][#PMA]([#PEO][#PEO]([#OH])[#PEO])|2[#PMA]}",
+                        ["PMA", "PMA", "PEO", "PEO", "OH", "PEO",
+                         "PMA", "PEO", "PEO", "PEO", "OH", "PMA"],
+                        [(0, 1), (1, 2), (2, 3),
+                         (3, 4), (3, 5), (1, 6), (6, 7), (7, 8),
+                         (8, 9), (8, 10), (6, 11)]
+                         ),
+                        # nested branching plus expansion of nested branch
+                        # here the nested branch is expanded
+                        #  0 - 1 - 10
+                        #      |
+                        #      2
+                        #      |
+                        #      3 {- 5 - 7 } - 9 -> the expanded fragment
+                        #      |    |   |
+                        #      4    6   8
+                        ("{[#PMA][#PMA]([#PEO][#PQ]([#OH])|3[#PEO])[#PMA]}",
+                        ["PMA", "PMA", "PEO", "PQ", "OH",
+                         "PQ", "OH", "PQ", "OH", "PEO", "PMA"],
+                        [(0, 1), (1, 2), (1, 10),
+                         (2, 3), (3, 4), (3, 5), (5, 6),
+                         (5, 7), (7, 8), (7, 9)]
+                         ),
+                        # nested branching plus expansion of nested branch
+                        # here the nested branch is expanded and a complete
+                        # new branch is added
+                        #          11   13
+                        #           |    |
+                        #  0 - 1 - 10 - 12
+                        #      |
+                        #      2
+                        #      |
+                        #      3 {- 5 - 7 } - 9 -> the expanded fragment
+                        #      |    |   |
+                        #      4    6   8
+                        ("{[#PMA][#PMA]([#PEO][#PQ]([#OH])|3[#PEO])[#PMA]([#CH3])|2}",
+                        ["PMA", "PMA", "PEO", "PQ", "OH",
+                         "PQ", "OH", "PQ", "OH", "PEO", "PMA", "CH3", "PMA", "CH3"],
+                        [(0, 1), (1, 2), (1, 10),
+                         (2, 3), (3, 4), (3, 5), (5, 6),
+                         (5, 7), (7, 8), (7, 9), (10, 11), (10, 12), (12, 13)]
+                         ),
 ))
 def test_res_pattern_to_meta_mol(smile, nodes, edges):
     """

From b6f5cc0d4a101948ca853cf1226afb598f6b96f3 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Thu, 7 Mar 2024 15:19:09 +0100
Subject: [PATCH 76/82] add comments all over residue expansion functions

---
 polyply/src/big_smile_parsing.py | 62 +++++++++++++++++++++++++++-----
 1 file changed, 54 insertions(+), 8 deletions(-)

diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py
index d93265ec..b043ebd9 100644
--- a/polyply/src/big_smile_parsing.py
+++ b/polyply/src/big_smile_parsing.py
@@ -97,29 +97,42 @@ def res_pattern_to_meta_mol(pattern):
     prev_node = None
     # do we have an open branch
     branching = False
+    # each element in the for loop matches a pattern
+    # '[' + '#' + some alphanumeric name + ']'
     for match in re.finditer(PATTERNS['place_holder'], pattern):
         start, stop = match.span()
-        # new branch here
+        # we start a new branch when the residue is preceded by '('
+        # as in ... ([#PEO] ...
         if pattern[start-1] == '(':
             branching = True
             branch_anchor.append(prev_node)
             # the recipe for making the branch includes the anchor; which
-            # is hence the first atom in the list
-            #if len(branch_anchor) == 1:
+            # is hence the first residue in the list
             recipes[branch_anchor[-1]] = [(meta_mol.nodes[prev_node]['resname'], 1)]
+        # here we check if the residue is followed by an expansion character '|'
+        # as in ... [#PEO]|
         if stop < len(pattern) and pattern[stop] == '|':
             # eon => end of next
+            # we find the next character that starts a new residue, ends a branch or
+            # ends the complete pattern
             eon = _find_next_character(pattern, ['[', ')', '(', '}'], stop)
+            # between the expansion character and the eon character
+            # is a number that corresponds to the number of times (i.e. monomers)
+            # that this residue should be added
             n_mon = int(pattern[stop+1:eon])
         else:
             n_mon = 1
 
+        # the resname starts at the second character and ends
+        # one before the last according to the above pattern
         resname = match.group(0)[2:-1]
-        # collect all residues in branch
+        # if this residue is part of a branch we store it in
+        # the recipe dict together with the anchor residue
+        # and expansion number
         if branching:
             recipes[branch_anchor[-1]].append((resname, n_mon))
 
-        # add the new residue
+        # now we add the new residue as often as required
         connection = []
         for _ in range(0, n_mon):
             if prev_node is not None:
@@ -130,36 +143,69 @@ def res_pattern_to_meta_mol(pattern):
             prev_node = current
             current += 1
 
-        # terminate branch and jump back to anchor
+        # here we check if the residue considered before is the
+        # last residue of a branch (i.e. '...[#residue])'
+        # that is the case if the branch closure comes before
+        # any new atom begins
         branch_stop = _find_next_character(pattern, ['['], stop) >\
                       _find_next_character(pattern, [')'], stop)
 
-        if stop <= len(pattern) and branch_stop: # and branching:
+        # if the branch ends we reset the anchor
+        # and set branching False unless we are in
+        # a nested branch
+        if stop <= len(pattern) and branch_stop:
             branching = False
             prev_node = branch_anchor.pop()
             if branch_anchor:
                 branching = True
-            # we have to multiply the branch n-times
+            #========================================
+            #       expansion for branches
+            #========================================
+            # We need to know how often the branch has
+            # to be added so we first identify the branch
+            # terminal character ')' called eon_a.
             eon_a = _find_next_character(pattern, [')'], stop)
+            # Then we check if the expansion character
+            # is next.
             if stop+1 < len(pattern) and pattern[eon_a+1] == "|":
+                # If there is one we find the beginning
+                # of the next branch, residue or end of the string
+                # As before all characters inbetween are a number that
+                # is how often the branch is expanded.
                 eon_b = _find_next_character(pattern, ['[', ')', '(', '}'], eon_a+1)
                 # the outermost loop goes over how often a the branch has to be
                 # added to the existing sequence
                 for idx in range(0,int(pattern[eon_a+2:eon_b])-1):
                     prev_anchor = None
                     skip = 0
+                    # in principle each branch can contain any number of nested branches
+                    # each branch is itself a recipe that has an anchor atom
                     for ref_anchor, recipe in list(recipes.items())[len(branch_anchor):]:
+                        # starting from the first nested branch we have to do some
+                        # math to find the anchor atom relative to the first branch
+                        # we also skip the first residue in recipe, which is the
+                        # anchor residue. Only the outermost branch in an expansion
+                        # is expanded including the anchor. This allows easy description
+                        # of graft polymers.
                         if prev_anchor:
                             offset = ref_anchor - prev_anchor
                             prev_node = prev_node + offset
                             skip = 1
+                        # this function simply adds the residues of the particular
+                        # branch
                         meta_mol, current, prev_node = _expand_branch(meta_mol,
                                                                       current=current,
                                                                       anchor=prev_node,
                                                                       recipe=recipe[skip:])
+                        # if this is the first branch we want to set the anchor
+                        # as the base anchor to which we jump back after all nested
+                        # branches have been added
                         if prev_anchor is None:
                             base_anchor = prev_node
+                        # store the previous anchor so we can do the math for nested
+                        # branches
                         prev_anchor = ref_anchor
+                # all branches added; then go back to the base anchor
                 prev_node = base_anchor
             # if all branches are done we need to reset the lists
             # when all nested branches are completed

From f965e1d42c6b3afb41b690407c36aeee0a4e8493 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Thu, 7 Mar 2024 15:33:45 +0100
Subject: [PATCH 77/82] address comments

---
 polyply/src/big_smile_mol_processor.py   |  8 ++++----
 polyply/src/big_smile_parsing.py         | 12 ++++--------
 polyply/tests/test_big_smile_mol_proc.py |  2 +-
 3 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/polyply/src/big_smile_mol_processor.py b/polyply/src/big_smile_mol_processor.py
index 365b61bc..e706217a 100644
--- a/polyply/src/big_smile_mol_processor.py
+++ b/polyply/src/big_smile_mol_processor.py
@@ -28,7 +28,7 @@ def compatible(left, right):
         return left[1:] == right[1:]
     return False
 
-def generate_edge(source, target, bond_type="bonding"):
+def generate_edge(source, target, bond_attribute="bonding"):
     """
     Given a source and a target graph, which have bonding
     descriptors stored as node attributes, find a pair of
@@ -41,7 +41,7 @@ def generate_edge(source, target, bond_type="bonding"):
     ----------
     source: :class:`nx.Graph`
     target: :class:`nx.Graph`
-    bond_type: `abc.hashable`
+    bond_attribute: `abc.hashable`
         under which attribute are the bonding descriptors
         stored.
 
@@ -55,8 +55,8 @@ def generate_edge(source, target, bond_type="bonding"):
     LookupError
         if no match is found
     """
-    source_nodes = nx.get_node_attributes(source, bond_type)
-    target_nodes = nx.get_node_attributes(target, bond_type)
+    source_nodes = nx.get_node_attributes(source, bond_attribute)
+    target_nodes = nx.get_node_attributes(target, bond_attribute)
     for source_node in source_nodes:
         for target_node in target_nodes:
             #print(source_node, target_node)
diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py
index b043ebd9..d591eecd 100644
--- a/polyply/src/big_smile_parsing.py
+++ b/polyply/src/big_smile_parsing.py
@@ -213,10 +213,6 @@ def res_pattern_to_meta_mol(pattern):
                 recipes = defaultdict(list)
     return meta_mol
 
-def _big_smile_iter(smile):
-    for token in smile:
-        yield token
-
 def tokenize_big_smile(big_smile):
     """
     Processes a BigSmile string by storing the
@@ -229,17 +225,17 @@ def tokenize_big_smile(big_smile):
     Parameters
     ----------
     smile: str
-        a BigSmile smile string
+        a BigSmile smiles string
 
     Returns
     -------
     str
-        a canonical smile string
+        a canonical smiles string
     dict
         a dict mapping bonding descriptors
-        to the nodes within the smile
+        to the nodes within the smiles string
     """
-    smile_iter = _big_smile_iter(big_smile)
+    smile_iter = iter(big_smile)
     bonding_descrpt = defaultdict(list)
     smile = ""
     node_count = 0
diff --git a/polyply/tests/test_big_smile_mol_proc.py b/polyply/tests/test_big_smile_mol_proc.py
index 28c5390d..c40f96bd 100644
--- a/polyply/tests/test_big_smile_mol_proc.py
+++ b/polyply/tests/test_big_smile_mol_proc.py
@@ -38,7 +38,7 @@ def test_generate_edge(bonds_source, bonds_target, edge, btypes):
     target = nx.path_graph(4)
     nx.set_node_attributes(source, bonds_source, "bonding")
     nx.set_node_attributes(target, bonds_target, "bonding")
-    new_edge, new_btypes = generate_edge(source, target, bond_type="bonding")
+    new_edge, new_btypes = generate_edge(source, target, bond_attribute="bonding")
     assert new_edge == edge
     assert new_btypes == btypes
 

From 0335956072b84fcc8d59f2d9b6264917ce971879 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Thu, 7 Mar 2024 16:00:29 +0100
Subject: [PATCH 78/82] allow for ionic bonds with . syntax

---
 polyply/src/big_smile_mol_processor.py   |  6 +++++-
 polyply/src/big_smile_parsing.py         |  4 ++++
 polyply/tests/test_big_smile_mol_proc.py | 10 ++++++++++
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/polyply/src/big_smile_mol_processor.py b/polyply/src/big_smile_mol_processor.py
index e706217a..1801a437 100644
--- a/polyply/src/big_smile_mol_processor.py
+++ b/polyply/src/big_smile_mol_processor.py
@@ -1,3 +1,4 @@
+import re
 import networkx as nx
 import pysmiles
 from polyply.src.big_smile_parsing import (res_pattern_to_meta_mol,
@@ -130,10 +131,13 @@ def replace_unconsumed_bonding_descrpt(self):
                     self.meta_molecule.molecule.nodes[new_node].update(attrs)
 
     def parse(self, big_smile_str):
-        res_pattern, residues = big_smile_str.split('.')
+        res_pattern, residues = re.findall(r"\{[^\}]+\}", big_smile_str)
         self.meta_molecule = res_pattern_to_meta_mol(res_pattern)
         self.force_field = force_field_from_fragments(residues)
         MapToMolecule(self.force_field).run_molecule(self.meta_molecule)
         self.edges_from_bonding_descrpt()
         self.replace_unconsumed_bonding_descrpt()
         return self.meta_molecule
+
+# ToDo
+# - clean copying of bond-list attributes L100
diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py
index d591eecd..ec136bea 100644
--- a/polyply/src/big_smile_parsing.py
+++ b/polyply/src/big_smile_parsing.py
@@ -361,3 +361,7 @@ def force_field_from_fragments(fragment_str):
         mol_block = Block(mol_graph)
         force_field.blocks[resname] = mol_block
     return force_field
+
+# ToDos
+# - remove special case hydrogen line 327ff
+# - check rebuild_h and clean up
diff --git a/polyply/tests/test_big_smile_mol_proc.py b/polyply/tests/test_big_smile_mol_proc.py
index c40f96bd..b6fe8e03 100644
--- a/polyply/tests/test_big_smile_mol_proc.py
+++ b/polyply/tests/test_big_smile_mol_proc.py
@@ -53,6 +53,16 @@ def test_generate_edge(bonds_source, bonds_target, edge, btypes):
                         [(0, 1), (0, 2), (2, 3), (3, 4), (2, 5), (2, 6), (4, 7),
                          (4, 8), (4, 9), (9, 10), (10, 11), (9, 12), (9, 13),
                          (11, 14), (11, 15), (11, 16), (16, 17)]),
+                        # simple linear sequence with ionic bond
+                        ("{[#OHter][#PEO]|2[#OHter]}.{#PEO=[$]COC[$],#OHter=[$][O].[Na+]}",
+                        #           0 1             2 3 4 5 6 7 8
+                        [('OHter', 'O Na'), ('PEO', 'C O C H H H H'),
+                        #        9 10 11 12 13 14 15         16 17
+                         ('PEO', 'C O C H H H H'), ('OHter', 'O Na')],
+                        [(0, 1), (0, 2), (2, 3), (3, 4), (2, 5), (2, 6), (4, 7),
+                         (4, 8), (4, 9), (9, 10), (10, 11), (9, 12), (9, 13),
+                         (11, 14), (11, 15), (11, 16), (16, 17)]),
+
                         # uncomsumed bonding IDs; note that this is not the same
                         # molecule as previous test case. Here one of the OH branches
                         # and replaces an CH2 group with CH-OH

From 47fef2382fa3a35d895ac0c0d2c852ab499a4274 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Thu, 7 Mar 2024 17:01:37 +0100
Subject: [PATCH 79/82] fix previous issue with link appending

---
 polyply/src/itp_to_ff.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py
index 7ffaec93..14437fe1 100644
--- a/polyply/src/itp_to_ff.py
+++ b/polyply/src/itp_to_ff.py
@@ -80,7 +80,7 @@ def itp_to_ff(itppath, smile_str, outpath, inpath=[], res_charges=None):
                         charge=float(crg_dict[name]))
 
     # extract the regular links
-    force_field.links.append(extract_links(target_mol))
+    force_field.links += extract_links(target_mol)
     # extract links that span the terminii
     find_termini_mods(res_graph, target_mol, force_field)
 

From 7f7fe21c055d272e077ef566cf2b906c56917dc4 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Thu, 7 Mar 2024 18:02:49 +0100
Subject: [PATCH 80/82] update itp_to_ff tests

---
 .../tests/test_data/itp_to_ff/ACOL/seq.txt    |  2 +-
 .../tests/test_data/itp_to_ff/PEG_PBE/ref.itp | 14 ++++--
 .../tests/test_data/itp_to_ff/PEG_PBE/seq.txt |  2 +-
 polyply/tests/test_itp_to_ff.py               | 45 ++++++++++++-------
 4 files changed, 40 insertions(+), 23 deletions(-)

diff --git a/polyply/tests/test_data/itp_to_ff/ACOL/seq.txt b/polyply/tests/test_data/itp_to_ff/ACOL/seq.txt
index 1a088a04..db7ea3e6 100644
--- a/polyply/tests/test_data/itp_to_ff/ACOL/seq.txt
+++ b/polyply/tests/test_data/itp_to_ff/ACOL/seq.txt
@@ -1 +1 @@
-Mter M AOL M Mter_1
+ter1 PMA AOL PMA ter2
diff --git a/polyply/tests/test_data/itp_to_ff/PEG_PBE/ref.itp b/polyply/tests/test_data/itp_to_ff/PEG_PBE/ref.itp
index 53941636..b878a1a1 100644
--- a/polyply/tests/test_data/itp_to_ff/PEG_PBE/ref.itp
+++ b/polyply/tests/test_data/itp_to_ff/PEG_PBE/ref.itp
@@ -289,6 +289,8 @@ new 3
  9 26 1 ; link
 19 36 1 ; link
 29 45 1 ; link
+; added manually
+39 53 1
 
 [ angles ]
  2  1  4 1 107.800 276.144
@@ -404,7 +406,8 @@ new 3
 53 47 50 1 110.700 313.800 ; link
 
 [ dihedrals ]
- 5  8  7 14 4 180.000 10.460 2
+; 5  8  7 14 4 180.000 10.460 2
+14  8  7  5 4 180.000 10.460 2
 13  7  6  8 4 180.000 10.460 2
  9  6  7  8 3 0.527 -6.397 -1.695 7.565 -0.000 0.000
 14  8  7  6 3 58.576 0.000 -58.576 -0.000 -0.000 0.000
@@ -418,7 +421,8 @@ new 3
 11  9  6  7 3 0.766 2.297 0.000 -3.063 -0.000 0.000
 11  9  6 12 3 0.628 1.883 0.000 -2.510 -0.000 0.000
 10  9  6 12 3 0.628 1.883 0.000 -2.510 -0.000 0.000
-15 18 17 24 4 180.000 10.460 2
+;15 18 17 24 4 180.000 10.460 2
+24 18 17 15 4 180.000 10.460 2
 23 17 16 18 4 180.000 10.460 2
 19 16 17 18 3 0.527 -6.397 -1.695 7.565 -0.000 0.000
 24 18 17 16 3 58.576 0.000 -58.576 -0.000 -0.000 0.000
@@ -432,7 +436,8 @@ new 3
 21 19 16 17 3 0.766 2.297 0.000 -3.063 -0.000 0.000
 21 19 16 22 3 0.628 1.883 0.000 -2.510 -0.000 0.000
 20 19 16 22 3 0.628 1.883 0.000 -2.510 -0.000 0.000
-25 28 27 34 4 180.000 10.460 2
+;25 28 27 34 4 180.000 10.460 2
+34 28 27 25 4 180.000 10.460 2
 33 27 26 28 4 180.000 10.460 2
 29 26 27 28 3 0.527 -6.397 -1.695 7.565 -0.000 0.000
 34 28 27 26 3 58.576 0.000 -58.576 -0.000 -0.000 0.000
@@ -446,7 +451,8 @@ new 3
 31 29 26 27 3 0.766 2.297 0.000 -3.063 -0.000 0.000
 31 29 26 32 3 0.628 1.883 0.000 -2.510 -0.000 0.000
 30 29 26 32 3 0.628 1.883 0.000 -2.510 -0.000 0.000
-35 38 37 44 4 180.000 10.460 2
+;35 38 37 44 4 180.000 10.460 2
+44 38 37 35 4 180.000 10.460 2
 43 37 36 38 4 180.000 10.460 2
 39 36 37 38 3 0.527 -6.397 -1.695 7.565 -0.000 0.000
 44 38 37 36 3 58.576 0.000 -58.576 -0.000 -0.000 0.000
diff --git a/polyply/tests/test_data/itp_to_ff/PEG_PBE/seq.txt b/polyply/tests/test_data/itp_to_ff/PEG_PBE/seq.txt
index 408d9986..5225a4e5 100644
--- a/polyply/tests/test_data/itp_to_ff/PEG_PBE/seq.txt
+++ b/polyply/tests/test_data/itp_to_ff/PEG_PBE/seq.txt
@@ -1 +1 @@
-CH3ter PBE PBE PBE PBE PEO PEOter
+CH3ter PBE PBE PBE PBE PEO PEO OHter
diff --git a/polyply/tests/test_itp_to_ff.py b/polyply/tests/test_itp_to_ff.py
index 13afaf0a..caa6f66a 100644
--- a/polyply/tests/test_itp_to_ff.py
+++ b/polyply/tests/test_itp_to_ff.py
@@ -54,31 +54,44 @@ def itp_equal(ref_mol, new_mol):
                           atol=0.1)
 
     for inter_type in new_mol.interactions:
+        print(inter_type)
+        print(len(new_mol.interactions[inter_type]), len(ref_mol.interactions[inter_type]))
         assert len(new_mol.interactions[inter_type]) == len(ref_mol.interactions[inter_type])
         for inter in new_mol.interactions[inter_type]:
-            new_atoms = [match[atom] for atom in inter.atoms]
+            new_atoms = tuple([match[atom] for atom in inter.atoms])
             new_inter = Interaction(atoms=new_atoms,
                                     parameters=inter.parameters,
                                     meta=inter.meta)
+            print(new_inter)
             for other_inter in ref_mol.interactions[inter_type]:
-                if _interaction_equal(inter, other_inter, inter_type):
+                if _interaction_equal(new_inter, other_inter, inter_type):
                     break
             else:
+                print("--")
                 assert False
     return True
 
-@pytest.mark.parametrize("case, fname, smiles, resnames, charges", [
-    ("PEO_OHter", "in_itp.itp", ["[OH][CH2]", "[CH2]O[CH2]", "[CH2][OH]"],
-    ["OH", "PEO", "OH"], [0, 0, 0]),
-    ("PEG_PBE", "in_itp.itp", ["[CH3]", "[CH2][CH][CH][CH2]", "[CH2]O[CH2]"],
-    ["CH3", "PBE", "PEO"], [0, 0, 0]),
-    ("ACOL","ref.top", ["[CH2][CH]C(=O)[O][CH3]","[CH2][CH]C(=O)[O][CH3]",
-              "[CH2][CH]C(=O)[O][CH2][CH2][N]([CH3])([CH3])([CH3])",
-              "[CH2][CH]C(=O)[O][CH3]", "[CH2][CH]C(=O)[O][CH3]"],
-             ["M", "M", "AOL", "M", "M"],
-             [0, 0, 1, 0, 0]),
+@pytest.mark.parametrize("case, fname, bigsmile, charges", [
+    # test case 1 PEO with OH termini
+    ("PEO_OHter",
+     "in_itp.itp",
+     "{[#OHter][#PEO]|4[#OHter]}.{#PEO=[$]COC[$],#OHter=[$]CO}",
+     [("OHter", 0), ("PEO", 0)],
+    ),
+    # test case 2 PEO-PBE block copolymer with two termini
+    ("PEG_PBE",
+     "in_itp.itp",
+     "{[#CH3ter][#PBE]|4[#PEO]|2[#OHter]}.{#PEO=[>]COC[<],#OHter=[<]CO,#CH3ter=[>][CH3],#PBE=[>]CC[<]C=C}",
+    [("CH3ter", 0), ("PBE", 0), ("PEO", 0), ("OHter", 0)],
+    ),
+    # test case 3 complex sequence with charged ion in the center
+   ("ACOL",
+    "ref.top",
+    "{[#ter1][#PMA][#AOL][#PMA][#ter2]}.{#Hter=[>][<]H,#ter1=CC[<]C(=O)OC,#ter2=[>]CCC(=O)OC,#PMA=[>]CC[<]C(=O)OC,#AOL=[>]CC[<]C(=O)OCC[N+](C)(C)(C)}",
+     [("ter1", 0), ("PMA", 0), ("AOL", 1), ("ter2", 0)],
+    )
 ])
-def test_itp_to_ff(tmp_path, case, fname, smiles, resnames, charges):
+def test_itp_to_ff(tmp_path, case, fname, bigsmile, charges):
     """
     Call itp-to-ff and check if it generates the same force-field
     as in the ref.ff file.
@@ -86,10 +99,8 @@ def test_itp_to_ff(tmp_path, case, fname, smiles, resnames, charges):
     tmp_file = Path(tmp_path) / "test.ff"
     inpath = Path(polyply.TEST_DATA) / "itp_to_ff" / case
     itp_to_ff(itppath=inpath/fname,
-              fragment_smiles=smiles,
-              resnames=resnames,
-              charges=charges,
-              term_prefix='ter',
+              smile_str=bigsmile,
+              res_charges=charges,
               outpath=tmp_file,)
     # now generate an itp file with this ff-file
     tmp_itp = tmp_path / "new.itp"

From 726866371ce70bd9a50bbfdc655a29d657281084 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Thu, 7 Mar 2024 21:06:16 +0100
Subject: [PATCH 81/82] update tests for fragment finder

---
 polyply/tests/test_fragment_finder.py | 293 ++++++--------------------
 1 file changed, 61 insertions(+), 232 deletions(-)

diff --git a/polyply/tests/test_fragment_finder.py b/polyply/tests/test_fragment_finder.py
index 7fb1478c..77c60a29 100644
--- a/polyply/tests/test_fragment_finder.py
+++ b/polyply/tests/test_fragment_finder.py
@@ -14,33 +14,12 @@
 """
 Test the fragment finder for itp_to_ff.
 """
-
-import textwrap
+import random
 import pytest
-from pathlib import Path
-import numpy as np
 import networkx as nx
-import vermouth.forcefield
-import vermouth.molecule
-from vermouth.gmx.itp_read import read_itp
-from polyply import TEST_DATA
-import polyply.src.meta_molecule
-from polyply.src.meta_molecule import (MetaMolecule, Monomer)
+from vermouth.forcefield import ForceField
 import polyply
-from collections import defaultdict
-import pysmiles
-
-@pytest.mark.parametrize(
-    "node1, node2, expected",
-    [
-        ({"element": "C"}, {"element": "C"}, True),
-        ({"element": "H"}, {"element": "O"}, False),
-        ({"element": "N"}, {"element": "N"}, True),
-        ({"element": "O"}, {"element": "S"}, False),
-    ],
-)
-def test_element_match(node1, node2, expected):
-    assert polyply.src.fragment_finder._element_match(node1, node2) == expected
+from polyply.src.big_smile_mol_processor import DefBigSmileParser
 
 @pytest.mark.parametrize(
     "match_keys, node1, node2, expected",
@@ -53,224 +32,74 @@ def test_element_match(node1, node2, expected):
 )
 def test_node_match(match_keys, node1, node2, expected):
     # molecule and terminal label don't matter
-    frag_finder = polyply.src.fragment_finder.FragmentFinder(None, "ter")
+    frag_finder = polyply.src.fragment_finder.FragmentFinder(None)
     frag_finder.match_keys = match_keys
     assert frag_finder._node_match(node1, node2) == expected
 
-def find_studs(mol):
-    """
-    By element find all undersatisfied connections
-    at the all-atom level.
-    """
-    atom_degrees = {"H":1,
-                    "C":4,
-                    "O":2,
-                    "N":3}
-    for node in mol.nodes:
-        ele = mol.nodes[node]['element']
-        if mol.degree(node) != atom_degrees[ele]:
-            yield node
-
-def set_mass(mol):
-    masses = {"O": 16, "N":14,"C":12,
-              "S":32, "H":1}
-
-    for atom in mol.nodes:
-        mol.nodes[atom]['mass'] = masses[mol.nodes[atom]['element']]
-    return mol
-
-def polymer_from_fragments(fragments, resnames, remove_resid=True):
-    """
-    Given molecule fragments as smiles
-    combine them into different polymer
-    molecules.
-    """
-    fragments_to_mol = []
-    frag_mols = []
-    frag_graph = pysmiles.read_smiles(fragments[0], explicit_hydrogen=True)
-    nx.set_node_attributes(frag_graph, 1, "resid")
-    nx.set_node_attributes(frag_graph, resnames[0], "resname")
-    frag_mols.append(frag_graph)
-    mol = vermouth.Molecule(frag_graph)
-    # terminals should have one stud anyways
-    prev_stud = next(find_studs(frag_graph))
-    fragments_to_mol.append({node: node for node in mol.nodes})
-    for resname, smile in zip(resnames[1:], fragments[1:]):
-        frag_graph = pysmiles.read_smiles(smile, explicit_hydrogen=True)
-        nx.set_node_attributes(frag_graph, resname, "resname")
-        frag_mols.append(frag_graph)
-        next_mol = vermouth.Molecule(frag_graph)
-        correspondance = mol.merge_molecule(next_mol)
-        fragments_to_mol.append(correspondance)
-        stud_iter = find_studs(frag_graph)
-        mol.add_edge(prev_stud, correspondance[next(stud_iter)])
-
-        try:
-            prev_stud = correspondance[next(stud_iter)]
-        except StopIteration:
-            # we're done molecule is complete
-            continue
-    mol = set_mass(mol)
-    if remove_resid:
-        nx.set_node_attributes(mol, {node: None for node in mol.nodes} ,"resid")
-        nx.set_node_attributes(mol, {node: None for node in mol.nodes} ,"resname")
-    return mol, frag_mols, fragments_to_mol
+def _scramble_nodes(graph):
+    element_to_masses = {"O": 16,
+                         "N": 14,
+                         "C": 12,
+                         "S": 32,
+                         "H": 1}
+    # Get a list of all nodes in the original graph
+    nodes = list(graph.nodes())
+    # Generate a randomized list of new node names/indices
+    randomized_nodes = nodes.copy()
+    random.shuffle(randomized_nodes)
+    # Create a mapping from old nodes to new nodes
+    node_mapping = {old_node: new_node for old_node, new_node in zip(nodes, randomized_nodes)}
+    # Generate a new graph by applying the mapping to the original graph
+    randomized_graph = nx.relabel_nodes(graph, node_mapping)
+    for node in randomized_graph.nodes:
+        for attr in ['resid', 'resname']:
+            del randomized_graph.nodes[node][attr]
+        ele = randomized_graph.nodes[node]['element']
+        randomized_graph.nodes[node]['mass'] = element_to_masses[ele]
+    return randomized_graph
 
 @pytest.mark.parametrize(
-    "smiles, resnames",
+    "big_smile, resnames",
     [
-     # completely defined molecule with two termini
-     (["[CH3]", "[CH2]O[CH2]", "[CH3]"], ["CH3", "PEO", "CH3"]),
-     # two different termini
-     (["[OH][CH2]", "[CH2]O[CH2]", "[CH3]"], ["OH", "PEO", "CH3"]),
-     # two different termini with the same repeat unit
-     (["[OH][CH2]", "[CH2]O[CH2]","[CH2]O[CH2]", "[CH3]"], ["OH", "PEO", "PEO", "CH3"]),
-     # sequence with two monomers and multiple "wrong" matchs
-     (["[CH3]", "[CH2][CH][CH][CH2]", "[CH2]O[CH2]", "[CH2][OH]"], ["CH3", "PBD", "PEO", "OH"]),
-     # sequence with two monomers, four repeats and multiple "wrong" matchs
-     (["[CH3]", "[CH2][CH][CH][CH2]", "[CH2][CH][CH][CH2]", "[CH2][CH][CH][CH2]",
-      "[CH2][CH][CH][CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]",
-      "[CH2][OH]"], ["CH3", "PBE", "PBE", "PBE", "PBE", "PEO", "PEO", "PEO", "PEO", "OH"]),
-     # super symmtry - worst case scenario
-     (["[CH3]", "[CH2][CH2]", "[CH2][CH2]", "[CH2][CH2]","[CH2][CH2]", "[CH2][CH2]","[CH3]"],
-      ["CH3", "PE", "PE", "PE", "PE", "PE", "CH3"]),
-    ])
-def test_label_fragments(smiles, resnames):
-    molecule, frag_mols, fragments_in_mol = polymer_from_fragments(smiles, resnames)
-    frag_finder = polyply.src.fragment_finder.FragmentFinder(molecule, "ter")
-    unique_fragments = frag_finder.label_fragments_from_graph(frag_mols)
-    for resid, (resname, frag_to_mol) in enumerate(zip(resnames, fragments_in_mol), start=1):
-        for frag_node, mol_node in frag_to_mol.items():
-            assert frag_finder.molecule.nodes[mol_node]['resname'] == resname
-            assert frag_finder.molecule.nodes[mol_node]['resid'] == resid
-
-@pytest.mark.parametrize(
-    "smiles, resnames, remove, new_name",
-    [
-     # do not match termini
-     (["[CH3]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH3]"],
-      ["CH3", "PEO", "PEO", "PEO", "CH3"],
-      {1:2, 6:3},
-      {1: "PEO", "4": "PEO"},
+     # two residues no branches
+     ("{[#CH3][#PEO]|4[#CH3]}.{#PEO=[$]COC[$],#CH3=[$]C}",
+      ["CH3", "PEO"],
      ),
-     # have dangling atom in center
-     (["[CH3]", "[CH2][CH2]", "[CH2][CH2]", "[CH2]O[CH2]", "[CH2][CH2]","[CH2][CH2]", "[CH2][CH2]","[CH3]"],
-      ["CH3", "PE", "PE", "PEO", "PE", "PE", "PE", "CH3"],
-      {4:5},
-      {4:"PE"},
+     # three residues no branches
+     ("{[#OH][#PEO]|4[#CH3]}.{#PEO=[$]COC[$],#CH3=[$]C,#OH=[$]O}",
+      ["CH3", "PEO", "OH"],
      ),
+     # simple branch expansion
+    ("{[#PMA]([#PEO][#PEO][#OH])|3}.{#PEO=[$]COC[$],#PMA=[>]CC[<]C(=O)OC[$],#OH=[$]O}",
+    ["PMA", "PEO", "OH"]),
+    # something with sulphur
+    ("{[#P3HT]|3}.{#P3HT=CCCCCCC1=C[$]SC[$]=C1}",
+    ["P3HT"])
     ])
-def test_label_unmatched_atoms(smiles, resnames, remove, new_name):
-    molecule, frag_mols, fragments_in_mol = polymer_from_fragments(smiles, resnames, remove_resid=False)
-    nodes_to_label = {}
-    max_by_resid = {}
-
-    for node in molecule.nodes:
-        resid = molecule.nodes[node]['resid']
-        if resid in remove:
-            del molecule.nodes[node]['resid']
-            del molecule.nodes[node]['resname']
-            nodes_to_label[node] = resid
-        else:
-            if resid in max_by_resid:
-                known_atom = node
-                max_by_resid[resid] += 1
-            else:
-                max_by_resid[resid] = 1
+def test_extract_fragments(big_smile, resnames):
+    ff = ForceField("new")
+    parser = DefBigSmileParser(ff)
+    meta = parser.parse(big_smile)
+    ff = parser.force_field
+    # strips resid, resname, and scrambles order
+    target_molecule = _scramble_nodes(meta.molecule)
 
-    resids = nx.get_node_attributes(molecule, "resid")
-    # the frag finder removes resid attributes so we have to later reset them
-    frag_finder = polyply.src.fragment_finder.FragmentFinder(molecule, "ter")
-    nx.set_node_attributes(frag_finder.molecule, resids, "resid")
-    frag_finder.max_by_resid = max_by_resid
-    frag_finder.known_atom = known_atom
-    frag_finder.label_unmatched_atoms()
-    for node, old_id in nodes_to_label.items():
-        assert frag_finder.molecule.nodes[node]['resid'] == remove[old_id]
-        assert frag_finder.molecule.nodes[node]['resname'] == new_name[old_id]
+    # initialize the fragment finder
+    frag_finder = polyply.src.fragment_finder.FragmentFinder(target_molecule)
+    fragments, res_graph = frag_finder.extract_unique_fragments(meta.molecule)
 
-@pytest.mark.parametrize(
-    "smiles, resnames, remove, uni_frags",
-    [
-     # completely defined molecule with two termini
-     (["[CH3]", "[CH2]O[CH2]", "[CH3]"],
-      ["CH3", "PEO", "CH3"],
-      {},
-      {"CH3ter": 0, "PEO": 1}
-     ),
-     # two different termini
-     (["[OH][CH2]", "[CH2]O[CH2]", "[CH3]"],
-      ["OH", "PEO", "CH3"],
-      {},
-      {"OHter": 0, "PEO": 1, "CH3ter": 2}
-     ),
-     # sequence with two monomers, four repeats and multiple "wrong" matchs
-     (["[CH3]", "[CH2][CH][CH][CH2]", "[CH2][CH][CH][CH2]", "[CH2][CH][CH][CH2]",
-      "[CH2][CH][CH][CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]",
-      "[CH2][OH]"],
-      ["CH3", "PBE", "PBE", "PBE", "PBE", "PEO", "PEO", "PEO", "PEO", "OH"],
-      {},
-      {"CH3ter": 0, "PBE": 1, "PEO": 5, "OHter": 9}
-     ),
-     # super symmtry - worst case scenario
-     (["[CH3]", "[CH2][CH2]", "[CH2][CH2]", "[CH2][CH2]","[CH2][CH2]", "[CH2][CH2]","[CH3]"],
-      ["CH3", "PE", "PE", "PE", "PE", "PE", "CH3"],
-      {},
-      {"CH3ter":0, "PE": 1}
-     ),
-     # different fragments with same resname
-     (["[CH3]O[CH2]", "[CH2]O[CH2]", "[CH3]"],
-      ["PEO", "PEO", "CH3"],
-      {3:2},
-      {"PEOter": 0, "PEOter_1": (1,2)}
-     ),
-     # do not match termini
-     (["[CH3]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH2]O[CH2]", "[CH3]"],
-      ["CH3", "PEO", "PEO", "PEO", "CH3"],
-      {5: 4},
-      {"CH3ter":0, "PEO": 1, "PEOter": (3, 4)},
-     ),
-     # have dangling atom in center; this is a bit akward but essentially serves
-     # as a guard of having really shitty input
-     (["[CH3]", "[CH2][CH2]", "[CH2][CH2]", "[CH2]O[CH2]", "[CH2][CH2]","[CH2][CH2]", "[CH2][CH2]","[CH3]"],
-      ["CH3", "PE", "PE", "PEO", "PE", "PE", "PE", "CH3"],
-      {4: 3},
-      {"CH3ter": 0, "PE": 1, "PEter": (2, 3, 4, 5, 6, 7)},
-     ),
-    ])
-def test_extract_fragments(smiles, resnames, remove, uni_frags):
-    molecule, frag_mols, fragments_in_mol = polymer_from_fragments(smiles, resnames, remove_resid=True)
-    for node in molecule.nodes:
-        resid = molecule.nodes[node]['resid']
-        if resid in remove:
-            del molecule.nodes[node]['resid']
-            del molecule.nodes[node]['resname']
+    def _res_node_match(a, b):
+        return a['resname'] == b['resname']
 
-    match_mols = []
-    for idx, frag in enumerate(frag_mols):
-        if idx not in remove.values():
-            match_mols.append(frag)
+    def _frag_node_match(a, b):
+        for attr in ['element', 'resname']:
+            if a[attr] != b[attr]:
+                return False
+        return True
 
-    frag_finder = polyply.src.fragment_finder.FragmentFinder(molecule, "ter")
-    fragments, _ = frag_finder.extract_unique_fragments(match_mols)
-    assert len(fragments) == len(uni_frags)
-    for resname, graph in fragments.items():
-        frag_finder.match_keys = ['element', 'mass', 'resname']
-        if type(uni_frags[resname]) == tuple:
-           new_smiles = [smiles[idx] for idx in uni_frags[resname]]
-           new_resnames = [resnames[idx] for idx in uni_frags[resname]]
-           ref, _, _ = polymer_from_fragments(new_smiles, new_resnames)
-           nx.set_node_attributes(ref, resname, "resname")
-        else:
-            ref = frag_mols[uni_frags[resname]]
-        # because the terminii are not labelled yet in the fragment
-        # graphs used to make the match
-        nx.set_node_attributes(ref, resname, "resname")
-        assert nx.is_isomorphic(ref, graph, node_match=frag_finder._node_match)
-        # make sure all molecule nodes are named correctly
-        frag_finder.match_keys = ['atomname', 'resname']
-        for node in frag_finder.res_graph:
-           resname_mol = frag_finder.res_graph.nodes[node]["resname"]
-           if resname == resname_mol:
-               target = frag_finder.res_graph.nodes[node]["graph"]
-               assert nx.is_isomorphic(target, graph, node_match=frag_finder._node_match)
+    assert set(fragments.keys()) == set(resnames)
+    assert nx.is_isomorphic(res_graph, meta, node_match=_res_node_match)
+    for resname in resnames:
+        assert nx.is_isomorphic(fragments[resname],
+                                ff.blocks[resname],
+                                node_match=_frag_node_match)

From 15be6a6a25476dfccdb667a5c0070ca5d19d3ee2 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Thu, 7 Mar 2024 21:15:10 +0100
Subject: [PATCH 82/82] remove leftover files

---
 polyply/src/big_smiles.py        |  93 ---------------
 polyply/src/big_smiles_helper.py | 193 -------------------------------
 2 files changed, 286 deletions(-)
 delete mode 100644 polyply/src/big_smiles.py
 delete mode 100644 polyply/src/big_smiles_helper.py

diff --git a/polyply/src/big_smiles.py b/polyply/src/big_smiles.py
deleted file mode 100644
index 41e8535e..00000000
--- a/polyply/src/big_smiles.py
+++ /dev/null
@@ -1,93 +0,0 @@
-# Copyright 2020 University of Groningen
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-def find_token_indices(line, target):
-    idxs = [idx for idx, token in enumerate(line) if token == target]
-    for idx in idxs:
-        yield idx
-
-def compatible(left, right):
-    if left == right:
-        return True
-    if left[0] == "<" and right[0] == ">":
-        if left[1:] == right[1:]:
-            return True
-    if left[0] == ">" and right[0] == "<":
-        if left[1:] == right[1:]:
-            return True
-    return False
-
-def find_compatible_pair(polymol, residue, bond_type="bond_type", eligible_nodes=None):
-    ref_nodes = nx.get_node_attributes(polymol, bond_type)
-    target_nodes = nx.get_node_attributes(residue, bond_type)
-    for ref_node in ref_nodes:
-        if eligible_nodes and\
-           polymol.nodes[ref_node]['resid'] not in eligible_nodes:
-            continue
-        for target_node in target_nodes:
-            if compatible(ref_nodes[ref_node],
-                          target_nodes[target_node]):
-                return ref_node, target_node
-    return None
-
-class BigSmileParser:
-
-    def __init__(self):
-        self.molecule =
-
-    def parse_stochastic_object():
-
-
-def read_simplified_big_smile_string(line):
-
-    # split the different stochastic objects
-    line = line.strip()
-    # a stochastic object is enclosed in '{' and '}'
-    start_idx = next(find_token_indices(line, "{"))
-    stop_idx = next(find_token_indices(line, "}"))
-    stoch_line = line[start_idx+1:stop_idx]
-    # residues are separated by , and end
-    # groups by ;
-    if ';' in stoch_line:
-        residue_string, terminii_string = stoch_line.split(';')
-    else:
-        residue_string = stoch_line
-        terminii_string = None
-    # let's read the smile residue strings
-    residues = []
-    count = 0
-    for residue_string in residue_string.split(','):
-        # figure out if this is a named object
-        if residue_string[0] == "#":
-            jdx = next(find_token_indices(residue_string, "="))
-            name = residue_string[:jdx]
-            residue_string = residue_string[jdx:]
-        else:
-            name = count
-
-        mol_graph = read_smiles(residue_string)
-        residues.append((name, mol_graph))
-        count += 1
-    # let's read the terminal residue strings
-    end_groups = []
-    if terminii_string:
-        for terminus_string in terminii_string.split(','):
-            mol_graph = read_smiles(terminus_string)
-            bond_types = nx.get_node_attributes(mol_graph, "bond_type")
-            nx.set_node_attributes(mol_graph, bond_types, "ter_bond_type")
-            end_groups.append(mol_graph)
-    return cls(dict(residues), end_groups)
-
-
-
diff --git a/polyply/src/big_smiles_helper.py b/polyply/src/big_smiles_helper.py
deleted file mode 100644
index ae546ffe..00000000
--- a/polyply/src/big_smiles_helper.py
+++ /dev/null
@@ -1,193 +0,0 @@
-# Copyright 2020 University of Groningen
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-try:
-    import pysmiles
-except ImportError:
-    msg = "The tool you are using requires pysmiles as dependcy."
-    raise ImportError(msg)
-
-from pysmiles.read_smiles import _tokenize
-
-def find_anchor(mol, pre_mol, atom):
-    anchors = list(pre_mol.neighbors(atom))
-    for anchor in anchors:
-        if anchor in mol.nodes:
-            return False, anchor
-    for anchor in nx.ego_graph(pre_mol, atom, radius=2).nodes:
-        if anchor in mol.nodes:
-            return True, anchor
-    raise RuntimeError
-
-def parse_atom(atom):
-    """
-    Parses a SMILES atom token, and returns a dict with the information.
-
-    Note
-    ----
-    Can not deal with stereochemical information yet. This gets discarded.
-
-    Parameters
-    ----------
-    atom : str
-        The atom string to interpret. Looks something like one of the
-        following: "C", "c", "[13CH3-1:2]"
-
-    Returns
-    -------
-    dict
-        A dictionary containing at least 'element', 'aromatic', and 'charge'. If
-        present, will also contain 'hcount', 'isotope', and 'class'.
-    """
-    defaults = {'charge': 0, 'hcount': 0, 'aromatic': False}
-    if atom.startswith('[') and any(mark in atom for mark in ['$', '>', '<']):
-        bond_type = atom[1:-1]
-        # we have a big smile bond anchor
-        defaults.update({"element": None,
-                         "bond_type": bond_type})
-        return defaults
-
-    if atom.startswith('[') and '#' == atom[1]:
-        # this atom is a replacable place holder
-        defaults.update({"element": None, "replace": atom[2:-1]})
-        return defaults
-
-    if not atom.startswith('[') and not atom.endswith(']'):
-        if atom != '*':
-            # Don't specify hcount to signal we don't actually know anything
-            # about it
-            return {'element': atom.capitalize(), 'charge': 0,
-                    'aromatic': atom.islower()}
-        else:
-            return defaults.copy()
-
-    match = ATOM_PATTERN.match(atom)
-
-    if match is None:
-        raise ValueError('The atom {} is malformatted'.format(atom))
-
-    out = defaults.copy()
-    out.update({k: v for k, v in match.groupdict().items() if v is not None})
-
-    if out.get('element', 'X').islower():
-        out['aromatic'] = True
-
-    parse_helpers = {
-        'isotope': int,
-        'element': str.capitalize,
-        'stereo': lambda x: x,
-        'hcount': parse_hcount,
-        'charge': parse_charge,
-        'class': int,
-        'aromatic': lambda x: x,
-    }
-
-    for attr, val_str in out.items():
-        out[attr] = parse_helpers[attr](val_str)
-
-    if out['element'] == '*':
-        del out['element']
-
-    if out.get('element') == 'H' and out.get('hcount', 0):
-        raise ValueError("A hydrogen atom can't have hydrogens")
-
-    if 'stereo' in out:
-        LOGGER.warning('Atom "%s" contains stereochemical information that will be discarded.', atom)
-
-    return out
-
-def big_smile_str_to_graph(smile_str):
-    """
-    
-    """
-    bond_to_order = {'-': 1, '=': 2, '#': 3, '$': 4, ':': 1.5, '.': 0}
-    pre_mol = nx.Graph()
-    anchor = None
-    idx = 0
-    default_bond = 1
-    next_bond = None
-    branches = []
-    ring_nums = {}
-    for tokentype, token in _tokenize(smiles):
-        if tokentype == TokenType.ATOM:
-            pre_mol.add_node(idx, **parse_atom(token))
-            if anchor is not None:
-                if next_bond is None:
-                    next_bond = default_bond
-                if next_bond or zero_order_bonds:
-                    pre_mol.add_edge(anchor, idx, order=next_bond)
-                next_bond = None
-            anchor = idx
-            idx += 1
-        elif tokentype == TokenType.BRANCH_START:
-            branches.append(anchor)
-        elif tokentype == TokenType.BRANCH_END:
-            anchor = branches.pop()
-        elif tokentype == TokenType.BOND_TYPE:
-            if next_bond is not None:
-                raise ValueError('Previous bond (order {}) not used. '
-                                 'Overwritten by "{}"'.format(next_bond, token))
-            next_bond = bond_to_order[token]
-        elif tokentype == TokenType.RING_NUM:
-            if token in ring_nums:
-                jdx, order = ring_nums[token]
-                if next_bond is None and order is None:
-                    next_bond = default_bond
-                elif order is None:  # Note that the check is needed,
-                    next_bond = next_bond  # But this could be pass.
-                elif next_bond is None:
-                    next_bond = order
-                elif next_bond != order:  # Both are not None
-                    raise ValueError('Conflicting bond orders for ring '
-                                     'between indices {}'.format(token))
-                # idx is the index of the *next* atom we're adding. So: -1.
-                if pre_mol.has_edge(idx-1, jdx):
-                    raise ValueError('Edge specified by marker {} already '
-                                     'exists'.format(token))
-                if idx-1 == jdx:
-                    raise ValueError('Marker {} specifies a bond between an '
-                                     'atom and itself'.format(token))
-                if next_bond or zero_order_bonds:
-                    pre_mol.add_edge(idx - 1, jdx, order=next_bond)
-                next_bond = None
-                del ring_nums[token]
-            else:
-                if idx == 0:
-                    raise ValueError("Can't have a marker ({}) before an atom"
-                                     "".format(token))
-                # idx is the index of the *next* atom we're adding. So: -1.
-                ring_nums[token] = (idx - 1, next_bond)
-                next_bond = None
-        elif tokentype == TokenType.EZSTEREO:
-            LOGGER.warning('E/Z stereochemical information, which is specified by "%s", will be discarded', token)
-    if ring_nums:
-        raise KeyError('Unmatched ring indices {}'.format(list(ring_nums.keys())))
-
-    return pre_mol
-
-def mol_graph_from_big_smile_graph(pre_mol):
-    # here we condense any BigSmilesBonding information
-    clean_nodes = [node for node in pre_mol.nodes(data=True) if 'bond_type' not in node[1]]
-    mol = nx.Graph()
-    mol.add_nodes_from(clean_nodes)
-    mol.add_edges_from([edge for edge in pre_mol.edges if edge[0] in mol.nodes and edge[1] in mol.nodes])
-    for node in pre_mol.nodes:
-        if 'bond_type' in pre_mol.nodes[node]:
-            terminus, anchor = find_anchor(mol, pre_mol, node)
-            if terminus:
-                mol.nodes[anchor].update({"ter_bond_type": pre_mol.nodes[node]['bond_type'],
-                                          "ter_bond_probs": pre_mol.nodes[node]['bond_probs']})
-            else:
-                mol.nodes[anchor].update({"bond_type": pre_mol.nodes[node]['bond_type'],
-                                          "bond_probs": pre_mol.nodes[node]['bond_probs']})
-    return mol