diff --git a/polyply/src/gen_coords.py b/polyply/src/gen_coords.py index ee6abdbf..0a79dc99 100644 --- a/polyply/src/gen_coords.py +++ b/polyply/src/gen_coords.py @@ -74,8 +74,8 @@ def _initialize_cylces(topology, cycles, tolerance): for mol_idx in topology.mol_idx_by_name[mol_name]: # initalize as dfs tree molecule = topology.molecules[mol_idx] - if set(dict(nx.degree(molecule)).values()) != {1, 2}: - raise IOError("Only linear cyclic molecules are allowed at the moment.") + #if set(dict(nx.degree(molecule)).values()) != {1, 2}: + # raise IOError("Only linear cyclic molecules are allowed at the moment.") molecule.dfs=True nodes = (list(molecule.search_tree.edges)[0][0], list(molecule.search_tree.edges)[-1][1]) diff --git a/polyply/src/graph_utils.py b/polyply/src/graph_utils.py index 04f753ce..cb579b81 100644 --- a/polyply/src/graph_utils.py +++ b/polyply/src/graph_utils.py @@ -181,5 +181,49 @@ def get_all_predecessors(graph, node, start_node=0): predecessors.append(pre_node) if pre_node == start_node: break + predecessors.reverse() return predecessors + +def _find_longest_path(graph, source, target): + all_paths = list(nx.all_simple_edge_paths(graph, source, target=target)) + max_path_length = 0 + idx = 0 + for jdx, path in enumerate(all_paths): + if len(path) > max_path_length: + max_path_length = len(path) + idx = jdx + return all_paths[idx] + +def polyply_custom_tree(graph, source, target): + # find all simple paths from source to target + longest_path = _find_longest_path(graph, source, target) + path_nodes = { u for edge in longest_path for u in edge } + # substract path and extract the remaining graphs of the branches + graph_copy = graph.copy() + graph_copy.remove_edges_from(longest_path) + components = list(nx.connected_components(graph_copy)) + # loop over the edges in the longest path and + # see if they are part of a connected component + # then add the bfs tree edges of that component to the + # graph before the edge + tree_graph = nx.DiGraph() + for from_node, to_node in longest_path: + for c in components: + if from_node in c: + break + + subgraph = graph_copy.subgraph(c) + for u, v in nx.bfs_tree(subgraph, source=from_node).edges: + if u not in path_nodes or v not in path_nodes: + tree_graph.add_edge(u, v) + tree_graph.add_edge(from_node, to_node) + + return tree_graph + +def annotate_hierarchy(graph): + hierarchy= {} + for idx, ndx in enumerate(graph.nodes()): + hierarchy[ndx] = idx + nx.set_node_attributes(graph, hierarchy, "hierarchy") + return graph diff --git a/polyply/src/meta_molecule.py b/polyply/src/meta_molecule.py index 889d7ebd..c6d3ae8e 100644 --- a/polyply/src/meta_molecule.py +++ b/polyply/src/meta_molecule.py @@ -18,7 +18,7 @@ from vermouth.graph_utils import make_residue_graph from vermouth.log_helpers import StyleAdapter, get_logger from .polyply_parser import read_polyply -from .graph_utils import find_nodes_with_attributes +from .graph_utils import find_nodes_with_attributes, annotate_hierarchy Monomer = namedtuple('Monomer', 'resname, n_blocks') LOGGER = StyleAdapter(get_logger(__name__)) @@ -196,6 +196,7 @@ def search_tree(self): else: self.__search_tree = nx.dfs_tree(self, source=self.root) + annotate_hierarchy(self.__search_tree) return self.__search_tree @staticmethod diff --git a/polyply/src/restraints.py b/polyply/src/restraints.py index ca0024b8..735000b6 100644 --- a/polyply/src/restraints.py +++ b/polyply/src/restraints.py @@ -12,7 +12,51 @@ # See the License for the specific language governing permissions and # limitations under the License. import networkx as nx -from .graph_utils import compute_avg_step_length, get_all_predecessors +from polyply.src.graph_utils import compute_avg_step_length, get_all_predecessors + +def upper_bound(avg_step_length, distance, graph_dist, tolerance=0): + bound = graph_dist * avg_step_length + distance + tolerance + return bound + +def lower_bound(distance, graph_dist_ref, avg_needed_step_length, tolerance=0): + bound = avg_needed_step_length * graph_dist_ref - tolerance + return bound + +def _set_restraint_on_path(graph, + path, + avg_step_length, + distance, + tolerance, + ref_node=None, + target_node=None): + + # graph distances can be computed from the path positions + graph_distances_ref = {node: path.index(node) for node in path} + graph_distances_target = {node: len(path) - 1 - graph_distances_ref[node] for node in path} + + if not target_node: + target_node = path[-1] + + if not ref_node: + ref_node = path[0] + + avg_needed_step_length = distance / graph_distances_target[ref_node] + + for node in path[1:]: + if node == target_node: + graph_dist_ref = 1.0 + else: + graph_dist_ref = graph_distances_target[node] + + graph_dist_target = graph_distances_target[node] + up_bound = upper_bound(avg_step_length, distance, graph_dist_ref, tolerance) + low_bound = lower_bound(distance, graph_dist_ref, avg_needed_step_length, tolerance) + + current_restraints = graph.nodes[node].get('distance_restraints', []) + graph.nodes[node]['distance_restraints'] = current_restraints + [(ref_node, + up_bound, + low_bound)] + def set_distance_restraint(molecule, target_node, @@ -50,37 +94,70 @@ def set_distance_restraint(molecule, tolerance: float absolute tolerance (nm) """ - # if the target node is a predecssor to the ref node the order - # needs to be reveresed because the target node will be placed - # before the ref node - # we get the path lying between target and reference node - try: - path = get_all_predecessors(molecule.search_tree, node=target_node, start_node=ref_node) - except IndexError: - ref_node, target_node = target_node, ref_node - path = get_all_predecessors(molecule.search_tree, node=target_node, start_node=ref_node) + # First we need to figure out if the two nodes to be restrained are + # each others common ancestor. This breaks on cyclic graphs + ancestor = nx.algorithms.lowest_common_ancestor(molecule.search_tree, ref_node, target_node) + # if the ancestor is equal to the target node we have to switch the + # reference and target node + paths = [] + if ancestor == target_node: + ref_nodes = [target_node] + target_nodes = [ref_node] + distances = [distance] + # if ancestor is neither target nor ref_node, there is no continous path + # between the two. In this case we have to split the distance restraint + # into two parts + elif ancestor != ref_node: + print("go here") + # if target node is to be placed before ref node we need to switch them around + # otherwise the designations are fine + if molecule.search_tree.nodes[ref_node]["hierarchy"] >\ + molecule.search_tree.nodes[target_node]["hierarchy"]: + ref_node, target_node = (target_node, ref_node) - # graph distances can be computed from the path positions - graph_distances_ref = {node: path.index(node) for node in path} - graph_distances_target = {node: len(path) - 1 - graph_distances_ref[node] for node in path} + ref_nodes = [ancestor, ancestor] + target_nodes = [ref_node, target_node] - for node in path: - if node == target_node: - graph_distance = 1.0 - elif node == ref_node: - continue - else: - graph_distance = graph_distances_target[node] + # The first part of the split distance restraint is a new restraint + # that is the average expected distance between the ref node and the + # common ancestor + path = get_all_predecessors(molecule.search_tree, + node=ref_node, + start_node=ancestor) - upper_bound = graph_distance * avg_step_length + distance + tolerance + graph_dist_ref_target = len(path) - 1 + up_bound = upper_bound(avg_step_length, distance, tolerance, graph_dist_ref_target) + low_bound = lower_bound(distance, graph_dist_ref_target, graph_dist_ref_target, tolerance) + partial_distance = (up_bound + low_bound) / 2. + paths.append(path) + distances = [partial_distance] + # then we put the other distance restraint that acts like the full one but + # on a partial path between ancestor and target node + path = get_all_predecessors(molecule.search_tree, + node=target_node, + start_node=ancestor) + paths.append(path) + distances.append(distance) + # if ancestor is equal to ref node all order is full-filled and we just proceed + else: + ref_nodes = [ref_node] + target_nodes = [target_node] + distances = [distance] - avg_needed_step_length = distance / graph_distances_target[ref_node] - lower_bound = avg_needed_step_length * graph_distances_ref[node] - tolerance + if not paths: + paths = [get_all_predecessors(molecule.search_tree, + node=target_nodes[0], + start_node=ref_nodes[0])] - current_restraints = molecule.nodes[node].get('distance_restraints', []) - molecule.nodes[node]['distance_restraints'] = current_restraints + [(ref_node, - upper_bound, - lower_bound)] + for ref_node, target_node, dist, path in zip(ref_nodes, target_nodes, distances, paths): + print(ref_node, target_node, path) + _set_restraint_on_path(molecule, + path, + avg_step_length, + dist, + tolerance, + ref_node, + target_node) def set_restraints(topology, nonbond_matrix): """ @@ -96,9 +173,6 @@ def set_restraints(topology, nonbond_matrix): distance_restraints = topology.distance_restraints[(mol_name, mol_idx)] mol = topology.molecules[mol_idx] - if set(dict(nx.degree(mol)).values()) != {1, 2}: - raise IOError("Distance restraints currently can only be applied to linear molecules.") - for ref_node, target_node in distance_restraints: path = list(mol.search_tree.edges) avg_step_length, _ = compute_avg_step_length(mol, @@ -107,4 +181,9 @@ def set_restraints(topology, nonbond_matrix): path) distance, tolerance = distance_restraints[(ref_node, target_node)] - set_distance_restraint(mol, target_node, ref_node, distance, avg_step_length, tolerance) + set_distance_restraint(mol, + target_node, + ref_node, + distance, + avg_step_length, + tolerance) diff --git a/polyply/tests/test_restraints.py b/polyply/tests/test_restraints.py index a19247cc..86a35445 100644 --- a/polyply/tests/test_restraints.py +++ b/polyply/tests/test_restraints.py @@ -17,8 +17,12 @@ import pytest import numpy as np import networkx as nx +from hypothesis import given +from hypothesis_networkx import graph_builder +from hypothesis import strategies as st import polyply from polyply.tests.test_build_file_parser import test_molecule +from polyply.src.meta_molecule import MetaMolecule @pytest.mark.parametrize('target_node, ref_node, distance, avg_step_length, tolerance, expected',( # test simple revers @@ -67,3 +71,35 @@ def test_set_distance_restraint(test_molecule, assert pytest.approx(ref_restr[1], new_restr[1]) assert pytest.approx(ref_restr[2], new_restr[2]) + + + +builder = graph_builder(graph_type=nx.Graph, + node_keys=st.integers(), + min_nodes=4, max_nodes=60, + min_edges=3, max_edges=None, + self_loops=True, + connected=True) +@given(graph=builder) +def test_restraints_on_abitr_topology(graph): + test_molecule = MetaMolecule() + test_molecule.add_nodes_from(graph.nodes) + test_molecule.add_edges_from(graph.edges) + + ref_node, target_node = np.random.choice(list(graph.nodes), replace=False, size=2) + + # if set(dict(nx.degree(test_molecule)).values()) != {1, 2}: + # ancestor = nx.algorithms.lowest_common_ancestor(test_molecule.search_tree, ref_node, target_node) + # if ancestor != ref_node and ancestor != target_node: + # assert False + + distance = 4 + avg_step_length = 0.47 + tolerance = 0 + polyply.src.restraints.set_distance_restraint(test_molecule, + target_node, + ref_node, + distance, + avg_step_length, + tolerance) + assert len(nx.get_node_attributes(test_molecule, "distance_restraints")) != 0 diff --git a/requirements-tests.txt b/requirements-tests.txt index 595a4902..3b33a67e 100644 --- a/requirements-tests.txt +++ b/requirements-tests.txt @@ -4,3 +4,4 @@ pytest-cov pylint codecov tqdm +hypothesis-networkx