Skip to content

Commit

Permalink
fix rare issue in subgraph_isomorphism due to greedy handling of matches
Browse files (browse the repository at this point in the history)
  • Loading branch information
Bribak committed Mar 11, 2024
1 parent: 893e3cc · commit: c7eed27
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 15 deletions.
12 changes: 6 additions & 6 deletions build/lib/glycowork/motif/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ def subgraph_isomorphism(glycan, motif, termini_list = [], count = False, wildca
return (0, []) if return_matches else 0 if count else False
motif_comp = min_process_glycans([motif, glycan])
if wildcards_ptm:
glycan, motif = [re.sub(r"(?<=[a-zA-Z])\d+(?=[a-zA-Z])", 'O', glycan).replace('NeuOAc', 'Neu5Ac').replace('NeuOGc', 'Neu5Gc') for glycan in [glycan, motif]]
glycan, motif = [re.sub(r"(?<=[a-zA-Z])\d+(?=[a-zA-Z])", 'O', g).replace('NeuOAc', 'Neu5Ac').replace('NeuOGc', 'Neu5Gc') for g in [glycan, motif]]
g1 = glycan_to_nxGraph(glycan, termini = 'calc') if termini_list else glycan_to_nxGraph(glycan)
g2 = glycan_to_nxGraph(motif, termini = 'provided', termini_list = termini_list) if termini_list else glycan_to_nxGraph(motif)
else:
Expand Down Expand Up @@ -322,11 +322,11 @@ def subgraph_isomorphism(glycan, motif, termini_list = [], count = False, wildca
return counts if not return_matches else (counts, mappings)
else:
if graph_pair.subgraph_is_isomorphic():
mapping = graph_pair.mapping
mapping = {v: k for k, v in mapping.items()}
res = all(mapping[node] < mapping[neighbor] for node, neighbor in g2.edges())
return res if not return_matches else (int(res), mappings)
return False if not return_matches else (0, [])
for mapping in graph_pair.subgraph_isomorphisms_iter():
mapping = {v: k for k, v in mapping.items()}
if all(mapping[node] < mapping[neighbor] for node, neighbor in g2.edges()):
return True if not return_matches else (1, mappings)
return False if not return_matches else (0, [])


def generate_graph_features(glycan, glycan_graph = True, label = 'network'):
Expand Down
12 changes: 6 additions & 6 deletions glycowork/motif/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ def subgraph_isomorphism(glycan, motif, termini_list = [], count = False, wildca
return (0, []) if return_matches else 0 if count else False
motif_comp = min_process_glycans([motif, glycan])
if wildcards_ptm:
glycan, motif = [re.sub(r"(?<=[a-zA-Z])\d+(?=[a-zA-Z])", 'O', glycan).replace('NeuOAc', 'Neu5Ac').replace('NeuOGc', 'Neu5Gc') for glycan in [glycan, motif]]
glycan, motif = [re.sub(r"(?<=[a-zA-Z])\d+(?=[a-zA-Z])", 'O', g).replace('NeuOAc', 'Neu5Ac').replace('NeuOGc', 'Neu5Gc') for g in [glycan, motif]]
g1 = glycan_to_nxGraph(glycan, termini = 'calc') if termini_list else glycan_to_nxGraph(glycan)
g2 = glycan_to_nxGraph(motif, termini = 'provided', termini_list = termini_list) if termini_list else glycan_to_nxGraph(motif)
else:
Expand Down Expand Up @@ -322,11 +322,11 @@ def subgraph_isomorphism(glycan, motif, termini_list = [], count = False, wildca
return counts if not return_matches else (counts, mappings)
else:
if graph_pair.subgraph_is_isomorphic():
mapping = graph_pair.mapping
mapping = {v: k for k, v in mapping.items()}
res = all(mapping[node] < mapping[neighbor] for node, neighbor in g2.edges())
return res if not return_matches else (int(res), mappings)
return False if not return_matches else (0, [])
for mapping in graph_pair.subgraph_isomorphisms_iter():
mapping = {v: k for k, v in mapping.items()}
if all(mapping[node] < mapping[neighbor] for node, neighbor in g2.edges()):
return True if not return_matches else (1, mappings)
return False if not return_matches else (0, [])


def generate_graph_features(glycan, glycan_graph = True, label = 'network'):
Expand Down
8 changes: 5 additions & 3 deletions glycowork/motif/regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ def convert_pattern_component(pattern_component):
| Returns a string for simple components and a dict of form string : occurrence for complex components
"""
if not any([k in pattern_component for k in ['[', '{', '*', '+', '=', '<!', '?!']]):
if pattern_component[-1].isdigit() or pattern_component[-1] == '?':
pattern_component += '-'
return specify_linkages(replace_patterns(pattern_component))
pattern, occurrence = None, None
if '[' in pattern_component:
Expand Down Expand Up @@ -592,7 +594,7 @@ def filter_dealbreakers(lists, ggraph, pattern):
return lists2


def compile(pattern):
def compile_pattern(pattern):
"""pre-compiles glyco-regular expression for faster processing\n
| Arguments:
| :-
Expand All @@ -610,7 +612,7 @@ def get_match(pattern, glycan, return_matches = True):
| Arguments:
| :-
| pattern (string): glyco-regular expression in the form of "Hex-HexNAc-([Hex|Fuc]){1,2}-HexNAc"; accepts pre-compiled pattern
| glycan (string): glycan sequence in IUPAC-condensed
| glycan (string or networkx): glycan sequence in IUPAC-condensed or as networkx graph
| return_matches (bool): whether to return True/False or return the matches as a list of strings; default:True\n
| Returns:
| :-
Expand Down Expand Up @@ -644,7 +646,7 @@ def get_match_batch(pattern, glycan_list, return_matches = True):
| Arguments:
| :-
| pattern (string): glyco-regular expression in the form of "Hex-HexNAc-([Hex|Fuc]){1,2}-HexNAc"; accepts pre-compiled pattern
| glycan_list (list of strings): list of glycan sequence in IUPAC-condensed
| glycan_list (list of strings or networkx): list of glycan sequence in IUPAC-condensed or as networkx graph
| return_matches (bool): whether to return True/False or return the matches as a list of strings; default:True\n
| Returns:
| :-
Expand Down

0 comments on commit c7eed27

Please sign in to comment.