Skip to content

Commit

Permalink
more tests, make compare_glycans direction-sensitive, deprecationWarnings
Browse files Browse the repository at this point in the history
  • Loading branch information
Bribak committed Nov 15, 2024
1 parent 80e5a6c commit 5a99d6b
Show file tree
Hide file tree
Showing 4 changed files with 397 additions and 17 deletions.
4 changes: 2 additions & 2 deletions glycowork/glycan_data/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@

def __getattr__(name):
if name == "glycan_binding":
with resources.open_text("glycowork.glycan_data", "glycan_binding.csv") as f:
with resources.files("glycowork.glycan_data").joinpath("glycan_binding.csv").open(encoding = 'utf-8-sig') as f:
glycan_binding = pd.read_csv(f)
globals()[name] = glycan_binding # Cache it to avoid reloading
return glycan_binding
elif name == "df_species":
with resources.open_text("glycowork.glycan_data", "v11_df_species.csv") as f:
with resources.files("glycowork.glycan_data").joinpath("v11_df_species.csv").open(encoding = 'utf-8-sig') as f:
df_species = pd.read_csv(f)
globals()[name] = df_species # Cache it to avoid reloading
return df_species
Expand Down
4 changes: 2 additions & 2 deletions glycowork/motif/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from functools import partial
from typing import Dict, List, Optional, Set, Tuple, Union

from glycowork.glycan_data.loader import linkages, motif_list, find_nth, unwrap, replace_every_second, remove_unmatched_brackets
from glycowork.glycan_data.loader import linkages, motif_list, find_nth, unwrap, replace_every_second, remove_unmatched_brackets, df_species
from glycowork.motif.graph import subgraph_isomorphism, generate_graph_features, glycan_to_nxGraph, graph_to_string, ensure_graph, possible_topology_check
from glycowork.motif.processing import IUPAC_to_SMILES, get_lib, find_isomorphs, rescue_glycans
from glycowork.motif.regex import get_match
Expand Down Expand Up @@ -248,7 +248,7 @@ def clean_up_heatmap(
# Group the DataFrame by identical rows
grouped = df.groupby(list(df.columns))
# Find the row with the longest string index within each group and return a new DataFrame
max_idx_series = grouped.apply(lambda group: group.index.to_series().str.len().idxmax())
max_idx_series = grouped.apply(lambda group: group.index.to_series().str.len().idxmax(), include_groups = False)
result = df.loc[max_idx_series].drop_duplicates()
result.index = result.index.str.strip()
motif_dic = {value: key for key, value in motif_dic.items()}
Expand Down
16 changes: 12 additions & 4 deletions glycowork/motif/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,13 +190,21 @@ def compare_glycans(glycan_a: Union[str, nx.Graph], # First glycan to compare
narrow_wildcard_list = {k: get_possible_linkages(k) if '?' in k else get_possible_monosaccharides(k) for k in proc
if '?' in k or k in {'Hex', 'HexOS', 'HexNAc', 'HexNAcOS', 'dHex', 'Sia', 'HexA', 'Pen', 'Monosaccharide'} or '!' in k}
if narrow_wildcard_list:
return nx.is_isomorphic(g1, g2, node_match = categorical_node_match_wildcard('string_labels', 'unknown', narrow_wildcard_list, 'termini', 'flexible'))
matcher = nx.isomorphism.GraphMatcher(g1, g2, categorical_node_match_wildcard('string_labels', 'unknown', narrow_wildcard_list, 'termini', 'flexible'))
for m in matcher.isomorphisms_iter():
inverse_mapping = {v: k for k, v in m.items()}
if all(inverse_mapping[node] < inverse_mapping[neighbor] for node, neighbor in g2.edges()):
return True
return False
else:
# First check whether components of both glycan graphs are identical, then check graph isomorphism (costly)
if sorted(nx.get_node_attributes(g1, "string_labels").values()) == sorted(nx.get_node_attributes(g2, "string_labels").values()):
return nx.is_isomorphic(g1, g2, node_match = nx.algorithms.isomorphism.categorical_node_match('string_labels', 'unknown'))
else:
return False
matcher = nx.isomorphism.GraphMatcher(g1, g2, nx.algorithms.isomorphism.categorical_node_match('string_labels', 'unknown'))
for m in matcher.isomorphisms_iter():
inverse_mapping = {v: k for k, v in m.items()}
if all(inverse_mapping[node] < inverse_mapping[neighbor] for node, neighbor in g2.edges()):
return True
return False


def expand_termini_list(motif: Union[str, nx.Graph], # Glycan motif sequence or graph
Expand Down
Loading

0 comments on commit 5a99d6b

Please sign in to comment.