more tests, make compare_glycans direction-sensitive, deprecationWarnings
BojarLab · Nov 15, 2024 · 5a99d6b · 5a99d6b
1 parent 80e5a6c
commit 5a99d6b
Show file tree

Hide file tree

Showing 4 changed files with 397 additions and 17 deletions.
diff --git a/glycowork/glycan_data/loader.py b/glycowork/glycan_data/loader.py
@@ -17,12 +17,12 @@
 
 def __getattr__(name):
   if name == "glycan_binding":
-    with resources.open_text("glycowork.glycan_data", "glycan_binding.csv") as f:
+    with resources.files("glycowork.glycan_data").joinpath("glycan_binding.csv").open(encoding = 'utf-8-sig') as f:
       glycan_binding = pd.read_csv(f)
     globals()[name] = glycan_binding  # Cache it to avoid reloading
     return glycan_binding
   elif name == "df_species":
-    with resources.open_text("glycowork.glycan_data", "v11_df_species.csv") as f:
+    with resources.files("glycowork.glycan_data").joinpath("v11_df_species.csv").open(encoding = 'utf-8-sig') as f:
       df_species = pd.read_csv(f)
     globals()[name] = df_species  # Cache it to avoid reloading
     return df_species

diff --git a/glycowork/motif/annotate.py b/glycowork/motif/annotate.py
@@ -6,7 +6,7 @@
 from functools import partial
 from typing import Dict, List, Optional, Set, Tuple, Union
 
-from glycowork.glycan_data.loader import linkages, motif_list, find_nth, unwrap, replace_every_second, remove_unmatched_brackets
+from glycowork.glycan_data.loader import linkages, motif_list, find_nth, unwrap, replace_every_second, remove_unmatched_brackets, df_species
 from glycowork.motif.graph import subgraph_isomorphism, generate_graph_features, glycan_to_nxGraph, graph_to_string, ensure_graph, possible_topology_check
 from glycowork.motif.processing import IUPAC_to_SMILES, get_lib, find_isomorphs, rescue_glycans
 from glycowork.motif.regex import get_match
@@ -248,7 +248,7 @@ def clean_up_heatmap(
   # Group the DataFrame by identical rows
   grouped = df.groupby(list(df.columns))
   # Find the row with the longest string index within each group and return a new DataFrame
-  max_idx_series = grouped.apply(lambda group: group.index.to_series().str.len().idxmax())
+  max_idx_series = grouped.apply(lambda group: group.index.to_series().str.len().idxmax(), include_groups = False)
   result = df.loc[max_idx_series].drop_duplicates()
   result.index = result.index.str.strip()
   motif_dic = {value: key for key, value in motif_dic.items()}

diff --git a/glycowork/motif/graph.py b/glycowork/motif/graph.py
@@ -190,13 +190,21 @@ def compare_glycans(glycan_a: Union[str, nx.Graph], # First glycan to compare
   narrow_wildcard_list = {k: get_possible_linkages(k) if '?' in k else get_possible_monosaccharides(k) for k in proc
                           if '?' in k or k in {'Hex', 'HexOS', 'HexNAc', 'HexNAcOS', 'dHex', 'Sia', 'HexA', 'Pen', 'Monosaccharide'} or '!' in k}
   if narrow_wildcard_list:
-    return nx.is_isomorphic(g1, g2, node_match = categorical_node_match_wildcard('string_labels', 'unknown', narrow_wildcard_list, 'termini', 'flexible'))
+    matcher = nx.isomorphism.GraphMatcher(g1, g2, categorical_node_match_wildcard('string_labels', 'unknown', narrow_wildcard_list, 'termini', 'flexible'))
+    for m in matcher.isomorphisms_iter():
+      inverse_mapping = {v: k for k, v in m.items()}
+      if all(inverse_mapping[node] < inverse_mapping[neighbor] for node, neighbor in g2.edges()):
+        return True
+    return False
   else:
     # First check whether components of both glycan graphs are identical, then check graph isomorphism (costly)
     if sorted(nx.get_node_attributes(g1, "string_labels").values()) == sorted(nx.get_node_attributes(g2, "string_labels").values()):
-      return nx.is_isomorphic(g1, g2, node_match = nx.algorithms.isomorphism.categorical_node_match('string_labels', 'unknown'))
-    else:
-      return False
+      matcher = nx.isomorphism.GraphMatcher(g1, g2, nx.algorithms.isomorphism.categorical_node_match('string_labels', 'unknown'))
+      for m in matcher.isomorphisms_iter():
+        inverse_mapping = {v: k for k, v in m.items()}
+        if all(inverse_mapping[node] < inverse_mapping[neighbor] for node, neighbor in g2.edges()):
+          return True
+    return False
 
 
 def expand_termini_list(motif: Union[str, nx.Graph], # Glycan motif sequence or graph