Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Maint #33

Merged
merged 10 commits into from
Nov 21, 2024
4 changes: 3 additions & 1 deletion cgsmiles/graph_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,9 @@ def set_atom_names_atomistic(molecule, meta_graph=None):
assert len(fragids) == 1
fraglist[fragids[0]].append(node)

for fragnodes in fraglist.values():
for meta_node, fragnodes in fraglist.items():
for idx, node in enumerate(fragnodes):
atomname = molecule.nodes[node]['element'] + str(idx)
molecule.nodes[node]['atomname'] = atomname
if meta_graph:
meta_graph.nodes[meta_node]['graph'].nodes[node]['atomname'] = atomname
12 changes: 12 additions & 0 deletions cgsmiles/pysmiles_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,21 @@ def rebuild_h_atoms(mol_graph, keep_bonding=False):
raise SyntaxError(msg)
nx.set_node_attributes(mol_graph, 0, 'hcount')

# first we need to figure out the correct hcounts on each node
# this also corrects for simple aromatic problems like in thiophene
pysmiles.smiles_helper.fill_valence(mol_graph, respect_hcount=False)

# optionally we adjust the hcount by the number of bonding operators
if keep_bonding:
bonding_nodes = nx.get_node_attributes(mol_graph, 'bonding')
for node, bond_ops in bonding_nodes.items():
mol_graph.nodes[node]['hcount'] -= sum([int(bond[-1]) for bond in bond_ops])

# now we add the hydrogen atoms
pysmiles.smiles_helper.add_explicit_hydrogens(mol_graph)

# if we are having single hydrogen fragments we need to
# make sure the fragid and fragname are kept
for node in mol_graph.nodes:
if mol_graph.nodes[node].get("element", "*") == "H" and\
not mol_graph.nodes[node].get("single_h_frag", False):
Expand Down
7 changes: 4 additions & 3 deletions cgsmiles/read_fragments.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ def strip_bonding_descriptors(fragment_string):
node_count = 0
prev_node = 0
current_order = None
anchor = []
for token in smile_iter:
if token == '[':
peek = next(smile_iter)
Expand Down Expand Up @@ -157,15 +158,15 @@ def strip_bonding_descriptors(fragment_string):
else:
atom += peek
peek = next(smile_iter)

smile = smile + atom + "]"
prev_node = node_count
node_count += 1
current_order = None
elif token == '(':
anchor = prev_node
anchor.append(prev_node)
smile += token
elif token == ')':
prev_node = anchor
prev_node = anchor.pop()
smile += token
elif token in bond_to_order:
current_order = bond_to_order[token]
Expand Down
10 changes: 4 additions & 6 deletions cgsmiles/resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,18 +382,16 @@ def resolve(self):
mark_chiral_atoms(self.molecule)
# assign rs isomerism
annotate_ez_isomers(self.molecule)
# in all-atom MD there are common naming conventions
# that might be expected and hence we set them here
set_atom_names_atomistic(self.molecule, self.meta_graph)

# and redo the meta molecule
self.meta_graph = annotate_fragments(self.meta_graph,
self.molecule)

# in all-atom MD there are common naming conventions
# that might be expected and hence we set them here
if all_atom:
set_atom_names_atomistic(self.molecule, self.meta_graph)
# in all-atom MD there are common naming conventions
# that might be expected and hence we set them here
set_atom_names_atomistic(self.molecule,
self.meta_graph)

# increment the resolution counter
self.resolution_counter += 1
Expand Down
30 changes: 30 additions & 0 deletions cgsmiles/tests/test_cgsmile_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,36 @@ def test_read_cgsmiles(smile, nodes, charges, edges, orders):
{0: ["$1"], 2: ["$1"]},
None,
None),
# simple symmetric bonding after branch
("[$]CC(CC)[$]",
"CC(CC)",
{0: ["$1"], 1: ["$1"]},
None,
None),
# simple symmetric bonding after ring
("[$]CC1[$]CCC1",
"CC1CCC1",
{0: ["$1"], 1: ["$1"]},
None,
None),
# clear order symbol
("[CH][$a]=[CH][$c]",
"[CH]=[CH]",
{0: ["$a1"], 1: ["$c1"]},
None,
None),
# multiple non-one bonding l
("CC=[$a]=[$b]CC",
"CCCC",
{1: ["$a2", "$b2"]},
None,
None),
Comment on lines +290 to +295
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add "CC[$a]=[$b]CC"?

# multiple non-one bonding l
("CC[$a]=[$b]CC",
"CCCC",
{1: ["$a1", "$b2"]},
None,
None),
# simple symmetric bonding with more than one name
("[$1A]COC[$1A]",
"COC",
Expand Down
32 changes: 32 additions & 0 deletions cgsmiles/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import re
import pytest
import cgsmiles

# Expected error text for the SyntaxError case of test_rebuild_hatoms; it is
# passed through re.escape() and used as the `match` pattern of pytest.raises,
# so it has to stay in sync with the message it is matched against.
err_msg_rebuild_h = ("Likely you are writing an aromatic molecule that does not "
"show delocalization-induced molecular equivalency and thus "
"is not considered aromatic. For example, 4-methyl imidazole "
"is often written as [nH]1cc(nc1)C, but should be written as "
"[NH]1C=C(N=C1)C. A corresponding CGSmiles string would be "
"{[#A]1[#B][#C]1}.{#A=[>][<]N,#B=[$]N=C[>],#C=[$]C(C)=C[<]}")

@pytest.mark.parametrize('frag_str, hatoms_ref, error_type, err_msg', (
    ('{#A=[$]CCC[$]}', 6, None, None),
    ('{#A=CCC}', 8, None, None),
    ('{#A=C[!]CC}', 7, None, None),
    ('{#A=[$]=CCC=[$]}', 4, None, None),
    ('{#A=[$]cccc}', 5, None, None),
    ('{#A=[$]ccc}', 0, SyntaxError, err_msg_rebuild_h),
))
def test_rebuild_hatoms(frag_str, hatoms_ref, error_type, err_msg):
    """Check hydrogen rebuilding on fragment 'A' with keep_bonding=True.

    Parameters
    ----------
    frag_str: str
        fragment string handed to ``cgsmiles.read_fragments``
    hatoms_ref: int
        number of nodes with element 'H' expected after rebuilding
    error_type: type or None
        exception class expected from ``rebuild_h_atoms``; falsy when
        the call is expected to succeed
    err_msg: str or None
        message the raised exception has to match (used literally)
    """
    fragment = cgsmiles.read_fragments(frag_str)['A']

    # failure case: only the exception type and its message are checked
    if error_type:
        with pytest.raises(error_type, match=re.escape(err_msg)):
            cgsmiles.pysmiles_utils.rebuild_h_atoms(fragment, keep_bonding=True)
        return

    # success case: count the hydrogen nodes present after the rebuild
    cgsmiles.pysmiles_utils.rebuild_h_atoms(fragment, keep_bonding=True)
    n_hydrogens = sum(1 for _, element in fragment.nodes(data='element')
                      if element == 'H')
    assert n_hydrogens == hatoms_ref
Loading