Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cgsmiles 1 #377

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion polyply/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
jit = functools.partial(jit, nopython=True, cache=True, fastmath=True)

# This could be useful for the high level API
from .src.meta_molecule import (Monomer, MetaMolecule)
from .src.meta_molecule import MetaMolecule
from .src.apply_links import ApplyLinks
from .src.map_to_molecule import MapToMolecule
from .src.gen_itp import gen_itp, gen_params
Expand Down
40 changes: 12 additions & 28 deletions polyply/src/gen_itp.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,36 +30,14 @@
from vermouth.file_writer import DeferredFileWriter
from vermouth.citation_parser import citation_formatter
from vermouth.graph_utils import make_residue_graph
from polyply import (MetaMolecule, ApplyLinks, Monomer, MapToMolecule)
from polyply import (MetaMolecule, ApplyLinks, MapToMolecule)
from polyply.src.graph_utils import find_missing_edges
from .load_library import load_ff_library
from .gen_dna import complement_dsDNA
from .simple_seq_parsers import parse_simple_seq_string

LOGGER = StyleAdapter(get_logger(__name__))

def split_seq_string(sequence):
"""
Split a string definition for a linear sequence into monomer
blocks and raise errors if the sequence is not valid.

Parameters
-----------
sequence: str
string of residues format name:number

Returns:
----------
list
list of `polyply.Monomers`
"""
raw_monomers = sequence
monomers = []
for monomer in raw_monomers:
resname, n_blocks = monomer.split(":")
n_blocks = int(n_blocks)
monomers.append(Monomer(resname=resname, n_blocks=n_blocks))
return monomers

def gen_params(name="polymer", outpath=Path("polymer.itp"), inpath=[], lib=None, seq=None, seq_file=None, dsdna=False):
"""
Top level function for running the polyply parameter generation.
Expand Down Expand Up @@ -89,10 +67,16 @@ def gen_params(name="polymer", outpath=Path("polymer.itp"), inpath=[], lib=None,
# Generate the MetaMolecule
if seq:
LOGGER.info("reading sequence from command", type="step")
monomers = split_seq_string(seq)
meta_molecule = MetaMolecule.from_monomer_seq_linear(monomers=monomers,
force_field=force_field,
mol_name=name)
# We are dealing with a cgsmiles string
if len(seq) == 1 and seq[0].startswith("{"):
meta_molecule = MetaMolecule.from_cgsmiles_str(cgsmiles_str=seq[0],
force_field=force_field,
mol_name=name)
else:
monomers = parse_simple_seq_string(seq)
meta_molecule = MetaMolecule.from_monomer_seq_linear(monomers=monomers,
force_field=force_field,
mol_name=name)
elif seq_file:
LOGGER.info("reading sequence from file", type="step")
meta_molecule = MetaMolecule.from_sequence_file(force_field, seq_file, name)
Expand Down
73 changes: 71 additions & 2 deletions polyply/src/meta_molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,14 @@
# limitations under the License.
from collections import (namedtuple, OrderedDict)
import networkx as nx
from cgsmiles.resolve import MoleculeResolver
from cgsmiles.read_cgsmiles import read_cgsmiles
from vermouth.graph_utils import make_residue_graph
from vermouth.log_helpers import StyleAdapter, get_logger
from vermouth.gmx.itp_read import read_itp
from .graph_utils import find_nodes_with_attributes
from .simple_seq_parsers import parse_txt, parse_ig, parse_fasta, parse_json
from .simple_seq_parsers import parse_txt, parse_ig, parse_fasta, parse_json, Monomer

Monomer = namedtuple('Monomer', 'resname, n_blocks')
LOGGER = StyleAdapter(get_logger(__name__))

def _make_edges(force_field):
Expand Down Expand Up @@ -360,3 +361,71 @@ def from_block(cls, force_field, mol_name):
meta_mol = cls(graph, force_field=force_field, mol_name=mol_name)
meta_mol.molecule = force_field.blocks[mol_name].to_molecule()
return meta_mol

@classmethod
def from_cgsmiles_str(cls,force_field, cgsmiles_str, mol_name, seq_only=True, all_atom=False):
"""
Constructs a :class::`MetaMolecule` from an CGSmiles string.
The force-field must contain the block with mol_name from
which to create the MetaMolecule. This function automatically
sets the MetaMolecule.molecule attribute.

Parameters
----------
force_field: :class:`vermouth.forcefield.ForceField`
the force-field that must contain the block
cgsmiles_str:
the CGSmiles string describing the molecule graph
mol_name: str
name of the block matching a key in ForceField.blocks
seq_only: bool
if the string only describes the sequence; if this is False
then the molecule attribute is set
all_atom: bool
if the last molecule in the sequence is at all-atom resolution
can only be used if seq_only is False

Returns
-------
:class:`polyply.MetaMolecule`
"""
if seq_only and all_atom:
msg = "You cannot define a sequence at all-atom level.\n"
raise IOError(msg)

# check if we have multiple resolutions
if cgsmiles_str.count('{') == 1:
meta_graph = read_cgsmiles(cgsmiles_str)
take_resname_from = 'fragname'
elif seq_only:
# initalize the cgsmiles molecule resolver
resolver = MoleculeResolver(cgsmiles_str, last_all_atom=all_atom)
# grep the last graph of the resolve iter
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# grep the last graph of the resolve iter
# grab the last graph of the resolve iter

*_, (_, meta_graph) = resolver.resolve_iter()
take_resname_from = 'atomname'
else:
# initalize the cgsmiles molecule resolver
resolver = MoleculeResolver(cgsmiles_str, last_all_atom=all_atom)
*_, (meta_graph, molecule) = resolver.resolve_iter()

# we have to set some node attribute accoding to polyply specs
for node in meta_graph.nodes:
if seq_only:
resname = meta_graph.nodes[node][take_resname_from]
meta_graph.nodes[node]['resname'] = resname
else:
for atom in meta_graph.nodes['graph'].nodes:
meta_graph.nodes['graph'].nodes[atom]['resname'] = resname
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

resname is undefined here

meta_graph.nodes['graph'].nodes[atom]['resid'] = node + 1
molecule.nodes[atom]['resname'] = resname
molecule.nodes[atom]['resid'] = node + 1

if 'atomname' in meta_graph.nodes[node]:
del meta_graph.nodes[node]['atomname']
meta_graph.nodes[node]['resid'] = node + 1

meta_mol = cls(meta_graph, force_field=force_field, mol_name=mol_name)
if not seq_only:
meta_mol.molecule = molecule

return meta_mol
27 changes: 26 additions & 1 deletion polyply/src/simple_seq_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,16 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import OrderedDict
from collections import (namedtuple, OrderedDict)
from functools import partial
import json
import networkx as nx
from networkx.readwrite import json_graph
from vermouth.parser_utils import split_comments
from vermouth.log_helpers import StyleAdapter, get_logger

Monomer = namedtuple('Monomer', 'resname, n_blocks')

LOGGER = StyleAdapter(get_logger(__name__))

ONE_LETTER_DNA = {"A": "DA",
Expand Down Expand Up @@ -342,3 +344,26 @@ def parse_json(filepath):
seq_graph.add_nodes_from(nodes)
seq_graph.add_edges_from(init_json_graph.edges(data=True))
return seq_graph

def parse_simple_seq_string(sequence):
"""
Split a string definition for a linear sequence into monomer
blocks and raise errors if the sequence is not valid.

Parameters
-----------
sequence: str
string of residues format name:number
Comment on lines +355 to +356
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
sequence: str
string of residues format name:number
sequence: list[str]
string of residues format name:number


Returns:
----------
list
list of `polyply.Monomers`
"""
raw_monomers = sequence
monomers = []
for monomer in raw_monomers:
resname, n_blocks = monomer.split(":")
n_blocks = int(n_blocks)
monomers.append(Monomer(resname=resname, n_blocks=n_blocks))
return monomers
Loading