diff --git a/polyply/__init__.py b/polyply/__init__.py index 7f7e4d1a..98a70a08 100644 --- a/polyply/__init__.py +++ b/polyply/__init__.py @@ -50,7 +50,7 @@ jit = functools.partial(jit, nopython=True, cache=True, fastmath=True) # This could be useful for the high level API -from .src.meta_molecule import (Monomer, MetaMolecule) +from .src.meta_molecule import MetaMolecule from .src.apply_links import ApplyLinks from .src.map_to_molecule import MapToMolecule from .src.gen_itp import gen_itp, gen_params diff --git a/polyply/src/gen_itp.py b/polyply/src/gen_itp.py index 395afeda..a5d5ead6 100644 --- a/polyply/src/gen_itp.py +++ b/polyply/src/gen_itp.py @@ -30,36 +30,14 @@ from vermouth.file_writer import DeferredFileWriter from vermouth.citation_parser import citation_formatter from vermouth.graph_utils import make_residue_graph -from polyply import (MetaMolecule, ApplyLinks, Monomer, MapToMolecule) +from polyply import (MetaMolecule, ApplyLinks, MapToMolecule) from polyply.src.graph_utils import find_missing_edges from .load_library import load_ff_library from .gen_dna import complement_dsDNA +from .simple_seq_parsers import parse_simple_seq_string LOGGER = StyleAdapter(get_logger(__name__)) -def split_seq_string(sequence): - """ - Split a string definition for a linear sequence into monomer - blocks and raise errors if the sequence is not valid. - - Parameters - ----------- - sequence: str - string of residues format name:number - - Returns: - ---------- - list - list of `polyply.Monomers` - """ - raw_monomers = sequence - monomers = [] - for monomer in raw_monomers: - resname, n_blocks = monomer.split(":") - n_blocks = int(n_blocks) - monomers.append(Monomer(resname=resname, n_blocks=n_blocks)) - return monomers - def gen_params(name="polymer", outpath=Path("polymer.itp"), inpath=[], lib=None, seq=None, seq_file=None, dsdna=False): """ Top level function for running the polyply parameter generation. @@ -89,10 +67,16 @@ def gen_params(name="polymer", outpath=Path("polymer.itp"), inpath=[], lib=None, # Generate the MetaMolecule if seq: LOGGER.info("reading sequence from command", type="step") - monomers = split_seq_string(seq) - meta_molecule = MetaMolecule.from_monomer_seq_linear(monomers=monomers, - force_field=force_field, - mol_name=name) + # We are dealing with a cgsmiles string + if len(seq) == 1 and seq[0].startswith("{"): + meta_molecule = MetaMolecule.from_cgsmiles_str(cgsmiles_str=seq[0], + force_field=force_field, + mol_name=name) + else: + monomers = parse_simple_seq_string(seq) + meta_molecule = MetaMolecule.from_monomer_seq_linear(monomers=monomers, + force_field=force_field, + mol_name=name) elif seq_file: LOGGER.info("reading sequence from file", type="step") meta_molecule = MetaMolecule.from_sequence_file(force_field, seq_file, name) diff --git a/polyply/src/meta_molecule.py b/polyply/src/meta_molecule.py index 5ce58edf..18a20a33 100644 --- a/polyply/src/meta_molecule.py +++ b/polyply/src/meta_molecule.py @@ -13,13 +13,14 @@ # limitations under the License. from collections import (namedtuple, OrderedDict) import networkx as nx +from cgsmiles.resolve import MoleculeResolver +from cgsmiles.read_cgsmiles import read_cgsmiles from vermouth.graph_utils import make_residue_graph from vermouth.log_helpers import StyleAdapter, get_logger from vermouth.gmx.itp_read import read_itp from .graph_utils import find_nodes_with_attributes -from .simple_seq_parsers import parse_txt, parse_ig, parse_fasta, parse_json +from .simple_seq_parsers import parse_txt, parse_ig, parse_fasta, parse_json, Monomer -Monomer = namedtuple('Monomer', 'resname, n_blocks') LOGGER = StyleAdapter(get_logger(__name__)) def _make_edges(force_field): @@ -360,3 +361,71 @@ def from_block(cls, force_field, mol_name): meta_mol = cls(graph, force_field=force_field, mol_name=mol_name) meta_mol.molecule = force_field.blocks[mol_name].to_molecule() return meta_mol + + @classmethod + def from_cgsmiles_str(cls,force_field, cgsmiles_str, mol_name, seq_only=True, all_atom=False): + """ + Constructs a :class::`MetaMolecule` from an CGSmiles string. + The force-field must contain the block with mol_name from + which to create the MetaMolecule. This function automatically + sets the MetaMolecule.molecule attribute. + + Parameters + ---------- + force_field: :class:`vermouth.forcefield.ForceField` + the force-field that must contain the block + cgsmiles_str: + the CGSmiles string describing the molecule graph + mol_name: str + name of the block matching a key in ForceField.blocks + seq_only: bool + if the string only describes the sequence; if this is False + then the molecule attribute is set + all_atom: bool + if the last molecule in the sequence is at all-atom resolution + can only be used if seq_only is False + + Returns + ------- + :class:`polyply.MetaMolecule` + """ + if seq_only and all_atom: + msg = "You cannot define a sequence at all-atom level.\n" + raise IOError(msg) + + # check if we have multiple resolutions + if cgsmiles_str.count('{') == 1: + meta_graph = read_cgsmiles(cgsmiles_str) + take_resname_from = 'fragname' + elif seq_only: + # initalize the cgsmiles molecule resolver + resolver = MoleculeResolver(cgsmiles_str, last_all_atom=all_atom) + # grep the last graph of the resolve iter + *_, (_, meta_graph) = resolver.resolve_iter() + take_resname_from = 'atomname' + else: + # initalize the cgsmiles molecule resolver + resolver = MoleculeResolver(cgsmiles_str, last_all_atom=all_atom) + *_, (meta_graph, molecule) = resolver.resolve_iter() + + # we have to set some node attribute accoding to polyply specs + for node in meta_graph.nodes: + if seq_only: + resname = meta_graph.nodes[node][take_resname_from] + meta_graph.nodes[node]['resname'] = resname + else: + for atom in meta_graph.nodes['graph'].nodes: + meta_graph.nodes['graph'].nodes[atom]['resname'] = resname + meta_graph.nodes['graph'].nodes[atom]['resid'] = node + 1 + molecule.nodes[atom]['resname'] = resname + molecule.nodes[atom]['resid'] = node + 1 + + if 'atomname' in meta_graph.nodes[node]: + del meta_graph.nodes[node]['atomname'] + meta_graph.nodes[node]['resid'] = node + 1 + + meta_mol = cls(meta_graph, force_field=force_field, mol_name=mol_name) + if not seq_only: + meta_mol.molecule = molecule + + return meta_mol diff --git a/polyply/src/simple_seq_parsers.py b/polyply/src/simple_seq_parsers.py index fb4b0968..7df26527 100644 --- a/polyply/src/simple_seq_parsers.py +++ b/polyply/src/simple_seq_parsers.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from collections import OrderedDict +from collections import (namedtuple, OrderedDict) from functools import partial import json import networkx as nx @@ -19,6 +19,8 @@ from vermouth.parser_utils import split_comments from vermouth.log_helpers import StyleAdapter, get_logger +Monomer = namedtuple('Monomer', 'resname, n_blocks') + LOGGER = StyleAdapter(get_logger(__name__)) ONE_LETTER_DNA = {"A": "DA", @@ -342,3 +344,26 @@ def parse_json(filepath): seq_graph.add_nodes_from(nodes) seq_graph.add_edges_from(init_json_graph.edges(data=True)) return seq_graph + +def parse_simple_seq_string(sequence): + """ + Split a string definition for a linear sequence into monomer + blocks and raise errors if the sequence is not valid. + + Parameters + ----------- + sequence: str + string of residues format name:number + + Returns: + ---------- + list + list of `polyply.Monomers` + """ + raw_monomers = sequence + monomers = [] + for monomer in raw_monomers: + resname, n_blocks = monomer.split(":") + n_blocks = int(n_blocks) + monomers.append(Monomer(resname=resname, n_blocks=n_blocks)) + return monomers