diff --git a/packman/__init__.py b/packman/__init__.py index 1bee17c..b86f282 100644 --- a/packman/__init__.py +++ b/packman/__init__.py @@ -15,4 +15,4 @@ #VERSION CHANGE HERE CHANGES IT IN docs AND setup.py -__version__='1.4.6' \ No newline at end of file +__version__='1.4.7' \ No newline at end of file diff --git a/packman/constants/Constants.py b/packman/constants/Constants.py index 8809f6e..29659a8 100644 --- a/packman/constants/Constants.py +++ b/packman/constants/Constants.py @@ -112,6 +112,34 @@ 'U':238.02891, } +#Kidera factors +kidera_factors={ +'ALA':[-1.56,-1.67,-0.97,-0.27,-0.93,-0.78,-0.2,-0.08,0.21,-0.48], +'ARG':[0.22,1.27,1.37,1.87,-1.7,0.46,0.92,-0.39,0.23,0.93], +'ASN':[1.14,-0.07,-0.12,0.81,0.18,0.37,-0.09,1.23,1.1,-1.73], +'ASP':[0.58,-0.22,-1.58,0.81,-0.92,0.15,-1.52,0.47,0.76,0.7], +'CYS':[0.12,-0.89,0.45,-1.05,-0.71,2.41,1.52,-0.69,1.13,1.1], +'GLN':[-0.47,0.24,0.07,1.1,1.1,0.59,0.84,-0.71,-0.03,-2.33], +'GLU':[-1.45,0.19,-1.61,1.17,-1.31,0.4,0.04,0.38,-0.35,-0.12], +'GLY':[1.46,-1.96,-0.23,-0.16,0.1,-0.11,1.32,2.36,-1.66,0.46], +'HIS':[-0.41,0.52,-0.28,0.28,1.61,1.01,-1.85,0.47,1.13,1.63], +'ILE':[-0.73,-0.16,1.79,-0.77,-0.54,0.03,-0.83,0.51,0.66,-1.78], +'LEU':[-1.04,0,-0.24,-1.1,-0.55,-2.05,0.96,-0.76,0.45,0.93], +'LYS':[-0.34,0.82,-0.23,1.7,1.54,-1.62,1.15,-0.08,-0.48,0.6], +'MET':[-1.4,0.18,-0.42,-0.73,2,1.52,0.26,0.11,-1.27,0.27], +'PHE':[-0.21,0.98,-0.36,-1.43,0.22,-0.81,0.67,1.1,1.71,-0.44], +'PRO':[2.06,-0.33,-1.15,-0.75,0.88,-0.45,0.3,-2.3,0.74,-0.28], +'SER':[0.81,-1.08,0.16,0.42,-0.21,-0.43,-1.89,-1.15,-0.97,-0.23], +'THR':[0.26,-0.7,1.21,0.63,-0.1,0.21,0.24,-1.15,-0.56,0.19], +'TRP':[0.3,2.1,-0.72,-1.57,-1.16,0.57,-0.48,-0.4,-2.3,-0.6], +'TYR':[1.38,1.48,0.8,-0.56,0,-0.68,-0.31,1.03,-0.05,0.53], +'VAL':[-0.74,-0.71,2.04,-0.4,0.5,-0.81,-1.07,0.06,-0.46,0.65], +'ASX':[0.86,-0.145,-0.85,0.81,-0.37,0.26,-0.805,0.85,0.93,-0.515], +'GLX':[-0.96,0.215,-0.77,1.135,-0.105,0.495,0.44,-0.165,-0.19,-1.225], +'XAA':[0,0,0,0,0,0,0,0,0,0], +'XLE':[-0.885,-0.08,0.775,-0.935,-0.545,-1.01,0.065,-0.125,0.555,-0.425] +} + #van der Waals Volumes and Radii, A. Bondi, The Journal of Physical Chemistry 1964 68 (3), 441-451, DOI: 10.1021/j100785a001 vdw_surface_bondi={ @@ -234,3 +262,37 @@ 'Ts':None, 'Og':None } + +#Amino acid three letter code to one letter code +THREE_LETTER_TO_ONE_LETTER = { +'ALA':'A', +'ARG':'R', +'ASN':'N', +'ASP':'D', +'CYS':'C', +'GLN':'Q', +'GLU':'E', +'GLY':'G', +'HIS':'H', +'ILE':'I', +'LEU':'L', +'LYS':'K', +'MET':'M', +'PHE':'F', +'PRO':'P', +'PYL':'O', +'SER':'S', +'SEC':'U', +'THR':'T', +'TRP':'W', +'TYR':'Y', +'VAL':'V', +'ASX':'B', +'GLX':'Z', +'XAA':'X', +'XLE':'J', +'UNK':'X' +} + +#Amino acid one letter code to three letter code +ONE_LETTER_TO_THREE_LETTER = {THREE_LETTER_TO_ONE_LETTER[three_letter]:three_letter for three_letter in THREE_LETTER_TO_ONE_LETTER} \ No newline at end of file diff --git a/packman/constants/__init__.py b/packman/constants/__init__.py index 729f35c..f45b9a8 100644 --- a/packman/constants/__init__.py +++ b/packman/constants/__init__.py @@ -1,3 +1,6 @@ from .Constants import amino_acid_molecular_weight from .Constants import atomic_weight -from .Constants import vdw_surface_bondi \ No newline at end of file +from .Constants import vdw_surface_bondi +from .Constants import kidera_factors +from .Constants import THREE_LETTER_TO_ONE_LETTER +from .Constants import ONE_LETTER_TO_THREE_LETTER \ No newline at end of file diff --git a/packman/molecule/chain.py b/packman/molecule/chain.py index b1de2a5..e479fa5 100644 --- a/packman/molecule/chain.py +++ b/packman/molecule/chain.py @@ -309,6 +309,17 @@ def get_backbone(self): """ return [i.get_backbone() for i in self.get_residues()] + def get_sequence(self): + """Get the Amino acid sequence of the chain. (Protein chains only) + + Returns: + FASTA format string of the chain sequence. + """ + try: + return '>'+str(self.get_parent().get_parent().get_id())+'_'+str(self.get_parent().get_id())+'_'+str(self.get_id())+'\n'+''.join([resi.get_changed_alphabet() if len(resi.get_name())>1 else resi.get_name() for resi in self.get_residues()]) + except: + logging.debug('Error in retriving chain sequence.') + def calculate_entropy(self,entropy_type,chains=None, probe_size=1.4, onspherepoints=30): """Calculate the entropy for the each amino acid will be returned. diff --git a/packman/molecule/model.py b/packman/molecule/model.py index 9cc0c37..140d3d2 100644 --- a/packman/molecule/model.py +++ b/packman/molecule/model.py @@ -90,7 +90,10 @@ def __init__(self,id,AllAtoms,AllResidues,AllChains,AllHetAtoms,AllHetMols): self.__properties = {} def __getitem__(self,ChainID): - return self.__AllChains[ChainID] + try: + return self.__AllChains[ChainID] + except KeyError: + logging.warning('Please provide a valid chain ID.') #Get Functions def get_id(self): @@ -348,6 +351,14 @@ def get_torsion(self, bond, neighbor1=None, neighbor2=None, radians=True): else: return numpy.rad2deg(radang) + def get_sequence(self): + """Get the Amino acid sequence of the chain. (Protein chains only) + + Returns: + FASTA format string of the chain sequence. + """ + return '\n'.join( [chain.get_sequence() for chain in self.get_chains() if chain.get_sequence()!=''] ) + #Set Functions def set_id(self,new_id): """Set the ID of the given 'Model' @@ -667,7 +678,7 @@ def calculate_bonds(self): N.set_bond(bond) self.__ModelGraph.add_edge( C.get_id(), N.get_id() , id = counter ) else: - logging.warning('The peptide bond following residue is missing: '+str(resi[i].get_id())+' Chain: '+resi[i].get_parent().get_id()) + logging.info('The peptide bond following residue is missing: '+str(resi[i].get_id())+' Chain: '+resi[i].get_parent().get_id()) try: None diff --git a/packman/molecule/molecule.py b/packman/molecule/molecule.py index b3e1b10..7bf6c24 100644 --- a/packman/molecule/molecule.py +++ b/packman/molecule/molecule.py @@ -136,7 +136,7 @@ def load_pdb(filename): if(len(Models)>2): #NMR - logging.warning('Multiple models/frames are detected (B-factor field is now a calculated parameter, i.e., the scalar standard deviation of the atom location of all frames)') + logging.debug('Multiple models/frames are detected (B-factor field is now a calculated parameter, i.e., the scalar standard deviation of the atom location of all frames)') All_Coords=[] for i in Models: All_Coords.append(numpy.array([j.get_location() for j in i.get_atoms()])) @@ -415,7 +415,7 @@ def load_cif(filename): if(len(AllModels)>2): #NMR - logging.warning('Multiple models/frames are detected (B-factor field is now a calculated parameter, i.e., the scalar standard deviation of the atom location of all frames)') + logging.info('Multiple models/frames are detected (B-factor field is now a calculated parameter, i.e., the scalar standard deviation of the atom location of all frames)') All_Coords=[] for i in AllModels: All_Coords.append(numpy.array([j.get_location() for j in i.get_atoms()])) diff --git a/packman/molecule/protein.py b/packman/molecule/protein.py index 52ec1dc..baf912a 100644 --- a/packman/molecule/protein.py +++ b/packman/molecule/protein.py @@ -25,6 +25,7 @@ """ from numpy import around +from . import model class Protein(): """This class contains the information about the 'Protein' object (packman.molecule.Protein). @@ -60,6 +61,14 @@ def get_id(self): """ return self.__id + def get_models(self): + """Get all the models at once. Useful for the iterations. + + Returns: + Generator of all the models in the PDB file. + """ + yield self.__Models + def get_data(self): """Get the misc data (other than coordinates) from the file. @@ -68,6 +77,23 @@ def get_data(self): """ return self.__Data + def get_sequence(self, all_models = False): + """_summary_ + + Args: + all_models (bool, optional): Get sequence of the all frames; useless if sequence accross the models is identical. Defaults to False. + + Returns: + Protein sequence in FASTA format. + """ + for model in self: + try: + return model.get_sequence() + except: + None + if(all_models==False): + break + #Set functions def set_data(self,data): """Set the misc data (other than coordiantes) to the Protein object. diff --git a/packman/molecule/residue.py b/packman/molecule/residue.py index 9c743d8..f54dbf6 100644 --- a/packman/molecule/residue.py +++ b/packman/molecule/residue.py @@ -25,6 +25,8 @@ import numpy import logging +from ..utilities import change_alphabet + class Residue(): """This class contains the information about the 'Residue' object (packman.molecule.Residue). @@ -165,6 +167,14 @@ def get_property(self,property_name): return self.__properties[property_name] except: logging.warning('The Property Name provided is not assigned.') + + def get_changed_alphabet(self): + """Converts three letter amino acid code to one letter and vise-versa + + Returns: + AA (string) : Three or one letter amino acid code depending and opposite of the argument provided. + """ + return change_alphabet(self.get_name()) #Set Functions def set_id(self,new_id): diff --git a/packman/tests/molecule/test_molecule.py b/packman/tests/molecule/test_molecule.py index 50cc06f..33df4d8 100644 --- a/packman/tests/molecule/test_molecule.py +++ b/packman/tests/molecule/test_molecule.py @@ -29,6 +29,7 @@ def test_Protein(self): rm( 'test.pdb' ) self.assertNotEqual( len([i for i in self.mol]) , 0 ) + self.assertNotEqual( len( [i for i in self.mol.get_sequence()] ), 0 ) def test_Model(self): #Basic @@ -42,6 +43,7 @@ def test_Model(self): self.assertNotEqual( len( [i for i in self.mol[0].get_atoms()] ) , 0 ) self.assertNotEqual( len( [i for i in self.mol[0].get_calpha()] ) , 0 ) self.assertNotEqual( len( [i for i in self.mol[0].get_backbone()] ), 0 ) + self.assertNotEqual( len( [i for i in self.mol[0].get_sequence()] ), 0 ) def test_Chain(self): #Basic @@ -54,6 +56,7 @@ def test_Chain(self): self.assertNotEqual( len( [i for i in self.mol[0].get_atoms()] ) , 0 ) self.assertNotEqual( len( [i for i in self.mol[0].get_calpha()] ) , 0 ) self.assertNotEqual( len( [i for i in self.mol[0].get_backbone()] ), 0 ) + self.assertNotEqual( len( [i for i in self.mol[0]['A'].get_sequence()] ), 0 ) def test_Residue(self): #Basic diff --git a/packman/utilities/utilities.py b/packman/utilities/utilities.py index c7c96a1..28be413 100644 --- a/packman/utilities/utilities.py +++ b/packman/utilities/utilities.py @@ -189,8 +189,10 @@ def change_alphabet(AA): Returns: AA (string) : Three or one letter amino acid code depending and opposite of the argument provided. """ - three_to_one_lookup = { 'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D', 'CYS': 'C', 'GLN': 'Q', 'GLU': 'E', 'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LEU': 'L', 'LYS': 'K', 'MET': 'M', 'PHE': 'F', 'PRO': 'P', 'SER': 'S', 'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V' } - one_to_three_lookup = {three_to_one_lookup[i]:i for i in three_to_one_lookup.keys()} + from ..constants import Constants + + three_to_one_lookup = Constants.THREE_LETTER_TO_ONE_LETTER + one_to_three_lookup = Constants.ONE_LETTER_TO_THREE_LETTER try: return three_to_one_lookup[AA] @@ -198,7 +200,7 @@ def change_alphabet(AA): try: return one_to_three_lookup[AA] except: - logging.warning('Amino acid code provided did not match any of three or one letter code') + logging.warning('Amino acid code provided did not match any of three or one letter code; returning unknown amino acid code.') if(len(AA)==3): return 'X' if(len(AA)==1):