Skip to content

Commit

Permalink
Merge pull request #37 from Pranavkhade/get_sequence
Browse files Browse the repository at this point in the history
get_sequence added.
  • Loading branch information
Pranavkhade authored Mar 28, 2023
2 parents 270ab1e + 9c969a3 commit 3a0bf5e
Show file tree
Hide file tree
Showing 10 changed files with 137 additions and 9 deletions.
2 changes: 1 addition & 1 deletion packman/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@


#VERSION CHANGE HERE CHANGES IT IN docs AND setup.py
__version__='1.4.6'
__version__='1.4.7'
62 changes: 62 additions & 0 deletions packman/constants/Constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,34 @@
'U':238.02891,
}

# Kidera factors: ten numeric descriptors per amino acid, keyed by the
# upper-case three-letter residue code. Each value is a list of exactly ten
# numbers (factor 1 first).
# NOTE(review): presumably taken from Kidera et al.; the literature source is
# not cited here -- confirm and add the reference.
#
# Ambiguity codes are derived rows rather than independent measurements:
#   'ASX' is the element-wise mean of 'ASN' and 'ASP'
#   'GLX' is the element-wise mean of 'GLN' and 'GLU'
#   'XLE' is the element-wise mean of 'ILE' and 'LEU'
#   'XAA' (any/unknown residue) is all zeros
# There are no entries for 'PYL', 'SEC' or 'UNK', so a lookup with those keys
# raises KeyError.
kidera_factors={
'ALA':[-1.56,-1.67,-0.97,-0.27,-0.93,-0.78,-0.2,-0.08,0.21,-0.48],
'ARG':[0.22,1.27,1.37,1.87,-1.7,0.46,0.92,-0.39,0.23,0.93],
'ASN':[1.14,-0.07,-0.12,0.81,0.18,0.37,-0.09,1.23,1.1,-1.73],
'ASP':[0.58,-0.22,-1.58,0.81,-0.92,0.15,-1.52,0.47,0.76,0.7],
'CYS':[0.12,-0.89,0.45,-1.05,-0.71,2.41,1.52,-0.69,1.13,1.1],
'GLN':[-0.47,0.24,0.07,1.1,1.1,0.59,0.84,-0.71,-0.03,-2.33],
'GLU':[-1.45,0.19,-1.61,1.17,-1.31,0.4,0.04,0.38,-0.35,-0.12],
'GLY':[1.46,-1.96,-0.23,-0.16,0.1,-0.11,1.32,2.36,-1.66,0.46],
'HIS':[-0.41,0.52,-0.28,0.28,1.61,1.01,-1.85,0.47,1.13,1.63],
'ILE':[-0.73,-0.16,1.79,-0.77,-0.54,0.03,-0.83,0.51,0.66,-1.78],
'LEU':[-1.04,0,-0.24,-1.1,-0.55,-2.05,0.96,-0.76,0.45,0.93],
'LYS':[-0.34,0.82,-0.23,1.7,1.54,-1.62,1.15,-0.08,-0.48,0.6],
'MET':[-1.4,0.18,-0.42,-0.73,2,1.52,0.26,0.11,-1.27,0.27],
'PHE':[-0.21,0.98,-0.36,-1.43,0.22,-0.81,0.67,1.1,1.71,-0.44],
'PRO':[2.06,-0.33,-1.15,-0.75,0.88,-0.45,0.3,-2.3,0.74,-0.28],
'SER':[0.81,-1.08,0.16,0.42,-0.21,-0.43,-1.89,-1.15,-0.97,-0.23],
'THR':[0.26,-0.7,1.21,0.63,-0.1,0.21,0.24,-1.15,-0.56,0.19],
'TRP':[0.3,2.1,-0.72,-1.57,-1.16,0.57,-0.48,-0.4,-2.3,-0.6],
'TYR':[1.38,1.48,0.8,-0.56,0,-0.68,-0.31,1.03,-0.05,0.53],
'VAL':[-0.74,-0.71,2.04,-0.4,0.5,-0.81,-1.07,0.06,-0.46,0.65],
# Derived rows for ambiguous / unknown residues (see header comment).
'ASX':[0.86,-0.145,-0.85,0.81,-0.37,0.26,-0.805,0.85,0.93,-0.515],
'GLX':[-0.96,0.215,-0.77,1.135,-0.105,0.495,0.44,-0.165,-0.19,-1.225],
'XAA':[0,0,0,0,0,0,0,0,0,0],
'XLE':[-0.885,-0.08,0.775,-0.935,-0.545,-1.01,0.065,-0.125,0.555,-0.425]
}


#van der Waals Volumes and Radii, A. Bondi, The Journal of Physical Chemistry 1964 68 (3), 441-451, DOI: 10.1021/j100785a001
vdw_surface_bondi={
Expand Down Expand Up @@ -234,3 +262,37 @@
'Ts':None,
'Og':None
}

# Three-letter residue code -> one-letter residue code.
#
# Insertion order is significant: 'XAA' and 'UNK' both map to 'X', and the
# inverse table below keeps whichever of them is inserted last.
THREE_LETTER_TO_ONE_LETTER = {
    'ALA': 'A',
    'ARG': 'R',
    'ASN': 'N',
    'ASP': 'D',
    'CYS': 'C',
    'GLN': 'Q',
    'GLU': 'E',
    'GLY': 'G',
    'HIS': 'H',
    'ILE': 'I',
    'LEU': 'L',
    'LYS': 'K',
    'MET': 'M',
    'PHE': 'F',
    'PRO': 'P',
    'PYL': 'O',
    'SER': 'S',
    'SEC': 'U',
    'THR': 'T',
    'TRP': 'W',
    'TYR': 'Y',
    'VAL': 'V',
    'ASX': 'B',
    'GLX': 'Z',
    'XAA': 'X',
    'XLE': 'J',
    'UNK': 'X',
}

# One-letter residue code -> three-letter residue code, obtained by inverting
# the table above. Because two keys share the value 'X', the later one wins:
# 'X' resolves to 'UNK', not 'XAA'.
ONE_LETTER_TO_THREE_LETTER = {
    one_letter: three_letter
    for three_letter, one_letter in THREE_LETTER_TO_ONE_LETTER.items()
}
5 changes: 4 additions & 1 deletion packman/constants/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from .Constants import amino_acid_molecular_weight
from .Constants import atomic_weight
from .Constants import vdw_surface_bondi
from .Constants import vdw_surface_bondi
from .Constants import kidera_factors
from .Constants import THREE_LETTER_TO_ONE_LETTER
from .Constants import ONE_LETTER_TO_THREE_LETTER
11 changes: 11 additions & 0 deletions packman/molecule/chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,17 @@ def get_backbone(self):
"""
return [i.get_backbone() for i in self.get_residues()]

def get_sequence(self):
    """Get the amino acid sequence of the chain (protein chains only).

    The header line is '>' followed by three IDs joined with underscores:
    the ID of the parent's parent, the ID of the parent and the ID of this
    chain (presumably protein, model and chain -- confirm against the
    object hierarchy). The sequence line has one entry per residue:
    residues whose name is longer than one character are converted with
    ``get_changed_alphabet()``; one-character names are used as they are.

    Returns:
        FASTA format string of the chain sequence, or None when the
        sequence could not be built (the failure is logged at debug level).
    """
    try:
        parent = self.get_parent()
        header = '>' + str(parent.get_parent().get_id()) + '_' + str(parent.get_id()) + '_' + str(self.get_id())
        letters = []
        for resi in self.get_residues():
            name = resi.get_name()
            letters.append(resi.get_changed_alphabet() if len(name) > 1 else name)
        return header + '\n' + ''.join(letters)
    except Exception:
        # Best-effort by design: chains that cannot be rendered as a sequence
        # yield None. Only ordinary errors are swallowed, so KeyboardInterrupt
        # and SystemExit still propagate.
        logging.debug('Error in retriving chain sequence.')
        return None

def calculate_entropy(self,entropy_type,chains=None, probe_size=1.4, onspherepoints=30):
"""Calculate the entropy for the each amino acid will be returned.
Expand Down
15 changes: 13 additions & 2 deletions packman/molecule/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,10 @@ def __init__(self,id,AllAtoms,AllResidues,AllChains,AllHetAtoms,AllHetMols):
self.__properties = {}

def __getitem__(self,ChainID):
return self.__AllChains[ChainID]
try:
return self.__AllChains[ChainID]
except KeyError:
logging.warning('Please provide a valid chain ID.')

#Get Functions
def get_id(self):
Expand Down Expand Up @@ -348,6 +351,14 @@ def get_torsion(self, bond, neighbor1=None, neighbor2=None, radians=True):
else:
return numpy.rad2deg(radang)

def get_sequence(self):
    """Get the amino acid sequences of all the chains in the model.

    Each chain's ``get_sequence()`` is called exactly once. Chains that
    return an empty string or None (i.e. no sequence could be produced)
    are left out of the output.

    Returns:
        FASTA format string with one record per chain, records separated
        by a newline. Empty string if no chain produced a sequence.
    """
    chain_sequences = (chain.get_sequence() for chain in self.get_chains())
    # Filter on truthiness rather than != '' so that a None result from a
    # chain does not reach str.join and raise TypeError.
    return '\n'.join(sequence for sequence in chain_sequences if sequence)

#Set Functions
def set_id(self,new_id):
"""Set the ID of the given 'Model'
Expand Down Expand Up @@ -667,7 +678,7 @@ def calculate_bonds(self):
N.set_bond(bond)
self.__ModelGraph.add_edge( C.get_id(), N.get_id() , id = counter )
else:
logging.warning('The peptide bond following residue is missing: '+str(resi[i].get_id())+' Chain: '+resi[i].get_parent().get_id())
logging.info('The peptide bond following residue is missing: '+str(resi[i].get_id())+' Chain: '+resi[i].get_parent().get_id())

try:
None
Expand Down
4 changes: 2 additions & 2 deletions packman/molecule/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def load_pdb(filename):

if(len(Models)>2):
#NMR
logging.warning('Multiple models/frames are detected (B-factor field is now a calculated parameter, i.e., the scalar standard deviation of the atom location of all frames)')
logging.debug('Multiple models/frames are detected (B-factor field is now a calculated parameter, i.e., the scalar standard deviation of the atom location of all frames)')
All_Coords=[]
for i in Models:
All_Coords.append(numpy.array([j.get_location() for j in i.get_atoms()]))
Expand Down Expand Up @@ -415,7 +415,7 @@ def load_cif(filename):

if(len(AllModels)>2):
#NMR
logging.warning('Multiple models/frames are detected (B-factor field is now a calculated parameter, i.e., the scalar standard deviation of the atom location of all frames)')
logging.info('Multiple models/frames are detected (B-factor field is now a calculated parameter, i.e., the scalar standard deviation of the atom location of all frames)')
All_Coords=[]
for i in AllModels:
All_Coords.append(numpy.array([j.get_location() for j in i.get_atoms()]))
Expand Down
26 changes: 26 additions & 0 deletions packman/molecule/protein.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"""

from numpy import around
from . import model

class Protein():
"""This class contains the information about the 'Protein' object (packman.molecule.Protein).
Expand Down Expand Up @@ -60,6 +61,14 @@ def get_id(self):
"""
return self.__id

def get_models(self):
    """Get all the models at once. Useful for the iterations.

    Yields the stored models one at a time, in the order they are held by
    the object, instead of yielding the whole container as a single item.

    Returns:
        Generator of all the models in the PDB file.
    """
    for current_model in self.__Models:
        yield current_model

def get_data(self):
"""Get the misc data (other than coordinates) from the file.
Expand All @@ -68,6 +77,23 @@ def get_data(self):
"""
return self.__Data

def get_sequence(self, all_models = False):
    """Get the amino acid sequence(s) of the protein in FASTA format.

    Args:
        all_models (bool, optional): When False, only the first model is used. When True, the sequences of all the models/frames are returned, joined by newlines; this is redundant if the sequence is identical across the models. Defaults to False.

    Returns:
        Protein sequence in FASTA format, or None if no sequence could be obtained.
    """
    sequences = []
    # The loop variable is deliberately not called 'model', so that the
    # module-level 'model' import is not shadowed.
    for current_model in self:
        try:
            sequence = current_model.get_sequence()
        except Exception:
            # Best-effort: a model whose sequence cannot be built is skipped.
            sequence = None
        if sequence is not None:
            sequences.append(sequence)
        if not all_models:
            break

    if not sequences:
        return None
    return '\n'.join(sequences)

#Set functions
def set_data(self,data):
"""Set the misc data (other than coordiantes) to the Protein object.
Expand Down
10 changes: 10 additions & 0 deletions packman/molecule/residue.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
import numpy
import logging

from ..utilities import change_alphabet


class Residue():
"""This class contains the information about the 'Residue' object (packman.molecule.Residue).
Expand Down Expand Up @@ -165,6 +167,14 @@ def get_property(self,property_name):
return self.__properties[property_name]
except:
logging.warning('The Property Name provided is not assigned.')

def get_changed_alphabet(self):
    """Convert this residue's name between three letter and one letter amino acid codes.

    The residue name is handed to ``change_alphabet`` unchanged and its
    result is returned as is.

    Returns:
        AA (string) : One letter code if the residue name is a three letter code, and vice versa.
    """
    residue_name = self.get_name()
    return change_alphabet(residue_name)

#Set Functions
def set_id(self,new_id):
Expand Down
3 changes: 3 additions & 0 deletions packman/tests/molecule/test_molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def test_Protein(self):
rm( 'test.pdb' )

self.assertNotEqual( len([i for i in self.mol]) , 0 )
self.assertNotEqual( len( [i for i in self.mol.get_sequence()] ), 0 )

def test_Model(self):
#Basic
Expand All @@ -42,6 +43,7 @@ def test_Model(self):
self.assertNotEqual( len( [i for i in self.mol[0].get_atoms()] ) , 0 )
self.assertNotEqual( len( [i for i in self.mol[0].get_calpha()] ) , 0 )
self.assertNotEqual( len( [i for i in self.mol[0].get_backbone()] ), 0 )
self.assertNotEqual( len( [i for i in self.mol[0].get_sequence()] ), 0 )

def test_Chain(self):
#Basic
Expand All @@ -54,6 +56,7 @@ def test_Chain(self):
self.assertNotEqual( len( [i for i in self.mol[0].get_atoms()] ) , 0 )
self.assertNotEqual( len( [i for i in self.mol[0].get_calpha()] ) , 0 )
self.assertNotEqual( len( [i for i in self.mol[0].get_backbone()] ), 0 )
self.assertNotEqual( len( [i for i in self.mol[0]['A'].get_sequence()] ), 0 )

def test_Residue(self):
#Basic
Expand Down
8 changes: 5 additions & 3 deletions packman/utilities/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,16 +189,18 @@ def change_alphabet(AA):
Returns:
AA (string) : Three or one letter amino acid code depending and opposite of the argument provided.
"""
three_to_one_lookup = { 'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D', 'CYS': 'C', 'GLN': 'Q', 'GLU': 'E', 'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LEU': 'L', 'LYS': 'K', 'MET': 'M', 'PHE': 'F', 'PRO': 'P', 'SER': 'S', 'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V' }
one_to_three_lookup = {three_to_one_lookup[i]:i for i in three_to_one_lookup.keys()}
from ..constants import Constants

three_to_one_lookup = Constants.THREE_LETTER_TO_ONE_LETTER
one_to_three_lookup = Constants.ONE_LETTER_TO_THREE_LETTER

try:
return three_to_one_lookup[AA]
except:
try:
return one_to_three_lookup[AA]
except:
logging.warning('Amino acid code provided did not match any of three or one letter code')
logging.warning('Amino acid code provided did not match any of three or one letter code; returning unknown amino acid code.')
if(len(AA)==3):
return 'X'
if(len(AA)==1):
Expand Down

0 comments on commit 3a0bf5e

Please sign in to comment.