Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Biological assemblies verified #62

Merged
merged 9 commits into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ jobs:
steps:
- uses: actions/checkout@v4
- run: pip install --user ruff
- run: ruff --exit-zero --line-length=320 --target-version=py38 --statistics . | sort -k 2
- run: ruff --ignore=E402,E501,E701,E722,E741,F401,F403,F841
- run: ruff check --exit-zero --line-length=320 --target-version=py38 --statistics . | sort -k 2
- run: ruff check --ignore=E402,E501,E701,E722,E741,F401,F403,F841
--output-format=github --target-version=py38 .

build:
Expand Down
20 changes: 0 additions & 20 deletions .travis.yml

This file was deleted.

2 changes: 1 addition & 1 deletion packman/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@


#VERSION CHANGE HERE CHANGES IT IN docs AND setup.py
__version__='1.4.12'
__version__='1.4.13'
2 changes: 1 addition & 1 deletion packman/apps/dci.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class DCI():

def __init__(self, mol, cutoff = 7.0, chain = None, n_com = None):
self.molObj = mol
assert type( self.molObj ) == Protein, "mol should be a packman.molecule.Protein object."
assert isinstance(self.molObj, Protein), "mol should be a packman.molecule.Protein object."

if chain:
self.atoms = [i for i in self.molObj[0][chain].get_calpha()]
Expand Down
62 changes: 55 additions & 7 deletions packman/molecule/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import numpy
import logging
from typing import List

from .protein import Protein
from .model import Model
Expand Down Expand Up @@ -486,7 +487,7 @@ def load_structure(filename: str, ftype: str= 'cif') -> Protein:
##################################################################################################
'''

def download_structure(pdbid: str, save_name: str=None, ftype: str='cif'):
def download_structure(pdbid: str, save_name: str=None, ftype: str='cif', biological_assembly: bool = False):
"""This function downloads the 3D protein structure.

Example::
Expand All @@ -495,16 +496,23 @@ def download_structure(pdbid: str, save_name: str=None, ftype: str='cif'):
molecule.download_structure('1prw')

Args:
pdbid (str) : A Unique 4 Letter PDB ID (eg.. 1PRW)
save_name (str) : Save name of the downloaded file (extension will be added automatically depending on the ftype argument).
ftype (str) : Format name ('.cif' or '.pdb')
pdbid (str) : A Unique 4 Letter PDB ID (eg.. 1PRW)
save_name (str) : Save name of the downloaded file (extension will be added automatically depending on the ftype argument).
ftype (str) : Format name ('cif' or 'pdb')
biological_assembly (bool) : Download biological assemblies in lieu of PDB entry. (Default: False)
"""
import urllib.request as ur

if(ftype == 'cif'):
response=ur.urlopen('https://files.rcsb.org/view/'+pdbid+'.cif')
if(biological_assembly):
response=ur.urlopen('https://files.rcsb.org/view/'+pdbid+'-assembly1.cif')
else:
response=ur.urlopen('https://files.rcsb.org/view/'+pdbid+'.cif')
elif(ftype == 'pdb'):
response=ur.urlopen('https://files.rcsb.org/view/'+pdbid+'.pdb')
if(biological_assembly):
response=ur.urlopen('https://files.rcsb.org/view/'+pdbid+'.pdb1')
else:
response=ur.urlopen('https://files.rcsb.org/view/'+pdbid+'.pdb')
else:
logging.warning('Please provide appropriate "ftype" argument. (cif/pdb).')
return
Expand All @@ -519,4 +527,44 @@ def download_structure(pdbid: str, save_name: str=None, ftype: str='cif'):
open(save_name+'.'+ftype,'wb').write( response.read() )
except(IOError):
None
return True
return True

def batch_download(entry_list: List[str], save_location: str = 'structure_files', ftype: str='cif', biological_assembly: bool = False) -> bool:
    '''This function downloads structures from the PDB database in parallel.

    The downloads run on a thread pool (multiprocessing.dummy.Pool) with one worker per CPU
    reported by multiprocessing.cpu_count(). Duplicate entries are removed automatically.

    Example::

        from packman import molecule
        molecule.batch_download(['1prw', '1exr', '4hla'])

    Args:
        entry_list (List[str])     : List of PDB IDs
        save_location (str)        : Directory where the downloaded files will be stored (created if it does not exist).
        ftype (str)                : Format name ('cif' or 'pdb')
        biological_assembly (bool) : Download biological assemblies in lieu of PDB entry. (Default: False)

    Returns:
        bool: True if every entry was downloaded, False if at least one entry failed.
    '''
    import os
    import multiprocessing
    from multiprocessing.dummy import Pool
    from tqdm import tqdm

    def single_download(entry):
        # Forward the caller's options; previously only the entry was passed, so
        # save_location, ftype and biological_assembly were silently ignored.
        # download_structure() appends '.<ftype>' to save_name itself.
        try:
            outcome = download_structure(
                entry,
                save_name=os.path.join(save_location, entry),
                ftype=ftype,
                biological_assembly=biological_assembly,
            )
        except Exception:
            # Best effort: one bad entry (e.g. an HTTP error) must not abort the batch.
            return entry, False
        # download_structure() returns True on success and None when it rejects ftype.
        return entry, bool(outcome)

    # exist_ok avoids the check-then-create race of os.path.exists() + os.makedirs().
    os.makedirs(save_location, exist_ok=True)

    # Remove duplicates so that no entry is downloaded twice.
    unique_entries = list(set(entry_list))

    all_succeeded = True
    # The context manager releases the worker threads once every result is consumed.
    with Pool(multiprocessing.cpu_count()) as pool:
        for result in tqdm(pool.imap_unordered(single_download, unique_entries), total=len(unique_entries)):
            if(result[1] is False):
                all_succeeded = False
                print('\n',result[0],' failed to download.')

    return all_succeeded
2 changes: 1 addition & 1 deletion packman/utilities/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def superimporse(reference: 'Chain', target: 'Chain', use: str='calpha', ids: Li
Bm = numpy.subtract(atoms2_location, Centroid2)

#Dot is matrix multiplication for array
H = numpy.mat(Bm.T) * numpy.mat(Am)
H = numpy.asmatrix(Bm.T) * numpy.asmatrix(Am)

#Find Rotation
U, S, Vt = numpy.linalg.svd(H)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,5 +71,5 @@ def get_version(rel_path):
entry_points = {
'console_scripts': SCRIPTS,
},
install_requires = ['numpy', 'scipy', 'networkx', 'mlxtend', 'scikit-learn'],
install_requires = ['numpy', 'scipy', 'networkx', 'mlxtend', 'scikit-learn', 'tqdm', 'typing_extensions'],
)
Loading