diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 45a2c33..c3c63a7 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -15,8 +15,8 @@ jobs: steps: - uses: actions/checkout@v4 - run: pip install --user ruff - - run: ruff --exit-zero --line-length=320 --target-version=py38 --statistics . | sort -k 2 - - run: ruff --ignore=E402,E501,E701,E722,E741,F401,F403,F841 + - run: ruff check --exit-zero --line-length=320 --target-version=py38 --statistics . | sort -k 2 + - run: ruff check --ignore=E402,E501,E701,E722,E741,F401,F403,F841 --output-format=github --target-version=py38 . build: diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 2a99c19..0000000 --- a/.travis.yml +++ /dev/null @@ -1,20 +0,0 @@ -language: python - -python: - - "3.6" - - "3.7" - - "3.8" - - "3.9" - -cache: pip - -install: - - pip install numpy - - pip install scipy - - pip install networkx - - pip install mlxtend - - pip install scikit-learn - - pip install nose - -script: - - nosetests packman -a '!slow' diff --git a/packman/__init__.py b/packman/__init__.py index 95e809f..be8a4e2 100644 --- a/packman/__init__.py +++ b/packman/__init__.py @@ -13,4 +13,4 @@ #VERSION CHANGE HERE CHANGES IT IN docs AND setup.py -__version__='1.4.12' \ No newline at end of file +__version__='1.4.13' \ No newline at end of file diff --git a/packman/apps/dci.py b/packman/apps/dci.py index 0461df9..bbd43ab 100644 --- a/packman/apps/dci.py +++ b/packman/apps/dci.py @@ -42,7 +42,7 @@ class DCI(): def __init__(self, mol, cutoff = 7.0, chain = None, n_com = None): self.molObj = mol - assert type( self.molObj ) == Protein, "mol should be a packman.molecule.Protein object." + assert isinstance(self.molObj, Protein), "mol should be a packman.molecule.Protein object." if chain: self.atoms = [i for i in self.molObj[0][chain].get_calpha()] diff --git a/packman/molecule/molecule.py b/packman/molecule/molecule.py index b25970f..78e81e2 100644 --- a/packman/molecule/molecule.py +++ b/packman/molecule/molecule.py @@ -18,6 +18,7 @@ import numpy import logging +from typing import List from .protein import Protein from .model import Model @@ -486,7 +487,7 @@ def load_structure(filename: str, ftype: str= 'cif') -> Protein: ################################################################################################## ''' -def download_structure(pdbid: str, save_name: str=None, ftype: str='cif'): +def download_structure(pdbid: str, save_name: str=None, ftype: str='cif', biological_assembly: bool = False): """This function downloads the 3D protein structure. Example:: @@ -495,16 +496,23 @@ def download_structure(pdbid: str, save_name: str=None, ftype: str='cif'): molecule.download_structure('1prw') Args: - pdbid (str) : A Unique 4 Letter PDB ID (eg.. 1PRW) - save_name (str) : Save name of the downloaded file (extension will be added automatically depending on the ftype argument). - ftype (str) : Format name ('.cif' or '.pdb') + pdbid (str) : A Unique 4 Letter PDB ID (eg.. 1PRW) + save_name (str) : Save name of the downloaded file (extension will be added automatically depending on the ftype argument). + ftype (str) : Format name ('cif' or 'pdb') + biological_assembly (bool) : Download biological assemblies in lieu of PDB entry. (Default: False) """ import urllib.request as ur if(ftype == 'cif'): - response=ur.urlopen('https://files.rcsb.org/view/'+pdbid+'.cif') + if(biological_assembly): + response=ur.urlopen('https://files.rcsb.org/view/'+pdbid+'-assembly1.cif') + else: + response=ur.urlopen('https://files.rcsb.org/view/'+pdbid+'.cif') elif(ftype == 'pdb'): - response=ur.urlopen('https://files.rcsb.org/view/'+pdbid+'.pdb') + if(biological_assembly): + response=ur.urlopen('https://files.rcsb.org/view/'+pdbid+'.pdb1') + else: + response=ur.urlopen('https://files.rcsb.org/view/'+pdbid+'.pdb') else: logging.warning('Please provide appropriate "ftype" argument. (cif/pdb).') return @@ -519,4 +527,44 @@ def download_structure(pdbid: str, save_name: str=None, ftype: str='cif'): open(save_name+'.'+ftype,'wb').write( response.read() ) except(IOError): None - return True \ No newline at end of file + return True + +def batch_download(entry_list: List[str], save_location: str = 'structure_files', ftype: str='cif', biological_assembly: bool = False) -> bool: + '''This function downloads structures from PDB database parallelly. + + Degree of parallelism depends on your CPU count. Duplicates will be removed automatically. + + Example:: + from packman import molecule + molecule.batch_download(['1prw', '1exr', '4hla']) + + Args: + entry_list (List[str]) : List of PDBIDS + save_location (str) : Location where the downloaded files will be stored. + save_name (str) : Save name of the downloaded file (extension will be added automatically depending on the ftype argument). + ftype (str) : Format name ('cif' or 'pdb') + biological_assembly (bool) : Download biological assemblies in lieu of PDB entry. (Default: False) + + ''' + import os + import multiprocessing + from multiprocessing.dummy import Pool + from tqdm import tqdm + + def single_download(entry): + try: + download_structure(entry) + return entry, True + except: + return entry, False + + if not os.path.exists(save_location): + os.makedirs(save_location) + + multiprocessing_input = list(set(entry_list)) + + # Multiprocessing + pool = Pool( multiprocessing.cpu_count() ) + for result in tqdm(pool.imap_unordered( single_download, multiprocessing_input ), total=len(multiprocessing_input)): + if(result[1] is False): + print('\n',result[0],' failed to download.') \ No newline at end of file diff --git a/packman/utilities/utilities.py b/packman/utilities/utilities.py index 2e87f78..faeca67 100644 --- a/packman/utilities/utilities.py +++ b/packman/utilities/utilities.py @@ -66,7 +66,7 @@ def superimporse(reference: 'Chain', target: 'Chain', use: str='calpha', ids: Li Bm = numpy.subtract(atoms2_location, Centroid2) #Dot is matrix multiplication for array - H = numpy.mat(Bm.T) * numpy.mat(Am) + H = numpy.asmatrix(Bm.T) * numpy.asmatrix(Am) #Find Rotation U, S, Vt = numpy.linalg.svd(H) diff --git a/setup.py b/setup.py index 2a39729..529bf97 100644 --- a/setup.py +++ b/setup.py @@ -71,5 +71,5 @@ def get_version(rel_path): entry_points = { 'console_scripts': SCRIPTS, }, - install_requires = ['numpy', 'scipy', 'networkx', 'mlxtend', 'scikit-learn'], + install_requires = ['numpy', 'scipy', 'networkx', 'mlxtend', 'scikit-learn', 'tqdm', 'typing_extensions'], ) \ No newline at end of file