From c8019094fb8177160678d1e458b4f11414c5561f Mon Sep 17 00:00:00 2001 From: Pranav Khade Date: Sat, 29 Jun 2024 15:05:15 -0700 Subject: [PATCH 1/9] biological assembly download option added --- packman/molecule/molecule.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/packman/molecule/molecule.py b/packman/molecule/molecule.py index b25970f..dc64903 100644 --- a/packman/molecule/molecule.py +++ b/packman/molecule/molecule.py @@ -486,7 +486,7 @@ def load_structure(filename: str, ftype: str= 'cif') -> Protein: ################################################################################################## ''' -def download_structure(pdbid: str, save_name: str=None, ftype: str='cif'): +def download_structure(pdbid: str, save_name: str=None, ftype: str='cif', biological_assembly: bool = False): """This function downloads the 3D protein structure. Example:: @@ -495,16 +495,23 @@ def download_structure(pdbid: str, save_name: str=None, ftype: str='cif'): molecule.download_structure('1prw') Args: - pdbid (str) : A Unique 4 Letter PDB ID (eg.. 1PRW) - save_name (str) : Save name of the downloaded file (extension will be added automatically depending on the ftype argument). - ftype (str) : Format name ('.cif' or '.pdb') + pdbid (str) : A Unique 4 Letter PDB ID (eg.. 1PRW) + save_name (str) : Save name of the downloaded file (extension will be added automatically depending on the ftype argument). + ftype (str) : Format name ('.cif' or '.pdb') + biological_assembly (bool) : Download biological assemblies in lieu of PDB entry. (Default: False) """ import urllib.request as ur if(ftype == 'cif'): - response=ur.urlopen('https://files.rcsb.org/view/'+pdbid+'.cif') + if(biological_assembly): + response=ur.urlopen('https://files.rcsb.org/view/'+pdbid+'-assembly1.cif') + else: + response=ur.urlopen('https://files.rcsb.org/view/'+pdbid+'.cif') elif(ftype == 'pdb'): - response=ur.urlopen('https://files.rcsb.org/view/'+pdbid+'.pdb') + if(biological_assembly): + response=ur.urlopen('https://files.rcsb.org/view/'+pdbid+'.pdb1') + else: + response=ur.urlopen('https://files.rcsb.org/view/'+pdbid+'.pdb') else: logging.warning('Please provide appropriate "ftype" argument. (cif/pdb).') return From 49e01fb003a863c595b23546516877b07ffe8a9e Mon Sep 17 00:00:00 2001 From: Pranav Khade Date: Sat, 29 Jun 2024 15:08:23 -0700 Subject: [PATCH 2/9] molecule.download_structure minor correction --- packman/molecule/molecule.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packman/molecule/molecule.py b/packman/molecule/molecule.py index dc64903..4201bc6 100644 --- a/packman/molecule/molecule.py +++ b/packman/molecule/molecule.py @@ -497,7 +497,7 @@ def download_structure(pdbid: str, save_name: str=None, ftype: str='cif', biolog Args: pdbid (str) : A Unique 4 Letter PDB ID (eg.. 1PRW) save_name (str) : Save name of the downloaded file (extension will be added automatically depending on the ftype argument). - ftype (str) : Format name ('.cif' or '.pdb') + ftype (str) : Format name ('cif' or 'pdb') biological_assembly (bool) : Download biological assemblies in lieu of PDB entry. (Default: False) """ import urllib.request as ur From 7ac6f9c0b13ca2a8d2e9af6a1be7832bf179415f Mon Sep 17 00:00:00 2001 From: Pranav Khade Date: Sat, 29 Jun 2024 15:10:49 -0700 Subject: [PATCH 3/9] np.matrix to np.asmatrix fix test 1 --- packman/utilities/utilities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packman/utilities/utilities.py b/packman/utilities/utilities.py index 2e87f78..faeca67 100644 --- a/packman/utilities/utilities.py +++ b/packman/utilities/utilities.py @@ -66,7 +66,7 @@ def superimporse(reference: 'Chain', target: 'Chain', use: str='calpha', ids: Li Bm = numpy.subtract(atoms2_location, Centroid2) #Dot is matrix multiplication for array - H = numpy.mat(Bm.T) * numpy.mat(Am) + H = numpy.asmatrix(Bm.T) * numpy.asmatrix(Am) #Find Rotation U, S, Vt = numpy.linalg.svd(H) From 38852049b837dd83cdb6aeca237a844bf2799922 Mon Sep 17 00:00:00 2001 From: Pranav Khade Date: Sat, 29 Jun 2024 15:19:50 -0700 Subject: [PATCH 4/9] ruff fix test 1 --- .github/workflows/python-package.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 45a2c33..c3c63a7 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -15,8 +15,8 @@ jobs: steps: - uses: actions/checkout@v4 - run: pip install --user ruff - - run: ruff --exit-zero --line-length=320 --target-version=py38 --statistics . | sort -k 2 - - run: ruff --ignore=E402,E501,E701,E722,E741,F401,F403,F841 + - run: ruff check --exit-zero --line-length=320 --target-version=py38 --statistics . | sort -k 2 + - run: ruff check --ignore=E402,E501,E701,E722,E741,F401,F403,F841 --output-format=github --target-version=py38 . build: From 519d5dd76c79439706aa838597ca4fd9b782b647 Mon Sep 17 00:00:00 2001 From: Pranav Khade Date: Sat, 29 Jun 2024 15:26:27 -0700 Subject: [PATCH 5/9] ruff fix test 2 --- packman/apps/dci.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packman/apps/dci.py b/packman/apps/dci.py index 0461df9..bb41f6d 100644 --- a/packman/apps/dci.py +++ b/packman/apps/dci.py @@ -42,7 +42,7 @@ class DCI(): def __init__(self, mol, cutoff = 7.0, chain = None, n_com = None): self.molObj = mol - assert type( self.molObj ) == Protein, "mol should be a packman.molecule.Protein object." + assert isinstance(self.molObj, Protein) == True, "mol should be a packman.molecule.Protein object." if chain: self.atoms = [i for i in self.molObj[0][chain].get_calpha()] From 9a1c92090d290ddb2c14003b1dd444614fa1abd7 Mon Sep 17 00:00:00 2001 From: Pranav Khade Date: Sat, 29 Jun 2024 15:28:18 -0700 Subject: [PATCH 6/9] ruff fix test 3 --- packman/apps/dci.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packman/apps/dci.py b/packman/apps/dci.py index bb41f6d..bbd43ab 100644 --- a/packman/apps/dci.py +++ b/packman/apps/dci.py @@ -42,7 +42,7 @@ class DCI(): def __init__(self, mol, cutoff = 7.0, chain = None, n_com = None): self.molObj = mol - assert isinstance(self.molObj, Protein) == True, "mol should be a packman.molecule.Protein object." + assert isinstance(self.molObj, Protein), "mol should be a packman.molecule.Protein object." if chain: self.atoms = [i for i in self.molObj[0][chain].get_calpha()] From 7abbffc719a7547f84ba5f8a115868becee59158 Mon Sep 17 00:00:00 2001 From: Pranav Khade Date: Mon, 1 Jul 2024 15:40:17 -0700 Subject: [PATCH 7/9] biological assemblies tested and parallel downloader added --- .travis.yml | 20 ----------------- packman/molecule/molecule.py | 43 +++++++++++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 21 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 2a99c19..0000000 --- a/.travis.yml +++ /dev/null @@ -1,20 +0,0 @@ -language: python - -python: - - "3.6" - - "3.7" - - "3.8" - - "3.9" - -cache: pip - -install: - - pip install numpy - - pip install scipy - - pip install networkx - - pip install mlxtend - - pip install scikit-learn - - pip install nose - -script: - - nosetests packman -a '!slow' diff --git a/packman/molecule/molecule.py b/packman/molecule/molecule.py index 4201bc6..78e81e2 100644 --- a/packman/molecule/molecule.py +++ b/packman/molecule/molecule.py @@ -18,6 +18,7 @@ import numpy import logging +from typing import List from .protein import Protein from .model import Model @@ -526,4 +527,44 @@ def download_structure(pdbid: str, save_name: str=None, ftype: str='cif', biolog open(save_name+'.'+ftype,'wb').write( response.read() ) except(IOError): None - return True \ No newline at end of file + return True + +def batch_download(entry_list: List[str], save_location: str = 'structure_files', ftype: str='cif', biological_assembly: bool = False) -> bool: + '''This function downloads structures from PDB database parallelly. + + Degree of parallelism depends on your CPU count. Duplicates will be removed automatically. + + Example:: + from packman import molecule + molecule.batch_download(['1prw', '1exr', '4hla']) + + Args: + entry_list (List[str]) : List of PDBIDS + save_location (str) : Location where the downloaded files will be stored. + save_name (str) : Save name of the downloaded file (extension will be added automatically depending on the ftype argument). + ftype (str) : Format name ('cif' or 'pdb') + biological_assembly (bool) : Download biological assemblies in lieu of PDB entry. (Default: False) + + ''' + import os + import multiprocessing + from multiprocessing.dummy import Pool + from tqdm import tqdm + + def single_download(entry): + try: + download_structure(entry) + return entry, True + except: + return entry, False + + if not os.path.exists(save_location): + os.makedirs(save_location) + + multiprocessing_input = list(set(entry_list)) + + # Multiprocessing + pool = Pool( multiprocessing.cpu_count() ) + for result in tqdm(pool.imap_unordered( single_download, multiprocessing_input ), total=len(multiprocessing_input)): + if(result[1] is False): + print('\n',result[0],' failed to download.') \ No newline at end of file From c7c41a2c1f00608cb463c77ccca421a6be60c557 Mon Sep 17 00:00:00 2001 From: Pranav Khade Date: Mon, 1 Jul 2024 15:42:32 -0700 Subject: [PATCH 8/9] version changed to v1.4.13 --- packman/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packman/__init__.py b/packman/__init__.py index 95e809f..be8a4e2 100644 --- a/packman/__init__.py +++ b/packman/__init__.py @@ -13,4 +13,4 @@ #VERSION CHANGE HERE CHANGES IT IN docs AND setup.py -__version__='1.4.12' \ No newline at end of file +__version__='1.4.13' \ No newline at end of file From 70e738a38f68c2198a470d3290699bd8474e4c0b Mon Sep 17 00:00:00 2001 From: Pranav Khade Date: Mon, 1 Jul 2024 15:49:00 -0700 Subject: [PATCH 9/9] requirement additions 1 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 2a39729..529bf97 100644 --- a/setup.py +++ b/setup.py @@ -71,5 +71,5 @@ def get_version(rel_path): entry_points = { 'console_scripts': SCRIPTS, }, - install_requires = ['numpy', 'scipy', 'networkx', 'mlxtend', 'scikit-learn'], + install_requires = ['numpy', 'scipy', 'networkx', 'mlxtend', 'scikit-learn', 'tqdm', 'typing_extensions'], ) \ No newline at end of file