Skip to content

Commit

Permalink
biological assemblies tested and parallel downloader added
Browse files Browse the repository at this point in the history
  • Loading branch information
Pranavkhade committed Jul 1, 2024
1 parent 9a1c920 commit 7abbffc
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 21 deletions.
20 changes: 0 additions & 20 deletions .travis.yml

This file was deleted.

43 changes: 42 additions & 1 deletion packman/molecule/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import numpy
import logging
from typing import List

from .protein import Protein
from .model import Model
Expand Down Expand Up @@ -526,4 +527,44 @@ def download_structure(pdbid: str, save_name: str=None, ftype: str='cif', biolog
open(save_name+'.'+ftype,'wb').write( response.read() )
except(IOError):
None
return True
return True

def batch_download(entry_list: List[str], save_location: str = 'structure_files', ftype: str='cif', biological_assembly: bool = False) -> bool:
    '''Download several structures from the PDB database concurrently.

    The downloads run on a thread pool (``multiprocessing.dummy``) with one worker per CPU reported by
    ``multiprocessing.cpu_count()``. Duplicate entries are removed before downloading. Each file is
    written inside ``save_location`` and named after its entry; the extension is appended by
    ``download_structure`` according to ``ftype``.

    Example::

        from packman import molecule
        molecule.batch_download(['1prw', '1exr', '4hla'])

    Args:
        entry_list (List[str])     : List of PDB IDs.
        save_location (str)        : Directory where the downloaded files will be stored (created if missing).
        ftype (str)                : Format name ('cif' or 'pdb')
        biological_assembly (bool) : Download biological assemblies in lieu of PDB entry. (Default: False)

    Returns:
        bool: True if no download raised an exception, False otherwise. An entry is reported as failed
        only when ``download_structure`` raises for it.
    '''
    import os
    import multiprocessing
    from multiprocessing.dummy import Pool
    from tqdm import tqdm

    def single_download(entry):
        # Forward every user option; arguments are passed positionally in the order
        # (pdbid, save_name, ftype, biological_assembly) of download_structure.
        try:
            download_structure(entry, os.path.join(save_location, entry), ftype, biological_assembly)
        except Exception:
            # Best effort: one failing entry must not abort the whole batch.
            return entry, False
        return entry, True

    # exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
    os.makedirs(save_location, exist_ok=True)

    # Remove duplicates while keeping the caller's order.
    unique_entries = list(dict.fromkeys(entry_list))

    all_succeeded = True
    # The context manager releases the worker threads even if the loop is interrupted.
    with Pool(multiprocessing.cpu_count()) as pool:
        results = pool.imap_unordered(single_download, unique_entries)
        for entry, succeeded in tqdm(results, total=len(unique_entries)):
            if not succeeded:
                all_succeeded = False
                print('\n',entry,' failed to download.')

    return all_succeeded

0 comments on commit 7abbffc

Please sign in to comment.