From 38fd20607c0aaf14c07bb20ed739688154d3414d Mon Sep 17 00:00:00 2001 From: Andrew Schaub Date: Tue, 24 Dec 2024 21:45:40 -0500 Subject: [PATCH] added fetch_pdb --- protein_design_tools/__init__.py | 8 ------ protein_design_tools/core/__init__.py | 8 ------ protein_design_tools/io/__init__.py | 3 --- protein_design_tools/io/pdb.py | 17 +++++++++--- protein_design_tools/metrics/rmsd.py | 2 +- tests/io/test_pdb.py | 38 ++++++++++++++++----------- 6 files changed, 37 insertions(+), 39 deletions(-) delete mode 100644 protein_design_tools/io/__init__.py diff --git a/protein_design_tools/__init__.py b/protein_design_tools/__init__.py index 537e650..e69de29 100644 --- a/protein_design_tools/__init__.py +++ b/protein_design_tools/__init__.py @@ -1,8 +0,0 @@ -# protein_design_tools/__init__.py - -__version__ = '0.1.29' - -from .core import Atom, Chain, Residue, ProteinStructure -from .io import read_pdb, write_pdb -from .metrics import compute_rmsd, compute_tmscore -from .utils import get_coordinates, get_masses diff --git a/protein_design_tools/core/__init__.py b/protein_design_tools/core/__init__.py index 3f9d38e..e69de29 100644 --- a/protein_design_tools/core/__init__.py +++ b/protein_design_tools/core/__init__.py @@ -1,8 +0,0 @@ -# protein_design_tools/core/__init__.py - -from .atom import Atom -from .residue import Residue -from .chain import Chain -from .protein_structure import ProteinStructure - -__all__ = ["Atom", "Residue", "Chain", "ProteinStructure"] diff --git a/protein_design_tools/io/__init__.py b/protein_design_tools/io/__init__.py deleted file mode 100644 index 26b1e87..0000000 --- a/protein_design_tools/io/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .pdb import fetch_pdb, read_pdb, write_pdb - -__all__ = ["fetch_pdb", "read_pdb", "write_pdb"] \ No newline at end of file diff --git a/protein_design_tools/io/pdb.py b/protein_design_tools/io/pdb.py index 5c0bf3e..43b937d 100644 --- a/protein_design_tools/io/pdb.py +++ b/protein_design_tools/io/pdb.py @@ -9,6 +9,7 @@ import requests from io import StringIO + def fetch_pdb(pdb_id: str, file_path: Optional[str] = None) -> ProteinStructure: """ Fetch a PDB file from RCSB PDB by its ID and optionally save it to a file. @@ -37,7 +38,10 @@ def fetch_pdb(pdb_id: str, file_path: Optional[str] = None) -> ProteinStructure: temp_path = StringIO(response.text) return read_pdb(temp_path) else: - raise ValueError(f"Failed to fetch PDB ID {pdb_id}: HTTP status {response.status_code}") + raise ValueError( + f"Failed to fetch PDB ID {pdb_id}: HTTP status {response.status_code}" + ) + def read_pdb( file_path: str, chains: Optional[List[str]] = None, name: Optional[str] = None @@ -124,6 +128,7 @@ def read_pdb( return structure + def write_pdb(structure: ProteinStructure, file_path: str) -> None: """ Write a ProteinStructure object to a PDB file. @@ -140,7 +145,13 @@ def write_pdb(structure: ProteinStructure, file_path: str) -> None: for chain in structure.chains: for residue in chain.residues: for atom in residue.atoms: - content += f"ATOM {atom.atom_id:5} {atom.name:<4} {residue.name:<3} {chain.name}{residue.res_seq:4}{residue.i_code:<1} {atom.x:8.3f}{atom.y:8.3f}{atom.z:8.3f}{atom.occupancy:6.2f}{atom.temp_factor:6.2f} {atom.element:2}{atom.charge:2}\n" + content += ( + f"ATOM {atom.atom_id:5} {atom.name:<4} {residue.name:<3} " + f"{chain.name}{residue.res_seq:4}{residue.i_code:<1} " + f"{atom.x:8.3f}{atom.y:8.3f}{atom.z:8.3f}" + f"{atom.occupancy:6.2f}{atom.temp_factor:6.2f} " + f"{atom.element:2}{atom.charge:2}\n" + ) content += "TER\n" content += "END\n" - f.write(content) \ No newline at end of file + f.write(content) diff --git a/protein_design_tools/metrics/rmsd.py b/protein_design_tools/metrics/rmsd.py index a9b920b..2397522 100644 --- a/protein_design_tools/metrics/rmsd.py +++ b/protein_design_tools/metrics/rmsd.py @@ -83,4 +83,4 @@ def compute_rmsd_tensorflow(P: tnp.ndarray, Q: tnp.ndarray) -> tnp.ndarray: RMSD between P and Q """ assert P.shape == Q.shape - return tnp.sqrt(tnp.mean(tnp.sum((P - Q) ** 2, axis=1))) \ No newline at end of file + return tnp.sqrt(tnp.mean(tnp.sum((P - Q) ** 2, axis=1))) diff --git a/tests/io/test_pdb.py b/tests/io/test_pdb.py index 54d123a..4f291e1 100644 --- a/tests/io/test_pdb.py +++ b/tests/io/test_pdb.py @@ -8,6 +8,7 @@ from protein_design_tools.core.residue import Residue from protein_design_tools.core.atom import Atom + @pytest.fixture def sample_pdb_content(): """from a previous AF2 test""" @@ -216,21 +217,24 @@ def test_read_pdb_malformed_lines(): def test_read_pdb_with_hetatm(sample_pdb_content): mock_file = mock_open(read_data=sample_pdb_content) - + with patch("builtins.open", mock_file): - structure = read_pdb("dummy_path.pdb", chains=['A'], name="TestProteinWithHETATM") - + structure = read_pdb( + "dummy_path.pdb", chains=["A"], name="TestProteinWithHETATM" + ) + # Assertions for HETATM record chain = structure.chains[0] residue2 = chain.residues[1] assert len(residue2.atoms) == 6 # Including OXT from HETATM - hetatm_atom = next((atom for atom in residue2.atoms if atom.name == 'OXT'), None) + hetatm_atom = next((atom for atom in residue2.atoms if atom.name == "OXT"), None) assert hetatm_atom is not None - assert hetatm_atom.element == 'O' + assert hetatm_atom.element == "O" assert hetatm_atom.x == 15.604 assert hetatm_atom.y == 15.707 assert hetatm_atom.z == 6.000 + def test_read_pdb_multiple_chains(): multi_chain_pdb_content = """ ATOM 1 N ALA A 1 11.104 13.207 2.100 1.00 20.00 N @@ -247,26 +251,28 @@ def test_read_pdb_multiple_chains(): END """ mock_file = mock_open(read_data=multi_chain_pdb_content) - + with patch("builtins.open", mock_file): - structure = read_pdb("dummy_path.pdb", chains=['A', 'B'], name="MultiChainProtein") - + structure = read_pdb( + "dummy_path.pdb", chains=["A", "B"], name="MultiChainProtein" + ) + # Assertions assert len(structure.chains) == 2 - chain_a = next((c for c in structure.chains if c.name == 'A'), None) - chain_b = next((c for c in structure.chains if c.name == 'B'), None) - + chain_a = next((c for c in structure.chains if c.name == "A"), None) + chain_b = next((c for c in structure.chains if c.name == "B"), None) + assert chain_a is not None assert chain_b is not None - + assert len(chain_a.residues) == 1 assert len(chain_b.residues) == 1 - + # Check residues and atoms residue_a = chain_a.residues[0] - assert residue_a.name == 'ALA' + assert residue_a.name == "ALA" assert len(residue_a.atoms) == 5 - + residue_b = chain_b.residues[0] - assert residue_b.name == 'ARG' + assert residue_b.name == "ARG" assert len(residue_b.atoms) == 6 # Including OXT