Skip to content

Commit

Permalink
Check uniqueness and residue numbers when validating bonds
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 723912607
Change-Id: I668632a9737e69243618a00a2fefc5c542203c3d
  • Loading branch information
Augustin-Zidek authored and copybara-github committed Feb 6, 2025
1 parent e825aea commit a3cf058
Showing 1 changed file with 26 additions and 3 deletions.
29 changes: 26 additions & 3 deletions src/alphafold3/common/folding_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,9 @@ def unpaired_msa(self) -> str | None:
def templates(self) -> Sequence[Template] | None:
  """Returns the stored templates, or None when none are set."""
  stored_templates = self._templates
  return stored_templates

def __len__(self) -> int:
  """Returns the number of elements in the stored sequence."""
  sequence = self._sequence
  return len(sequence)

def __eq__(self, other: Self) -> bool:
return (
self._id == other._id
Expand Down Expand Up @@ -466,6 +469,9 @@ def modifications(self) -> Sequence[tuple[str, int]]:
def unpaired_msa(self) -> str | None:
return self._unpaired_msa

def __len__(self) -> int:
  """Returns how many elements the stored sequence contains."""
  stored_sequence = self._sequence
  return len(stored_sequence)

def __eq__(self, other: Self) -> bool:
return (
self._id == other._id
Expand Down Expand Up @@ -613,6 +619,9 @@ def sequence(self) -> str:
for r in self.to_ccd_sequence()
])

def __len__(self) -> int:
  """Returns the length of the stored sequence."""
  n_elements = len(self._sequence)
  return n_elements

def __eq__(self, other: Self) -> bool:
return (
self._id == other._id
Expand Down Expand Up @@ -717,6 +726,12 @@ def __post_init__(self):
if self.ccd_ids is not None:
object.__setattr__(self, 'ccd_ids', tuple(self.ccd_ids))

def __len__(self) -> int:
  """Returns the number of CCD IDs, or 1 when `ccd_ids` is None."""
  return 1 if self.ccd_ids is None else len(self.ccd_ids)

def hash_without_id(self) -> int:
  """Returns a hash that ignores the ID - useful for deduplication.

  Only `ccd_ids` and `smiles` contribute to the hash.
  """
  id_free_fields = (self.ccd_ids, self.smiles)
  return hash(id_free_fields)
Expand Down Expand Up @@ -1055,7 +1070,10 @@ def from_json(
else:
raise ValueError(f'Unknown sequence type: {sequence}')

ligands = [chain for chain in chains if isinstance(chain, Ligand)]
smiles_ligand_ids = set(
c.id for c in chains if isinstance(c, Ligand) and c.smiles is not None
)
chain_lengths = {chain.id: len(chain) for chain in chains}
bonded_atom_pairs = None
if bonds := raw_json.get('bondedAtomPairs'):
bonded_atom_pairs = []
Expand Down Expand Up @@ -1085,9 +1103,11 @@ def from_json(
)
if bond_beg[0] not in flat_seq_ids or bond_end[0] not in flat_seq_ids:
raise ValueError(f'Invalid chain ID(s) in bond {bond}')
if bond_beg[1] <= 0 or bond_end[1] <= 0:
if (
not 0 < bond_beg[1] <= chain_lengths[bond_beg[0]]
or not 0 < bond_end[1] <= chain_lengths[bond_end[0]]
):
raise ValueError(f'Invalid residue ID(s) in bond {bond}')
smiles_ligand_ids = set(l.id for l in ligands if l.smiles is not None)
if bond_beg[0] in smiles_ligand_ids:
raise ValueError(
f'Bond {bond} involves an unsupported SMILES ligand {bond_beg[0]}'
Expand All @@ -1098,6 +1118,9 @@ def from_json(
)
bonded_atom_pairs.append((tuple(bond_beg), tuple(bond_end)))

if len(bonded_atom_pairs) != len(set(bonded_atom_pairs)):
raise ValueError(f'Bonds are not unique: {bonded_atom_pairs}')

return cls(
name=raw_json['name'],
chains=chains,
Expand Down

0 comments on commit a3cf058

Please sign in to comment.