Skip to content

Commit b0e4f23

Browse files
authored
Merge pull request #18 from prescient-design/deps
dependencies
2 parents 02e1334 + c741b48 commit b0e4f23

File tree

8 files changed

+3842
-23
lines changed

8 files changed

+3842
-23
lines changed

.github/workflows/push.yml

+6-8
Original file line number | Diff line number | Diff line change
@@ -23,19 +23,17 @@ jobs:
2323
- "macos-latest"
2424
- "ubuntu-latest"
2525
# - "windows-latest"
26-
python:
26+
python-version:
2727
- "3.10"
2828
runs-on: ${{ matrix.platform }}
2929
steps:
3030
- uses: "actions/checkout@v4"
31-
- uses: "actions/setup-python@v5"
31+
- name: Install uv and set the python version
32+
uses: astral-sh/setup-uv@v5
3233
with:
33-
python-version: ${{ matrix.python }}
34-
- run: "python -m pip install -r requirements.in"
35-
- run: "python -m pip install -r requirements-dev.in"
36-
- run: "python -m pip install -r requirements-mgm.in"
37-
- run: "python -m pip install --editable ."
38-
- run: "python -m pytest"
34+
python-version: ${{ matrix.python-version }}
35+
- run: uv sync --all-extras --dev
36+
- run: uv run pytest
3937
- env:
4038
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
4139
uses: "codecov/codecov-action@v3"

requirements.in

+1-4
Original file line number | Diff line number | Diff line change
@@ -16,11 +16,8 @@ datasets
1616
tokenizers
1717
transformers>=4.24.0
1818
scikit-learn
19-
matplotlib
20-
seaborn
2119
python-dotenv
2220
torcheval
23-
fastparquet
2421
datasketch
2522
peft
2623
icecream
@@ -29,5 +26,5 @@ pooch
2926
edlib
3027
onnx
3128
onnxscript
32-
beignet[all]
29+
beignet[datasets, mdtraj]
3330
fair-esm

src/lobster/cmdline/_intervene.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -260,13 +260,13 @@ def intervene(cfg: DictConfig) -> bool:
260260
all_data = np.concatenate((all_data, data), axis=1) if all_data is not None else np.array(data)
261261

262262
if all_data.shape[1] > 1000:
263-
print(f"{concept} {i+1} of {len(predict_dataloader)} Saving")
263+
print(f"{concept} {i + 1} of {len(predict_dataloader)} Saving")
264264
df = pd.DataFrame(all_data.transpose(), columns=columns)
265265
df.to_csv(f"{data_folder_dir}/{index}.csv", index=False)
266266
index += 1
267267
all_data = None
268268

269269
if all_data is not None:
270-
print(f"{concept} {i+1} of {len(predict_dataloader)} Saving")
270+
print(f"{concept} {i + 1} of {len(predict_dataloader)} Saving")
271271
df = pd.DataFrame(all_data.transpose(), columns=columns)
272272
df.to_csv(f"{data_folder_dir}/{index}.csv", index=False)

src/lobster/cmdline/_intervene_multiproperty.py

+4-4
Original file line number | Diff line number | Diff line change
@@ -386,13 +386,13 @@ def intervene_multiproperty(cfg: DictConfig) -> bool:
386386
all_data = np.concatenate((all_data, data), axis=1) if all_data is not None else np.array(data)
387387

388388
if all_data.shape[1] > 1000:
389-
print(f"{i+1} of {len(predict_dataloader)} Saving")
389+
print(f"{i + 1} of {len(predict_dataloader)} Saving")
390390
df = pd.DataFrame(all_data.transpose(), columns=columns)
391-
df.to_csv(f"{data_folder_dir}/{index__+1}_indexed.csv", index=False)
391+
df.to_csv(f"{data_folder_dir}/{index__ + 1}_indexed.csv", index=False)
392392
index__ += 1
393393
all_data = None
394394

395395
if all_data is not None:
396-
print(f"{i+1} of {len(predict_dataloader)} Saving")
396+
print(f"{i + 1} of {len(predict_dataloader)} Saving")
397397
df = pd.DataFrame(all_data.transpose(), columns=columns)
398-
df.to_csv(f"{data_folder_dir}/{index__+1}_indexed.csv", index=False)
398+
df.to_csv(f"{data_folder_dir}/{index__ + 1}_indexed.csv", index=False)

src/lobster/data/__init__.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -7,12 +7,12 @@
77
DataFrameDatasetInMemory,
88
DataFrameLightningDataModule,
99
)
10-
from ._farthest_first_traversal import FarthestFirstTraversal, ranked_fft
11-
from ._fasta_datamodule import FastaLightningDataModule # nopycln: import
1210
from ._dyab_data import ( # nopycln: import
1311
DyAbDataFrameDatasetInMemory,
1412
DyAbDataFrameLightningDataModule,
1513
)
14+
from ._farthest_first_traversal import FarthestFirstTraversal, ranked_fft
15+
from ._fasta_datamodule import FastaLightningDataModule # nopycln: import
1616
from ._minhasher import LobsterMinHasher
1717
from ._mmseqs import MMSeqsRunner
1818
from ._structure_datamodule import PDBDataModule

src/lobster/transforms/_convert_seqs.py

+19-2
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,18 @@
1+
from importlib.util import find_spec
12
from typing import Callable, Dict, Optional
23

3-
import selfies as sf
4-
from rdkit import Chem
4+
_SELFIES_AVAILABLE = False
5+
_RDKIT_AVAILABLE = False
6+
7+
if find_spec("selfies"):
8+
import selfies as sf
9+
10+
_SELFIES_AVAILABLE = True
11+
12+
if find_spec("rdkit"):
13+
from rdkit import Chem
14+
15+
_RDKIT_AVAILABLE = True
516

617

718
def convert_nt_to_aa(
@@ -51,6 +62,8 @@ def convert_aa_to_nt(
5162

5263

5364
def convert_aa_to_smiles(aa_seq: str, allowed_aa: set) -> Optional[str]:
65+
assert _RDKIT_AVAILABLE, "rdkit not available. This dependency is part of the mgm extra"
66+
5467
if not aa_seq.isupper():
5568
aa_seq = aa_seq.upper()
5669

@@ -69,6 +82,8 @@ def convert_aa_to_smiles(aa_seq: str, allowed_aa: set) -> Optional[str]:
6982
def convert_smiles_to_aa(
7083
smiles_seq: str,
7184
) -> Optional[str]:
85+
assert _RDKIT_AVAILABLE, "rdkit not available. This dependency is part of the mgm extra"
86+
7287
try:
7388
mol = Chem.MolFromSmiles(smiles_seq)
7489
aa_seq = Chem.MolToSequence(mol)
@@ -81,6 +96,7 @@ def convert_smiles_to_aa(
8196
def convert_smiles_to_selfies(
8297
smiles_seq: str,
8398
) -> Optional[str]:
99+
assert _SELFIES_AVAILABLE, "selfies not available. This dependency is part of the mgm extra"
84100
try:
85101
sf_seq = sf.encoder(smiles_seq)
86102
return sf_seq
@@ -92,6 +108,7 @@ def convert_selfies_to_smiles(
92108
selfies_seq: str,
93109
) -> Optional[str]:
94110
# TODO: add conversion of unknown selfies tokens to Ala selfies
111+
assert _SELFIES_AVAILABLE, "selfies not available. This dependency is part of the mgm extra"
95112
try:
96113
smiles_seq = sf.decoder(selfies_seq)
97114
return smiles_seq

tests/lobster/transforms/test__convert_seqs.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -115,7 +115,7 @@ def test_convert_aa_to_smiles(self):
115115
# print(smi_seq)
116116
assert len(smi_seq) == 100, f"Failed for AA seq {aa_seq}, smiles seq does not have the expected length"
117117
assert smi_seq == (
118-
"CC(C)C[C@H](NC(=O)[C@H](CCC(N)=O)NC(=O)[C@@H](NC(=O)[C@@H](N)CCC(=O)O)" "C(C)C)C(=O)N[C@H](C(=O)O)C(C)C"
118+
"CC(C)C[C@H](NC(=O)[C@H](CCC(N)=O)NC(=O)[C@@H](NC(=O)[C@@H](N)CCC(=O)O)C(C)C)C(=O)N[C@H](C(=O)O)C(C)C"
119119
)
120120

121121
def test_convert_smiles_to_aa(self):

0 commit comments

Comments
 (0)