Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dependencies #18

Merged
merged 7 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 6 additions & 8 deletions .github/workflows/push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,17 @@ jobs:
- "macos-latest"
- "ubuntu-latest"
# - "windows-latest"
python:
python-version:
- "3.10"
runs-on: ${{ matrix.platform }}
steps:
- uses: "actions/checkout@v4"
- uses: "actions/setup-python@v5"
- name: Install uv and set the python version
uses: astral-sh/setup-uv@v5
with:
python-version: ${{ matrix.python }}
- run: "python -m pip install -r requirements.in"
- run: "python -m pip install -r requirements-dev.in"
- run: "python -m pip install -r requirements-mgm.in"
- run: "python -m pip install --editable ."
- run: "python -m pytest"
python-version: ${{ matrix.python-version }}
- run: uv sync --all-extras --dev
- run: uv run pytest
- env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
uses: "codecov/codecov-action@v3"
Expand Down
5 changes: 1 addition & 4 deletions requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,8 @@ datasets
tokenizers
transformers>=4.24.0
scikit-learn
matplotlib
seaborn
python-dotenv
torcheval
fastparquet
datasketch
peft
icecream
Expand All @@ -29,5 +26,5 @@ pooch
edlib
onnx
onnxscript
beignet[all]
beignet[datasets, mdtraj]
fair-esm
4 changes: 2 additions & 2 deletions src/lobster/cmdline/_intervene.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,13 +260,13 @@ def intervene(cfg: DictConfig) -> bool:
all_data = np.concatenate((all_data, data), axis=1) if all_data is not None else np.array(data)

if all_data.shape[1] > 1000:
print(f"{concept} {i+1} of {len(predict_dataloader)} Saving")
print(f"{concept} {i + 1} of {len(predict_dataloader)} Saving")
df = pd.DataFrame(all_data.transpose(), columns=columns)
df.to_csv(f"{data_folder_dir}/{index}.csv", index=False)
index += 1
all_data = None

if all_data is not None:
print(f"{concept} {i+1} of {len(predict_dataloader)} Saving")
print(f"{concept} {i + 1} of {len(predict_dataloader)} Saving")
df = pd.DataFrame(all_data.transpose(), columns=columns)
df.to_csv(f"{data_folder_dir}/{index}.csv", index=False)
8 changes: 4 additions & 4 deletions src/lobster/cmdline/_intervene_multiproperty.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,13 +386,13 @@ def intervene_multiproperty(cfg: DictConfig) -> bool:
all_data = np.concatenate((all_data, data), axis=1) if all_data is not None else np.array(data)

if all_data.shape[1] > 1000:
print(f"{i+1} of {len(predict_dataloader)} Saving")
print(f"{i + 1} of {len(predict_dataloader)} Saving")
df = pd.DataFrame(all_data.transpose(), columns=columns)
df.to_csv(f"{data_folder_dir}/{index__+1}_indexed.csv", index=False)
df.to_csv(f"{data_folder_dir}/{index__ + 1}_indexed.csv", index=False)
index__ += 1
all_data = None

if all_data is not None:
print(f"{i+1} of {len(predict_dataloader)} Saving")
print(f"{i + 1} of {len(predict_dataloader)} Saving")
df = pd.DataFrame(all_data.transpose(), columns=columns)
df.to_csv(f"{data_folder_dir}/{index__+1}_indexed.csv", index=False)
df.to_csv(f"{data_folder_dir}/{index__ + 1}_indexed.csv", index=False)
4 changes: 2 additions & 2 deletions src/lobster/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
DataFrameDatasetInMemory,
DataFrameLightningDataModule,
)
from ._farthest_first_traversal import FarthestFirstTraversal, ranked_fft
from ._fasta_datamodule import FastaLightningDataModule # nopycln: import
from ._dyab_data import ( # nopycln: import
DyAbDataFrameDatasetInMemory,
DyAbDataFrameLightningDataModule,
)
from ._farthest_first_traversal import FarthestFirstTraversal, ranked_fft
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can probably get rid of this (the local `_farthest_first_traversal` import) and point to the beignet implementation instead.

from ._fasta_datamodule import FastaLightningDataModule # nopycln: import
from ._minhasher import LobsterMinHasher
from ._mmseqs import MMSeqsRunner
from ._structure_datamodule import PDBDataModule
Expand Down
21 changes: 19 additions & 2 deletions src/lobster/transforms/_convert_seqs.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
from importlib.util import find_spec
from typing import Callable, Dict, Optional

import selfies as sf
from rdkit import Chem
_SELFIES_AVAILABLE = False
_RDKIT_AVAILABLE = False

if find_spec("selfies"):
import selfies as sf

_SELFIES_AVAILABLE = True

if find_spec("rdkit"):
from rdkit import Chem

_RDKIT_AVAILABLE = True


def convert_nt_to_aa(
Expand Down Expand Up @@ -51,6 +62,8 @@ def convert_aa_to_nt(


def convert_aa_to_smiles(aa_seq: str, allowed_aa: set) -> Optional[str]:
assert _RDKIT_AVAILABLE, "rdkit not available. This dependency is part of the mgm extra"

if not aa_seq.isupper():
aa_seq = aa_seq.upper()

Expand All @@ -69,6 +82,8 @@ def convert_aa_to_smiles(aa_seq: str, allowed_aa: set) -> Optional[str]:
def convert_smiles_to_aa(
smiles_seq: str,
) -> Optional[str]:
assert _RDKIT_AVAILABLE, "rdkit not available. This dependency is part of the mgm extra"

try:
mol = Chem.MolFromSmiles(smiles_seq)
aa_seq = Chem.MolToSequence(mol)
Expand All @@ -81,6 +96,7 @@ def convert_smiles_to_aa(
def convert_smiles_to_selfies(
smiles_seq: str,
) -> Optional[str]:
assert _SELFIES_AVAILABLE, "selfies not available. This dependency is part of the mgm extra"
try:
sf_seq = sf.encoder(smiles_seq)
return sf_seq
Expand All @@ -92,6 +108,7 @@ def convert_selfies_to_smiles(
selfies_seq: str,
) -> Optional[str]:
# TODO: add conversion of unknown selfies tokens to Ala selfies
assert _SELFIES_AVAILABLE, "selfies not available. This dependency is part of the mgm extra"
try:
smiles_seq = sf.decoder(selfies_seq)
return smiles_seq
Expand Down
2 changes: 1 addition & 1 deletion tests/lobster/transforms/test__convert_seqs.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def test_convert_aa_to_smiles(self):
# print(smi_seq)
assert len(smi_seq) == 100, f"Failed for AA seq {aa_seq}, smiles seq does not have the expected length"
assert smi_seq == (
"CC(C)C[C@H](NC(=O)[C@H](CCC(N)=O)NC(=O)[C@@H](NC(=O)[C@@H](N)CCC(=O)O)" "C(C)C)C(=O)N[C@H](C(=O)O)C(C)C"
"CC(C)C[C@H](NC(=O)[C@H](CCC(N)=O)NC(=O)[C@@H](NC(=O)[C@@H](N)CCC(=O)O)C(C)C)C(=O)N[C@H](C(=O)O)C(C)C"
)

def test_convert_smiles_to_aa(self):
Expand Down
Loading
Loading