prescient-design
diff --git a/‎.github/workflows/push.yml
+30-30 b/‎.github/workflows/push.yml
+30-30
diff --git a/‎README.md
+1-1 b/‎README.md
+1-1
diff --git a/‎docs/CONTRIBUTORS.md
-1 b/‎docs/CONTRIBUTORS.md
-1
diff --git a/‎src/lobster/data/_cyno_pk_datamodule.py
-173 b/‎src/lobster/data/_cyno_pk_datamodule.py
-173
diff --git a/‎tests/__init__.py b/‎tests/__init__.py
diff --git a/‎tests/lobster/__init__.py b/‎tests/lobster/__init__.py
diff --git a/‎tests/lobster/cmdline/__init__.py b/‎tests/lobster/cmdline/__init__.py
diff --git a/‎tests/lobster/cmdline/test__cmdline.py
+20 b/‎tests/lobster/cmdline/test__cmdline.py
+20
diff --git a/‎tests/lobster/conftest.py
+23 b/‎tests/lobster/conftest.py
+23
diff --git a/‎tests/lobster/data/__init__.py b/‎tests/lobster/data/__init__.py
diff --git a/‎tests/lobster/data/test__calm_datamodule.py
+43 b/‎tests/lobster/data/test__calm_datamodule.py
+43
@@ -14,29 +14,36 @@ jobs:
         with:
           name: "python-package-distributions"
           path: "dist/"
-  # pytest:
-  #   strategy:
-  #     matrix:
-  #       platform:
-  #         - "macos-latest"
-  #         - "ubuntu-latest"
-  #         # - "windows-latest"
-  #       python:
-  #         - "3.10"
-  #         - "3.11"
-  #   runs-on: ${{ matrix.platform }}
-  #   steps:
-  #     - uses: "actions/checkout@v4"
-  #     - uses: "actions/setup-python@v5"
-  #       with:
-  #         python-version: ${{ matrix.python }}
-  #     - run: "python -m pip install -r requirements.in"
-  #     - run: "python -m pip install -r requirements-dev.in"
-  #     - run: "python -m pip install --editable ."
-  #     - run: "python -m pytest"
-  #     - env:
-  #         CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
-  #       uses: "codecov/codecov-action@v3"
+  pytest:  # test job: runs once per platform/python combination in the matrix
+    strategy:
+      matrix:
+        platform:
+          - "macos-latest"
+          - "ubuntu-latest"
+          # - "windows-latest"
+        python:
+          - "3.10"
+          - "3.11"
+    runs-on: ${{ matrix.platform }}
+    steps:
+      - uses: "actions/checkout@v4"
+      - uses: "actions/setup-python@v5"
+        with:
+          python-version: ${{ matrix.python }}
+      - run: "python -m pip install -r requirements.in"
+      - run: "python -m pip install -r requirements-dev.in"
+      - run: "python -m pip install --editable ."  # install the checkout itself in editable mode
+      - run: "python -m pytest"
+      - env:  # Codecov upload step; the token is read from the `secrets` context
+          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
+        uses: "codecov/codecov-action@v3"
+  ruff:  # formatting check job
+    runs-on: "ubuntu-latest"
+    steps:
+      - uses: "actions/checkout@v4"
+      - uses: "chartboost/ruff-action@v1"
+        with:
+          args: "format --check"  # check formatting only; `--check` does not rewrite files
   # pypi:
   #   environment:
   #     name: "pypi.org"
@@ -74,13 +81,6 @@ jobs:
   #     - env:
   #         GITHUB_TOKEN: "${{ github.token }}"
   #       run: "gh release upload '${{ github.ref_name }}' dist/** --repo '${{ github.repository }}'"
-  # ruff:
-  #   runs-on: "ubuntu-latest"
-  #   steps:
-  #     - uses: "actions/checkout@v4"
-  #     - uses: "chartboost/ruff-action@v1"
-  #       with:
-  #         args: "format --check"
 #   testpypi:
 #     environment:
 #       name: "test.pypi.org"
 
@@ -2,7 +2,7 @@
 **L**anguage models for **B**iological **S**equence **T**ransformation and **E**volutionary **R**epresentation
 
 
-`lobster` is a "batteries included" language model library for proteins and other biological sequences. Led by [Nathan Frey](https://github.com/ncfrey), [Taylor Joren](https://github.com/taylormjs), [Aya Abdlesalam Ismail](https://github.com/ayaabdelsalam91), and [Allen Goodman](https://github.com/0x00b1), with many valuable contributions from [Contributors](docs/CONTRIBUTORS.md) across [Prescient Design, Genentech](https://www.gene.com/scientists/our-scientists/prescient-design).
+`lobster` is a "batteries included" language model library for proteins and other biological sequences. Led by [Nathan Frey](https://github.com/ncfrey), [Taylor Joren](https://github.com/taylormjs), [Aya Abdelsalam Ismail](https://github.com/ayaabdelsalam91), [Joseph Kleinhenz](https://github.com/kleinhenz), and [Allen Goodman](https://github.com/0x00b1), with many valuable contributions from [Contributors](docs/CONTRIBUTORS.md) across [Prescient Design, Genentech](https://www.gene.com/scientists/our-scientists/prescient-design).
 
 This repository contains code and access to pre-trained language models for biological sequence data.
 
 
@@ -1,5 +1,4 @@
 * Karina Zadorozhny
-* Joseph Kleinhenz
 * Matthieu Kirchmeyer
 * Sai Pooja Mahajan
 * Amy Wang
@@ -0,0 +1,20 @@
+import subprocess
+
+
+class TestLobsterCmdline:
+    """Smoke tests: each console script's `-h` output starts with its Hydra line."""
+
+    def test_train(self):
+        # Argument list instead of a shell string: nothing here needs a shell.
+        output = subprocess.check_output(["lobster_train", "-h"]).splitlines()
+
+        # Check the length first so empty output fails as an assertion.
+        assert len(output) > 1
+        assert output[0] == b"_train is powered by Hydra."
+
+    def test_embed(self):
+        # Same check as test_train, for the `lobster_embed` entry point.
+        output = subprocess.check_output(["lobster_embed", "-h"]).splitlines()
+
+        assert len(output) > 1
+        assert output[0] == b"_embed is powered by Hydra."
@@ -0,0 +1,23 @@
+import logging
+
+import pytest
+from lobster.data import NegLogDataModule
+
+
+@pytest.fixture(autouse=True)
+def configure_logging():
+    """Set the root logger and each of its handlers to DEBUG for every test."""
+    root_logger = logging.getLogger()
+    for target in (root_logger, *root_logger.handlers):
+        target.setLevel(logging.DEBUG)
+
+
+@pytest.fixture(scope="session")
+def ppi_datamodule(tmp_path_factory):
+    """Session-scoped NegLogDataModule whose root is a new temporary directory."""
+    options = {
+        "download": False,
+        "lengths": [0.7, 0.2, 0.1],
+        "truncation_seq_length": 50,
+    }
+    return NegLogDataModule(root=tmp_path_factory.mktemp("neglog"), **options)
@@ -0,0 +1,43 @@
+import pytest
+import torch
+from lobster.data import CalmLightningDataModule
+from lobster.model import LobsterPMLM
+from torch import Size
+
+
+@pytest.fixture  # not autouse: only tests that request `dm` should trigger setup
+def dm(tmp_path):
+    """Return a CalmLightningDataModule after running ``setup(stage="fit")``."""
+    datamodule = CalmLightningDataModule(
+        root="/data/bucket/freyn6/data/",  # NOTE(review): absolute path; confirm it exists where tests run
+        batch_size=8,
+        lengths=(0.8, 0.1, 0.1),
+        train=False,
+        download=False,
+    )
+    datamodule.setup(stage="fit")
+    return datamodule
+
+
+@pytest.mark.skip(reason="Need to mock.")
+class TestCalmLightningDataModule:
+    """Checks split sizes, batch shapes and a LobsterPMLM loss (currently skipped)."""
+
+    def test_setup(self, dm: CalmLightningDataModule):
+        splits = (dm._train_dataset, dm._val_dataset, dm._test_dataset)
+        assert [len(split) for split in splits] == [3481, 435, 435]
+
+        batch, _targets = next(iter(dm.train_dataloader()))
+        expected_shape = Size([8, 1, 512])
+        for key in ("input_ids", "attention_mask", "labels"):
+            assert batch[key].shape == expected_shape
+
+        model = LobsterPMLM(model_name="MLM_mini", max_length=2048)
+        model.eval()
+
+        # Fetch the first batch again from a new dataloader iterator.
+        batch, _targets = next(iter(dm.train_dataloader()))
+        with torch.inference_mode():
+            loss, _ = model._compute_loss(batch)
+
+        assert isinstance(loss, torch.Tensor)