cogent3 · GavinHuttley · Jan 31, 2025 · Jan 31, 2025 · Jan 31, 2025 · Jan 31, 2025
diff --git a/src/ensembl_tui/_annotation.py b/src/ensembl_tui/_annotation.py
@@ -289,12 +289,14 @@ def get_features_matching(
         **kwargs,  # noqa: ANN003
     ) -> typing.Iterator[FeatureDataBase]:
         # add supoport for querying by symbol and description
+        stable_id = stable_id or kwargs.pop("name", None)
         limit = kwargs.pop("limit", None)
         local_vars = locals()
         if kwargs := {
             k: v
             for k, v in local_vars.items()
-            if k not in ("self", "kwargs", "columns", "limit") and v is not None
+            if k not in ("self", "kwargs", "columns", "limit", "local_vars")
+            and v is not None
         }:
             like_conds = (
                 {"description": kwargs.pop("description")} if description else None
@@ -592,7 +594,7 @@ def get_features_matching(
         if kwargs := {
             k: v
             for k, v in local_vars.items()
-            if k not in ("self", "kwargs", "limit") and v is not None
+            if k not in ("self", "kwargs", "limit", "local_vars") and v is not None
         }:
             like_cols = "repeat_type", "repeat_class", "repeat_name"
             like_conds = {k: v for k, v in kwargs.items() if k in like_cols}

diff --git a/src/ensembl_tui/_genome.py b/src/ensembl_tui/_genome.py
@@ -374,7 +374,7 @@ def get_features(
         for ft in self.annotation_db.get_features_matching(
             biotype=biotype,
             seqid=seqid,
-            name=name,
+            stable_id=name,
             start=start,
             stop=stop,
             limit=limit,
@@ -424,11 +424,50 @@ def get_cds(
 
     def get_ids_for_biotype(
         self,
+        *,
         biotype: str,
+        seqid: str | list[str] | None = None,
         limit: OptionalInt = None,
     ) -> typing.Iterable[str]:
+        """returns stable ids of genes with the given biotype
+
+        Parameters
+        ----------
+        biotype
+            biotype to match, passed to the gene query unchanged
+        seqid
+            one seqid, a list of seqids, or None; each value is passed to
+            the gene query in turn and the results are concatenated
+        limit
+            maximum number of ids; passed to each query and, when positive,
+            also used to cap the combined result
+
+        Raises
+        ------
+        ValueError if there is no gene data, raised when the method is
+        called rather than when the result is first iterated
+        """
+        import itertools
+
         genes = self.annotation_db.genes
-        return genes.get_ids_for_biotype(biotype=biotype, limit=limit)
+        if genes is None:
+            msg = f"no gene data for {self.species}"
+            raise ValueError(msg)
+        seqids = [seqid] if isinstance(seqid, str | type(None)) else seqid
+        # a generator expression, not ``yield``, keeps the check above eager
+        stable_ids = (
+            stable_id
+            for name in seqids
+            for stable_id in genes.get_ids_for_biotype(
+                biotype=biotype,
+                seqid=name,
+                limit=limit,
+            )
+        )
+        # each per-seqid query honours limit, so several seqids could exceed it
+        if limit is not None and limit > 0:
+            return itertools.islice(stable_ids, limit)
+        return stable_ids
 
     def close(self) -> None:
         self._seqs.close()

diff --git a/src/ensembl_tui/cli.py b/src/ensembl_tui/cli.py
@@ -15,6 +15,18 @@
 from ensembl_tui import _util as eti_util
 
 
+def _get_coord_names(ctx, param, coord_names) -> list[str] | None:
+    """returns chrom/coord names from a comma separated string or a file"""
+    if coord_names is None:
+        return None
+
+    path = pathlib.Path(coord_names)
+    # a file has one name per line, otherwise the value is comma separated
+    raw = path.read_text().splitlines() if path.is_file() else coord_names.split(",")
+    # skip blanks (empty lines, trailing commas) so "" is never used as a seqid
+    return [name for name in (r.strip() for r in raw) if name]
+
+
 def _get_installed_config_path(ctx, param, path) -> eti_util.PathType:
     """path to installed.cfg"""
     path = pathlib.Path(path)
@@ -154,6 +166,12 @@ def _species_names_from_csv(ctx, param, species) -> list[str] | None:
     callback=_values_from_csv,
     help="Biotypes to mask (comma separated).",
 )
+_coord_names = click.option(
+    "--coord_names",
+    default=None,  # the callback returns None unchanged when the option is absent
+    callback=_get_coord_names,  # splits a csv value, or reads names from a file
+    help="Comma separated list of ref species chrom/coord names or a path leading to names, one per line.",
+)
 
 
 @trogon.tui()
@@ -503,6 +521,7 @@ def alignments(
     help="type of homology",
 )
 @_ref
+@_coord_names
 @_nprocs
 @_limit
 @_force
@@ -512,6 +531,7 @@ def homologs(
     outdir,
     relationship,
     ref,
+    coord_names,
     num_procs,
     limit,
     force_overwrite,
@@ -547,7 +567,14 @@ def homologs(
     if verbose:
         eti_util.print_colour(text=f"Loaded genome for {ref!r}", colour="yellow")
 
-    gene_ids = list(genome.get_ids_for_biotype(biotype="protein_coding"))
+    # we don't use the limit argument for this query since we want the limit
+    # to be the number of homology matches
+    gene_ids = list(
+        genome.get_ids_for_biotype(
+            biotype="protein_coding",
+            seqid=coord_names,
+        ),
+    )
 
     if verbose:
         eti_util.print_colour(

diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -111,8 +111,8 @@ def test_dump_genes(installed):
 
 
 @pytest.mark.slow
-def test_homologs(installed):
-    outdir = installed.parent / "output"
+def test_homologs(installed, tmp_dir):
+    outdir = tmp_dir / "output"
     limit = 10
     args = [
         f"-i{installed}",
@@ -135,3 +135,32 @@ def test_homologs(installed):
     assert r.exit_code == 0, r.output
     dstore = cogent3.open_data_store(outdir, suffix="fa", mode="r")
     assert len(dstore.completed) == limit
+
+
+@pytest.mark.slow
+def test_homologs_coord_name(installed, tmp_dir):
+    outdir = tmp_dir / "output"
+    limit = 10
+    args = [
+        f"-i{installed}",
+        "--ref",
+        "saccharomyces_cerevisiae",
+        "--outdir",
+        f"{outdir}",
+        "--limit",
+        str(limit),
+        "--coord_names",
+        "I,XVI,II",  # comma separated form; the file form is not exercised here
+        "-r",
+        "ortholog_one2one",
+        "-v",
+    ]
+    # NOTE(review): only exit code and output count are checked, not the coord names
+    r = RUNNER.invoke(
+        eti_cli.homologs,
+        args,
+        catch_exceptions=False,
+    )
+    assert r.exit_code == 0, r.output
+    dstore = cogent3.open_data_store(outdir, suffix="fa", mode="r")
+    assert len(dstore.completed) == limit
diff --git a/tests/test_genome.py b/tests/test_genome.py
@@ -172,3 +172,18 @@ def test_get_features(yeast):
 def test_get_ids_for_biotype(yeast):
     features = list(yeast.get_ids_for_biotype(biotype="rRNA", limit=10))
     assert len(features) == 10
+
+
+def test_get_ids_for_biotype_seqid(yeast):
+    stable_ids = list(yeast.get_ids_for_biotype(biotype="protein_coding", seqid="III"))
+    assert len(stable_ids) == 184  # from direct inspection of sql count distinct
+    stable_ids = list(
+        yeast.get_ids_for_biotype(biotype="protein_coding", seqid=["III", "XVI"]),
+    )
+    assert len(stable_ids) == 184 + 511  # from direct inspection of sql count distinct
+    # the seqids of the features for the returned ids must be exactly those requested
+    seqids = {"III", "XVI"}
+    got = {
+        r.seqid for stable_id in stable_ids for r in yeast.get_features(name=stable_id)
+    }
+    assert got == seqids