Skip to content

Commit

Permalink
Merge pull request #191 from GavinHuttley/develop
Browse files Browse the repository at this point in the history
MAINT: mixed patches
  • Loading branch information
GavinHuttley authored Feb 21, 2025
2 parents 1086bf0 + d041d11 commit 3e53515
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 16 deletions.
2 changes: 1 addition & 1 deletion src/ensembl_tui/_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

_no_gaps = numpy.array([], dtype=_DEFAULT_GAP_DTYPE)

ALIGN_STORE_SUFFIX = "align_coords-sqlitedb"
ALIGN_STORE_SUFFIX = "parquet"

ALIGN_ATTR_SCHEMA = (
"align_id INTEGER PRIMARY KEY DEFAULT nextval('align_id_seq')",
Expand Down
21 changes: 15 additions & 6 deletions src/ensembl_tui/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,11 +200,16 @@ def path_to_alignment(self, pattern: str, suffix: str) -> pathlib.Path | None:
pattern
glob pattern for the Ensembl alignment name
"""
align_dirs = [
d
for d in self.aligns_path.glob("*")
if fnmatch.fnmatch(d.stem, pattern) and d.name.endswith(suffix)
]
if eti_util.contains_glob_pattern(pattern):
align_dirs = [
d
for d in self.aligns_path.glob("*")
if fnmatch.fnmatch(d.name, pattern)
]
elif pattern:
align_dirs = [d for d in self.aligns_path.glob("*") if pattern in d.name]
else:
align_dirs = None
if not align_dirs:
return None

Expand All @@ -214,7 +219,11 @@ def path_to_alignment(self, pattern: str, suffix: str) -> pathlib.Path | None:
msg,
)

return align_dirs[0]
align_dir = align_dirs[0]
if not list(align_dir.glob(f"*{suffix}")):
msg = f"{align_dir} does not contain file with suffix {suffix}"
raise FileNotFoundError(msg)
return align_dir


def write_installed_cfg(config: Config) -> eti_util.PathType:
Expand Down
7 changes: 7 additions & 0 deletions src/ensembl_tui/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,3 +571,10 @@ def tempdir(working_dir: pathlib.Path | str | None = None) -> pathlib.Path:

def make_column_constant(schema: tuple[str, ...]) -> tuple[str, ...]:
    """Return the first whitespace-delimited token of every schema entry.

    Parameters
    ----------
    schema
        strings whose leading token is wanted, presumably column
        definitions of the form "<name> <type> ..." (verify against callers)

    Returns
    -------
    A tuple, in the same order as ``schema``, holding the first token of
    each entry.

    Raises
    ------
    IndexError
        if an entry is empty or contains only whitespace, since there is
        no first token to take
    """
    return tuple(c.split()[0] for c in schema)


# matches any one of the characters * ? [ ]
_has_wildcard = re.compile(r"[*?\[\]]")


def contains_glob_pattern(s: str) -> bool:
    """Return True if ``s`` contains at least one of ``*``, ``?``, ``[`` or ``]``.

    Parameters
    ----------
    s
        the text to inspect

    Notes
    -----
    This is a purely lexical check: a single unpaired bracket is enough
    to return True, and an empty string returns False.
    """
    return _has_wildcard.search(s) is not None
18 changes: 18 additions & 0 deletions src/ensembl_tui/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,7 @@ def species_summary(installed, species):
@_outdir
@_align_name
@_ref
@_coord_names
@_ref_genes_file
@_mask_features
@_limit
Expand All @@ -397,6 +398,7 @@ def alignments(
outdir,
align_name,
ref,
coord_names,
ref_genes_file,
mask_features,
limit,
Expand Down Expand Up @@ -430,6 +432,14 @@ def alignments(
text=f"{align_name!r} does not match any alignments under {str(config.aligns_path)!r}",
colour="red",
)
available = "\n".join(
[
fn.stem
for fn in config.aligns_path.glob("*")
if not fn.name.startswith(".") and fn.is_dir()
],
)
eti_util.print_colour(text=f"Available alignments:\n{available}", colour="red")
sys.exit(1)

align_db = eti_align.AlignDb(source=align_path)
Expand Down Expand Up @@ -463,6 +473,14 @@ def alignments(
)
sys.exit(1)
stableids = table.columns["stableid"]
elif coord_names:
genome = genomes[ref_species]
stableids = list(
genome.get_ids_for_biotype(
biotype="protein_coding",
seqid=coord_names,
),
)
else:
stableids = None

Expand Down
33 changes: 24 additions & 9 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,30 +43,45 @@ def test_installed_config_hash():
@pytest.fixture
def installed_aligns(tmp_path):
align_dir = tmp_path / eti_config._COMPARA_NAME / eti_config._ALIGNS_NAME
align_dir.mkdir(parents=True, exist_ok=True)
# make two alignment paths with similar names
(align_dir / f"10_primates.epo.{eti_align.ALIGN_STORE_SUFFIX}").open(mode="w")
(align_dir / f"24_primates.epo_extended.{eti_align.ALIGN_STORE_SUFFIX}").open(
mode="w",
)
names = "10_primates.epo", "24_primates.epo_extended"
for name in names:
dirname = align_dir / name
dirname.mkdir(parents=True, exist_ok=True)
(dirname / f"align_blocks.{eti_align.ALIGN_STORE_SUFFIX}").open(mode="w")
return eti_config.InstalledConfig(release="11", install_path=tmp_path)


@pytest.mark.parametrize("pattern", ("10*", "1*prim*", "10_p*", "10_primates.epo"))
@pytest.fixture
def incomplete_installed(installed_aligns):
    """Return ``installed_aligns`` after deleting its alignment store files.

    Every file one directory below ``aligns_path`` whose name ends with
    ``.<ALIGN_STORE_SUFFIX>`` is removed; the directories themselves are
    left in place.
    """
    store_pattern = f"*/*.{eti_align.ALIGN_STORE_SUFFIX}"
    for store_file in installed_aligns.aligns_path.glob(store_pattern):
        store_file.unlink()
    return installed_aligns


@pytest.mark.parametrize("pattern", ["10*", "1*prim*", "10_p*", "10_primates.epo"])
def test_get_alignment_path(installed_aligns, pattern):
got = installed_aligns.path_to_alignment(pattern, eti_align.ALIGN_STORE_SUFFIX)
assert got.name == f"10_primates.epo.{eti_align.ALIGN_STORE_SUFFIX}"
assert got.name == "10_primates.epo"


def test_get_alignment_path_incomplete(incomplete_installed):
    """path_to_alignment raises FileNotFoundError when the store files are gone."""
    with pytest.raises(FileNotFoundError):
        incomplete_installed.path_to_alignment("10*", eti_align.ALIGN_STORE_SUFFIX)


@pytest.mark.parametrize("pattern", ("10pri*", "blah-blah", ""))
@pytest.mark.parametrize("pattern", ["10pri*", "blah-blah", ""])
def test_get_alignment_path_invalid(installed_aligns, pattern):
assert (
installed_aligns.path_to_alignment(pattern, eti_align.ALIGN_STORE_SUFFIX)
is None
)


@pytest.mark.parametrize("pattern", ("*pri*", "*epo*"))
@pytest.mark.parametrize("pattern", ["*pri*", "*epo*"])
def test_get_alignment_path_multiple(installed_aligns, pattern):
with pytest.raises(ValueError):
installed_aligns.path_to_alignment(pattern, eti_align.ALIGN_STORE_SUFFIX)
Expand Down

0 comments on commit 3e53515

Please sign in to comment.