Skip to content

Commit

Permalink
Merge pull request #121 from GavinHuttley/develop
Browse files: browse the repository at this point in the history
Refactor for performance querying for homologs
  • Loading branch information
GavinHuttley authored Jun 14, 2024
2 parents 7424e3b + e03c98b commit f822015
Show file tree
Hide file tree
Showing 10 changed files with 335 additions and 131 deletions.
3 changes: 2 additions & 1 deletion src/ensembl_lite/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ def write_installed_cfg(config: Config) -> PathType:

def read_installed_cfg(path: PathType) -> InstalledConfig:
"""reads an ini file under config.installed_path"""
path = pathlib.Path(path).expanduser()
parser = configparser.ConfigParser()
path = (
path if path.name == INSTALLED_CONFIG_NAME else (path / INSTALLED_CONFIG_NAME)
Expand All @@ -228,7 +229,7 @@ def read_installed_cfg(path: PathType) -> InstalledConfig:
return InstalledConfig(release=release, install_path=path.parent)


def _standardise_path(path: str, config_path: pathlib.Path) -> pathlib.Path:
def _standardise_path(path: PathType, config_path: pathlib.Path) -> pathlib.Path:
path = pathlib.Path(path).expanduser()
return path if path.is_absolute() else (config_path / path).resolve()

Expand Down
7 changes: 5 additions & 2 deletions src/ensembl_lite/_db_base.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import contextlib
import dataclasses
import sqlite3

Expand Down Expand Up @@ -146,10 +147,12 @@ def __setstate__(self, state):
obj._file = None

def __del__(self):
if self._is_open and self._file is not None:
with contextlib.suppress(ValueError, AttributeError):
self._file.flush()
if self._file is not None:

with contextlib.suppress(AttributeError):
self._file.close()

self._is_open = False

def close(self):
Expand Down
10 changes: 8 additions & 2 deletions src/ensembl_lite/_genomedb.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,10 @@ def custom_gff_parser(
reduced[record].start = min(reduced[record].start, record.start)
reduced[record].stop = max(reduced[record].stop, record.stop)

# make sure feature location data is sorted
for record in reduced.values():
record.spans = sorted([sorted(span) for span in record.spans])

return reduced, num_fake_ids


Expand Down Expand Up @@ -562,7 +566,6 @@ def main(self, db_name: str) -> bool:

src_dir = src_dir / "fasta"
for path in src_dir.glob("*.fa.gz"):
# for label, seq in quicka_parser(path, one_seq=False):
for label, seq in quicka_parser(path):
seqid = self.label_to_name(label)
seq_store.add_record(seq, seqid)
Expand Down Expand Up @@ -847,7 +850,10 @@ def get_gene_cds(self, name: str, is_canonical: bool = True):
stop = cds["spans"].max()
seq = self.get_seq(seqid=seqid, start=start, stop=stop)
cds["spans"] = cds["spans"] - start
yield seq.make_feature(feature=cds)
try:
yield seq.make_feature(feature=cds)
except ValueError:
raise ValueError(f"invalid location data for {cds!r}")

def get_ids_for_biotype(self, biotype: str, limit: OptionalInt = None):
annot_db = self.annotation_db
Expand Down
Loading

0 comments on commit f822015

Please sign in to comment.