Skip to content

Commit

Permalink
add basic scaled/moltype/ksize checking via select
Browse files Browse the repository at this point in the history
  • Loading branch information
ctb committed Feb 26, 2025
1 parent 7b94229 commit e042787
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 5 deletions.
6 changes: 6 additions & 0 deletions include/sourmash.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,12 @@ SourmashSignature *disk_revindex_best_containment(const SourmashDiskRevIndex *db

void disk_revindex_free(SourmashDiskRevIndex *ptr);

uint64_t disk_revindex_ksize(const SourmashDiskRevIndex *ptr);

uint64_t disk_revindex_len(const SourmashDiskRevIndex *ptr);

const char *disk_revindex_moltype(const SourmashDiskRevIndex *ptr);

SourmashDiskRevIndex *disk_revindex_new_from_rocksdb(const char *path_ptr);

SourmashSignature *disk_revindex_peek(const SourmashDiskRevIndex *db_ptr,
Expand All @@ -158,6 +162,8 @@ const SourmashSearchResult *const *disk_revindex_prefetch(const SourmashDiskRevI
uint64_t threshold_bp,
uintptr_t *return_size);

uint32_t disk_revindex_scaled(const SourmashDiskRevIndex *ptr);

const SourmashSearchResult *const *disk_revindex_search_jaccard(const SourmashDiskRevIndex *db_ptr,
const SourmashSignature *query_ptr,
double threshold,
Expand Down
25 changes: 23 additions & 2 deletions src/core/src/ffi/index/disk_revindex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::ffi::signature::SourmashSignature;
use crate::ffi::utils::{ForeignObject};
use crate::index::revindex::RevIndex as BasicRevIndex;
use crate::index::revindex::disk_revindex::RevIndex as DDRevIndex;
use std::ffi::CString;
// use crate::collection::Collection;
// use crate::index::Index;
// use crate::prelude::*;
Expand Down Expand Up @@ -161,6 +162,27 @@ pub unsafe extern "C" fn disk_revindex_len(ptr: *const SourmashDiskRevIndex) ->
revindex.collection().len() as u64
}

Check warning on line 163 in src/core/src/ffi/index/disk_revindex.rs

View check run for this annotation

Codecov / codecov/patch

src/core/src/ffi/index/disk_revindex.rs#L160-L163

Added lines #L160 - L163 were not covered by tests

/// Report the k-mer size of the on-disk reverse index behind `ptr`.
///
/// TODO(@CTB): the returned value is a hard-coded placeholder (31); it is
/// not yet read from the index.
///
/// # Safety
///
/// `ptr` is passed to `SourmashDiskRevIndex::as_rust`.
/// NOTE(review): presumably it must be a valid, non-null handle obtained
/// from this library - confirm against `as_rust`.
#[no_mangle]
pub unsafe extern "C" fn disk_revindex_ksize(ptr: *const SourmashDiskRevIndex) -> u64 {
    // Placeholder k-mer size reported for every index.
    const PLACEHOLDER_KSIZE: u64 = 31;

    // The handle is converted but not consulted yet; the leading underscore
    // marks the binding as intentionally unused.
    let _revindex = SourmashDiskRevIndex::as_rust(ptr);

    PLACEHOLDER_KSIZE
}

Check warning on line 169 in src/core/src/ffi/index/disk_revindex.rs

View check run for this annotation

Codecov / codecov/patch

src/core/src/ffi/index/disk_revindex.rs#L166-L169

Added lines #L166 - L169 were not covered by tests

/// Report a `scaled` value for the collection held by the on-disk reverse
/// index behind `ptr`.
///
/// The value is the second element of the pair produced by
/// `collection().min_max_scaled()` (presumably the maximum, going by the
/// method name - confirm).
///
/// # Panics
///
/// Panics with "no records!?" when `min_max_scaled()` has no value to hand
/// back.
/// NOTE(review): this panic happens inside an `extern "C"` function;
/// consider routing it through the crate's FFI error handling instead.
///
/// # Safety
///
/// `ptr` is passed to `SourmashDiskRevIndex::as_rust`.
/// NOTE(review): presumably it must be a valid, non-null handle obtained
/// from this library - confirm against `as_rust`.
#[no_mangle]
pub unsafe extern "C" fn disk_revindex_scaled(ptr: *const SourmashDiskRevIndex) -> u32 {
    let index = SourmashDiskRevIndex::as_rust(ptr);
    let scaled_bounds = index.collection().min_max_scaled();

    // Only the second element of the pair is reported to the caller.
    let (_, upper) = scaled_bounds.expect("no records!?");
    *upper
}

Check warning on line 176 in src/core/src/ffi/index/disk_revindex.rs

View check run for this annotation

Codecov / codecov/patch

src/core/src/ffi/index/disk_revindex.rs#L172-L176

Added lines #L172 - L176 were not covered by tests

#[no_mangle]
pub unsafe extern "C" fn disk_revindex_moltype(ptr: *const SourmashDiskRevIndex) -> *const c_char {
let s = "DNA";

let c_string = CString::new(s).expect("foo");
c_string.as_ptr()
}

Check warning on line 184 in src/core/src/ffi/index/disk_revindex.rs

View check run for this annotation

Codecov / codecov/patch

src/core/src/ffi/index/disk_revindex.rs#L179-L184

Added lines #L179 - L184 were not covered by tests

ffi_fn! {
unsafe fn disk_revindex_signatures(
ptr: *const SourmashDiskRevIndex,
Expand Down Expand Up @@ -295,7 +317,6 @@ unsafe fn disk_revindex_search_jaccard(
// extract KmerMinHash for query
let query_mh: KmerMinHash = sig.clone()
.try_into().expect("cannot get kmerminhash");
let scaled = query_mh.scaled();

// do search
let counter = revindex.counter_for_query(&query_mh);
Expand All @@ -306,7 +327,7 @@ unsafe fn disk_revindex_search_jaccard(
let results: Vec<(f64, Signature, String)> = counter
.most_common()
.into_iter()
.filter_map(|(dataset_id, size)| {
.filter_map(|(dataset_id, _size)| {
let filename = "some rocksdb database";
let sig: Signature = revindex
.collection()
Expand Down
23 changes: 20 additions & 3 deletions src/sourmash/index/revindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,26 @@ def __init__(self, path):
def __len__(self):
    """Return the length reported by the ``disk_revindex_len`` FFI call."""
    n_entries = self._methodcall(lib.disk_revindex_len)
    return n_entries

def select(self, ksize=None, moltype=None, scaled=None, num=None,
           abund=None, containment=None, picklist=None):
    """Check that this index is compatible with the requested parameters.

    Nothing is filtered: the index is either compatible (and is returned
    unchanged) or a ``ValueError`` is raised.

    Parameters:
        ksize: if given, must equal the k-mer size reported by the index.
        moltype: if given, must equal the molecule type reported by the
            index.
        scaled: if given, must be an ``int`` that is not smaller than the
            scaled value reported by the index.
        num, abund: must be ``None`` (enforced with ``assert``).
        containment, picklist: accepted but ignored.

    Returns:
        ``self``.

    Raises:
        ValueError: if ``ksize``, ``scaled`` or ``moltype`` is incompatible
            with the index.
    """
    assert abund is None
    assert num is None
    # ignore containment!

    # Each property is fetched over FFI only when the caller actually
    # constrains it, so an unconstrained select() makes no FFI calls.
    if ksize is not None:
        my_ksize = self._methodcall(lib.disk_revindex_ksize)
        if ksize != my_ksize:
            raise ValueError(f"revindex ksize is {my_ksize}, not {ksize}")

    if scaled is not None:
        my_scaled = self._methodcall(lib.disk_revindex_scaled)
        # Check the type first so that a non-integer value is reported as a
        # ValueError instead of failing inside the ``<`` comparison.
        if type(scaled) is not int or scaled < my_scaled:
            raise ValueError(f"revindex scaled is {my_scaled}, not {scaled}")

    if moltype is not None:
        # The FFI call returns a C ``const char *``; it has to be converted
        # to a Python str before it can be compared with ``moltype``.
        raw_moltype = self._methodcall(lib.disk_revindex_moltype)
        my_moltype = ffi.string(raw_moltype).decode("utf-8")
        if moltype != my_moltype:
            raise ValueError(f"revindex moltype is {my_moltype}, not {moltype}")

    return self

def signatures(self):
size = ffi.new("uintptr_t *")
sigs_ptr = self._methodcall(lib.disk_revindex_signatures, size)
Expand Down Expand Up @@ -391,6 +411,3 @@ def peek(self, query_mh, *, threshold_bp=0):

def consume(self, *args, **kwargs):
    """Accept and ignore any arguments; this method is a no-op."""
    return None

def select(self, *args, **kwargs):
return self
15 changes: 15 additions & 0 deletions tests/test_revindex.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import pytest
import sourmash_tst_utils as utils

from sourmash.index import revindex
Expand Down Expand Up @@ -186,3 +187,17 @@ def test_rocksdb_prefetch():
assert round(match.score, 5) == 0.48851

assert len(matches) == 2


def test_rocksdb_ksize_wrong():
    """select() must raise ValueError for a ksize the index does not have."""
    db_location = utils.get_test_data("3sigs.branch_0913.rocksdb")
    index = DiskRevIndex(db_location)

    with pytest.raises(ValueError):
        index.select(ksize=21)


def test_rocksdb_ksize():
    """select() with the index's own ksize hands back the index itself."""
    rocksdb_path = utils.get_test_data("3sigs.branch_0913.rocksdb")
    db = DiskRevIndex(rocksdb_path)

    # select() returns the very same object, so check identity directly.
    selected = db.select(ksize=31)
    assert selected is db

0 comments on commit e042787

Please sign in to comment.