diff --git a/src/core/src/collection.rs b/src/core/src/collection.rs index 980682141d..bc03274c41 100644 --- a/src/core/src/collection.rs +++ b/src/core/src/collection.rs @@ -12,11 +12,17 @@ use crate::{Error, Result}; #[cfg(feature = "parallel")] use rayon::prelude::*; +/// A Manifest and Storage, combined. Can contain any collection of signatures. + +#[derive(Clone)] pub struct Collection { manifest: Manifest, storage: InnerStorage, } +/// A consistent collection of signatures. Can be created using `select`. + +#[derive(Clone)] pub struct CollectionSet { collection: Collection, } @@ -129,6 +135,17 @@ impl Collection { }) } + #[cfg(all(feature = "branchwater", not(target_arch = "wasm32")))] + pub fn from_rocksdb>(dirname: P) -> Result { + use crate::index::revindex::{RevIndex, RevIndexOps}; + + let path = dirname.as_ref().as_str().to_string(); + let index = RevIndex::open(path, true, None)?; + let collection: Collection = index.collection().clone().into_inner(); + + Ok(collection) + } + pub fn from_sigs(sigs: Vec) -> Result { let storage = MemStorage::new(); @@ -219,6 +236,7 @@ mod test { use crate::prelude::Select; use crate::selection::Selection; use crate::signature::Signature; + use crate::Result; #[test] fn sigstore_selection_with_downsample() { @@ -416,4 +434,54 @@ mod test { assert_eq!(this_mh.scaled(), 100); } } + + #[test] + #[cfg(all(feature = "branchwater", not(target_arch = "wasm32")))] + fn collection_from_rocksdb_storage() -> Result<()> { + use crate::index::revindex::{RevIndex, RevIndexOps}; + use camino::Utf8PathBuf as PathBuf; + use tempfile::TempDir; + + let basedir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + + let mut zip_collection = basedir.clone(); + zip_collection.push("../../tests/test-data/track_abund/track_abund.zip"); + + let outdir = TempDir::new()?; + + let zip_copy = PathBuf::from( + outdir + .path() + .join("sigs.zip") + .into_os_string() + .into_string() + .unwrap(), + ); + std::fs::copy(zip_collection, zip_copy.as_path())?; + 
let selection = Selection::builder().ksize(31).scaled(10000).build(); + let collection = Collection::from_zipfile(zip_copy.as_path())?.select(&selection)?; + let output: PathBuf = outdir.path().join("index").try_into().unwrap(); + + // Step 1: create an index + let index = RevIndex::create(output.as_path(), collection.clone().try_into()?, false)?; + + // Step 2: internalize the storage for the index + { + let mut index = index; + index + .internalize_storage() + .expect("Error internalizing storage"); + } + + // Step 3: create a new collection from rocksdb + let new_collection = Collection::from_rocksdb(output.as_path())?; + + // Step 4: assert all content is the same. NOTE(review): `zip` stops at the shorter iterator, so this loop alone would not catch a length mismatch; consider also comparing lengths. + for (a, b) in collection.iter().zip(new_collection.iter()) { + assert_eq!(a, b); + } + + Ok(()) + } } diff --git a/src/core/src/from.rs b/src/core/src/from.rs index dbeeb58a2f..347d90afc5 100644 --- a/src/core/src/from.rs +++ b/src/core/src/from.rs @@ -16,7 +16,7 @@ impl From for KmerMinHash { let mut new_mh = KmerMinHash::new( 0, - values.get(0).unwrap().kmer.len() as u32, + values.first().unwrap().kmer.len() as u32, HashFunctions::Murmur64Dna, 42, true, diff --git a/src/core/src/index/linear.rs b/src/core/src/index/linear.rs index 71c7aef30a..5fd0c11e1e 100644 --- a/src/core/src/index/linear.rs +++ b/src/core/src/index/linear.rs @@ -17,6 +17,8 @@ use crate::sketch::Sketch; use crate::storage::SigStore; use crate::Result; +/// Supports parallel search without a particular index. 
+ pub struct LinearIndex { collection: CollectionSet, template: Sketch, diff --git a/src/core/src/index/revindex/disk_revindex.rs b/src/core/src/index/revindex/disk_revindex.rs index 3b4434824e..511bdbcc4c 100644 --- a/src/core/src/index/revindex/disk_revindex.rs +++ b/src/core/src/index/revindex/disk_revindex.rs @@ -364,7 +364,7 @@ impl RevIndexOps for RevIndex { orig_query: &KmerMinHash, selection: Option, ) -> Result> { - let mut match_size = usize::max_value(); + let mut match_size = usize::MAX; let mut matches = vec![]; let mut query = KmerMinHashBTree::from(orig_query.clone()); let mut sum_weighted_found = 0; @@ -554,8 +554,9 @@ impl RevIndexOps for RevIndex { // Using unchecked version because we just used the manifest // above to make sure the storage is still consistent unsafe { - Arc::get_mut(&mut self.collection) - .map(|v| v.set_storage_unchecked(InnerStorage::new(new_storage))); + if let Some(v) = Arc::get_mut(&mut self.collection) { // NOTE(review): Arc::get_mut yields None while the Arc is shared, in which case the storage is silently left unchanged; confirm this is intended. + v.set_storage_unchecked(InnerStorage::new(new_storage)) + } } // write storage spec diff --git a/src/core/src/index/revindex/mem_revindex.rs b/src/core/src/index/revindex/mem_revindex.rs index 20f4c3aabd..64580d6ac2 100644 --- a/src/core/src/index/revindex/mem_revindex.rs +++ b/src/core/src/index/revindex/mem_revindex.rs @@ -204,7 +204,7 @@ impl RevIndex { threshold: usize, query: &KmerMinHash, ) -> Result> { - let mut match_size = usize::max_value(); + let mut match_size = usize::MAX; let mut matches = vec![]; while match_size > threshold && !counter.is_empty() { diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs index b5325189b5..0a84a3e495 100644 --- a/src/core/src/manifest.rs +++ b/src/core/src/manifest.rs @@ -15,6 +15,8 @@ use crate::signature::SigsTrait; use crate::sketch::Sketch; use crate::Result; +/// Individual manifest record, containing information about sketches. 
+ #[derive(Debug, Serialize, Deserialize, Clone, CopyGetters, Getters, Setters, PartialEq, Eq)] pub struct Record { #[getset(get = "pub", set = "pub")] @@ -30,8 +32,13 @@ pub struct Record { moltype: String, + #[getset(get = "pub")] num: u32, + + #[getset(get = "pub")] scaled: u64, + + #[getset(get = "pub")] n_hashes: usize, #[getset(get_copy = "pub", set = "pub")] @@ -73,12 +80,15 @@ where } } +/// A description of a collection of sketches. + #[derive(Debug, Default, Serialize, Deserialize, Clone)] pub struct Manifest { records: Vec, } impl Record { + /// Build the Vec of records for a Signature by mapping over each of its sketches pub fn from_sig(sig: &Signature, path: &str) -> Vec { sig.iter() .map(|sketch| { diff --git a/src/core/src/storage/mod.rs b/src/core/src/storage/mod.rs index c476baff56..12f456fc22 100644 --- a/src/core/src/storage/mod.rs +++ b/src/core/src/storage/mod.rs @@ -112,6 +112,7 @@ pub struct FSStorage { subdir: String, } +/// Store files in a zip file. #[ouroboros::self_referencing] pub struct ZipStorage { mapping: Option, diff --git a/src/core/src/storage/rocksdb.rs b/src/core/src/storage/rocksdb.rs index 0bf540aaf1..52b082af18 100644 --- a/src/core/src/storage/rocksdb.rs +++ b/src/core/src/storage/rocksdb.rs @@ -47,7 +47,7 @@ impl Storage for RocksDBStorage { fn save(&self, path: &str, content: &[u8]) -> Result { let cf_storage = self.db.cf_handle(STORAGE).unwrap(); // TODO(lirber): deal with conflict for path? - self.db.put_cf(&cf_storage, path.as_bytes(), &content[..])?; + self.db.put_cf(&cf_storage, path.as_bytes(), content)?; Ok(path.into()) }