From e252787268025c1e733d642052fefbe89c0bf599 Mon Sep 17 00:00:00 2001 From: Oliver Schonrock Date: Tue, 3 Dec 2024 07:44:58 +0000 Subject: [PATCH 1/4] make this repo depend on the up2date xor_singleheader lib via a gitsubmodule at https://github.com/FastFilter/xor_singleheader.git rather than an older and local copy of that lib --- .gitmodules | 3 + dependencies/xor_singleheader | 1 + .../include/binaryfusefilter.h | 740 ---------- .../xor_singleheader/include/xorfilter.h | 1283 ----------------- 4 files changed, 4 insertions(+), 2023 deletions(-) create mode 160000 dependencies/xor_singleheader delete mode 100644 dependencies/xor_singleheader/include/binaryfusefilter.h delete mode 100644 dependencies/xor_singleheader/include/xorfilter.h diff --git a/.gitmodules b/.gitmodules index 7f9cfe6..37f2f13 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "dependencies/fastfilter_cpp"] path = dependencies/fastfilter_cpp url = https://github.com/FastFilter/fastfilter_cpp.git +[submodule "dependencies/xor_singleheader"] + path = dependencies/xor_singleheader + url = https://github.com/FastFilter/xor_singleheader.git diff --git a/dependencies/xor_singleheader b/dependencies/xor_singleheader new file mode 160000 index 0000000..3c0fd15 --- /dev/null +++ b/dependencies/xor_singleheader @@ -0,0 +1 @@ +Subproject commit 3c0fd15b1e04281b2ada00cf82ddffc4b3292dee diff --git a/dependencies/xor_singleheader/include/binaryfusefilter.h b/dependencies/xor_singleheader/include/binaryfusefilter.h deleted file mode 100644 index 5cc1651..0000000 --- a/dependencies/xor_singleheader/include/binaryfusefilter.h +++ /dev/null @@ -1,740 +0,0 @@ -#ifndef BINARYFUSEFILTER_H -#define BINARYFUSEFILTER_H -#include -#include -#include -#include -#include -#include -#include -#ifndef XOR_MAX_ITERATIONS -#define XOR_MAX_ITERATIONS \ - 100 // probability of success should always be > 0.5 so 100 iterations is - // highly unlikely -#endif - -static int binary_fuse_cmpfunc(const void * a, 
const void * b) { - return ( *(const uint64_t*)a - *(const uint64_t*)b ); -} - -static size_t binary_fuse_sort_and_remove_dup(uint64_t* keys, size_t length) { - qsort(keys, length, sizeof(uint64_t), binary_fuse_cmpfunc); - size_t j = 0; - for(size_t i = 1; i < length; i++) { - if(keys[i] != keys[i-1]) { - keys[j] = keys[i]; - j++; - } - } - return j+1; -} - -/** - * We start with a few utilities. - ***/ -static inline uint64_t binary_fuse_murmur64(uint64_t h) { - h ^= h >> 33; - h *= UINT64_C(0xff51afd7ed558ccd); - h ^= h >> 33; - h *= UINT64_C(0xc4ceb9fe1a85ec53); - h ^= h >> 33; - return h; -} -static inline uint64_t binary_fuse_mix_split(uint64_t key, uint64_t seed) { - return binary_fuse_murmur64(key + seed); -} -static inline uint64_t binary_fuse_rotl64(uint64_t n, unsigned int c) { - return (n << (c & 63)) | (n >> ((-c) & 63)); -} -static inline uint32_t binary_fuse_reduce(uint32_t hash, uint32_t n) { - // http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ - return (uint32_t)(((uint64_t)hash * n) >> 32); -} -static inline uint64_t binary_fuse8_fingerprint(uint64_t hash) { - return hash ^ (hash >> 32); -} - -/** - * We need a decent random number generator. 
- **/ - -// returns random number, modifies the seed -static inline uint64_t binary_fuse_rng_splitmix64(uint64_t *seed) { - uint64_t z = (*seed += UINT64_C(0x9E3779B97F4A7C15)); - z = (z ^ (z >> 30)) * UINT64_C(0xBF58476D1CE4E5B9); - z = (z ^ (z >> 27)) * UINT64_C(0x94D049BB133111EB); - return z ^ (z >> 31); -} - -typedef struct binary_fuse8_s { - uint64_t Seed; - uint32_t SegmentLength; - uint32_t SegmentLengthMask; - uint32_t SegmentCount; - uint32_t SegmentCountLength; - uint32_t ArrayLength; - uint8_t *Fingerprints; -} binary_fuse8_t; - -// #ifdefs adapted from: -// https://stackoverflow.com/a/50958815 -#ifdef __SIZEOF_INT128__ // compilers supporting __uint128, e.g., gcc, clang -static inline uint64_t binary_fuse_mulhi(uint64_t a, uint64_t b) { - return ((__uint128_t)a * b) >> 64; -} -#elif defined(_M_X64) || defined(_MARM64) // MSVC -static inline uint64_t binary_fuse_mulhi(uint64_t a, uint64_t b) { - return __umulh(a, b); -} -#elif defined(_M_IA64) // also MSVC -static inline uint64_t binary_fuse_mulhi(uint64_t a, uint64_t b) { - unsigned __int64 hi; - (void) _umul128(a, b, &hi); - return hi; -} -#else // portable implementation using uint64_t -static inline uint64_t binary_fuse_mulhi(uint64_t a, uint64_t b) { - // Adapted from: - // https://stackoverflow.com/a/51587262 - - /* - This is implementing schoolbook multiplication: - - a1 a0 - X b1 b0 - ------------- - 00 LOW PART - ------------- - 00 - 10 10 MIDDLE PART - + 01 - ------------- - 01 - + 11 11 HIGH PART - ------------- - */ - - const uint64_t a0 = (uint32_t) a; - const uint64_t a1 = a >> 32; - const uint64_t b0 = (uint32_t) b; - const uint64_t b1 = b >> 32; - const uint64_t p11 = a1 * b1; - const uint64_t p01 = a0 * b1; - const uint64_t p10 = a1 * b0; - const uint64_t p00 = a0 * b0; - - // 64-bit product + two 32-bit values - const uint64_t middle = p10 + (p00 >> 32) + (uint32_t) p01; - - /* - Proof that 64-bit products can accumulate two more 32-bit values - without overflowing: - - Max 32-bit 
value is 2^32 - 1. - PSum = (2^32-1) * (2^32-1) + (2^32-1) + (2^32-1) - = 2^64 - 2^32 - 2^32 + 1 + 2^32 - 1 + 2^32 - 1 - = 2^64 - 1 - Therefore the high half below cannot overflow regardless of input. - */ - - // high half - return p11 + (middle >> 32) + (p01 >> 32); - - // low half (which we don't care about, but here it is) - // (middle << 32) | (uint32_t) p00; -} -#endif - -typedef struct binary_hashes_s { - uint32_t h0; - uint32_t h1; - uint32_t h2; -} binary_hashes_t; - -static inline binary_hashes_t binary_fuse8_hash_batch(uint64_t hash, - const binary_fuse8_t *filter) { - uint64_t hi = binary_fuse_mulhi(hash, filter->SegmentCountLength); - binary_hashes_t ans; - ans.h0 = (uint32_t)hi; - ans.h1 = ans.h0 + filter->SegmentLength; - ans.h2 = ans.h1 + filter->SegmentLength; - ans.h1 ^= (uint32_t)(hash >> 18) & filter->SegmentLengthMask; - ans.h2 ^= (uint32_t)(hash)&filter->SegmentLengthMask; - return ans; -} - -static inline uint32_t binary_fuse8_hash(int index, uint64_t hash, - const binary_fuse8_t *filter) { - uint64_t h = binary_fuse_mulhi(hash, filter->SegmentCountLength); - h += index * filter->SegmentLength; - // keep the lower 36 bits - uint64_t hh = hash & ((1UL << 36) - 1); - // index 0: right shift by 36; index 1: right shift by 18; index 2: no shift - h ^= (size_t)((hh >> (36 - 18 * index)) & filter->SegmentLengthMask); - return h; -} - -// Report if the key is in the set, with false positive rate. -static inline bool binary_fuse8_contain(uint64_t key, - const binary_fuse8_t *filter) { - uint64_t hash = binary_fuse_mix_split(key, filter->Seed); - uint8_t f = binary_fuse8_fingerprint(hash); - binary_hashes_t hashes = binary_fuse8_hash_batch(hash, filter); - f ^= filter->Fingerprints[hashes.h0] ^ filter->Fingerprints[hashes.h1] ^ - filter->Fingerprints[hashes.h2]; - return f == 0; -} - -static inline uint32_t binary_fuse_calculate_segment_length(uint32_t arity, - uint32_t size) { - // These parameters are very sensitive. 
Replacing 'floor' by 'round' can - // substantially affect the construction time. - if (arity == 3) { - return ((uint32_t)1) << (int)(floor(log((double)(size)) / log(3.33) + 2.25)); - } else if (arity == 4) { - return ((uint32_t)1) << (int)(floor(log((double)(size)) / log(2.91) - 0.5)); - } else { - return 65536; - } -} - -static inline double binary_fuse_max(double a, double b) { - if (a < b) { - return b; - } - return a; -} - -static inline double binary_fuse_calculate_size_factor(uint32_t arity, - uint32_t size) { - if (arity == 3) { - return binary_fuse_max(1.125, 0.875 + 0.25 * log(1000000.0) / log((double)size)); - } else if (arity == 4) { - return binary_fuse_max(1.075, 0.77 + 0.305 * log(600000.0) / log((double)size)); - } else { - return 2.0; - } -} - -// allocate enough capacity for a set containing up to 'size' elements -// caller is responsible to call binary_fuse8_free(filter) -// size should be at least 2. -static inline bool binary_fuse8_allocate(uint32_t size, - binary_fuse8_t *filter) { - uint32_t arity = 3; - filter->SegmentLength = size == 0 ? 4 : binary_fuse_calculate_segment_length(arity, size); - if (filter->SegmentLength > 262144) { - filter->SegmentLength = 262144; - } - filter->SegmentLengthMask = filter->SegmentLength - 1; - double sizeFactor = size <= 1 ? 0 : binary_fuse_calculate_size_factor(arity, size); - uint32_t capacity = size <= 1 ? 
0 : (uint32_t)(round((double)size * sizeFactor)); - uint32_t initSegmentCount = - (capacity + filter->SegmentLength - 1) / filter->SegmentLength - - (arity - 1); - filter->ArrayLength = (initSegmentCount + arity - 1) * filter->SegmentLength; - filter->SegmentCount = - (filter->ArrayLength + filter->SegmentLength - 1) / filter->SegmentLength; - if (filter->SegmentCount <= arity - 1) { - filter->SegmentCount = 1; - } else { - filter->SegmentCount = filter->SegmentCount - (arity - 1); - } - filter->ArrayLength = - (filter->SegmentCount + arity - 1) * filter->SegmentLength; - filter->SegmentCountLength = filter->SegmentCount * filter->SegmentLength; - filter->Fingerprints = (uint8_t*)malloc(filter->ArrayLength); - return filter->Fingerprints != NULL; -} - -// report memory usage -static inline size_t binary_fuse8_size_in_bytes(const binary_fuse8_t *filter) { - return filter->ArrayLength * sizeof(uint8_t) + sizeof(binary_fuse8_t); -} - -// release memory -static inline void binary_fuse8_free(binary_fuse8_t *filter) { - free(filter->Fingerprints); - filter->Fingerprints = NULL; - filter->Seed = 0; - filter->SegmentLength = 0; - filter->SegmentLengthMask = 0; - filter->SegmentCount = 0; - filter->SegmentCountLength = 0; - filter->ArrayLength = 0; -} - -static inline uint8_t binary_fuse_mod3(uint8_t x) { - return x > 2 ? x - 3 : x; -} - -// Construct the filter, returns true on success, false on failure. -// The algorithm fails when there is insufficient memory. -// The caller is responsable for calling binary_fuse8_allocate(size,filter) -// before. For best performance, the caller should ensure that there are not too -// many duplicated keys. 
-static inline bool binary_fuse8_populate(uint64_t *keys, uint32_t size, - binary_fuse8_t *filter) { - uint64_t rng_counter = 0x726b2b9d438b9d4d; - filter->Seed = binary_fuse_rng_splitmix64(&rng_counter); - uint64_t *reverseOrder = (uint64_t *)calloc((size + 1), sizeof(uint64_t)); - uint32_t capacity = filter->ArrayLength; - uint32_t *alone = (uint32_t *)malloc(capacity * sizeof(uint32_t)); - uint8_t *t2count = (uint8_t *)calloc(capacity, sizeof(uint8_t)); - uint8_t *reverseH = (uint8_t *)malloc(size * sizeof(uint8_t)); - uint64_t *t2hash = (uint64_t *)calloc(capacity, sizeof(uint64_t)); - - uint32_t blockBits = 1; - while (((uint32_t)1 << blockBits) < filter->SegmentCount) { - blockBits += 1; - } - uint32_t block = ((uint32_t)1 << blockBits); - uint32_t *startPos = (uint32_t *)malloc((1 << blockBits) * sizeof(uint32_t)); - uint32_t h012[5]; - - if ((alone == NULL) || (t2count == NULL) || (reverseH == NULL) || - (t2hash == NULL) || (reverseOrder == NULL) || (startPos == NULL)) { - free(alone); - free(t2count); - free(reverseH); - free(t2hash); - free(reverseOrder); - free(startPos); - return false; - } - reverseOrder[size] = 1; - for (int loop = 0; true; ++loop) { - if (loop + 1 > XOR_MAX_ITERATIONS) { - // The probability of this happening is lower than the - // the cosmic-ray probability (i.e., a cosmic ray corrupts your system) - memset(filter->Fingerprints, ~0, filter->ArrayLength); - free(alone); - free(t2count); - free(reverseH); - free(t2hash); - free(reverseOrder); - free(startPos); - return false; - } - - for (uint32_t i = 0; i < block; i++) { - // important : i * size would overflow as a 32-bit number in some - // cases. 
- startPos[i] = ((uint64_t)i * size) >> blockBits; - } - - uint64_t maskblock = block - 1; - for (uint32_t i = 0; i < size; i++) { - uint64_t hash = binary_fuse_murmur64(keys[i] + filter->Seed); - uint64_t segment_index = hash >> (64 - blockBits); - while (reverseOrder[startPos[segment_index]] != 0) { - segment_index++; - segment_index &= maskblock; - } - reverseOrder[startPos[segment_index]] = hash; - startPos[segment_index]++; - } - int error = 0; - uint32_t duplicates = 0; - for (uint32_t i = 0; i < size; i++) { - uint64_t hash = reverseOrder[i]; - uint32_t h0 = binary_fuse8_hash(0, hash, filter); - t2count[h0] += 4; - t2hash[h0] ^= hash; - uint32_t h1= binary_fuse8_hash(1, hash, filter); - t2count[h1] += 4; - t2count[h1] ^= 1; - t2hash[h1] ^= hash; - uint32_t h2 = binary_fuse8_hash(2, hash, filter); - t2count[h2] += 4; - t2hash[h2] ^= hash; - t2count[h2] ^= 2; - if ((t2hash[h0] & t2hash[h1] & t2hash[h2]) == 0) { - if (((t2hash[h0] == 0) && (t2count[h0] == 8)) - || ((t2hash[h1] == 0) && (t2count[h1] == 8)) - || ((t2hash[h2] == 0) && (t2count[h2] == 8))) { - duplicates += 1; - t2count[h0] -= 4; - t2hash[h0] ^= hash; - t2count[h1] -= 4; - t2count[h1] ^= 1; - t2hash[h1] ^= hash; - t2count[h2] -= 4; - t2count[h2] ^= 2; - t2hash[h2] ^= hash; - } - } - error = (t2count[h0] < 4) ? 1 : error; - error = (t2count[h1] < 4) ? 1 : error; - error = (t2count[h2] < 4) ? 1 : error; - } - if(error) { - memset(reverseOrder, 0, sizeof(uint64_t) * size); - memset(t2count, 0, sizeof(uint8_t) * capacity); - memset(t2hash, 0, sizeof(uint64_t) * capacity); - filter->Seed = binary_fuse_rng_splitmix64(&rng_counter); - continue; - } - - // End of key addition - uint32_t Qsize = 0; - // Add sets with one key to the queue. - for (uint32_t i = 0; i < capacity; i++) { - alone[Qsize] = i; - Qsize += ((t2count[i] >> 2) == 1) ? 
1 : 0; - } - uint32_t stacksize = 0; - while (Qsize > 0) { - Qsize--; - uint32_t index = alone[Qsize]; - if ((t2count[index] >> 2) == 1) { - uint64_t hash = t2hash[index]; - - //h012[0] = binary_fuse8_hash(0, hash, filter); - h012[1] = binary_fuse8_hash(1, hash, filter); - h012[2] = binary_fuse8_hash(2, hash, filter); - h012[3] = binary_fuse8_hash(0, hash, filter); // == h012[0]; - h012[4] = h012[1]; - uint8_t found = t2count[index] & 3; - reverseH[stacksize] = found; - reverseOrder[stacksize] = hash; - stacksize++; - uint32_t other_index1 = h012[found + 1]; - alone[Qsize] = other_index1; - Qsize += ((t2count[other_index1] >> 2) == 2 ? 1 : 0); - - t2count[other_index1] -= 4; - t2count[other_index1] ^= binary_fuse_mod3(found + 1); - t2hash[other_index1] ^= hash; - - uint32_t other_index2 = h012[found + 2]; - alone[Qsize] = other_index2; - Qsize += ((t2count[other_index2] >> 2) == 2 ? 1 : 0); - t2count[other_index2] -= 4; - t2count[other_index2] ^= binary_fuse_mod3(found + 2); - t2hash[other_index2] ^= hash; - } - } - if (stacksize + duplicates == size) { - // success - size = stacksize; - break; - } else if(duplicates > 0) { - size = binary_fuse_sort_and_remove_dup(keys, size); - } - memset(reverseOrder, 0, sizeof(uint64_t) * size); - memset(t2count, 0, sizeof(uint8_t) * capacity); - memset(t2hash, 0, sizeof(uint64_t) * capacity); - filter->Seed = binary_fuse_rng_splitmix64(&rng_counter); - } - - for (uint32_t i = size - 1; i < size; i--) { - // the hash of the key we insert next - uint64_t hash = reverseOrder[i]; - uint8_t xor2 = binary_fuse8_fingerprint(hash); - uint8_t found = reverseH[i]; - h012[0] = binary_fuse8_hash(0, hash, filter); - h012[1] = binary_fuse8_hash(1, hash, filter); - h012[2] = binary_fuse8_hash(2, hash, filter); - h012[3] = h012[0]; - h012[4] = h012[1]; - filter->Fingerprints[h012[found]] = xor2 ^ - filter->Fingerprints[h012[found + 1]] ^ - filter->Fingerprints[h012[found + 2]]; - } - free(alone); - free(t2count); - free(reverseH); - 
free(t2hash); - free(reverseOrder); - free(startPos); - return true; -} - -////////////////// -// fuse16 -////////////////// - -typedef struct binary_fuse16_s { - uint64_t Seed; - uint32_t SegmentLength; - uint32_t SegmentLengthMask; - uint32_t SegmentCount; - uint32_t SegmentCountLength; - uint32_t ArrayLength; - uint16_t *Fingerprints; -} binary_fuse16_t; - -static inline uint64_t binary_fuse16_fingerprint(uint64_t hash) { - return hash ^ (hash >> 32); -} - -static inline binary_hashes_t binary_fuse16_hash_batch(uint64_t hash, - const binary_fuse16_t *filter) { - uint64_t hi = binary_fuse_mulhi(hash, filter->SegmentCountLength); - binary_hashes_t ans; - ans.h0 = (uint32_t)hi; - ans.h1 = ans.h0 + filter->SegmentLength; - ans.h2 = ans.h1 + filter->SegmentLength; - ans.h1 ^= (uint32_t)(hash >> 18) & filter->SegmentLengthMask; - ans.h2 ^= (uint32_t)(hash)&filter->SegmentLengthMask; - return ans; -} -static inline uint32_t binary_fuse16_hash(int index, uint64_t hash, - const binary_fuse16_t *filter) { - uint64_t h = binary_fuse_mulhi(hash, filter->SegmentCountLength); - h += index * filter->SegmentLength; - // keep the lower 36 bits - uint64_t hh = hash & ((1UL << 36) - 1); - // index 0: right shift by 36; index 1: right shift by 18; index 2: no shift - h ^= (size_t)((hh >> (36 - 18 * index)) & filter->SegmentLengthMask); - return h; -} - -// Report if the key is in the set, with false positive rate. -static inline bool binary_fuse16_contain(uint64_t key, - const binary_fuse16_t *filter) { - uint64_t hash = binary_fuse_mix_split(key, filter->Seed); - uint16_t f = binary_fuse16_fingerprint(hash); - binary_hashes_t hashes = binary_fuse16_hash_batch(hash, filter); - f ^= filter->Fingerprints[hashes.h0] ^ filter->Fingerprints[hashes.h1] ^ - filter->Fingerprints[hashes.h2]; - return f == 0; -} - - -// allocate enough capacity for a set containing up to 'size' elements -// caller is responsible to call binary_fuse16_free(filter) -// size should be at least 2. 
-static inline bool binary_fuse16_allocate(uint32_t size, - binary_fuse16_t *filter) { - uint32_t arity = 3; - filter->SegmentLength = size == 0 ? 4 : binary_fuse_calculate_segment_length(arity, size); - if (filter->SegmentLength > 262144) { - filter->SegmentLength = 262144; - } - filter->SegmentLengthMask = filter->SegmentLength - 1; - double sizeFactor = size <= 1 ? 0 : binary_fuse_calculate_size_factor(arity, size); - uint32_t capacity = size <= 1 ? 0 : (uint32_t)(round((double)size * sizeFactor)); - uint32_t initSegmentCount = - (capacity + filter->SegmentLength - 1) / filter->SegmentLength - - (arity - 1); - filter->ArrayLength = (initSegmentCount + arity - 1) * filter->SegmentLength; - filter->SegmentCount = - (filter->ArrayLength + filter->SegmentLength - 1) / filter->SegmentLength; - if (filter->SegmentCount <= arity - 1) { - filter->SegmentCount = 1; - } else { - filter->SegmentCount = filter->SegmentCount - (arity - 1); - } - filter->ArrayLength = - (filter->SegmentCount + arity - 1) * filter->SegmentLength; - filter->SegmentCountLength = filter->SegmentCount * filter->SegmentLength; - filter->Fingerprints = (uint16_t*)malloc(filter->ArrayLength * sizeof(uint16_t)); - return filter->Fingerprints != NULL; -} - -// report memory usage -static inline size_t binary_fuse16_size_in_bytes(const binary_fuse16_t *filter) { - return filter->ArrayLength * sizeof(uint16_t) + sizeof(binary_fuse16_t); -} - -// release memory -static inline void binary_fuse16_free(binary_fuse16_t *filter) { - free(filter->Fingerprints); - filter->Fingerprints = NULL; - filter->Seed = 0; - filter->SegmentLength = 0; - filter->SegmentLengthMask = 0; - filter->SegmentCount = 0; - filter->SegmentCountLength = 0; - filter->ArrayLength = 0; -} - - -// Construct the filter, returns true on success, false on failure. -// The algorithm fails when there is insufficient memory. -// The caller is responsable for calling binary_fuse8_allocate(size,filter) -// before. 
For best performance, the caller should ensure that there are not too -// many duplicated keys. -static inline bool binary_fuse16_populate(uint64_t *keys, uint32_t size, - binary_fuse16_t *filter) { - uint64_t rng_counter = 0x726b2b9d438b9d4d; - filter->Seed = binary_fuse_rng_splitmix64(&rng_counter); - uint64_t *reverseOrder = (uint64_t *)calloc((size + 1), sizeof(uint64_t)); - uint32_t capacity = filter->ArrayLength; - uint32_t *alone = (uint32_t *)malloc(capacity * sizeof(uint32_t)); - uint8_t *t2count = (uint8_t *)calloc(capacity, sizeof(uint8_t)); - uint8_t *reverseH = (uint8_t *)malloc(size * sizeof(uint8_t)); - uint64_t *t2hash = (uint64_t *)calloc(capacity, sizeof(uint64_t)); - - uint32_t blockBits = 1; - while (((uint32_t)1 << blockBits) < filter->SegmentCount) { - blockBits += 1; - } - uint32_t block = ((uint32_t)1 << blockBits); - uint32_t *startPos = (uint32_t *)malloc((1 << blockBits) * sizeof(uint32_t)); - uint32_t h012[5]; - - if ((alone == NULL) || (t2count == NULL) || (reverseH == NULL) || - (t2hash == NULL) || (reverseOrder == NULL) || (startPos == NULL)) { - free(alone); - free(t2count); - free(reverseH); - free(t2hash); - free(reverseOrder); - free(startPos); - return false; - } - reverseOrder[size] = 1; - for (int loop = 0; true; ++loop) { - if (loop + 1 > XOR_MAX_ITERATIONS) { - // The probability of this happening is lower than the - // the cosmic-ray probability (i.e., a cosmic ray corrupts your system). - free(alone); - free(t2count); - free(reverseH); - free(t2hash); - free(reverseOrder); - free(startPos); - return false; - } - - for (uint32_t i = 0; i < block; i++) { - // important : i * size would overflow as a 32-bit number in some - // cases. 
- startPos[i] = ((uint64_t)i * size) >> blockBits; - } - - uint64_t maskblock = block - 1; - for (uint32_t i = 0; i < size; i++) { - uint64_t hash = binary_fuse_murmur64(keys[i] + filter->Seed); - uint64_t segment_index = hash >> (64 - blockBits); - while (reverseOrder[startPos[segment_index]] != 0) { - segment_index++; - segment_index &= maskblock; - } - reverseOrder[startPos[segment_index]] = hash; - startPos[segment_index]++; - } - int error = 0; - uint32_t duplicates = 0; - for (uint32_t i = 0; i < size; i++) { - uint64_t hash = reverseOrder[i]; - uint32_t h0 = binary_fuse16_hash(0, hash, filter); - t2count[h0] += 4; - t2hash[h0] ^= hash; - uint32_t h1= binary_fuse16_hash(1, hash, filter); - t2count[h1] += 4; - t2count[h1] ^= 1; - t2hash[h1] ^= hash; - uint32_t h2 = binary_fuse16_hash(2, hash, filter); - t2count[h2] += 4; - t2hash[h2] ^= hash; - t2count[h2] ^= 2; - if ((t2hash[h0] & t2hash[h1] & t2hash[h2]) == 0) { - if (((t2hash[h0] == 0) && (t2count[h0] == 8)) - || ((t2hash[h1] == 0) && (t2count[h1] == 8)) - || ((t2hash[h2] == 0) && (t2count[h2] == 8))) { - duplicates += 1; - t2count[h0] -= 4; - t2hash[h0] ^= hash; - t2count[h1] -= 4; - t2count[h1] ^= 1; - t2hash[h1] ^= hash; - t2count[h2] -= 4; - t2count[h2] ^= 2; - t2hash[h2] ^= hash; - } - } - error = (t2count[h0] < 4) ? 1 : error; - error = (t2count[h1] < 4) ? 1 : error; - error = (t2count[h2] < 4) ? 1 : error; - } - if(error) { - memset(reverseOrder, 0, sizeof(uint64_t) * size); - memset(t2count, 0, sizeof(uint8_t) * capacity); - memset(t2hash, 0, sizeof(uint64_t) * capacity); - filter->Seed = binary_fuse_rng_splitmix64(&rng_counter); - continue; - } - - // End of key addition - uint32_t Qsize = 0; - // Add sets with one key to the queue. - for (uint32_t i = 0; i < capacity; i++) { - alone[Qsize] = i; - Qsize += ((t2count[i] >> 2) == 1) ? 
1 : 0; - } - uint32_t stacksize = 0; - while (Qsize > 0) { - Qsize--; - uint32_t index = alone[Qsize]; - if ((t2count[index] >> 2) == 1) { - uint64_t hash = t2hash[index]; - - //h012[0] = binary_fuse16_hash(0, hash, filter); - h012[1] = binary_fuse16_hash(1, hash, filter); - h012[2] = binary_fuse16_hash(2, hash, filter); - h012[3] = binary_fuse16_hash(0, hash, filter); // == h012[0]; - h012[4] = h012[1]; - uint8_t found = t2count[index] & 3; - reverseH[stacksize] = found; - reverseOrder[stacksize] = hash; - stacksize++; - uint32_t other_index1 = h012[found + 1]; - alone[Qsize] = other_index1; - Qsize += ((t2count[other_index1] >> 2) == 2 ? 1 : 0); - - t2count[other_index1] -= 4; - t2count[other_index1] ^= binary_fuse_mod3(found + 1); - t2hash[other_index1] ^= hash; - - uint32_t other_index2 = h012[found + 2]; - alone[Qsize] = other_index2; - Qsize += ((t2count[other_index2] >> 2) == 2 ? 1 : 0); - t2count[other_index2] -= 4; - t2count[other_index2] ^= binary_fuse_mod3(found + 2); - t2hash[other_index2] ^= hash; - } - } - if (stacksize + duplicates == size) { - // success - size = stacksize; - break; - } else if(duplicates > 0) { - size = binary_fuse_sort_and_remove_dup(keys, size); - } - memset(reverseOrder, 0, sizeof(uint64_t) * size); - memset(t2count, 0, sizeof(uint8_t) * capacity); - memset(t2hash, 0, sizeof(uint64_t) * capacity); - filter->Seed = binary_fuse_rng_splitmix64(&rng_counter); - } - - for (uint32_t i = size - 1; i < size; i--) { - // the hash of the key we insert next - uint64_t hash = reverseOrder[i]; - uint16_t xor2 = binary_fuse16_fingerprint(hash); - uint8_t found = reverseH[i]; - h012[0] = binary_fuse16_hash(0, hash, filter); - h012[1] = binary_fuse16_hash(1, hash, filter); - h012[2] = binary_fuse16_hash(2, hash, filter); - h012[3] = h012[0]; - h012[4] = h012[1]; - filter->Fingerprints[h012[found]] = xor2 ^ - filter->Fingerprints[h012[found + 1]] ^ - filter->Fingerprints[h012[found + 2]]; - } - free(alone); - free(t2count); - free(reverseH); - 
free(t2hash); - free(reverseOrder); - free(startPos); - return true; -} - - - - -#endif diff --git a/dependencies/xor_singleheader/include/xorfilter.h b/dependencies/xor_singleheader/include/xorfilter.h deleted file mode 100644 index e2aff91..0000000 --- a/dependencies/xor_singleheader/include/xorfilter.h +++ /dev/null @@ -1,1283 +0,0 @@ -#ifndef XORFILTER_H -#define XORFILTER_H -#include -#include -#include -#include -#include -#include - -#ifndef XOR_SORT_ITERATIONS -#define XOR_SORT_ITERATIONS 10 // after 10 iterations, we sort and remove duplicates -#endif - -#ifndef XOR_MAX_ITERATIONS -#define XOR_MAX_ITERATIONS 100 // probabillity of success should always be > 0.5 so 100 iterations is highly unlikely -#endif - - -static int xor_cmpfunc(const void * a, const void * b) { - return ( *(const uint64_t*)a - *(const uint64_t*)b ); -} - -static size_t xor_sort_and_remove_dup(uint64_t* keys, size_t length) { - qsort(keys, length, sizeof(uint64_t), xor_cmpfunc); - size_t j = 0; - for(size_t i = 1; i < length; i++) { - if(keys[i] != keys[i-1]) { - keys[j] = keys[i]; - j++; - } - } - return j+1; -} -/** - * We assume that you have a large set of 64-bit integers - * and you want a data structure to do membership tests using - * no more than ~8 or ~16 bits per key. If your initial set - * is made of strings or other types, you first need to hash them - * to a 64-bit integer. - */ - -/** - * We start with a few utilities. 
- ***/ -static inline uint64_t xor_murmur64(uint64_t h) { - h ^= h >> 33; - h *= UINT64_C(0xff51afd7ed558ccd); - h ^= h >> 33; - h *= UINT64_C(0xc4ceb9fe1a85ec53); - h ^= h >> 33; - return h; -} - -static inline uint64_t xor_mix_split(uint64_t key, uint64_t seed) { - return xor_murmur64(key + seed); -} - -static inline uint64_t xor_rotl64(uint64_t n, unsigned int c) { - return (n << (c & 63)) | (n >> ((-c) & 63)); -} - -static inline uint32_t xor_reduce(uint32_t hash, uint32_t n) { - // http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ - return (uint32_t)(((uint64_t)hash * n) >> 32); -} - -static inline uint64_t xor_fingerprint(uint64_t hash) { - return hash ^ (hash >> 32); -} - -/** - * We need a decent random number generator. - **/ - -// returns random number, modifies the seed -static inline uint64_t xor_rng_splitmix64(uint64_t *seed) { - uint64_t z = (*seed += UINT64_C(0x9E3779B97F4A7C15)); - z = (z ^ (z >> 30)) * UINT64_C(0xBF58476D1CE4E5B9); - z = (z ^ (z >> 27)) * UINT64_C(0x94D049BB133111EB); - return z ^ (z >> 31); -} - -/** - * xor8 is the recommended default, no more than - * a 0.3% false-positive probability. - */ -typedef struct xor8_s { - uint64_t seed; - uint64_t blockLength; - uint8_t - *fingerprints; // after xor8_allocate, will point to 3*blockLength values -} xor8_t; - -// Report if the key is in the set, with false positive rate. 
-static inline bool xor8_contain(uint64_t key, const xor8_t *filter) { - uint64_t hash = xor_mix_split(key, filter->seed); - uint8_t f = xor_fingerprint(hash); - uint32_t r0 = (uint32_t)hash; - uint32_t r1 = (uint32_t)xor_rotl64(hash, 21); - uint32_t r2 = (uint32_t)xor_rotl64(hash, 42); - uint32_t h0 = xor_reduce(r0, filter->blockLength); - uint32_t h1 = xor_reduce(r1, filter->blockLength) + filter->blockLength; - uint32_t h2 = xor_reduce(r2, filter->blockLength) + 2 * filter->blockLength; - return f == (filter->fingerprints[h0] ^ filter->fingerprints[h1] ^ - filter->fingerprints[h2]); -} - -typedef struct xor16_s { - uint64_t seed; - uint64_t blockLength; - uint16_t - *fingerprints; // after xor16_allocate, will point to 3*blockLength values -} xor16_t; - -// Report if the key is in the set, with false positive rate. -static inline bool xor16_contain(uint64_t key, const xor16_t *filter) { - uint64_t hash = xor_mix_split(key, filter->seed); - uint16_t f = xor_fingerprint(hash); - uint32_t r0 = (uint32_t)hash; - uint32_t r1 = (uint32_t)xor_rotl64(hash, 21); - uint32_t r2 = (uint32_t)xor_rotl64(hash, 42); - uint32_t h0 = xor_reduce(r0, filter->blockLength); - uint32_t h1 = xor_reduce(r1, filter->blockLength) + filter->blockLength; - uint32_t h2 = xor_reduce(r2, filter->blockLength) + 2 * filter->blockLength; - return f == (filter->fingerprints[h0] ^ filter->fingerprints[h1] ^ - filter->fingerprints[h2]); -} - -// allocate enough capacity for a set containing up to 'size' elements -// caller is responsible to call xor8_free(filter) -static inline bool xor8_allocate(uint32_t size, xor8_t *filter) { - size_t capacity = 32 + 1.23 * size; - capacity = capacity / 3 * 3; - filter->fingerprints = (uint8_t *)malloc(capacity * sizeof(uint8_t)); - if (filter->fingerprints != NULL) { - filter->blockLength = capacity / 3; - return true; - } else { - return false; - } -} - -// allocate enough capacity for a set containing up to 'size' elements -// caller is responsible to call 
xor16_free(filter) -static inline bool xor16_allocate(uint32_t size, xor16_t *filter) { - size_t capacity = 32 + 1.23 * size; - capacity = capacity / 3 * 3; - filter->fingerprints = (uint16_t *)malloc(capacity * sizeof(uint16_t)); - if (filter->fingerprints != NULL) { - filter->blockLength = capacity / 3; - return true; - } else { - return false; - } -} - -// report memory usage -static inline size_t xor8_size_in_bytes(const xor8_t *filter) { - return 3 * filter->blockLength * sizeof(uint8_t) + sizeof(xor8_t); -} - -// report memory usage -static inline size_t xor16_size_in_bytes(const xor16_t *filter) { - return 3 * filter->blockLength * sizeof(uint16_t) + sizeof(xor16_t); -} - -// release memory -static inline void xor8_free(xor8_t *filter) { - free(filter->fingerprints); - filter->fingerprints = NULL; - filter->blockLength = 0; -} - -// release memory -static inline void xor16_free(xor16_t *filter) { - free(filter->fingerprints); - filter->fingerprints = NULL; - filter->blockLength = 0; -} - -struct xor_xorset_s { - uint64_t xormask; - uint32_t count; -}; - -typedef struct xor_xorset_s xor_xorset_t; - -struct xor_hashes_s { - uint64_t h; - uint32_t h0; - uint32_t h1; - uint32_t h2; -}; - -typedef struct xor_hashes_s xor_hashes_t; - -static inline xor_hashes_t xor8_get_h0_h1_h2(uint64_t k, const xor8_t *filter) { - uint64_t hash = xor_mix_split(k, filter->seed); - xor_hashes_t answer; - answer.h = hash; - uint32_t r0 = (uint32_t)hash; - uint32_t r1 = (uint32_t)xor_rotl64(hash, 21); - uint32_t r2 = (uint32_t)xor_rotl64(hash, 42); - - answer.h0 = xor_reduce(r0, filter->blockLength); - answer.h1 = xor_reduce(r1, filter->blockLength); - answer.h2 = xor_reduce(r2, filter->blockLength); - return answer; -} - -struct xor_h0h1h2_s { - uint32_t h0; - uint32_t h1; - uint32_t h2; -}; - -typedef struct xor_h0h1h2_s xor_h0h1h2_t; - -static inline uint32_t xor8_get_h0(uint64_t hash, const xor8_t *filter) { - uint32_t r0 = (uint32_t)hash; - return xor_reduce(r0, 
filter->blockLength); -} -static inline uint32_t xor8_get_h1(uint64_t hash, const xor8_t *filter) { - uint32_t r1 = (uint32_t)xor_rotl64(hash, 21); - return xor_reduce(r1, filter->blockLength); -} -static inline uint32_t xor8_get_h2(uint64_t hash, const xor8_t *filter) { - uint32_t r2 = (uint32_t)xor_rotl64(hash, 42); - return xor_reduce(r2, filter->blockLength); -} -static inline uint32_t xor16_get_h0(uint64_t hash, const xor16_t *filter) { - uint32_t r0 = (uint32_t)hash; - return xor_reduce(r0, filter->blockLength); -} -static inline uint32_t xor16_get_h1(uint64_t hash, const xor16_t *filter) { - uint32_t r1 = (uint32_t)xor_rotl64(hash, 21); - return xor_reduce(r1, filter->blockLength); -} -static inline uint32_t xor16_get_h2(uint64_t hash, const xor16_t *filter) { - uint32_t r2 = (uint32_t)xor_rotl64(hash, 42); - return xor_reduce(r2, filter->blockLength); -} -static inline xor_hashes_t xor16_get_h0_h1_h2(uint64_t k, - const xor16_t *filter) { - uint64_t hash = xor_mix_split(k, filter->seed); - xor_hashes_t answer; - answer.h = hash; - uint32_t r0 = (uint32_t)hash; - uint32_t r1 = (uint32_t)xor_rotl64(hash, 21); - uint32_t r2 = (uint32_t)xor_rotl64(hash, 42); - - answer.h0 = xor_reduce(r0, filter->blockLength); - answer.h1 = xor_reduce(r1, filter->blockLength); - answer.h2 = xor_reduce(r2, filter->blockLength); - return answer; -} - -struct xor_keyindex_s { - uint64_t hash; - uint32_t index; -}; - -typedef struct xor_keyindex_s xor_keyindex_t; - -struct xor_setbuffer_s { - xor_keyindex_t *buffer; - uint32_t *counts; - int insignificantbits; - uint32_t slotsize; // should be 1<< insignificantbits - uint32_t slotcount; - size_t originalsize; -}; - -typedef struct xor_setbuffer_s xor_setbuffer_t; - -static inline bool xor_init_buffer(xor_setbuffer_t *buffer, size_t size) { - buffer->originalsize = size; - buffer->insignificantbits = 18; - buffer->slotsize = UINT32_C(1) << buffer->insignificantbits; - buffer->slotcount = (size + buffer->slotsize - 1) / 
buffer->slotsize; - buffer->buffer = (xor_keyindex_t *)malloc( - buffer->slotcount * buffer->slotsize * sizeof(xor_keyindex_t)); - buffer->counts = (uint32_t *)malloc(buffer->slotcount * sizeof(uint32_t)); - if ((buffer->counts == NULL) || (buffer->buffer == NULL)) { - free(buffer->counts); - free(buffer->buffer); - return false; - } - memset(buffer->counts, 0, buffer->slotcount * sizeof(uint32_t)); - return true; -} - -static inline void xor_free_buffer(xor_setbuffer_t *buffer) { - free(buffer->counts); - free(buffer->buffer); - buffer->counts = NULL; - buffer->buffer = NULL; -} - -static inline void xor_buffered_increment_counter(uint32_t index, uint64_t hash, - xor_setbuffer_t *buffer, - xor_xorset_t *sets) { - uint32_t slot = index >> buffer->insignificantbits; - size_t addr = buffer->counts[slot] + (slot << buffer->insignificantbits); - buffer->buffer[addr].index = index; - buffer->buffer[addr].hash = hash; - buffer->counts[slot]++; - size_t offset = (slot << buffer->insignificantbits); - if (buffer->counts[slot] == buffer->slotsize) { - // must empty the buffer - for (size_t i = offset; i < buffer->slotsize + offset; i++) { - xor_keyindex_t ki = - buffer->buffer[i]; - sets[ki.index].xormask ^= ki.hash; - sets[ki.index].count++; - } - buffer->counts[slot] = 0; - } -} - -static inline void xor_make_buffer_current(xor_setbuffer_t *buffer, - xor_xorset_t *sets, uint32_t index, - xor_keyindex_t *Q, size_t *Qsize) { - uint32_t slot = index >> buffer->insignificantbits; - if(buffer->counts[slot] > 0) { // uncommon! 
- size_t qsize = *Qsize; - size_t offset = (slot << buffer->insignificantbits); - for (size_t i = offset; i < buffer->counts[slot] + offset; i++) { - xor_keyindex_t ki = buffer->buffer[i]; - sets[ki.index].xormask ^= ki.hash; - sets[ki.index].count--; - if (sets[ki.index].count == 1) {// this branch might be hard to predict - ki.hash = sets[ki.index].xormask; - Q[qsize] = ki; - qsize += 1; - } - } - *Qsize = qsize; - buffer->counts[slot] = 0; - } -} - - - -static inline void xor_buffered_decrement_counter(uint32_t index, uint64_t hash, - xor_setbuffer_t *buffer, - xor_xorset_t *sets, - xor_keyindex_t *Q, - size_t *Qsize) { - uint32_t slot = index >> buffer->insignificantbits; - size_t addr = buffer->counts[slot] + (slot << buffer->insignificantbits); - buffer->buffer[addr].index = index; - buffer->buffer[addr].hash = hash; - buffer->counts[slot]++; - if (buffer->counts[slot] == buffer->slotsize) { - size_t qsize = *Qsize; - size_t offset = (slot << buffer->insignificantbits); - for (size_t i = offset; i < buffer->counts[slot] + offset; i++) { - xor_keyindex_t ki = - buffer->buffer[i]; - sets[ki.index].xormask ^= ki.hash; - sets[ki.index].count--; - if (sets[ki.index].count == 1) { - ki.hash = sets[ki.index].xormask; - Q[qsize] = ki; - qsize += 1; - } - } - *Qsize = qsize; - buffer->counts[slot] = 0; - } -} - -static inline void xor_flush_increment_buffer(xor_setbuffer_t *buffer, - xor_xorset_t *sets) { - for (uint32_t slot = 0; slot < buffer->slotcount; slot++) { - size_t offset = (slot << buffer->insignificantbits); - for (size_t i = offset; i < buffer->counts[slot] + offset; i++) { - xor_keyindex_t ki = - buffer->buffer[i]; - sets[ki.index].xormask ^= ki.hash; - sets[ki.index].count++; - } - buffer->counts[slot] = 0; - } -} - -static inline void xor_flush_decrement_buffer(xor_setbuffer_t *buffer, - xor_xorset_t *sets, - xor_keyindex_t *Q, - size_t *Qsize) { - size_t qsize = *Qsize; - for (uint32_t slot = 0; slot < buffer->slotcount; slot++) { - uint32_t base = 
(slot << buffer->insignificantbits); - for (size_t i = base; i < buffer->counts[slot] + base; i++) { - xor_keyindex_t ki = buffer->buffer[i]; - sets[ki.index].xormask ^= ki.hash; - sets[ki.index].count--; - if (sets[ki.index].count == 1) { - ki.hash = sets[ki.index].xormask; - Q[qsize] = ki; - qsize += 1; - } - } - buffer->counts[slot] = 0; - } - *Qsize = qsize; -} - -static inline uint32_t xor_flushone_decrement_buffer(xor_setbuffer_t *buffer, - xor_xorset_t *sets, - xor_keyindex_t *Q, - size_t *Qsize) { - uint32_t bestslot = 0; - uint32_t bestcount = buffer->counts[bestslot]; - for (uint32_t slot = 1; slot < buffer->slotcount; slot++) { - if (buffer->counts[slot] > bestcount) { - bestslot = slot; - bestcount = buffer->counts[slot]; - } - } - uint32_t slot = bestslot; - size_t qsize = *Qsize; - // for(uint32_t slot = 0; slot < buffer->slotcount; slot++) { - uint32_t base = (slot << buffer->insignificantbits); - for (size_t i = base; i < buffer->counts[slot] + base; i++) { - xor_keyindex_t ki = buffer->buffer[i]; - sets[ki.index].xormask ^= ki.hash; - sets[ki.index].count--; - if (sets[ki.index].count == 1) { - ki.hash = sets[ki.index].xormask; - Q[qsize] = ki; - qsize += 1; - } - } - *Qsize = qsize; - buffer->counts[slot] = 0; - //} - return bestslot; -} - -// Construct the filter, returns true on success, false on failure. -// The algorithm fails when there is insufficient memory. -// The caller is responsable for calling xor8_allocate(size,filter) -// before. For best performance, the caller should ensure that there are not too -// many duplicated keys. 
-static inline bool xor8_buffered_populate(uint64_t *keys, uint32_t size, xor8_t *filter) { - if(size == 0) { return false; } - uint64_t rng_counter = 1; - filter->seed = xor_rng_splitmix64(&rng_counter); - size_t arrayLength = filter->blockLength * 3; // size of the backing array - xor_setbuffer_t buffer0, buffer1, buffer2; - size_t blockLength = filter->blockLength; - bool ok0 = xor_init_buffer(&buffer0, blockLength); - bool ok1 = xor_init_buffer(&buffer1, blockLength); - bool ok2 = xor_init_buffer(&buffer2, blockLength); - if (!ok0 || !ok1 || !ok2) { - xor_free_buffer(&buffer0); - xor_free_buffer(&buffer1); - xor_free_buffer(&buffer2); - return false; - } - - xor_xorset_t *sets = - (xor_xorset_t *)malloc(arrayLength * sizeof(xor_xorset_t)); - xor_xorset_t *sets0 = sets; - - xor_keyindex_t *Q = - (xor_keyindex_t *)malloc(arrayLength * sizeof(xor_keyindex_t)); - - xor_keyindex_t *stack = - (xor_keyindex_t *)malloc(size * sizeof(xor_keyindex_t)); - - if ((sets == NULL) || (Q == NULL) || (stack == NULL)) { - xor_free_buffer(&buffer0); - xor_free_buffer(&buffer1); - xor_free_buffer(&buffer2); - free(sets); - free(Q); - free(stack); - return false; - } - xor_xorset_t *sets1 = sets + blockLength; - xor_xorset_t *sets2 = sets + 2 * blockLength; - xor_keyindex_t *Q0 = Q; - xor_keyindex_t *Q1 = Q + blockLength; - xor_keyindex_t *Q2 = Q + 2 * blockLength; - - int iterations = 0; - - while (true) { - iterations ++; - if(iterations == XOR_SORT_ITERATIONS) { - size = xor_sort_and_remove_dup(keys, size); - } - if(iterations > XOR_MAX_ITERATIONS) { - // The probability of this happening is lower than the - // the cosmic-ray probability (i.e., a cosmic ray corrupts your system). 
- xor_free_buffer(&buffer0); - xor_free_buffer(&buffer1); - xor_free_buffer(&buffer2); - free(sets); - free(Q); - free(stack); - return false; - } - memset(sets, 0, sizeof(xor_xorset_t) * arrayLength); - for (size_t i = 0; i < size; i++) { - uint64_t key = keys[i]; - xor_hashes_t hs = xor8_get_h0_h1_h2(key, filter); - xor_buffered_increment_counter(hs.h0, hs.h, &buffer0, sets0); - xor_buffered_increment_counter(hs.h1, hs.h, &buffer1, - sets1); - xor_buffered_increment_counter(hs.h2, hs.h, &buffer2, - sets2); - } - xor_flush_increment_buffer(&buffer0, sets0); - xor_flush_increment_buffer(&buffer1, sets1); - xor_flush_increment_buffer(&buffer2, sets2); - // todo: the flush should be sync with the detection that follows - // scan for values with a count of one - size_t Q0size = 0, Q1size = 0, Q2size = 0; - for (size_t i = 0; i < filter->blockLength; i++) { - if (sets0[i].count == 1) { - Q0[Q0size].index = i; - Q0[Q0size].hash = sets0[i].xormask; - Q0size++; - } - } - - for (size_t i = 0; i < filter->blockLength; i++) { - if (sets1[i].count == 1) { - Q1[Q1size].index = i; - Q1[Q1size].hash = sets1[i].xormask; - Q1size++; - } - } - for (size_t i = 0; i < filter->blockLength; i++) { - if (sets2[i].count == 1) { - Q2[Q2size].index = i; - Q2[Q2size].hash = sets2[i].xormask; - Q2size++; - } - } - - size_t stack_size = 0; - while (Q0size + Q1size + Q2size > 0) { - while (Q0size > 0) { - xor_keyindex_t keyindex = Q0[--Q0size]; - size_t index = keyindex.index; - xor_make_buffer_current(&buffer0, sets0, index, Q0, &Q0size); - - if (sets0[index].count == 0) - continue; // not actually possible after the initial scan. 
- //sets0[index].count = 0; - uint64_t hash = keyindex.hash; - uint32_t h1 = xor8_get_h1(hash, filter); - uint32_t h2 = xor8_get_h2(hash, filter); - - stack[stack_size] = keyindex; - stack_size++; - xor_buffered_decrement_counter(h1, hash, &buffer1, sets1, - Q1, &Q1size); - xor_buffered_decrement_counter(h2, hash, &buffer2, - sets2, Q2, &Q2size); - } - if (Q1size == 0) - xor_flushone_decrement_buffer(&buffer1, sets1, Q1, &Q1size); - - while (Q1size > 0) { - xor_keyindex_t keyindex = Q1[--Q1size]; - size_t index = keyindex.index; - xor_make_buffer_current(&buffer1, sets1, index, Q1, &Q1size); - - if (sets1[index].count == 0) - continue; - //sets1[index].count = 0; - uint64_t hash = keyindex.hash; - uint32_t h0 = xor8_get_h0(hash, filter); - uint32_t h2 = xor8_get_h2(hash, filter); - keyindex.index += blockLength; - stack[stack_size] = keyindex; - stack_size++; - xor_buffered_decrement_counter(h0, hash, &buffer0, sets0, Q0, &Q0size); - xor_buffered_decrement_counter(h2, hash, &buffer2, - sets2, Q2, &Q2size); - } - if (Q2size == 0) - xor_flushone_decrement_buffer(&buffer2, sets2, Q2, &Q2size); - while (Q2size > 0) { - xor_keyindex_t keyindex = Q2[--Q2size]; - size_t index = keyindex.index; - xor_make_buffer_current(&buffer2, sets2, index, Q2, &Q2size); - if (sets2[index].count == 0) - continue; - - //sets2[index].count = 0; - uint64_t hash = keyindex.hash; - - uint32_t h0 = xor8_get_h0(hash, filter); - uint32_t h1 = xor8_get_h1(hash, filter); - keyindex.index += 2 * blockLength; - - stack[stack_size] = keyindex; - stack_size++; - xor_buffered_decrement_counter(h0, hash, &buffer0, sets0, Q0, &Q0size); - xor_buffered_decrement_counter(h1, hash, &buffer1, sets1, - Q1, &Q1size); - } - if (Q0size == 0) - xor_flushone_decrement_buffer(&buffer0, sets0, Q0, &Q0size); - if ((Q0size + Q1size + Q2size == 0) && (stack_size < size)) { - // this should basically never happen - xor_flush_decrement_buffer(&buffer0, sets0, Q0, &Q0size); - xor_flush_decrement_buffer(&buffer1, sets1, 
Q1, &Q1size); - xor_flush_decrement_buffer(&buffer2, sets2, Q2, &Q2size); - } - } - if (stack_size == size) { - // success - break; - } - - filter->seed = xor_rng_splitmix64(&rng_counter); - } - uint8_t * fingerprints0 = filter->fingerprints; - uint8_t * fingerprints1 = filter->fingerprints + blockLength; - uint8_t * fingerprints2 = filter->fingerprints + 2 * blockLength; - - size_t stack_size = size; - while (stack_size > 0) { - xor_keyindex_t ki = stack[--stack_size]; - uint64_t val = xor_fingerprint(ki.hash); - if(ki.index < blockLength) { - val ^= fingerprints1[xor8_get_h1(ki.hash,filter)] ^ fingerprints2[xor8_get_h2(ki.hash,filter)]; - } else if(ki.index < 2 * blockLength) { - val ^= fingerprints0[xor8_get_h0(ki.hash,filter)] ^ fingerprints2[xor8_get_h2(ki.hash,filter)]; - } else { - val ^= fingerprints0[xor8_get_h0(ki.hash,filter)] ^ fingerprints1[xor8_get_h1(ki.hash,filter)]; - } - filter->fingerprints[ki.index] = val; - } - xor_free_buffer(&buffer0); - xor_free_buffer(&buffer1); - xor_free_buffer(&buffer2); - - free(sets); - free(Q); - free(stack); - return true; -} - -// Construct the filter, returns true on success, false on failure. -// The algorithm fails when there is insufficient memory. -// The caller is responsable for calling xor8_allocate(size,filter) -// before. For best performance, the caller should ensure that there are not too -// many duplicated keys. 
-static inline bool xor8_populate(uint64_t *keys, uint32_t size, xor8_t *filter) { - if(size == 0) { return false; } - uint64_t rng_counter = 1; - filter->seed = xor_rng_splitmix64(&rng_counter); - size_t arrayLength = filter->blockLength * 3; // size of the backing array - size_t blockLength = filter->blockLength; - - xor_xorset_t *sets = - (xor_xorset_t *)malloc(arrayLength * sizeof(xor_xorset_t)); - - xor_keyindex_t *Q = - (xor_keyindex_t *)malloc(arrayLength * sizeof(xor_keyindex_t)); - - xor_keyindex_t *stack = - (xor_keyindex_t *)malloc(size * sizeof(xor_keyindex_t)); - - if ((sets == NULL) || (Q == NULL) || (stack == NULL)) { - free(sets); - free(Q); - free(stack); - return false; - } - xor_xorset_t *sets0 = sets; - xor_xorset_t *sets1 = sets + blockLength; - xor_xorset_t *sets2 = sets + 2 * blockLength; - xor_keyindex_t *Q0 = Q; - xor_keyindex_t *Q1 = Q + blockLength; - xor_keyindex_t *Q2 = Q + 2 * blockLength; - - int iterations = 0; - - while (true) { - iterations ++; - if(iterations == XOR_SORT_ITERATIONS) { - size = xor_sort_and_remove_dup(keys, size); - } - if(iterations > XOR_MAX_ITERATIONS) { - // The probability of this happening is lower than the - // the cosmic-ray probability (i.e., a cosmic ray corrupts your system). 
- free(sets); - free(Q); - free(stack); - return false; - } - - memset(sets, 0, sizeof(xor_xorset_t) * arrayLength); - for (size_t i = 0; i < size; i++) { - uint64_t key = keys[i]; - xor_hashes_t hs = xor8_get_h0_h1_h2(key, filter); - sets0[hs.h0].xormask ^= hs.h; - sets0[hs.h0].count++; - sets1[hs.h1].xormask ^= hs.h; - sets1[hs.h1].count++; - sets2[hs.h2].xormask ^= hs.h; - sets2[hs.h2].count++; - } - // todo: the flush should be sync with the detection that follows - // scan for values with a count of one - size_t Q0size = 0, Q1size = 0, Q2size = 0; - for (size_t i = 0; i < filter->blockLength; i++) { - if (sets0[i].count == 1) { - Q0[Q0size].index = i; - Q0[Q0size].hash = sets0[i].xormask; - Q0size++; - } - } - - for (size_t i = 0; i < filter->blockLength; i++) { - if (sets1[i].count == 1) { - Q1[Q1size].index = i; - Q1[Q1size].hash = sets1[i].xormask; - Q1size++; - } - } - for (size_t i = 0; i < filter->blockLength; i++) { - if (sets2[i].count == 1) { - Q2[Q2size].index = i; - Q2[Q2size].hash = sets2[i].xormask; - Q2size++; - } - } - - size_t stack_size = 0; - while (Q0size + Q1size + Q2size > 0) { - while (Q0size > 0) { - xor_keyindex_t keyindex = Q0[--Q0size]; - size_t index = keyindex.index; - if (sets0[index].count == 0) - continue; // not actually possible after the initial scan. 
- //sets0[index].count = 0; - uint64_t hash = keyindex.hash; - uint32_t h1 = xor8_get_h1(hash, filter); - uint32_t h2 = xor8_get_h2(hash, filter); - - stack[stack_size] = keyindex; - stack_size++; - sets1[h1].xormask ^= hash; - sets1[h1].count--; - if (sets1[h1].count == 1) { - Q1[Q1size].index = h1; - Q1[Q1size].hash = sets1[h1].xormask; - Q1size++; - } - sets2[h2].xormask ^= hash; - sets2[h2].count--; - if (sets2[h2].count == 1) { - Q2[Q2size].index = h2; - Q2[Q2size].hash = sets2[h2].xormask; - Q2size++; - } - } - while (Q1size > 0) { - xor_keyindex_t keyindex = Q1[--Q1size]; - size_t index = keyindex.index; - if (sets1[index].count == 0) - continue; - //sets1[index].count = 0; - uint64_t hash = keyindex.hash; - uint32_t h0 = xor8_get_h0(hash, filter); - uint32_t h2 = xor8_get_h2(hash, filter); - keyindex.index += blockLength; - stack[stack_size] = keyindex; - stack_size++; - sets0[h0].xormask ^= hash; - sets0[h0].count--; - if (sets0[h0].count == 1) { - Q0[Q0size].index = h0; - Q0[Q0size].hash = sets0[h0].xormask; - Q0size++; - } - sets2[h2].xormask ^= hash; - sets2[h2].count--; - if (sets2[h2].count == 1) { - Q2[Q2size].index = h2; - Q2[Q2size].hash = sets2[h2].xormask; - Q2size++; - } - } - while (Q2size > 0) { - xor_keyindex_t keyindex = Q2[--Q2size]; - size_t index = keyindex.index; - if (sets2[index].count == 0) - continue; - - //sets2[index].count = 0; - uint64_t hash = keyindex.hash; - - uint32_t h0 = xor8_get_h0(hash, filter); - uint32_t h1 = xor8_get_h1(hash, filter); - keyindex.index += 2 * blockLength; - - stack[stack_size] = keyindex; - stack_size++; - sets0[h0].xormask ^= hash; - sets0[h0].count--; - if (sets0[h0].count == 1) { - Q0[Q0size].index = h0; - Q0[Q0size].hash = sets0[h0].xormask; - Q0size++; - } - sets1[h1].xormask ^= hash; - sets1[h1].count--; - if (sets1[h1].count == 1) { - Q1[Q1size].index = h1; - Q1[Q1size].hash = sets1[h1].xormask; - Q1size++; - } - - } - } - if (stack_size == size) { - // success - break; - } - - filter->seed = 
xor_rng_splitmix64(&rng_counter); - } - uint8_t * fingerprints0 = filter->fingerprints; - uint8_t * fingerprints1 = filter->fingerprints + blockLength; - uint8_t * fingerprints2 = filter->fingerprints + 2 * blockLength; - - size_t stack_size = size; - while (stack_size > 0) { - xor_keyindex_t ki = stack[--stack_size]; - uint64_t val = xor_fingerprint(ki.hash); - if(ki.index < blockLength) { - val ^= fingerprints1[xor8_get_h1(ki.hash,filter)] ^ fingerprints2[xor8_get_h2(ki.hash,filter)]; - } else if(ki.index < 2 * blockLength) { - val ^= fingerprints0[xor8_get_h0(ki.hash,filter)] ^ fingerprints2[xor8_get_h2(ki.hash,filter)]; - } else { - val ^= fingerprints0[xor8_get_h0(ki.hash,filter)] ^ fingerprints1[xor8_get_h1(ki.hash,filter)]; - } - filter->fingerprints[ki.index] = val; - } - - free(sets); - free(Q); - free(stack); - return true; -} - - -// Construct the filter, returns true on success, false on failure. -// The algorithm fails when there is insufficient memory. -// The caller is responsable for calling xor16_allocate(size,filter) -// before. For best performance, the caller should ensure that there are not too -// many duplicated keys. 
-static inline bool xor16_buffered_populate(uint64_t *keys, uint32_t size, xor16_t *filter) { - if(size == 0) { return false; } - uint64_t rng_counter = 1; - filter->seed = xor_rng_splitmix64(&rng_counter); - size_t arrayLength = filter->blockLength * 3; // size of the backing array - xor_setbuffer_t buffer0, buffer1, buffer2; - size_t blockLength = filter->blockLength; - bool ok0 = xor_init_buffer(&buffer0, blockLength); - bool ok1 = xor_init_buffer(&buffer1, blockLength); - bool ok2 = xor_init_buffer(&buffer2, blockLength); - if (!ok0 || !ok1 || !ok2) { - xor_free_buffer(&buffer0); - xor_free_buffer(&buffer1); - xor_free_buffer(&buffer2); - return false; - } - - xor_xorset_t *sets = - (xor_xorset_t *)malloc(arrayLength * sizeof(xor_xorset_t)); - - xor_keyindex_t *Q = - (xor_keyindex_t *)malloc(arrayLength * sizeof(xor_keyindex_t)); - - xor_keyindex_t *stack = - (xor_keyindex_t *)malloc(size * sizeof(xor_keyindex_t)); - - if ((sets == NULL) || (Q == NULL) || (stack == NULL)) { - xor_free_buffer(&buffer0); - xor_free_buffer(&buffer1); - xor_free_buffer(&buffer2); - free(sets); - free(Q); - free(stack); - return false; - } - xor_xorset_t *sets0 = sets; - xor_xorset_t *sets1 = sets + blockLength; - xor_xorset_t *sets2 = sets + 2 * blockLength; - xor_keyindex_t *Q0 = Q; - xor_keyindex_t *Q1 = Q + blockLength; - xor_keyindex_t *Q2 = Q + 2 * blockLength; - - int iterations = 0; - - while (true) { - iterations ++; - if(iterations == XOR_SORT_ITERATIONS) { - size = xor_sort_and_remove_dup(keys, size); - } - if(iterations > XOR_MAX_ITERATIONS) { - // The probability of this happening is lower than the - // the cosmic-ray probability (i.e., a cosmic ray corrupts your system)é - xor_free_buffer(&buffer0); - xor_free_buffer(&buffer1); - xor_free_buffer(&buffer2); - free(sets); - free(Q); - free(stack); - return false; - } - - memset(sets, 0, sizeof(xor_xorset_t) * arrayLength); - for (size_t i = 0; i < size; i++) { - uint64_t key = keys[i]; - xor_hashes_t hs = 
xor16_get_h0_h1_h2(key, filter); - xor_buffered_increment_counter(hs.h0, hs.h, &buffer0, sets0); - xor_buffered_increment_counter(hs.h1, hs.h, &buffer1, - sets1); - xor_buffered_increment_counter(hs.h2, hs.h, &buffer2, - sets2); - } - xor_flush_increment_buffer(&buffer0, sets0); - xor_flush_increment_buffer(&buffer1, sets1); - xor_flush_increment_buffer(&buffer2, sets2); - // todo: the flush should be sync with the detection that follows - // scan for values with a count of one - size_t Q0size = 0, Q1size = 0, Q2size = 0; - for (size_t i = 0; i < filter->blockLength; i++) { - if (sets0[i].count == 1) { - Q0[Q0size].index = i; - Q0[Q0size].hash = sets0[i].xormask; - Q0size++; - } - } - - for (size_t i = 0; i < filter->blockLength; i++) { - if (sets1[i].count == 1) { - Q1[Q1size].index = i; - Q1[Q1size].hash = sets1[i].xormask; - Q1size++; - } - } - for (size_t i = 0; i < filter->blockLength; i++) { - if (sets2[i].count == 1) { - Q2[Q2size].index = i; - Q2[Q2size].hash = sets2[i].xormask; - Q2size++; - } - } - - size_t stack_size = 0; - while (Q0size + Q1size + Q2size > 0) { - while (Q0size > 0) { - xor_keyindex_t keyindex = Q0[--Q0size]; - size_t index = keyindex.index; - xor_make_buffer_current(&buffer0, sets0, index, Q0, &Q0size); - - if (sets0[index].count == 0) - continue; // not actually possible after the initial scan. 
- //sets0[index].count = 0; - uint64_t hash = keyindex.hash; - uint32_t h1 = xor16_get_h1(hash, filter); - uint32_t h2 = xor16_get_h2(hash, filter); - - stack[stack_size] = keyindex; - stack_size++; - xor_buffered_decrement_counter(h1, hash, &buffer1, sets1, - Q1, &Q1size); - xor_buffered_decrement_counter(h2, hash, &buffer2, - sets2, Q2, &Q2size); - } - if (Q1size == 0) - xor_flushone_decrement_buffer(&buffer1, sets1, Q1, &Q1size); - - while (Q1size > 0) { - xor_keyindex_t keyindex = Q1[--Q1size]; - size_t index = keyindex.index; - xor_make_buffer_current(&buffer1, sets1, index, Q1, &Q1size); - - if (sets1[index].count == 0) - continue; - //sets1[index].count = 0; - uint64_t hash = keyindex.hash; - uint32_t h0 = xor16_get_h0(hash, filter); - uint32_t h2 = xor16_get_h2(hash, filter); - keyindex.index += blockLength; - stack[stack_size] = keyindex; - stack_size++; - xor_buffered_decrement_counter(h0, hash, &buffer0, sets0, Q0, &Q0size); - xor_buffered_decrement_counter(h2, hash, &buffer2, - sets2, Q2, &Q2size); - } - if (Q2size == 0) - xor_flushone_decrement_buffer(&buffer2, sets2, Q2, &Q2size); - while (Q2size > 0) { - xor_keyindex_t keyindex = Q2[--Q2size]; - size_t index = keyindex.index; - xor_make_buffer_current(&buffer2, sets2, index, Q2, &Q2size); - if (sets2[index].count == 0) - continue; - - //sets2[index].count = 0; - uint64_t hash = keyindex.hash; - - uint32_t h0 = xor16_get_h0(hash, filter); - uint32_t h1 = xor16_get_h1(hash, filter); - keyindex.index += 2 * blockLength; - - stack[stack_size] = keyindex; - stack_size++; - xor_buffered_decrement_counter(h0, hash, &buffer0, sets0, Q0, &Q0size); - xor_buffered_decrement_counter(h1, hash, &buffer1, sets1, - Q1, &Q1size); - } - if (Q0size == 0) - xor_flushone_decrement_buffer(&buffer0, sets0, Q0, &Q0size); - if ((Q0size + Q1size + Q2size == 0) && (stack_size < size)) { - // this should basically never happen - xor_flush_decrement_buffer(&buffer0, sets0, Q0, &Q0size); - xor_flush_decrement_buffer(&buffer1, 
sets1, Q1, &Q1size); - xor_flush_decrement_buffer(&buffer2, sets2, Q2, &Q2size); - } - } - if (stack_size == size) { - // success - break; - } - - filter->seed = xor_rng_splitmix64(&rng_counter); - } - uint16_t * fingerprints0 = filter->fingerprints; - uint16_t * fingerprints1 = filter->fingerprints + blockLength; - uint16_t * fingerprints2 = filter->fingerprints + 2 * blockLength; - - size_t stack_size = size; - while (stack_size > 0) { - xor_keyindex_t ki = stack[--stack_size]; - uint64_t val = xor_fingerprint(ki.hash); - if(ki.index < blockLength) { - val ^= fingerprints1[xor16_get_h1(ki.hash,filter)] ^ fingerprints2[xor16_get_h2(ki.hash,filter)]; - } else if(ki.index < 2 * blockLength) { - val ^= fingerprints0[xor16_get_h0(ki.hash,filter)] ^ fingerprints2[xor16_get_h2(ki.hash,filter)]; - } else { - val ^= fingerprints0[xor16_get_h0(ki.hash,filter)] ^ fingerprints1[xor16_get_h1(ki.hash,filter)]; - } - filter->fingerprints[ki.index] = val; - } - xor_free_buffer(&buffer0); - xor_free_buffer(&buffer1); - xor_free_buffer(&buffer2); - - free(sets); - free(Q); - free(stack); - return true; -} - - - -// Construct the filter, returns true on success, false on failure. -// The algorithm fails when there is insufficient memory. -// The caller is responsable for calling xor16_allocate(size,filter) -// before. For best performance, the caller should ensure that there are not too -// many duplicated keys. 
-static inline bool xor16_populate(uint64_t *keys, uint32_t size, xor16_t *filter) { - if(size == 0) { return false; } - uint64_t rng_counter = 1; - filter->seed = xor_rng_splitmix64(&rng_counter); - size_t arrayLength = filter->blockLength * 3; // size of the backing array - size_t blockLength = filter->blockLength; - - xor_xorset_t *sets = - (xor_xorset_t *)malloc(arrayLength * sizeof(xor_xorset_t)); - - xor_keyindex_t *Q = - (xor_keyindex_t *)malloc(arrayLength * sizeof(xor_keyindex_t)); - - xor_keyindex_t *stack = - (xor_keyindex_t *)malloc(size * sizeof(xor_keyindex_t)); - - if ((sets == NULL) || (Q == NULL) || (stack == NULL)) { - free(sets); - free(Q); - free(stack); - return false; - } - xor_xorset_t *sets0 = sets; - xor_xorset_t *sets1 = sets + blockLength; - xor_xorset_t *sets2 = sets + 2 * blockLength; - - xor_keyindex_t *Q0 = Q; - xor_keyindex_t *Q1 = Q + blockLength; - xor_keyindex_t *Q2 = Q + 2 * blockLength; - - int iterations = 0; - - while (true) { - iterations ++; - if(iterations == XOR_SORT_ITERATIONS) { - size = xor_sort_and_remove_dup(keys, size); - } - if(iterations > XOR_MAX_ITERATIONS) { - // The probability of this happening is lower than the - // the cosmic-ray probability (i.e., a cosmic ray corrupts your system). 
- free(sets); - free(Q); - free(stack); - return false; - } - - memset(sets, 0, sizeof(xor_xorset_t) * arrayLength); - for (size_t i = 0; i < size; i++) { - uint64_t key = keys[i]; - xor_hashes_t hs = xor16_get_h0_h1_h2(key, filter); - sets0[hs.h0].xormask ^= hs.h; - sets0[hs.h0].count++; - sets1[hs.h1].xormask ^= hs.h; - sets1[hs.h1].count++; - sets2[hs.h2].xormask ^= hs.h; - sets2[hs.h2].count++; - } - // todo: the flush should be sync with the detection that follows - // scan for values with a count of one - size_t Q0size = 0, Q1size = 0, Q2size = 0; - for (size_t i = 0; i < filter->blockLength; i++) { - if (sets0[i].count == 1) { - Q0[Q0size].index = i; - Q0[Q0size].hash = sets0[i].xormask; - Q0size++; - } - } - - for (size_t i = 0; i < filter->blockLength; i++) { - if (sets1[i].count == 1) { - Q1[Q1size].index = i; - Q1[Q1size].hash = sets1[i].xormask; - Q1size++; - } - } - for (size_t i = 0; i < filter->blockLength; i++) { - if (sets2[i].count == 1) { - Q2[Q2size].index = i; - Q2[Q2size].hash = sets2[i].xormask; - Q2size++; - } - } - - size_t stack_size = 0; - while (Q0size + Q1size + Q2size > 0) { - while (Q0size > 0) { - xor_keyindex_t keyindex = Q0[--Q0size]; - size_t index = keyindex.index; - if (sets0[index].count == 0) - continue; // not actually possible after the initial scan. 
- //sets0[index].count = 0; - uint64_t hash = keyindex.hash; - uint32_t h1 = xor16_get_h1(hash, filter); - uint32_t h2 = xor16_get_h2(hash, filter); - - stack[stack_size] = keyindex; - stack_size++; - sets1[h1].xormask ^= hash; - sets1[h1].count--; - if (sets1[h1].count == 1) { - Q1[Q1size].index = h1; - Q1[Q1size].hash = sets1[h1].xormask; - Q1size++; - } - sets2[h2].xormask ^= hash; - sets2[h2].count--; - if (sets2[h2].count == 1) { - Q2[Q2size].index = h2; - Q2[Q2size].hash = sets2[h2].xormask; - Q2size++; - } - } - while (Q1size > 0) { - xor_keyindex_t keyindex = Q1[--Q1size]; - size_t index = keyindex.index; - if (sets1[index].count == 0) - continue; - //sets1[index].count = 0; - uint64_t hash = keyindex.hash; - uint32_t h0 = xor16_get_h0(hash, filter); - uint32_t h2 = xor16_get_h2(hash, filter); - keyindex.index += blockLength; - stack[stack_size] = keyindex; - stack_size++; - sets0[h0].xormask ^= hash; - sets0[h0].count--; - if (sets0[h0].count == 1) { - Q0[Q0size].index = h0; - Q0[Q0size].hash = sets0[h0].xormask; - Q0size++; - } - sets2[h2].xormask ^= hash; - sets2[h2].count--; - if (sets2[h2].count == 1) { - Q2[Q2size].index = h2; - Q2[Q2size].hash = sets2[h2].xormask; - Q2size++; - } - } - while (Q2size > 0) { - xor_keyindex_t keyindex = Q2[--Q2size]; - size_t index = keyindex.index; - if (sets2[index].count == 0) - continue; - - //sets2[index].count = 0; - uint64_t hash = keyindex.hash; - - uint32_t h0 = xor16_get_h0(hash, filter); - uint32_t h1 = xor16_get_h1(hash, filter); - keyindex.index += 2 * blockLength; - - stack[stack_size] = keyindex; - stack_size++; - sets0[h0].xormask ^= hash; - sets0[h0].count--; - if (sets0[h0].count == 1) { - Q0[Q0size].index = h0; - Q0[Q0size].hash = sets0[h0].xormask; - Q0size++; - } - sets1[h1].xormask ^= hash; - sets1[h1].count--; - if (sets1[h1].count == 1) { - Q1[Q1size].index = h1; - Q1[Q1size].hash = sets1[h1].xormask; - Q1size++; - } - - } - } - if (stack_size == size) { - // success - break; - } - - filter->seed 
= xor_rng_splitmix64(&rng_counter); - } - uint16_t * fingerprints0 = filter->fingerprints; - uint16_t * fingerprints1 = filter->fingerprints + blockLength; - uint16_t * fingerprints2 = filter->fingerprints + 2 * blockLength; - - size_t stack_size = size; - while (stack_size > 0) { - xor_keyindex_t ki = stack[--stack_size]; - uint64_t val = xor_fingerprint(ki.hash); - if(ki.index < blockLength) { - val ^= fingerprints1[xor16_get_h1(ki.hash,filter)] ^ fingerprints2[xor16_get_h2(ki.hash,filter)]; - } else if(ki.index < 2 * blockLength) { - val ^= fingerprints0[xor16_get_h0(ki.hash,filter)] ^ fingerprints2[xor16_get_h2(ki.hash,filter)]; - } else { - val ^= fingerprints0[xor16_get_h0(ki.hash,filter)] ^ fingerprints1[xor16_get_h1(ki.hash,filter)]; - } - filter->fingerprints[ki.index] = val; - } - - free(sets); - free(Q); - free(stack); - return true; -} - - - -#endif From fb0ed32933fa78db58699a3d1377050589cbfeb3 Mon Sep 17 00:00:00 2001 From: Oliver Schonrock Date: Tue, 3 Dec 2024 08:58:07 +0000 Subject: [PATCH 2/4] tidying up Makfile one dependency per line to make things clearer dependcies for each target made more complete by generating with g++ -M -MF - init submodules for deps from either foreign repo --- Makefile | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 9a17efd..036c172 100644 --- a/Makefile +++ b/Makefile @@ -3,15 +3,39 @@ all: build_filter query_filter dependencies/fastfilter_cpp/src/xorfilter/xorfilter.h: git submodule update --init --recursive +dependencies/xor_singleheader/include/binaryfusefilter.h: + git submodule update --init --recursive -query_filter: src/query_filter.cpp src/hexutil.h dependencies/xor_singleheader/include/xorfilter.h - c++ -O3 -o query_filter src/query_filter.cpp -Wall -std=c++11 -Idependencies/fastfilter_cpp/src -Idependencies +query_filter: src/query_filter.cpp \ + src/hexutil.h \ + src/mappeablebloomfilter.h \ + src/util.h \ + src/sha.h \ + 
dependencies/xor_singleheader/include/binaryfusefilter.h \ + dependencies/xor_singleheader/include/xorfilter.h + c++ -O3 -o query_filter src/query_filter.cpp -Wall -std=c++11 -Idependencies -build_filter: src/build_filter.cpp dependencies/fastfilter_cpp/src/xorfilter/xorfilter.h dependencies/fastfilter_cpp/src/xorfilter/xorfilter_plus.h src/hexutil.h dependencies/xor_singleheader/include/xorfilter.h +build_filter: src/build_filter.cpp \ + src/hexutil.h \ + src/mappeablebloomfilter.h \ + src/util.h \ + dependencies/fastfilter_cpp/src/bloom/bloom.h \ + dependencies/fastfilter_cpp/src/hashutil.h \ + dependencies/fastfilter_cpp/src/xorfilter/xorfilter.h \ + dependencies/xor_singleheader/include/binaryfusefilter.h \ + dependencies/xor_singleheader/include/xorfilter.h c++ -O3 -o build_filter src/build_filter.cpp -std=c++11 -Wall -Idependencies/fastfilter_cpp/src -Idependencies test: build_filter query_filter - ./build_filter -V -f xor8 -o filter.bin sample.txt && ./query_filter filter.bin 7C4A8D09CA3762AF | grep "Probably in the set" && ./build_filter -V -f binaryfuse8 -o filter.bin sample.txt && ./query_filter filter.bin 7C4A8D09CA3762AF | grep "Probably in the set" && ./build_filter -V -f binaryfuse16 -o filter.bin sample.txt && ./query_filter filter.bin 7C4A8D09CA3762AF | grep "Probably in the set" && ./build_filter -V -f bloom12 -o filter.bin sample.txt && ./query_filter filter.bin 7C4A8D09CA3762AF | grep "Probably in the set" && echo "SUCCESS" || (echo "Failure. 
There is a bug."| exit -1) + ./build_filter -V -f xor8 -o filter.bin sample.txt && \ + ./query_filter filter.bin 7C4A8D09CA3762AF | grep "Probably in the set" && \ + ./build_filter -V -f binaryfuse8 -o filter.bin sample.txt && \ + ./query_filter filter.bin 7C4A8D09CA3762AF | grep "Probably in the set" && \ + ./build_filter -V -f binaryfuse16 -o filter.bin sample.txt && \ + ./query_filter filter.bin 7C4A8D09CA3762AF | grep "Probably in the set" && \ + ./build_filter -V -f bloom12 -o filter.bin sample.txt && \ + ./query_filter filter.bin 7C4A8D09CA3762AF | grep "Probably in the set" && \ + echo "SUCCESS" || (echo "Failure. There is a bug."| exit -1) clean: rm -f build_filter query_filter From 5cdb78103d0c525f84049cc4e2c8f06e823a8d13 Mon Sep 17 00:00:00 2001 From: Oliver Schonrock Date: Tue, 3 Dec 2024 09:05:27 +0000 Subject: [PATCH 3/4] removing unused #includes which also means we are now only including one xorfilter.h from one repo change Makefile to reflect this --- Makefile | 1 - src/build_filter.cpp | 5 ----- 2 files changed, 6 deletions(-) diff --git a/Makefile b/Makefile index 036c172..9394373 100644 --- a/Makefile +++ b/Makefile @@ -21,7 +21,6 @@ build_filter: src/build_filter.cpp \ src/util.h \ dependencies/fastfilter_cpp/src/bloom/bloom.h \ dependencies/fastfilter_cpp/src/hashutil.h \ - dependencies/fastfilter_cpp/src/xorfilter/xorfilter.h \ dependencies/xor_singleheader/include/binaryfusefilter.h \ dependencies/xor_singleheader/include/xorfilter.h c++ -O3 -o build_filter src/build_filter.cpp -std=c++11 -Wall -Idependencies/fastfilter_cpp/src -Idependencies diff --git a/src/build_filter.cpp b/src/build_filter.cpp index 4a26e88..bef630f 100644 --- a/src/build_filter.cpp +++ b/src/build_filter.cpp @@ -1,8 +1,6 @@ #include #include -#include #include -#include #include #include #include @@ -11,15 +9,12 @@ #include "hexutil.h" #include "xor_singleheader/include/binaryfusefilter.h" #include "xor_singleheader/include/xorfilter.h" -#include "xorfilter/xorfilter.h" 
#include "mappeablebloomfilter.h" static void printusage(char *command) { printf(" Try %s -f binaryfuse8 -o filter.bin mydatabase \n", command); - ; printf("The supported filters are xor8, binaryfuse8, binaryfuse16 and bloom12.\n"); - printf("The -V flag verifies the resulting filter.\n"); } From a9630fa29873f54ec22f22593422912a766250e7 Mon Sep 17 00:00:00 2001 From: Oliver Schonrock Date: Tue, 3 Dec 2024 09:08:27 +0000 Subject: [PATCH 4/4] also remove unused includes from query_filter.cpp no impact on Makefile here --- src/query_filter.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/query_filter.cpp b/src/query_filter.cpp index 19e7f54..8a8dc0e 100644 --- a/src/query_filter.cpp +++ b/src/query_filter.cpp @@ -7,9 +7,7 @@ #include #include #include -#include #include -#include #include #include #include