-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsasamples.h
129 lines (94 loc) · 3.62 KB
/
sasamples.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#ifndef SASAMPLES_H
#define SASAMPLES_H
#include <cstdio>
#include <fstream>
#include "sampler.h"
#include "misc/utils.h"
#include "bits/bitbuffer.h"
#include "bits/deltavector.h"
#ifdef SUCCINCT_SA_VECTOR
#include "bits/succinctvector.h"
#endif
namespace CSA
{
// SAVector is the bit vector type used for the sample index structures.
// DeltaVector is the default; defining SUCCINCT_SA_VECTOR at compile time
// selects SuccinctVector instead.
#ifndef SUCCINCT_SA_VECTOR
typedef DeltaVector SAVector;
#else
typedef SuccinctVector SAVector;
#endif
class SASamples
{
public:
#ifdef SUCCINCT_SA_VECTOR
const static usint INDEX_BLOCK_SIZE = 32;
#else
const static usint INDEX_BLOCK_SIZE = 16;
#endif
SASamples(std::ifstream& sample_file, usint sample_rate, bool _weighted);
SASamples(FILE* sample_file, usint sample_rate, bool _weighted);
// These assume < 4 GB data.
SASamples(short_pair* sa, DeltaVector* end_points, usint data_size, usint sample_rate, usint threads);
SASamples(short_pair* sa, Sampler* sampler, usint threads); // Use the given samples.
// Use these samples. Assumes regular sampling.
SASamples(pair_type* sample_pairs, usint data_size, usint sample_rate, usint threads);
~SASamples();
// Destroys contents of index and increment.
// We assume index and increment have same sample rate.
// positions must not containt the positions of end of sequence markers.
// number_of_sequences is subtracted from each position before the value is used.
SASamples(SASamples& index, SASamples& increment, usint* positions, usint number_of_positions, usint number_of_sequences);
void writeTo(std::ofstream& sample_file) const;
void writeTo(FILE* sample_file) const;
// Returns (i, inverseSA(i)) such that i is the last sampled position up to value.
// The return value can also be thought of as (SA[j], j).
// Value is actual 0-based suffix array value.
// Returns (size, size) if value is too large.
pair_type inverseSA(usint value) const;
// Returns the value of ith sample in suffix array order.
inline usint getSample(usint i) const
{
return std::min(this->samples->readItemConst(i) * this->rate, this->size - 1);
}
// Returns (ind, sample number) where ind >= index or (size, ???).
inline pair_type getFirstSampleAfter(usint index) const
{
SAVector::Iterator iter(*(this->indexes));
return iter.valueAfter(index);
}
inline bool isSampled(usint index) const
{
SAVector::Iterator iter(*(this->indexes));
return iter.isSet(index);
}
inline usint getSampleAt(usint index) const
{
SAVector::Iterator iter(*(this->indexes));
return this->getSample(iter.rank(index) - 1);
}
inline usint getSampleRate() const { return this->rate; }
inline usint getNumberOfSamples() const { return this->items; }
inline bool isWeighted() const { return this->weighted; }
inline bool supportsLocate() const { return (this->samples != 0); }
inline bool supportsDisplay() const { return (this->inverse_samples != 0); }
usint reportSize() const;
// Removes structures not necessary for merging.
void strip();
private:
bool weighted;
usint rate, size, items;
SAVector* indexes;
ReadBuffer* samples;
SAVector* inverse_indexes;
ReadBuffer* inverse_samples;
void buildInverseSamples();
// Weighted case.
void buildSamples(pair_type* sample_pairs, bool inverse, usint threads);
// Note: contents of original samples are deleted.
void mergeSamples(SASamples& index, SASamples& increment, usint* positions, usint n, usint skip);
// These are not allowed.
SASamples();
SASamples(const SASamples&);
SASamples& operator = (const SASamples&);
};
} // namespace CSA
#endif // SASAMPLES_H