Wavelet matrix class. More...
#include <wavelet-matrix.hpp>
Public Member Functions | |
WaveletMatrix () | |
Constructor. More... | |
virtual | ~WaveletMatrix () |
Destructor. More... | |
void | clear () |
Clear wavelet matrix. More... | |
void | swap (WaveletMatrix &x) |
Exchanges the content of the instance. More... | |
void | build (std::vector< uint64_t > &src) |
Build the wavelet matrix form array(src ) More... | |
uint64_t | size () const |
Return the number of elements. More... | |
uint64_t | operator[] (uint64_t pos) |
Lookup A[pos]. More... | |
uint64_t | lookup (uint64_t pos) const |
Lookup A[pos]. More... | |
uint64_t | rank (uint64_t c, uint64_t pos) const |
Compute the rank = the frequency of a character 'c' in the prefix of the array A[0...pos) More... | |
uint64_t | rankLessThan (uint64_t c, uint64_t pos) const |
Compute the frequency of characters c' < c in the subarray A[0...pos) More... | |
uint64_t | rankMoreThan (uint64_t c, uint64_t pos) const |
Compute the frequency of characters c' > c in the subarray A[0...pos) More... | |
void | rankAll (uint64_t c, uint64_t begin_pos, uint64_t end_pos, uint64_t &rank, uint64_t &rank_less_than, uint64_t &rank_more_than) const |
Compute the frequency of characters c' < c, c'=c, and c' > c, in the subarray A[begin_pos...end_pos) More... | |
uint64_t | select (uint64_t c, uint64_t rank) const |
Compute the select = the position of the (rank+1)-th occurrence of 'c' in the array. More... | |
uint64_t | selectFromPos (uint64_t c, uint64_t pos, uint64_t rank) const |
Compute the select = the position of the (rank+1)-th occurrence of 'c' in the suffix of the array starting from 'pos'. More... | |
uint64_t | freq (uint64_t c) const |
Compute the frequency of the character c. More... | |
uint64_t | freqSum (uint64_t min_c, uint64_t max_c) const |
Compute the frequency of the characters. More... | |
uint64_t | freqRange (uint64_t min_c, uint64_t max_c, uint64_t begin_pos, uint64_t end_pos) const |
Compute the frequency of characters min_c <= c' < max_c in the subarray A[beg_pos ... More... | |
void | maxRange (uint64_t begin_pos, uint64_t end_pos, uint64_t &pos, uint64_t &val) const |
Range Max Query. More... | |
void | minRange (uint64_t begin_pos, uint64_t end_pos, uint64_t &pos, uint64_t &val) const |
Range Min Query. More... | |
void | quantileRange (uint64_t begin_pos, uint64_t end_pos, uint64_t k, uint64_t &pos, uint64_t &val) const |
Range Quantile Query, Return the K-th smallest value in the subarray. More... | |
void | listModeRange (uint64_t min_c, uint64_t max_c, uint64_t beg_pos, uint64_t end_pos, uint64_t num, std::vector< ListResult > &res) const |
List the distinct characters appeared in A[beg_pos ... More... | |
void | listMinRange (uint64_t min_c, uint64_t max_c, uint64_t beg_pos, uint64_t end_pos, uint64_t num, std::vector< ListResult > &res) const |
List the distinct characters in A[beg_pos ... More... | |
void | listMaxRange (uint64_t min_c, uint64_t max_c, uint64_t beg_pos, uint64_t end_pos, uint64_t num, std::vector< ListResult > &res) const |
List the distinct characters appeared in A[beg_pos ... More... | |
void | save (std::ostream &os) const throw (hsds::Exception) |
Save the current status to a stream. More... | |
void | load (std::istream &is) throw (hsds::Exception) |
Load the current status from a stream. More... | |
uint64_t | map (void *ptr, uint64_t mapSize) throw (hsds::Exception) |
Mapping pointer to the WaveletMatrix. More... | |
Wavelet matrix class.
See also http://www.dcc.uchile.cl/~gnavarro/ps/spire12.4.pdf
Definition at line 67 of file wavelet-matrix.hpp.
hsds::WaveletMatrix::WaveletMatrix | ( | ) |
Constructor.
|
virtual |
Destructor.
void hsds::WaveletMatrix::build | ( | std::vector< uint64_t > & | src | ) |
Build the wavelet matrix form array(src
)
[in] | src | An array to be initialized |
void hsds::WaveletMatrix::clear | ( | ) |
Clear wavelet matrix.
uint64_t hsds::WaveletMatrix::freq | ( | uint64_t | c | ) | const |
Compute the frequency of the character c.
[in] | c | The character to be examined |
uint64_t hsds::WaveletMatrix::freqRange | ( | uint64_t | min_c, |
uint64_t | max_c, | ||
uint64_t | begin_pos, | ||
uint64_t | end_pos | ||
) | const |
Compute the frequency of characters min_c <= c' < max_c in the subarray A[beg_pos ...
end_pos)
[in] | min_c | The smallest character to be examined |
[in] | max_c | The upper bound of the character to be examined |
[in] | begin_pos | The beginning position of the array (inclusive) |
[in] | end_pos | The ending position of the array (not inclusive) |
uint64_t hsds::WaveletMatrix::freqSum | ( | uint64_t | min_c, |
uint64_t | max_c | ||
) | const |
Compute the frequency of the characters.
[in] | min_c | The minimum character |
[in] | max_c | The maximum character |
void hsds::WaveletMatrix::listMaxRange | ( | uint64_t | min_c, |
uint64_t | max_c, | ||
uint64_t | beg_pos, | ||
uint64_t | end_pos, | ||
uint64_t | num, | ||
std::vector< ListResult > & | res | ||
) | const |
List the distinct characters appeared in A[beg_pos ...
end_pos) from largest ones
min_c | The smallest character to be examined |
max_c | The upper bound of the character to be examined |
beg_pos | The beginning position of the array (inclusive) |
end_pos | The ending position of the array (not inclusive) |
num | The maximum number of reporting results. |
res | The distinct characters in the A[beg_pos ... end_pos) from largest ones. Each item consists of c:character and freq: frequency of c. |
void hsds::WaveletMatrix::listMinRange | ( | uint64_t | min_c, |
uint64_t | max_c, | ||
uint64_t | beg_pos, | ||
uint64_t | end_pos, | ||
uint64_t | num, | ||
std::vector< ListResult > & | res | ||
) | const |
List the distinct characters in A[beg_pos ...
end_pos) min_c <= c < max_c from smallest ones
min_c | The smallest character to be examined |
max_c | The upper bound of the character to be examined |
beg_pos | The beginning position of the array (inclusive) |
end_pos | The ending position of the array (not inclusive) |
num | The maximum number of reporting results. |
res | The distinct characters in the A[beg_pos ... end_pos) from smallest ones. Each item consists of c:character and freq: frequency of c. |
void hsds::WaveletMatrix::listModeRange | ( | uint64_t | min_c, |
uint64_t | max_c, | ||
uint64_t | beg_pos, | ||
uint64_t | end_pos, | ||
uint64_t | num, | ||
std::vector< ListResult > & | res | ||
) | const |
List the distinct characters appeared in A[beg_pos ...
end_pos) from most frequent ones
min_c | The smallest character to be examined |
max_c | The upper bound of the character to be examined |
beg_pos | The beginning position of the array (inclusive) |
end_pos | The ending position of the array (not inclusive) |
num | The maximum number of reporting results. |
res | The distinct characters in the A[beg_pos ... end_pos) from most frequent ones. Each item consists of c:character and freq: frequency of c. |
void hsds::WaveletMatrix::load | ( | std::istream & | is | ) | throw (hsds::Exception) |
Load the current status from a stream.
[in] | is | The input stream where the status is saved |
hsds::Exception | When failed to load. |
uint64_t hsds::WaveletMatrix::lookup | ( | uint64_t | pos | ) | const |
Lookup A[pos].
[in] | pos | The position |
uint64_t hsds::WaveletMatrix::map | ( | void * | ptr, |
uint64_t | mapSize | ||
) | throw (hsds::Exception) |
Mapping pointer to the WaveletMatrix.
[in] | ptr | The pointer of the mmaped file |
[in] | mapSize | The size of mmaped file |
ptr
).hsds::Exception | When failed to load. |
void hsds::WaveletMatrix::maxRange | ( | uint64_t | begin_pos, |
uint64_t | end_pos, | ||
uint64_t & | pos, | ||
uint64_t & | val | ||
) | const |
Range Max Query.
[in] | begin_pos | The beginning position |
[in] | end_pos | The ending position |
[out] | pos | The position where the largest value appeared in the subarray A[beg_pos .. end_pos) If there are many items having the largest values, the smallest pos will be reported |
[out] | val | The largest value appeared in the subarray A[beg_pos ... end_pos) |
void hsds::WaveletMatrix::minRange | ( | uint64_t | begin_pos, |
uint64_t | end_pos, | ||
uint64_t & | pos, | ||
uint64_t & | val | ||
) | const |
Range Min Query.
[in] | begin_pos | The beginning position |
[in] | end_pos | The ending position |
[out] | pos | The position where the smallest value appeared in the subarray A[beg_pos .. end_pos) If there are many items having the smallest values, the smallest pos will be reported |
[out] | val | The smallest value appeared in the subarray A[beg_pos ... end_pos) |
|
inline |
Lookup A[pos].
[in] | pos | The position |
Definition at line 115 of file wavelet-matrix.hpp.
void hsds::WaveletMatrix::quantileRange | ( | uint64_t | begin_pos, |
uint64_t | end_pos, | ||
uint64_t | k, | ||
uint64_t & | pos, | ||
uint64_t & | val | ||
) | const |
Range Quantile Query, Return the K-th smallest value in the subarray.
[in] | begin_pos | The beginning position |
[in] | end_pos | The ending position |
[in] | k | The order (should be smaller than end_pos - beg_pos). |
[out] | pos | The position where the k-th largest value appeared in the subarray A[beg_pos .. end_pos) If there are many items having the k-th largest values, the smallest pos will be reported |
[out] | val | The k-th largest value appeared in the subarray A[beg_pos ... end_pos) |
uint64_t hsds::WaveletMatrix::rank | ( | uint64_t | c, |
uint64_t | pos | ||
) | const |
Compute the rank = the frequency of a character 'c' in the prefix of the array A[0...pos)
[in] | c | Character to be examined |
[in] | pos | The position of the prefix (not inclusive) |
void hsds::WaveletMatrix::rankAll | ( | uint64_t | c, |
uint64_t | begin_pos, | ||
uint64_t | end_pos, | ||
uint64_t & | rank, | ||
uint64_t & | rank_less_than, | ||
uint64_t & | rank_more_than | ||
) | const |
Compute the frequency of characters c' < c, c'=c, and c' > c, in the subarray A[begin_pos...end_pos)
c | The character |
begin_pos | The beginning position of the array (not inclusive) |
end_pos | The ending position of the array (not inclusive) |
rank | The frefquency of c in A[0...pos) |
rank_less_than | The frequency of c' < c in A[0...pos) |
rank_more_than | The frequency of c' > c in A[0...pos) |
uint64_t hsds::WaveletMatrix::rankLessThan | ( | uint64_t | c, |
uint64_t | pos | ||
) | const |
Compute the frequency of characters c' < c in the subarray A[0...pos)
[in] | c | The upper bound of the characters |
[in] | pos | The position of the end of the prefix (not inclusive) |
uint64_t hsds::WaveletMatrix::rankMoreThan | ( | uint64_t | c, |
uint64_t | pos | ||
) | const |
Compute the frequency of characters c' > c in the subarray A[0...pos)
[in] | c | The lower bound of the characters |
[in] | pos | The position of the end of the prefix (not inclusive) |
void hsds::WaveletMatrix::save | ( | std::ostream & | os | ) | const throw (hsds::Exception) |
Save the current status to a stream.
[out] | os | The output stream where the data is saved |
hsds::Exception | When failed to save. |
uint64_t hsds::WaveletMatrix::select | ( | uint64_t | c, |
uint64_t | rank | ||
) | const |
Compute the select = the position of the (rank+1)-th occurrence of 'c' in the array.
[in] | c | Character to be examined |
[in] | rank | The rank of the character |
uint64_t hsds::WaveletMatrix::selectFromPos | ( | uint64_t | c, |
uint64_t | pos, | ||
uint64_t | rank | ||
) | const |
Compute the select = the position of the (rank+1)-th occurrence of 'c' in the suffix of the array starting from 'pos'.
[in] | c | Character to be examined |
[in] | pos | The beginning position of the suffix (inclusive) |
[in] | rank | The rank of the character |
|
inline |
Return the number of elements.
Definition at line 104 of file wavelet-matrix.hpp.
void hsds::WaveletMatrix::swap | ( | WaveletMatrix & | x | ) |
Exchanges the content of the instance.
[in,out] | x | Another WaveletMatrix instance |