HSDS
 All Data Structures Namespaces Files Functions Variables Typedefs Friends Macros
wavelet-matrix.hpp
Go to the documentation of this file.
1 
7 #if !defined(HSDS_WAVELET_MATRIX_HPP_)
8 #define HSDS_WAVELET_MATRIX_HPP_
9 
10 #include <vector>
11 #include <queue>
12 #include "hsds/scoped_ptr.hpp"
13 #include "hsds/bit-vector.hpp"
14 #include "hsds/vector.hpp"
15 
20 namespace hsds {
21 
30 bool uint2bit(uint64_t bits, uint64_t pos) {
31  return ((bits >> (sizeof(uint64_t) * 8 - 1 - pos)) & 0x1ULL) == 0x1ULL;
32 }
33 
34 // forward declaration
35 class Exception;
36 
40 struct ListResult {
47  ListResult(uint64_t c, uint64_t freq) :
48  c(c), freq(freq) {
49  }
50  uint64_t c;
51  uint64_t freq;
52 
57  bool operator <(const ListResult& lr) const {
58  if (c != lr.c)
59  return c < lr.c;
60  return freq < lr.freq;
61  }
62 };
63 
68 public:
69 
73  WaveletMatrix();
74 
78  virtual ~WaveletMatrix();
79 
83  void clear();
84 
90  void swap(WaveletMatrix& x);
91 
97  void build(std::vector<uint64_t>& src);
98 
104  inline uint64_t size() const {
105  return size_;
106  }
107 
115  inline uint64_t operator[](uint64_t pos) {
116  return lookup(pos);
117  }
118 
126  uint64_t lookup(uint64_t pos) const;
127 
137  uint64_t rank(uint64_t c, uint64_t pos) const;
138 
148  uint64_t rankLessThan(uint64_t c, uint64_t pos) const;
149 
159  uint64_t rankMoreThan(uint64_t c, uint64_t pos) const;
160 
170  void rankAll(uint64_t c, uint64_t begin_pos, uint64_t end_pos, uint64_t& rank, uint64_t& rank_less_than,
171  uint64_t& rank_more_than) const;
172 
182  uint64_t select(uint64_t c, uint64_t rank) const;
183 
193  uint64_t selectFromPos(uint64_t c, uint64_t pos, uint64_t rank) const;
194 
202  uint64_t freq(uint64_t c) const;
203 
212  uint64_t freqSum(uint64_t min_c, uint64_t max_c) const;
213 
225  uint64_t freqRange(uint64_t min_c, uint64_t max_c, uint64_t begin_pos, uint64_t end_pos) const;
226 
236  void maxRange(uint64_t begin_pos, uint64_t end_pos, uint64_t& pos, uint64_t& val) const;
237 
247  void minRange(uint64_t begin_pos, uint64_t end_pos, uint64_t& pos, uint64_t& val) const;
248 
259  void quantileRange(uint64_t begin_pos, uint64_t end_pos, uint64_t k, uint64_t& pos, uint64_t& val) const;
260 
272  void listModeRange(uint64_t min_c, uint64_t max_c, uint64_t beg_pos, uint64_t end_pos, uint64_t num,
273  std::vector<ListResult>& res) const;
274 
286  void listMinRange(uint64_t min_c, uint64_t max_c, uint64_t beg_pos, uint64_t end_pos, uint64_t num,
287  std::vector<ListResult>& res) const;
288 
300  void listMaxRange(uint64_t min_c, uint64_t max_c, uint64_t beg_pos, uint64_t end_pos, uint64_t num,
301  std::vector<ListResult>& res) const;
302 
310  void save(std::ostream& os) const throw (hsds::Exception);
311 
319  void load(std::istream& is) throw (hsds::Exception);
320 
331  uint64_t map(void* ptr, uint64_t mapSize) throw (hsds::Exception);
332 
333 private:
334  typedef hsds::Vector<hsds::BitVector> bv_type;
335  typedef hsds::Vector<hsds::Vector<uint64_t> > range_type;
336  typedef hsds::Vector<uint64_t> uint64_vector_type;
337 
338  uint64_t size_;
339  const uint64_t bitSize_;
340  uint64_t alphabetNum_;
341  uint64_t alphabetBitNum_;
342  bv_type bv_;
343  range_type nodePos_;
344  uint64_vector_type seps_;
345 
346  inline uint64_t bitSize() const {
347  return bitSize_;
348  }
349 
350  uint64_t getAlphabetNum(const std::vector<uint64_t>& array) const;
351  uint64_t log2(uint64_t x) const;
352 
353  struct QueryOnNode {
354  QueryOnNode(uint64_t beg_node, uint64_t end_node, uint64_t beg_pos, uint64_t end_pos, uint64_t depth,
355  uint64_t prefix_char) :
356  beg_node(beg_node),
357  end_node(end_node),
358  beg_pos(beg_pos),
359  end_pos(end_pos),
360  depth(depth),
361  prefix_char(prefix_char) {
362  }
363  uint64_t beg_node;
364  uint64_t end_node;
365  uint64_t beg_pos;
366  uint64_t end_pos;
367  uint64_t depth;
368  uint64_t prefix_char;
369  void print() {
370  std::cout << beg_node << " " << end_node << " " << beg_pos << " " << end_pos << " " << depth << " ";
371  for (uint64_t i = 0; i < depth; ++i) {
372  std::cout << ((prefix_char >> (depth - (i + 1))) & 1LLU);
373  }
374  std::cout << std::endl;
375  }
376  };
377 
378  class ListModeComparator;
379  class ListMinComparator;
380  class ListMaxComparator;
381 
382  template<class Comparator>
383  void listRange(uint64_t min_c, uint64_t max_c, uint64_t beg_pos, uint64_t end_pos, uint64_t num,
384  std::vector<ListResult>& res) const {
385  res.clear();
386  if (end_pos > size_ || beg_pos >= end_pos)
387  return;
388 
389  std::priority_queue<QueryOnNode, std::vector<QueryOnNode>, Comparator> qons;
390  qons.push(QueryOnNode(0, size_, beg_pos, end_pos, 0, 0));
391 
392  while (res.size() < num && !qons.empty()) {
393  QueryOnNode qon = qons.top();
394  qons.pop();
395  if (qon.depth >= alphabetBitNum_) {
396  res.push_back(ListResult(qon.prefix_char, qon.end_pos - qon.beg_pos));
397  } else {
398  std::vector<QueryOnNode> next;
399  expandNode(min_c, max_c, qon, next);
400  for (size_t i = 0; i < next.size(); ++i) {
401  qons.push(next[i]);
402  }
403  }
404  }
405  }
406 
407  uint64_t prefixCode(uint64_t x, uint64_t len, uint64_t bit_num) const;
408  bool checkPrefix(uint64_t prefix, uint64_t depth, uint64_t min_c, uint64_t max_c) const;
409  void expandNode(uint64_t min_c, uint64_t max_c, const QueryOnNode& qon, std::vector<QueryOnNode>& next) const;
410 };
411 
412 }
413 
414 #endif /* !defined(HSDS_WAVELET_MATRIX_HPP_) */