mdds
segment_tree.hpp
1 /*************************************************************************
2  *
3  * Copyright (c) 2010-2015 Kohei Yoshida
4  *
5  * Permission is hereby granted, free of charge, to any person
6  * obtaining a copy of this software and associated documentation
7  * files (the "Software"), to deal in the Software without
8  * restriction, including without limitation the rights to use,
9  * copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following
12  * conditions:
13  *
14  * The above copyright notice and this permission notice shall be
15  * included in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24  * OTHER DEALINGS IN THE SOFTWARE.
25  *
26  ************************************************************************/
27 
28 #ifndef INCLUDED_MDDS_SEGMENTTREE_HPP
29 #define INCLUDED_MDDS_SEGMENTTREE_HPP
30 
31 #include "mdds/node.hpp"
32 #include "mdds/global.hpp"
33 
34 #include <vector>
35 #include <iostream>
36 #include <map>
37 #include <unordered_map>
38 #include <memory>
39 
40 #ifdef MDDS_UNIT_TEST
41 #include <sstream>
42 #endif
43 
44 namespace mdds {
45 
46 template<typename _Key, typename _Value>
48 {
49 public:
50  typedef _Key key_type;
51  typedef _Value value_type;
52  typedef size_t size_type;
53  typedef std::vector<value_type> search_results_type;
54 
55 #ifdef MDDS_UNIT_TEST
56  struct segment_data
57  {
58  key_type begin_key;
59  key_type end_key;
60  value_type pdata;
61 
62  segment_data(key_type _beg, key_type _end, value_type p) :
63  begin_key(_beg), end_key(_end), pdata(p) {}
64 
65  bool operator==(const segment_data& r) const
66  {
67  return begin_key == r.begin_key && end_key == r.end_key && pdata == r.pdata;
68  }
69 
70  bool operator!=(const segment_data& r) const
71  {
72  return !operator==(r);
73  }
74  };
75 
76  struct segment_map_printer
77  {
78  void operator() (const ::std::pair<value_type, ::std::pair<key_type, key_type> >& r) const
79  {
80  using namespace std;
81  cout << r.second.first << "-" << r.second.second << ": " << r.first->name << endl;
82  }
83  };
84 #endif
85 
86 public:
87  typedef ::std::vector<value_type> data_chain_type;
88  typedef std::unordered_map<value_type, ::std::pair<key_type, key_type> > segment_map_type;
89  typedef ::std::map<value_type, ::std::pair<key_type, key_type> > sorted_segment_map_type;
90 
92  {
93  key_type low;
94  key_type high;
95  data_chain_type* data_chain;
96 
97  bool operator== (const nonleaf_value_type& r) const
98  {
99  return low == r.low && high == r.high && data_chain == r.data_chain;
100  }
101  };
102 
104  {
105  key_type key;
106  data_chain_type* data_chain;
107 
108  bool operator== (const leaf_value_type& r) const
109  {
110  return key == r.key && data_chain == r.data_chain;
111  }
112  };
113 
115  struct init_handler;
116  struct dispose_handler;
117 #ifdef MDDS_UNIT_TEST
118  struct to_string_handler;
119 #endif
120 
122  typedef typename node::node_ptr node_ptr;
123 
125 
127  {
128  void operator() (__st::nonleaf_node<segment_tree>& _self, const __st::node_base* left_node, const __st::node_base* right_node)
129  {
130  // Parent node should carry the range of all of its child nodes.
131  if (left_node)
132  {
133  _self.value_nonleaf.low = left_node->is_leaf ?
134  static_cast<const node*>(left_node)->value_leaf.key :
135  static_cast<const nonleaf_node*>(left_node)->value_nonleaf.low;
136  }
137  else
138  {
139  // Having a left node is prerequisite.
140  throw general_error("segment_tree::fill_nonleaf_value_handler: Having a left node is prerequisite.");
141  }
142 
143  if (right_node)
144  {
145  if (right_node->is_leaf)
146  {
147  // When the child nodes are leaf nodes, the upper bound
148  // must be the value of the node that comes after the
149  // right leaf node (if such node exists).
150 
151  const node* p = static_cast<const node*>(right_node);
152  if (p->next)
153  _self.value_nonleaf.high = p->next->value_leaf.key;
154  else
155  _self.value_nonleaf.high = p->value_leaf.key;
156  }
157  else
158  {
159  _self.value_nonleaf.high = static_cast<const nonleaf_node*>(right_node)->value_nonleaf.high;
160  }
161  }
162  else
163  {
164  _self.value_nonleaf.high = left_node->is_leaf ?
165  static_cast<const node*>(left_node)->value_leaf.key :
166  static_cast<const nonleaf_node*>(left_node)->value_nonleaf.high;
167  }
168  }
169  };
170 
171 #ifdef MDDS_UNIT_TEST
172  struct to_string_handler
173  {
174  std::string operator() (const node& _self) const
175  {
176  std::ostringstream os;
177  os << "[" << _self.value_leaf.key << "] ";
178  return os.str();
179  }
180 
181  std::string operator() (const __st::nonleaf_node<segment_tree>& _self) const
182  {
183  std::ostringstream os;
184  os << "[" << _self.value_nonleaf.low << "-" << _self.value_nonleaf.high << ")";
185  if (_self.value_nonleaf.data_chain)
186  {
187  os << " { ";
188  typename data_chain_type::const_iterator
189  itr,
190  itr_beg = _self.value_nonleaf.data_chain->begin(),
191  itr_end = _self.value_nonleaf.data_chain->end();
192  for (itr = itr_beg; itr != itr_end; ++itr)
193  {
194  if (itr != itr_beg)
195  os << ", ";
196  os << (*itr)->name;
197  }
198  os << " }";
199  }
200  os << " ";
201  return os.str();
202  }
203  };
204 #endif
205 
207  {
208  void operator() (node& _self)
209  {
210  _self.value_leaf.data_chain = nullptr;
211  }
212 
213  void operator() (__st::nonleaf_node<segment_tree>& _self)
214  {
215  _self.value_nonleaf.data_chain = nullptr;
216  }
217  };
218 
220  {
221  void operator() (node& _self)
222  {
223  delete _self.value_leaf.data_chain;
224  }
225 
226  void operator() (__st::nonleaf_node<segment_tree>& _self)
227  {
228  delete _self.value_nonleaf.data_chain;
229  }
230  };
231 
232 #ifdef MDDS_UNIT_TEST
233  struct node_printer
234  {
235  void operator() (const __st::node_base* p) const
236  {
237  if (p->is_leaf)
238  std::cout << static_cast<const node*>(p)->to_string() << " ";
239  else
240  std::cout << static_cast<const nonleaf_node*>(p)->to_string() << " ";
241  }
242  };
243 #endif
244 
245 private:
246 
251  class search_results_base
252  {
253  public:
254  typedef std::vector<data_chain_type*> res_chains_type;
255  typedef std::shared_ptr<res_chains_type> res_chains_ptr;
256  public:
257 
258  search_results_base() :
259  mp_res_chains(static_cast<res_chains_type*>(nullptr)) {}
260 
261  search_results_base(const search_results_base& r) :
262  mp_res_chains(r.mp_res_chains) {}
263 
264  size_t size() const
265  {
266  size_t combined = 0;
267  if (!mp_res_chains)
268  return combined;
269 
270  typename res_chains_type::const_iterator
271  itr = mp_res_chains->begin(), itr_end = mp_res_chains->end();
272  for (; itr != itr_end; ++itr)
273  combined += (*itr)->size();
274  return combined;
275  }
276 
277  void push_back_chain(data_chain_type* chain)
278  {
279  if (!chain || chain->empty())
280  return;
281 
282  if (!mp_res_chains)
283  mp_res_chains.reset(new res_chains_type);
284  mp_res_chains->push_back(chain);
285  }
286 
287  res_chains_ptr& get_res_chains() { return mp_res_chains; }
288 
289  private:
290  res_chains_ptr mp_res_chains;
291  };
292 
293  class iterator_base
294  {
295  protected:
296  typedef typename search_results_base::res_chains_type res_chains_type;
297  typedef typename search_results_base::res_chains_ptr res_chains_ptr;
298 
299  iterator_base(const res_chains_ptr& p) :
300  mp_res_chains(p), m_end_pos(true) {}
301 
302  public:
303  typedef ::std::bidirectional_iterator_tag iterator_category;
304  typedef typename data_chain_type::value_type value_type;
305  typedef typename data_chain_type::pointer pointer;
306  typedef typename data_chain_type::reference reference;
307  typedef typename data_chain_type::difference_type difference_type;
308 
309  iterator_base() :
310  mp_res_chains(static_cast<res_chains_type*>(nullptr)), m_end_pos(true) {}
311 
312  iterator_base(const iterator_base& r) :
313  mp_res_chains(r.mp_res_chains),
314  m_cur_chain(r.m_cur_chain),
315  m_cur_pos_in_chain(r.m_cur_pos_in_chain),
316  m_end_pos(r.m_end_pos) {}
317 
318  iterator_base& operator= (const iterator_base& r)
319  {
320  mp_res_chains = r.mp_res_chains;
321  m_cur_chain = r.m_cur_chain;
322  m_cur_pos_in_chain = r.m_cur_pos_in_chain;
323  m_end_pos = r.m_end_pos;
324  return *this;
325  }
326 
327  typename data_chain_type::value_type* operator++ ()
328  {
329  // We don't check for end position flag for performance reasons.
330  // The caller is responsible for making sure not to increment past
331  // end position.
332 
333  // When reaching the end position, the internal iterators still
334  // need to be pointing at the last item before the end position.
335  // This is why we need to make copies of the iterators, and copy
336  // them back once done.
337 
338  typename data_chain_type::iterator cur_pos_in_chain = m_cur_pos_in_chain;
339 
340  if (++cur_pos_in_chain == (*m_cur_chain)->end())
341  {
342  // End of current chain. Inspect the next chain if exists.
343  typename res_chains_type::iterator cur_chain = m_cur_chain;
344  ++cur_chain;
345  if (cur_chain == mp_res_chains->end())
346  {
347  m_end_pos = true;
348  return nullptr;
349  }
350  m_cur_chain = cur_chain;
351  m_cur_pos_in_chain = (*m_cur_chain)->begin();
352  }
353  else
354  ++m_cur_pos_in_chain;
355 
356  return operator->();
357  }
358 
359  typename data_chain_type::value_type* operator-- ()
360  {
361  if (!mp_res_chains)
362  return nullptr;
363 
364  if (m_end_pos)
365  {
366  m_end_pos = false;
367  return &(*m_cur_pos_in_chain);
368  }
369 
370  if (m_cur_pos_in_chain == (*m_cur_chain)->begin())
371  {
372  if (m_cur_chain == mp_res_chains->begin())
373  {
374  // Already at the first data chain. Don't move the iterator position.
375  return nullptr;
376  }
377  --m_cur_chain;
378  m_cur_pos_in_chain = (*m_cur_chain)->end();
379  }
380  --m_cur_pos_in_chain;
381  return operator->();
382  }
383 
384  bool operator== (const iterator_base& r) const
385  {
386  if (mp_res_chains.get())
387  {
388  // non-empty result set.
389  return mp_res_chains.get() == r.mp_res_chains.get() &&
390  m_cur_chain == r.m_cur_chain && m_cur_pos_in_chain == r.m_cur_pos_in_chain &&
391  m_end_pos == r.m_end_pos;
392  }
393 
394  // empty result set.
395  if (r.mp_res_chains.get())
396  return false;
397  return m_end_pos == r.m_end_pos;
398  }
399 
400  bool operator!= (const iterator_base& r) const { return !operator==(r); }
401 
402  typename data_chain_type::value_type& operator*()
403  {
404  return *m_cur_pos_in_chain;
405  }
406 
407  typename data_chain_type::value_type* operator->()
408  {
409  return &(*m_cur_pos_in_chain);
410  }
411 
412  protected:
413  void move_to_front()
414  {
415  if (!mp_res_chains)
416  {
417  // Empty data set.
418  m_end_pos = true;
419  return;
420  }
421 
422  // We assume that there is at least one chain list, and no
423  // empty chain list exists. So, skip the check.
424  m_cur_chain = mp_res_chains->begin();
425  m_cur_pos_in_chain = (*m_cur_chain)->begin();
426  m_end_pos = false;
427  }
428 
429  void move_to_end()
430  {
431  m_end_pos = true;
432  if (!mp_res_chains)
433  // Empty data set.
434  return;
435 
436  m_cur_chain = mp_res_chains->end();
437  --m_cur_chain;
438  m_cur_pos_in_chain = (*m_cur_chain)->end();
439  --m_cur_pos_in_chain;
440  }
441 
442  private:
443  res_chains_ptr mp_res_chains;
444  typename res_chains_type::iterator m_cur_chain;
445  typename data_chain_type::iterator m_cur_pos_in_chain;
446  bool m_end_pos:1;
447  };
448 
449 public:
450 
451  class search_results : public search_results_base
452  {
453  typedef typename search_results_base::res_chains_type res_chains_type;
454  typedef typename search_results_base::res_chains_ptr res_chains_ptr;
455  public:
456 
457  class iterator : public iterator_base
458  {
459  friend class segment_tree<_Key,_Value>::search_results;
460  private:
461  iterator(const res_chains_ptr& p) : iterator_base(p) {}
462  public:
463  iterator() : iterator_base() {}
464  };
465 
466  typename search_results::iterator begin()
467  {
468  typename search_results::iterator itr(search_results_base::get_res_chains());
469  itr.move_to_front();
470  return itr;
471  }
472 
473  typename search_results::iterator end()
474  {
475  typename search_results::iterator itr(search_results_base::get_res_chains());
476  itr.move_to_end();
477  return itr;
478  }
479  };
480 
482  {
483  public:
484  search_result_vector_inserter(search_results_type& result) : m_result(result) {}
485  void operator() (data_chain_type* node_data)
486  {
487  if (!node_data)
488  return;
489 
490  typename data_chain_type::const_iterator itr = node_data->begin(), itr_end = node_data->end();
491  for (; itr != itr_end; ++itr)
492  m_result.push_back(*itr);
493  }
494  private:
495  search_results_type& m_result;
496  };
497 
499  {
500  public:
501  search_result_inserter(search_results_base& result) : m_result(result) {}
502  void operator() (data_chain_type* node_data)
503  {
504  if (!node_data)
505  return;
506 
507  m_result.push_back_chain(node_data);
508  }
509  private:
510  search_results_base& m_result;
511  };
512 
513  segment_tree();
514  segment_tree(const segment_tree& r);
515  ~segment_tree();
516 
521  bool operator==(const segment_tree& r) const;
522 
523  bool operator!=(const segment_tree& r) const { return !operator==(r); }
524 
531  bool is_tree_valid() const { return m_valid_tree; }
532 
536  void build_tree();
537 
547  bool insert(key_type begin_key, key_type end_key, value_type pdata);
548 
565  bool search(key_type point, search_results_type& result) const;
566 
576  search_results search(key_type point) const;
577 
585  void remove(value_type value);
586 
590  void clear();
591 
595  size_t size() const;
596 
600  bool empty() const;
601 
607  size_t leaf_size() const;
608 
609 #ifdef MDDS_UNIT_TEST
610  void dump_tree() const;
611  void dump_leaf_nodes() const;
612  void dump_segment_data() const;
613  bool verify_node_lists() const;
614 
// Pairs a key with a data chain held by value; verify_leaf_nodes() takes a
// vector of these as its 'checks' argument.
615  struct leaf_node_check
616  {
617  key_type key;
618  data_chain_type data_chain;
619  };
620 
621  bool verify_leaf_nodes(const ::std::vector<leaf_node_check>& checks) const;
622 
629  bool verify_segment_data(const segment_map_type& checks) const;
630 #endif
631 
632 private:
636  void search(key_type point, search_results_base& result) const;
637 
638  typedef std::vector<__st::node_base*> node_list_type;
639  typedef std::map<value_type, std::unique_ptr<node_list_type>> data_node_map_type;
640 
641  static void create_leaf_node_instances(const ::std::vector<key_type>& keys, node_ptr& left, node_ptr& right);
642 
648  void descend_tree_and_mark(
649  __st::node_base* pnode, value_type pdata, key_type begin_key, key_type end_key, node_list_type* plist);
650 
651  void build_leaf_nodes();
652 
657  void remove_data_from_nodes(node_list_type* plist, const value_type pdata);
658  void remove_data_from_chain(data_chain_type& chain, const value_type pdata);
659 
660  void clear_all_nodes();
661 
662 #ifdef MDDS_UNIT_TEST
663  static bool has_data_pointer(const node_list_type& node_list, const value_type pdata);
664  static void print_leaf_value(const leaf_value_type& v);
665 #endif
666 
667 private:
668  std::vector<nonleaf_node> m_nonleaf_node_pool;
669 
670  segment_map_type m_segment_data;
671 
677  data_node_map_type m_tagged_node_map;
678 
679  nonleaf_node* m_root_node;
680  node_ptr m_left_leaf;
681  node_ptr m_right_leaf;
682  bool m_valid_tree:1;
683 };
684 
685 }
686 
687 #include "segment_tree_def.inl"
688 
689 #endif
Definition: global.hpp:82
Definition: segment_tree.hpp:499
Definition: segment_tree.hpp:458
Definition: segment_tree.hpp:452
Definition: segment_tree.hpp:48
bool insert(key_type begin_key, key_type end_key, value_type pdata)
bool operator==(const segment_tree &r) const
bool empty() const
bool search(key_type point, search_results_type &result) const
void remove(value_type value)
size_t leaf_size() const
bool is_tree_valid() const
Definition: segment_tree.hpp:531
size_t size() const
search_results search(key_type point) const
Definition: node.hpp:44
bool is_leaf
parent nonleaf_node
Definition: node.hpp:46
Definition: node.hpp:144
node_ptr next
previous sibling leaf node.
Definition: node.hpp:166
Definition: node.hpp:54
Definition: segment_tree.hpp:220
Definition: segment_tree.hpp:127
Definition: segment_tree.hpp:207
Definition: segment_tree.hpp:104
Definition: segment_tree.hpp:92
key_type high
low range value (inclusive)
Definition: segment_tree.hpp:94
data_chain_type * data_chain
high range value (non-inclusive)
Definition: segment_tree.hpp:95