Flag_complex_edge_collapser.h
1 /* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
2  * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
3  * Author(s): Siddharth Pritam, Marc Glisse
4  *
5  * Copyright (C) 2020 Inria
6  *
7  * Modification(s):
8  * - 2020/03 Vincent Rouvreau: integration to the gudhi library
9  * - 2021 Marc Glisse: complete rewrite
10  * - YYYY/MM Author: Description of the modification
11  */
12 
13 #ifndef FLAG_COMPLEX_EDGE_COLLAPSER_H_
14 #define FLAG_COMPLEX_EDGE_COLLAPSER_H_
15 
16 #include <gudhi/Debug_utils.h>
17 
18 #include <boost/container/flat_map.hpp>
19 #include <boost/container/flat_set.hpp>
20 
21 #ifdef GUDHI_USE_TBB
22 #include <tbb/parallel_sort.h>
23 #endif
24 
25 #include <utility>
26 #include <vector>
27 #include <tuple>
28 #include <algorithm>
29 #include <limits>
30 
31 namespace Gudhi {
32 
33 namespace collapse {
34 
42 template<typename Vertex, typename Filtration_value>
43 struct Flag_complex_edge_collapser {
44  using Filtered_edge = std::tuple<Vertex, Vertex, Filtration_value>;
45  typedef std::pair<Vertex,Vertex> Edge;
46  struct Cmpi { template<class T, class U> bool operator()(T const&a, U const&b)const{return b<a; } };
47  typedef boost::container::flat_map<Vertex, Filtration_value> Ngb_list;
48  typedef std::vector<Ngb_list> Neighbors;
49  Neighbors neighbors; // closed neighborhood
50  std::size_t num_vertices;
51  std::vector<std::tuple<Vertex, Vertex, Filtration_value>> res;
52 
53 #ifdef GUDHI_COLLAPSE_USE_DENSE_ARRAY
54  // Minimal matrix interface
55  // Using this matrix generally helps performance, but the memory use may be excessive for a very sparse graph
56  // (and in extreme cases the constant initialization of the matrix may start to dominate the running time).
57  // Are there cases where the matrix is too big but a hash table would help?
58  std::vector<Filtration_value> neighbors_data;
59  void init_neighbors_dense(){
60  neighbors_data.clear();
61  neighbors_data.resize(num_vertices*num_vertices, std::numeric_limits<Filtration_value>::infinity());
62  }
63  Filtration_value& neighbors_dense(Vertex i, Vertex j){return neighbors_data[num_vertices*j+i];}
64 #endif
65 
66  // This does not touch the events list, only the adjacency matrix(es)
67  void delay_neighbor(Vertex u, Vertex v, Filtration_value f) {
68  neighbors[u][v]=f;
69  neighbors[v][u]=f;
70 #ifdef GUDHI_COLLAPSE_USE_DENSE_ARRAY
71  neighbors_dense(u,v)=f;
72  neighbors_dense(v,u)=f;
73 #endif
74  }
75  void remove_neighbor(Vertex u, Vertex v) {
76  neighbors[u].erase(v);
77  neighbors[v].erase(u);
78 #ifdef GUDHI_COLLAPSE_USE_DENSE_ARRAY
79  neighbors_dense(u,v)=std::numeric_limits<Filtration_value>::infinity();
80  neighbors_dense(v,u)=std::numeric_limits<Filtration_value>::infinity();
81 #endif
82  }
83 
84  template<class FilteredEdgeRange>
85  void read_edges(FilteredEdgeRange const&r){
86  neighbors.resize(num_vertices);
87 #ifdef GUDHI_COLLAPSE_USE_DENSE_ARRAY
88  init_neighbors_dense();
89 #endif
90  // Use the raw sequence to avoid maintaining the order
91  std::vector<typename Ngb_list::sequence_type> neighbors_seq(num_vertices);
92  for(auto&&e : r){
93  using std::get;
94  Vertex u = get<0>(e);
95  Vertex v = get<1>(e);
96  Filtration_value f = get<2>(e);
97  neighbors_seq[u].emplace_back(v, f);
98  neighbors_seq[v].emplace_back(u, f);
99 #ifdef GUDHI_COLLAPSE_USE_DENSE_ARRAY
100  neighbors_dense(u,v)=f;
101  neighbors_dense(v,u)=f;
102 #endif
103  }
104  for(std::size_t i=0;i<neighbors_seq.size();++i){
105  neighbors_seq[i].emplace_back(i, -std::numeric_limits<Filtration_value>::infinity());
106  neighbors[i].adopt_sequence(std::move(neighbors_seq[i])); // calls sort
107 #ifdef GUDHI_COLLAPSE_USE_DENSE_ARRAY
108  neighbors_dense(i,i)=-std::numeric_limits<Filtration_value>::infinity();
109 #endif
110  }
111  }
112 
113  // Open neighborhood
114  // At some point it helped gcc to add __attribute__((noinline)) here, otherwise we had +50% on the running time
115  // on one example. It looks ok now, or I forgot which example that was.
116  void common_neighbors(boost::container::flat_set<Vertex>& e_ngb,
117  std::vector<std::pair<Filtration_value, Vertex>>& e_ngb_later,
118  Vertex u, Vertex v, Filtration_value f_event){
119  // Using neighbors_dense here seems to hurt, even if we loop on the smaller of nu and nv.
120  Ngb_list const&nu = neighbors[u];
121  Ngb_list const&nv = neighbors[v];
122  auto ui = nu.begin();
123  auto ue = nu.end();
124  auto vi = nv.begin();
125  auto ve = nv.end();
126  assert(ui != ue && vi != ve);
127  while(ui != ue && vi != ve){
128  Vertex w = ui->first;
129  if(w < vi->first) { ++ui; continue; }
130  if(w > vi->first) { ++vi; continue; }
131  // nu and nv are closed, so we need to exclude e here.
132  if(w != u && w != v) {
133  Filtration_value f = std::max(ui->second, vi->second);
134  if(f > f_event)
135  e_ngb_later.emplace_back(f, w);
136  else
137  e_ngb.insert(e_ngb.end(), w);
138  }
139  ++ui; ++vi;
140  }
141  }
142 
143  // Test if the neighborhood of e is included in the closed neighborhood of c
144  template<class Ngb>
145  bool is_dominated_by(Ngb const& e_ngb, Vertex c, Filtration_value f){
146  // The best strategy probably depends on the distribution, how sparse / dense the adjacency matrix is,
147  // how (un)balanced the sizes of e_ngb and nc are.
148  // Some efficient operations on sets work best with bitsets, although the need for a map complicates things.
149 #ifdef GUDHI_COLLAPSE_USE_DENSE_ARRAY
150  for(auto v : e_ngb) {
151  // if(v==c)continue;
152  if(neighbors_dense(v,c) > f) return false;
153  }
154  return true;
155 #else
156  auto&&nc = neighbors[c];
157  // if few neighbors, use dichotomy? Seems slower.
158  // I tried storing a copy of neighbors as a vector<absl::flat_hash_map> and using it for nc, but it was
159  // a bit slower here. It did help with neighbors[dominator].find(w) in the main function though,
160  // sometimes enough, sometimes not.
161  auto ci = nc.begin();
162  auto ce = nc.end();
163  auto eni = e_ngb.begin();
164  auto ene = e_ngb.end();
165  assert(eni != ene);
166  assert(ci != ce);
167  // if(*eni == c && ++eni == ene) return true;
168  for(;;){
169  Vertex ve = *eni;
170  Vertex vc = ci->first;
171  while(ve > vc) {
172  // try a gallop strategy (exponential search)? Seems slower
173  if(++ci == ce) return false;
174  vc = ci->first;
175  }
176  if(ve < vc) return false;
177  // ve == vc
178  if(ci->second > f) return false;
179  if(++eni == ene)return true;
180  // If we stored an open neighborhood of c (excluding c), we would need to test for c here and before the loop
181  // if(*eni == c && ++eni == ene)return true;
182  if(++ci == ce) return false;
183  }
184 #endif
185  }
186 
187  template<class FilteredEdgeRange, class Delay>
188  void process_edges(FilteredEdgeRange const& edges, Delay&& delay) {
189  {
190  Vertex maxi = 0, maxj = 0;
191  for(auto& fe : edges) {
192  Vertex i = std::get<0>(fe);
193  Vertex j = std::get<1>(fe);
194  if (i > maxi) maxi = i;
195  if (j > maxj) maxj = j;
196  }
197  num_vertices = std::max(maxi, maxj) + 1;
198  }
199 
200  read_edges(edges);
201 
202  boost::container::flat_set<Vertex> e_ngb;
203  e_ngb.reserve(num_vertices);
204  std::vector<std::pair<Filtration_value, Vertex>> e_ngb_later;
205  for(auto&e:edges) {
206  {
207  Vertex u = std::get<0>(e);
208  Vertex v = std::get<1>(e);
209  Filtration_value input_time = std::get<2>(e);
210  auto time = delay(input_time);
211  auto start_time = time;
212  e_ngb.clear();
213  e_ngb_later.clear();
214  common_neighbors(e_ngb, e_ngb_later, u, v, time);
215  // If we identify a good candidate (the first common neighbor) for being a dominator of e until infinity,
216  // we could check that a bit more cheaply. It does not seem to help though.
217  auto cmp1=[](auto const&a, auto const&b){return a.first > b.first;};
218  auto e_ngb_later_begin=e_ngb_later.begin();
219  auto e_ngb_later_end=e_ngb_later.end();
220  bool heapified = false;
221 
222  bool dead = false;
223  while(true) {
224  Vertex dominator = -1;
225  // special case for size 1
226  // if(e_ngb.size()==1){dominator=*e_ngb.begin();}else
227  // It is tempting to test the dominators in increasing order of filtration value, which is likely to reduce
228  // the number of calls to is_dominated_by before finding a dominator, but sorting, even partially / lazily,
229  // is very expensive.
230  for(auto c : e_ngb){
231  if(is_dominated_by(e_ngb, c, time)){
232  dominator = c;
233  break;
234  }
235  }
236  if(dominator==-1) break;
237  // Push as long as dominator remains a dominator.
238  // Iterate on times where at least one neighbor appears.
239  for (bool still_dominated = true; still_dominated; ) {
240  if(e_ngb_later_begin == e_ngb_later_end) {
241  dead = true; goto end_move;
242  }
243  if(!heapified) {
244  // Eagerly sorting can be slow
245  std::make_heap(e_ngb_later_begin, e_ngb_later_end, cmp1);
246  heapified=true;
247  }
248  time = e_ngb_later_begin->first; // first place it may become critical
249  // Update the neighborhood for this new time, while checking if any of the new neighbors break domination.
250  while (e_ngb_later_begin != e_ngb_later_end && e_ngb_later_begin->first <= time) {
251  Vertex w = e_ngb_later_begin->second;
252 #ifdef GUDHI_COLLAPSE_USE_DENSE_ARRAY
253  if (neighbors_dense(dominator,w) > e_ngb_later_begin->first)
254  still_dominated = false;
255 #else
256  auto& ngb_dom = neighbors[dominator];
257  auto wit = ngb_dom.find(w); // neighborhood may be open or closed, it does not matter
258  if (wit == ngb_dom.end() || wit->second > e_ngb_later_begin->first)
259  still_dominated = false;
260 #endif
261  e_ngb.insert(w);
262  std::pop_heap(e_ngb_later_begin, e_ngb_later_end--, cmp1);
263  }
264  } // this doesn't seem to help that much...
265  }
266 end_move:
267  if(dead) {
268  remove_neighbor(u, v);
269  } else if(start_time != time) {
270  delay_neighbor(u, v, time);
271  res.emplace_back(u, v, time);
272  } else {
273  res.emplace_back(u, v, input_time);
274  }
275  }
276  }
277  }
278 
279  std::vector<Filtered_edge> output() {
280  return std::move(res);
281  }
282 
283 };
284 
285 template<class R> R to_range(R&& r) { return std::move(r); }
286 template<class R, class T> R to_range(T&& t) { R r; r.insert(r.end(), t.begin(), t.end()); return r; }
287 
288 template<class FilteredEdgeRange, class Delay>
289 auto flag_complex_collapse_edges(FilteredEdgeRange&& edges, Delay&&delay) {
290  // Would it help to label the points according to some spatial sorting?
291  auto first_edge_itr = std::begin(edges);
292  using Vertex = std::decay_t<decltype(std::get<0>(*first_edge_itr))>;
293  using Filtration_value = std::decay_t<decltype(std::get<2>(*first_edge_itr))>;
294  using Edge_collapser = Flag_complex_edge_collapser<Vertex, Filtration_value>;
295  if (first_edge_itr != std::end(edges)) {
296  auto edges2 = to_range<std::vector<typename Edge_collapser::Filtered_edge>>(std::forward<FilteredEdgeRange>(edges));
297 #ifdef GUDHI_USE_TBB
298  // I think this sorting is always negligible compared to the collapse, but parallelizing it shouldn't hurt.
299  tbb::parallel_sort(edges2.begin(), edges2.end(),
300  [](auto const&a, auto const&b){return std::get<2>(a)>std::get<2>(b);});
301 #else
302  std::sort(edges2.begin(), edges2.end(), [](auto const&a, auto const&b){return std::get<2>(a)>std::get<2>(b);});
303 #endif
304  Edge_collapser edge_collapser;
305  edge_collapser.process_edges(edges2, std::forward<Delay>(delay));
306  return edge_collapser.output();
307  }
308  return std::vector<typename Edge_collapser::Filtered_edge>();
309 }
310 
329 template<class FilteredEdgeRange> auto flag_complex_collapse_edges(const FilteredEdgeRange& edges) {
330  return flag_complex_collapse_edges(edges, [](auto const&d){return d;});
331 }
332 
333 } // namespace collapse
334 
335 } // namespace Gudhi
336 
337 #endif // FLAG_COMPLEX_EDGE_COLLAPSER_H_
auto flag_complex_collapse_edges(const FilteredEdgeRange &edges)
Implicitly constructs a flag complex from edges as an input, collapses edges while preserving the per...
Definition: Flag_complex_edge_collapser.h:329
Value type for a filtration function on a cell complex.
Definition: FiltrationValue.h:20