choose_n_farthest_points.h
1 /* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
2  * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
3  * Author(s): Siargey Kachanovich
4  *
5  * Copyright (C) 2016 Inria
6  *
7  * Modification(s):
8  * - YYYY/MM Author: Description of the modification
9  */
10 
11 #ifndef CHOOSE_N_FARTHEST_POINTS_H_
12 #define CHOOSE_N_FARTHEST_POINTS_H_
13 
14 #include <boost/range.hpp>
15 
16 #include <gudhi/Null_output_iterator.h>
17 
18 #include <iterator>
19 #include <vector>
20 #include <random>
21 #include <limits> // for numeric_limits<>
22 
23 namespace Gudhi {
24 
25 namespace subsampling {
26 
30 enum : std::size_t {
34  random_starting_point = std::size_t(-1)
35 };
36 
65 template < typename Distance,
66 typename Point_range,
67 typename PointOutputIterator,
68 typename DistanceOutputIterator = Null_output_iterator>
69 void choose_n_farthest_points(Distance dist,
70  Point_range const &input_pts,
71  std::size_t final_size,
72  std::size_t starting_point,
73  PointOutputIterator output_it,
74  DistanceOutputIterator dist_it = {}) {
75  std::size_t nb_points = boost::size(input_pts);
76  if (final_size > nb_points)
77  final_size = nb_points;
78 
79  // Tests to the limit
80  if (final_size < 1)
81  return;
82 
83  if (starting_point == random_starting_point) {
84  // Choose randomly the first landmark
85  std::random_device rd;
86  std::mt19937 gen(rd());
87  std::uniform_int_distribution<std::size_t> dis(0, nb_points - 1);
88  starting_point = dis(gen);
89  }
90 
91  // FIXME: don't hard-code the type as double. For Epeck_d, we also want to handle types that do not have an infinity.
92  static_assert(std::numeric_limits<double>::has_infinity, "the number type needs to support infinity()");
93 
94  *output_it++ = input_pts[starting_point];
95  *dist_it++ = std::numeric_limits<double>::infinity();
96  if (final_size == 1) return;
97 
98  std::vector<std::size_t> points(nb_points); // map from remaining points to indexes in input_pts
99  std::vector< double > dist_to_L(nb_points); // vector of current distances to L from points
100  for(std::size_t i = 0; i < nb_points; ++i) {
101  points[i] = i;
102  dist_to_L[i] = dist(input_pts[i], input_pts[starting_point]);
103  }
104  // The indirection through points makes the program a bit slower. Some alternatives:
105  // - the original code never removed points and counted on them not
106  // reappearing because of a self-distance of 0. This causes unnecessary
107  // computations when final_size is large. It also causes trouble if there are
108  // input points at distance 0 from each other.
109  // - copy input_pts and update the local copy when removing points.
110 
111  std::size_t curr_max_w = starting_point;
112 
113  for (std::size_t current_number_of_landmarks = 1; current_number_of_landmarks != final_size; current_number_of_landmarks++) {
114  std::size_t latest_landmark = points[curr_max_w];
115  // To remove the latest landmark at index curr_max_w, replace it
116  // with the last point and reduce the length of the vector.
117  std::size_t last = points.size() - 1;
118  if (curr_max_w != last) {
119  points[curr_max_w] = points[last];
120  dist_to_L[curr_max_w] = dist_to_L[last];
121  }
122  points.pop_back();
123 
124  // Update distances to L.
125  std::size_t i = 0;
126  for (auto p : points) {
127  double curr_dist = dist(input_pts[p], input_pts[latest_landmark]);
128  if (curr_dist < dist_to_L[i])
129  dist_to_L[i] = curr_dist;
130  ++i;
131  }
132  // choose the next landmark
133  curr_max_w = 0;
134  double curr_max_dist = dist_to_L[curr_max_w]; // used for defining the furthest point from L
135  for (i = 1; i < points.size(); i++)
136  if (dist_to_L[i] > curr_max_dist) {
137  curr_max_dist = dist_to_L[i];
138  curr_max_w = i;
139  }
140  *output_it++ = input_pts[points[curr_max_w]];
141  *dist_it++ = dist_to_L[curr_max_w];
142  }
143 }
144 
145 } // namespace subsampling
146 
147 } // namespace Gudhi
148 
149 #endif // CHOOSE_N_FARTHEST_POINTS_H_
void choose_n_farthest_points(Distance dist, Point_range const &input_pts, std::size_t final_size, std::size_t starting_point, PointOutputIterator output_it, DistanceOutputIterator dist_it={})
Subsample by a greedy strategy of iteratively adding the farthest point from the current chosen point...
Definition: choose_n_farthest_points.h:69
@ random_starting_point
Definition: choose_n_farthest_points.h:34
Definition: Null_output_iterator.h:19