GeographicLib  1.51
NearestNeighbor.hpp
Go to the documentation of this file.
1 /**
2  * \file NearestNeighbor.hpp
3  * \brief Header for GeographicLib::NearestNeighbor class
4  *
5  * Copyright (c) Charles Karney (2016-2020) <charles@karney.com> and licensed
6  * under the MIT/X11 License. For more information, see
7  * https://geographiclib.sourceforge.io/
8  **********************************************************************/
9 
10 #if !defined(GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP)
11 #define GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP 1
12 
13 #include <algorithm> // for nth_element, max_element, etc.
14 #include <vector>
15 #include <queue> // for priority_queue
16 #include <utility> // for swap + pair
17 #include <cstring>
18 #include <limits>
19 #include <cmath>
20 #include <iostream>
21 #include <sstream>
22 // Only for GeographicLib::GeographicErr
24 
25 #if defined(GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION) && \
26  GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION
27 #include <boost/serialization/nvp.hpp>
28 #include <boost/serialization/split_member.hpp>
29 #include <boost/serialization/array.hpp>
30 #include <boost/serialization/vector.hpp>
31 #endif
32 
33 #if defined(_MSC_VER)
34 // Squelch warnings about constant conditional expressions
35 # pragma warning (push)
36 # pragma warning (disable: 4127)
37 #endif
38 
39 namespace GeographicLib {
40 
41  /**
42  * \brief Nearest-neighbor calculations
43  *
44  * This class solves the nearest-neighbor problm using a vantage-point tree
45  * as described in \ref nearest.
46  *
47  * This class is templated so that it can handle arbitrary metric spaces as
48  * follows:
49  *
50  * @tparam dist_t the type used for measuring distances; it can be a real or
51  * signed integer type; in typical geodetic applications, \e dist_t might
52  * be <code>double</code>.
53  * @tparam pos_t the type for specifying the positions of points; geodetic
54  * application might bundled the latitude and longitude into a
55  * <code>std::pair<dist_t, dist_t></code>.
56  * @tparam distfun_t the type of a function object which takes takes two
57  * positions (of type \e pos_t) and returns the distance (of type \e
58  * dist_t); in geodetic applications, this might be a class which is
59  * constructed with a Geodesic object and which implements a member
60  * function with a signature <code>dist_t operator() (const pos_t&, const
61  * pos_t&) const</code>, which returns the geodesic distance between two
62  * points.
63  *
64  * \note The distance measure must satisfy the triangle inequality, \f$
65  * d(a,c) \le d(a,b) + d(b,c) \f$ for all points \e a, \e b, \e c. The
66  * geodesic distance (given by Geodesic::Inverse) does, while the great
67  * ellipse distance and the rhumb line distance <i>do not</i>. If you use
68  * the ordinary Euclidean distance, i.e., \f$ \sqrt{(x_a-x_b)^2 +
69  * (y_a-y_b)^2} \f$ for two dimensions, don't be tempted to leave out the
70  * square root in the interests of "efficiency"; the squared distance does
71  * not satisfy the triangle inequality!
72  *
73  * \note This is a "header-only" implementation and, as such, depends in a
74  * minimal way on the rest of GeographicLib (the only dependency is through
75  * the use of GeographicLib::GeographicErr for handling compile-time and
76  * run-time exceptions). Therefore, it is easy to extract this class from
77  * the rest of GeographicLib and use it as a stand-alone facility.
78  *
79  * The \e dist_t type must support numeric_limits queries (specifically:
80  * is_signed, is_integer, max(), digits).
81  *
82  * The NearestNeighbor object is constructed with a vector of points (type \e
83  * pos_t) and a distance function (type \e distfun_t). However the object
84  * does \e not store the points. When querying the object with Search(),
85  * it's necessary to supply the same vector of points and the same distance
86  * function.
87  *
88  * There's no capability in this implementation to add or remove points from
89  * the set. Instead Initialize() should be called to re-initialize the
90  * object with the modified vector of points.
91  *
92  * Because of the overhead in constructing a NearestNeighbor object for a
93  * large set of points, functions Save() and Load() are provided to save the
94  * object to an external file. operator<<(), operator>>() and <a
95  * href="https://www.boost.org/libs/serialization/doc"> Boost
96  * serialization</a> can also be used to save and restore a NearestNeighbor
97  * object. This is illustrated in the example.
98  *
99  * Example of use:
100  * \include example-NearestNeighbor.cpp
101  **********************************************************************/
102  template <typename dist_t, typename pos_t, class distfun_t>
104  // For tracking changes to the I/O format
105  static const int version = 1;
106  // This is what we get "free"; but if sizeof(dist_t) = 1 (unlikely), allow
107  // 4 slots (and this accommodates the default value bucket = 4).
108  static const int maxbucket =
109  (2 + ((4 * sizeof(dist_t)) / sizeof(int) >= 2 ?
110  (4 * sizeof(dist_t)) / sizeof(int) : 2));
111  public:
112 
113  /**
114  * Default constructor for NearestNeighbor.
115  *
116  * This is equivalent to specifying an empty set of points.
117  **********************************************************************/
118  NearestNeighbor() : _numpoints(0), _bucket(0), _cost(0) {}
119 
120  /**
121  * Constructor for NearestNeighbor.
122  *
123  * @param[in] pts a vector of points to include in the set.
124  * @param[in] dist the distance function object.
125  * @param[in] bucket the size of the buckets at the leaf nodes; this must
126  * lie in [0, 2 + 4*sizeof(dist_t)/sizeof(int)] (default 4).
127  * @exception GeographicErr if the value of \e bucket is out of bounds or
128  * the size of \e pts is too big for an int.
129  * @exception std::bad_alloc if memory for the tree can't be allocated.
130  *
131  * \e pts may contain coincident points (i.e., the distance between them
132  * vanishes); these are treated as distinct.
133  *
134  * The choice of \e bucket is a tradeoff between space and efficiency. A
135  * larger \e bucket decreases the size of the NearestNeighbor object which
136  * scales as pts.size() / max(1, bucket) and reduces the number of distance
137  * calculations to construct the object by log2(bucket) * pts.size().
138  * However each search then requires about bucket additional distance
139  * calculations.
140  *
141  * \warning The distances computed by \e dist must satisfy the standard
142  * metric conditions. If not, the results are undefined. Neither the data
143  * in \e pts nor the query points should contain NaNs or infinities because
144  * such data violates the metric conditions.
145  *
146  * \warning The same arguments \e pts and \e dist must be provided
147  * to the Search() function.
148  **********************************************************************/
149  NearestNeighbor(const std::vector<pos_t>& pts, const distfun_t& dist,
150  int bucket = 4) {
151  Initialize(pts, dist, bucket);
152  }
153 
154  /**
155  * Initialize or re-initialize NearestNeighbor.
156  *
157  * @param[in] pts a vector of points to include in the tree.
158  * @param[in] dist the distance function object.
159  * @param[in] bucket the size of the buckets at the leaf nodes; this must
160  * lie in [0, 2 + 4*sizeof(dist_t)/sizeof(int)] (default 4).
161  * @exception GeographicErr if the value of \e bucket is out of bounds or
162  * the size of \e pts is too big for an int.
163  * @exception std::bad_alloc if memory for the tree can't be allocated.
164  *
165  * See also the documentation on the constructor.
166  *
167  * If an exception is thrown, the state of the NearestNeighbor is
168  * unchanged.
169  **********************************************************************/
170  void Initialize(const std::vector<pos_t>& pts, const distfun_t& dist,
171  int bucket = 4) {
172  static_assert(std::numeric_limits<dist_t>::is_signed,
173  "dist_t must be a signed type");
174  if (!( 0 <= bucket && bucket <= maxbucket ))
176  ("bucket must lie in [0, 2 + 4*sizeof(dist_t)/sizeof(int)]");
177  if (pts.size() > size_t(std::numeric_limits<int>::max()))
178  throw GeographicLib::GeographicErr("pts array too big");
179  // the pair contains distance+id
180  std::vector<item> ids(pts.size());
181  for (int k = int(ids.size()); k--;)
182  ids[k] = std::make_pair(dist_t(0), k);
183  int cost = 0;
184  std::vector<Node> tree;
185  init(pts, dist, bucket, tree, ids, cost,
186  0, int(ids.size()), int(ids.size()/2));
187  _tree.swap(tree);
188  _numpoints = int(pts.size());
189  _bucket = bucket;
190  _mc = _sc = 0;
191  _cost = cost; _c1 = _k = _cmax = 0;
192  _cmin = std::numeric_limits<int>::max();
193  }
194 
195  /**
196  * Search the NearestNeighbor.
197  *
198  * @param[in] pts the vector of points used for initialization.
199  * @param[in] dist the distance function object used for initialization.
200  * @param[in] query the query point.
201  * @param[out] ind a vector of indices to the closest points found.
202  * @param[in] k the number of points to search for (default = 1).
203  * @param[in] maxdist only return points with distances of \e maxdist or
204  * less from \e query (default is the maximum \e dist_t).
205  * @param[in] mindist only return points with distances of more than
206  * \e mindist from \e query (default = &minus;1).
207  * @param[in] exhaustive whether to do an exhaustive search (default true).
208  * @param[in] tol the tolerance on the results (default 0).
209  * @return the distance to the closest point found (&minus;1 if no points
210  * are found).
211  * @exception GeographicErr if \e pts has a different size from that used
212  * to construct the object.
213  *
214  * The indices returned in \e ind are sorted by distance from \e query
215  * (closest first).
216  *
217  * The simplest invocation is with just the 4 non-optional arguments. This
218  * returns the closest distance and the index to the closest point in
219  * <i>ind</i><sub>0</sub>. If there are several points equally close, then
220  * <i>ind</i><sub>0</sub> gives the index of an arbirary one of them. If
221  * there's no closest point (because the set of points is empty), then \e
222  * ind is empty and &minus;1 is returned.
223  *
224  * With \e exhaustive = true and \e tol = 0 (their default values), this
225  * finds the indices of \e k closest neighbors to \e query whose distances
226  * to \e query are in (\e mindist, \e maxdist]. If \e mindist and \e
227  * maxdist have their default values, then these bounds have no effect. If
228  * \e query is one of the points in the tree, then set \e mindist = 0 to
229  * prevent this point (and other coincident points) from being returned.
230  *
231  * If \e exhaustive = false, exit as soon as \e k results satisfying the
232  * distance criteria are found. If less than \e k results are returned
233  * then the search was exhaustive even if \e exhaustive = false.
234  *
235  * If \e tol is positive, do an approximate search; in this case the
236  * results are to be interpreted as follows: if the <i>k</i>'th distance is
237  * \e dk, then all results with distances less than or equal \e dk &minus;
238  * \e tol are correct; all others are suspect &mdash; there may be other
239  * closer results with distances greater or equal to \e dk &minus; \e tol.
240  * If less than \e k results are found, then the search is exact.
241  *
242  * \e mindist should be used to exclude a "small" neighborhood of the query
243  * point (relative to the average spacing of the data). If \e mindist is
244  * large, the efficiency of the search deteriorates.
245  *
246  * \note Only the shortest distance is returned (as as the function value).
247  * The distances to other points (indexed by <i>ind</i><sub><i>j</i></sub>
248  * for \e j > 0) can be found by invoking \e dist again.
249  *
250  * \warning The arguments \e pts and \e dist must be identical to those
251  * used to initialize the NearestNeighbor; if not, this function will
252  * return some meaningless result (however, if the size of \e pts is wrong,
253  * this function throw an exception).
254  *
255  * \warning The query point cannot be a NaN or infinite because then the
256  * metric conditions are violated.
257  **********************************************************************/
258  dist_t Search(const std::vector<pos_t>& pts, const distfun_t& dist,
259  const pos_t& query,
260  std::vector<int>& ind,
261  int k = 1,
262  dist_t maxdist = std::numeric_limits<dist_t>::max(),
263  dist_t mindist = -1,
264  bool exhaustive = true,
265  dist_t tol = 0) const {
266  if (_numpoints != int(pts.size()))
267  throw GeographicLib::GeographicErr("pts array has wrong size");
268  std::priority_queue<item> results;
269  if (_numpoints > 0 && k > 0 && maxdist > mindist) {
270  // distance to the kth closest point so far
271  dist_t tau = maxdist;
272  // first is negative of how far query is outside boundary of node
273  // +1 if on boundary or inside
274  // second is node index
275  std::priority_queue<item> todo;
276  todo.push(std::make_pair(dist_t(1), int(_tree.size()) - 1));
277  int c = 0;
278  while (!todo.empty()) {
279  int n = todo.top().second;
280  dist_t d = -todo.top().first;
281  todo.pop();
282  dist_t tau1 = tau - tol;
283  // compare tau and d again since tau may have become smaller.
284  if (!( n >= 0 && tau1 >= d )) continue;
285  const Node& current = _tree[n];
286  dist_t dst = 0; // to suppress warning about uninitialized variable
287  bool exitflag = false, leaf = current.index < 0;
288  for (int i = 0; i < (leaf ? _bucket : 1); ++i) {
289  int index = leaf ? current.leaves[i] : current.index;
290  if (index < 0) break;
291  dst = dist(pts[index], query);
292  ++c;
293 
294  if (dst > mindist && dst <= tau) {
295  if (int(results.size()) == k) results.pop();
296  results.push(std::make_pair(dst, index));
297  if (int(results.size()) == k) {
298  if (exhaustive)
299  tau = results.top().first;
300  else {
301  exitflag = true;
302  break;
303  }
304  if (tau <= tol) {
305  exitflag = true;
306  break;
307  }
308  }
309  }
310  }
311  if (exitflag) break;
312 
313  if (current.index < 0) continue;
314  tau1 = tau - tol;
315  for (int l = 0; l < 2; ++l) {
316  if (current.data.child[l] >= 0 &&
317  dst + current.data.upper[l] >= mindist) {
318  if (dst < current.data.lower[l]) {
319  d = current.data.lower[l] - dst;
320  if (tau1 >= d)
321  todo.push(std::make_pair(-d, current.data.child[l]));
322  } else if (dst > current.data.upper[l]) {
323  d = dst - current.data.upper[l];
324  if (tau1 >= d)
325  todo.push(std::make_pair(-d, current.data.child[l]));
326  } else
327  todo.push(std::make_pair(dist_t(1), current.data.child[l]));
328  }
329  }
330  }
331  ++_k;
332  _c1 += c;
333  double omc = _mc;
334  _mc += (c - omc) / _k;
335  _sc += (c - omc) * (c - _mc);
336  if (c > _cmax) _cmax = c;
337  if (c < _cmin) _cmin = c;
338  }
339 
340  dist_t d = -1;
341  ind.resize(results.size());
342 
343  for (int i = int(ind.size()); i--;) {
344  ind[i] = int(results.top().second);
345  if (i == 0) d = results.top().first;
346  results.pop();
347  }
348  return d;
349 
350  }
351 
352  /**
353  * @return the total number of points in the set.
354  **********************************************************************/
355  int NumPoints() const { return _numpoints; }
356 
357  /**
358  * Write the object to an I/O stream.
359  *
360  * @param[in,out] os the stream to write to.
361  * @param[in] bin if true (the default) save in binary mode.
362  * @exception std::bad_alloc if memory for the string representation of the
363  * object can't be allocated.
364  *
365  * The counters tracking the statistics of searches are not saved; however
366  * the initializtion cost is saved. The format of the binary saves is \e
367  * not portable.
368  *
369  * \note <a href="https://www.boost.org/libs/serialization/doc">
370  * Boost serialization</a> can also be used to save and restore a
371  * NearestNeighbor object. This requires that the
372  * GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION macro be defined.
373  **********************************************************************/
374  void Save(std::ostream& os, bool bin = true) const {
375  int realspec = std::numeric_limits<dist_t>::digits *
376  (std::numeric_limits<dist_t>::is_integer ? -1 : 1);
377  if (bin) {
378  char id[] = "NearestNeighbor_";
379  os.write(id, 16);
380  int buf[6];
381  buf[0] = version;
382  buf[1] = realspec;
383  buf[2] = _bucket;
384  buf[3] = _numpoints;
385  buf[4] = int(_tree.size());
386  buf[5] = _cost;
387  os.write(reinterpret_cast<const char *>(buf), 6 * sizeof(int));
388  for (int i = 0; i < int(_tree.size()); ++i) {
389  const Node& node = _tree[i];
390  os.write(reinterpret_cast<const char *>(&node.index), sizeof(int));
391  if (node.index >= 0) {
392  os.write(reinterpret_cast<const char *>(node.data.lower),
393  2 * sizeof(dist_t));
394  os.write(reinterpret_cast<const char *>(node.data.upper),
395  2 * sizeof(dist_t));
396  os.write(reinterpret_cast<const char *>(node.data.child),
397  2 * sizeof(int));
398  } else {
399  os.write(reinterpret_cast<const char *>(node.leaves),
400  _bucket * sizeof(int));
401  }
402  }
403  } else {
404  std::stringstream ostring;
405  // Ensure enough precision for type dist_t. With C++11, max_digits10
406  // can be used instead.
407  if (!std::numeric_limits<dist_t>::is_integer) {
408  static const int prec
409  = int(std::ceil(std::numeric_limits<dist_t>::digits *
410  std::log10(2.0) + 1));
411  ostring.precision(prec);
412  }
413  ostring << version << " " << realspec << " " << _bucket << " "
414  << _numpoints << " " << _tree.size() << " " << _cost;
415  for (int i = 0; i < int(_tree.size()); ++i) {
416  const Node& node = _tree[i];
417  ostring << "\n" << node.index;
418  if (node.index >= 0) {
419  for (int l = 0; l < 2; ++l)
420  ostring << " " << node.data.lower[l] << " " << node.data.upper[l]
421  << " " << node.data.child[l];
422  } else {
423  for (int l = 0; l < _bucket; ++l)
424  ostring << " " << node.leaves[l];
425  }
426  }
427  os << ostring.str();
428  }
429  }
430 
431  /**
432  * Read the object from an I/O stream.
433  *
434  * @param[in,out] is the stream to read from
435  * @param[in] bin if true (the default) load in binary mode.
436  * @exception GeographicErr if the state read from \e is is illegal.
437  * @exception std::bad_alloc if memory for the tree can't be allocated.
438  *
439  * The counters tracking the statistics of searches are reset by this
440  * operation. Binary data must have been saved on a machine with the same
441  * architecture. If an exception is thrown, the state of the
442  * NearestNeighbor is unchanged.
443  *
444  * \note <a href="https://www.boost.org/libs/serialization/doc">
445  * Boost serialization</a> can also be used to save and restore a
446  * NearestNeighbor object. This requires that the
447  * GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION macro be defined.
448  *
449  * \warning The same arguments \e pts and \e dist used for
450  * initialization must be provided to the Search() function.
451  **********************************************************************/
452  void Load(std::istream& is, bool bin = true) {
453  int version1, realspec, bucket, numpoints, treesize, cost;
454  if (bin) {
455  char id[17];
456  is.read(id, 16);
457  id[16] = '\0';
458  if (!(std::strcmp(id, "NearestNeighbor_") == 0))
459  throw GeographicLib::GeographicErr("Bad ID");
460  is.read(reinterpret_cast<char *>(&version1), sizeof(int));
461  is.read(reinterpret_cast<char *>(&realspec), sizeof(int));
462  is.read(reinterpret_cast<char *>(&bucket), sizeof(int));
463  is.read(reinterpret_cast<char *>(&numpoints), sizeof(int));
464  is.read(reinterpret_cast<char *>(&treesize), sizeof(int));
465  is.read(reinterpret_cast<char *>(&cost), sizeof(int));
466  } else {
467  if (!( is >> version1 >> realspec >> bucket >> numpoints >> treesize
468  >> cost ))
469  throw GeographicLib::GeographicErr("Bad header");
470  }
471  if (!( version1 == version ))
472  throw GeographicLib::GeographicErr("Incompatible version");
473  if (!( realspec == std::numeric_limits<dist_t>::digits *
474  (std::numeric_limits<dist_t>::is_integer ? -1 : 1) ))
475  throw GeographicLib::GeographicErr("Different dist_t types");
476  if (!( 0 <= bucket && bucket <= maxbucket ))
477  throw GeographicLib::GeographicErr("Bad bucket size");
478  if (!( 0 <= treesize && treesize <= numpoints ))
479  throw
480  GeographicLib::GeographicErr("Bad number of points or tree size");
481  if (!( 0 <= cost ))
482  throw GeographicLib::GeographicErr("Bad value for cost");
483  std::vector<Node> tree;
484  tree.reserve(treesize);
485  for (int i = 0; i < treesize; ++i) {
486  Node node;
487  if (bin) {
488  is.read(reinterpret_cast<char *>(&node.index), sizeof(int));
489  if (node.index >= 0) {
490  is.read(reinterpret_cast<char *>(node.data.lower),
491  2 * sizeof(dist_t));
492  is.read(reinterpret_cast<char *>(node.data.upper),
493  2 * sizeof(dist_t));
494  is.read(reinterpret_cast<char *>(node.data.child),
495  2 * sizeof(int));
496  } else {
497  is.read(reinterpret_cast<char *>(node.leaves),
498  bucket * sizeof(int));
499  for (int l = bucket; l < maxbucket; ++l)
500  node.leaves[l] = 0;
501  }
502  } else {
503  if (!( is >> node.index ))
504  throw GeographicLib::GeographicErr("Bad index");
505  if (node.index >= 0) {
506  for (int l = 0; l < 2; ++l) {
507  if (!( is >> node.data.lower[l] >> node.data.upper[l]
508  >> node.data.child[l] ))
509  throw GeographicLib::GeographicErr("Bad node data");
510  }
511  } else {
512  // Must be at least one valid leaf followed by a sequence end
513  // markers (-1).
514  for (int l = 0; l < bucket; ++l) {
515  if (!( is >> node.leaves[l] ))
516  throw GeographicLib::GeographicErr("Bad leaf data");
517  }
518  for (int l = bucket; l < maxbucket; ++l)
519  node.leaves[l] = 0;
520  }
521  }
522  node.Check(numpoints, treesize, bucket);
523  tree.push_back(node);
524  }
525  _tree.swap(tree);
526  _numpoints = numpoints;
527  _bucket = bucket;
528  _mc = _sc = 0;
529  _cost = cost; _c1 = _k = _cmax = 0;
530  _cmin = std::numeric_limits<int>::max();
531  }
532 
533  /**
534  * Write the object to stream \e os as text.
535  *
536  * @param[in,out] os the output stream.
537  * @param[in] t the NearestNeighbor object to be saved.
538  * @exception std::bad_alloc if memory for the string representation of the
539  * object can't be allocated.
540  **********************************************************************/
541  friend std::ostream& operator<<(std::ostream& os, const NearestNeighbor& t)
542  { t.Save(os, false); return os; }
543 
544  /**
545  * Read the object from stream \e is as text.
546  *
547  * @param[in,out] is the input stream.
548  * @param[out] t the NearestNeighbor object to be loaded.
549  * @exception GeographicErr if the state read from \e is is illegal.
550  * @exception std::bad_alloc if memory for the tree can't be allocated.
551  **********************************************************************/
552  friend std::istream& operator>>(std::istream& is, NearestNeighbor& t)
553  { t.Load(is, false); return is; }
554 
555  /**
556  * Swap with another NearestNeighbor object.
557  *
558  * @param[in,out] t the NearestNeighbor object to swap with.
559  **********************************************************************/
561  std::swap(_numpoints, t._numpoints);
562  std::swap(_bucket, t._bucket);
563  std::swap(_cost, t._cost);
564  _tree.swap(t._tree);
565  std::swap(_mc, t._mc);
566  std::swap(_sc, t._sc);
567  std::swap(_c1, t._c1);
568  std::swap(_k, t._k);
569  std::swap(_cmin, t._cmin);
570  std::swap(_cmax, t._cmax);
571  }
572 
573  /**
574  * The accumulated statistics on the searches so far.
575  *
576  * @param[out] setupcost the cost of initializing the NearestNeighbor.
577  * @param[out] numsearches the number of calls to Search().
578  * @param[out] searchcost the total cost of the calls to Search().
579  * @param[out] mincost the minimum cost of a Search().
580  * @param[out] maxcost the maximum cost of a Search().
581  * @param[out] mean the mean cost of a Search().
582  * @param[out] sd the standard deviation in the cost of a Search().
583  *
584  * Here "cost" measures the number of distance calculations needed. Note
585  * that the accumulation of statistics is \e not thread safe.
586  **********************************************************************/
587  void Statistics(int& setupcost, int& numsearches, int& searchcost,
588  int& mincost, int& maxcost,
589  double& mean, double& sd) const {
590  setupcost = _cost; numsearches = _k; searchcost = _c1;
591  mincost = _cmin; maxcost = _cmax;
592  mean = _mc; sd = std::sqrt(_sc / (_k - 1));
593  }
594 
595  /**
596  * Reset the counters for the accumulated statistics on the searches so
597  * far.
598  **********************************************************************/
599  void ResetStatistics() const {
600  _mc = _sc = 0;
601  _c1 = _k = _cmax = 0;
602  _cmin = std::numeric_limits<int>::max();
603  }
604 
605  private:
606  // Package up a dist_t and an int. We will want to sort on the dist_t so
607  // put it first.
608  typedef std::pair<dist_t, int> item;
609  // \cond SKIP
610  class Node {
611  public:
612  struct bounds {
613  dist_t lower[2], upper[2]; // bounds on inner/outer distances
614  int child[2];
615  };
616  union {
617  bounds data;
618  int leaves[maxbucket];
619  };
620  int index;
621 
622  Node()
623  : index(-1)
624  {
625  for (int i = 0; i < 2; ++i) {
626  data.lower[i] = data.upper[i] = 0;
627  data.child[i] = -1;
628  }
629  }
630 
631  // Sanity check on a Node
632  void Check(int numpoints, int treesize, int bucket) const {
633  if (!( -1 <= index && index < numpoints ))
634  throw GeographicLib::GeographicErr("Bad index");
635  if (index >= 0) {
636  if (!( -1 <= data.child[0] && data.child[0] < treesize &&
637  -1 <= data.child[1] && data.child[1] < treesize ))
638  throw GeographicLib::GeographicErr("Bad child pointers");
639  if (!( 0 <= data.lower[0] && data.lower[0] <= data.upper[0] &&
640  data.upper[0] <= data.lower[1] &&
641  data.lower[1] <= data.upper[1] ))
642  throw GeographicLib::GeographicErr("Bad bounds");
643  } else {
644  // Must be at least one valid leaf followed by a sequence end markers
645  // (-1).
646  bool start = true;
647  for (int l = 0; l < bucket; ++l) {
648  if (!( (start ?
649  ((l == 0 ? 0 : -1) <= leaves[l] && leaves[l] < numpoints) :
650  leaves[l] == -1) ))
651  throw GeographicLib::GeographicErr("Bad leaf data");
652  start = leaves[l] >= 0;
653  }
654  for (int l = bucket; l < maxbucket; ++l) {
655  if (leaves[l] != 0)
656  throw GeographicLib::GeographicErr("Bad leaf data");
657  }
658  }
659  }
660 
661 #if defined(GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION) && \
662  GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION
663  friend class boost::serialization::access;
664  template<class Archive>
665  void save(Archive& ar, const unsigned int) const {
666  ar & boost::serialization::make_nvp("index", index);
667  if (index < 0)
668  ar & boost::serialization::make_nvp("leaves", leaves);
669  else
670  ar & boost::serialization::make_nvp("lower", data.lower)
671  & boost::serialization::make_nvp("upper", data.upper)
672  & boost::serialization::make_nvp("child", data.child);
673  }
674  template<class Archive>
675  void load(Archive& ar, const unsigned int) {
676  ar & boost::serialization::make_nvp("index", index);
677  if (index < 0)
678  ar & boost::serialization::make_nvp("leaves", leaves);
679  else
680  ar & boost::serialization::make_nvp("lower", data.lower)
681  & boost::serialization::make_nvp("upper", data.upper)
682  & boost::serialization::make_nvp("child", data.child);
683  }
684  template<class Archive>
685  void serialize(Archive& ar, const unsigned int file_version)
686  { boost::serialization::split_member(ar, *this, file_version); }
687 #endif
688  };
689  // \endcond
690 #if defined(GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION) && \
691  GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION
692  friend class boost::serialization::access;
693  template<class Archive> void save(Archive& ar, const unsigned) const {
694  int realspec = std::numeric_limits<dist_t>::digits *
695  (std::numeric_limits<dist_t>::is_integer ? -1 : 1);
696  // Need to use version1, otherwise load error in debug mode on Linux:
697  // undefined reference to GeographicLib::NearestNeighbor<...>::version.
698  int version1 = version;
699  ar & boost::serialization::make_nvp("version", version1)
700  & boost::serialization::make_nvp("realspec", realspec)
701  & boost::serialization::make_nvp("bucket", _bucket)
702  & boost::serialization::make_nvp("numpoints", _numpoints)
703  & boost::serialization::make_nvp("cost", _cost)
704  & boost::serialization::make_nvp("tree", _tree);
705  }
706  template<class Archive> void load(Archive& ar, const unsigned) {
707  int version1, realspec, bucket, numpoints, cost;
708  ar & boost::serialization::make_nvp("version", version1);
709  if (version1 != version)
710  throw GeographicLib::GeographicErr("Incompatible version");
711  std::vector<Node> tree;
712  ar & boost::serialization::make_nvp("realspec", realspec);
713  if (!( realspec == std::numeric_limits<dist_t>::digits *
714  (std::numeric_limits<dist_t>::is_integer ? -1 : 1) ))
715  throw GeographicLib::GeographicErr("Different dist_t types");
716  ar & boost::serialization::make_nvp("bucket", bucket);
717  if (!( 0 <= bucket && bucket <= maxbucket ))
718  throw GeographicLib::GeographicErr("Bad bucket size");
719  ar & boost::serialization::make_nvp("numpoints", numpoints)
720  & boost::serialization::make_nvp("cost", cost)
721  & boost::serialization::make_nvp("tree", tree);
722  if (!( 0 <= int(tree.size()) && int(tree.size()) <= numpoints ))
723  throw
724  GeographicLib::GeographicErr("Bad number of points or tree size");
725  for (int i = 0; i < int(tree.size()); ++i)
726  tree[i].Check(numpoints, int(tree.size()), bucket);
727  _tree.swap(tree);
728  _numpoints = numpoints;
729  _bucket = bucket;
730  _mc = _sc = 0;
731  _cost = cost; _c1 = _k = _cmax = 0;
732  _cmin = std::numeric_limits<int>::max();
733  }
734  template<class Archive>
735  void serialize(Archive& ar, const unsigned int file_version)
736  { boost::serialization::split_member(ar, *this, file_version); }
737 #endif
738 
739  int _numpoints, _bucket, _cost;
740  std::vector<Node> _tree;
741  // Counters to track stastistics on the cost of searches
742  mutable double _mc, _sc;
743  mutable int _c1, _k, _cmin, _cmax;
744 
745  int init(const std::vector<pos_t>& pts, const distfun_t& dist, int bucket,
746  std::vector<Node>& tree, std::vector<item>& ids, int& cost,
747  int l, int u, int vp) {
748 
749  if (u == l)
750  return -1;
751  Node node;
752 
753  if (u - l > (bucket == 0 ? 1 : bucket)) {
754 
755  // choose a vantage point and move it to the start
756  int i = vp;
757  std::swap(ids[l], ids[i]);
758 
759  int m = (u + l + 1) / 2;
760 
761  for (int k = l + 1; k < u; ++k) {
762  ids[k].first = dist(pts[ids[l].second], pts[ids[k].second]);
763  ++cost;
764  }
765  // partition around the median distance
766  std::nth_element(ids.begin() + l + 1,
767  ids.begin() + m,
768  ids.begin() + u);
769  node.index = ids[l].second;
770  if (m > l + 1) { // node.child[0] is possibly empty
771  typename std::vector<item>::iterator
772  t = std::min_element(ids.begin() + l + 1, ids.begin() + m);
773  node.data.lower[0] = t->first;
774  t = std::max_element(ids.begin() + l + 1, ids.begin() + m);
775  node.data.upper[0] = t->first;
776  // Use point with max distance as vantage point; this point act as a
777  // "corner" point and leads to a good partition.
778  node.data.child[0] = init(pts, dist, bucket, tree, ids, cost,
779  l + 1, m, int(t - ids.begin()));
780  }
781  typename std::vector<item>::iterator
782  t = std::max_element(ids.begin() + m, ids.begin() + u);
783  node.data.lower[1] = ids[m].first;
784  node.data.upper[1] = t->first;
785  // Use point with max distance as vantage point here too
786  node.data.child[1] = init(pts, dist, bucket, tree, ids, cost,
787  m, u, int(t - ids.begin()));
788  } else {
789  if (bucket == 0)
790  node.index = ids[l].second;
791  else {
792  node.index = -1;
793  // Sort the bucket entries so that the tree is independent of the
794  // implementation of nth_element.
795  std::sort(ids.begin() + l, ids.begin() + u);
796  for (int i = l; i < u; ++i)
797  node.leaves[i-l] = ids[i].second;
798  for (int i = u - l; i < bucket; ++i)
799  node.leaves[i] = -1;
800  for (int i = bucket; i < maxbucket; ++i)
801  node.leaves[i] = 0;
802  }
803  }
804 
805  tree.push_back(node);
806  return int(tree.size()) - 1;
807  }
808 
809  };
810 
811 } // namespace GeographicLib
812 
813 namespace std {
814 
815  /**
816  * Swap two GeographicLib::NearestNeighbor objects.
817  *
818  * @tparam dist_t the type used for measuring distances.
819  * @tparam pos_t the type for specifying the positions of points.
820  * @tparam distfun_t the type for a function object which calculates
821  * distances between points.
822  * @param[in,out] a the first GeographicLib::NearestNeighbor to swap.
823  * @param[in,out] b the second GeographicLib::NearestNeighbor to swap.
824  **********************************************************************/
825  template <typename dist_t, typename pos_t, class distfun_t>
828  a.swap(b);
829  }
830 
831 } // namespace std
832 
833 #if defined(_MSC_VER)
834 # pragma warning (pop)
835 #endif
836 
837 #endif // GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP
GeographicLib::NearestNeighbor::operator<<
friend std::ostream & operator<<(std::ostream &os, const NearestNeighbor &t)
Definition: NearestNeighbor.hpp:541
GeographicLib
Namespace for GeographicLib.
Definition: Accumulator.cpp:12
GeographicLib::NearestNeighbor::operator>>
friend std::istream & operator>>(std::istream &is, NearestNeighbor &t)
Definition: NearestNeighbor.hpp:552
GeographicLib::NearestNeighbor::Search
dist_t Search(const std::vector< pos_t > &pts, const distfun_t &dist, const pos_t &query, std::vector< int > &ind, int k=1, dist_t maxdist=std::numeric_limits< dist_t >::max(), dist_t mindist=-1, bool exhaustive=true, dist_t tol=0) const
Definition: NearestNeighbor.hpp:258
GeographicLib::GeographicErr
Exception handling for GeographicLib.
Definition: Constants.hpp:315
GeographicLib::NearestNeighbor::NearestNeighbor
NearestNeighbor(const std::vector< pos_t > &pts, const distfun_t &dist, int bucket=4)
Definition: NearestNeighbor.hpp:149
GeographicLib::NearestNeighbor::swap
void swap(NearestNeighbor &t)
Definition: NearestNeighbor.hpp:560
GeographicLib::NearestNeighbor::NearestNeighbor
NearestNeighbor()
Definition: NearestNeighbor.hpp:118
GeographicLib::NearestNeighbor::ResetStatistics
void ResetStatistics() const
Definition: NearestNeighbor.hpp:599
GeographicLib::NearestNeighbor
Nearest-neighbor calculations.
Definition: NearestNeighbor.hpp:103
std::swap
void swap(GeographicLib::NearestNeighbor< dist_t, pos_t, distfun_t > &a, GeographicLib::NearestNeighbor< dist_t, pos_t, distfun_t > &b)
Definition: NearestNeighbor.hpp:826
Constants.hpp
Header for GeographicLib::Constants class.
GeographicLib::NearestNeighbor::NumPoints
int NumPoints() const
Definition: NearestNeighbor.hpp:355
std
Definition: NearestNeighbor.hpp:813
GeographicLib::NearestNeighbor::Load
void Load(std::istream &is, bool bin=true)
Definition: NearestNeighbor.hpp:452
GeographicLib::NearestNeighbor::Statistics
void Statistics(int &setupcost, int &numsearches, int &searchcost, int &mincost, int &maxcost, double &mean, double &sd) const
Definition: NearestNeighbor.hpp:587
GeographicLib::NearestNeighbor::Save
void Save(std::ostream &os, bool bin=true) const
Definition: NearestNeighbor.hpp:374
GeographicLib::NearestNeighbor::Initialize
void Initialize(const std::vector< pos_t > &pts, const distfun_t &dist, int bucket=4)
Definition: NearestNeighbor.hpp:170