92 #include <pcl/cuda/point_cloud.h>
93 #include <pcl/cuda/cutil_math.h>
105 float prec_sqr = FLT_EPSILON * FLT_EPSILON;
106 return x * x <= prec_sqr * y * y;
122 float invnm = 1.0f / sqrtf (src.x*src.x + src.y*src.y);
123 perp.x = -src.y*invnm;
124 perp.y = src.x*invnm;
133 float invnm = 1.0f / sqrtf (src.z*src.z + src.y*src.y);
135 perp.y = -src.z*invnm;
136 perp.z = src.y*invnm;
142 inline __host__ __device__
void computeRoots2 (
const float& b,
const float& c, float3& roots)
145 float d = b * b - 4.0f * c;
151 roots.z = 0.5f * (b + sd);
152 roots.y = 0.5f * (b - sd);
155 inline __host__ __device__
void swap (
float& a,
float& b)
157 float c(a); a=b; b=c;
162 inline __host__ __device__
void
173 float c1 = m.
data[0].x*m.
data[1].y -
182 if (std::abs(c0) < FLT_EPSILON)
186 const float s_inv3 = 1.0f/3.0f;
187 const float s_sqrt3 = sqrtf (3.0f);
190 float c2_over_3 = c2 * s_inv3;
191 float a_over_3 = (c1 - c2 * c2_over_3) * s_inv3;
195 float half_b = 0.5f * (c0 + c2_over_3 * (2.0f * c2_over_3 * c2_over_3 - c1));
197 float q = half_b * half_b + a_over_3 * a_over_3 * a_over_3;
202 float rho = sqrtf (-a_over_3);
203 float theta = std::atan2 (sqrtf (-q), half_b) * s_inv3;
204 float cos_theta = std::cos (theta);
205 float sin_theta = sin (theta);
206 roots.x = c2_over_3 + 2.f * rho * cos_theta;
207 roots.y = c2_over_3 - rho * (cos_theta + s_sqrt3 * sin_theta);
208 roots.z = c2_over_3 - rho * (cos_theta - s_sqrt3 * sin_theta);
211 if (roots.x >= roots.y)
212 swap (roots.x, roots.y);
213 if (roots.y >= roots.z)
215 swap (roots.y, roots.z);
216 if (roots.x >= roots.y)
217 swap (roots.x, roots.y);
225 inline __host__ __device__
void
228 evals = evecs.
data[0] = evecs.
data[1] = evecs.
data[2] = make_float3 (0.0f, 0.0f, 0.0f);
234 float3 scale_tmp = fmaxf (fmaxf (fabs (mat.
data[0]), fabs (mat.
data[1])), fabs (mat.
data[2]));
235 float scale = fmaxf (fmaxf (scale_tmp.x, scale_tmp.y), scale_tmp.z);
236 if (scale <= FLT_MIN)
240 scaledMat.
data[0] = mat.
data[0] / scale;
241 scaledMat.
data[1] = mat.
data[1] / scale;
242 scaledMat.
data[2] = mat.
data[2] / scale;
247 if ((evals.z-evals.x) <= FLT_EPSILON)
250 evecs.
data[0] = make_float3 (1.0f, 0.0f, 0.0f);
251 evecs.
data[1] = make_float3 (0.0f, 1.0f, 0.0f);
252 evecs.
data[2] = make_float3 (0.0f, 0.0f, 1.0f);
254 else if ((evals.y-evals.x) <= FLT_EPSILON)
262 tmp.
data[0].x -= evals.z;
263 tmp.
data[1].y -= evals.z;
264 tmp.
data[2].z -= evals.z;
266 float3 vec1 = cross (tmp.
data[0], tmp.
data[1]);
267 float3 vec2 = cross (tmp.
data[0], tmp.
data[2]);
268 float3 vec3 = cross (tmp.
data[1], tmp.
data[2]);
270 float len1 = dot (vec1, vec1);
271 float len2 = dot (vec2, vec2);
272 float len3 = dot (vec3, vec3);
274 if (len1 >= len2 && len1 >= len3)
275 evecs.
data[2] = vec1 / sqrtf (len1);
276 else if (len2 >= len1 && len2 >= len3)
277 evecs.
data[2] = vec2 / sqrtf (len2);
279 evecs.
data[2] = vec3 / sqrtf (len3);
284 else if ((evals.z-evals.y) <= FLT_EPSILON)
291 tmp.
data[0].x -= evals.x;
292 tmp.
data[1].y -= evals.x;
293 tmp.
data[2].z -= evals.x;
295 float3 vec1 = cross (tmp.
data[0], tmp.
data[1]);
296 float3 vec2 = cross (tmp.
data[0], tmp.
data[2]);
297 float3 vec3 = cross (tmp.
data[1], tmp.
data[2]);
299 float len1 = dot (vec1, vec1);
300 float len2 = dot (vec2, vec2);
301 float len3 = dot (vec3, vec3);
303 if (len1 >= len2 && len1 >= len3)
304 evecs.
data[0] = vec1 / sqrtf (len1);
305 else if (len2 >= len1 && len2 >= len3)
306 evecs.
data[0] = vec2 / sqrtf (len2);
308 evecs.
data[0] = vec3 / sqrtf (len3);
319 tmp.
data[0].x -= evals.z;
320 tmp.
data[1].y -= evals.z;
321 tmp.
data[2].z -= evals.z;
323 float3 vec1 = cross (tmp.
data[0], tmp.
data[1]);
324 float3 vec2 = cross (tmp.
data[0], tmp.
data[2]);
325 float3 vec3 = cross (tmp.
data[1], tmp.
data[2]);
327 float len1 = dot (vec1, vec1);
328 float len2 = dot (vec2, vec2);
329 float len3 = dot (vec3, vec3);
332 unsigned int min_el = 2;
333 unsigned int max_el = 2;
334 if (len1 >= len2 && len1 >= len3)
337 evecs.
data[2] = vec1 / sqrtf (len1);
339 else if (len2 >= len1 && len2 >= len3)
342 evecs.
data[2] = vec2 / sqrtf (len2);
347 evecs.
data[2] = vec3 / sqrtf (len3);
353 tmp.
data[0].x -= evals.y;
354 tmp.
data[1].y -= evals.y;
355 tmp.
data[2].z -= evals.y;
357 vec1 = cross (tmp.
data[0], tmp.
data[1]);
358 vec2 = cross (tmp.
data[0], tmp.
data[2]);
359 vec3 = cross (tmp.
data[1], tmp.
data[2]);
361 len1 = dot (vec1, vec1);
362 len2 = dot (vec2, vec2);
363 len3 = dot (vec3, vec3);
364 if (len1 >= len2 && len1 >= len3)
367 evecs.
data[1] = vec1 / sqrtf (len1);
368 min_el = len1 <= mmax[min_el]? 1: min_el;
369 max_el = len1 > mmax[max_el]? 1: max_el;
371 else if (len2 >= len1 && len2 >= len3)
374 evecs.
data[1] = vec2 / sqrtf (len2);
375 min_el = len2 <= mmax[min_el]? 1: min_el;
376 max_el = len2 > mmax[max_el]? 1: max_el;
381 evecs.
data[1] = vec3 / sqrtf (len3);
382 min_el = len3 <= mmax[min_el]? 1: min_el;
383 max_el = len3 > mmax[max_el]? 1: max_el;
389 tmp.
data[0].x -= evals.x;
390 tmp.
data[1].y -= evals.x;
391 tmp.
data[2].z -= evals.x;
393 vec1 = cross (tmp.
data[0], tmp.
data[1]);
394 vec2 = cross (tmp.
data[0], tmp.
data[2]);
395 vec3 = cross (tmp.
data[1], tmp.
data[2]);
397 len1 = dot (vec1, vec1);
398 len2 = dot (vec2, vec2);
399 len3 = dot (vec3, vec3);
400 if (len1 >= len2 && len1 >= len3)
403 evecs.
data[0] = vec1 / sqrtf (len1);
404 min_el = len3 <= mmax[min_el]? 0: min_el;
405 max_el = len3 > mmax[max_el]? 0: max_el;
407 else if (len2 >= len1 && len2 >= len3)
410 evecs.
data[0] = vec2 / sqrtf (len2);
411 min_el = len3 <= mmax[min_el]? 0: min_el;
412 max_el = len3 > mmax[max_el]? 0: max_el;
417 evecs.
data[0] = vec3 / sqrtf (len3);
418 min_el = len3 <= mmax[min_el]? 0: min_el;
419 max_el = len3 > mmax[max_el]? 0: max_el;
422 unsigned mid_el = 3 - min_el - max_el;
423 evecs.
data[min_el] = normalize (cross (evecs.
data[(min_el+1)%3], evecs.
data[(min_el+2)%3] ));
424 evecs.
data[mid_el] = normalize (cross (evecs.
data[(mid_el+1)%3], evecs.
data[(mid_el+2)%3] ));
433 __inline__ __host__ __device__ float3
443 __inline__ __host__ __device__
458 __inline__ __host__ __device__ float3
466 __inline__ __host__ __device__
474 cov.
data[1].y = pt.y * pt.y;
475 cov.
data[1].z = pt.y * pt.z;
476 cov.
data[2].z = pt.z * pt.z;
479 cov.
data[0].x = pt.x;
480 cov.
data[0].y = pt.y;
481 cov.
data[0].z = pt.z;
487 template <
class IteratorT>
491 centroid.x = centroid.y = centroid.z = 0;
494 centroid /= (float) (end - begin);
498 template <
class IteratorT>
501 cov.
data[0] = make_float3 (0.0f, 0.0f, 0.0f);
502 cov.
data[1] = make_float3 (0.0f, 0.0f, 0.0f);
503 cov.
data[2] = make_float3 (0.0f, 0.0f, 0.0f);
505 cov = transform_reduce (begin, end,
516 cov.
data[0] /= (float) (end - begin);
517 cov.
data[1] /= (float) (end - begin);
518 cov.
data[2] /= (float) (end - begin);
522 template <
typename CloudPtr>
527 :
points_(thrust::raw_pointer_cast (&input->points[0]))
537 inline __host__ __device__
542 float r_quadr, z_sqr;
543 float sqrt_term_y, sqrt_term_x, norm;
544 float x_times_z, y_times_z;
550 z_sqr = point_arg.z * point_arg.z;
558 x_times_z = point_arg.x * point_arg.z;
559 y_times_z = point_arg.y * point_arg.z;
562 bounds.x = (x_times_z - sqrt_term_x) * norm;
563 bounds.y = (x_times_z + sqrt_term_x) * norm;
564 bounds.z = (y_times_z - sqrt_term_y) * norm;
565 bounds.w = (y_times_z + sqrt_term_y) * norm;
569 bounds.x +=
width_ / 2.0f;
570 bounds.y +=
width_ / 2.0f;
574 res.x = (int)std::floor (bounds.x);
575 res.y = (int)std::ceil (bounds.y);
576 res.z = (int)std::floor (bounds.z);
577 res.w = (int)std::ceil (bounds.w);
580 res.x = clamp (res.x, 0,
width_-1);
581 res.y = clamp (res.y, 0,
width_-1);
582 res.z = clamp (res.z, 0,
height_-1);
583 res.w = clamp (res.w, 0,
height_-1);
588 inline __host__ __device__
589 int radiusSearch (
const float3 &query_pt,
int k_indices[],
int max_nnn)
596 for (
int x = bounds.x; (x <= bounds.y) & (nnn < max_nnn); ++x)
598 for (
int y = bounds.z; (y <= bounds.w) & (nnn < max_nnn); ++y)
605 float3 point_dif =
points_[idx] - query_pt;
610 k_indices[nnn] = idx;
620 inline __host__ __device__
640 bounds.x = clamp (bounds.x, 0,
width_-1);
641 bounds.y = clamp (bounds.y, 0,
width_-1);
642 bounds.z = clamp (bounds.z, 0,
height_-1);
643 bounds.w = clamp (bounds.w, 0,
height_-1);
649 float skipX = max (sqrtf ((
float)bounds.y-bounds.x) / sqrt_desired_nr_neighbors, 1.0f);
650 float skipY = max (sqrtf ((
float)bounds.w-bounds.z) / sqrt_desired_nr_neighbors, 1.0f);
654 cov.
data[0] = make_float3(0,0,0);
655 cov.
data[1] = make_float3(0,0,0);
656 cov.
data[2] = make_float3(0,0,0);
657 float3 centroid = make_float3(0,0,0);
660 for (
float y = (
float) bounds.z; y <= bounds.w; y += skipY)
662 for (
float x = (
float) bounds.x; x <= bounds.y; x += skipX)
665 int idx = ((int)y) *
width_ + ((int)x);
671 float3 point_dif =
points_[idx] - query_pt;
677 float3 demean_old =
points_[idx] - centroid;
678 centroid += demean_old / (float) nnn;
679 float3 demean_new =
points_[idx] - centroid;
681 cov.
data[1].y += demean_new.y * demean_old.y;
682 cov.
data[1].z += demean_new.y * demean_old.z;
683 cov.
data[2].z += demean_new.z * demean_old.z;
685 demean_old *= demean_new.x;
686 cov.
data[0].x += demean_old.x;
687 cov.
data[0].y += demean_old.y;
688 cov.
data[0].z += demean_old.z;
696 cov.
data[0] /= (float) nnn;
697 cov.
data[1] /= (float) nnn;
698 cov.
data[2] /= (float) nnn;
703 inline __host__ __device__
723 bounds.x = clamp (bounds.x, 0,
width_-1);
724 bounds.y = clamp (bounds.y, 0,
width_-1);
725 bounds.z = clamp (bounds.z, 0,
height_-1);
726 bounds.w = clamp (bounds.w, 0,
height_-1);
731 float skipX = max (sqrtf ((
float)bounds.y-bounds.x) / sqrt_desired_nr_neighbors, 1.0f);
732 float skipY = max (sqrtf ((
float)bounds.w-bounds.z) / sqrt_desired_nr_neighbors, 1.0f);
736 float3 centroid = make_float3(0,0,0);
739 for (
float y = (
float) bounds.z; y <= bounds.w; y += skipY)
741 for (
float x = (
float) bounds.x; x <= bounds.y; x += skipX)
744 int idx = ((int)y) *
width_ + ((int)x);
750 float3 point_dif =
points_[idx] - query_pt;
761 return centroid / (float) nnn;
Kernel to compute a radius neighborhood given a organized point cloud (aka range image cloud)
__host__ __device__ int radiusSearch(const float3 &query_pt, int k_indices[], int max_nnn)
__host__ __device__ float3 computeCentroid(const float3 &query_pt, CovarianceMatrix &cov, float sqrt_desired_nr_neighbors)
OrganizedRadiusSearch(const CloudPtr &input, float focalLength, float sqr_radius)
__host__ __device__ int computeCovarianceOnline(const float3 &query_pt, CovarianceMatrix &cov, float sqrt_desired_nr_neighbors)
__host__ __device__ int4 getProjectedRadiusSearchBox(const float3 &point_arg)
const PointXYZRGB * points_
__host__ __device__ float3 unitOrthogonal(const float3 &src)
void computeCovariance(IteratorT begin, IteratorT end, CovarianceMatrix &cov, float3 centroid)
Computes a covariance matrix for a given range of points.
__host__ __device__ void eigen33(const CovarianceMatrix &mat, CovarianceMatrix &evecs, float3 &evals)
__host__ __device__ void computeRoots(const CovarianceMatrix &m, float3 &roots)
__host__ __device__ void swap(float &a, float &b)
__host__ __device__ bool isMuchSmallerThan(float x, float y)
__host__ __device__ void computeRoots2(const float &b, const float &c, float3 &roots)
void compute3DCentroid(IteratorT begin, IteratorT end, float3 ¢roid)
Computes a centroid for a given range of points.
Adds two matrices element-wise.
__inline__ __host__ __device__ CovarianceMatrix operator()(CovarianceMatrix lhs, CovarianceMatrix rhs)
Simple kernel to add two points.
__inline__ __host__ __device__ float3 operator()(float3 lhs, float3 rhs)
Kernel to compute a `‘covariance matrix’' for a single point.
__inline__ __host__ __device__ CovarianceMatrix operator()(const PointXYZRGB &point)
__inline__ __host__ __device__ ComputeCovarianceForPoint(const float3 ¢roid)
misnamed class holding a 3x3 matrix
Default point xyz-rgb structure.
Simple kernel to convert a PointXYZRGB to float3.
__inline__ __host__ __device__ float3 operator()(const PointXYZRGB &pt)