casacore
Loading...
Searching...
No Matches
StatisticsUtilities.h
Go to the documentation of this file.
1//# Copyright (C) 2000,2001
2//# Associated Universities, Inc. Washington DC, USA.
3//#
4//# This library is free software; you can redistribute it and/or modify it
5//# under the terms of the GNU Library General Public License as published by
6//# the Free Software Foundation; either version 2 of the License, or (at your
7//# option) any later version.
8//#
9//# This library is distributed in the hope that it will be useful, but WITHOUT
10//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
12//# License for more details.
13//#
14//# You should have received a copy of the GNU Library General Public License
15//# along with this library; if not, write to the Free Software Foundation,
16//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
17//#
18//# Correspondence concerning AIPS++ should be addressed as follows:
19//# Internet email: aips2-request@nrao.edu.
20//# Postal address: AIPS++ Project Office
21//# National Radio Astronomy Observatory
22//# 520 Edgemont Road
23//# Charlottesville, VA 22903-2475 USA
24//#
25
26#ifndef SCIMATH_STATISTICSUTILITIES_H
27#define SCIMATH_STATISTICSUTILITIES_H
28
29#include <casacore/casa/Exceptions/Error.h>
30#include <casacore/scimath/StatsFramework/StatisticsTypes.h>
31#include <casacore/scimath/StatsFramework/StatsHistogram.h>
32#include <casacore/casa/Utilities/DataType.h>
33#include <casacore/casa/aips.h>
34
35#include <iostream>
36#include <casacore/casa/iosfwd.h>
37
38namespace casacore {
39
40template <class T> class PtrHolder;
41
42CASA_STATD class StatsDataProvider;
43
44// Various statistics related methods for the statistics framework.
45
46template <class AccumType> class StatisticsUtilities {
47public:
48
50
52
53 // <group>
54 // accumulate values. It is the responsibility of the caller to keep track
55 // of the accumulated values after each call. This class does not since it
56 // has no state. The accumulation derivation for mean and variance can be
57 // found at
58 // www.itl.nist.gov/div898/software/dataplot/refman2/ch2/weighvar.pdf
59 // nvariance is an accumulated value. It is related to the variance via
60 // variance = nvariance/npts or nvariance/(npts-1) depending on your
61 // preferred definition in the non-weighted case and
62 // wvariance = wnvariance/sumofweights or wnvariance/(sumofweights-1)
63 // in the weighted case. Its basic definition is
64 // nvariance = sum((x_i - mean)**2),
65 // wnvariance = sum(weight_i*(x_i - mean)**2)
66 // npts is a Double rather than an Int64 because of compilation issues when
67 // T is a Complex
68 inline static void accumulate (
69 Double& npts, AccumType& sum, AccumType& mean, const AccumType& datum
70 );
71
72 // in order to optimize performance, no checking is done for the
73 // weight == 0 case; callers should ensure that the weight is not zero before
74 // calling this method, and shouldn't call this method if the weight is 0.
75 // Expect a segfault because of division by zero if sumweights and weight
76 // are both zero.
77 inline static void waccumulate (
78 Double& npts, AccumType& sumweights, AccumType& wsum, AccumType& wmean,
79 const AccumType& datum, const AccumType& weight
80 );
81
82 inline static void accumulate (
83 Double& npts, AccumType& sum, AccumType& mean, AccumType& nvariance,
84 AccumType& sumsq, const AccumType& datum
85 );
86
87 // wsumsq is the weighted sum of squares, sum(w_i*x_i*x_i)
88 inline static void waccumulate (
89 Double& npts, AccumType& sumweights, AccumType& wsum, AccumType& wmean,
90 AccumType& wnvariance, AccumType& wsumsq, const AccumType& datum,
91 const AccumType& weight
92 );
93 // </group>
94
95 // <group>
96 // The assignment operator of class LocationType should use copy, not
97 // reference, semantics.
98 template <class LocationType> inline static void accumulate (
99 Double& npts, AccumType& sum, AccumType& mean, AccumType& nvariance,
100 AccumType& sumsq, AccumType& datamin, AccumType& datamax,
101 LocationType& minpos, LocationType& maxpos, const AccumType& datum,
102 const LocationType& location
103 );
104
105 template <class LocationType, class DataType>
106 inline static void accumulate (
107 Double& npts, AccumType& sum, AccumType& mean, AccumType& nvariance,
108 AccumType& sumsq, DataType& datamin, DataType& datamax,
109 LocationType& minpos, LocationType& maxpos, const DataType& datum,
110 const LocationType& location
111 );
112
113 template <class LocationType>
114 inline static void waccumulate (
115 Double& npts, AccumType& sumofweights, AccumType& sum, AccumType& mean,
116 AccumType& nvariance, AccumType& sumsq, AccumType& datamin,
117 AccumType& datamax, LocationType& minpos, LocationType& maxpos,
118 const AccumType& datum, const AccumType& weight,
119 const LocationType& location
120 );
121 // </group>
122
123 // <group>
124 // return True if the max or min was updated, False otherwise.
125 template <class LocationType>
126 inline static Bool doMax(
127 AccumType& datamax, LocationType& maxpos, Bool isFirst,
128 const AccumType& datum, const LocationType& location
129 );
130
131 template <class LocationType>
132 inline static Bool doMin(
133 AccumType& datamin, LocationType& minpos, Bool isFirst,
134 const AccumType& datum, const LocationType& location
135 );
136 // </group>
137
138 // <group>
139 // These versions are for symmetric accumulation about a specified center
140 // point. The actual point is accumulated, as is a "virtual" point that is
141 // symmetric about the specified center. Of course, the trivial relationship
142 // that the mean is the specified center is used to simplify things
143 inline static void accumulateSym (
144 Double& npts, AccumType& nvariance, AccumType& sumsq,
145 const AccumType& datum, const AccumType& center
146 );
147
148 // wsumsq is the weighted sum of squares, sum(w_i*x_i*x_i)
149 inline static void waccumulateSym (
150 Double& npts, AccumType& sumweights, AccumType& wnvariance,
151 AccumType& wsumsq, const AccumType& datum, const AccumType& weight,
152 const AccumType& center
153 );
154
155 // <src>maxpos</src> and <src>minpos</src> refer to actual, not
156 // virtually created, data only.
157 template <class LocationType> inline static void accumulateSym (
158 Double& npts, AccumType& nvariance, AccumType& sumsq,
159 AccumType& datamin, AccumType& datamax, LocationType& minpos,
160 LocationType& maxpos, const AccumType& datum,
161 const LocationType& location, const AccumType& center
162 );
163
164 template <class LocationType> inline static void waccumulateSym (
165 Double& npts, AccumType& sumofweights, AccumType& nvariance,
166 AccumType& sumsq, AccumType& datamin, AccumType& datamax,
167 LocationType& minpos, LocationType& maxpos, const AccumType& datum,
168 const AccumType& weight, const LocationType& location,
169 const AccumType& center
170 );
171
172 // convert in place by taking the absolute value of the difference of the
173 // std::vector and the median
174 inline static void convertToAbsDevMedArray(
175 DataArray& myArray, AccumType median
176 );
177 // </group>
178
 // NOTE(review): presumably reports whether datum passes the ranges in
 // [beginRange, endRange), treated as include or exclude ranges according
 // to isInclude -- confirm against the implementation file.
179 inline static Bool includeDatum(
180 const AccumType& datum, typename DataRanges::const_iterator beginRange,
181 typename DataRanges::const_iterator endRange, Bool isInclude
182 );
183
184
185 // The array can be changed by partially sorting it up to the largest index.
186 // Return a map of index to value in the sorted array.
187 static std::map<uInt64, AccumType> indicesToValues(
188 std::vector<AccumType>& myArray, const std::set<uInt64>& indices
189 );
190
 // NOTE(review): presumably folds the per-thread results held in tBins,
 // tSameVal and tAllSame (nThreadsMax entries) into bins, sameVal and
 // allSame -- confirm against the implementation file.
191 static void mergeResults(
192 std::vector<BinCountArray>& bins,
193 std::vector<CountedPtr<AccumType> >& sameVal,
194 std::vector<Bool>& allSame,
195 const PtrHolder<std::vector<BinCountArray>>& tBins,
196 const PtrHolder<std::vector<CountedPtr<AccumType>>>& tSameVal,
197 const PtrHolder<std::vector<Bool>>& tAllSame, uInt nThreadsMax
198 );
199
200 // use two statistics sets to get the statistics set that would
201 // result in combining the two data sets used to produce the
202 // individual statistics sets. The quantile related stats are
203 // not considered, since it is not in general possible to determine
204 // the resultant quantiles from the information provided; only
205 // the aggregate statistics make sense.
206 static StatsData<AccumType> combine(
207 const std::vector<StatsData<AccumType>>& stats
208 );
209
 // NOTE(review): presumably the maximum number of threads to use for the
 // given data provider (which may be null) -- confirm against the
 // implementation file.
210 template <class DataIterator, class MaskIterator, class WeightsIterator>
211 static uInt nThreadsMax(
212 const StatsDataProvider<CASA_STATP> *const dataProvider
213 );
214
 // NOTE(review): presumably the index of the calling thread -- confirm
 // against the implementation file.
215 static uInt threadIdx();
216
217private:
218
 // Class-wide constant; its value is defined outside this declaration.
219 const static AccumType TWO;
220
221};
222
223}
224
225#ifndef CASACORE_NO_AUTO_TEMPLATES
226#include <casacore/scimath/StatsFramework/StatisticsUtilities.tcc>
227#endif //# CASACORE_NO_AUTO_TEMPLATES
228
229#endif
#define DataArray
Commonly used types in statistics framework.
#define CASA_STATD
because the template signature has become unwieldy
Reference-counted pointer for constant data.
Definition CountedPtr.h:81
Various statistics related methods for the statistics framework.
static void accumulate(Double &npts, AccumType &sum, AccumType &mean, const AccumType &datum)
accumulate values.
static uInt nThreadsMax(const StatsDataProvider< CASA_STATP > *const dataProvider)
static Bool doMax(AccumType &datamax, LocationType &maxpos, Bool isFirst, const AccumType &datum, const LocationType &location)
return True if the max or min was updated, False otherwise.
static void accumulateSym(Double &npts, AccumType &nvariance, AccumType &sumsq, const AccumType &datum, const AccumType &center)
These versions are for symmetric accumulation about a specified center point.
static void waccumulateSym(Double &npts, AccumType &sumweights, AccumType &wnvariance, AccumType &wsumsq, const AccumType &datum, const AccumType &weight, const AccumType &center)
wsumsq is the weighted sum of squares, sum(w_i*x_i*x_i)
static StatsData< AccumType > combine(const std::vector< StatsData< AccumType > > &stats)
use two statistics sets to get the statistics set that would result in combining the two data sets us...
static void accumulateSym(Double &npts, AccumType &nvariance, AccumType &sumsq, AccumType &datamin, AccumType &datamax, LocationType &minpos, LocationType &maxpos, const AccumType &datum, const LocationType &location, const AccumType &center)
maxpos and minpos refer to actual, not virtually created, data only.
static void waccumulate(Double &npts, AccumType &sumofweights, AccumType &sum, AccumType &mean, AccumType &nvariance, AccumType &sumsq, AccumType &datamin, AccumType &datamax, LocationType &minpos, LocationType &maxpos, const AccumType &datum, const AccumType &weight, const LocationType &location)
static void waccumulate(Double &npts, AccumType &sumweights, AccumType &wsum, AccumType &wmean, AccumType &wnvariance, AccumType &wsumsq, const AccumType &datum, const AccumType &weight)
wsumsq is the weighted sum of squares, sum(w_i*x_i*x_i)
static void mergeResults(std::vector< BinCountArray > &bins, std::vector< CountedPtr< AccumType > > &sameVal, std::vector< Bool > &allSame, const PtrHolder< std::vector< BinCountArray > > &tBins, const PtrHolder< std::vector< CountedPtr< AccumType > > > &tSameVal, const PtrHolder< std::vector< Bool > > &tAllSame, uInt nThreadsMax)
static void waccumulateSym(Double &npts, AccumType &sumofweights, AccumType &nvariance, AccumType &sumsq, AccumType &datamin, AccumType &datamax, LocationType &minpos, LocationType &maxpos, const AccumType &datum, const AccumType &weight, const LocationType &location, const AccumType &center)
static std::map< uInt64, AccumType > indicesToValues(std::vector< AccumType > &myArray, const std::set< uInt64 > &indices)
The array can be changed by partially sorting it up to the largest index.
static void convertToAbsDevMedArray(DataArray &myArray, AccumType median)
convert in place by taking the absolute value of the difference of the std::vector and the median
static void accumulate(Double &npts, AccumType &sum, AccumType &mean, AccumType &nvariance, AccumType &sumsq, const AccumType &datum)
static Bool includeDatum(const AccumType &datum, typename DataRanges::const_iterator beginRange, typename DataRanges::const_iterator endRange, Bool isInclude)
static void waccumulate(Double &npts, AccumType &sumweights, AccumType &wsum, AccumType &wmean, const AccumType &datum, const AccumType &weight)
in order to optimize performance, no checking is done for the weight == 0 case callers should ensure ...
static void accumulate(Double &npts, AccumType &sum, AccumType &mean, AccumType &nvariance, AccumType &sumsq, DataType &datamin, DataType &datamax, LocationType &minpos, LocationType &maxpos, const DataType &datum, const LocationType &location)
static void accumulate(Double &npts, AccumType &sum, AccumType &mean, AccumType &nvariance, AccumType &sumsq, AccumType &datamin, AccumType &datamax, LocationType &minpos, LocationType &maxpos, const AccumType &datum, const LocationType &location)
The assignment operator of class LocationType should use copy, not reference, semantics.
static Bool doMin(AccumType &datamin, LocationType &minpos, Bool isFirst, const AccumType &datum, const LocationType &location)
Abstract base class which defines interface for providing "datasets" to the statistics framework in c...
this file contains all the compiler specific defines
Definition mainpage.dox:28
LatticeExprNode mean(const LatticeExprNode &expr)
LatticeExprNode sum(const LatticeExprNode &expr)
unsigned int uInt
Definition aipstype.h:51
bool Bool
Define the standard types used by Casacore.
Definition aipstype.h:42
PtrHolder(const PtrHolder< T > &other)
double Double
Definition aipstype.h:55
LatticeExprNode median(const LatticeExprNode &expr)
std::pair< Int64, Int64 > LocationType