BDE 4.14.0 Production release
Loading...
Searching...
No Matches
bdlsta_moment.h
Go to the documentation of this file.
1/// @file bdlsta_moment.h
2///
3/// The content of this file has been pre-processed for Doxygen.
4///
5
6
7// bdlsta_moment.h -*-C++-*-
8#ifndef INCLUDED_BDLSTA_MOMENT
9#define INCLUDED_BDLSTA_MOMENT
10
11#include <bsls_ident.h>
12BSLS_IDENT("$Id: $")
13
14// BDE_VERIFY pragma: -LL01 // Link is just too long
15
16/// @defgroup bdlsta_moment bdlsta_moment
17/// @brief Online algorithm for mean, variance, skew, and kurtosis.
18/// @addtogroup bdl
19/// @{
20/// @addtogroup bdlsta
21/// @{
22/// @addtogroup bdlsta_moment
23/// @{
24///
25/// <h1> Outline </h1>
26/// * <a href="#bdlsta_moment-purpose"> Purpose</a>
27/// * <a href="#bdlsta_moment-classes"> Classes </a>
28/// * <a href="#bdlsta_moment-description"> Description </a>
29/// * <a href="#bdlsta_moment-usage"> Usage </a>
30/// * <a href="#bdlsta_moment-example-1-calculating-skew-variance-and-mean"> Example 1: Calculating skew, variance, and mean </a>
31///
32/// # Purpose {#bdlsta_moment-purpose}
33/// Online algorithm for mean, variance, skew, and kurtosis.
34///
35/// # Classes {#bdlsta_moment-classes}
36///
37/// - bdlsta::Moment: online calculation of mean, variance, skew, and kurtosis
38///
39/// # Description {#bdlsta_moment-description}
40/// This component provides a mechanism, `bdlsta::Moment`, that
41/// provides online calculation of basic statistics: mean, variance, skew, and
42/// kurtosis while maintaining accuracy. Online algorithms process the data in
43/// one pass, while keeping good accuracy. The online algorithms used are
44/// Welford for variance, and the stable skew and kurtosis algorithms taken
45/// from:
46/// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Higher-order_statistics
47///
48/// The implementation uses template specialization so the user can choose the
49/// statistics necessary, and not calculate or allocate memory for those
50/// statistics that are not needed.
51///
52/// The template parameter is a value from the provided enum and having the
53/// following interpretation:
54/// @code
55/// M1 - mean
56/// M2 - variance+mean
57/// M3 - skew+variance+mean
58/// M4 - kurtosis+skew+variance+mean
59/// @endcode
60///
61/// ## Usage {#bdlsta_moment-usage}
62///
63///
64/// This section illustrates intended use of this component.
65///
66/// ### Example 1: Calculating skew, variance, and mean {#bdlsta_moment-example-1-calculating-skew-variance-and-mean}
67///
68///
69/// This example shows how to accumulate a set of values, and calculate the
70/// skew, variance, and kurtosis.
71///
72/// First, we create example input and instantiate the appropriate mechanism:
73/// @code
74/// double input[] = { 1.0, 2.0, 4.0, 5.0 };
75///
76/// bdlsta::Moment<bdlsta::MomentLevel::e_M3> m3;
77/// @endcode
78/// Then, we invoke the `add` routine to accumulate the data:
79/// @code
80/// for(int i = 0; i < 4; ++i) {
81/// m3.add(input[i]);
82/// }
83/// @endcode
84/// Finally, we assert that the mean, variance, and skew are what we expect:
85/// @code
86/// ASSERT(4 == m3.count());
87/// ASSERT(3.0 == m3.mean());
88/// ASSERT(1e-5 > fabs(3.33333 - m3.variance()));
89/// ASSERT(1e-5 > fabs(0.0 - m3.skew()));
90/// @endcode
91/// @}
92/** @} */
93/** @} */
94
95/** @addtogroup bdl
96 * @{
97 */
98/** @addtogroup bdlsta
99 * @{
100 */
101/** @addtogroup bdlsta_moment
102 * @{
103 */
104
105// BDE_VERIFY pragma: +LL01
106
107#include <bdlscm_version.h>
108
109#include <bsl_cmath.h>
110
111#include <bsls_assert.h>
112#include <bsls_review.h>
113
114
115namespace bdlsta {
116
118 // TYPES
119 enum Enum {
120 // Enumeration of moment level of data desired.
121
122 e_M1, // mean
123 e_M2, // variance+mean
124 e_M3, // skew+variance+mean
125 e_M4 // kurtosis+skew+variance+mean
126 };
127};
128
129 // ==========================
130 // private struct Moment_Data
131 // ==========================
132
133template <MomentLevel::Enum ML>
135
136/// Data members for Mean only.
137template<>
139
140 // PUBLIC DATA
141 int d_count; // Number of entries.
142 double d_sum; // Sum of entries.
143
144 // CREATORS
145
146 /// Constructor initializes all members to zero.
147 Moment_Data();
148};
149
150/// Data members for Variance and below.
151template<>
153
154 // PUBLIC DATA
155 int d_count; // Number of entries.
156 double d_sum; // Sum of entries.
157 double d_mean; // Mean of entries.
158 double d_M2; // 2nd moment, for variance.
159
160 // CREATORS
161
162 /// Constructor initializes all members to zero.
163 Moment_Data();
164};
165
166/// Data members for Skew and below
167template<>
169
170 // PUBLIC DATA
171 int d_count; // Number of entries.
172 double d_sum; // Sum of entries.
173 double d_mean; // Mean of entries.
174 double d_M2; // 2nd moment, for variance.
175 double d_M3; // 3rd moment, for skew
176
177 // CREATORS
178
179 /// Constructor initializes all members to zero.
180 Moment_Data();
181};
182
183/// Data members for Kurtosis and below
184template<>
186
187 // PUBLIC DATA
188 int d_count; // Number of entries.
189 double d_sum; // Sum of entries.
190 double d_mean; // Mean of entries.
191 double d_M2; // 2nd moment, for variance.
192 double d_M3; // 3rd moment, for skew
193 double d_M4; // 4th moment, for kurtosis
194
195 // CREATORS
196
197 /// Constructor initializes all members to zero.
198 Moment_Data();
199};
200
201 // ============
202 // class Moment
203 // ============
204
205// BDE_VERIFY pragma: -LL01 // Link is just too long
206
207/// This class provides efficient and accurate online algorithms for
208/// calculating mean, variance, skew, and kurtosis. The class provides
209/// template specializations, so that no unnecessary data members will be
210/// kept or unnecessary calculations done. The online algorithms used are
211/// Welford for variance, and the stable M3 and M4 are taken from:
212/// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Higher-order_statistics
213///
214/// The formula for sample skewness is taken from:
215/// http://www.macroption.com/skewness-formula/
216///
217/// The formula for sample excess kurtosis is taken from:
218/// http://www.macroption.com/kurtosis-formula/
219///
220/// See @ref bdlsta_moment
221template <MomentLevel::Enum ML>
222class Moment {
223
224 // BDE_VERIFY pragma: +LL01
225
226 private:
227 // PRIVATE TYPES
228 typedef struct Moment_Data<ML> Moment_Data_t;
229
230 // DATA
231 Moment_Data_t d_data;
232
233 public:
234 // CONSTANTS
235 enum {
238 };
239
240 // MANIPULATORS
241
242 /// Add the specified `value` to the data set.
243 void add(double value);
244
245 // ACCESSORS
246
247 /// Returns the number of elements in the data set.
248 int count() const;
249
250 /// Return the kurtosis of the data set. The behavior is undefined
251 /// unless `4 <= count` and the variance is not zero.
252 double kurtosis() const;
253
254 /// Load into the specified `result`, the kurtosis of the data set.
255 /// Return 0 on success, and a non-zero value otherwise. Specifically,
256 /// `e_INADEQUATE_DATA` is returned if `4 > count` or the variance is
257 /// zero.
258 int kurtosisIfValid(double *result) const;
259
260 /// Return the mean of the data set. The behavior is undefined unless
261 /// `1 <= count`.
262 double mean() const;
263
264 /// Load into the specified `result`, the mean of the data set. Return
265 /// 0 on success, and a non-zero value otherwise. Specifically,
266 /// `e_INADEQUATE_DATA` is returned if `1 > count`.
267 int meanIfValid(double *result) const;
268
269 /// Return skew of the data set. The behavior is undefined unless
270 /// `3 <= count` or the variance is zero.
271 double skew() const;
272
273 /// Load into the specified `result`, the skew of the data set. Return
274 /// 0 on success, and a non-zero value otherwise. Specifically,
275 /// `e_INADEQUATE_DATA` is returned if `3 > count` or the variance is
276 /// zero.
277 int skewIfValid(double *result) const;
278
279 /// Return the variance of the data set. The behavior is undefined
280 /// unless `2 <= count`.
281 double variance() const;
282
283 /// Load into the specified `result`, the variance of the data set.
284 /// Return 0 on success, and a non-zero value otherwise. Specifically,
285 /// `e_INADEQUATE_DATA` is returned if `2 > count`.
286 int varianceIfValid(double *result) const;
287};
288
289// ============================================================================
290// INLINE DEFINITIONS
291// ============================================================================
292
293 // --------------------------
294 // struct bdlsta::Moment_Data
295 // --------------------------
296
297// CREATORS
298inline
300: d_count(0)
301, d_sum(0.0)
302{
303}
304
305inline
307: d_count(0)
308, d_sum(0.0)
309, d_mean(0.0)
310, d_M2(0.0)
311{
312}
313
314inline
316: d_count(0)
317, d_sum(0.0)
318, d_mean(0.0)
319, d_M2(0.0)
320, d_M3(0.0)
321{
322}
323
324inline
326: d_count(0)
327, d_sum(0.0)
328, d_mean(0.0)
329, d_M2(0.0)
330, d_M3(0.0)
331, d_M4(0.0)
332{
333}
334
335 // --------------------
336 // class bdlsta::Moment
337 // --------------------
338
339// MANIPULATORS
340template<>
341inline
343{
344 ++d_data.d_count;
345 d_data.d_sum += value;
346}
347
348template<>
349inline
351{
352 // Modified Welford algorithm for variance.
353 const double delta = value - d_data.d_mean;
354 d_data.d_sum += value;
355 ++d_data.d_count;
356 d_data.d_mean = d_data.d_sum / static_cast<double>(d_data.d_count);
357 const double delta2 = value - d_data.d_mean;
358 d_data.d_M2 += delta * delta2;
359}
360
361template<>
362inline
364{
365 // Modified Welford algorithm for variance, and similar algorithm for skew.
366 const double delta = value - d_data.d_mean;
367 const double nm1 = d_data.d_count;
368 d_data.d_sum += value;
369 ++d_data.d_count;
370 const double n = d_data.d_count;
371 const double deltaN = delta / n;
372 d_data.d_mean = d_data.d_sum / n;
373 const double term1 = delta * deltaN * nm1;
374 d_data.d_M3 += term1 * deltaN * (n - 2.0) - 3.0 * deltaN * d_data.d_M2;
375 d_data.d_M2 += term1;
376}
377
378template<>
379inline
381{
382 // Modified Welford algorithm for variance, and similar algorithms for skew
383 // and kurtosis.
384 const double delta = value - d_data.d_mean;
385 const double nm1 = d_data.d_count;
386 d_data.d_sum += value;
387 ++d_data.d_count;
388 const double n = d_data.d_count;
389 const double n2 = n * n;
390 const double deltaN = delta / n;
391 d_data.d_mean = d_data.d_sum / n;
392 const double term1 = delta * deltaN * nm1;
393 const double deltaN2 = deltaN * deltaN;
394 d_data.d_M4 += term1 * deltaN2 * (n2 - 3.0 * n + 3.0) +
395 6 * deltaN2 * d_data.d_M2 - 4.0 * deltaN * d_data.d_M3;
396 d_data.d_M3 += term1 * deltaN * (n - 2.0) - 3.0 * deltaN * d_data.d_M2;
397 d_data.d_M2 += term1;
398}
399
400// ACCESSORS
401template <MomentLevel::Enum ML>
402inline
404{
405 return d_data.d_count;
406}
407
408template<>
409inline
411{
412 BSLS_ASSERT(4 <= d_data.d_count && 0.0 != d_data.d_M2);
413
414 const double n = static_cast<double>(d_data.d_count);
415 const double n1 = (n - 1.0);
416 const double n2n3 = (n - 2.0) * (n - 3.0);
417 return n * (n + 1.0) * n1 / n2n3 * d_data.d_M4 / d_data.d_M2 / d_data.d_M2
418 - 3.0 * n1 * n1 / n2n3;
419}
420
421template<>
422inline
424{
425 if (4 > d_data.d_count || 0.0 == d_data.d_M2) {
426 return e_INADEQUATE_DATA; // RETURN
427 }
428 *result = kurtosis();
429 return 0;
430}
431
432template <MomentLevel::Enum ML>
433inline
434double Moment<ML>::mean() const
435{
436 BSLS_ASSERT(1 <= d_data.d_count);
437
438 return d_data.d_sum / static_cast<double>(d_data.d_count);
439}
440
441template <MomentLevel::Enum ML>
442inline
443int Moment<ML>::meanIfValid(double *result) const
444{
445 if (1 > d_data.d_count) {
446 return e_INADEQUATE_DATA; // RETURN
447 }
448 *result = mean();
449 return 0;
450}
451
452template <MomentLevel::Enum ML>
453inline
454double Moment<ML>::skew() const
455{
456 BSLS_ASSERT(3 <= d_data.d_count && 0.0 != d_data.d_M2);
457
458 const double n = static_cast<double>(d_data.d_count);
459 return bsl::sqrt(n - 1.0) * n / (n- 2.0) * d_data.d_M3
460 / bsl::pow(d_data.d_M2, 1.5);
461}
462
463template <MomentLevel::Enum ML>
464inline int Moment<ML>::skewIfValid(double *result) const
465{
466 if (3 > d_data.d_count || 0.0 == d_data.d_M2) {
467 return e_INADEQUATE_DATA; // RETURN
468 }
469 *result = skew();
470 return 0;
471}
472
473template <MomentLevel::Enum ML>
474inline
476{
477 BSLS_ASSERT(2 <= d_data.d_count);
478
479 return d_data.d_M2 / (d_data.d_count - 1);
480}
481
482template <MomentLevel::Enum ML>
483inline
484int Moment<ML>::varianceIfValid(double *result) const
485{
486 if (2 > d_data.d_count) {
487 return e_INADEQUATE_DATA; // RETURN
488 }
489 *result = variance();
490 return 0;
491}
492
493} // close package namespace
494
495
496#endif
497
498// ----------------------------------------------------------------------------
499// Copyright 2017 Bloomberg Finance L.P.
500//
501// Licensed under the Apache License, Version 2.0 (the "License");
502// you may not use this file except in compliance with the License.
503// You may obtain a copy of the License at
504//
505// http://www.apache.org/licenses/LICENSE-2.0
506//
507// Unless required by applicable law or agreed to in writing, software
508// distributed under the License is distributed on an "AS IS" BASIS,
509// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
510// See the License for the specific language governing permissions and
511// limitations under the License.
512// ----------------------------- END-OF-FILE ----------------------------------
513
514/** @} */
515/** @} */
516/** @} */
Definition bdlsta_moment.h:222
double variance() const
Definition bdlsta_moment.h:475
double skew() const
Definition bdlsta_moment.h:454
int count() const
Returns the number of elements in the data set.
Definition bdlsta_moment.h:403
int kurtosisIfValid(double *result) const
double kurtosis() const
@ e_INADEQUATE_DATA
Definition bdlsta_moment.h:237
@ e_SUCCESS
Definition bdlsta_moment.h:236
int varianceIfValid(double *result) const
Definition bdlsta_moment.h:484
double mean() const
Definition bdlsta_moment.h:434
int skewIfValid(double *result) const
Definition bdlsta_moment.h:464
int meanIfValid(double *result) const
Definition bdlsta_moment.h:443
void add(double value)
Add the specified value to the data set.
#define BSLS_ASSERT(X)
Definition bsls_assert.h:1804
#define BSLS_IDENT(str)
Definition bsls_ident.h:195
Definition bdlsta_linefit.h:110
Definition bdlsta_moment.h:117
Enum
Definition bdlsta_moment.h:119
@ e_M4
Definition bdlsta_moment.h:125
@ e_M3
Definition bdlsta_moment.h:124
@ e_M1
Definition bdlsta_moment.h:122
@ e_M2
Definition bdlsta_moment.h:123
int d_count
Definition bdlsta_moment.h:141
double d_sum
Definition bdlsta_moment.h:142
double d_sum
Definition bdlsta_moment.h:156
double d_mean
Definition bdlsta_moment.h:157
int d_count
Definition bdlsta_moment.h:155
double d_M2
Definition bdlsta_moment.h:158
double d_mean
Definition bdlsta_moment.h:173
double d_M2
Definition bdlsta_moment.h:174
int d_count
Definition bdlsta_moment.h:171
double d_M3
Definition bdlsta_moment.h:175
double d_sum
Definition bdlsta_moment.h:172
int d_count
Definition bdlsta_moment.h:188
double d_M2
Definition bdlsta_moment.h:191
double d_mean
Definition bdlsta_moment.h:190
double d_M3
Definition bdlsta_moment.h:192
double d_M4
Definition bdlsta_moment.h:193
double d_sum
Definition bdlsta_moment.h:189
Definition bdlsta_moment.h:134