// bdlsta_moment.h                                                    -*-C++-*-
#ifndef INCLUDED_BDLSTA_MOMENT
#define INCLUDED_BDLSTA_MOMENT

#include <bsls_ident.h>
BSLS_IDENT("$Id: $")

// BDE_VERIFY pragma: -LL01 // Link is just too long

//@PURPOSE: Online algorithm for mean, variance, skew, and kurtosis.
//
//@CLASSES:
//  bdlsta::Moment: online calculation of mean, variance, skew, and kurtosis
//
//@DESCRIPTION: This component provides a mechanism, 'bdlsta::Moment', that
// provides online calculation of basic statistics: mean, variance, skew, and
// kurtosis while maintaining accuracy.  Online algorithms process the data in
// one pass, while keeping good accuracy.  The online algorithms used are
// Welford for variance, and the stable skew and kurtosis algorithms taken
// from:
// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Higher-order_statistics
//
// The implementation uses template specialization so the user can choose the
// statistics necessary, and not calculate or allocate memory for those
// statistics that are not needed.
//
// The template parameter is a value from the provided enum and having the
// following interpretation:
//..
//  M1 - mean
//  M2 - variance+mean
//  M3 - skew+variance+mean
//  M4 - kurtosis+skew+variance+mean
//..
//
///Usage
///-----
// This section illustrates intended use of this component.
//
///Example 1: Calculating skew, variance, and mean
///- - - - - - - - - - - - - - - -
// This example shows how to accumulate a set of values, and calculate the
// skew, variance, and kurtosis.
//
// First, we create example input and instantiate the appropriate mechanism:
//..
//  double input[] = { 1.0, 2.0, 4.0, 5.0 };
//
//  bdlsta::Moment<bdlsta::MomentLevel::e_M3> m3;
//..
// Then, we invoke the 'add' routine to accumulate the data:
//..
//  for(int i = 0; i < 4; ++i) {
//      m3.add(input[i]);
//  }
//..
// Finally, we assert that the mean, variance, and skew are what we expect:
//..
//  ASSERT(4   == m3.count());
//  ASSERT(3.0 == m3.mean());
//  ASSERT(1e-5 > fabs(3.33333 - m3.variance()));
//  ASSERT(1e-5 > fabs(0.0     - m3.skew()));
//..

// BDE_VERIFY pragma: +LL01

#include <bdlscm_version.h>

#include <bsl_cmath.h>

#include <bsls_assert.h>
#include <bsls_review.h>

namespace BloombergLP {
namespace bdlsta {

struct MomentLevel {
    // TYPES
    enum Enum {
        // Enumeration of moment level of data desired.

        e_M1, // mean
        e_M2, // variance+mean
        e_M3, // skew+variance+mean
        e_M4  // kurtosis+skew+variance+mean
    };
};

                      // ==========================
                      // private struct Moment_Data
                      // ==========================

template <MomentLevel::Enum ML>
struct Moment_Data;

template<>
struct Moment_Data<MomentLevel::e_M1> {
    // Data members for Mean only.

    // PUBLIC DATA
    int    d_count; // Number of entries.
    double d_sum;   // Sum of entries.

    // CREATORS
    Moment_Data();
        // Constructor initializes all members to zero.
};

template<>
struct Moment_Data<MomentLevel::e_M2> {
    // Data members for Variance and below.

    // PUBLIC DATA
    int    d_count; // Number of entries.
    double d_sum;   // Sum of entries.
    double d_mean;  // Mean of entries.
    double d_M2;    // 2nd moment, for variance.

    // CREATORS
    Moment_Data();
        // Constructor initializes all members to zero.
};

template<>
struct Moment_Data<MomentLevel::e_M3> {
    // Data members for Skew and below

    // PUBLIC DATA
    int    d_count; // Number of entries.
    double d_sum;   // Sum of entries.
    double d_mean;  // Mean of entries.
    double d_M2;    // 2nd moment, for variance.
    double d_M3;    // 3rd moment, for skew

    // CREATORS
    Moment_Data();
        // Constructor initializes all members to zero.
};

template<>
struct Moment_Data<MomentLevel::e_M4> {
    // Data members for Kurtosis and below

    // PUBLIC DATA
    int    d_count; // Number of entries.
    double d_sum;   // Sum of entries.
    double d_mean;  // Mean of entries.
    double d_M2;    // 2nd moment, for variance.
    double d_M3;    // 3rd moment, for skew
    double d_M4;    // 4th moment, for kurtosis

    // CREATORS
    Moment_Data();
        // Constructor initializes all members to zero.
};

                            // ============
                            // class Moment
                            // ============

// BDE_VERIFY pragma: -LL01 // Link is just too long

template <MomentLevel::Enum ML>
class Moment {
    // This class provides efficient and accurate online algorithms for
    // calculating mean, variance, skew, and kurtosis.  The class provides
    // template specializations, so that no unnecessary data members will be
    // kept or unnecessary calculations done.  The online algorithms used are
    // Welford for variance, and the stable M3 and M4 are taken from:
    // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Higher-order_statistics
    //
    // The formula for sample skewness is taken from:
    // http://www.macroption.com/skewness-formula/
    //
    // The formula for sample excess kurtosis is taken from:
    // http://www.macroption.com/kurtosis-formula/

    // BDE_VERIFY pragma: +LL01

  private:
    // PRIVATE TYPES
    typedef struct Moment_Data<ML> Moment_Data_t;

    // DATA
    Moment_Data_t d_data;

  public:
    // CONSTANTS
    enum {
        e_SUCCESS         = 0,
        e_INADEQUATE_DATA = -1
    };

    // MANIPULATORS
    void add(double value);
        // Add the specified 'value' to the data set.

    // ACCESSORS
    int count() const;
        // Returns the number of elements in the data set.

    double kurtosis() const;
        // Return the kurtosis of the data set.  The behavior is undefined
        // unless '4 <= count' and the variance is not zero.

    int kurtosisIfValid(double *result) const;
        // Load into the specified 'result', the kurtosis of the data set.
        // Return 0 on success, and a non-zero value otherwise.  Specifically,
        // 'e_INADEQUATE_DATA' is returned if '4 > count' or the variance is
        // zero.

    double mean() const;
        // Return the mean of the data set.  The behavior is undefined unless
        // '1 <= count'.

    int meanIfValid(double *result) const;
        // Load into the specified 'result', the mean of the data set.  Return
        // 0 on success, and a non-zero value otherwise.  Specifically,
        // 'e_INADEQUATE_DATA' is returned if '1 > count'.

    double skew() const;
        // Return skew of the data set.  The behavior is undefined unless
        // '3 <= count' or the variance is zero.

    int skewIfValid(double *result) const;
        // Load into the specified 'result', the skew of the data set.  Return
        // 0 on success, and a non-zero value otherwise.  Specifically,
        // 'e_INADEQUATE_DATA' is returned if '3 > count' or the variance is
        // zero.

    double variance() const;
        // Return the variance of the data set.  The behavior is undefined
        // unless '2 <= count'.

    int varianceIfValid(double *result) const;
        // Load into the specified 'result', the variance of the data set.
        // Return 0 on success, and a non-zero value otherwise.  Specifically,
        // 'e_INADEQUATE_DATA' is returned if '2 > count'.
};

// ============================================================================
//                               INLINE DEFINITIONS
// ============================================================================

                        // --------------------------
                        // struct bdlsta::Moment_Data
                        // --------------------------

// CREATORS
inline
Moment_Data<MomentLevel::e_M1>::Moment_Data()
: d_count(0)
, d_sum(0.0)
{
}

inline
Moment_Data<MomentLevel::e_M2>::Moment_Data()
: d_count(0)
, d_sum(0.0)
, d_mean(0.0)
, d_M2(0.0)
{
}

inline
Moment_Data<MomentLevel::e_M3>::Moment_Data()
: d_count(0)
, d_sum(0.0)
, d_mean(0.0)
, d_M2(0.0)
, d_M3(0.0)
{
}

inline
Moment_Data<MomentLevel::e_M4>::Moment_Data()
: d_count(0)
, d_sum(0.0)
, d_mean(0.0)
, d_M2(0.0)
, d_M3(0.0)
, d_M4(0.0)
{
}

                        // --------------------
                        // class bdlsta::Moment
                        // --------------------

// MANIPULATORS
template<>
inline
void Moment<MomentLevel::e_M1>::add(double value)
{
    ++d_data.d_count;
    d_data.d_sum += value;
}

template<>
inline
void Moment<MomentLevel::e_M2>::add(double value)
{
    // Modified Welford algorithm for variance.
    const double delta = value - d_data.d_mean;
    d_data.d_sum += value;
    ++d_data.d_count;
    d_data.d_mean = d_data.d_sum / static_cast<double>(d_data.d_count);
    const double delta2 = value - d_data.d_mean;
    d_data.d_M2 += delta * delta2;
}

template<>
inline
void Moment<MomentLevel::e_M3>::add(double value)
{
    // Modified Welford algorithm for variance, and similar algorithm for skew.
    const double delta = value - d_data.d_mean;
    const double nm1 = d_data.d_count;
    d_data.d_sum += value;
    ++d_data.d_count;
    const double n = d_data.d_count;
    const double deltaN = delta / n;
    d_data.d_mean = d_data.d_sum / n;
    const double term1 = delta * deltaN * nm1;
    d_data.d_M3 += term1 * deltaN * (n - 2.0) - 3.0 * deltaN * d_data.d_M2;
    d_data.d_M2 += term1;
}

template<>
inline
void Moment<MomentLevel::e_M4>::add(double value)
{
    // Modified Welford algorithm for variance, and similar algorithms for skew
    // and kurtosis.
    const double delta = value - d_data.d_mean;
    const double nm1 = d_data.d_count;
    d_data.d_sum += value;
    ++d_data.d_count;
    const double n = d_data.d_count;
    const double n2 = n * n;
    const double deltaN = delta / n;
    d_data.d_mean = d_data.d_sum / n;
    const double term1 = delta * deltaN * nm1;
    const double deltaN2 = deltaN * deltaN;
    d_data.d_M4 += term1 * deltaN2 * (n2 - 3.0 * n + 3.0) +
                   6 * deltaN2 * d_data.d_M2 - 4.0 * deltaN * d_data.d_M3;
    d_data.d_M3 += term1 * deltaN * (n - 2.0) - 3.0 * deltaN * d_data.d_M2;
    d_data.d_M2 += term1;
}

// ACCESSORS
template <MomentLevel::Enum ML>
inline
int Moment<ML>::count() const
{
    return d_data.d_count;
}

template<>
inline
double Moment<MomentLevel::e_M4>::kurtosis() const
{
    BSLS_ASSERT(4 <= d_data.d_count && 0.0 != d_data.d_M2);

    const double n    = static_cast<double>(d_data.d_count);
    const double n1   = (n - 1.0);
    const double n2n3 = (n - 2.0) * (n - 3.0);
    return n * (n + 1.0) * n1 / n2n3 * d_data.d_M4 / d_data.d_M2 / d_data.d_M2
           - 3.0 * n1 * n1 / n2n3;
}

template<>
inline
int Moment<MomentLevel::e_M4>::kurtosisIfValid(double *result) const
{
    if (4 > d_data.d_count || 0.0 == d_data.d_M2) {
        return e_INADEQUATE_DATA;                                     // RETURN
    }
    *result = kurtosis();
    return 0;
}

template <MomentLevel::Enum ML>
inline
double Moment<ML>::mean() const
{
    BSLS_ASSERT(1 <= d_data.d_count);

    return d_data.d_sum / static_cast<double>(d_data.d_count);
}

template <MomentLevel::Enum ML>
inline
int Moment<ML>::meanIfValid(double *result) const
{
    if (1 > d_data.d_count) {
        return e_INADEQUATE_DATA;                                     // RETURN
    }
    *result = mean();
    return 0;
}

template <MomentLevel::Enum ML>
inline
double Moment<ML>::skew() const
{
    BSLS_ASSERT(3 <= d_data.d_count && 0.0 != d_data.d_M2);

    const double n = static_cast<double>(d_data.d_count);
    return bsl::sqrt(n - 1.0) * n / (n- 2.0) * d_data.d_M3
                                                  / bsl::pow(d_data.d_M2, 1.5);
}

template <MomentLevel::Enum ML>
inline int Moment<ML>::skewIfValid(double *result) const
{
    if (3 > d_data.d_count || 0.0 == d_data.d_M2) {
        return e_INADEQUATE_DATA;                                     // RETURN
    }
    *result = skew();
    return 0;
}

template <MomentLevel::Enum ML>
inline
double Moment<ML>::variance() const
{
    BSLS_ASSERT(2 <= d_data.d_count);

    return d_data.d_M2 / (d_data.d_count - 1);
}

template <MomentLevel::Enum ML>
inline
int Moment<ML>::varianceIfValid(double *result) const
{
    if (2 > d_data.d_count) {
        return e_INADEQUATE_DATA;                                     // RETURN
    }
    *result = variance();
    return 0;
}

}  // close package namespace
}  // close enterprise namespace

#endif

// ----------------------------------------------------------------------------
// Copyright 2017 Bloomberg Finance L.P.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------- END-OF-FILE ----------------------------------