BDE 4.14.0 Production release
Loading...
Searching...
No Matches
bdlsta_linefit.h
Go to the documentation of this file.
1/// @file bdlsta_linefit.h
2///
3/// The content of this file has been pre-processed for Doxygen.
4///
5
6
7// bdlsta_linefit.h -*-C++-*-
8#ifndef INCLUDED_BDLSTA_LINEFIT
9#define INCLUDED_BDLSTA_LINEFIT
10
11#include <bsls_ident.h>
12BSLS_IDENT("$Id: $")
13
14// BDE_VERIFY pragma: -LL01 // Link is just too long
15
16/// @defgroup bdlsta_linefit bdlsta_linefit
17/// @brief Online algorithm for computing the least squares regression line.
18/// @addtogroup bdl
19/// @{
20/// @addtogroup bdlsta
21/// @{
22/// @addtogroup bdlsta_linefit
23/// @{
24///
25/// <h1> Outline </h1>
26/// * <a href="#bdlsta_linefit-purpose"> Purpose</a>
27/// * <a href="#bdlsta_linefit-classes"> Classes </a>
28/// * <a href="#bdlsta_linefit-description"> Description </a>
29/// * <a href="#bdlsta_linefit-usage"> Usage </a>
30/// * <a href="#bdlsta_linefit-example-1-calculating-line-fit-variance-and-mean"> Example 1: Calculating line fit, variance, and mean </a>
31///
32/// # Purpose {#bdlsta_linefit-purpose}
33/// Online algorithm for computing the least squares regression line.
34///
35/// # Classes {#bdlsta_linefit-classes}
36///
37/// - bdlsta::LineFit: online calculation of least squares regression line
38///
39/// # Description {#bdlsta_linefit-description}
40/// This component provides a mechanism, `bdlsta::LineFit`, that
41/// provides online calculation of the least squares line fit. Online
42/// algorithms process the data in one pass, while maintaining accuracy. The
43/// online algorithm used is developed in the implementation notes (it is
44/// similar to the Welford online algorithm for computing variance). The
45/// formulae for line fit are taken from:
46/// https://en.wikipedia.org/wiki/Simple_linear_regression#Fitting_the_regression_line
47///
48/// Note that the behavior is undefined if there are fewer than 2 data points,
49/// or if all the X's (independent variable) are the same.
50///
51/// ## Usage {#bdlsta_linefit-usage}
52///
53///
54/// This section illustrates intended use of this component.
55///
56/// ### Example 1: Calculating line fit, variance, and mean {#bdlsta_linefit-example-1-calculating-line-fit-variance-and-mean}
57///
58///
59/// This example shows how to accumulate a set of values, and calculate the
60/// line fit parameters, variance, and mean.
61///
62/// First, we create example input and instantiate the appropriate mechanism:
63/// @code
64/// double inputX[] = { 1.0, 2.0, 4.0, 5.0 };
65/// double inputY[] = { 1.0, 2.0, 4.0, 4.5 };
66/// bdlsta::LineFit lineFit;
67/// @endcode
68/// Then, we invoke the `add` routine to accumulate the data:
69/// @code
70/// for(int i = 0; i < 4; ++i) {
71/// lineFit.add(inputX[i], inputY[i]);
72/// }
73/// @endcode
74/// Finally, we assert that the alpha, beta, variance, and mean are what we
75/// expect:
76/// @code
77/// double alpha, beta;
78/// ASSERT(4 == lineFit.count());
79/// ASSERT(3.0 == lineFit.xMean());
80/// ASSERT(1e-3 > fabs(2.875 - lineFit.yMean()));
81/// ASSERT(1e-3 > fabs(3.33333 - lineFit.variance()));
82/// ASSERT(0 == lineFit.fitIfValid(&alpha, &beta));
83/// ASSERT(1e-3 > fabs(0.175 - alpha));
84/// ASSERT(1e-3 > fabs(0.9 - beta ));
85/// @endcode
86/// @}
87/** @} */
88/** @} */
89
90/** @addtogroup bdl
91 * @{
92 */
93/** @addtogroup bdlsta
94 * @{
95 */
96/** @addtogroup bdlsta_linefit
97 * @{
98 */
99
100// BDE_VERIFY pragma: +LL01
101
102#include <bdlscm_version.h>
103
104#include <bsl_cmath.h>
105
106#include <bsls_assert.h>
107#include <bsls_review.h>
108
109
110namespace bdlsta {
111
112 // =============
113 // class LineFit
114 // =============
115
/// This class provides an efficient online algorithm for calculating the
/// least squares line fit.  The class also calculates the mean for the X's
/// and Y's, and the variance for the X's.  These are byproducts of
/// calculating the line fit.  The online algorithm is detailed in the
/// implementation notes.
///
/// See @ref bdlsta_linefit
class LineFit {
  private:
    // DATA
    int    d_count;  // Number of data points.
    double d_xMean;  // Mean of X's.
    double d_xSum;   // Sum of X's.
    double d_ySum;   // Sum of Y's.
    double d_M2;     // 2nd moment of the X's (divided by `count - 1` to
                     // obtain the variance).
    double d_xySum;  // Sum of Xi*Yi

  public:
    // CONSTANTS
    enum {
        e_SUCCESS         = 0,   // The requested value was computed.
        e_INADEQUATE_DATA = -1   // The data set cannot yield the requested
                                 // value.
    };

    // CREATORS

    /// Create an empty `LineFit` object.
    LineFit();

    // MANIPULATORS

    /// Add the specified `(xValue, yValue)` point to the data set.
    void add(double xValue, double yValue);

    // ACCESSORS

    /// Return the number of elements in the data set.
    int count() const;

    /// Calculate line fit coefficients `Y = Alpha + Beta * X`, and populate
    /// the specified `alpha` (intercept) and `beta` (slope).  The behavior
    /// is undefined if `2 > count` or all X's are identical.
    void fit(double *alpha, double *beta) const;

    /// Calculate line fit coefficients `Y = Alpha + Beta * X`, and populate
    /// the specified `alpha` (intercept) and `beta` (slope).  Return 0 on
    /// success, and a non-zero value otherwise.  Specifically,
    /// `e_INADEQUATE_DATA` is returned, and neither `alpha` nor `beta` is
    /// modified, if `2 > count` or all X's are identical.
    int fitIfValid(double *alpha, double *beta) const;

    /// Return the variance of the data set X's.  The behavior is undefined
    /// unless `2 <= count`.
    double variance() const;

    /// Load into the specified `result`, the variance of the data set X's.
    /// Return 0 on success, and a non-zero value otherwise.  Specifically,
    /// `e_INADEQUATE_DATA` is returned if `2 > count`.
    int varianceIfValid(double *result) const;

    /// Return the mean of the data set X's.  The behavior is undefined
    /// unless `1 <= count`.
    double xMean() const;

    /// Load into the specified `result`, the mean of the data set X's.
    /// Return 0 on success, and a non-zero value otherwise.  Specifically,
    /// `e_INADEQUATE_DATA` is returned if `1 > count`.
    int xMeanIfValid(double *result) const;

    /// Return the mean of the data set Y's.  The behavior is undefined
    /// unless `1 <= count`.
    double yMean() const;

    /// Load into the specified `result`, the mean of the data set Y's.
    /// Return 0 on success, and a non-zero value otherwise.  Specifically,
    /// `e_INADEQUATE_DATA` is returned if `1 > count`.
    int yMeanIfValid(double *result) const;
};
192
193// ============================================================================
194// INLINE DEFINITIONS
195// ============================================================================
196
197 // ---------------------
198 // class bdlsta::LineFit
199 // ---------------------
200
201// CREATORS
202inline
204: d_count(0)
205, d_xMean(0.0)
206, d_xSum(0.0)
207, d_ySum(0.0)
208, d_M2(0.0)
209, d_xySum(0.0)
210{
211}
212
213// MANIPULATORS
214inline
215void LineFit::add(double xValue, double yValue)
216{
217 const double delta = xValue - d_xMean;
218 ++d_count;
219 d_xSum += xValue;
220 d_ySum += yValue;
221 d_xMean = d_xSum / static_cast<double>(d_count);
222 const double delta2 = xValue - d_xMean;
223 d_M2 += delta * delta2;
224 d_xySum += xValue * yValue;
225}
226
227// ACCESSORS
228inline
229int LineFit::count() const
230{
231 return d_count;
232}
233
234inline
235void LineFit::fit(double *alpha, double *beta) const
236{
237 BSLS_ASSERT(2 <= d_count && 0.0 != d_M2);
238
239 const double n = static_cast<double>(d_count);
240 double tmpBeta = (d_xySum - d_xSum * d_ySum / n) / d_M2;
241 *beta = tmpBeta;
242 *alpha = (d_ySum - d_xSum * tmpBeta) / n;
243}
244
245inline
246int LineFit::fitIfValid(double *alpha, double *beta) const
247{
248 if (2 > d_count || 0.0 == d_M2) {
249 return e_INADEQUATE_DATA; // RETURN
250 }
251 const double n = static_cast<double>(d_count);
252 double tmpBeta = (d_xySum - d_xSum * d_ySum / n) / d_M2;
253 *beta = tmpBeta;
254 *alpha = (d_ySum - d_xSum * tmpBeta) / n;
255 return 0;
256}
257
258inline
259double LineFit::variance() const
260{
261 BSLS_ASSERT(2 <= d_count);
262
263 return d_M2 / (d_count - 1);
264}
265
266inline
267int LineFit::varianceIfValid(double *result) const
268{
269 if (2 > d_count) {
270 return e_INADEQUATE_DATA; // RETURN
271 }
272 *result = variance();
273 return 0;
274}
275
276inline
277double LineFit::xMean() const
278{
279 BSLS_ASSERT(1 <= d_count);
280
281 return d_xSum / static_cast<double>(d_count);
282}
283
284inline
285int LineFit::xMeanIfValid(double *result) const
286{
287 if (1 > d_count) {
288 return e_INADEQUATE_DATA; // RETURN
289 }
290 *result = xMean();
291 return 0;
292}
293
294inline
295double LineFit::yMean() const
296{
297 BSLS_ASSERT(1 <= d_count);
298
299 return d_ySum / static_cast<double>(d_count);
300}
301
302inline
303int LineFit::yMeanIfValid(double *result) const
304{
305 if (1 > d_count) {
306 return e_INADEQUATE_DATA; // RETURN
307 }
308 *result = yMean();
309 return 0;
310}
311
312} // close package namespace
313
314
315#endif
316
317// ----------------------------------------------------------------------------
318// Copyright 2017 Bloomberg Finance L.P.
319//
320// Licensed under the Apache License, Version 2.0 (the "License");
321// you may not use this file except in compliance with the License.
322// You may obtain a copy of the License at
323//
324// http://www.apache.org/licenses/LICENSE-2.0
325//
326// Unless required by applicable law or agreed to in writing, software
327// distributed under the License is distributed on an "AS IS" BASIS,
328// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
329// See the License for the specific language governing permissions and
330// limitations under the License.
331// ----------------------------- END-OF-FILE ----------------------------------
332
333/** @} */
334/** @} */
335/** @} */
Definition bdlsta_linefit.h:122
void fit(double *alpha, double *beta) const
Definition bdlsta_linefit.h:235
@ e_INADEQUATE_DATA
Definition bdlsta_linefit.h:136
@ e_SUCCESS
Definition bdlsta_linefit.h:135
double xMean() const
Definition bdlsta_linefit.h:277
int xMeanIfValid(double *result) const
Definition bdlsta_linefit.h:285
int yMeanIfValid(double *result) const
Definition bdlsta_linefit.h:303
double yMean() const
Definition bdlsta_linefit.h:295
void add(double xValue, double yValue)
Add the specified (xValue, yValue) point to the data set.
Definition bdlsta_linefit.h:215
double variance() const
Definition bdlsta_linefit.h:259
int count() const
Returns the number of elements in the data set.
Definition bdlsta_linefit.h:229
int fitIfValid(double *alpha, double *beta) const
Definition bdlsta_linefit.h:246
LineFit()
Create an empty LineFit object.
Definition bdlsta_linefit.h:203
int varianceIfValid(double *result) const
Definition bdlsta_linefit.h:267
#define BSLS_ASSERT(X)
Definition bsls_assert.h:1804
#define BSLS_IDENT(str)
Definition bsls_ident.h:195
Definition bdlsta_linefit.h:110