DGtal  1.2.0
SimpleLinearRegression.ih
1 /**
2  * This program is free software: you can redistribute it and/or modify
3  * it under the terms of the GNU Lesser General Public License as
4  * published by the Free Software Foundation, either version 3 of the
5  * License, or (at your option) any later version.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program. If not, see <http://www.gnu.org/licenses/>.
14  *
15  **/
16 
17 /**
18  * @file SimpleLinearRegression.ih
19  * @author Jacques-Olivier Lachaud (\c jacques-olivier.lachaud@univ-savoie.fr )
20  * Laboratory of Mathematics (CNRS, UMR 5127), University of Savoie, France
21  *
22  * @date 2013/10/25
23  *
24  * Implementation of inline methods defined in Histogram.h
25  *
26  * This file is part of the DGtal library.
27  */
28 
29 
30 //////////////////////////////////////////////////////////////////////////////
31 ///////////////////////////////////////////////////////////////////////////////
32 // IMPLEMENTATION of inline methods.
33 ///////////////////////////////////////////////////////////////////////////////
34 
35 //////////////////////////////////////////////////////////////////////////////
36 #include <cstdlib>
37 #include <iostream>
38 #include <boost/math/distributions/students_t.hpp>
39 //////////////////////////////////////////////////////////////////////////////
40 
41 ///////////////////////////////////////////////////////////////////////////////
42 // Implementation of inline methods //
43 
44 /**
45  * Adds the samples (y,x). Does not compute immediately the
46  * regression. See 'computeRegression' for computing the
47  * regression with the current samples.
48  *
49  * @param begin_x an iterator on the first x-data
50  * @param end_x an iterator after the last x-data
51  * @param begin_y an iterator on the first y-data
52  *
53  * @see computeRegression
54  */
55 template <class XIterator, class YIterator>
56 inline
57 void
58 DGtal::SimpleLinearRegression::addSamples
59 ( XIterator begin_x, XIterator end_x, YIterator begin_y )
60 {
61  for ( ; begin_x != end_x; ++begin_x, ++begin_y )
62  {
63  addSample( *begin_x, *begin_y );
64  }
65 }
66 
67 
68 inline
69 void
70 DGtal::SimpleLinearRegression::addSample(const double x, const double y )
71 {
72  myX.push_back( x );
73  myY.push_back( y );
74  mySumX += x;
75  mySumX2 += x*x;
76  mySumY += y;
77  mySumXY += x*y;
78  ++myN;
79  }
80 
81 /**
82  * @return the slope of the linear regression (B1 in Y=B0+B1*X).
83  */
84 inline
85 double
86 DGtal::SimpleLinearRegression::slope() const
87 {
88  return myB[ 1 ];
89 }
90 
91 /**
92  * @return the intercept of the linear regression (B0 in Y=B0+B1*X).
93  */
94 inline
95 double
96 DGtal::SimpleLinearRegression::intercept() const
97 {
98  return myB[ 0 ];
99 }
100 
101 /**
102  * Given a new x, predicts its y (hat{y}) according to the linear
103  * regression model.
104  *
105  * @param x any value.
106  * @return the estimated y value, ie hat{y} = B0 + B1*x.
107  */
108 inline
109 double
110 DGtal::SimpleLinearRegression::estimateY( double x ) const
111 {
112  return x * myB[ 1 ] + myB[ 0 ];
113 }
114 
115 /**
116  * @return the current estimation of the variance of the Gaussian
117 
118  * perturbation (i.e. variance of U).
119  */
120 inline
121 double
122 DGtal::SimpleLinearRegression::estimateVariance() const
123 {
124  ASSERT( ( myN >= 3 ) && "[DGtal::SimpleLinearRegression::estimateVariance] need at least 3 datas to estimate variance.");
125  return myNormU2 / ( myN - 2 );
126 }
127 
128 /**
129  * Destructor.
130  */
131 DGtal::SimpleLinearRegression::~SimpleLinearRegression()
132 {
133 }
134 
135 /**
136  * Constructor.
137  * The object is invalid.
138  *
139  * @param eps_zero the value below which the absolute value of the
140  * determinant is considered null.
141  */
142 DGtal::SimpleLinearRegression::SimpleLinearRegression( double eps_zero )
143  : myEpsilonZero( eps_zero ), myN( 0 )
144 {
145  clear();
146 }
147 
148 /**
149  * Clear all datas.
150  */
151 inline
152 void
153 DGtal::SimpleLinearRegression::clear()
154 {
155  myN = 0;
156  mySumX = 0.0;
157  mySumX2 = 0.0;
158  mySumY = 0.0;
159  mySumXY = 0.0;
160  myY.clear();
161  myX.clear();
162  myB[ 0 ] = 0.0;
163  myB[ 1 ] = 0.0;
164  myD = 0.0;
165 }
166 
167 /**
168  * Computes the regression of the current parameters.
169  *
170  * @return 'true' if the regression was valid (non null number of
171  * samples, rank of X is 2), 'false' otherwise.
172  */
173 inline
174 bool
175 DGtal::SimpleLinearRegression::computeRegression()
176 {
177  if ( myN <= 2 ) return false;
178  myD = myN * mySumX2 - ( mySumX * mySumX );
179 
180  if ( ( myD > -myEpsilonZero ) && ( myD < myEpsilonZero ) )
181  {
182  myD = 0.0;
183  return false;
184  }
185  myB[ 1 ] = ( -mySumX * mySumY + myN * mySumXY ) / myD;
186  myB[ 0 ] = mySumY/(double)myN - myB[ 1 ]*mySumX/(double)myN;
187  myU.clear();
188  myNormU2 = 0.0;
189  for ( unsigned int i = 0; i < myN; ++i )
190  {
191  double u = myY[ i ] - myB[ 0 ] - myB[ 1 ] * myX[ i ];
192  myU.push_back( u );
193  myNormU2 += u * u;
194  }
195 
196  return true;
197 }
198 
199 
200 
201 /**
202  * Given a test confidence value (1-[a]), return the expected interval
203  * of value for Y, given a new [x], so that the model is still
204  * linear. One may thus check if a new pair (y,x) is still in the
205  * current linear model or not.
206  *
207  * @param x any x value.
208  *
209  * @param a the expected confidence value for the test (a=0.05
210  * means 95% of confidence).
211  *
212  * @return the expected interval [min_y, max_y] such that any
213  * value y within confirms the current linear model.
214  */
215 inline
216 std::pair<double,double>
217 DGtal::SimpleLinearRegression::trustIntervalForY( const double x,
218  const double a ) const
219 {
220  double t = ( mySumX2 - 2.0 * x * mySumX + myN * x * x ) / myD;
221  double c = sqrt( estimateVariance() * ( 1 + t ) );
222  boost::math::students_t_distribution<double> T( myN - 2 );
223  double q = boost::math::quantile( T, 1.0 - a/2.0 );
224  return std::make_pair( estimateY( x ) - q*c, estimateY( x ) + q*c );
225 }
226 
227 
228 
229 ///////////////////////////////////////////////////////////////////////////////
230 // Interface - public :
231 
232 /**
233  * Writes/Displays the object on an output stream.
234  * @param that_stream the output stream where the object is written.
235  */
236 inline
237 void
238 DGtal::SimpleLinearRegression::selfDisplay( std::ostream& that_stream ) const
239 {
240  that_stream << "[SimpleLinearRegression] Number of samples="<< myN;
241 }
242 
243 /**
244  * Checks the validity/consistency of the object.
245  * @return 'true' if the object is valid, 'false' otherwise.
246  */
247 bool
248 DGtal::SimpleLinearRegression::isValid() const
249 {
250  return true;
251 }
252 
253 
254 
255 ///////////////////////////////////////////////////////////////////////////////
256 // Implementation of inline functions and external operators //
257 
258 /**
259  * Overloads 'operator<<' for displaying objects of class 'SimpleLinearRegression'.
260  * @param that_stream the output stream where the object is written.
261  * @param that_object_to_display the object of class 'SimpleLinearRegression' to write.
262  * @return the output stream after the writing.
263  */
264 std::ostream&
265 DGtal::operator<<( std::ostream & that_stream,
266  const SimpleLinearRegression & that_object_to_display )
267 {
268  that_object_to_display.selfDisplay( that_stream );
269  return that_stream;
270 }
271 
272 // //
273 ///////////////////////////////////////////////////////////////////////////////