###### regression.sql
###### Tom Cunningham <tom-dot-cunningham-at-gmail-dot-com> Fri-13-May-05
###### Functions for running a bivariate regression & getting output values.
###### Expected input: a table or view called "regression" with
###### columns: "datum" (=unique id), "independent", & "dependent".
####### Simple test regression data
# drop table if exists simple_data;
#create table simple_data (datum int auto_increment primary key, independent float, dependent float);
#insert into simple_data (independent, dependent)
#values (1,4.1), (2,5.0), (3,6.1), (4,7), (5,8.1), (6,9), (7,10.1), (8,11), (9,10);
# DROP VIEW IF EXISTS regression;
#CREATE VIEW regression AS
#SELECT * FROM simple_data;
###### get means
select @DEPENDENT_MEAN:= AVG( dependent ),
@DEPENDENT_STD:= STD( dependent ),
@DEPENDENT_VAR:= POW(STD( dependent ),2),
@DEPENDENT_MAX:= MAX(dependent),
@INDEPENDENT_MEAN:= AVG( independent ),
@INDEPENDENT_STD:= STD( independent ),
@INDEPENDENT_VAR:= POW(STD( independent ),2),
@INDEPENDENT_SUM_SQRD:= SUM( independent * independent ),
@SAMPLE:= COUNT(*)
from regression;
###### beta (slope)
select @BETA:= SUM( (dependent - @DEPENDENT_MEAN ) * (independent - @INDEPENDENT_MEAN) )
/ SUM( POW( independent - @INDEPENDENT_MEAN,2 ) )
from regression;
###### alpha (intersection)
SELECT @ALPHA:= @DEPENDENT_MEAN - @BETA * @INDEPENDENT_MEAN;
###### predicted values & residuals
select independent, dependent,
@ALPHA + @BETA * independent AS predicted,
@ALPHA + @BETA * independent - dependent AS error,
REPEAT(IF((@ALPHA + @BETA * independent - dependent)>0,'+','-'),
ABS(@ALPHA + @BETA * independent - dependent)) AS residual
from regression
ORDER BY independent ASC;
###### Check that sum of residuals is 0
select round( sum( @ALPHA + @BETA * independent - dependent ), 3) from regression;
###### R-squared (coefficient of determination -- proportion of variance explained) (p107)
# Cov(X,Y)^2 / (Var(X)*Var(Y))
SELECT @R_SQUARED:= POW( SUM( (dependent - @DEPENDENT_MEAN) * (independent - @INDEPENDENT_MEAN) ), 2)
/ ( SUM( POW( independent - @INDEPENDENT_MEAN, 2) ) * SUM( POW( dependent - @DEPENDENT_MEAN, 2) ) )
FROM regression;
###### Variance of regression (p107)
SELECT @VAR_REGRESSION:= SUM( POW( @ALPHA + @BETA * independent - dependent, 2 ) ) / (@SAMPLE - 2)
FROM regression;
###### SER: Standard Error of Regression (p107)
SELECT @SER:= SQRT( @VAR_REGRESSION );
###### Std. Error for Beta = sqrt( var-regression / var-x )
SELECT @BETA_STD_ERROR:= SQRT( @VAR_REGRESSION / @INDEPENDENT_VAR );
###### Durbin-Watson (p120)
# Measures serial correlation: if less than 1.5, positive serial correlation; if more than 2.5, then negative.
# TODO: the quotient is not including the *first* error - should use left join
SELECT @DURBIN_WATSON:=
SUM(
POW( (@ALPHA + @BETA * r2.independent - r2.dependent)
- (@ALPHA + @BETA * r1.independent - r1.dependent)
,2
)
)
/ SUM(
POW( @ALPHA + @BETA * r2.independent - r2.dependent
,2
)
) AS dw
FROM regression r2
LEFT JOIN regression r1 ON r1.datum=r2.datum - 1;
###### dump all vars:
SELECT @SAMPLE,
@ALPHA, @ALPHA_STD_ERROR,
@BETA, @BETA_STD_ERROR,
@R_SQUARED, @VAR_REGRESSION, @SER,
@DURBIN_WATSON;
REQUEST:Array
(
[link_body/body] => {incl:Regression}
[PHPSESSID] => b9a289e2959005ab5c102380940272d1
)
SESSION:Array
(
[body/body] => {incl:Regression}
)
MESSAGES:PLUGINS: