> summary(dat.lm1) Call: lm(formula = sales ~ price, data = dat) Residuals: Min 1Q Median 3Q Max -55.719-19.270 4.212 16.143 73.454 Coefficients: Estimate Std. Error t value Pr(> t ) (Intercept) 237.1326 26.2119 9.047 6.05e-12 *** price -0.8966 0.1799-4.985 8.49e-06 *** --- Signif. codes: 0 *** 0.001 ** 0.01 * 0.05. 0.1 1 Residual standard error: 26 on 48 degrees of freedom Multiple R-squared: 0.3411,Adjusted R-squared: 0.3274 F-statistic: 24.85 on 1 and 48 DF, p-value: 8.486e-06
> dat.lm2 <- lm(sales~price+material,data = dat) > summary(dat.lm2) Call: lm(formula = sales ~ price + material, data = dat) Residuals: Min 1Q Median 3Q Max -51.649-17.811 3.966 15.260 73.184 Coefficients: Estimate Std. Error t value Pr(> t ) (Intercept) 241.3685 27.2716 8.851 1.41e-11 *** price -0.9730 0.2197-4.428 5.64e-05 *** material 1.3009 2.1196 0.614 0.542 --- Signif. codes: 0 *** 0.001 ** 0.01 * 0.05. 0.1 1 Residual standard error: 26.17 on 47 degrees of freedom Multiple R-squared: 0.3463, Adjusted R-squared: 0.3185 F-statistic: 12.45 on 2 and 47 DF, p-value: 4.578e-05
> dat.regall<-lm(sales~.,data=dat) > summary(dat.regall) Residual standard error: 17 on 46 degrees of freedom Multiple R-squared: 0.7302, Adjusted R-squared: 0.7126 F-statistic: 41.51 on 3 and 46 DF, p-value: 3.874e-13 $F = \dfrac{\sigma_1^2}{\sigma_2^2} = \dfrac{SSM / df_{model}}{SSE / df_{error}} = \dfrac{var(model)}{var(error)}$, $\dfrac{\sigma_1^2}{\sigma_2^2} \sim F(df_1, df_2)$, $H_0: var(model) = var(error)$, $SSE = \sum_{i=1}^{n} (y_i - \hat{y}_i)^2$, $SSM = \sum_{i=1}^{n} (\hat{y}_i - \bar{y})^2$
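$E[\varepsilon_i] = 0,\ \forall i$; $\mathrm{Var}(\varepsilon_i) = \sigma^2,\ \forall i$; $\mathrm{cov}(\varepsilon_i, \varepsilon_j) = 0,\ \forall i \neq j$; $\varepsilon \sim N(0, \sigma^2)$
$E[y_i] = E[\beta_0 + \beta_1 x_i + \varepsilon_i] = E[\beta_0] + E[\beta_1 x_i] + E[\varepsilon_i] = \beta_0 + \beta_1 x_i$
$\mathrm{VAR}[y_i] = \mathrm{VAR}[\beta_0 + \beta_1 x_i + \varepsilon_i] = \mathrm{VAR}[\varepsilon_i] = \sigma^2$
$y_i \sim N(\beta_0 + \beta_1 x_i,\ \sigma^2)$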
dat<-read.csv("http://www.matsuka.info/data_folder/tdkreg01.csv") dat.lm01<-lm(sales~price, data=as.data.frame(scale(dat))) Residuals vs Fitted >plot(dat.lm01,which=1) $E[\varepsilon_i] = 0,\ \forall i$; $\mathrm{Var}(\varepsilon_i) = \sigma^2,\ \forall i$ Residuals -2-1 0 1 2 43 16 27-1.5-1.0-0.5 0.0 0.5 1.0 Fitted values lm(sales ~ price)
>plot(dat.lm01,which=2) Normal Q-Q $\varepsilon \sim N(0, \sigma^2)$ Standardized residuals -2-1 0 1 2 3 27 16 43-2 -1 0 1 2 Theoretical Quantiles lm(sales ~ price)
norm.vars=rnorm(300) qqnorm(norm.vars) qqline(norm.vars,col='red',lwd=2) Normal Q-Q Plot unif.vars=runif(300) qqnorm(unif.vars) qqline(unif.vars,col='green',lwd=2) Normal Q-Q Plot Sample Quantiles -3-2 -1 0 1 2 3 Sample Quantiles -1.0-0.5 0.0 0.5 1.0-3 -2-1 0 1 2 3 Theoretical Quantiles -3-2 -1 0 1 2 3 Theoretical Quantiles
> plot(sort(unif.vars),sort(unif.vars)) sort(unif.vars) 0.0 0.2 0.4 0.6 0.8 1.0 > plot(sort(norm.vars),sort(unif.vars)) sort(unif.vars) 0.0 0.2 0.4 0.6 0.8 1.0 0.0 0.2 0.4 0.6 0.8 1.0 sort(unif.vars) -3-2 -1 0 1 2 3 sort(norm.vars)
par(mfrow=c(2,2)) plot(dat.lm01) Residuals -2-1 0 1 2 16 Residuals vs Fitted 27 43 Standardized residuals -2-1 0 1 2 3 27 16 Normal Q-Q 43-1.5-1.0-0.5 0.0 0.5 1.0 Fitted values -2-1 0 1 2 Theoretical Quantiles Standardized residuals 0.0 0.5 1.0 1.5 16 Scale-Location 27 43 Standardized residuals -2-1 0 1 2 3 Residuals vs Leverage 1 43 0.5 Cook's distance 27 16 0.5-1.5-1.0-0.5 0.0 0.5 1.0 Fitted values 0.00 0.04 0.08 0.12 Leverage
dat<-read.csv("http://www.matsuka.info/data_folder/dktb312.csv") > levels(dat$method) [1] "free" "image" "repeat" "sentence" # factor dat$method=factor(dat$method, levels(dat$method)[c(1,3,4,2)]) > levels(dat$method) [1] "free" "repeat" "sentence" "image"
Residuals vs Fitted Normal Q-Q > dat.lm<-lm(result~method,data=dat) > par(mfrow=c(2,2)) > plot(dat.lm) Residuals -4-2 0 2 4 8 19 25 Standardized residuals -2-1 0 1 2 19 25 8 7 8 9 10 11 12 13-2 -1 0 1 2 Fitted values Theoretical Quantiles Standardized residuals 0.0 0.5 1.0 1.5 Scale-Location 19 25 8 7 8 9 10 11 12 13 Standardized residuals -2-1 0 1 2 Constant Leverage: Residuals vs Factor Levels 8 method : free repeat image 19 25 Fitted values Factor Level Combinations
! " = $ " &$ " = $ " ' ( ' ) * "! " =, -. )/0 - h " = ) 2 + 4 -/ 4 6 8 4 8 / 4 6
> dat <- read.csv("http://www.matsuka.info/data_folder/tdkreg01.csv") > summary(dat) material price design sales Min. : 1.00 Min. :100.0 Min. :10.00 Min. : 13.0 1st Qu.: 4.00 1st Qu.:130.0 1st Qu.:32.00 1st Qu.: 99.0 Median : 5.00 Median :145.0 Median :39.00 Median :106.5 Mean : 5.22 Mean :144.3 Mean :39.12 Mean :107.8 3rd Qu.: 6.00 3rd Qu.:158.8 3rd Qu.:45.50 3rd Qu.:127.5 Max. :10.00 Max. :195.0 Max. :70.00 Max. :203.0 > dat2 <- rbind(dat,c(0,0,100,250)) > tail(dat2) material price design sales 46 7 120 12 107 47 5 155 50 130 48 5 160 42 72 49 10 180 26 48 50 7 160 54 106 51 0 0 100 250 plot(dat2,col=c(rep("black",50),"red"),pch=20,cex=3)
> dat.lm<-lm(sales~design,data=dat) > summary(dat.lm) Coefficients: Estimate Std. Error t value Pr(> t ) (Intercept) 85.5989 13.6762 6.259 1.01e-07 *** design 0.5665 0.3310 1.711 0.0935. --- Signif. codes: 0 *** 0.001 ** 0.01 * 0.05. 0.1 1 Residual standard error: 31.1 on 48 degrees of freedom Multiple R-squared: 0.0575, Adjusted R-squared: 0.03787 F-statistic: 2.929 on 1 and 48 DF, p-value: 0.09348 > dat2.lm<-lm(sales~design,data=dat2) > summary(dat2.lm) Coefficients: Estimate Std. Error t value Pr(> t ) (Intercept) 66.9017 12.9166 5.180 4.17e-06 *** design 1.0827 0.2987 3.624 0.000688 *** --- Signif. codes: 0 *** 0.001 ** 0.01 * 0.05. 0.1 1 Residual standard error: 33.34 on 49 degrees of freedom Multiple R-squared: 0.2114, Adjusted R-squared: 0.1953 F-statistic: 13.14 on 1 and 49 DF, p-value: 0.000688
$e_i^{*} = \dfrac{e_i}{\hat{\sigma}\sqrt{1 - h_i}}$
Cook's $D_i$: $D_i = \dfrac{(e_i^{*})^2}{p + 1} \cdot \dfrac{h_i}{1 - h_i} = \dfrac{e_i^2}{(p + 1)\,\hat{\sigma}^2} \cdot \dfrac{h_i}{(1 - h_i)^2}$
$b = (X^{T} X)^{-1} X^{T} y$
dat<-read.csv("http://www.matsuka.info/data_folder/tdkreg02.csv") plot(dat) 100 140 180 20 60 100 140 >dat.lm<-lm(sales~., data=dat) material 2 4 6 8 10 100 140 180 price design 10 30 50 70 20 60 100 140 sales dump 120 160 2 4 6 8 10 10 30 50 70 120 160
100 140 180 20 60 100 140 material 2 4 6 8 10 > summary(dat.lm) Call: lm(formula = sales ~., data = dat) Coefficients: Estimate Std. Error t value Pr(> t ) (Intercept) 245.3982 19.3595 12.676 2.37e-13 *** material 9.7503 1.8431 5.290 1.13e-05 *** price -2.8178 0.5225-5.393 8.52e-06 *** design 1.7655 0.2654 6.651 2.71e-07 *** dump 1.0196 0.4915 2.074 0.047 * --- Signif. codes: 0 *** 0.001 ** 0.01 * 0.05. 0.1 1 100 140 180 20 60 100 140 price design sales dump 2 4 6 8 10 10 30 50 70 120 160 10 30 50 70 120 160 Residual standard error: 15.12 on 29 degrees of freedom Multiple R-squared: 0.7638, Adjusted R-squared: 0.7313 F-statistic: 23.45 on 4 and 29 DF, p-value: 9.846e-09
$\mathrm{VIF}_i = \dfrac{1}{1 - R_i^2}$
dat.lm<-lm(sales~., data=dat) install.packages("DAAG") library(DAAG) vif(dat.lm) material price design dump 1.9087 16.8890 1.6417 14.8310 lm.price<-lm(price~material+design+dump, data=dat) p.rsq = summary(lm.price)$r.squared vif.p = 1/(1-p.rsq) > vif.p [1] 16.88905
> dat<-read.csv("http://www.matsuka.info/data_folder/waa07_2.csv") > head(dat) grade study study.sq study.type 1 11 5 25 g1 2 30 5 25 g1 3 32 5 25 g1 4 29 5 25 g1 5 35 5 25 g1 6 13 5 25 g2
Polynomial regression > poly.lm<-lm(grade~study+study.sq, data= dat) > summary(poly.lm) Call: lm(formula = grade ~ study + study.sq, data = dat) Coefficients: Estimate Std. Error t value Pr(> t ) (Intercept) -15.16000 5.84084-2.596 0.0126 * study 9.07771 0.89022 10.197 1.70e-13 *** study.sq -0.18686 0.02911-6.418 6.25e-08 *** --- Signif. codes: 0 *** 0.001 ** 0.01 * 0.05. 0.1 1 Residual standard error: 8.612 on 47 degrees of freedom Multiple R-squared: 0.905, Adjusted R-squared: 0.9009 F-statistic: 223.8 on 2 and 47 DF, p-value: < 2.2e-16
Polynomial regression plot(grade~study,data=dat,pch=20,xlab = "hours studied",xlim=c(3,27), ylim =c(0,110),cex=2) x = seq(0,30,0.1) y = -15.16+9.07771*x-0.18686*x^2 lines(x,y,col='red',lwd=3)
dat<-read.table("http://www.matsuka.info/data_folder/tdkpath01.txt",header=TRUE) plot(dat,pch=20,cex=2)
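$\text{Absence} = b_1\,\text{Interest} + e_1$; $\text{Study} = b_2\,\text{Interest} + b_3\,\text{Knowledge} + e_2$; $\text{Grade} = b_4\,\text{Knowledge} + b_5\,\text{Absence} + b_6\,\text{Study} + e_3$; path labels: b1 b2 b3 b4 b5 b6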
dat<-read.table("http://www.matsuka.info/data_folder/tdkpath01.txt",header=TRUE) dat.lm1<-lm(absence~interest,dat) > summary(dat.lm1) Call: lm(formula = absence ~ interest, data = dat) Residuals: Min 1Q Median 3Q Max -12.503-5.803-2.248 6.044 20.297 Coefficients: Estimate Std. Error t value Pr(> t ) (Intercept) 50.6146 3.1582 16.026 < 2e-16 *** interest -6.9028 0.7423-9.299 4.28e-13 *** --- Signif. codes: 0 *** 0.001 ** 0.01 * 0.05. 0.1 1
dat.lm2<-lm(study~interest+knowledge,dat) > summary(dat.lm2) Coefficients: Estimate Std. Error t value Pr(> t ) (Intercept) 66.4730 22.6573 2.934 0.004816 ** interest 31.9430 2.9516 10.822 1.92e-15 *** knowledge -1.6514 0.4001-4.128 0.000121 *** dat.lm3<-lm(grade~knowledge+study+absence,dat) > summary(dat.lm3) Coefficients: Estimate Std. Error t value Pr(> t ) (Intercept) 55.70510 9.81742 5.674 5.12e-07 *** knowledge 0.39900 0.10943 3.646 0.000585 *** study 0.09713 0.02711 3.583 0.000713 *** absence -0.61701 0.11516-5.358 1.64e-06 ***
-6.90 31.94-0.62-1.65 0.40 0.10
dat.std<-as.data.frame(scale(dat)) > summary(dat.std) interest knowledge absence study grade Min. :-2.22219 Min. :-2.33035 Min. :-1.86984 Min. :-1.92440 Min. :-2.4275 1st Qu.:-0.75701 1st Qu.:-0.59252 1st Qu.:-0.82503 1st Qu.:-0.64855 1st Qu.:-0.7986 Median :-0.02442 Median : 0.05296 Median : 0.07609 Median :-0.01063 Median : 0.2873 Mean : 0.00000 Mean : 0.00000 Mean : 0.00000 Mean : 0.00000 Mean : 0.0000 3rd Qu.: 0.70817 3rd Qu.: 0.64879 3rd Qu.: 0.76783 3rd Qu.: 0.62729 3rd Qu.: 0.8303 Max. : 2.17335 Max. : 3.42931 Max. : 2.53107 Max. : 1.90313 Max. : 1.5090 > var(dat.std) interest knowledge absence study grade interest 1.0000000 0.47233767-0.7736441 0.76014540 0.7756937 knowledge 0.4723377 1.00000000-0.4238815 0.08432142 0.5150212 absence -0.7736441-0.42388147 1.0000000-0.62220338-0.8184952 study 0.7601454 0.08432142-0.6222034 1.00000000 0.6503332 grade 0.7756937 0.51502121-0.8184952 0.65033323 1.0000000
> summary(lm(absence~interest,dat.std)) Coefficients: Estimate Std. Error t value Pr(> t ) (Intercept) 5.658e-17 8.250e-02 0.000 1 interest -7.736e-01 8.320e-02-9.299 4.28e-13 *** > summary(lm(study~interest+knowledge,dat.std)) Coefficients: Estimate Std. Error t value Pr(> t ) (Intercept) -3.923e-16 7.488e-02-5.24e-15 1.000000 interest 9.272e-01 8.567e-02 10.822 1.92e-15 *** knowledge -3.536e-01 8.567e-02-4.128 0.000121 *** > summary(lm(grade~study+absence+knowledge,dat.std)) Coefficients: Estimate Std. Error t value Pr(> t ) (Intercept) 1.822e-16 6.499e-02 2.80e-15 1.000000 study 3.100e-01 8.653e-02 3.583 0.000713 *** absence -5.100e-01 9.519e-02-5.358 1.64e-06 *** knowledge 2.727e-01 7.479e-02 3.646 0.000585 ***
0.40 0.47 0.93-0.77-0.35 0.33 0.23 0.31-0.52 0.25