MATH-542 Final Exam

chemical.data <- read.csv('chemical.csv', header=T)
N <- nrow(chemical.data)
model <- lm(y1 ~ x1 + x2 + x3, data=chemical.data)
model.summary <- summary(model)
model.summary

## 
## Call:
## lm(formula = y1 ~ x1 + x2 + x3, data = chemical.data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6420 -1.1551 -0.0821  1.1563  4.9055 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 332.11098   18.69293  17.767 1.74e-11 ***
## x1           -1.54596    0.09903 -15.610 1.10e-10 ***
## x2           -1.42456    0.14765  -9.648 7.99e-08 ***
## x3           -2.23737    0.33982  -6.584 8.67e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.312 on 15 degrees of freedom
## Multiple R-squared:  0.9551, Adjusted R-squared:  0.9462 
## F-statistic: 106.5 on 3 and 15 DF,  p-value: 2.459e-10

Part (2.1)

beta <- coef(model)
sigma <- summary(model)$sigma
sigma.squared <- sigma**2

\(\beta\): (Intercept x1 x2 x3): (332.1109833, -1.5459608, -1.4245587, -2.2373657)

\(\sigma^2\): 5.3449026

Part (2.2)

cov.beta <- vcov(model)
cov.beta

##             (Intercept)           x1           x2           x3
## (Intercept) 349.4256938 -1.811104235 -1.670412407 -0.109108566
## x1           -1.8111042  0.009807861  0.006807743 -0.002302894
## x2           -1.6704124  0.006807743  0.021800498 -0.009424583
## x3           -0.1091086 -0.002302894 -0.009424583  0.115479831

Part (2.3)

R.unadjusted <- model.summary$r.squared
R.adjusted <- model.summary$adj.r.squared

\(R^2\) 0.9551434 \(R^2 adjusted\): 0.9461721

Part (2.4)

model2 <- lm(y1 ~ x1+x2+x3+I(x1^2)+I(x2^2)+I(x3^2)+x1*x2+x1*x3+x2*x3, data=chemical.data)
model2.summary <- summary(model2)
beta2 <- coef(model2)
beta2.constant <- beta2[1]-3
beta2.adj.constant <- c(beta2.constant, beta2[-1])
sigma2 <- summary(model2)$sigma
sigma2.squared <- sigma2**2

Part (2.5)

R2.unadjusted <- model2.summary$r.squared
R2.adjusted <- model2.summary$adj.r.squared

\(R^2\) 0.9741468 \(R^2 adjusted\): 0.9482936

Part (2.6)

anova(model2, model,test= 'F')

## Analysis of Variance Table
## 
## Model 1: y1 ~ x1 + x2 + x3 + I(x1^2) + I(x2^2) + I(x3^2) + x1 * x2 + x1 * 
##     x3 + x2 * x3
## Model 2: y1 ~ x1 + x2 + x3
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1      9 46.208                           
## 2     15 80.174 -6   -33.965 1.1026 0.4295

Part (2.7)

CI <- confint(model2, level=0.95, c(2,3,4))
CI

##        2.5 %    97.5 %
## x1 -23.46535  8.581090
## x2 -29.79275  6.777342
## x3 -36.85452 32.574268

Check this:

p <- 10
k <- 3
alpha <- 0.05
coef <- model2.summary$coefficients[,1][c(2,3,4)]
err <- model2.summary$coefficients[,2][c(2,3,4)]
coef - err * qt(1-alpha/2, N-p) ## same as CI above

##        x1        x2        x3 
## -23.46535 -29.79275 -36.85452

Part (2.8)

coef <- model2.summary$coefficients[,1][c(2,3,4)]
std.err.coef <- model2.summary$coefficients[,2][c(2,3,4)]
t.stats <- qt(1-alpha/(2*N),df=N-p)

CI.bonferroni.ub <-  coef + std.err.coef * t.stats 
CI.bonferroni.lb <-  coef - std.err.coef * t.stats
CI.bonferroni <- c(CI.bonferroni.lb, CI.bonferroni.ub)
CI.bonferroni

##        x1        x2        x3        x1        x2        x3 
## -36.56369 -44.74005 -65.23217  21.67944  21.72464  60.95191

Part (2.9)

range.data <- max(chemical.data$y1)-min(chemical.data$y1)
tukey.stats <- qtukey(1-alpha/2, nmeans=k  ,df=N-p)
CI.tukeys.ub <- coef + std.err.coef * tukey.stats
CI.tukeys.lb <- coef - std.err.coef * tukey.stats
CI.tukeys <- c(CI.tukeys.lb, CI.tukeys.ub)
CI.tukeys

##        x1        x2        x3        x1        x2        x3 
## -39.87027 -48.51338 -72.39589  24.98601  25.49798  68.11563

Tukey’s 95% CI : [-39.8702692, -48.5133846 ]

Part (2.10)

kfk <- k*qf(1-alpha/2, df1=k, df2=p)
CI.scheffes.ub <- coef + std.err.coef * (kfk**0.5)
CI.scheffes.lb <- coef - std.err.coef * (kfk**0.5)
CI.scheffes <- c(CI.scheffes.lb, CI.scheffes.ub)
CI.scheffes

##        x1        x2        x3        x1        x2        x3 
## -34.39247 -42.26234 -60.52819  19.50821  19.24693  56.24794

Scheffe’s’s 95% CI : [-34.3924657, -42.2623374 ]

Part (2.11)

xob <- c(1,165,32,5) %*% beta2[1:4]
xob.CI.ub <- xob + t.stats * sigma 
xob.CI.lb <- xob - t.stats * sigma 
xob.CI <- c(xob.CI.lb, xob.CI.ub)
print (xob.CI)

## [1] -651.4743 -632.4641

95% CI: [-651.4742936, -632.4640524]

Part (2.12)

xob.PI.ub <- xob + t.stats * sigma * sqrt(1+1/N)
xob.PI.lb <- xob - t.stats * sigma * sqrt(1+1/N)
xob.PI <- c(xob.PI.lb, xob.PI.ub)
print (xob.PI)

## [1] -651.7212 -632.2171

96% Prediction Interval: [-651.721221, -632.2171251]

Part (2.13)

leverage <- hat(model.matrix(model2))
epsilon <- residuals(model2)
prediction <- predict(model2)
studentised.residual <- rstandard(model2)
cooksdistance <- cooks.distance(model2)
deleted.studentised.residual <- rstudent(model2)
df <- data.frame(y_i=chemical.data$y1, y_hat= prediction, epsilon=epsilon, h=leverage,  r=studentised.residual, t=studentised.residual, D=cooksdistance)
library(xtable)
options(xtable.floating = FALSE)
colnames(df) <- c('$y_i$', '$\\hat{y}_i$', '$\\epsilon_i$', '$h_{ii}$', '$r_i$' , '$t_i$', '$D_i$')
html <- xtable(df, digits=3)#, labels=c('$y_i$', '$\\hat{y}_i$', '$\\epsilon_i$', '$h_{ii}$', '$r_i$' , '$t_i$', '$D_i$'))
print(html, type="html", sanitize.text.function=function(x){x}, size = "\\setlength{\\tabcolsep}{12pt}")

	\(y_i\)	\(\hat{y}_i\)	\(\epsilon_i\)	\(h_{ii}\)	\(r_i\)	\(t_i\)	\(D_i\)
1	41.500	40.900	0.600	0.926	0.977	0.977	1.202
2	33.800	33.153	0.647	0.768	0.593	0.593	0.116
3	27.700	28.455	-0.755	0.598	-0.526	-0.526	0.041
4	21.700	19.909	1.791	0.330	0.965	0.965	0.046
5	19.900	18.523	1.377	0.523	0.880	0.880	0.085
6	15.000	12.642	2.358	0.366	1.307	1.307	0.099
7	12.200	13.595	-1.395	0.493	-0.864	-0.864	0.073
8	4.300	6.189	-1.889	0.461	-1.136	-1.136	0.110
9	19.300	20.694	-1.394	0.352	-0.764	-0.764	0.032
10	6.400	6.312	0.088	0.599	0.062	0.062	0.001
11	37.600	37.567	0.033	0.714	0.027	0.027	0.000
12	18.000	14.496	3.504	0.418	2.027	2.027	0.295
13	26.300	28.552	-2.252	0.453	-1.344	-1.344	0.150
14	9.900	10.834	-0.934	0.704	-0.757	-0.757	0.136
15	25.000	25.261	-0.261	0.559	-0.173	-0.173	0.004
16	14.100	14.709	-0.609	0.459	-0.366	-0.366	0.011
17	15.200	14.709	0.491	0.459	0.294	0.294	0.007
18	15.900	18.450	-2.550	0.409	-1.464	-1.464	0.148
19	19.600	18.450	1.150	0.409	0.660	0.660	0.030