chemical.data <- read.csv('chemical.csv', header=T)
N <- nrow(chemical.data)
model <- lm(y1 ~ x1 + x2 + x3, data=chemical.data)
model.summary <- summary(model)
model.summary
## 
## Call:
## lm(formula = y1 ~ x1 + x2 + x3, data = chemical.data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6420 -1.1551 -0.0821  1.1563  4.9055 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 332.11098   18.69293  17.767 1.74e-11 ***
## x1           -1.54596    0.09903 -15.610 1.10e-10 ***
## x2           -1.42456    0.14765  -9.648 7.99e-08 ***
## x3           -2.23737    0.33982  -6.584 8.67e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.312 on 15 degrees of freedom
## Multiple R-squared:  0.9551, Adjusted R-squared:  0.9462 
## F-statistic: 106.5 on 3 and 15 DF,  p-value: 2.459e-10

Part (2.1)

beta <- coef(model)
sigma <- summary(model)$sigma
sigma.squared <- sigma**2

\(\beta\): (Intercept x1 x2 x3): (332.1109833, -1.5459608, -1.4245587, -2.2373657)

\(\sigma^2\): 5.3449026

Part (2.2)

cov.beta <- vcov(model)
cov.beta
##             (Intercept)           x1           x2           x3
## (Intercept) 349.4256938 -1.811104235 -1.670412407 -0.109108566
## x1           -1.8111042  0.009807861  0.006807743 -0.002302894
## x2           -1.6704124  0.006807743  0.021800498 -0.009424583
## x3           -0.1091086 -0.002302894 -0.009424583  0.115479831

Part (2.3)

R.unadjusted <- model.summary$r.squared
R.adjusted <- model.summary$adj.r.squared

\(R^2\) 0.9551434 \(R^2 adjusted\): 0.9461721

Part (2.4)

model2 <- lm(y1 ~ x1+x2+x3+I(x1^2)+I(x2^2)+I(x3^2)+x1*x2+x1*x3+x2*x3, data=chemical.data)
model2.summary <- summary(model2)
beta2 <- coef(model2)
beta2.constant <- beta2[1]-3
beta2.adj.constant <- c(beta2.constant, beta2[-1])
sigma2 <- summary(model2)$sigma
sigma2.squared <- sigma2**2

Part (2.5)

R2.unadjusted <- model2.summary$r.squared
R2.adjusted <- model2.summary$adj.r.squared

\(R^2\) 0.9741468 \(R^2 adjusted\): 0.9482936

Part (2.6)

anova(model2, model,test= 'F')
## Analysis of Variance Table
## 
## Model 1: y1 ~ x1 + x2 + x3 + I(x1^2) + I(x2^2) + I(x3^2) + x1 * x2 + x1 * 
##     x3 + x2 * x3
## Model 2: y1 ~ x1 + x2 + x3
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1      9 46.208                           
## 2     15 80.174 -6   -33.965 1.1026 0.4295

Part (2.7)

CI <- confint(model2, level=0.95, c(2,3,4))
CI
##        2.5 %    97.5 %
## x1 -23.46535  8.581090
## x2 -29.79275  6.777342
## x3 -36.85452 32.574268

Check this:

p <- 10
k <- 3
alpha <- 0.05
coef <- model2.summary$coefficients[,1][c(2,3,4)]
err <- model2.summary$coefficients[,2][c(2,3,4)]
coef - err * qt(1-alpha/2, N-p) ## same as CI above
##        x1        x2        x3 
## -23.46535 -29.79275 -36.85452

Part (2.8)

coef <- model2.summary$coefficients[,1][c(2,3,4)]
std.err.coef <- model2.summary$coefficients[,2][c(2,3,4)]
t.stats <- qt(1-alpha/(2*N),df=N-p)
CI.bonferroni.ub <-  coef + std.err.coef * t.stats 
CI.bonferroni.lb <-  coef - std.err.coef * t.stats
CI.bonferroni <- c(CI.bonferroni.lb, CI.bonferroni.ub)
CI.bonferroni
##        x1        x2        x3        x1        x2        x3 
## -36.56369 -44.74005 -65.23217  21.67944  21.72464  60.95191

Part (2.9)

range.data <- max(chemical.data$y1)-min(chemical.data$y1)
tukey.stats <- qtukey(1-alpha/2, nmeans=k  ,df=N-p)
CI.tukeys.ub <- coef + std.err.coef * tukey.stats
CI.tukeys.lb <- coef - std.err.coef * tukey.stats
CI.tukeys <- c(CI.tukeys.lb, CI.tukeys.ub)
CI.tukeys
##        x1        x2        x3        x1        x2        x3 
## -39.87027 -48.51338 -72.39589  24.98601  25.49798  68.11563

Tukey’s 95% CI : [-39.8702692, -48.5133846 ]

Part (2.10)

kfk <- k*qf(1-alpha/2, df1=k, df2=p)
CI.scheffes.ub <- coef + std.err.coef * (kfk**0.5)
CI.scheffes.lb <- coef - std.err.coef * (kfk**0.5)
CI.scheffes <- c(CI.scheffes.lb, CI.scheffes.ub)
CI.scheffes
##        x1        x2        x3        x1        x2        x3 
## -34.39247 -42.26234 -60.52819  19.50821  19.24693  56.24794

Scheffe’s’s 95% CI : [-34.3924657, -42.2623374 ]

Part (2.11)

xob <- c(1,165,32,5) %*% beta2[1:4]
xob.CI.ub <- xob + t.stats * sigma 
xob.CI.lb <- xob - t.stats * sigma 
xob.CI <- c(xob.CI.lb, xob.CI.ub)
print (xob.CI)
## [1] -651.4743 -632.4641

95% CI: [-651.4742936, -632.4640524]

Part (2.12)

xob.PI.ub <- xob + t.stats * sigma * sqrt(1+1/N)
xob.PI.lb <- xob - t.stats * sigma * sqrt(1+1/N)
xob.PI <- c(xob.PI.lb, xob.PI.ub)
print (xob.PI)
## [1] -651.7212 -632.2171

96% Prediction Interval: [-651.721221, -632.2171251]

Part (2.13)

leverage <- hat(model.matrix(model2))
epsilon <- residuals(model2)
prediction <- predict(model2)
studentised.residual <- rstandard(model2)
cooksdistance <- cooks.distance(model2)
deleted.studentised.residual <- rstudent(model2)
df <- data.frame(y_i=chemical.data$y1, y_hat= prediction, epsilon=epsilon, h=leverage,  r=studentised.residual, t=studentised.residual, D=cooksdistance)
library(xtable)
options(xtable.floating = FALSE)
colnames(df) <- c('$y_i$', '$\\hat{y}_i$', '$\\epsilon_i$', '$h_{ii}$', '$r_i$' , '$t_i$', '$D_i$')
html <- xtable(df, digits=3)#, labels=c('$y_i$', '$\\hat{y}_i$', '$\\epsilon_i$', '$h_{ii}$', '$r_i$' , '$t_i$', '$D_i$'))
print(html, type="html", sanitize.text.function=function(x){x}, size = "\\setlength{\\tabcolsep}{12pt}")
\(y_i\) \(\hat{y}_i\) \(\epsilon_i\) \(h_{ii}\) \(r_i\) \(t_i\) \(D_i\)
1 41.500 40.900 0.600 0.926 0.977 0.977 1.202
2 33.800 33.153 0.647 0.768 0.593 0.593 0.116
3 27.700 28.455 -0.755 0.598 -0.526 -0.526 0.041
4 21.700 19.909 1.791 0.330 0.965 0.965 0.046
5 19.900 18.523 1.377 0.523 0.880 0.880 0.085
6 15.000 12.642 2.358 0.366 1.307 1.307 0.099
7 12.200 13.595 -1.395 0.493 -0.864 -0.864 0.073
8 4.300 6.189 -1.889 0.461 -1.136 -1.136 0.110
9 19.300 20.694 -1.394 0.352 -0.764 -0.764 0.032
10 6.400 6.312 0.088 0.599 0.062 0.062 0.001
11 37.600 37.567 0.033 0.714 0.027 0.027 0.000
12 18.000 14.496 3.504 0.418 2.027 2.027 0.295
13 26.300 28.552 -2.252 0.453 -1.344 -1.344 0.150
14 9.900 10.834 -0.934 0.704 -0.757 -0.757 0.136
15 25.000 25.261 -0.261 0.559 -0.173 -0.173 0.004
16 14.100 14.709 -0.609 0.459 -0.366 -0.366 0.011
17 15.200 14.709 0.491 0.459 0.294 0.294 0.007
18 15.900 18.450 -2.550 0.409 -1.464 -1.464 0.148
19 19.600 18.450 1.150 0.409 0.660 0.660 0.030