chemical.data <- read.csv('chemical.csv', header=T)
N <- nrow(chemical.data)
model <- lm(y1 ~ x1 + x2 + x3, data=chemical.data)
model.summary <- summary(model)
model.summary
##
## Call:
## lm(formula = y1 ~ x1 + x2 + x3, data = chemical.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.6420 -1.1551 -0.0821 1.1563 4.9055
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 332.11098 18.69293 17.767 1.74e-11 ***
## x1 -1.54596 0.09903 -15.610 1.10e-10 ***
## x2 -1.42456 0.14765 -9.648 7.99e-08 ***
## x3 -2.23737 0.33982 -6.584 8.67e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.312 on 15 degrees of freedom
## Multiple R-squared: 0.9551, Adjusted R-squared: 0.9462
## F-statistic: 106.5 on 3 and 15 DF, p-value: 2.459e-10
beta <- coef(model)
sigma <- summary(model)$sigma
sigma.squared <- sigma**2
\(\beta\): (Intercept x1 x2 x3): (332.1109833, -1.5459608, -1.4245587, -2.2373657)
\(\sigma^2\): 5.3449026
cov.beta <- vcov(model)
cov.beta
## (Intercept) x1 x2 x3
## (Intercept) 349.4256938 -1.811104235 -1.670412407 -0.109108566
## x1 -1.8111042 0.009807861 0.006807743 -0.002302894
## x2 -1.6704124 0.006807743 0.021800498 -0.009424583
## x3 -0.1091086 -0.002302894 -0.009424583 0.115479831
R.unadjusted <- model.summary$r.squared
R.adjusted <- model.summary$adj.r.squared
\(R^2\) 0.9551434 \(R^2 adjusted\): 0.9461721
model2 <- lm(y1 ~ x1+x2+x3+I(x1^2)+I(x2^2)+I(x3^2)+x1*x2+x1*x3+x2*x3, data=chemical.data)
model2.summary <- summary(model2)
beta2 <- coef(model2)
beta2.constant <- beta2[1]-3
beta2.adj.constant <- c(beta2.constant, beta2[-1])
sigma2 <- summary(model2)$sigma
sigma2.squared <- sigma2**2
R2.unadjusted <- model2.summary$r.squared
R2.adjusted <- model2.summary$adj.r.squared
\(R^2\) 0.9741468 \(R^2 adjusted\): 0.9482936
anova(model2, model,test= 'F')
## Analysis of Variance Table
##
## Model 1: y1 ~ x1 + x2 + x3 + I(x1^2) + I(x2^2) + I(x3^2) + x1 * x2 + x1 *
## x3 + x2 * x3
## Model 2: y1 ~ x1 + x2 + x3
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 9 46.208
## 2 15 80.174 -6 -33.965 1.1026 0.4295
CI <- confint(model2, level=0.95, c(2,3,4))
CI
## 2.5 % 97.5 %
## x1 -23.46535 8.581090
## x2 -29.79275 6.777342
## x3 -36.85452 32.574268
Check this:
p <- 10
k <- 3
alpha <- 0.05
coef <- model2.summary$coefficients[,1][c(2,3,4)]
err <- model2.summary$coefficients[,2][c(2,3,4)]
coef - err * qt(1-alpha/2, N-p) ## same as CI above
## x1 x2 x3
## -23.46535 -29.79275 -36.85452
coef <- model2.summary$coefficients[,1][c(2,3,4)]
std.err.coef <- model2.summary$coefficients[,2][c(2,3,4)]
t.stats <- qt(1-alpha/(2*N),df=N-p)
CI.bonferroni.ub <- coef + std.err.coef * t.stats
CI.bonferroni.lb <- coef - std.err.coef * t.stats
CI.bonferroni <- c(CI.bonferroni.lb, CI.bonferroni.ub)
CI.bonferroni
## x1 x2 x3 x1 x2 x3
## -36.56369 -44.74005 -65.23217 21.67944 21.72464 60.95191
range.data <- max(chemical.data$y1)-min(chemical.data$y1)
tukey.stats <- qtukey(1-alpha/2, nmeans=k ,df=N-p)
CI.tukeys.ub <- coef + std.err.coef * tukey.stats
CI.tukeys.lb <- coef - std.err.coef * tukey.stats
CI.tukeys <- c(CI.tukeys.lb, CI.tukeys.ub)
CI.tukeys
## x1 x2 x3 x1 x2 x3
## -39.87027 -48.51338 -72.39589 24.98601 25.49798 68.11563
Tukey’s 95% CI : [-39.8702692, -48.5133846 ]
kfk <- k*qf(1-alpha/2, df1=k, df2=p)
CI.scheffes.ub <- coef + std.err.coef * (kfk**0.5)
CI.scheffes.lb <- coef - std.err.coef * (kfk**0.5)
CI.scheffes <- c(CI.scheffes.lb, CI.scheffes.ub)
CI.scheffes
## x1 x2 x3 x1 x2 x3
## -34.39247 -42.26234 -60.52819 19.50821 19.24693 56.24794
Scheffe’s’s 95% CI : [-34.3924657, -42.2623374 ]
xob <- c(1,165,32,5) %*% beta2[1:4]
xob.CI.ub <- xob + t.stats * sigma
xob.CI.lb <- xob - t.stats * sigma
xob.CI <- c(xob.CI.lb, xob.CI.ub)
print (xob.CI)
## [1] -651.4743 -632.4641
95% CI: [-651.4742936, -632.4640524]
xob.PI.ub <- xob + t.stats * sigma * sqrt(1+1/N)
xob.PI.lb <- xob - t.stats * sigma * sqrt(1+1/N)
xob.PI <- c(xob.PI.lb, xob.PI.ub)
print (xob.PI)
## [1] -651.7212 -632.2171
96% Prediction Interval: [-651.721221, -632.2171251]
leverage <- hat(model.matrix(model2))
epsilon <- residuals(model2)
prediction <- predict(model2)
studentised.residual <- rstandard(model2)
cooksdistance <- cooks.distance(model2)
deleted.studentised.residual <- rstudent(model2)
df <- data.frame(y_i=chemical.data$y1, y_hat= prediction, epsilon=epsilon, h=leverage, r=studentised.residual, t=studentised.residual, D=cooksdistance)
library(xtable)
options(xtable.floating = FALSE)
colnames(df) <- c('$y_i$', '$\\hat{y}_i$', '$\\epsilon_i$', '$h_{ii}$', '$r_i$' , '$t_i$', '$D_i$')
html <- xtable(df, digits=3)#, labels=c('$y_i$', '$\\hat{y}_i$', '$\\epsilon_i$', '$h_{ii}$', '$r_i$' , '$t_i$', '$D_i$'))
print(html, type="html", sanitize.text.function=function(x){x}, size = "\\setlength{\\tabcolsep}{12pt}")
\(y_i\) | \(\hat{y}_i\) | \(\epsilon_i\) | \(h_{ii}\) | \(r_i\) | \(t_i\) | \(D_i\) | |
---|---|---|---|---|---|---|---|
1 | 41.500 | 40.900 | 0.600 | 0.926 | 0.977 | 0.977 | 1.202 |
2 | 33.800 | 33.153 | 0.647 | 0.768 | 0.593 | 0.593 | 0.116 |
3 | 27.700 | 28.455 | -0.755 | 0.598 | -0.526 | -0.526 | 0.041 |
4 | 21.700 | 19.909 | 1.791 | 0.330 | 0.965 | 0.965 | 0.046 |
5 | 19.900 | 18.523 | 1.377 | 0.523 | 0.880 | 0.880 | 0.085 |
6 | 15.000 | 12.642 | 2.358 | 0.366 | 1.307 | 1.307 | 0.099 |
7 | 12.200 | 13.595 | -1.395 | 0.493 | -0.864 | -0.864 | 0.073 |
8 | 4.300 | 6.189 | -1.889 | 0.461 | -1.136 | -1.136 | 0.110 |
9 | 19.300 | 20.694 | -1.394 | 0.352 | -0.764 | -0.764 | 0.032 |
10 | 6.400 | 6.312 | 0.088 | 0.599 | 0.062 | 0.062 | 0.001 |
11 | 37.600 | 37.567 | 0.033 | 0.714 | 0.027 | 0.027 | 0.000 |
12 | 18.000 | 14.496 | 3.504 | 0.418 | 2.027 | 2.027 | 0.295 |
13 | 26.300 | 28.552 | -2.252 | 0.453 | -1.344 | -1.344 | 0.150 |
14 | 9.900 | 10.834 | -0.934 | 0.704 | -0.757 | -0.757 | 0.136 |
15 | 25.000 | 25.261 | -0.261 | 0.559 | -0.173 | -0.173 | 0.004 |
16 | 14.100 | 14.709 | -0.609 | 0.459 | -0.366 | -0.366 | 0.011 |
17 | 15.200 | 14.709 | 0.491 | 0.459 | 0.294 | 0.294 | 0.007 |
18 | 15.900 | 18.450 | -2.550 | 0.409 | -1.464 | -1.464 | 0.148 |
19 | 19.600 | 18.450 | 1.150 | 0.409 | 0.660 | 0.660 | 0.030 |