Read Data
library(limma)
library(knitr)
load('stallcupdat.Rdata')
Volcano Plots
Index1 <- which(stallcupdat$target$time==1)
Index2 <- which(stallcupdat$target$time==2)
Index3 <- which(stallcupdat$target$time==3)
d <- rowMeans(stallcupdat$E[,Index2])-rowMeans(stallcupdat$E[,Index1])
tt <- rowttests(stallcupdat$E[, c(Index1,Index2)], factor(stallcupdat$targets$time[c(Index1,Index2)]))
lodt <- -log10(tt$p.value)
smoothScatter(d,
lodt,
nrpoints=500,
xlab="Average fold-change",
ylab="-log10 pvalue",
main="Volcano plot for t-test across time points(time=1 vs time=2)")
points(d[lodt>10], lodt[lodt>10], pch=20, col=4)
points(d[lodt>10 & abs(d)>1], lodt[lodt>10 & abs(d)>1], pch=20, col=6)
Based on t-tests, there seem to be enough differentially expressed genes between time points 1 and time points 2 (Across treatments+control)
Moderated t-tests
design <- model.matrix(~factor(stallcupdat$target$time))
fit <- lmFit(stallcupdat$E, design)
efit <- eBayes(fit)
lodmt <- -log10(efit$p.value[,2])
d <- rowMeans(stallcupdat$E[,Index2])-rowMeans(stallcupdat$E[,Index1])
smoothScatter(d,
lodmt,
nrpoints=500,
main="Volcano plot for moderated t-test",xlab="Average fold-change",
ylab="-log10 pvalue",ylim=c(0,15))
points(d[lodmt>10], lodmt[lodmt>10], pch=20, col=4)
points(d[lodmt>10 & abs(d)>1],lodmt[lodmt>10 & abs(d)>1], pch=20, col=6)
par(mar=c(5,5,3,2))
plot(lodt,lodmt,pch=".",cex.axis=1.5,cex.lab=1.5,
xlab="-log10(p) from 2-sample t-test",
ylab="-log10(p) from moderated t-test (limma)")
abline(0,1,col=2,lwd=3)
box(lwd=2)
As there are too few replicates(12 here) so the variance estimates can be stabilised using moderated t-tests. Clearly. in this case pooling variance information from similar genes helps as they p-value estimates improve.
Finding DE genes
time <- stallcupdat$targets$time
trts <- factor(stallcupdat$targets$treatment,
levels=unique(stallcupdat$targets$treatment))
design <- model.matrix(~0+trts)
colnames(design) <- levels(trts)
kable(design)
1 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
1 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
1 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
1 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
1 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
1 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
0 |
1 |
0 |
0 |
We define a more explicit design matrix without intercept.
Differential genes at time point 2
We define an contrast matrix which is more easy to interpret than using vectors
fit <- lmFit(stallcupdat$E[,c(time==2)],design[c(time==2),])
contr.matrix <- makeContrasts(XYsC1C2 = (X+Y)/2 - (Control1+Control2)/2,
XvsC1C2 = X-(Control1+Control2)/2,
YvsC1C2 = Y-(Control1+Control2)/2,
levels = design)
kable(contr.matrix )
Control2 |
-0.5 |
-0.5 |
-0.5 |
X |
0.5 |
1.0 |
0.0 |
Control1 |
-0.5 |
-0.5 |
-0.5 |
Y |
0.5 |
0.0 |
1.0 |
fitgpd=contrasts.fit(fit,contr.matrix)
fitgpd=eBayes(fitgpd)
topTable(fitgpd,n=10)
## XYsC1C2 XvsC1C2 YvsC1C2 AveExpr F
## ILMN_2392261 1.4636316 1.9561086 0.9711547 0.05572692 90.92310
## ILMN_1760103 1.5530846 2.0300292 1.0761400 0.18434020 79.23543
## ILMN_1692223 -1.9220230 -1.8575570 -1.9864891 -0.25493441 54.06754
## ILMN_1688184 0.7892799 0.7565609 0.8219990 0.21793060 53.20783
## ILMN_1721559 1.5286137 1.9785469 1.0786805 0.06988887 51.27430
## ILMN_1678143 1.1178679 1.4761343 0.7596015 0.23695775 50.49902
## ILMN_1767685 1.5999404 1.6852403 1.5146405 0.30349530 49.89704
## ILMN_1765668 1.1264200 1.3279808 0.9248592 -0.12808822 48.10125
## ILMN_1744765 -2.1002668 -2.1216260 -2.0789077 0.22195917 46.93273
## ILMN_1659610 -1.2895246 -1.4403356 -1.1387137 -0.17105952 45.92719
## P.Value adj.P.Val
## ILMN_2392261 6.323691e-09 0.0001550948
## ILMN_1760103 1.583799e-08 0.0001942212
## ILMN_1692223 1.912231e-07 0.0011159689
## ILMN_1688184 2.118156e-07 0.0011159689
## ILMN_1721559 2.680453e-07 0.0011159689
## ILMN_1678143 2.952292e-07 0.0011159689
## ILMN_1767685 3.185103e-07 0.0011159689
## ILMN_1765668 4.013873e-07 0.0012305530
## ILMN_1744765 4.684749e-07 0.0012766462
## ILMN_1659610 5.365566e-07 0.0013159586
Here XvsC1C2 implies that differential expression was calculated by averaging over Control1 and Control2 values.
A total of 193 genes are upregulated and 158 downregulated when comparing average X+Y over average C1+C2 at time point 2 alone (i.e. not accounting for common genes with X vs C1C2 or YvsC1C2)
Also, around 735 genes are up and 582 genes are downregulated in the common region of XvsC1C2, XYvsC1C2, YvsC1C2 indicating that X,Y are similar expression wise.
results <- decideTests(fitgpd,adjust.method="none")
a <- vennCounts(results)
print(a)
## XYsC1C2 XvsC1C2 YvsC1C2 Counts
## 1 0 0 0 18148
## 2 0 0 1 283
## 3 0 1 0 1376
## 4 0 1 1 2
## 5 1 0 0 351
## 6 1 0 1 419
## 7 1 1 0 2530
## 8 1 1 1 1417
## attr(,"class")
## [1] "VennCounts"
head(results,n=10)
## Contrasts
## XYsC1C2 XvsC1C2 YvsC1C2
## ILMN_1762337 0 0 0
## ILMN_2055271 0 0 0
## ILMN_2383229 0 0 0
## ILMN_1806310 0 0 0
## ILMN_1779670 0 0 0
## ILMN_2321282 0 0 0
## ILMN_1772582 0 0 0
## ILMN_1717783 0 0 0
## ILMN_1814316 0 0 0
## ILMN_2359168 0 0 0
vennDiagram(results,include=c("up","down"),counts.col=c("red","green"), main="Time point = 2")
Let’s compare XvsY and XvsC1 and YvsC1
contr.matrix <- makeContrasts(XYvsC1 = (X+Y)/2 - (Control1), XvsY = X-Y, XvsC1= X-Control1, levels = design)
fitgpd <- contrasts.fit(fit,contr.matrix)
fitgpd <- eBayes(fitgpd)
results <- decideTests(fitgpd, adjust.method="none")
cs <- vennCounts(results)
vennDiagram(results, include=c("up","down"),counts.col=c("red","green"), main="Time point = 2")
head(results,n=10)
## Contrasts
## XYvsC1 XvsY XvsC1
## ILMN_1762337 0 0 0
## ILMN_2055271 0 0 0
## ILMN_2383229 0 0 0
## ILMN_1806310 0 0 0
## ILMN_1779670 0 0 0
## ILMN_2321282 0 0 0
## ILMN_1772582 0 0 0
## ILMN_1717783 0 0 0
## ILMN_1814316 0 0 0
## ILMN_2359168 0 0 0
print(vennCounts(results,include=c("up")))
## XYvsC1 XvsY XvsC1 Counts
## 1 0 0 0 21724
## 2 0 0 1 497
## 3 0 1 0 308
## 4 0 1 1 233
## 5 1 0 0 212
## 6 1 0 1 1234
## 7 1 1 0 0
## 8 1 1 1 318
## attr(,"class")
## [1] "VennCounts"
print(vennCounts(results,include=c("down")))
## XYvsC1 XvsY XvsC1 Counts
## 1 0 0 0 21812
## 2 0 0 1 458
## 3 0 1 0 245
## 4 0 1 1 205
## 5 1 0 0 205
## 6 1 0 1 1192
## 7 1 1 0 0
## 8 1 1 1 409
## attr(,"class")
## [1] "VennCounts"
A striking number that stands out is 0 up and downregulated genes between XYvsC1 and XvsY, thus indicating that there are no such genes which are diff expressed in (X+Y)vsC1C2 or XvsY or XvsC1 however there are genes which are in common up or down regulated in all these groups.
Differential genes at time point 3
fit <- lmFit(stallcupdat$E[,c(time==3)],design[c(time==3),])
contr.matrix <- makeContrasts(XYsC1C2 = (X+Y)/2 - (Control1+Control2)/2,
XvsC1C2 = X-(Control1+Control2)/2,
YvsC1C2 = Y-(Control1+Control2)/2,
levels = design)
kable(contr.matrix )
Control2 |
-0.5 |
-0.5 |
-0.5 |
X |
0.5 |
1.0 |
0.0 |
Control1 |
-0.5 |
-0.5 |
-0.5 |
Y |
0.5 |
0.0 |
1.0 |
fitgpd=contrasts.fit(fit,contr.matrix)
fitgpd=eBayes(fitgpd)
kable(topTable(fitgpd,n=10))
ILMN_1692223 |
-1.8940198 |
-1.9449983 |
-1.8430414 |
-0.4530204 |
82.71110 |
0e+00 |
0.0001927 |
ILMN_1721559 |
1.4268679 |
1.6331010 |
1.2206347 |
0.1593326 |
71.53020 |
0e+00 |
0.0002601 |
ILMN_1701424 |
1.0294645 |
1.2389639 |
0.8199651 |
0.2988811 |
65.88971 |
0e+00 |
0.0003023 |
ILMN_2382290 |
0.8318677 |
0.8472919 |
0.8164435 |
0.2804587 |
52.94152 |
2e-07 |
0.0009729 |
ILMN_2096985 |
-0.8602917 |
-1.0186670 |
-0.7019164 |
-0.1337231 |
50.87320 |
2e-07 |
0.0010107 |
ILMN_1755811 |
-0.7159714 |
-0.8606184 |
-0.5713244 |
-0.0355728 |
48.77121 |
3e-07 |
0.0011091 |
ILMN_1792679 |
0.6616360 |
0.7118095 |
0.6114625 |
0.2222735 |
45.79061 |
4e-07 |
0.0014301 |
ILMN_2072622 |
0.7368801 |
1.0632469 |
0.4105133 |
0.3582748 |
43.31539 |
6e-07 |
0.0015061 |
ILMN_1702301 |
0.4475621 |
0.4663214 |
0.4288029 |
-0.0008716 |
41.71393 |
7e-07 |
0.0015061 |
ILMN_1775829 |
-0.7633448 |
-0.9708421 |
-0.5558475 |
-0.0260578 |
41.60169 |
8e-07 |
0.0015061 |
Lots of common genes between XYvsC1C2 and XvsC1C2
results <- decideTests(fitgpd,adjust.method="none")
a <- vennCounts(results)
head(results,n=10)
## Contrasts
## XYsC1C2 XvsC1C2 YvsC1C2
## ILMN_1762337 0 0 0
## ILMN_2055271 0 0 0
## ILMN_2383229 0 0 0
## ILMN_1806310 0 0 0
## ILMN_1779670 0 0 0
## ILMN_2321282 0 0 0
## ILMN_1772582 -1 -1 0
## ILMN_1717783 0 1 0
## ILMN_1814316 0 0 0
## ILMN_2359168 0 0 0
vennDiagram(results,include=c("up","down"),counts.col=c("red","green"), main="Time point = 3")
contr.matrix <- makeContrasts(XYvsC1 = (X+Y)/2 - (Control1), XvsY = X-Y, XvsC1= X-Control1, levels = design)
fitgpd <- contrasts.fit(fit,contr.matrix)
fitgpd <- eBayes(fitgpd)
results <- decideTests(fitgpd, adjust.method="none")
cs <- vennCounts(results)
vennDiagram(results, include=c("up","down"),counts.col=c("red","green"), main="Time point = 3")
head(results,n=10)
## Contrasts
## XYvsC1 XvsY XvsC1
## ILMN_1762337 0 0 0
## ILMN_2055271 0 0 0
## ILMN_2383229 0 0 0
## ILMN_1806310 0 0 0
## ILMN_1779670 0 0 0
## ILMN_2321282 0 0 0
## ILMN_1772582 -1 0 -1
## ILMN_1717783 0 0 0
## ILMN_1814316 0 0 0
## ILMN_2359168 0 0 0
print(vennCounts(results,include=c("up")))
## XYvsC1 XvsY XvsC1 Counts
## 1 0 0 0 21222
## 2 0 0 1 483
## 3 0 1 0 286
## 4 0 1 1 248
## 5 1 0 0 312
## 6 1 0 1 1633
## 7 1 1 0 0
## 8 1 1 1 342
## attr(,"class")
## [1] "VennCounts"
print(vennCounts(results,include=c("down")))
## XYvsC1 XvsY XvsC1 Counts
## 1 0 0 0 21485
## 2 0 0 1 340
## 3 0 1 0 273
## 4 0 1 1 127
## 5 1 0 0 314
## 6 1 0 1 1494
## 7 1 1 0 0
## 8 1 1 1 493
## attr(,"class")
## [1] "VennCounts"
Again no common genes between XYvsC1 and XvsY which are not in XvsC1.
DE genes associated with time
Reference: https://www.bioconductor.org/packages/devel/bioc/vignettes/limma/inst/doc/usersguide.pdf Section 9.6
time <- factor(stallcupdat$targets$time)
trts <- factor(stallcupdat$targets$treatment,
levels=unique(stallcupdat$targets$treatment))
design <- model.matrix(~0+trts*time)
colnames(design) <- c('Control2', 'X', 'Control1', 'Y', 'time2', 'time3', 'X.time2', 'Control1.time2', 'Y.time2', 'X.time3', 'Control1.time3', 'Y.time3')
fit <- lmFit(stallcupdat$E,design)
contr.matrix <- makeContrasts(XYvsC1C2 = (X+Y)/2-(Control1+Control2)/2,
X3Y3vsX2Y2 = (X.time3+Y.time3)/2-(X.time2+Y.time2)/2,
X3vsX2 = X.time3-X.time2,
levels = design)
kable(contr.matrix)
Control2 |
-0.5 |
0.0 |
0 |
X |
0.5 |
0.0 |
0 |
Control1 |
-0.5 |
0.0 |
0 |
Y |
0.5 |
0.0 |
0 |
time2 |
0.0 |
0.0 |
0 |
time3 |
0.0 |
0.0 |
0 |
X.time2 |
0.0 |
-0.5 |
-1 |
Control1.time2 |
0.0 |
0.0 |
0 |
Y.time2 |
0.0 |
-0.5 |
0 |
X.time3 |
0.0 |
0.5 |
1 |
Control1.time3 |
0.0 |
0.0 |
0 |
Y.time3 |
0.0 |
0.5 |
0 |
fitgpd=contrasts.fit(fit,contr.matrix)
fitgpd=eBayes(fitgpd)
kable(topTable(fitgpd,n=10))
ILMN_2167758 |
-1.3896028 |
-0.0688103 |
-0.1023459 |
0 |
44.19823 |
0 |
0.00e+00 |
ILMN_1692223 |
-2.1329445 |
0.0127492 |
-0.1026954 |
0 |
41.95023 |
0 |
0.00e+00 |
ILMN_1655595 |
1.0482834 |
0.0033124 |
0.0069149 |
0 |
40.00381 |
0 |
0.00e+00 |
ILMN_1767685 |
1.7273583 |
-0.1139329 |
0.0082781 |
0 |
35.23824 |
0 |
2.00e-07 |
ILMN_1716658 |
-0.9511261 |
0.0106308 |
-0.0062736 |
0 |
34.50571 |
0 |
2.00e-07 |
ILMN_1755796 |
-1.3112695 |
-0.6494273 |
-0.5966956 |
0 |
32.94084 |
0 |
4.00e-07 |
ILMN_1765668 |
1.3044230 |
-0.2756104 |
-0.3990895 |
0 |
32.56274 |
0 |
4.00e-07 |
ILMN_1768577 |
-1.0893185 |
-0.2411923 |
-0.2485188 |
0 |
28.40039 |
0 |
2.10e-06 |
ILMN_1652407 |
0.6911198 |
-0.1529714 |
-0.1781824 |
0 |
25.46233 |
0 |
7.60e-06 |
ILMN_2388547 |
0.7136707 |
0.3246403 |
0.6653724 |
0 |
24.63418 |
0 |
1.04e-05 |
results <- decideTests(fitgpd,adjust.method="none") # doesn't subset of F<0.05'
a <- vennCounts(results)
print(a)
## XYvsC1C2 X3Y3vsX2Y2 X3vsX2 Counts
## 1 0 0 0 20073
## 2 0 0 1 366
## 3 0 1 0 447
## 4 0 1 1 520
## 5 1 0 0 2897
## 6 1 0 1 51
## 7 1 1 0 67
## 8 1 1 1 105
## attr(,"class")
## [1] "VennCounts"
vennDiagram(results, include=c("up","down"),counts.col=c("red","green"))
The number of up or down regulated genes are more affected by the treatment type than by time as these numbers are less
when comparing based on time points.