suppressPackageStartupMessages({
library(ggplot2)
library(gganimate)
library(dplyr)
library(tidyr)
library(gridExtra)
library(gifski)
})theme_set(ggpubr::theme_pubr())
set.seed(42)
PCA Animation
Simulate Data
<- 1:100
x
<- rnorm(n = 100, mean = 0, sd = 20)
error.x <- rnorm(n = 100, mean = 0, sd = 20)
error.y
<- 50 + 2 * x
y plot(x,y)
<- x + error.x
x.obs <- y + error.y
y.obs <- data.frame(x = x.obs, y = y.obs)
df ggplot(df, aes(x, y)) +
geom_point()
<- as.matrix(df)
X.noncentered <- scale(X.noncentered, center=TRUE, scale=FALSE)
X <- t(X) %*% X/(nrow(X)-1)
covX.calc
# PCA
<- cov(X)
covX <- eigen(covX)
eig <- eig$vectors
eigenVectors <- eig$values
eigenValues
<- svd(X)$d # sqrt(eigenValues * (nrow(X)-1)) --> WHY?
singular.values <- svd(X)$v
singular.vectors
<- lapply(1:179, function(alpha) {
frames <- c(cos(alpha * pi/180), sin(alpha * pi/180))
w <- X %*% (w %*% t(w))
z
data.frame(
x = X[,1], y = X[,2],
zx = z[,1], zy = z[,2],
alpha = alpha
)
})
<- do.call(rbind, frames)
anim_data
<- data.frame(x = c(0, 0),
eig_df y = c(0, 0),
xend = eigenVectors[1,] * sqrt(eigenValues),
yend = eigenVectors[2,] * sqrt(eigenValues),
col = c("PC1", "PC2"))
<- lm(y~x, data = as.data.frame(X))
linear_fit <- predict(linear_fit, newdata = as.data.frame(X)[,1,drop=FALSE])
linear_fit.pred <- data.frame(X=X[,1],Y=linear_fit.pred)
linear_fit.pred.df $col <- "linear fit" linear_fit.pred.df
Plot PCs
ggplot() +
geom_point(aes(X[,1], X[,2]), color="blue", alpha=0.5) +
geom_segment(data=eig_df,
aes(x=x, y=y, xend=xend, yend=yend, col=col),
arrow=arrow(length=unit(0.3,"cm")), size=1) +
scale_color_manual(values=c("PC1"="red", "PC2"="green",`linear fit`="black"))+
theme_minimal() +
coord_equal() +
xlab("X") +
ylab("Y")
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
How does it compare with linear fit?
ggplot() +
geom_point(aes(X[,1], X[,2]), color="blue", alpha=0.5) +
geom_segment(data=eig_df,
aes(x=x, y=y, xend=xend, yend=yend, color=col),
arrow=arrow(length=unit(0.3,"cm")), size=1) +
scale_color_manual(values=c("PC1"="red", "PC2"="green",`linear fit`="black"))+
geom_line(data = linear_fit.pred.df, aes(X,Y, color=col))+
theme_minimal() +
coord_equal() +
xlab("X") +
ylab("Y")
Animate
<- ggplot(anim_data, aes(x, y)) +
p geom_point(color='blue', alpha=1) +
geom_point(aes(zx, zy), color='red', size=2, alpha=1) +
geom_segment(data=eig_df,
aes(x=x, y=y, xend=xend, yend=yend, color=col),
arrow=arrow(length=unit(0.3,"cm")), size=1) +
scale_color_manual(values=c("PC1"="red", "PC2"="green"))+
geom_segment(aes(xend=zx, yend=zy), color='red', alpha=0.5) +
geom_abline(aes(intercept=0, slope=tan(alpha * pi/180)), color='gray', linetype="dashed") +
coord_equal() +
transition_states(alpha, transition_length=2, state_length=2)
animate(p, fps=10, duration=10,
width=8, height=5, units = "in", res = 150*2,
renderer=gifski_renderer("pca_rotation.gif"))