Following the lecture slides and the previous tiny example of pedigree-based linear mixed model, adapt the code below to fit the genome-based linear mixed model.
# Clean the working environment
rm(list = ls())
Here you should implement the SNP-BLUP model.
First we define our inputs.
# Phenotype vector
y <- matrix(data = c(7.2, 3.5, 5.7, 6.3))
y
## [,1]
## [1,] 7.2
## [2,] 3.5
## [3,] 5.7
## [4,] 6.3
# Design matrix for fixed effects
X <- matrix(data = c(1, 1, 1, 1))
X
## [,1]
## [1,] 1
## [2,] 1
## [3,] 1
## [4,] 1
# Design matrix for genotypes (=genotype matrix)
W <- matrix(data = c(2, 2, 2, 0, 1,
0, 2, 1, 1, 0,
1, 1, 1, 1, 1,
1, 1, 0, 1, 2),
byrow = TRUE, nrow = 4, ncol = 5)
W
## [,1] [,2] [,3] [,4] [,5]
## [1,] 2 2 2 0 1
## [2,] 0 2 1 1 0
## [3,] 1 1 1 1 1
## [4,] 1 1 0 1 2
# Variance components
sigma2e <- 0.1
sigma2alpha <- 0.02
Now we setup and solve the Mixed Model Equations (MME).
# Left Hand Side (LHS) components
XtX <- crossprod(X)
XtX
## [,1]
## [1,] 4
XtW <- crossprod(X, W)
XtW
## [,1] [,2] [,3] [,4] [,5]
## [1,] 4 6 4 3 4
WtX <- t(XtW)
WtX
## [,1]
## [1,] 4
## [2,] 6
## [3,] 4
## [4,] 3
## [5,] 4
WtW <- crossprod(W)
WtW
## [,1] [,2] [,3] [,4] [,5]
## [1,] 6 6 5 2 5
## [2,] 6 10 7 4 5
## [3,] 5 7 6 2 3
## [4,] 2 4 2 3 3
## [5,] 5 5 3 3 6
# LHS
LHS <- rbind(cbind(XtX, XtW),
cbind(WtX, WtW + diag(nrow = 5) * sigma2e / sigma2alpha))
LHS
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 4 4 6 4 3 4
## [2,] 4 11 6 5 2 5
## [3,] 6 6 15 7 4 5
## [4,] 4 5 7 11 2 3
## [5,] 3 2 4 2 8 3
## [6,] 4 5 5 3 3 11
# Right Hand Side (RHS) components
Xty <- crossprod(X, y)
Xty
## [,1]
## [1,] 22.7
Wty <- crossprod(W, y)
Wty
## [,1]
## [1,] 26.4
## [2,] 33.4
## [3,] 23.6
## [4,] 15.5
## [5,] 25.5
# RHS
RHS <- rbind(Xty,
Wty)
RHS
## [,1]
## [1,] 22.7
## [2,] 26.4
## [3,] 33.4
## [4,] 23.6
## [5,] 15.5
## [6,] 25.5
Finally, we solve the MME and deliver conditional means for marker effects.
# Solutions --> conditional means
sol <- solve(LHS, RHS)
sol
## [,1]
## [1,] 5.05347395
## [2,] 0.44019851
## [3,] -0.08337469
## [4,] 0.10062035
## [5,] -0.17841191
## [6,] 0.33957816
Lastly, we now estimate breeding values from genotypes and estimated marker effects.
W %*% sol[-1]
## [,1]
## [1,] 1.2544665
## [2,] -0.2445409
## [3,] 0.6186104
## [4,] 0.8575682
Based on the above data objects and lecture slides, implement now the G-BLUP model.
# Design matrix for individuals (linking their phenotypes to their breeding value)
Z <- matrix(data = c(1, 0, 0, 0,
0, 1, 0, 0,
0, 0, 1, 0,
0, 0, 0, 1),
byrow = TRUE, nrow = 4, ncol = 4)
# Genomic covariance matrix between individuals
G <- tcrossprod(W)
# LHS
XtZ <- crossprod(X, Z)
ZtX <- t(XtZ)
ZtZ <- crossprod(Z, Z)
LHS <- rbind(cbind(XtX, XtZ),
cbind(ZtX, ZtZ + solve(G) * sigma2e / sigma2alpha))
# Note that we can do solve(G=WW^T) only when we do not have linear dependencies
# in the genotype matrix W - this will happen sooner when we have fewer markers.
# A way to bypass this issue, is to add additional regularisation (on top of
# regularising marker effects) by adding a small value m to diagonal of G
# (diag(G) = diag(G) + m), which means that we are assuming this model for
# breeding values: a = W alpha + r_m with alpha ~ N(0, I\sigma^2_alpha) and
# r_m ~ N(0, Im\sigma^2_alpha). With m = 0.01 we would have
# r_m ~ N(0, I 0.01\sigma^2_alpha).
LHS
## [,1] [,2] [,3] [,4] [,5]
## [1,] 4 1.00000000 1.00000000 1.000000 1.0000000
## [2,] 1 2.85483871 0.08064516 -3.629032 0.9677419
## [3,] 1 0.08064516 3.17741935 -2.983871 1.1290323
## [4,] 1 -3.62903226 -2.98387097 15.274194 -5.8064516
## [5,] 1 0.96774194 1.12903226 -5.806452 4.5483871
# Right Hand Side (RHS) components
Xty <- crossprod(X, y)
Xty
## [,1]
## [1,] 22.7
Zty <- crossprod(Z, y)
Zty
## [,1]
## [1,] 7.2
## [2,] 3.5
## [3,] 5.7
## [4,] 6.3
# RHS
RHS <- rbind(Xty,
Zty)
RHS
## [,1]
## [1,] 22.7
## [2,] 7.2
## [3,] 3.5
## [4,] 5.7
## [5,] 6.3
# Solutions
solve(LHS, RHS)
## [,1]
## [1,] 5.0534739
## [2,] 1.2544665
## [3,] -0.2445409
## [4,] 0.6186104
## [5,] 0.8575682