Following the lecture slides we will show the tiny example of pedigree-based linear mixed model.
# Clean the working environment
rm(list = ls())
First we define our inputs.
# Phenotype vector
y <- matrix(data = c(2.4, 4.1, 4.5))
y
## [,1]
## [1,] 2.4
## [2,] 4.1
## [3,] 4.5
# Design matrix for fixed effects
X <- matrix(data = c(1, 1, 1))
X
## [,1]
## [1,] 1
## [2,] 1
## [3,] 1
# Design matrix for breeding values
Z <- matrix(data = c(0, 1, 0, 0,
0, 0, 1, 0,
0, 0, 0, 1), byrow = TRUE, nrow = 3)
Z
## [,1] [,2] [,3] [,4]
## [1,] 0 1 0 0
## [2,] 0 0 1 0
## [3,] 0 0 0 1
# Design matrix for pedigree
T <- matrix(data = c( 1, 0, 0, 0,
0, 1, 0, 0,
1/2, 1/2, 1, 0,
3/4, 1/4, 1/2, 1), byrow = TRUE, nrow = 4)
T
## [,1] [,2] [,3] [,4]
## [1,] 1.00 0.00 0.0 0
## [2,] 0.00 1.00 0.0 0
## [3,] 0.50 0.50 1.0 0
## [4,] 0.75 0.25 0.5 1
# Conditional variance matrix for pedigree
R <- diag(x = c(1, 1, 0.5, 0.5))
R
## [,1] [,2] [,3] [,4]
## [1,] 1 0 0.0 0.0
## [2,] 0 1 0.0 0.0
## [3,] 0 0 0.5 0.0
## [4,] 0 0 0.0 0.5
# Pedigree-based Numerator Relationship Matrix (NRM)
A <- T %*% R %*% t(T)
# Variance components
sigma2e <- 0.1
sigma2a <- 0.1
Now we setup and solve the Mixed Model Equations (MME).
# Left Hand Side (LHS) components
# XtX <- t(X) %*% X
XtX <- crossprod(X)
XtX
## [,1]
## [1,] 3
# XtZ <- t(X) %*% Z
XtZ <- crossprod(X, Z)
XtZ
## [,1] [,2] [,3] [,4]
## [1,] 0 1 1 1
ZtX <- t(XtZ)
ZtX
## [,1]
## [1,] 0
## [2,] 1
## [3,] 1
## [4,] 1
# ZtZ <- t(Z) %*% Z
ZtZ <- crossprod(Z)
ZtZ
## [,1] [,2] [,3] [,4]
## [1,] 0 0 0 0
## [2,] 0 1 0 0
## [3,] 0 0 1 0
## [4,] 0 0 0 1
# Inverse of pedigree-based NRM
AInv <- solve(A)
# LHS
LHS <- rbind(cbind(XtX, XtZ),
cbind(ZtX, ZtZ + AInv * sigma2e / sigma2a))
LHS
## [,1] [,2] [,3] [,4] [,5]
## [1,] 3 0.0 1.0 1.0 1
## [2,] 0 2.0 0.5 -0.5 -1
## [3,] 1 0.5 2.5 -1.0 0
## [4,] 1 -0.5 -1.0 3.5 -1
## [5,] 1 -1.0 0.0 -1.0 3
# Right Hand Side (RHS) components
# Xty <- t(X) %*% y
Xty <- crossprod(X, y)
Xty
## [,1]
## [1,] 11
# Zty <- t(X) %*% y
Zty <- crossprod(Z, y)
Zty
## [,1]
## [1,] 0.0
## [2,] 2.4
## [3,] 4.1
## [4,] 4.5
# RHS
RHS <- rbind(Xty,
Zty)
RHS
## [,1]
## [1,] 11.0
## [2,] 0.0
## [3,] 2.4
## [4,] 4.1
## [5,] 4.5
Finally, we solve the MME and deliver conditional means and covariances.
# Inverse of the LHS
LHSInv <- solve(LHS)
# Solutions --> conditional means
sol <- LHSInv %*% RHS
# If we do not have access to LHSInv we could use
# sol <- solve(LHS, RHS)
sol
## [,1]
## [1,] 3.55
## [2,] 0.45
## [3,] -0.45
## [4,] 0.25
## [5,] 0.55
# Conditional covariance
PEC <- LHSInv * sigma2e
# Presentation of results
results <- data.frame(parameter = c("mu", paste0("a", 1:4)),
mean = sol,
var = diag(PEC))
results$reliability <- c(NA, 1 - results$var[-1] / (sigma2a * diag(A)))
results
## parameter mean var reliability
## 1 mu 3.55 0.10172414 NA
## 2 a1 0.45 0.08448276 0.1551724
## 3 a2 -0.45 0.08448276 0.1551724
## 4 a3 0.25 0.09137931 0.0862069
## 5 a4 0.55 0.09827586 0.2137931
First we define our inputs.
# install.packages(pkg = "pedigreeTools")
library(package = "pedigreeTools") # for getX()
library(package = "Matrix") # for sparse.model.matrix()
pedigree <- data.frame(individual = c(1, 2, 3, 4),
father = c(0, 0, 1, 1),
mother = c(0, 0, 2, 3))
pedigree2 <- pedigreeTools::pedigree(label = pedigree$individual,
sire = pedigree$father,
dam = pedigree$mother)
phenotypes <- data.frame(individual = c(2, 3, 4),
phenotype = c(2.4, 4.1, 4.5))
phenotypes$individual <- factor(x = phenotypes$individual, levels = pedigree$individual)
# Phenotype vector
y <- phenotypes$phenotype
y
## [1] 2.4 4.1 4.5
# Design matrix for fixed effects
# X <- model.matrix(phenotypes$phenotype ~ 1) # dense
X <- sparse.model.matrix(phenotypes$phenotype ~ 1) # sparse
X
## 3 x 1 sparse Matrix of class "dgCMatrix"
## (Intercept)
## 1 1
## 2 1
## 3 1
# Design matrix for breeding values
# Z <- model.matrix(phenotypes$phenotype ~ phenotypes$individual - 1) # dense
Z <- sparse.model.matrix(phenotypes$phenotype ~ phenotypes$individual - 1) # sparse
Z
## 3 x 4 sparse Matrix of class "dgCMatrix"
## phenotypes$individual1 phenotypes$individual2 phenotypes$individual3
## 1 . 1 .
## 2 . . 1
## 3 . . .
## phenotypes$individual4
## 1 .
## 2 .
## 3 1
# Design matrix for pedigree
T <- getT(ped = pedigree2)
T
## 4 x 4 sparse Matrix of class "dtCMatrix"
## 1 2 3 4
## 1 1.00 . . .
## 2 . 1.00 . .
## 3 0.50 0.50 1.0 .
## 4 0.75 0.25 0.5 1
# Conditional variance matrix for pedigree
R <- getD(ped = pedigree2)
R
## 1 2 3 4
## 1.0 1.0 0.5 0.5
# Pedigree-based Numerator Relationship Matrix (NRM)
# A <- as.matrix(T) %*% diag(R) %*% t(as.matrix(T))
A <- getA(ped = pedigree2)
A
## 4 x 4 sparse Matrix of class "dsCMatrix"
## 1 2 3 4
## 1 1.00 . 0.50 0.75
## 2 . 1.00 0.50 0.25
## 3 0.50 0.50 1.00 0.75
## 4 0.75 0.25 0.75 1.25
# Inverse pedigree-based NRM
AInv <- getAInv(ped = pedigree2)
AInv
## 4 x 4 sparse Matrix of class "dsCMatrix"
## 1 2 3 4
## 1 2.0 0.5 -0.5 -1
## 2 0.5 1.5 -1.0 .
## 3 -0.5 -1.0 2.5 -1
## 4 -1.0 . -1.0 2
# This shows how the inverse is actually built
# ... Pedigree Directed Acyclic Graph (DAG) matrix
TInv <- getTInv(ped = pedigree2)
TInv
## 4 x 4 sparse Matrix of class "dtCMatrix" (unitriangular)
## 1 2 3 4
## 1 I . . .
## 2 . I . .
## 3 -0.5 -0.5 I .
## 4 -0.5 . -0.5 I
# ... Inverse of Mendelian sampling variance
RInv <- Diagonal(x = 1 / R)
RInv
## 4 x 4 diagonal matrix of class "ddiMatrix"
## [,1] [,2] [,3] [,4]
## [1,] 1 . . .
## [2,] . 1 . .
## [3,] . . 2 .
## [4,] . . . 2
# ... Getting the inverse via generalised Cholesky factorisation
AInv2 <- t(TInv) %*% RInv %*% TInv
AInv2
## 4 x 4 sparse Matrix of class "dgCMatrix"
## 1 2 3 4
## 1 2.0 0.5 -0.5 -1
## 2 0.5 1.5 -1.0 .
## 3 -0.5 -1.0 2.5 -1
## 4 -1.0 . -1.0 2
# ... Compare getAInv() vs the above approach (~1e-16 diffs are OK!)
AInv - AInv2
## 4 x 4 sparse Matrix of class "dgCMatrix"
## 1 2 3 4
## 1 0.000000e+00 1.110223e-16 -1.110223e-16 -2.220446e-16
## 2 1.110223e-16 0.000000e+00 -2.220446e-16 .
## 3 -1.110223e-16 -2.220446e-16 4.440892e-16 -2.220446e-16
## 4 -2.220446e-16 . -2.220446e-16 4.440892e-16
# Variance components
sigma2e <- 0.1
sigma2a <- 0.1
Exercise: Now you can repeat the code from above to setup and solve MME.
# Left Hand Side (LHS) components
# XtX <- t(X) %*% X
XtX <- crossprod(X)
XtX
## 1 x 1 sparse Matrix of class "dsCMatrix"
## (Intercept)
## (Intercept) 3
# XtZ <- t(X) %*% Z
XtZ <- crossprod(X, Z)
XtZ
## 1 x 4 sparse Matrix of class "dgCMatrix"
## phenotypes$individual1 phenotypes$individual2
## (Intercept) . 1
## phenotypes$individual3 phenotypes$individual4
## (Intercept) 1 1
ZtX <- t(XtZ)
ZtX
## 4 x 1 sparse Matrix of class "dgCMatrix"
## (Intercept)
## phenotypes$individual1 .
## phenotypes$individual2 1
## phenotypes$individual3 1
## phenotypes$individual4 1
# ZtZ <- t(Z) %*% Z
ZtZ <- crossprod(Z)
ZtZ
## 4 x 4 sparse Matrix of class "dsCMatrix"
## phenotypes$individual1 phenotypes$individual2
## phenotypes$individual1 . .
## phenotypes$individual2 . 1
## phenotypes$individual3 . .
## phenotypes$individual4 . .
## phenotypes$individual3 phenotypes$individual4
## phenotypes$individual1 . .
## phenotypes$individual2 . .
## phenotypes$individual3 1 .
## phenotypes$individual4 . 1
# LHS
LHS <- rbind(cbind(XtX, XtZ),
cbind(ZtX, ZtZ + AInv * sigma2e / sigma2a))
LHS
## 5 x 5 sparse Matrix of class "dgCMatrix"
## (Intercept) phenotypes$individual1
## (Intercept) 3 .
## phenotypes$individual1 . 2.0
## phenotypes$individual2 1 0.5
## phenotypes$individual3 1 -0.5
## phenotypes$individual4 1 -1.0
## phenotypes$individual2 phenotypes$individual3
## (Intercept) 1.0 1.0
## phenotypes$individual1 0.5 -0.5
## phenotypes$individual2 2.5 -1.0
## phenotypes$individual3 -1.0 3.5
## phenotypes$individual4 . -1.0
## phenotypes$individual4
## (Intercept) 1
## phenotypes$individual1 -1
## phenotypes$individual2 .
## phenotypes$individual3 -1
## phenotypes$individual4 3
# Right Hand Side (RHS) components
# Xty <- t(X) %*% y
Xty <- crossprod(X, y)
Xty
## 1 x 1 Matrix of class "dgeMatrix"
## [,1]
## (Intercept) 11
# Zty <- t(X) %*% y
Zty <- crossprod(Z, y)
Zty
## 4 x 1 Matrix of class "dgeMatrix"
## [,1]
## phenotypes$individual1 0.0
## phenotypes$individual2 2.4
## phenotypes$individual3 4.1
## phenotypes$individual4 4.5
# RHS
RHS <- rbind(Xty,
Zty)
RHS
## 5 x 1 Matrix of class "dgeMatrix"
## [,1]
## (Intercept) 11.0
## phenotypes$individual1 0.0
## phenotypes$individual2 2.4
## phenotypes$individual3 4.1
## phenotypes$individual4 4.5
# Inverse of the LHS
LHSInv <- solve(LHS)
# Solutions --> conditional means
sol <- LHSInv %*% RHS
# If we do not have access to LHSInv we could use
# sol <- solve(LHS, RHS)
sol
## 5 x 1 Matrix of class "dgeMatrix"
## [,1]
## [1,] 3.55
## [2,] 0.45
## [3,] -0.45
## [4,] 0.25
## [5,] 0.55
# Conditional covariance
PEC <- LHSInv * sigma2e
# Presentation of results
results <- data.frame(parameter = c("mu", paste0("a", 1:4)),
mean = sol[, 1],
var = diag(PEC))
results$reliability <- c(NA, 1 - results$var[-1] / (sigma2a * diag(A)))
results
## parameter mean var reliability
## 1 mu 3.55 0.10172414 NA
## 2 a1 0.45 0.08448276 0.1551724
## 3 a2 -0.45 0.08448276 0.1551724
## 4 a3 0.25 0.09137931 0.0862069
## 5 a4 0.55 0.09827586 0.2137931