Introduction

Following the lecture slides we will show the tiny example of pedigree-based linear mixed model.

# Clean the working environment
rm(list = ls())

Approach 1: Setup matrices “by hand”

First we define our inputs.

# Phenotype vector
y <- matrix(data = c(2.4, 4.1, 4.5))
y
##      [,1]
## [1,]  2.4
## [2,]  4.1
## [3,]  4.5
# Design matrix for fixed effects
X <- matrix(data = c(1, 1, 1))
X
##      [,1]
## [1,]    1
## [2,]    1
## [3,]    1
# Design matrix for breeding values
Z <- matrix(data = c(0, 1, 0, 0,
                     0, 0, 1, 0,
                     0, 0, 0, 1), byrow = TRUE, nrow = 3)
Z
##      [,1] [,2] [,3] [,4]
## [1,]    0    1    0    0
## [2,]    0    0    1    0
## [3,]    0    0    0    1
# Design matrix for pedigree
T <- matrix(data = c(  1,   0,   0, 0,
                       0,   1,   0, 0,
                     1/2, 1/2,   1, 0,
                     3/4, 1/4, 1/2, 1), byrow = TRUE, nrow = 4)
T
##      [,1] [,2] [,3] [,4]
## [1,] 1.00 0.00  0.0    0
## [2,] 0.00 1.00  0.0    0
## [3,] 0.50 0.50  1.0    0
## [4,] 0.75 0.25  0.5    1
# Conditional variance matrix for pedigree
R <- diag(x = c(1, 1, 0.5, 0.5))
R
##      [,1] [,2] [,3] [,4]
## [1,]    1    0  0.0  0.0
## [2,]    0    1  0.0  0.0
## [3,]    0    0  0.5  0.0
## [4,]    0    0  0.0  0.5
# Pedigree-based Numerator Relationship Matrix (NRM)
A <- T %*% R %*% t(T)

# Variance components
sigma2e <- 0.1
sigma2a <- 0.1

Now we setup and solve the Mixed Model Equations (MME).

# Left Hand Side (LHS) components
# XtX <- t(X) %*% X
XtX <- crossprod(X)
XtX
##      [,1]
## [1,]    3
# XtZ <- t(X) %*% Z
XtZ <- crossprod(X, Z)
XtZ
##      [,1] [,2] [,3] [,4]
## [1,]    0    1    1    1
ZtX <- t(XtZ)
ZtX
##      [,1]
## [1,]    0
## [2,]    1
## [3,]    1
## [4,]    1
# ZtZ <- t(Z) %*% Z
ZtZ <- crossprod(Z)
ZtZ
##      [,1] [,2] [,3] [,4]
## [1,]    0    0    0    0
## [2,]    0    1    0    0
## [3,]    0    0    1    0
## [4,]    0    0    0    1
# Inverse of pedigree-based NRM
AInv <- solve(A)

# LHS
LHS <- rbind(cbind(XtX, XtZ),
             cbind(ZtX, ZtZ + AInv * sigma2e / sigma2a))
LHS
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    3  0.0  1.0  1.0    1
## [2,]    0  2.0  0.5 -0.5   -1
## [3,]    1  0.5  2.5 -1.0    0
## [4,]    1 -0.5 -1.0  3.5   -1
## [5,]    1 -1.0  0.0 -1.0    3
# Right Hand Side (RHS) components
# Xty <- t(X) %*% y
Xty <- crossprod(X, y)
Xty
##      [,1]
## [1,]   11
# Zty <- t(X) %*% y
Zty <- crossprod(Z, y)
Zty
##      [,1]
## [1,]  0.0
## [2,]  2.4
## [3,]  4.1
## [4,]  4.5
# RHS
RHS <- rbind(Xty,
             Zty)
RHS
##      [,1]
## [1,] 11.0
## [2,]  0.0
## [3,]  2.4
## [4,]  4.1
## [5,]  4.5

Finally, we solve the MME and deliver conditional means and covariances.

# Inverse of the LHS
LHSInv <- solve(LHS)

# Solutions --> conditional means
sol <- LHSInv %*% RHS
# If we do not have access to LHSInv we could use
# sol <- solve(LHS, RHS)
sol
##       [,1]
## [1,]  3.55
## [2,]  0.45
## [3,] -0.45
## [4,]  0.25
## [5,]  0.55
# Conditional covariance
PEC <- LHSInv * sigma2e

# Presentation of results
results <- data.frame(parameter = c("mu", paste0("a", 1:4)),
                      mean = sol,
                      var = diag(PEC))
results$reliability <- c(NA, 1 - results$var[-1] / (sigma2a * diag(A)))
results
##   parameter  mean        var reliability
## 1        mu  3.55 0.10172414          NA
## 2        a1  0.45 0.08448276   0.1551724
## 3        a2 -0.45 0.08448276   0.1551724
## 4        a3  0.25 0.09137931   0.0862069
## 5        a4  0.55 0.09827586   0.2137931

Approach 2: Setup matrices using R packages Matrix & pedigreeTools, then solve

First we define our inputs.

# install.packages(pkg = "pedigreeTools")
library(package = "pedigreeTools") # for getX()
library(package = "Matrix") # for sparse.model.matrix()

pedigree <- data.frame(individual = c(1, 2, 3, 4),
                       father = c(0, 0, 1, 1),
                       mother = c(0, 0, 2, 3))
pedigree2 <- pedigreeTools::pedigree(label = pedigree$individual,
                                     sire = pedigree$father,
                                     dam = pedigree$mother)

phenotypes <- data.frame(individual = c(2, 3, 4),
                         phenotype = c(2.4, 4.1, 4.5))
phenotypes$individual <- factor(x = phenotypes$individual, levels = pedigree$individual)

# Phenotype vector
y <- phenotypes$phenotype
y
## [1] 2.4 4.1 4.5
# Design matrix for fixed effects
# X <- model.matrix(phenotypes$phenotype ~ 1) # dense
X <- sparse.model.matrix(phenotypes$phenotype ~ 1) # sparse
X
## 3 x 1 sparse Matrix of class "dgCMatrix"
##   (Intercept)
## 1           1
## 2           1
## 3           1
# Design matrix for breeding values
# Z <- model.matrix(phenotypes$phenotype ~ phenotypes$individual - 1) # dense
Z <- sparse.model.matrix(phenotypes$phenotype ~ phenotypes$individual - 1) # sparse
Z
## 3 x 4 sparse Matrix of class "dgCMatrix"
##   phenotypes$individual1 phenotypes$individual2 phenotypes$individual3
## 1                      .                      1                      .
## 2                      .                      .                      1
## 3                      .                      .                      .
##   phenotypes$individual4
## 1                      .
## 2                      .
## 3                      1
# Design matrix for pedigree
T <- getT(ped = pedigree2)
## 'as(<dtTMatrix>, "dtCMatrix")' is deprecated.
## Use 'as(., "CsparseMatrix")' instead.
## See help("Deprecated") and help("Matrix-deprecated").
T
## 4 x 4 sparse Matrix of class "dtCMatrix"
##      1    2   3 4
## 1 1.00 .    .   .
## 2 .    1.00 .   .
## 3 0.50 0.50 1.0 .
## 4 0.75 0.25 0.5 1
# Conditional variance matrix for pedigree
R <- getD(ped = pedigree2)
R
##   1   2   3   4 
## 1.0 1.0 0.5 0.5
# Pedigree-based Numerator Relationship Matrix (NRM)
# A <- as.matrix(T) %*% diag(R) %*% t(as.matrix(T))
A <- getA(ped = pedigree2)
A
## 4 x 4 sparse Matrix of class "dsCMatrix"
##      1    2    3    4
## 1 1.00 .    0.50 0.75
## 2 .    1.00 0.50 0.25
## 3 0.50 0.50 1.00 0.75
## 4 0.75 0.25 0.75 1.25
# Inverse pedigree-based NRM
AInv <- getAInv(ped = pedigree2)
AInv
## 4 x 4 sparse Matrix of class "dsCMatrix"
##      1    2    3  4
## 1  2.0  0.5 -0.5 -1
## 2  0.5  1.5 -1.0  .
## 3 -0.5 -1.0  2.5 -1
## 4 -1.0  .   -1.0  2
# This shows how the inverse is actually built
# ... Pedigree Directed Acyclic Graph (DAG) matrix
TInv <- getTInv(ped = pedigree2)
TInv
## 4 x 4 sparse Matrix of class "dtCMatrix" (unitriangular)
##      1    2    3 4
## 1  I    .    .   .
## 2  .    I    .   .
## 3 -0.5 -0.5  I   .
## 4 -0.5  .   -0.5 I
# ... Inverse of Mendelian sampling variance
# RInv <- Diagonal(x = 1 / diag(R))
RInv <- diag(1/R)
RInv
##      [,1] [,2] [,3] [,4]
## [1,]    1    0    0    0
## [2,]    0    1    0    0
## [3,]    0    0    2    0
## [4,]    0    0    0    2
# ... Getting the inverse via generalised Cholesky factorisation
AInv2 <- t(TInv) %*% RInv %*% TInv
AInv2
## 4 x 4 Matrix of class "dgeMatrix"
##      1    2    3  4
## 1  2.0  0.5 -0.5 -1
## 2  0.5  1.5 -1.0  0
## 3 -0.5 -1.0  2.5 -1
## 4 -1.0  0.0 -1.0  2
# ... Compare getAInv() vs the above approach (~1e-16 diffs are OK!)
AInv - AInv2
## 4 x 4 Matrix of class "dgeMatrix"
##               1             2             3             4
## 1  0.000000e+00  1.110223e-16 -1.110223e-16 -2.220446e-16
## 2  1.110223e-16  0.000000e+00 -2.220446e-16  0.000000e+00
## 3 -1.110223e-16 -2.220446e-16  4.440892e-16 -2.220446e-16
## 4 -2.220446e-16  0.000000e+00 -2.220446e-16  4.440892e-16
# Variance components
sigma2e <- 0.1
sigma2a <- 0.1

Exercise: Now you can repeat the code from above to setup and solve MME.