We will use the beef breeding simulation and compare accuracy of selection using phenotypes or pedigree-based estimates of breeding values.
# Clean the working environment
rm(list = ls())
# Set the default plot layout
par(mfrow = c(1, 1))
# Load AlphaSimR
library(package = "AlphaSimR")
## Loading required package: R6
library(package = "pedigreeTools")
library(package = "asreml")
## Loading required package: Matrix
## Online License checked out Wed Feb 7 09:20:28 2024
## Loading ASReml-R version 4.2
asreml.options(workspace = "1gb")
# Simulate founder genomes
# ... this runMacs() call will take quite a bit of time!
# founderGenomes = runMacs(nInd = 2000,
# nChr = 30,
# segSites = 100,
# species = "CATTLE")
# ... therefore, we will speed up the demonstration with the quickHaplo()
# (we recommend use of runMacs() for research purposes!)
founderGenomes = quickHaplo(nInd = 2000,
nChr = 30,
segSites = 100)
# Global simulation parameters
SP = SimParam$new(founderGenomes)
SP$setSexes("yes_sys")
phenoVar = 400
heritability = 0.3
genVar = phenoVar * heritability
SP$addTraitA(nQtlPerChr = 100, mean = 250, var = genVar)
SP$setTrackPed(TRUE)
sexDifference = 10
# Base population
founders = newPop(founderGenomes)
# Phenotype the base population
founders = setPheno(pop = founders, h2 = heritability)
founders@pheno[founders@sex == "M", 1] =
founders@pheno[founders@sex == "M", 1] + sexDifference
# Assign year of birth for the founders
year = 0
founders = setMisc(x = founders,
node = "yearOfBirth",
value = year)
cat("Founder males\n")
## Founder males
nSires2 = 10
nSires1 = 40
nSires = nSires1 + nSires2
males = selectInd(pop = founders, nInd = nSires, use = "pheno", sex = "M")
sires2 = males[ 1:nSires2]
sires2 = setMisc(x = sires2,
node = "yearOfBirth",
value = -1)
sires1 = males[(nSires2+1):nSires]
sires1 = setMisc(x = sires1,
node = "yearOfBirth",
value = 0)
sires = c(sires2, sires1)
cat("Founder females\n")
## Founder females
(nFemales = sum(founders@sex == "F"))
## [1] 1000
females = selectInd(pop = founders, nInd = nFemales, use = "pheno", sex = "F")
# Here we define how many dams are kept in each year
nDams1 = 500
nDams2 = 250
nDams3 = 150
nDams4 = 75
nDams5 = 25
cat("Dams5\n")
## Dams5
(start = 1)
## [1] 1
(end = nDams5)
## [1] 25
dams5 = females[start:end]
dams5 = setMisc(x = dams5,
node = "yearOfBirth",
value = -4)
cat("Dams4\n")
## Dams4
(start = end + 1)
## [1] 26
(end = start - 1 + nDams4)
## [1] 100
dams4 = females[start:end]
dams4 = setMisc(x = dams4,
node = "yearOfBirth",
value = -3)
cat("Dams3\n")
## Dams3
(start = end + 1)
## [1] 101
(end = start - 1 + nDams3)
## [1] 250
dams3 = females[start:end]
dams3 = setMisc(x = dams3,
node = "yearOfBirth",
value = -2)
cat("Dams2\n")
## Dams2
(start = end + 1)
## [1] 251
(end = start - 1 + nDams2)
## [1] 500
dams2 = females[start:end]
dams2 = setMisc(x = dams2,
node = "yearOfBirth",
value = -1)
cat("Dams1\n")
## Dams1
(start = end + 1)
## [1] 501
(end = start - 1 + nDams1)
## [1] 1000
dams1 = females[start:end]
dams1 = setMisc(x = dams1,
node = "yearOfBirth",
value = 0)
dams = c(dams5, dams4, dams3, dams2, dams1)
# Function to record and collate data
recordData <- function(data = NULL, pop, yearOfUse = NA) {
popData = data.frame(id = pop@id,
father = pop@father,
mother = pop@mother,
sex = pop@sex,
gv = pop@gv[, "Trait1"],
pheno = pop@pheno[, "Trait1"],
yearOfBirth = unlist(getMisc(x = pop, node ="yearOfBirth")),
yearOfUse = yearOfUse)
# Manage first instance of calling this function, when data is NULL
if (is.null(data)) {
ret = popData
} else {
ret = rbind(data, popData)
}
return(ret)
}
data4AllAnimals = recordData(pop = founders)
head(data4AllAnimals)
## id father mother sex gv pheno yearOfBirth yearOfUse
## 1 1 0 0 M 245.0727 258.1907 0 NA
## 2 2 0 0 F 251.8903 249.9372 0 NA
## 3 3 0 0 M 247.0972 270.8629 0 NA
## 4 4 0 0 F 248.2361 238.7732 0 NA
## 5 5 0 0 M 254.6868 268.6909 0 NA
## 6 6 0 0 F 255.3589 248.2552 0 NA
for (year in 1:3) {
# year = 1
# year = 2
cat("Working on the year:", year, "\n")
# Generate progeny from current dams and sires
candidates = randCross2(males = sires, females = dams, nCrosses = nInd(dams))
candidates = setMisc(x = candidates, node = "yearOfBirth", value = year)
candidates = setPheno(candidates, h2 = heritability)
candidates@pheno[candidates@sex == "M", 1] =
candidates@pheno[candidates@sex == "M", 1] + sexDifference
tmp = SP$pedigree
pedigree = data.frame(id = rownames(tmp),
father = tmp[, "father"],
mother = tmp[, "mother"])
nrow(pedigree)
pedigree2 = with(pedigree, pedigree(label = id,
sire = father,
dam = mother))
pedNRM = getA(ped = pedigree2)
dim(pedNRM)
nrow(data4AllAnimals)
# Ensure correct factor levels
data4AllAnimals$id = factor(data4AllAnimals$id, levels = pedigree$id)
data4AllAnimals$sex = factor(data4AllAnimals$sex)
# Prepare data for pedigree-based genetic evaluation WITHOUT candidates' phenotypes
if (year > 1) { # Need more than 1 generation of phenotypes for this analysis!
# Fit the model using AlphaSimR::solveUVM() WITHOUT candidates' phenotypes
if (FALSE) {
y = matrix(data4AllAnimals$pheno)
dim(y)
X = model.matrix(data4AllAnimals$pheno ~ 1 + sex)
dim(X)
Z = model.matrix(data4AllAnimals$pheno ~ data4AllAnimals$id - 1)
dim(Z)
fit = solveUVM(y = y, X = X, Z = Z, K = as.matrix(pedNRM))
ebv = data.frame(id = pedigree$id, ebv = fit$u)
}
# Fit the model using asreml::asreml() WITHOUT candidates' phenotypes
if (TRUE) {
fit <- asreml(fixed = pheno ~ 1 + sex,
random = ~ vm(obj = id, source = as.matrix(pedNRM)),
residual = ~ units,
data = data4AllAnimals)
ebv = data.frame(id = pedigree$id, ebv = fit$coefficients$rand[, 1])
}
}
# Record data
data4AllAnimals = recordData(data = data4AllAnimals,
pop = candidates)
# Evaluate accuracy of estimated breeding values
if (year > 1) { # Need more than 1 generation of phenotypes for this analysis!
tmp = merge(x = data4AllAnimals, y = ebv, all = TRUE)
cat("Accuracy of evaluation by year of birth (WITHOUT candidates' phenos)\n")
for (yearOfBirth in unique(tmp$yearOfBirth)) {
tmp2 = tmp[tmp$yearOfBirth == yearOfBirth, ]
cat("YearOfBirth:", yearOfBirth,
"cor(gv, ebv):", round(cor(tmp2$gv, tmp2$ebv), digits = 2),
"cor(gv, pheno):", round(cor(tmp2$gv, tmp2$pheno), digits = 2), "\n")
}
}
# Prepare data for pedigree-based genetic evaluation WITH candidates' phenotypes
nrow(pedigree)
nrow(data4AllAnimals)
dim(pedNRM)
# Fit the model using AlphaSimR::solveUVM() WITH candidates' phenotypes
if (FALSE) {
y = matrix(data4AllAnimals$pheno)
dim(y)
X = model.matrix(data4AllAnimals$pheno ~ 1 + sex)
dim(X)
Z = model.matrix(data4AllAnimals$pheno ~ data4AllAnimals$id - 1)
dim(Z)
fit = solveUVM(y = y, X = X, Z = Z, K = as.matrix(pedNRM))
ebv = data.frame(id = pedigree$id, ebv = fit$u)
}
# Fit the model using asreml::asreml() WITH candidates' phenotypes
if (TRUE) {
fit <- asreml(fixed = pheno ~ 1 + sex,
random = ~ vm(obj = id, source = as.matrix(pedNRM)),
residual = ~ units,
data = data4AllAnimals)
ebv = data.frame(id = pedigree$id, ebv = fit$coefficients$rand[, 1])
}
# Evaluate accuracy of estimated breeding values
tmp = merge(x = data4AllAnimals, y = ebv, all = TRUE)
cat("Accuracy of evaluation by year of birth (WITH candidates' phenos)\n")
for (yearOfBirth in unique(tmp$yearOfBirth)) {
tmp2 = tmp[tmp$yearOfBirth == yearOfBirth, ]
cat("YearOfBirth:", yearOfBirth,
"cor(gv, ebv):", round(cor(tmp2$gv, tmp2$ebv), digits = 2),
"cor(gv, pheno):", round(cor(tmp2$gv, tmp2$pheno), digits = 2), "\n")
}
setEbv2 = function(pop, df) {
# pop AlphaSimR population
# df data.frame with id and ebv columns
dfSel = df[df$id %in% pop@id, c("id", "ebv")]
ebv = dfSel$ebv
names(ebv) = dfSel$id
pop@ebv = as.matrix(ebv[pop@id])
pop
}
reportAccuracy = function(pop, name) {
cat(paste0("Accuracy of ", name, " selection\n"),
"cor(gv, ebv):", round(cor(pop@gv, pop@ebv), digits = 2),
"cor(gv, pheno):", round(cor(pop@gv, pop@pheno), digits = 2), "\n")
}
# Update and select sires
sires1 = setEbv2(sires1, tmp)
reportAccuracy(sires1, "sires2")
sires2 = selectInd(pop = sires1, nInd = nSires2, use = "ebv")
candidatesM = setEbv2(candidates[candidates@sex == "M"], tmp)
reportAccuracy(candidatesM, "sires1")
sires1 = selectInd(pop = candidatesM, nInd = nSires1, use = "ebv")
sires = c(sires2, sires1)
# Update and select dams
dams4 = setEbv2(dams4, tmp)
reportAccuracy(dams4, "dams5")
dams5 = selectInd(pop = dams4, nInd = nDams5, use = "ebv")
dams3 = setEbv2(dams3, tmp)
reportAccuracy(dams3, "dams4")
dams4 = selectInd(pop = dams3, nInd = nDams4, use = "ebv")
dams2 = setEbv2(dams2, tmp)
reportAccuracy(dams2, "dams3")
dams3 = selectInd(pop = dams2, nInd = nDams3, use = "ebv")
dams1 = setEbv2(dams1, tmp)
reportAccuracy(dams1, "dams2")
dams2 = selectInd(pop = dams1, nInd = nDams2, use = "ebv")
candidatesF = setEbv2(candidates[candidates@sex == "F"], tmp)
reportAccuracy(candidatesF, "dams1")
dams1 = selectInd(pop = candidatesF, nInd = nDams1, use = "ebv")
dams = c(dams5, dams4, dams3, dams2, dams1)
save.image("pedigree-based-model.RData")
}
## Working on the year: 1
## 'as(<dtTMatrix>, "dtCMatrix")' is deprecated.
## Use 'as(., "CsparseMatrix")' instead.
## See help("Deprecated") and help("Matrix-deprecated").
## Accuracy of evaluation by year of birth (WITH candidates' phenos)
## YearOfBirth: 0 cor(gv, ebv): 0.59 cor(gv, pheno): 0.54
## YearOfBirth: 1 cor(gv, ebv): 0.65 cor(gv, pheno): 0.54
## Accuracy of sires2 selection
## cor(gv, ebv): 0.83 cor(gv, pheno): 0.26
## Accuracy of sires1 selection
## cor(gv, ebv): 0.64 cor(gv, pheno): 0.55
## Accuracy of dams5 selection
## cor(gv, ebv): 0.36 cor(gv, pheno): 0.1
## Accuracy of dams4 selection
## cor(gv, ebv): 0.23 cor(gv, pheno): 0.03
## Accuracy of dams3 selection
## cor(gv, ebv): 0.18 cor(gv, pheno): 0.05
## Accuracy of dams2 selection
## cor(gv, ebv): 0.5 cor(gv, pheno): 0.4
## Accuracy of dams1 selection
## cor(gv, ebv): 0.66 cor(gv, pheno): 0.55
## Working on the year: 2
## Accuracy of evaluation by year of birth (WITHOUT candidates' phenos)
## YearOfBirth: 0 cor(gv, ebv): 0.59 cor(gv, pheno): 0.54
## YearOfBirth: 1 cor(gv, ebv): 0.65 cor(gv, pheno): 0.54
## YearOfBirth: 2 cor(gv, ebv): 0.29 cor(gv, pheno): 0.5
## Accuracy of evaluation by year of birth (WITH candidates' phenos)
## YearOfBirth: 0 cor(gv, ebv): 0.6 cor(gv, pheno): 0.54
## YearOfBirth: 1 cor(gv, ebv): 0.67 cor(gv, pheno): 0.54
## YearOfBirth: 2 cor(gv, ebv): 0.6 cor(gv, pheno): 0.5
## Accuracy of sires2 selection
## cor(gv, ebv): 0.8 cor(gv, pheno): 0.24
## Accuracy of sires1 selection
## cor(gv, ebv): 0.61 cor(gv, pheno): 0.55
## Accuracy of dams5 selection
## cor(gv, ebv): 0.27 cor(gv, pheno): -0.02
## Accuracy of dams4 selection
## cor(gv, ebv): 0.28 cor(gv, pheno): 0.01
## Accuracy of dams3 selection
## cor(gv, ebv): 0.35 cor(gv, pheno): 0.09
## Accuracy of dams2 selection
## cor(gv, ebv): 0.68 cor(gv, pheno): 0.55
## Accuracy of dams1 selection
## cor(gv, ebv): 0.59 cor(gv, pheno): 0.5
## Working on the year: 3
## Accuracy of evaluation by year of birth (WITHOUT candidates' phenos)
## YearOfBirth: 0 cor(gv, ebv): 0.6 cor(gv, pheno): 0.54
## YearOfBirth: 1 cor(gv, ebv): 0.67 cor(gv, pheno): 0.54
## YearOfBirth: 2 cor(gv, ebv): 0.6 cor(gv, pheno): 0.5
## YearOfBirth: 3 cor(gv, ebv): 0.38 cor(gv, pheno): 0.5
## Accuracy of evaluation by year of birth (WITH candidates' phenos)
## YearOfBirth: 0 cor(gv, ebv): 0.6 cor(gv, pheno): 0.54
## YearOfBirth: 1 cor(gv, ebv): 0.68 cor(gv, pheno): 0.54
## YearOfBirth: 2 cor(gv, ebv): 0.62 cor(gv, pheno): 0.5
## YearOfBirth: 3 cor(gv, ebv): 0.63 cor(gv, pheno): 0.5
## Accuracy of sires2 selection
## cor(gv, ebv): 0.79 cor(gv, pheno): -0.11
## Accuracy of sires1 selection
## cor(gv, ebv): 0.63 cor(gv, pheno): 0.53
## Accuracy of dams5 selection
## cor(gv, ebv): 0.22 cor(gv, pheno): -0.09
## Accuracy of dams4 selection
## cor(gv, ebv): 0.39 cor(gv, pheno): 0.06
## Accuracy of dams3 selection
## cor(gv, ebv): 0.55 cor(gv, pheno): 0.35
## Accuracy of dams2 selection
## cor(gv, ebv): 0.6 cor(gv, pheno): 0.5
## Accuracy of dams1 selection
## cor(gv, ebv): 0.63 cor(gv, pheno): 0.52