Introduction

We will use the beef breeding simulation and compare accuracy of selection using phenotypes or pedigree-based estimates of breeding values.

Base population

# Clean the working environment
rm(list = ls())

# Set the default plot layout
par(mfrow = c(1, 1))

# Load AlphaSimR
library(package = "AlphaSimR")
## Loading required package: R6
library(package = "pedigreeTools")
library(package = "asreml")
## Loading required package: Matrix
## Online License checked out Wed Feb  7 09:20:28 2024
## Loading ASReml-R version 4.2
asreml.options(workspace = "1gb")

# Simulate founder genomes
# ... this runMacs() call will take quite a bit of time! 
# founderGenomes = runMacs(nInd = 2000,
#                          nChr = 30,
#                          segSites = 100,
#                          species = "CATTLE")
# ... therefore, we will speed up the demonstration with the quickHaplo()
# (we recommend use of runMacs() for research purposes!)
founderGenomes = quickHaplo(nInd = 2000,
                            nChr = 30,
                            segSites = 100)

# Global simulation parameters
SP = SimParam$new(founderGenomes)

SP$setSexes("yes_sys")

phenoVar = 400
heritability = 0.3
genVar = phenoVar * heritability
SP$addTraitA(nQtlPerChr = 100, mean = 250, var = genVar)
SP$setTrackPed(TRUE)
sexDifference = 10

# Base population
founders = newPop(founderGenomes)

# Phenotype the base population
founders = setPheno(pop = founders, h2 = heritability)
founders@pheno[founders@sex == "M", 1] = 
  founders@pheno[founders@sex == "M", 1] + sexDifference

# Assign year of birth for the founders
year = 0
founders = setMisc(x = founders,
                   node = "yearOfBirth",
                   value = year)

Initial parent populations

cat("Founder males\n")
## Founder males
nSires2 = 10
nSires1 = 40
nSires = nSires1 + nSires2
males = selectInd(pop = founders, nInd = nSires, use = "pheno", sex = "M")
sires2 = males[ 1:nSires2]
sires2 = setMisc(x = sires2,
                 node = "yearOfBirth",
                 value = -1)
sires1 = males[(nSires2+1):nSires]
sires1 = setMisc(x = sires1,
                 node = "yearOfBirth",
                 value = 0)
sires = c(sires2, sires1)

cat("Founder females\n")
## Founder females
(nFemales = sum(founders@sex == "F"))
## [1] 1000
females = selectInd(pop = founders, nInd = nFemales, use = "pheno", sex = "F")

# Here we define how many dams are kept in each year
nDams1 = 500
nDams2 = 250
nDams3 = 150
nDams4 = 75
nDams5 = 25

cat("Dams5\n")
## Dams5
(start = 1)
## [1] 1
(end = nDams5)
## [1] 25
dams5 = females[start:end]
dams5 = setMisc(x = dams5,
                node = "yearOfBirth",
                value = -4)

cat("Dams4\n")
## Dams4
(start = end + 1)
## [1] 26
(end = start - 1 + nDams4)
## [1] 100
dams4 = females[start:end]
dams4 = setMisc(x = dams4,
                node = "yearOfBirth",
                value = -3)

cat("Dams3\n")
## Dams3
(start = end + 1)
## [1] 101
(end = start - 1 + nDams3)
## [1] 250
dams3 = females[start:end]
dams3 = setMisc(x = dams3,
                node = "yearOfBirth",
                value = -2)

cat("Dams2\n")
## Dams2
(start = end + 1)
## [1] 251
(end = start - 1 + nDams2)
## [1] 500
dams2 = females[start:end]
dams2 = setMisc(x = dams2,
                node = "yearOfBirth",
                value = -1)

cat("Dams1\n")
## Dams1
(start = end + 1)
## [1] 501
(end = start - 1 + nDams1)
## [1] 1000
dams1 = females[start:end]
dams1 = setMisc(x = dams1,
                node = "yearOfBirth",
                value = 0)

dams = c(dams5, dams4, dams3, dams2, dams1)

Data recording

# Function to record and collate data
recordData <- function(data = NULL, pop, yearOfUse = NA) {
  popData = data.frame(id          = pop@id,
                       father      = pop@father,
                       mother      = pop@mother,
                       sex         = pop@sex,
                       gv          = pop@gv[, "Trait1"],
                       pheno       = pop@pheno[, "Trait1"],
                       yearOfBirth = unlist(getMisc(x = pop, node ="yearOfBirth")),
                       yearOfUse   = yearOfUse)
  # Manage first instance of calling this function, when data is NULL
  if (is.null(data)) {
    ret = popData
  } else {
    ret = rbind(data, popData)
  }
  return(ret)
}

data4AllAnimals = recordData(pop = founders)
head(data4AllAnimals)
##   id father mother sex       gv    pheno yearOfBirth yearOfUse
## 1  1      0      0   M 245.0727 258.1907           0        NA
## 2  2      0      0   F 251.8903 249.9372           0        NA
## 3  3      0      0   M 247.0972 270.8629           0        NA
## 4  4      0      0   F 248.2361 238.7732           0        NA
## 5  5      0      0   M 254.6868 268.6909           0        NA
## 6  6      0      0   F 255.3589 248.2552           0        NA

Multiple years of genetic evaluation and selection

for (year in 1:3) {
  # year = 1
  # year = 2
  cat("Working on the year:", year, "\n")
  
  # Generate progeny from current dams and sires
  candidates = randCross2(males = sires, females = dams, nCrosses = nInd(dams))
  candidates = setMisc(x = candidates, node = "yearOfBirth", value = year)
  candidates = setPheno(candidates, h2 = heritability)
  candidates@pheno[candidates@sex == "M", 1] = 
    candidates@pheno[candidates@sex == "M", 1] + sexDifference
  
  tmp = SP$pedigree
  pedigree = data.frame(id = rownames(tmp),
                        father = tmp[, "father"],
                        mother = tmp[, "mother"])
  nrow(pedigree)
  pedigree2 = with(pedigree, pedigree(label = id,
                                      sire = father,
                                      dam = mother))
  pedNRM = getA(ped = pedigree2)
  dim(pedNRM)
  nrow(data4AllAnimals)
  
  # Ensure correct factor levels 
  data4AllAnimals$id = factor(data4AllAnimals$id, levels = pedigree$id)
  data4AllAnimals$sex = factor(data4AllAnimals$sex)
  
  # Prepare data for pedigree-based genetic evaluation WITHOUT candidates' phenotypes
  if (year > 1) { # Need more than 1 generation of phenotypes for this analysis!

    # Fit the model using AlphaSimR::solveUVM() WITHOUT candidates' phenotypes
    if (FALSE) {
      y = matrix(data4AllAnimals$pheno)
      dim(y)
      X = model.matrix(data4AllAnimals$pheno ~ 1 + sex)
      dim(X)
      Z = model.matrix(data4AllAnimals$pheno ~ data4AllAnimals$id - 1)
      dim(Z)
      fit = solveUVM(y = y, X = X, Z = Z, K = as.matrix(pedNRM))
      ebv = data.frame(id = pedigree$id, ebv = fit$u)
    }
    
    # Fit the model using asreml::asreml() WITHOUT candidates' phenotypes
    if (TRUE) {
      fit <- asreml(fixed = pheno ~ 1 + sex,
                    random = ~ vm(obj = id, source = as.matrix(pedNRM)),
                    residual = ~ units,
                    data = data4AllAnimals)
      ebv = data.frame(id = pedigree$id, ebv = fit$coefficients$rand[, 1])
    }
  }

  # Record data
  data4AllAnimals = recordData(data = data4AllAnimals,
                               pop = candidates)

  # Evaluate accuracy of estimated breeding values
  if (year > 1) { # Need more than 1 generation of phenotypes for this analysis!
    tmp = merge(x = data4AllAnimals, y = ebv, all = TRUE)
    cat("Accuracy of evaluation by year of birth (WITHOUT candidates' phenos)\n")
    for (yearOfBirth in unique(tmp$yearOfBirth)) {
      tmp2 = tmp[tmp$yearOfBirth == yearOfBirth, ]
      cat("YearOfBirth:", yearOfBirth,
          "cor(gv, ebv):", round(cor(tmp2$gv, tmp2$ebv), digits = 2),
          "cor(gv, pheno):", round(cor(tmp2$gv, tmp2$pheno), digits = 2), "\n")
    }
  }
  
  # Prepare data for pedigree-based genetic evaluation WITH candidates' phenotypes
  nrow(pedigree)
  nrow(data4AllAnimals)
  dim(pedNRM)
  
  # Fit the model using AlphaSimR::solveUVM() WITH candidates' phenotypes
  if (FALSE) {
    y = matrix(data4AllAnimals$pheno)
    dim(y)
    X = model.matrix(data4AllAnimals$pheno ~ 1 + sex)
    dim(X)
    Z = model.matrix(data4AllAnimals$pheno ~ data4AllAnimals$id - 1)
    dim(Z)
    fit = solveUVM(y = y, X = X, Z = Z, K = as.matrix(pedNRM))
    ebv = data.frame(id = pedigree$id, ebv = fit$u)
  }

  # Fit the model using asreml::asreml() WITH candidates' phenotypes
  if (TRUE) {
    fit <- asreml(fixed = pheno ~ 1 + sex,
                  random = ~ vm(obj = id, source = as.matrix(pedNRM)),
                  residual = ~ units,
                  data = data4AllAnimals)
    ebv = data.frame(id = pedigree$id, ebv = fit$coefficients$rand[, 1])
  }

  # Evaluate accuracy of estimated breeding values
  tmp = merge(x = data4AllAnimals, y = ebv, all = TRUE)
  cat("Accuracy of evaluation by year of birth (WITH candidates' phenos)\n")
  for (yearOfBirth in unique(tmp$yearOfBirth)) {
    tmp2 = tmp[tmp$yearOfBirth == yearOfBirth, ]
    cat("YearOfBirth:", yearOfBirth,
        "cor(gv, ebv):", round(cor(tmp2$gv, tmp2$ebv), digits = 2),
        "cor(gv, pheno):", round(cor(tmp2$gv, tmp2$pheno), digits = 2), "\n")
  }
  
  setEbv2 = function(pop, df) {
    # pop AlphaSimR population
    # df data.frame with id and ebv columns
    dfSel = df[df$id %in% pop@id, c("id", "ebv")]
    ebv = dfSel$ebv
    names(ebv) = dfSel$id
    pop@ebv = as.matrix(ebv[pop@id])
    pop
  }
  
  reportAccuracy = function(pop, name) {
    cat(paste0("Accuracy of ", name, " selection\n"),
        "cor(gv, ebv):", round(cor(pop@gv, pop@ebv), digits = 2),
        "cor(gv, pheno):", round(cor(pop@gv, pop@pheno), digits = 2), "\n")
  }
  
  # Update and select sires
  sires1 = setEbv2(sires1, tmp)
  reportAccuracy(sires1, "sires2")
  sires2 = selectInd(pop = sires1, nInd = nSires2, use = "ebv")
  
  candidatesM = setEbv2(candidates[candidates@sex == "M"], tmp)
  reportAccuracy(candidatesM, "sires1")
  sires1 = selectInd(pop = candidatesM, nInd = nSires1, use = "ebv")
  
  sires = c(sires2, sires1)
  
  # Update and select dams
  dams4 = setEbv2(dams4, tmp)
  reportAccuracy(dams4, "dams5")
  dams5 = selectInd(pop = dams4, nInd = nDams5, use = "ebv")
  
  dams3 = setEbv2(dams3, tmp)
  reportAccuracy(dams3, "dams4")
  dams4 = selectInd(pop = dams3, nInd = nDams4, use = "ebv")
  
  dams2 = setEbv2(dams2, tmp)
  reportAccuracy(dams2, "dams3")
  dams3 = selectInd(pop = dams2, nInd = nDams3, use = "ebv")
  
  dams1 = setEbv2(dams1, tmp)
  reportAccuracy(dams1, "dams2")
  dams2 = selectInd(pop = dams1, nInd = nDams2, use = "ebv")
  
  candidatesF = setEbv2(candidates[candidates@sex == "F"], tmp)
  reportAccuracy(candidatesF, "dams1")
  dams1 = selectInd(pop = candidatesF, nInd = nDams1, use = "ebv")
  
  dams = c(dams5, dams4, dams3, dams2, dams1)
  save.image("pedigree-based-model.RData")
}
## Working on the year: 1
## 'as(<dtTMatrix>, "dtCMatrix")' is deprecated.
## Use 'as(., "CsparseMatrix")' instead.
## See help("Deprecated") and help("Matrix-deprecated").
## Accuracy of evaluation by year of birth (WITH candidates' phenos)
## YearOfBirth: 0 cor(gv, ebv): 0.59 cor(gv, pheno): 0.54 
## YearOfBirth: 1 cor(gv, ebv): 0.65 cor(gv, pheno): 0.54 
## Accuracy of sires2 selection
##  cor(gv, ebv): 0.83 cor(gv, pheno): 0.26 
## Accuracy of sires1 selection
##  cor(gv, ebv): 0.64 cor(gv, pheno): 0.55 
## Accuracy of dams5 selection
##  cor(gv, ebv): 0.36 cor(gv, pheno): 0.1 
## Accuracy of dams4 selection
##  cor(gv, ebv): 0.23 cor(gv, pheno): 0.03 
## Accuracy of dams3 selection
##  cor(gv, ebv): 0.18 cor(gv, pheno): 0.05 
## Accuracy of dams2 selection
##  cor(gv, ebv): 0.5 cor(gv, pheno): 0.4 
## Accuracy of dams1 selection
##  cor(gv, ebv): 0.66 cor(gv, pheno): 0.55 
## Working on the year: 2 
## Accuracy of evaluation by year of birth (WITHOUT candidates' phenos)
## YearOfBirth: 0 cor(gv, ebv): 0.59 cor(gv, pheno): 0.54 
## YearOfBirth: 1 cor(gv, ebv): 0.65 cor(gv, pheno): 0.54 
## YearOfBirth: 2 cor(gv, ebv): 0.29 cor(gv, pheno): 0.5 
## Accuracy of evaluation by year of birth (WITH candidates' phenos)
## YearOfBirth: 0 cor(gv, ebv): 0.6 cor(gv, pheno): 0.54 
## YearOfBirth: 1 cor(gv, ebv): 0.67 cor(gv, pheno): 0.54 
## YearOfBirth: 2 cor(gv, ebv): 0.6 cor(gv, pheno): 0.5 
## Accuracy of sires2 selection
##  cor(gv, ebv): 0.8 cor(gv, pheno): 0.24 
## Accuracy of sires1 selection
##  cor(gv, ebv): 0.61 cor(gv, pheno): 0.55 
## Accuracy of dams5 selection
##  cor(gv, ebv): 0.27 cor(gv, pheno): -0.02 
## Accuracy of dams4 selection
##  cor(gv, ebv): 0.28 cor(gv, pheno): 0.01 
## Accuracy of dams3 selection
##  cor(gv, ebv): 0.35 cor(gv, pheno): 0.09 
## Accuracy of dams2 selection
##  cor(gv, ebv): 0.68 cor(gv, pheno): 0.55 
## Accuracy of dams1 selection
##  cor(gv, ebv): 0.59 cor(gv, pheno): 0.5 
## Working on the year: 3 
## Accuracy of evaluation by year of birth (WITHOUT candidates' phenos)
## YearOfBirth: 0 cor(gv, ebv): 0.6 cor(gv, pheno): 0.54 
## YearOfBirth: 1 cor(gv, ebv): 0.67 cor(gv, pheno): 0.54 
## YearOfBirth: 2 cor(gv, ebv): 0.6 cor(gv, pheno): 0.5 
## YearOfBirth: 3 cor(gv, ebv): 0.38 cor(gv, pheno): 0.5 
## Accuracy of evaluation by year of birth (WITH candidates' phenos)
## YearOfBirth: 0 cor(gv, ebv): 0.6 cor(gv, pheno): 0.54 
## YearOfBirth: 1 cor(gv, ebv): 0.68 cor(gv, pheno): 0.54 
## YearOfBirth: 2 cor(gv, ebv): 0.62 cor(gv, pheno): 0.5 
## YearOfBirth: 3 cor(gv, ebv): 0.63 cor(gv, pheno): 0.5 
## Accuracy of sires2 selection
##  cor(gv, ebv): 0.79 cor(gv, pheno): -0.11 
## Accuracy of sires1 selection
##  cor(gv, ebv): 0.63 cor(gv, pheno): 0.53 
## Accuracy of dams5 selection
##  cor(gv, ebv): 0.22 cor(gv, pheno): -0.09 
## Accuracy of dams4 selection
##  cor(gv, ebv): 0.39 cor(gv, pheno): 0.06 
## Accuracy of dams3 selection
##  cor(gv, ebv): 0.55 cor(gv, pheno): 0.35 
## Accuracy of dams2 selection
##  cor(gv, ebv): 0.6 cor(gv, pheno): 0.5 
## Accuracy of dams1 selection
##  cor(gv, ebv): 0.63 cor(gv, pheno): 0.52