R/Simulate_under_LTM.R
simulate_under_LTM_single.Rd
simulate_under_LTM_single
simulates families and thresholds under
the liability threshold model for a given family structure and a single
phenotype. Please note that it is not possible to simulate different
family structures.
simulate_under_LTM_single(
fam_vec = c("m", "f", "s1", "mgm", "mgf", "pgm", "pgf"),
n_fam = NULL,
add_ind = TRUE,
h2 = 0.5,
n_sim = 1000,
pop_prev = 0.1
)
A vector of strings holding the different family members. All family members must be represented by strings from the following list:
m
(Mother)
f
(Father)
c[0-9]*.[0-9]*
(Children)
mgm
(Maternal grandmother)
mgf
(Maternal grandfather)
pgm
(Paternal grandmother)
pgf
(Paternal grandfather)
s[0-9]*
(Full siblings)
mhs[0-9]*
(Half-siblings - maternal side)
phs[0-9]*
(Half-siblings - paternal side)
mau[0-9]*
(Aunts/Uncles - maternal side)
pau[0-9]*
(Aunts/Uncles - paternal side).
Defaults to c("m","f","s1","mgm","mgf","pgm","pgf")
.
A named vector holding the desired number of family members.
See setNames
.
All names must be picked from the list mentioned above. Defaults to NULL
.
A logical scalar indicating whether the genetic
component of the full liability as well as the full
liability for the underlying target individual should be included in
the covariance matrix. Defaults to TRUE
.
A number representing the liability-scale heritability for a single phenotype. Must be non-negative. Note that under the liability threshold model, the heritability must also be at most 1. Defaults to 0.5.
A positive number representing the number of simulations. Defaults to 1000.
A positive number representing the population prevalence, i.e. the overall prevalence in the population. Must be smaller than 1. Defaults to 0.1.
If either fam_vec
or n_fam
is used as the argument and
is of the required format, if the liability-scale heritability h2
is a number satisfying \(0 \leq h^2 \leq 1\), n_sim
is a strictly positive number,
and pop_prev
is a positive number that is at most one,
then the output will be a list holding two tibbles.
The first tibble, sim_obs
, holds the simulated liabilities, the disease
status and the current age/age-of-onset for all family members in each of the
n_sim
families.
The second tibble, thresholds
, holds the family identifier, the personal
identifier, the role (specified in fam_vec or n_fam) as well as
the lower and upper thresholds for all individuals in all families.
Note that this tibble has the format required in estimate_liability
.
In addition, note that if neither fam_vec
nor n_fam
is specified, the function
returns the disease status, the current age/age-of-onset, the lower and upper
thresholds, as well as the personal identifier for a single individual, namely
the individual under consideration (called o
).
If both fam_vec
and n_fam
are defined, the user is asked to
decide on which of the two vectors to use.
simulate_under_LTM_single()
#> $sim_obs
#> # A tibble: 1,000 × 26
#> fam_ID g o m f s1 mgm mgf pgm pgf
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 fam_ID_1 -0.413 0.649 -0.455 -0.499 0.215 1.10 -1.25 -0.169 0.0693
#> 2 fam_ID_2 -0.366 0.132 0.766 -0.905 -1.10 -0.329 0.715 -0.376 -0.642
#> 3 fam_ID_3 0.168 0.539 -0.724 0.287 0.670 -0.163 -0.221 1.23 0.137
#> 4 fam_ID_4 0.509 0.539 0.484 0.606 1.38 -0.484 1.21 0.0297 0.898
#> 5 fam_ID_5 -0.180 -0.428 -0.889 -0.251 1.07 1.13 0.793 -0.902 0.325
#> 6 fam_ID_6 1.05 0.714 0.725 0.227 0.127 -0.838 0.215 0.506 -0.0291
#> 7 fam_ID_7 -1.11 -1.39 -0.111 -1.54 -0.966 0.0387 -1.58 -2.19 -0.726
#> 8 fam_ID_8 0.139 -1.07 -1.65 1.26 0.0843 -1.05 0.828 1.63 0.0455
#> 9 fam_ID_9 -0.628 -0.178 -0.395 0.746 2.01 -0.972 -0.304 -0.240 -0.598
#> 10 fam_ID_10 0.489 1.15 1.07 0.878 1.58 -1.99 0.173 0.241 0.0422
#> # ℹ 990 more rows
#> # ℹ 16 more variables: o_status <lgl>, m_status <lgl>, f_status <lgl>,
#> # s1_status <lgl>, mgm_status <lgl>, mgf_status <lgl>, pgm_status <lgl>,
#> # pgf_status <lgl>, o_aoo <dbl>, m_aoo <dbl>, f_aoo <dbl>, s1_aoo <dbl>,
#> # mgm_aoo <dbl>, mgf_aoo <dbl>, pgm_aoo <dbl>, pgf_aoo <dbl>
#>
#> $thresholds
#> # A tibble: 8,000 × 5
#> fam_ID indiv_ID role lower upper
#> <chr> <chr> <chr> <dbl> <dbl>
#> 1 fam_ID_1 fam_ID_1_1 o -Inf 3.03
#> 2 fam_ID_2 fam_ID_2_1 o -Inf 2.91
#> 3 fam_ID_3 fam_ID_3_1 o -Inf 3.45
#> 4 fam_ID_4 fam_ID_4_1 o -Inf 3.48
#> 5 fam_ID_5 fam_ID_5_1 o -Inf 2.43
#> 6 fam_ID_6 fam_ID_6_1 o -Inf 3.52
#> 7 fam_ID_7 fam_ID_7_1 o -Inf 2.99
#> 8 fam_ID_8 fam_ID_8_1 o -Inf 2.79
#> 9 fam_ID_9 fam_ID_9_1 o -Inf 3.03
#> 10 fam_ID_10 fam_ID_10_1 o -Inf 2.99
#> # ℹ 7,990 more rows
#>
simulate_under_LTM_single(fam_vec = NULL, n_fam = stats::setNames(c(1,1,1,2),
c("m","mgm","mgf","mhs")))
#> $sim_obs
#> # A tibble: 1,000 × 20
#> fam_ID g o m mgm mgf mhs1 mhs2 o_status m_status
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <lgl>
#> 1 fam_ID_1 0.185 0.960 -0.330 0.405 -0.834 -1.08 -0.967 FALSE FALSE
#> 2 fam_ID_2 -0.751 -1.01 -1.52 -2.07 1.15 -0.335 -0.712 FALSE FALSE
#> 3 fam_ID_3 0.994 1.42 0.160 0.675 1.55 0.681 1.32 TRUE FALSE
#> 4 fam_ID_4 0.643 0.832 2.01 0.299 -0.574 0.994 -0.0984 FALSE TRUE
#> 5 fam_ID_5 0.345 0.521 -1.46 0.131 1.70 1.03 0.366 FALSE FALSE
#> 6 fam_ID_6 -0.572 -0.174 1.76 1.72 -0.429 0.997 -1.22 FALSE TRUE
#> 7 fam_ID_7 -0.138 -0.325 -0.916 -0.539 0.269 -2.76 -2.60 FALSE FALSE
#> 8 fam_ID_8 -0.426 -0.0371 0.622 0.834 0.875 1.20 -0.878 FALSE FALSE
#> 9 fam_ID_9 -1.08 -1.36 -1.28 0.409 -0.944 -1.21 -1.41 FALSE FALSE
#> 10 fam_ID_… -0.263 -0.533 -0.717 -0.979 -0.845 -0.152 -1.07 FALSE FALSE
#> # ℹ 990 more rows
#> # ℹ 10 more variables: mgm_status <lgl>, mgf_status <lgl>, mhs1_status <lgl>,
#> # mhs2_status <lgl>, o_aoo <dbl>, m_aoo <dbl>, mgm_aoo <dbl>, mgf_aoo <dbl>,
#> # mhs1_aoo <dbl>, mhs2_aoo <dbl>
#>
#> $thresholds
#> # A tibble: 6,000 × 5
#> fam_ID indiv_ID role lower upper
#> <chr> <chr> <chr> <dbl> <dbl>
#> 1 fam_ID_1 fam_ID_1_1 o -Inf 2.68
#> 2 fam_ID_2 fam_ID_2_1 o -Inf 2.91
#> 3 fam_ID_3 fam_ID_3_1 o 1.42 1.42
#> 4 fam_ID_4 fam_ID_4_1 o -Inf 2.83
#> 5 fam_ID_5 fam_ID_5_1 o -Inf 2.68
#> 6 fam_ID_6 fam_ID_6_1 o -Inf 3.06
#> 7 fam_ID_7 fam_ID_7_1 o -Inf 2.72
#> 8 fam_ID_8 fam_ID_8_1 o -Inf 2.43
#> 9 fam_ID_9 fam_ID_9_1 o -Inf 2.68
#> 10 fam_ID_10 fam_ID_10_1 o -Inf 3.45
#> # ℹ 5,990 more rows
#>
simulate_under_LTM_single(fam_vec = c("m","f","s1"), n_fam = NULL, add_ind = FALSE,
h2 = 0.5, n_sim = 500, pop_prev = .05)
#> $sim_obs
#> # A tibble: 500 × 10
#> fam_ID m f s1 m_status f_status s1_status m_aoo f_aoo s1_aoo
#> <chr> <dbl> <dbl> <dbl> <lgl> <lgl> <lgl> <dbl> <dbl> <dbl>
#> 1 fam_ID_1 -1.17 0.584 0.766 FALSE FALSE FALSE 56 59 37
#> 2 fam_ID_2 -0.473 0.493 -2.18 FALSE FALSE FALSE 38 46 16
#> 3 fam_ID_3 -2.90 0.853 0.305 FALSE FALSE FALSE 38 40 12
#> 4 fam_ID_4 -2.15 0.292 0.526 FALSE FALSE FALSE 60 52 32
#> 5 fam_ID_5 0.194 1.21 -1.04 FALSE FALSE FALSE 59 53 35
#> 6 fam_ID_6 -1.36 1.68 0.818 FALSE TRUE FALSE 32 80 13
#> 7 fam_ID_7 0.458 1.68 -0.231 FALSE TRUE FALSE 67 82 38
#> 8 fam_ID_8 2.39 0.171 2.08 TRUE FALSE TRUE 47 32 56
#> 9 fam_ID_9 0.337 -1.11 0.803 FALSE FALSE FALSE 46 49 22
#> 10 fam_ID_10 0.153 0.130 -0.565 FALSE FALSE FALSE 65 56 38
#> # ℹ 490 more rows
#>
#> $thresholds
#> # A tibble: 1,500 × 5
#> fam_ID indiv_ID role lower upper
#> <chr> <chr> <chr> <dbl> <dbl>
#> 1 fam_ID_1 fam_ID_1_1 m -Inf 2.08
#> 2 fam_ID_2 fam_ID_2_1 m -Inf 2.75
#> 3 fam_ID_3 fam_ID_3_1 m -Inf 2.75
#> 4 fam_ID_4 fam_ID_4_1 m -Inf 1.96
#> 5 fam_ID_5 fam_ID_5_1 m -Inf 1.99
#> 6 fam_ID_6 fam_ID_6_1 m -Inf 2.97
#> 7 fam_ID_7 fam_ID_7_1 m -Inf 1.81
#> 8 fam_ID_8 fam_ID_8_1 m 2.40 2.40
#> 9 fam_ID_9 fam_ID_9_1 m -Inf 2.44
#> 10 fam_ID_10 fam_ID_10_1 m -Inf 1.84
#> # ℹ 1,490 more rows
#>
simulate_under_LTM_single(fam_vec = c(), n_fam = NULL, add_ind = TRUE, h2 = 0.5,
n_sim = 200, pop_prev = 0.05)
#> Warning message:
#> Neither fam_vec nor n_fam is specified...
#> $sim_obs
#> # A tibble: 200 × 5
#> fam_ID g o o_status o_aoo
#> <chr> <dbl> <dbl> <lgl> <dbl>
#> 1 fam_ID_1 0.252 0.877 FALSE 14
#> 2 fam_ID_2 0.0336 0.152 FALSE 36
#> 3 fam_ID_3 0.534 0.244 FALSE 17
#> 4 fam_ID_4 0.815 1.68 TRUE 81
#> 5 fam_ID_5 -0.417 0.144 FALSE 18
#> 6 fam_ID_6 0.332 0.334 FALSE 37
#> 7 fam_ID_7 -1.26 -0.984 FALSE 23
#> 8 fam_ID_8 0.0944 -1.19 FALSE 39
#> 9 fam_ID_9 0.115 0.489 FALSE 40
#> 10 fam_ID_10 -0.110 0.421 FALSE 32
#> # ℹ 190 more rows
#>
#> $thresholds
#> # A tibble: 200 × 5
#> fam_ID indiv_ID role lower upper
#> <chr> <chr> <chr> <dbl> <dbl>
#> 1 fam_ID_1 fam_ID_1_1 o -Inf 3.60
#> 2 fam_ID_2 fam_ID_2_1 o -Inf 2.82
#> 3 fam_ID_3 fam_ID_3_1 o -Inf 3.50
#> 4 fam_ID_4 fam_ID_4_1 o 1.68 1.68
#> 5 fam_ID_5 fam_ID_5_1 o -Inf 3.47
#> 6 fam_ID_6 fam_ID_6_1 o -Inf 2.79
#> 7 fam_ID_7 fam_ID_7_1 o -Inf 3.30
#> 8 fam_ID_8 fam_ID_8_1 o -Inf 2.71
#> 9 fam_ID_9 fam_ID_9_1 o -Inf 2.67
#> 10 fam_ID_10 fam_ID_10_1 o -Inf 2.97
#> # ℹ 190 more rows
#>