diff --git a/.Rbuildignore b/.Rbuildignore index cf44746..7b0fa46 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -2,3 +2,5 @@ ^\.Rproj\.user$ ^README\.Rmd$ ^LICENSE\.md$ +^cran-comments\.md$ +^CRAN-SUBMISSION$ diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION new file mode 100644 index 0000000..04b6fae --- /dev/null +++ b/CRAN-SUBMISSION @@ -0,0 +1,3 @@ +Version: 1.0.3 +Date: 2023-06-04 13:27:38 UTC +SHA: f54cd023b2e6bb2de1e74d1d0c89c13828149c44 diff --git a/DESCRIPTION b/DESCRIPTION index f14db16..62a4398 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,26 +1,29 @@ Package: DemoKin -Title: Demokin -Description: Estimate population kin counts and its distribution by type and age -Version: 1.0.0 +Title: Estimate Population Kin Distribution +Description: Estimate population kin counts and its distribution by type, age and sex. + The package implements one-sex and two-sex framework for studying living-death availability, + with time varying rates or not, and multi-stage model. +Version: 1.0.3 Authors@R: c( person("Iván", "Williams", email = "act.ivanwilliams@gmail.com", role = "cre"), person("Diego", "Alburez-Gutierrez", email = "alburezgutierrez@demogr.mpg.de", role = "aut"), - person("Xi", "Song", email = "xisong@sas.upenn.edu", role = "ctb")) + person("Xi", "Song", email = "xisong@sas.upenn.edu", role = "ctb"), + person("Caswell", "Hal", email = "caswell@demogr.mpg.de", role = "ctb")) License: MIT + file LICENSE Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.1 +RoxygenNote: 7.2.3 Suggests: knitr, rmarkdown, - testthat (>= 3.0.0) + testthat (>= 3.0.0), + ggplot2 VignetteBuilder: knitr Imports: dplyr, tidyr, purrr, - HMDHFDplus, progress, matrixcalc, Matrix, @@ -28,7 +31,9 @@ Imports: stats, igraph, magrittr, + data.table, lifecycle +URL: https://github.com/IvanWilli/DemoKin BugReports: https://github.com/IvanWilli/DemoKin/issues Depends: R (>= 2.10) diff --git a/NAMESPACE b/NAMESPACE index f5c7de1..9a087d8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,12 +1,19 @@ # Generated by roxygen2: do not edit by hand export("%>%") -export(demokin_codes) -export(get_HMDHFD) export(kin) +export(kin2sex) export(kin_multi_stage) export(kin_time_invariant) +export(kin_time_invariant_2sex) +export(kin_time_invariant_2sex_cod) export(kin_time_variant) +export(kin_time_variant_2sex) +export(kin_time_variant_2sex_cod) +export(output_period_cohort_combination) export(plot_diagram) export(rename_kin) +export(timevarying_kin) +export(timevarying_kin_2sex) +export(timevarying_kin_2sex_cod) importFrom(magrittr,"%>%") diff --git a/NEWS.md b/NEWS.md index 1db9e6f..cfc0438 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,16 @@ +# DemoKin 1.0.3 + +# DemoKin 1.0.2 + +# DemoKin 1.0.1 + # DemoKin 1.0.0 * Added a `NEWS.md` file to track changes to the package. * Change stable/non-stable references to time varying/non-varying rates. * Add multi-state process. +# DemoKin 1.0.1 +* Submitted to CRAN +* Death counts are placed in the age where Focal experience the death. +* Aggregated kin types are allowed (`s` for older and younger sisters, for example). diff --git a/R/aux_funs.R b/R/aux_funs.R index e8c745b..a8e7bfd 100644 --- a/R/aux_funs.R +++ b/R/aux_funs.R @@ -1,49 +1,17 @@ - -#' print kin codes -#' @description Print kin codes and labels -#' @export -demokin_codes <- function(){ - codes <- c("coa", "cya", "d", "gd", "ggd", "ggm", "gm", "m", "nos", "nys", "oa", "ya", "os", "ys") - caswell_codes <- c("t", "v", "a", "b", "c", "h", "g", "d", "p", "q", "r", "s", "m", "n") - labels <- c("Cousins from older aunt", "Cousins from younger aunt", "Daughter", "Grand-daughter", "Great-grand-daughter", "Great-grandmother", "Grandmother", "Mother", "Nieces from older sister", "Nieces from younger sister", "Aunt older than mother", "Aunt younger than mother", "Older sister", "Younger sister") - data.frame(DemoKin = codes, Caswell = caswell_codes, Label = labels, row.names = NULL) -} - #' rename kin -#' @description Rename kin labels depending consolidate some types -#' @export -rename_kin <- function(df, consolidate_column = "no"){ - - stopifnot("Argument 'consolidate_column' should be 'no' or a valid column name" = consolidate_column %in% c("no", colnames(df))) - - if(consolidate_column == "no"){ - - relatives <- c("Cousins from older aunt", "Cousins from younger aunt", "Daughter", "Grand-daughter", "Great-grand-daughter", "Great-grandmother", "Grandmother", "Mother", "Nieces from older sister", "Nieces from younger sister", "Aunt older than mother", "Aunt younger than mother", "Older sister", "Younger sister") - names(relatives) <- c("coa", "cya", "d", "gd", "ggd", "ggm", "gm", "m", "nos", "nys", "oa", "ya", "os", "ys") - } else { - - # Combine kin types irrespective of whether they come from older - # or younger sibling lines - consolidate_vec <- c("c", "c", "d", "gd", "ggd", "ggm", "gm", "m", "n", "n", "a", "a", "s", "s") - names(consolidate_vec) <- c("coa", "cya", "d", "gd", "ggd", "ggm", "gm", "m", "nos", "nys", "oa", "ya", "os", "ys") - - # Rename kin types from codes to actual words - relatives <- c("Cousins", "Daughter", "Grand-daughter", "Great-grand-daughter", "Great-grandmother", "Grandmother", "Mother", "Nieces", "Aunt", "Sister") - names(relatives) <- unique(consolidate_vec) - - df <- as.data.frame(df) - df$count <- df[ , consolidate_column] - - df <- - df %>% - dplyr::mutate(kin = consolidate_vec[kin]) %>% - dplyr::group_by(age_focal, kin) %>% - dplyr::summarise(count = sum(count)) %>% - dplyr::ungroup() - - - } - df$kin <- relatives[df$kin] - df +#' @description Add kin labels depending the sex +#' @details See table `demokin_codes` to know label options. +#' @param df data.frame. A data frame with variable `kin` with `DemoKin` codes to be labelled. +#' @param sex character. "f" for female, "m" for male or "2sex" for both sex naming. +#' @return Add a column with kin labels in the input data frame. +#' @export +rename_kin <- function(df, sex = "f"){ + if(!"kin" %in% names(df)) stop("Input df needs a column named kin.") + if(sex == "f") demokin_codes_sex <- DemoKin::demokin_codes[,c("DemoKin", "Labels_female")] + if(sex == "m") demokin_codes_sex <- DemoKin::demokin_codes[,c("DemoKin", "Labels_male")] + if(sex == "2sex") demokin_codes_sex <- DemoKin::demokin_codes[,c("DemoKin", "Labels_2sex")] + colnames(demokin_codes_sex) <- c("kin", "kin_label") + df %>% + dplyr::left_join(demokin_codes_sex) } diff --git a/R/data.R b/R/data.R index 1bacb90..43be162 100644 --- a/R/data.R +++ b/R/data.R @@ -127,3 +127,34 @@ #' @source #' Caswell (2021) "kin_svk1990_caswell2020" + +#' Fertility for France (2012) by sex in Caswell (2022). +#' +#' Fertility for France (2012) by sex in Caswell (2022). +#' @docType data +#' @format +#' A data.frame with age specific fertility rates by age and sex. +#' +#' @source +#' Caswell (2022) +"fra_asfr_sex" + +#' Survival probability for France (2012) by sex in Caswell (2022). +#' +#' Survival probability for France (2012) by sex in Caswell (2022). +#' @docType data +#' @format +#' A data.frame with survival probabilities by age and sex. +#' +#' @source +#' Caswell (2022) +"fra_surv_sex" + +#' DemoKin codes, Caswell (2020) codes, and useful labels. +#' +#' DemoKin codes, Caswell (2020) codes, and useful labels. +#' @docType data +#' @format +#' A data.frame with codes and labels for distinction between kin types. + +"demokin_codes" diff --git a/R/get_HMDHFD.R b/R/get_HMDHFD.R deleted file mode 100644 index b4ee78e..0000000 --- a/R/get_HMDHFD.R +++ /dev/null @@ -1,125 +0,0 @@ -#' Get time serie matrix data from HMD/HFD - -#' @description Wrapper function to get data of female survival, fertlity and population -#' of selected country on selected period. - -#' @param country numeric. Country code from rom HMD/HFD. -#' @param max_year numeric. Latest year to get data. -#' @param min_year integer. Older year to get data. -#' @param user_HMD character. From HMD. -#' @param user_HFD character. From HFD. -#' @param pass_HMD character. From HMD. -#' @param pass_HFD character. From HFD. -#' @param OAG numeric. Open age group to standarize output. -#' @return A list wiith female survival probability, survival function, fertility and poopulation age specific matrixes, with calendar year as colnames. -#' @export - -get_HMDHFD <- function(country = "SWE", - min_year = 1900, - max_year = 2018, - user_HMD = NULL, - pass_HMD = NULL, - user_HFD = NULL, - pass_HFD = NULL, - OAG = 100){ - - if(any(c(is.null(user_HMD), is.null(user_HFD), is.null(pass_HMD), is.null(pass_HFD)))){ - stop("The function needs HMD and HMF access.") - } - - # source HMD HFD ----------------------------------------------------------------- - pop <- HMDHFDplus::readHMDweb(CNTRY = country, "Population", user_HMD, pass_HMD, fixup = TRUE) %>% - dplyr::select(Year, Age, N = Female1)%>% - dplyr::filter(Year >= min_year, Year <= max_year) - lt <- HMDHFDplus::readHMDweb(country, "fltper_1x1", user_HMD, pass_HMD, fixup = TRUE) %>% - dplyr::filter(Year >= min_year, Year <= max_year) - asfr <- HMDHFDplus::readHFDweb(country, "asfrRR", user_HFD, pass_HFD, fixup = TRUE)%>% - dplyr::filter(Year >= min_year, Year <= max_year) - - # list of yearly Leslie matrix --------------------------------------------------- - - age = 0:OAG - ages = length(age) - w = last(age) - last_year = max(lt$Year) - years = min_year:last_year - - # survival probability - px <- lt %>% - dplyr::filter(Age<=OAG) %>% - dplyr::mutate(px = 1 - qx, - px = ifelse(Age==OAG, 0, px)) %>% - dplyr::select(Year, Age, px) %>% - tidyr::pivot_wider(names_from = "Year", values_from = "px") %>% - dplyr::select(-Age) %>% - as.matrix() - rownames(px) = 0:OAG - - # survival function - Lx <- lt %>% - dplyr::filter(Age<=OAG) %>% - dplyr::mutate(Lx = ifelse(Age==OAG, Tx, Lx)) %>% - dplyr::select(Year, Age, Lx) %>% - tidyr::pivot_wider(names_from = "Year", values_from = "Lx") %>% - dplyr::select(-Age) %>% - as.matrix() - - Sx <- rbind(Lx[c(-1,-ages),]/Lx[-c(w:ages),], - Lx[ages,]/(Lx[w,]+Lx[ages,]), - Lx[ages,]/(Lx[w,]+Lx[ages,])) - rownames(Sx) = 0:w - - # fertility - fx <- asfr %>% - dplyr::filter(Year >= min_year) %>% - dplyr::select(-OpenInterval) %>% - rbind( - expand.grid(Year = years, - Age = c(0:(min(asfr$Age)-1),(max(asfr$Age)+1):OAG), - ASFR = 0)) %>% - dplyr::arrange(Year, Age) %>% - tidyr::spread(Year, ASFR) %>% - dplyr::select(-Age) %>% - as.matrix() - rownames(fx) = 0:OAG - - # population - Nx <- pop %>% - dplyr::mutate(Age = ifelse(Age>OAG, OAG, Age)) %>% - dplyr::group_by(Year, Age) %>% summarise(N = sum(N)) %>% - dplyr::filter(Age<=OAG, Year >= min_year) %>% - dplyr::arrange(Year, Age) %>% - tidyr::spread(Year, N) %>% - dplyr::select(-Age) %>% - as.matrix() - rownames(Nx) = 0:OAG - - # only return data with values - if(any(is.na(colSums(Sx)))){ - warning("Asked for data out of HMDHFD range") - Sx <- Sx[,!is.na(colSums(Sx))] - } - if(any(is.na(colSums(fx)))){ - warning("Asked for data out of HMDHFD range") - fx <- fx[,!is.na(colSums(fx))] - } - if(any(is.na(colSums(Nx)))){ - warning("Asked for data out of HMDHFD range") - Nx <- Nx[,!is.na(colSums(Nx))] - } - - return(list(px=px, - Sx=Sx, - fx=fx, - Nx=Nx)) -} - -# save data - # swe_px <- swe_data$px - # swe_Sx <- swe_data$Sx - # swe_asfr <-swe_data$fx - # swe_pop <- swe_data$Nx - # save(swe_px, file = "data/swe_px.rda") - # save(swe_Sx, file = "data/swe_Sx.rda") - # save(swe_asfr, file = "data/swe_asfr.rda") - # save(swe_pop, file = "data/swe_pop.rda") diff --git a/R/kin.R b/R/kin.R index e9a5669..ac782c2 100644 --- a/R/kin.R +++ b/R/kin.R @@ -1,20 +1,27 @@ -#' Estimate kin counts +#' Estimate kin counts in a one-sex framework. -#' @description Implementation of Goodman-Keyfitz-Pullum equations in a matrix framework. +#' @description Implementation of Goodman-Keyfitz-Pullum equations in a matrix framework. This produce a matrilineal (or patrilineal) +#' kin count distribution by kin and age. #' @details See Caswell (2019) and Caswell (2021) for details on formulas. One sex only (female by default). -#' @param U numeric. A vector (atomic) or matrix with probabilities (or survival ratios, or transition between age class in a more general perspective) with rows as ages (and columns as years in case of matrix, being the name of each col the year). -#' @param f numeric. Same as U but for fertility rates. +#' @param p numeric. A vector (atomic) or matrix with probabilities (or survival ratios, or transition between age class +#' in a more general perspective) with rows as ages (and columns as years in case of matrix, being the name of each col the year). +#' @param f numeric. Same as `p` but for fertility rates. #' @param time_invariant logical. Constant assumption for a given `year` rates. Default `TRUE`. -#' @param N numeric. Same as U but for population distribution (counts or `%`). Optional. -#' @param pi numeric. Same as U but for childbearing distribution (sum to 1). Optional. +#' @param n numeric. Only for `time_invariant = FALSE`. Same as `p` but for population distribution (counts or `%`). Optional. +#' @param pi numeric. Same as `U` but for childbearing distribution (sum to 1). Optional. #' @param output_cohort integer. Vector of year cohorts for returning results. Should be within input data years range. #' @param output_period integer. Vector of period years for returning results. Should be within input data years range. #' @param output_kin character. kin types to return: "m" for mother, "d" for daughter,... -#' @param birth_female numeric. Female portion at birth. This multiplies `f` argument. If `f` is already for female offspring, this needs to be set as 1. +#' @param output_age_focal integer. Vector of ages to select (and make faster the run). +#' @param birth_female numeric. Female portion at birth. This multiplies `f` argument. If `f` is already for female offspring, +#' @param summary_kin logical. Whether or not include `kin_summary` table (see output details). Default `TRUE`. +#' this needs to be set as 1. #' @return A list with: #' \itemize{ -#' \item{kin_full}{ a data frame with year, cohort, Focal´s age, related ages and type of kin (for example `d` is daughter, `oa` is older aunts, etc.), including living and dead kin at that age.} -#' \item{kin_summary}{ a data frame with Focal´s age, related ages and type of kin, with indicators obtained processing `kin_full`, grouping by cohort or period (depending on the given arguments):} +#' \item{kin_full}{ a data frame with year, cohort, Focal´s age, related ages and type of kin (for example `d` is daughter, +#' `oa` is older aunts, etc.), including living and dead kin at that age.} +#' \item{kin_summary}{ a data frame with Focal´s age, related ages and type of kin, with indicators obtained processing `kin_full`, +#' grouping by cohort or period (depending on the given arguments):} #' {\itemize{ #' \item{`count_living`}{: count of living kin at actual age of Focal} #' \item{`mean_age`}{: mean age of each type of living kin.} @@ -25,58 +32,76 @@ #' } #' } #' } - #' @export -#' -# get kin ---------------------------------------------------------------- -kin <- function(U = NULL, f = NULL, - time_invariant = TRUE, - N = NULL, pi = NULL, - output_cohort = NULL, output_period = NULL, output_kin=NULL, - birth_female = 1/2.04, - stable = lifecycle::deprecated()) - { +#' @examples +#' # Kin expected matrilineal count for a Swedish female based on 2015 rates. +#' swe_surv_2015 <- swe_px[,"2015"] +#' swe_asfr_2015 <- swe_asfr[,"2015"] +#' # Run kinship models +#' swe_2015 <- kin(p = swe_surv_2015, f = swe_asfr_2015) +#' head(swe_2015$kin_summary) - age <- as.integer(rownames(U)) - years_data <- as.integer(colnames(U)) +kin <- function(p = NULL, f = NULL, + time_invariant = TRUE, + pi = NULL, n = NULL, + output_cohort = NULL, output_period = NULL, output_kin=NULL, output_age_focal = NULL, + birth_female = 1/2.04, + summary_kin = TRUE) + { - if (lifecycle::is_present(stable)) { - lifecycle::deprecate_warn("0.0.0.9000", "kin(stable)", details = "Used time_invariant") - time_invariant <- stable - } + # global vars + living<-dead<-age_kin<-age_focal<-cohort<-year<-total<-mean_age<-count_living<-sd_age<-count_dead<-mean_age_lost<-indicator<-value<-NULL # kin to return all_possible_kin <- c("coa", "cya", "d", "gd", "ggd", "ggm", "gm", "m", "nos", "nys", "oa", "ya", "os", "ys") + output_kin_asked <- output_kin if(is.null(output_kin)){ output_kin <- all_possible_kin }else{ + if("s" %in% output_kin) output_kin <- c(output_kin, "os", "ys") + if("c" %in% output_kin) output_kin <- c(output_kin, "coa", "cya") + if("a" %in% output_kin) output_kin <- c(output_kin, "oa", "ya") + if("n" %in% output_kin) output_kin <- c(output_kin, "nos", "nys") + output_kin <- output_kin[!output_kin %in% c("s", "c", "a", "n")] output_kin <- match.arg(tolower(output_kin), all_possible_kin, several.ok = TRUE) } - # if time dependent or not + # if is time dependent or not + age <- as.integer(rownames(p)) + years_data <- as.integer(colnames(p)) if(time_invariant){ - if(!is.vector(U)) { + if(!is.vector(p)) { output_period <- min(years_data) - U <- U[,as.character(output_period)] + p <- p[,as.character(output_period)] f <- f[,as.character(output_period)] } - kin_full <- kin_time_invariant(U = U, f = f, + kin_full <- kin_time_invariant(p = p, f = f, pi = pi, output_kin = output_kin, birth_female = birth_female) %>% dplyr::mutate(cohort = NA, year = NA) }else{ if(!is.null(output_cohort) & !is.null(output_period)) stop("sorry, you can not select cohort and period. Choose one please") - kin_full <- kin_time_variant(U = U, f = f, N = N, pi = pi, + kin_full <- kin_time_variant(p = p, f = f, pi = pi, n = n, output_cohort = output_cohort, output_period = output_period, output_kin = output_kin, birth_female = birth_female) message(paste0("Assuming stable population before ", min(years_data), ".")) } - # reorder - kin_full <- kin_full %>% dplyr::select(year, cohort, age_focal, kin, age_kin, living, dead) + # re-group if grouped type is asked + if(!is.null(output_kin_asked) & length(output_kin_asked)!=length(output_kin)){ + if("s" %in% output_kin_asked) kin_full$kin[kin_full$kin %in% c("os", "ys")] <- "s" + if("c" %in% output_kin_asked) kin_full$kin[kin_full$kin %in% c("coa", "cya")] <- "c" + if("a" %in% output_kin_asked) kin_full$kin[kin_full$kin %in% c("oa", "ya")] <- "a" + if("n" %in% output_kin_asked) kin_full$kin[kin_full$kin %in% c("nos", "nys")] <- "n" + kin_full <- kin_full %>% + dplyr::summarise(living = sum(living), dead = sum(dead), + .by = c(kin, age_kin, age_focal, cohort, year)) + } - # summary - # select period/cohort + # select period/cohort/age + if(!is.null(output_age_focal) & all(output_age_focal %in% 1:120)){ + kin_full <- kin_full %>% dplyr::filter(age_focal %in% output_age_focal) + } if(!is.null(output_cohort)){ agrupar <- "cohort" } else if(!is.null(output_period)){ @@ -87,28 +112,35 @@ kin <- function(U = NULL, f = NULL, agrupar_no_age_focal <- c("kin", agrupar) agrupar <- c("age_focal", "kin", agrupar) - kin_summary <- dplyr::bind_rows( - kin_full %>% - dplyr::rename(total=living) %>% - dplyr::group_by(dplyr::across(dplyr::all_of(agrupar))) %>% - dplyr::summarise(count_living = sum(total), - mean_age = sum(total*age_kin)/sum(total), - sd_age = (sum(total*age_kin^2)/sum(total)-mean_age^2)^.5) %>% - tidyr::pivot_longer(count_living:sd_age, names_to = "indicator", "value"), - kin_full %>% - dplyr::rename(total=dead) %>% - dplyr::group_by(dplyr::across(dplyr::all_of(agrupar))) %>% - dplyr::summarise(count_dead = sum(total)) %>% - dplyr::ungroup() %>% - dplyr::group_by(dplyr::across(dplyr::all_of(agrupar_no_age_focal))) %>% - dplyr::mutate(count_cum_dead = cumsum(count_dead), - mean_age_lost = cumsum(count_dead * age_focal)/cumsum(count_dead)) %>% - dplyr::ungroup() %>% - tidyr::pivot_longer(count_dead:mean_age_lost, names_to = "indicator", "value")) %>% + # get summary indicators based on group variables. If it is asked + if(summary_kin){ + kin_summary <- dplyr::bind_rows( + kin_full %>% + dplyr::rename(total=living) %>% + dplyr::group_by(dplyr::across(dplyr::all_of(agrupar))) %>% + dplyr::summarise(count_living = sum(total), + mean_age = sum(total*age_kin)/sum(total), + sd_age = (sum(total*age_kin^2)/sum(total)-mean_age^2)^.5) %>% + tidyr::pivot_longer(count_living:sd_age, names_to = "indicator", values_to = "value"), + kin_full %>% + dplyr::rename(total=dead) %>% + dplyr::group_by(dplyr::across(dplyr::all_of(agrupar))) %>% + dplyr::summarise(count_dead = sum(total)) %>% + dplyr::ungroup() %>% + dplyr::group_by(dplyr::across(dplyr::all_of(agrupar_no_age_focal))) %>% + dplyr::mutate(count_cum_dead = cumsum(count_dead), + mean_age_lost = cumsum(count_dead * age_focal)/cumsum(count_dead)) %>% + dplyr::ungroup() %>% + tidyr::pivot_longer(count_dead:mean_age_lost, names_to = "indicator", values_to = "value")) %>% dplyr::ungroup() %>% tidyr::pivot_wider(names_from = indicator, values_from = value) # return kin_out <- list(kin_full = kin_full, kin_summary = kin_summary) + }else{ + # return + kin_out <- kin_full + } + return(kin_out) } diff --git a/R/kin2sex.R b/R/kin2sex.R new file mode 100644 index 0000000..07dfa11 --- /dev/null +++ b/R/kin2sex.R @@ -0,0 +1,194 @@ +#' Estimate kin counts in a two-sex framework + +#' @description Implementation of two-sex matrix kinship model. This produces kin counts grouped by kin, age and sex of +#' each relatives at each Focal´s age. For example, male cousins from aunts and uncles from different sibling's parents +#' are grouped in one male count of cousins. Note that the output labels relative following female notation: the label `m` +#' refers to either mothers or fathers, and column `sex_kin` determine the sex of the relative. +#' @details See Caswell (2022) for details on formulas. +#' @param pf numeric. A vector (atomic) or matrix with female probabilities (or survival ratios, or transition between age class in a more general perspective) with rows as ages (and columns as years in case of matrix, being the name of each col the year). +#' @param pm numeric. A vector (atomic) or matrix with male probabilities (or survival ratios, or transition between age class in a more general perspective) with rows as ages (and columns as years in case of matrix, being the name of each col the year). +#' @param ff numeric. Same as `pf` but for fertility rates. +#' @param fm numeric. Same as `pm` but for fertility rates. +#' @param time_invariant logical. Constant assumption for a given `year` rates. Default `TRUE`. +#' @param sex_focal character. "f" for female or "m" for male. +#' @param pif numeric. For using some specific age distribution of childbearing for mothers (same length as ages). Default `NULL`. +#' @param pim numeric. For using some specific age distribution of childbearing for fathers (same length as ages). Default `NULL`. +#' @param Hf numeric. A list where each list element (being the name of each list element the year) contains a matrix with cause-specific hazards for females with rows as causes and columns as ages, being the name of each col the age. +#' @param Hm numeric. A list where each list element (being the name of each list element the year) contains a matrix with cause-specific hazards for males with rows as causes and columns as ages, being the name of each col the age. +#' @param nf numeric. Only for `time_invariant = FALSE`. Same as `pf` but for population distribution (counts or `%`). Optional. +#' @param nm numeric. Only for `time_invariant = FALSE`. Same as `pm` but for population distribution (counts or `%`). Optional. +#' @param output_cohort integer. Vector of year cohorts for returning results. Should be within input data years range. +#' @param output_period integer. Vector of period years for returning results. Should be within input data years range. +#' @param output_kin character. kin types to return: "m" for mother, "d" for daughter,... +#' @param output_age_focal integer. Vector of ages to select (and make faster the run). +#' @param birth_female numeric. Female portion at birth. This multiplies `f` argument. If `f` is already for female offspring, this needs to be set as 1. +#' @param summary_kin logical. Whether or not include `kin_summary` table (see output details). Default `TRUE`. +#' @return A list with: +#' \itemize{ +#' \item{kin_full}{ a data frame with year, cohort, Focal´s age, related ages and type of kin (for example `d` could be daughter or son depending `sex_kin`, +#' `oa` is older aunts or uncles also depending `sex_kin` value, etc.), including living and dead kin at that age.} +#' \item{kin_summary}{ a data frame with Focal´s age, related ages, sex and type of kin, with indicators obtained processing `kin_full`, grouping by cohort or period (depending on the given arguments):} +#' {\itemize{ +#' \item{`count_living`}{: count of living kin at actual age of Focal} +#' \item{`mean_age`}{: mean age of each type of living kin.} +#' \item{`sd_age`}{: standard deviation of age of each type of living kin.} +#' \item{`count_death`}{: count of dead kin at specific age of Focal.} +#' \item{`count_cum_death`}{: cumulated count of dead kin until specific age of Focal.} +#' \item{`mean_age_lost`}{: mean age where Focal lost her relative.} +#' } +#' } +#' } +#' @export +#' @examples +#' # Kin expected count by relative sex for a French female based on 2012 rates. +#' fra_fert_f <- fra_asfr_sex[,"ff"] +#' fra_fert_m <- fra_asfr_sex[,"fm"] +#' fra_surv_f <- fra_surv_sex[,"pf"] +#' fra_surv_m <- fra_surv_sex[,"pm"] +#' fra_2012 <- kin2sex(fra_surv_f, fra_surv_m, fra_fert_f, fra_fert_m) +#' head(fra_2012$kin_summary) +#' +# get kin ---------------------------------------------------------------- +kin2sex <- function(pf = NULL, pm = NULL, ff = NULL, fm = NULL, + time_invariant = TRUE, + sex_focal = "f", + birth_female = 1/2.04, + pif = NULL, pim = NULL, + nf = NULL, nm = NULL, + Hf = NULL, Hm = NULL, + output_cohort = NULL, output_period = NULL, output_kin=NULL,output_age_focal = NULL, + summary_kin = TRUE) + { + + # global vars + living<-age_focal<-cohort<-year<-total<-mean_age<-count_living<-sd_age<-count_dead<-mean_age_lost<-indicator<-value<-sex_kin<-age_kin<-dead<-NULL + age <- as.integer(rownames(pf)) + years_data <- as.integer(colnames(pf)) + + # kin to return + all_possible_kin <- c("coa", "cya", "d", "gd", "ggd", "ggm", "gm", "m", "nos", "nys", "oa", "ya", "os", "ys") + output_kin_asked <- output_kin + if(is.null(output_kin)){ + output_kin <- all_possible_kin + }else{ + if("s" %in% output_kin) output_kin <- c(output_kin, "os", "ys") + if("c" %in% output_kin) output_kin <- c(output_kin, "coa", "cya") + if("a" %in% output_kin) output_kin <- c(output_kin, "oa", "ya") + if("n" %in% output_kin) output_kin <- c(output_kin, "nos", "nys") + output_kin <- output_kin[!output_kin %in% c("s", "c", "a", "n")] + output_kin <- match.arg(tolower(output_kin), all_possible_kin, several.ok = TRUE) + } + + # is cause of death specific or not + is_cod <- !is.null(Hf) & !is.null(Hm) + + # if time dependent or not + if(time_invariant){ + if(!is.vector(pf)) { + output_period <- min(years_data) + pf <- pf[,as.character(output_period)] + pm <- pm[,as.character(output_period)] + ff <- ff[,as.character(output_period)] + fm <- fm[,as.character(output_period)] + } + if(is_cod){ + kin_full <- kin_time_invariant_2sex_cod(pf, pm, ff, fm, + sex_focal = sex_focal, + birth_female = birth_female, + pif = pif, pim = pim, + Hf = Hf, Hm = Hm, + output_kin = output_kin) %>% + dplyr::mutate(cohort = NA, year = NA) + }else{ + kin_full <- kin_time_invariant_2sex(pf, pm, ff, fm, + sex_focal = sex_focal, + birth_female = birth_female, + pif = pif, pim = pim, + output_kin = output_kin) %>% + dplyr::mutate(cohort = NA, year = NA) + } + + }else{ + if(!is.null(output_cohort) & !is.null(output_period)) stop("sorry, you can not select cohort and period. Choose one please") + if(is_cod){ + kin_full <- kin_time_variant_2sex_cod(pf = pf, pm = pm, + ff = ff, fm = fm, + sex_focal = sex_focal, + birth_female = birth_female, + pif = pif, pim = pim, + nf = nf, nm = nm, + Hf = Hf, Hm = Hm, + output_cohort = output_cohort, output_period = output_period, + output_kin = output_kin) + }else{ + kin_full <- kin_time_variant_2sex(pf = pf, pm = pm, + ff = ff, fm = fm, + sex_focal = sex_focal, + birth_female = birth_female, + pif = pif, pim = pim, + nf = nf, nm = nm, + output_cohort = output_cohort, output_period = output_period, + output_kin = output_kin) + } + message(paste0("Assuming stable population before ", min(years_data), ".")) + } + + # reorder + kin_full <- kin_full %>% dplyr::select(year, cohort, age_focal, sex_kin, kin, age_kin, living, starts_with("dea")) + + # re-group if grouped type is asked + if(!is.null(output_kin_asked) & length(output_kin_asked)!=length(output_kin)){ + if("s" %in% output_kin_asked) kin_full$kin[kin_full$kin %in% c("os", "ys")] <- "s" + if("c" %in% output_kin_asked) kin_full$kin[kin_full$kin %in% c("coa", "cya")] <- "c" + if("a" %in% output_kin_asked) kin_full$kin[kin_full$kin %in% c("oa", "ya")] <- "a" + if("n" %in% output_kin_asked) kin_full$kin[kin_full$kin %in% c("nos", "nys")] <- "n" + kin_full <- kin_full %>% + dplyr::group_by(kin, age_kin, age_focal, sex_kin, cohort, year) %>% + dplyr::summarise_at(vars(c("living", dplyr::starts_with("dea"))), funs(sum)) %>% + dplyr::ungroup() + } + + # summary + # select period/cohort/ge + if(!is.null(output_age_focal) & all(output_age_focal %in% 1:120)){ + kin_full <- kin_full %>% dplyr::filter(age_focal %in% output_age_focal) + } + if(!is.null(output_cohort)){ + agrupar <- "cohort" + } else if(!is.null(output_period)){ + agrupar <- "year" + } else{ + agrupar <- c("year", "cohort") + } + agrupar_no_age_focal <- c("kin", "sex_kin", agrupar) + agrupar <- c("age_focal", "kin", "sex_kin", agrupar) + + # only return summary if is asked and is not cod + if(summary_kin & !is_cod){ + kin_summary <- dplyr::bind_rows( + as.data.frame(kin_full) %>% + dplyr::rename(total=living) %>% + dplyr::group_by(dplyr::across(dplyr::all_of(agrupar))) %>% + dplyr::summarise(count_living = sum(total), + mean_age = sum(total*age_kin)/sum(total), + sd_age = (sum(total*age_kin^2)/sum(total)-mean_age^2)^.5) %>% + tidyr::pivot_longer(count_living:sd_age, names_to = "indicator", values_to = "value"), + as.data.frame(kin_full) %>% + dplyr::rename(total=dead) %>% + dplyr::group_by(dplyr::across(dplyr::all_of(agrupar))) %>% + dplyr::summarise(count_dead = sum(total)) %>% + dplyr::ungroup() %>% + dplyr::group_by(dplyr::across(dplyr::all_of(agrupar_no_age_focal))) %>% + dplyr::mutate(count_cum_dead = cumsum(count_dead), + mean_age_lost = cumsum(count_dead * age_focal)/cumsum(count_dead)) %>% + dplyr::ungroup() %>% + tidyr::pivot_longer(count_dead:mean_age_lost, names_to = "indicator", values_to = "value")) %>% + dplyr::ungroup() %>% + tidyr::pivot_wider(names_from = indicator, values_from = value) + kin_out <- list(kin_full = kin_full, kin_summary = kin_summary) + }else{ + kin_out <- kin_full + } + + return(kin_out) +} diff --git a/R/kin_multi_stage.R b/R/kin_multi_stage.R index 7129134..829b232 100644 --- a/R/kin_multi_stage.R +++ b/R/kin_multi_stage.R @@ -2,110 +2,126 @@ #' @description Implementation of age-stage kin estimates (multi-state) by Caswell (2020). Stages are implied in length of input lists. -#' @param U list. age elemnts with column-stochastic transition matrix with dimension for the state space, conditional on survival. -#' @param f matrix. state-specific fertility (age in rows and states in columns). -#' @param D matrix. survival probabilities by state (age in rows and states in columns) -#' @param H matrix. assigns the offspring of individuals in some stage to the appropriate age class with 1 (age in rows and states in columns). +#' @param U list. age elements with column-stochastic transition matrix with dimension for the state space, conditional on survival. +#' @param f matrix. state-specific fertility (age in rows and states in columns). Is accepted also a list with for each age-class. +#' @param D matrix. survival probabilities by state (age in rows and states in columns). Is accepted also a list for each state with survival matrices. +#' @param H matrix. assigns the offspring of individuals in some stage to the appropriate age class (age in rows and states in columns). Is accepted also a list with a matrix for each state. #' @param output_kin character. kin to return. For example "m" for mother, "d" for daughter. See the `vignette` for all kin types. #' @param birth_female numeric. Female portion at birth. +#' @param parity logical. parity states imply age distribution of mothers re-scaled to not have parity 0 when Focal born. Default `TRUE`. #' @param list_output logical. Results as a list. Default `FALSE`. #' @return A data frame with focal´s age, related ages and type of kin #' (for example `d` is daughter, `oa` is older aunts, etc.), living and death kin counts, and specific stage. If `list_output = TRUE` then this is a list with elements as kin types. #' @export -#' kin_multi_stage <- function(U = NULL, f = NULL, D = NULL, H = NULL, - birth_female = 1/2.04, - output_kin = NULL, - list_output = FALSE){ + birth_female = 1/2.04, + output_kin = NULL, + parity = FALSE, + list_output = FALSE){ + # global vars + .<-age_kin<-stage_kin<-alive<-age_focal<-count<-NULL + + # mandatory U as a list if(!is.list(U)) stop("U must be a list with age length of elements, and stage transitiotn matrix for each one.") - # stages and ages + # stages and age-classes s <- ncol(U[[1]]) ages <- length(U) age <- (1:ages)-1 - # build matrix structure from data.frame input - H <- purrr::map(colnames(D), function(Y){ - Ht = matrix(0, nrow=ages, ncol=ages) - Ht[1,] <- 1 - Ht - }) - D <- purrr::map(colnames(D), function(Y){ + # build H if it is not already a list + if(!is.list(H)){ + H <- purrr::map(1:s, function(Y){ + Ht = matrix(0, nrow=ages, ncol=ages) + Ht[1,] <- 1 + Ht + }) + } + + # build D if it is not already a list + if(!is.list(D)){ + D <- purrr::map(1:s, function(Y){ X <- D[,Y] Dt = matrix(0, nrow=ages, ncol=ages) Dt[row(Dt)-1 == col(Dt)] <- X[-ages] Dt[ages, ages] = X[ages] Dt }) - f <- purrr::map(1:ages, function(Y){ - X <- f[Y,] - ft = matrix(0, nrow=s, ncol=s) - ft[1,] <- X - ft - }) - - # build block matrix + } + + # build f if it is not already a list + if(!is.list(f)){ + f <- purrr::map(1:ages, function(Y){ + X <- f[Y,] + ft = matrix(0, nrow=s, ncol=s) + ft[1,] <- X + ft + }) + } + + # build block matrices bbU <- Matrix::bdiag(U) bbF <- Matrix::bdiag(f) * birth_female bbD <- Matrix::bdiag(D) bbH <- Matrix::bdiag(H) - # rearrange with conmutation matrix + # order transitions: first state within age, then age given state K <- matrixcalc::commutation.matrix(s, ages) Ut <- t(K) %*% bbD %*% K %*% bbU ft <- t(K) %*% bbH %*% K %*% bbF + + # focal transition but conditioned to survive Gt <- Ut%*% MASS::ginv(diag(colSums(as.matrix(Ut)))) - # stable distribution mothers: age x stage + # stable distribution of mothers At <- Ut + ft A_decomp <- eigen(At) lambda <- as.double(A_decomp$values[1]) wt <- as.double(A_decomp$vectors[,1])/sum(as.double(A_decomp$vectors[,1])) pi <- wt*At[1,]/sum(wt*At[1,]) - # marginal mothers age - Iom <- diag(1,ages, ages); - ones <- t(rep(1,s)) + # useful vectors and matrices + ones <- t(rep(1,s)) onesom <- t(rep(1,s*ages)) - piage <- kronecker(Iom,ones) %*% pi - - # momarray is an array with pit in each column - momarray <- pi %*% matrix(1,1,ages) - Iom = diag(1, ages) - Is = diag(1, s) - Isom = diag(1, s*ages) - zsom = matrix(0, s*ages, s*ages) - Z=Is; - Z[1,1]=0; - for(i in 1:ages){ - # imom = 1 - E <- Iom[,i] %*% t(Iom[i,]); # al cuadrado? - momarray[,i] <- kronecker(E,Z) %*% momarray[,i] - } - # re-scale - momarray <- momarray %*% MASS::ginv(diag(colSums(momarray))) - - # considering deaths - phi = d = gd = ggd = m = gm = ggm = os = ys = nos = nys = oa = ya = coa = cya = matrix(0,ages*s*2,ages) - phi[1,1] = 1 + onesa <- t(rep(1,ages)) + Iom <- diag(1, ages) + Is <- diag(1, s) + Isom <- diag(1, s*ages) + zsom <- matrix(0, s*ages, s*ages) + + # momarray is an array with pi in each column + piage <- kronecker(Iom,ones) %*% pi + momarray <- pi %*% onesa + + # considering deaths (no cumulated): reacreate block struct matrices + phi = d = gd = ggd = m = gm = ggm = os = ys = nos = nys = oa = ya = coa = cya = matrix(0,ages * s * 2, ages) Mtt <- diag(as.numeric(onesom - onesom %*% Ut)) Utt <- rbind(cbind(Ut,zsom), cbind(Mtt,Isom)) %>% as.matrix() ftt <- rbind(cbind(ft,zsom), cbind(zsom,zsom)) %>% as.matrix() Gtt <- rbind(cbind(Gt,zsom), cbind(zsom,zsom)) %>% as.matrix() sages <- 1:(ages*s) - # no considering deaths - # phi = d = gd = ggd = m = gm = ggm = os = ys = nos = nys = oa = ya = coa = cya = matrix(0,ages*s,ages) - # phi[1,1] = 1 - # Utt = Ut %>% as.matrix() - # ftt = ft %>% as.matrix() - # Gtt = Gt %>% as.matrix() + # if parity: restriction to no initial mothers with 0 parity + if(parity){ + Z=Is + Z[1,1]=0 + for(i in 1:ages){ + E <- Iom[,i] %*% t(Iom[i,]) + momarray[,i] <- kronecker(E,Z) %*% momarray[,i] + } + # re-scale + momarray <- momarray %*% MASS::ginv(diag(colSums(momarray))) + # no 0 parity mothers: (momarray %*% piage)[seq(1,600,6)] + m[sages,1] = momarray %*% piage + }else{ + m[sages,1] = pi + } # focal´s trip - m[sages,1] = momarray %*% piage; + phi[1,1] = 1 for(i in 1:(ages-1)){ phi[,i+1] = Gtt %*% phi[,i] d[,i+1] = Utt %*% d[,i] + ftt %*% phi[,i] @@ -145,7 +161,8 @@ kin_multi_stage <- function(U = NULL, f = NULL, D = NULL, H = NULL, } # get results - kin_list <- list(d=d,gd=gd,ggd=ggd,m=m,gm=gm,ggm=ggm,os=os,ys=ys, + kin_list <- list(focal = phi, + d=d,gd=gd,ggd=ggd,m=m,gm=gm,ggm=ggm,os=os,ys=ys, nos=nos,nys=nys,oa=oa,ya=ya,coa=coa,cya=cya) # only selected kin @@ -153,34 +170,32 @@ kin_multi_stage <- function(U = NULL, f = NULL, D = NULL, H = NULL, kin_list <- kin_list %>% purrr::keep(names(.) %in% output_kin) } - # as data.frame - kin <- purrr::map2(kin_list, names(kin_list), - function(x,y){ - out <- as.data.frame(x) - colnames(out) <- age - out %>% - dplyr::mutate(kin = y, - age_kin = rep(sort(rep(age,s)),2), - stage_kin = rep(rep(1:s,ages),2), - alive = c(rep("living",s*ages),rep("dead",s*ages)) - # age_kin = sort(rep(age,s)), - # stage_kin = rep(1:s,ages), - # alive = c(rep("yes",s*ages)) - ) %>% - tidyr::pivot_longer(c(-age_kin, -stage_kin, -kin, -alive), names_to = "age_focal", values_to = "count") %>% - dplyr::mutate(age_focal = as.integer(age_focal)) %>% - tidyr::pivot_wider(names_from = alive, values_from = count) - }) %>% + # kin_full as data.frame + kin_full <- purrr::map2(kin_list, names(kin_list), + function(x,y){ + # reassign deaths to Focal experienced age + x[(ages*s+1):(ages*s*2),1:(ages-1)] <- x[(ages*s+1):(ages*s*2),2:ages] + x[(ages*s+1):(ages*s*2),ages] <- 0 + out <- as.data.frame(x) + colnames(out) <- age + out %>% + dplyr::mutate(kin = y, + age_kin = rep(sort(rep(age,s)),2), + stage_kin = rep(rep(1:s,ages),2), + alive = c(rep("living",s*ages),rep("dead",s*ages))) %>% + tidyr::pivot_longer(c(-age_kin, -stage_kin, -kin, -alive), names_to = "age_focal", values_to = "count") %>% + dplyr::mutate(age_focal = as.integer(age_focal)) %>% + tidyr::pivot_wider(names_from = alive, values_from = count) + }) %>% purrr::reduce(rbind) # results as list? if(list_output) { out <- kin_list }else{ - out <- kin + out <- kin_full } + # end return(out) } - - diff --git a/R/kin_time_invariant.R b/R/kin_time_invariant.R index d843c4e..06058b6 100644 --- a/R/kin_time_invariant.R +++ b/R/kin_time_invariant.R @@ -1,37 +1,40 @@ -#' Estimate kin counts in a time invariant framework +#' Estimate kin counts in a time invariant framework for one-sex model (matrilineal/patrilineal) -#' @description Implementation of Goodman-Keyfitz-Pullum equations adapted by Caswell (2019). +#' @description Mtrix implementation of Goodman-Keyfitz-Pullum equations adapted by Caswell (2019). -#' @param U numeric. A vector of survival probabilities with same length as ages. +#' @param p numeric. A vector of survival probabilities with same length as ages. #' @param f numeric. A vector of age-specific fertility rates with same length as ages. #' @param birth_female numeric. Female portion at birth. #' @param pi numeric. For using some specific non-stable age distribution of childbearing (same length as ages). Default `NULL`. -#' @param output_kin character. kin to return. For example "m" for mother, "d" for daughter. See the `vignette` for all kin types. +#' @param output_kin character. kin to return. For example "m" for mother, "d" for daughter. See `vignette` for all kin types. #' @param list_output logical. Results as a list with `output_kin` elements, with focal´s age in columns and kin ages in rows (2 * ages, last chunk of ages for death experience). Default `FALSE` #' #' @return A data frame with focal´s age, related ages and type of kin #' (for example `d` is daughter, `oa` is older aunts, etc.), alive and death. If `list_output = TRUE` then this is a list. #' @export -kin_time_invariant <- function(U = NULL, f = NULL, +kin_time_invariant <- function(p = NULL, f = NULL, birth_female = 1/2.04, pi = NULL, output_kin = NULL, list_output = FALSE){ + # global vars + .<-alive<-age_kin<-alive<-age_focal<-count<-NULL + # make matrix transition from vectors - age = 0:(length(U)-1) + age = 0:(length(p)-1) ages = length(age) - Ut = Mt = zeros = Dcum = matrix(0, nrow=ages, ncol=ages) - Ut[row(Ut)-1 == col(Ut)] <- U[-ages] - Ut[ages, ages] = U[ages] - diag(Mt) = 1 - U + Ut = Mt = zeros = matrix(0, nrow=ages, ncol=ages) + Ut[row(Ut)-1 == col(Ut)] <- p[-ages] + Ut[ages, ages] = p[ages] + diag(Mt) = 1 - p Ut = rbind(cbind(Ut,zeros), - cbind(Mt,Dcum)) + cbind(Mt,zeros)) ft = matrix(0, nrow=ages*2, ncol=ages*2) ft[1,1:ages] = f * birth_female - # stable age distr + # stable age distribution in case no pi is given if(is.null(pi)){ A = Ut[1:ages,1:ages] + ft[1:ages,1:ages] A_decomp = eigen(A) @@ -57,24 +60,20 @@ kin_time_invariant <- function(U = NULL, f = NULL, ys[,i+1] = Ut %*% ys[,i] + ft %*% m[,i] nys[,i+1] = Ut %*% nys[,i] + ft %*% ys[,i] } - gm[1:ages,1] = m[1:ages,] %*% pi for(i in 1:(ages-1)){ gm[,i+1] = Ut %*% gm[,i] } - ggm[1:ages,1] = gm[1:ages,] %*% pi for(i in 1:(ages-1)){ ggm[,i+1] = Ut %*% ggm[,i] } - os[1:ages,1] = d[1:ages,] %*% pi nos[1:ages,1] = gd[1:ages,] %*% pi for(i in 1:(ages-1)){ os[,i+1] = Ut %*% os[,i] nos[,i+1] = Ut %*% nos[,i] + ft %*% os[,i] } - oa[1:ages,1] = os[1:ages,] %*% pi ya[1:ages,1] = ys[1:ages,] %*% pi coa[1:ages,1] = nos[1:ages,] %*% pi @@ -95,9 +94,12 @@ kin_time_invariant <- function(U = NULL, f = NULL, kin_list <- kin_list %>% purrr::keep(names(.) %in% output_kin) } - # as data.frame + # reshape as data.frame kin <- purrr::map2(kin_list, names(kin_list), function(x,y){ + # reassign deaths to Focal experienced age + x[(ages+1):(ages*2),1:(ages-1)] <- x[(ages+1):(ages*2),2:ages] + x[(ages+1):(ages*2),ages] <- 0 out <- as.data.frame(x) colnames(out) <- age out %>% @@ -111,13 +113,11 @@ kin_time_invariant <- function(U = NULL, f = NULL, ) %>% purrr::reduce(rbind) - # results as list? if(list_output) { out <- kin_list }else{ out <- kin } - return(out) } diff --git a/R/kin_time_invariant_2sex.R b/R/kin_time_invariant_2sex.R new file mode 100644 index 0000000..e9d7822 --- /dev/null +++ b/R/kin_time_invariant_2sex.R @@ -0,0 +1,165 @@ +#' Estimate kin counts in a time invariant framework for two-sex model. + +#' @description Two-sex matrix framework for kin count estimates.This produces kin counts grouped by kin, age and sex of +#' each relatives at each Focal´s age. For example, male cousins from aunts and uncles from different sibling's parents +#' are grouped in one male count of cousins. +#' @details See Caswell (2022) for details on formulas. +#' @param pf numeric. A vector of survival probabilities for females with same length as ages. +#' @param ff numeric. A vector of age-specific fertility rates for females with same length as ages. +#' @param pm numeric. A vector of survival probabilities for males with same length as ages. +#' @param fm numeric. A vector of age-specific fertility rates for males with same length as ages. +#' @param sex_focal character. "f" for female or "m" for male. +#' @param birth_female numeric. Female portion at birth. +#' @param pif numeric. For using some specific non-stable age distribution of childbearing for mothers (same length as ages). Default `NULL`. +#' @param pim numeric. For using some specific non-stable age distribution of childbearing for fathers (same length as ages). Default `NULL`. +#' @param output_kin character. kin to return, considering matrilineal names. For example "m" for parents, "d" for children, etc. See the `vignette` for all kin types. +#' @param list_output logical. Results as a list with `output_kin` elements, with focal´s age in columns and kin ages in rows (2 * ages, last chunk of ages for death experience). Default `FALSE` +#' +#' @return A data frame with focal´s age, related ages and type of kin +#' (for example `d` is children, `oa` is older aunts/uncles, etc.), sex, alive and death. If `list_output = TRUE` then this is a list. +#' @export + +kin_time_invariant_2sex <- function(pf = NULL, pm = NULL, + ff = NULL, fm = NULL, + sex_focal = "f", + birth_female = 1/2.04, + pif = NULL, pim = NULL, + output_kin = NULL, + list_output = FALSE){ + + # global vars + .<-sex_kin<-alive<-count<-living<-dead<-age_kin<-age_focal<-cohort<-year<-total<-mean_age<-count_living<-sd_age<-count_dead<-mean_age_lost<-indicator<-value<-NULL + + # same input length + if(!all(length(pf)==length(pm), length(pf)==length(ff), length(pf)==length(fm))) stop("Lengths of p's and f's should be the same") + + # make matrix transition from vectors. Include death counts with matrix M + age = 0:(length(pf)-1) + ages = length(age) + agess = ages * 2 + Uf = Um = Ff = Fm = Gt = zeros = matrix(0, nrow=ages, ncol=ages) + Uf[row(Uf)-1 == col(Uf)] <- pf[-ages] + Uf[ages, ages] = Uf[ages] + Um[row(Um)-1 == col(Um)] <- pm[-ages] + Um[ages, ages] = Um[ages] + Mm <- diag(1-pm) + Mf <- diag(1-pf) + Ut <- as.matrix(rbind( + cbind(Matrix::bdiag(Uf, Um), Matrix::bdiag(zeros, zeros)), + cbind(Matrix::bdiag(Mf, Mm), Matrix::bdiag(zeros, zeros)))) + Ff[1,] = ff + Fm[1,] = fm + Ft <- Ft_star <- matrix(0, agess*2, agess*2) + Ft[1:agess,1:agess] <- rbind(cbind(birth_female * Ff, birth_female * Fm), + cbind((1-birth_female) * Ff, (1-birth_female) * Fm)) + + # mother and father do not reproduce independently to produce focal´s siblings. Assign to mother + Ft_star[1:agess,1:ages] <- rbind(birth_female * Ff, (1-birth_female) * Ff) + + # parents age distribution under stable assumption in case no input + if(is.null(pim) | is.null(pif)){ + A = Matrix::bdiag(Uf, Um) + Ft_star[1:agess,1:agess] + A_decomp = eigen(A) + lambda = as.double(A_decomp$values[1]) + w = as.double(A_decomp$vectors[,1])/sum(as.double(A_decomp$vectors[,1])) + wf = w[1:ages] + wm = w[(ages+1):(2*ages)] + pif = wf * ff / sum(wf * ff) + pim = wm * fm / sum(wm * fm) + } + + # initial count matrix (kin ages in rows and focal age in column) + phi = d = gd = ggd = m = gm = ggm = os = ys = nos = nys = oa = ya = coa = cya = matrix(0, agess*2, ages) + + # locate focal at age 0 depending sex + sex_index <- ifelse(sex_focal == "f", 1, ages+1) + phi[sex_index, 1] <- 1 + + # G matrix moves focal by age + G <- matrix(0, nrow=ages, ncol=ages) + G[row(G)-1 == col(G)] <- 1 + Gt <- matrix(0, agess*2, agess*2) + Gt[1:(agess), 1:(agess)] <- as.matrix(Matrix::bdiag(G, G)) + + # focal´s trip + # names of matrix count by kin refers to matrilineal as general reference + m[1:(agess),1] = c(pif, pim) + for(i in 1:(ages-1)){ + # i = 1 + phi[,i+1] = Gt %*% phi[,i] + d[,i+1] = Ut %*% d[,i] + Ft %*% phi[,i] + gd[,i+1] = Ut %*% gd[,i] + Ft %*% d[,i] + ggd[,i+1] = Ut %*% ggd[,i] + Ft %*% gd[,i] + m[,i+1] = Ut %*% m[,i] + ys[,i+1] = Ut %*% ys[,i] + Ft_star %*% m[,i] + nys[,i+1] = Ut %*% nys[,i] + Ft %*% ys[,i] + } + + gm[1:(agess),1] = m[1:(agess),] %*% (pif + pim) + for(i in 1:(ages-1)){ + gm[,i+1] = Ut %*% gm[,i] + } + + ggm[1:(agess),1] = gm[1:(agess),] %*% (pif + pim) + for(i in 1:(ages-1)){ + ggm[,i+1] = Ut %*% ggm[,i] + } + + # atribuible to focal sex + pios = if(sex_focal == "f") pif else pim + os[1:(agess),1] = d[1:(agess),] %*% pif + nos[1:(agess),1] = gd[1:(agess),] %*% pif + for(i in 1:(ages-1)){ + os[,i+1] = Ut %*% os[,i] + nos[,i+1] = Ut %*% nos[,i] + Ft %*% os[,i] + } + + oa[1:(agess),1] = os[1:(agess),] %*% (pif + pim) + ya[1:(agess),1] = ys[1:(agess),] %*% (pif + pim) + coa[1:(agess),1] = nos[1:(agess),] %*% (pif + pim) + cya[1:(agess),1] = nys[1:(agess),] %*% (pif + pim) + for(i in 1:(ages-1)){ + oa[,i+1] = Ut %*% oa[,i] + ya[,i+1] = Ut %*% ya[,i] + Ft_star %*% gm[,i] + coa[,i+1] = Ut %*% coa[,i] + Ft %*% oa[,i] + cya[,i+1] = Ut %*% cya[,i] + Ft %*% ya[,i] + } + + # get results + kin_list <- list(d=d,gd=gd,ggd=ggd,m=m,gm=gm,ggm=ggm,os=os,ys=ys, + nos=nos,nys=nys,oa=oa,ya=ya,coa=coa,cya=cya) + + # only selected kin + if(!is.null(output_kin)){ + kin_list <- kin_list %>% purrr::keep(names(.) %in% output_kin) + } + + # as data.frame + kin <- purrr::map2(kin_list, names(kin_list), + function(x,y){ + # reassign deaths to Focal experienced age + x[(agess+1):(agess*2),1:(ages-1)] <- x[(agess+1):(agess*2),2:ages] + x[(agess+1):(agess*2),ages] <- 0 + out <- as.data.frame(x) + colnames(out) <- age + out %>% + dplyr::mutate(kin = y, + age_kin = rep(age,4), + sex_kin = rep(c(rep("f",ages), rep("m",ages)),2), + alive = c(rep("living",2*ages), rep("dead",2*ages))) %>% + tidyr::pivot_longer(c(-age_kin, -kin, -sex_kin, -alive), names_to = "age_focal", values_to = "count") %>% + dplyr::mutate(age_focal = as.integer(age_focal)) %>% + tidyr::pivot_wider(names_from = alive, values_from = count) + } + ) %>% + purrr::reduce(rbind) + + # results as list? + if(list_output) { + out <- kin_list + }else{ + out <- kin + } + + return(out) +} diff --git a/R/kin_time_invariant_2sex_cod.R b/R/kin_time_invariant_2sex_cod.R new file mode 100644 index 0000000..e0d17d6 --- /dev/null +++ b/R/kin_time_invariant_2sex_cod.R @@ -0,0 +1,255 @@ +#' Estimate kin counts in a time invariant framework for two-sex model. + +#' @description Two-sex matrix framework for kin count and death estimates.This produces kin counts grouped by kin, age and sex of +#' each relatives at each Focal´s age. For example, male cousins from aunts and uncles from different sibling's parents +#' are grouped in one male count of cousins. This also produces kin deaths grouped by kin, age, sex of +#' each relatives at each Focal´s age, and cause of death. +#' @details See Caswell (2022) for details on formulas. +#' @param pf numeric. A vector of survival probabilities for females with same length as ages. +#' @param Hf numeric. A matrix with cause-specific hazards for females with rows as causes and columns as ages, being the name of each col the age. +#' @param ff numeric. A vector of age-specific fertility rates for females with same length as ages. +#' @param pm numeric. A vector of survival probabilities for males with same length as ages. +#' @param fm numeric. A vector of age-specific fertility rates for males with same length as ages. +#' @param Hm numeric. A matrix with cause-specific hazards for males with rows as causes and columns as ages, being the name of each col the age. +#' @param sex_focal character. "f" for female or "m" for male. +#' @param birth_female numeric. Female portion at birth. +#' @param pif numeric. For using some specific non-stable age distribution of childbearing for mothers (same length as ages). Default `NULL`. +#' @param pim numeric. For using some specific non-stable age distribution of childbearing for fathers (same length as ages). Default `NULL`. +#' @param output_kin character. kin to return, considering matrilineal names. For example "m" for parents, "d" for children, etc. See the `vignette` for all kin types. +#' @param list_output logical. Results as a list with `output_kin` elements, with focal´s age in columns and kin ages in rows (2 * ages, last chunk of ages for death experience). Default `FALSE` +#' +#' @return A data frame with focal´s age, related ages and type of kin +#' (for example `d` is children, `oa` is older aunts/uncles, etc.), sex, alive and death. If `list_output = TRUE` then this is a list. +#' @export + +# BEN: Added hazard matrices as inputs. +# Assume that input of cause-specific mortality will be in terms of +# matrices of cause-specific hazards for the two sexes (causes * ages). +# Alternative: a matrix (causes * ages) containing the ratio mxi/mx. +kin_time_invariant_2sex_cod <- function(pf = NULL, + pm = NULL, + ff = NULL, + fm = NULL, + Hf = NULL, + Hm = NULL, + sex_focal = "f", + birth_female = 1 / 2.04, + pif = NULL, + pim = NULL, + output_kin = NULL, + list_output = FALSE) { + + + # global vars + .<-sex_kin<-alive<-count<-living<-dead<-age_kin<-age_focal<-cohort<-year<-total<-mean_age<-count_living<-sd_age<-count_dead<-mean_age_lost<-indicator<-value<-NULL + + # same input length + + # BEN: Now we should also check the dimensions of the cause-specific hazard + # matrices. + if(!all(length(pf)==length(pm), length(pf)==length(ff), length(pf)==length(fm), + nrow(Hf)==nrow(Hm), ncol(Hf)==ncol(Hm), ncol(Hf)==length(pf))) stop("Number of age groups of p's, h's, and f's should match") + + # make matrix transition from vectors. Include death counts with matrix M + age = 0:(length(pf)-1) + ages = length(age) + agess = ages * 2 + Uf = Um = Ff = Fm = Gt = matrix(0, nrow=ages, ncol=ages) + + # BEN: The zero matrix was deleted from line above and has + # to be made specific according to living/dead kin + # part of the block matrix Ut. + causes <- nrow(Hf) # number of causes of death + zeros_l <- matrix(0, nrow = ages, ncol = (causes*ages)) # zero matrix for living kin part + zeros_d = matrix(0, nrow = (causes*ages), ncol = (causes*ages)) # zero matrix for death kin part + + Uf[row(Uf)-1 == col(Uf)] <- pf[-ages] + + # BEN: What is the purpose of the following line? By default it is zero due to + # how the matrix is created + Uf[ages, ages] = Uf[ages] + + Um[row(Um)-1 == col(Um)] <- pm[-ages] + Um[ages, ages] = Um[ages] + + # BEN: Building of M, matrix of cause-specific prob. of dying. + # Hence, M = H D(h_tilde)^{-1} D(q) + # where h_tilde are the summed hazards for each age, and + # q = 1 - p + sum_hf <- t(rep(1, causes)) %*% Hf # h_tilde female + sum_hm <- t(rep(1, causes)) %*% Hm # h_tilde male + Mf <- Hf %*% solve(diag(c(sum_hf))) %*% diag(1-pf) + Mm <- Hm %*% solve(diag(c(sum_hm))) %*% diag(1-pm) + # Mm <- diag(1-pm) + # Mf <- diag(1-pf) + + # BEN: In order to classify kin death by both cause and age at death, + # we need a mortality matrices M_hat of dimension + # ((causes*ages) * ages). See eq.12 in Caswell et al. (2024). + # Store columns of M as a list of vectors + Mf.cols <- lapply(1:ncol(Mf), function(j) return(Mf[,j])) + Mm.cols <- lapply(1:ncol(Mm), function(j) return(Mm[,j])) + # Create M_hat using the vectors as elements of the block diagonal + Ut <- as.matrix(rbind( + cbind(Matrix::bdiag(Uf, Um), Matrix::bdiag(zeros_l, zeros_l)), + cbind(Matrix::bdiag(Matrix::bdiag(Mf.cols), Matrix::bdiag(Mm.cols)), Matrix::bdiag(zeros_d, zeros_d)))) + + Ff[1,] = ff + Fm[1,] = fm + + # BEN: Accounting for causes of death leads to have different dimensions + # in Ft and Ft_star. + Ft <- Ft_star <- matrix(0, (agess + agess*causes), (agess + agess*causes)) + Ft[1:agess,1:agess] <- rbind(cbind(birth_female * Ff, birth_female * Fm), + cbind((1-birth_female) * Ff, (1-birth_female) * Fm)) + + # mother and father do not reproduce independently to produce focal´s siblings. Assign to mother + Ft_star[1:agess,1:ages] <- rbind(birth_female * Ff, (1-birth_female) * Ff) + + # parents age distribution under stable assumption in case no input + if(is.null(pim) | is.null(pif)){ + A = Matrix::bdiag(Uf, Um) + Ft_star[1:agess,1:agess] + A_decomp = eigen(A) + lambda = as.double(A_decomp$values[1]) + w = as.double(A_decomp$vectors[,1])/sum(as.double(A_decomp$vectors[,1])) + wf = w[1:ages] + wm = w[(ages+1):(2*ages)] + pif = wf * ff / sum(wf * ff) + pim = wm * fm / sum(wm * fm) + } + + # initial count matrix (kin ages in rows and focal age in column) + # BEN: Changed dimensions of lower part (dead kin) to account for death from causes. + phi = d = gd = ggd = m = gm = ggm = os = ys = nos = nys = oa = ya = coa = cya = matrix(0, (agess + agess*causes), ages) + + # locate focal at age 0 depending sex + sex_index <- ifelse(sex_focal == "f", 1, ages+1) + phi[sex_index, 1] <- 1 + + # G matrix moves focal by age + G <- matrix(0, nrow=ages, ncol=ages) + G[row(G)-1 == col(G)] <- 1 + + # BEN: Changed dimensions + Gt <- matrix(0, (agess + agess*causes), (agess + agess*causes)) + + Gt[1:(agess), 1:(agess)] <- as.matrix(Matrix::bdiag(G, G)) + + # focal´s trip + # names of matrix count by kin refers to matrilineal as general reference + m[1:(agess),1] = c(pif, pim) + for(i in 1:(ages-1)){ + # i = 1 + phi[,i+1] = Gt %*% phi[,i] + d[,i+1] = Ut %*% d[,i] + Ft %*% phi[,i] + gd[,i+1] = Ut %*% gd[,i] + Ft %*% d[,i] + ggd[,i+1] = Ut %*% ggd[,i] + Ft %*% gd[,i] + m[,i+1] = Ut %*% m[,i] + ys[,i+1] = Ut %*% ys[,i] + Ft_star %*% m[,i] + nys[,i+1] = Ut %*% nys[,i] + Ft %*% ys[,i] + } + + gm[1:(agess),1] = m[1:(agess),] %*% (pif + pim) + for(i in 1:(ages-1)){ + gm[,i+1] = Ut %*% gm[,i] + } + + ggm[1:(agess),1] = gm[1:(agess),] %*% (pif + pim) + for(i in 1:(ages-1)){ + ggm[,i+1] = Ut %*% ggm[,i] + } + + os[1:(agess),1] = d[1:(agess),] %*% pif + nos[1:(agess),1] = gd[1:(agess),] %*% pif + for(i in 1:(ages-1)){ + os[,i+1] = Ut %*% os[,i] + nos[,i+1] = Ut %*% nos[,i] + Ft %*% os[,i] + } + + oa[1:(agess),1] = os[1:(agess),] %*% (pif + pim) + ya[1:(agess),1] = ys[1:(agess),] %*% (pif + pim) + coa[1:(agess),1] = nos[1:(agess),] %*% (pif + pim) + cya[1:(agess),1] = nys[1:(agess),] %*% (pif + pim) + for(i in 1:(ages-1)){ + oa[,i+1] = Ut %*% oa[,i] + ya[,i+1] = Ut %*% ya[,i] + Ft_star %*% gm[,i] + coa[,i+1] = Ut %*% coa[,i] + Ft %*% oa[,i] + cya[,i+1] = Ut %*% cya[,i] + Ft %*% ya[,i] + } + + # get results + kin_list <- list(d=d,gd=gd,ggd=ggd,m=m,gm=gm,ggm=ggm,os=os,ys=ys, + nos=nos,nys=nys,oa=oa,ya=ya,coa=coa,cya=cya) + + # only selected kin + if(!is.null(output_kin)){ + kin_list <- kin_list %>% purrr::keep(names(.) %in% output_kin) + } + + # as data.frame + kin <- purrr::map2(kin_list, names(kin_list), + function(x,y){ + + # BEN: Death take place in the same year and age! + # I adapted the code + # below such that it works with the new dimensions. + + # reassign deaths to Focal experienced age + x[(agess+1):(agess + agess*causes),1:(ages-1)] <- x[(agess+1):(agess + agess*causes),2:ages] + x[(agess+1):(agess + agess*causes),ages] <- 0 + out <- as.data.frame(x) + colnames(out) <- age + out %>% + # BEN: the matrices have different dimensions when + # we accounf for causes of death so what follows + # has been substantially changed. + dplyr::mutate(kin = y, + age_kin = c(rep(age,2), rep(rep(age,each=causes),2)), + sex_kin = c(rep(c("f", "m"),each=ages), rep(c("f", "m"),each=ages*causes)), + alive = c(rep("living",2*ages), rep(paste0("deadcause",1:causes),2*ages))) %>% + tidyr::pivot_longer(c(-age_kin, -kin, -sex_kin, -alive), names_to = "age_focal", values_to = "count") %>% + dplyr::mutate(age_focal = as.integer(age_focal)) %>% + tidyr::pivot_wider(names_from = alive, values_from = count) + } + ) %>% + purrr::reduce(rbind) + + # results as list? + if(list_output) { + out <- kin_list + }else{ + out <- kin + } + + return(out) +} + +## BEN: ======================================================================== + +# Checks + +# No dead parent at birth: deadcausei=0 when age_focal==0 +# ff # fertility starts at age 13 +# kin |> filter(kin == "m", age_focal ==0, age_kin >= 12) +# +# # pi when age_focal==0 and age_kin when fx>0: +# kin |> filter(kin == "m", age_kin >= 13, age_focal ==0) +# pif[14:101] +# +# # mother dying from cause i at age x when focal is age==1 comes from nber of +# # living mother age x when focal is age==1 multiplied by (1-pf[x])*(1/3) +# kin |> filter(kin == "m", age_kin == 14, age_focal ==1) +# 0.000246 * ((1-pf[15])*(1/3)) # mother +# 0.0000486 * ((1-pm[15])*(1/3)) # father +# +# # Store to compare with kin_time_invariant_2sex.R +# saveRDS( +# kin, +# here( +# "checks", +# "output_time_invariant_2sex.rds" +# ) +# ) + + +## ============================================================================= diff --git a/R/kin_time_variant.R b/R/kin_time_variant.R index bc962fc..71a5dfa 100644 --- a/R/kin_time_variant.R +++ b/R/kin_time_variant.R @@ -1,10 +1,10 @@ -#' Estimate kin counts in a time variant framework +#' Estimate kin counts in a time variant framework (dynamic rates) for one-sex model (matrilineal/patrilineal) -#' @description Implementation of time variant Goodman-Keyfitz-Pullum equations based on Caswell (2021). -#' -#' @param U numeric. A matrix of survival ratios with rows as ages and columns as years. Column names must be equal interval. +#' @description Matrix implementation of time variant Goodman-Keyfitz-Pullum equations in a matrix framework. +#' @details See Caswell (2021) for details on formulas. +#' @param p numeric. A matrix of survival ratios with rows as ages and columns as years. Column names must be equal interval. #' @param f numeric. A matrix of age-specific fertility rates with rows as ages and columns as years. Coincident with `U`. -#' @param N numeric. A matrix of population with rows as ages and columns as years. Coincident with `U`. +#' @param n numeric. A matrix of population with rows as ages and columns as years. Coincident with `U`. #' @param pi numeric. A matrix with distribution of childbearing with rows as ages and columns as years. Coincident with `U`. #' @param output_cohort integer. Year of birth of focal to return as output. Could be a vector. Should be within input data years range. #' @param output_period integer. Year for which to return kinship structure. Could be a vector. Should be within input data years range. @@ -12,82 +12,79 @@ #' @param birth_female numeric. Female portion at birth. #' @param list_output logical. Results as a list with years elements (as a result of `output_cohort` and `output_period` combination), with a second list of `output_kin` elements, with focal´s age in columns and kin ages in rows (2 * ages, last chunk of ages for death experience). Default `FALSE` -#' @return A data frame of population kinship structure, with focal's cohort, focal´s age, period year, type of relatives +#' @return A data frame of population kinship structure, with Focal's cohort, focal´s age, period year, type of relatives #' (for example `d` is daughter, `oa` is older aunts, etc.), living and death kin counts, and age of (living or time deceased) relatives. If `list_output = TRUE` then this is a list. #' @export -kin_time_variant <- function(U = NULL, f = NULL, N = NULL, pi = NULL, +kin_time_variant <- function(p = NULL, f = NULL, pi = NULL, n = NULL, output_cohort = NULL, output_period = NULL, output_kin = NULL, birth_female = 1/2.04, list_output = FALSE){ + # global vars + .<-living<-dead<-age_kin<-age_focal<-cohort<-year<-total<-mean_age<-count_living<-sd_age<-count_dead<-mean_age_lost<-indicator<-value<-NULL + # check input - if(is.null(U) | is.null(f)) stop("You need values on U and/or f.") + if(is.null(p) | is.null(f)) stop("You need values on p and f.") # diff years - if(!any(as.integer(colnames(U)) == as.integer(colnames(f)))) stop("Data should be from same years.") + if(!any(as.integer(colnames(p)) == as.integer(colnames(f)))) stop("Make sure that p and f are matrices and have the same column names.") # data should be from same interval years - years_data <- as.integer(colnames(U)) - if(var(diff(years_data))!=0) stop("Data should be for same interval length years. Fill the gaps and run again") + years_data <- as.integer(colnames(p)) + if(stats::var(diff(years_data))!=0) stop("The years given as column names in the p and f matrices must be equally spaced.") # utils - age <- 0:(nrow(U)-1) + age <- 0:(nrow(p)-1) n_years_data <- length(years_data) ages <- length(age) om <- max(age) zeros <- matrix(0, nrow=ages, ncol=ages) # age distribution at childborn + pi_N_null_flag <- FALSE if(is.null(pi)){ - if(is.null(N)){ + if(is.null(n)){ # create pi and fill it during the loop message("Stable assumption was made for calculating pi on each year because no input data.") + pi_N_null_flag <- TRUE pi <- matrix(0, nrow=ages, ncol=n_years_data) }else{ - pi <- rbind(t(t(N * f)/colSums(N * f)), matrix(0,ages,length(years_data))) + pi_N_null_flag <- FALSE + pi <- rbind(t(t(n * f)/colSums(n * f)), matrix(0,ages,length(years_data))) } } - # get lists of matrix - Ul = fl = list() - for(t in 1:n_years_data){ - Ut = Mt = Dcum = matrix(0, nrow=ages, ncol=ages) - Ut[row(Ut)-1 == col(Ut)] <- U[-ages,t] - Ut[ages, ages]=U[ages,t] - diag(Mt) = 1 - U[,t] - Ul[[as.character(years_data[t])]] <- rbind(cbind(Ut,zeros),cbind(Mt,Dcum)) - ft = matrix(0, nrow=ages*2, ncol=ages*2) - ft[1,1:ages] = f[,t] * birth_female - fl[[as.character(years_data[t])]] <- ft - } - U <- Ul - f <- fl - # loop over years (more performance here) kin_all <- list() pb <- progress::progress_bar$new( format = "Running over input years [:bar] :percent", - total = n_years_data, clear = FALSE, width = 60) - for (iyear in 1:n_years_data){ - # print(iyear) - Ut <- as.matrix(U[[iyear]]) - ft <- as.matrix(f[[iyear]]) - if(is.null(pi)){ + total = n_years_data + 1, clear = FALSE, width = 50) + for (t in 1:n_years_data){ + # build matrix + Ut = Mt = matrix(0, nrow=ages, ncol=ages) + Ut[row(Ut)-1 == col(Ut)] <- p[-ages,t] + Ut[ages, ages] = p[ages,t] + diag(Mt) = 1 - p[,t] + Ut = rbind(cbind(Ut,zeros),cbind(Mt,zeros)) + ft = matrix(0, nrow=ages*2, ncol=ages*2) + ft[1,1:ages] = f[,t] * birth_female + if(pi_N_null_flag){ A <- Ut[1:ages,1:ages] + ft[1:ages,1:ages] A_decomp = eigen(A) w <- as.double(A_decomp$vectors[,1])/sum(as.double(A_decomp$vectors[,1])) - pit <- pi[,iyear] <- w*A[1,]/sum(w*A[1,]) + pit <- pi[,t] <- w*A[1,]/sum(w*A[1,]) }else{ - pit <- pi[,iyear] + pit <- pi[,t] } - if (iyear==1){ - U1 <- c(diag(Ut[-1,])[1:om],Ut[om,om]) + # proj + if (t==1){ + p1 <- c(diag(Ut[-1,])[1:om],Ut[om,om]) f1 <- ft[1,][1:ages] pi1 <- pit[1:ages] - kin_all[[1]] <- kin_time_invariant(U = U1, f = f1/birth_female, pi = pi1, birth_female = birth_female, + kin_all[[1]] <- kin_time_invariant(p = p1, f = f1/birth_female, pi = pi1, birth_female = birth_female, list_output = TRUE) } - kin_all[[iyear+1]] <- timevarying_kin(Ut=Ut,ft=ft,pit=pit,ages,pkin=kin_all[[iyear]]) + kin_all[[t+1]] <- timevarying_kin(Ut=Ut,ft=ft,pit=pit,ages,pkin=kin_all[[t]]) pb$tick() } @@ -110,23 +107,29 @@ kin_time_variant <- function(U = NULL, f = NULL, N = NULL, pi = NULL, purrr::map(~ .[selected_kin_position]) # long format - kin <- lapply(names(kin_list), function(Y){ + message("Preparing output...") + kin <- lapply(names(kin_list), FUN = function(Y){ X <- kin_list[[Y]] - X <- purrr::map2(X, names(X), function(x,y) as.data.frame(x) %>% - dplyr::mutate(year = Y, - kin=y, - age_kin = rep(age,2), - alive = c(rep("living",ages), rep("dead",ages)), - .before=everything())) %>% - dplyr::bind_rows() %>% - stats::setNames(c("year","kin","age_kin","alive",as.character(age))) %>% - tidyr::gather(age_focal, count,-age_kin, -kin, -year, -alive) %>% - dplyr::mutate(age_focal = as.integer(age_focal), - year = as.integer(year), - cohort = year - age_focal) %>% - dplyr::filter(age_focal %in% out_selected$age[out_selected$year==as.integer(Y)]) %>% - tidyr::pivot_wider(names_from = alive, values_from = count)}) %>% - dplyr::bind_rows() + X <- purrr::map2(X, names(X), function(x,y){ + # reassign deaths to Focal experienced age + x[(ages+1):(ages*2),1:(ages-1)] <- x[(ages+1):(ages*2),2:ages] + x[(ages+1):(ages*2),ages] <- 0 + x <- as.data.frame(x) + x$year <- Y + x$kin <- y + x$age_kin <- rep(age,2) + x$alive <- c(rep("living",ages), rep("dead",ages)) + return(x) + }) %>% + data.table::rbindlist() %>% + stats::setNames(c(as.character(age), "year","kin","age_kin","alive")) %>% + data.table::melt(id.vars = c("year","kin","age_kin","alive"), variable.name = "age_focal", value.name = "count") + X$age_focal = as.integer(as.character(X$age_focal)) + X$year = as.integer(X$year) + X$cohort = X$year - X$age_focal + X[X$age_focal %in% out_selected$age[out_selected$year==as.integer(Y)],] %>% + data.table::dcast(year + kin + age_kin + age_focal + cohort ~ alive, value.var = "count") + }) %>% data.table::rbindlist() # results as list? if(list_output) { @@ -148,7 +151,8 @@ kin_time_variant <- function(U = NULL, f = NULL, N = NULL, pi = NULL, #' @param pit numeric. A matrix with distribution of childbearing. #' @param ages numeric. #' @param pkin numeric. A list with kin count distribution in previous year. -# +#' @return A list of 14 types of kin matrices (kin age by Focal age) projected one time interval. +#' @export timevarying_kin<- function(Ut, ft, pit, ages, pkin){ # frequently used zero vector for initial condition @@ -172,7 +176,8 @@ timevarying_kin<- function(Ut, ft, pit, ages, pkin){ coa[1:ages,1]= pkin[["nos"]][1:ages,] %*% pit[1:ages] cya[1:ages,1]= pkin[["nys"]][1:ages,] %*% pit[1:ages] - for (ix in 1:om){ + # vers1 + for(ix in 1:om){ d[,ix+1] = Ut %*% pkin[["d"]][,ix] + ft %*% I[,ix] gd[,ix+1] = Ut %*% pkin[["gd"]][,ix] + ft %*% pkin[["d"]][,ix] ggd[,ix+1] = Ut %*% pkin[["ggd"]][,ix] + ft %*% pkin[["gd"]][,ix] @@ -195,10 +200,17 @@ timevarying_kin<- function(Ut, ft, pit, ages, pkin){ return(kin_list) } -#' defince apc combination to return +#' APC combination to return -#' @description defince apc to return. -#' +#' @description define APC combination to return in `kin` and `kin2sex`. +#' @details Because returning all period and cohort data from a huge time-series would be hard memory consuming, +#' this function is an auxiliary one to deal with selection from inputs `output_cohort` and `output_period`. +#' @param output_cohort integer. A vector with selected calendar years. +#' @param output_period integer. A vector with selected cohort years. +#' @param age integer. A vector with ages from the kinship network to be filtered. +#' @param years_data integer. A vector with years from the time-varying kinship network to be filtered. +#' @return data.frame with years and ages to filter in `kin` and `kin_2sex` functions. +#' @export output_period_cohort_combination <- function(output_cohort = NULL, output_period = NULL, age = NULL, years_data = NULL){ # no specific @@ -214,10 +226,11 @@ output_period_cohort_combination <- function(output_cohort = NULL, output_period unlist(use.names = F)) }else{selected_cohorts_year_age <- c()} - # period year combination + # period combination if(!is.null(output_period)){selected_years_age <- expand.grid(age, output_period) %>% dplyr::rename(age=1,year=2) }else{selected_years_age <- c()} # end return(dplyr::bind_rows(selected_years_age,selected_cohorts_year_age) %>% dplyr::distinct()) } + diff --git a/R/kin_time_variant_2sex.R b/R/kin_time_variant_2sex.R new file mode 100644 index 0000000..e67db6f --- /dev/null +++ b/R/kin_time_variant_2sex.R @@ -0,0 +1,254 @@ +#' Estimate kin counts in a time variant framework (dynamic rates) in a two-sex framework (Caswell, 2022) + +#' @description Two-sex matrix framework for kin count estimates with varying rates. +#' This produces kin counts grouped by kin, age and sex of each relatives at each Focal´s age. +#' For example, male cousins from aunts and uncles from different sibling's parents are grouped in one male count of cousins. +#' @details See Caswell (2022) for details on formulas. +#' @param pf numeric. A vector (atomic) or matrix with probabilities (or survival ratios, or transition between age class in a more general perspective) with rows as ages (and columns as years in case of matrix, being the name of each col the year). +#' @param pm numeric. A vector (atomic) or matrix with probabilities (or survival ratios, or transition between age class in a more general perspective) with rows as ages (and columns as years in case of matrix, being the name of each col the year). +#' @param ff numeric. Same as pf but for fertility rates. +#' @param fm numeric. Same as pm but for fertility rates. +#' @param sex_focal character. "f" for female or "m" for male. +#' @param pif numeric. For using some specific age distribution of childbearing for mothers (same length as ages). Default `NULL`. +#' @param pim numeric. For using some specific age distribution of childbearing for fathers (same length as ages). Default `NULL`. +#' @param nf numeric. Same as pf but for population distribution (counts or `%`). Optional. +#' @param nm numeric. Same as pm but for population distribution (counts or `%`). Optional. +#' @param output_cohort integer. Vector of year cohorts for returning results. Should be within input data years range. +#' @param output_period integer. Vector of period years for returning results. Should be within input data years range. +#' @param output_kin character. kin types to return: "m" for mother, "d" for daughter,... +#' @param birth_female numeric. Female portion at birth. This multiplies `f` argument. If `f` is already for female offspring, this needs to be set as 1. +#' @param list_output logical. Results as a list with years elements (as a result of `output_cohort` and `output_period` combination), with a second list of `output_kin` elements, with focal´s age in columns and kin ages in rows (2 * ages, last chunk of ages for death experience). Default `FALSE` +#' @return A data.frame with year, cohort, Focal´s age, related ages, sex and type of kin (for example `d` is daughter, `oa` is older aunts, etc.), including living and dead kin at that age and sex. +#' @export + +kin_time_variant_2sex <- function(pf = NULL, pm = NULL, + ff = NULL, fm = NULL, + sex_focal = "f", + birth_female = 1/2.04, + pif = NULL, pim = NULL, + nf = NULL, nm = NULL, + output_cohort = NULL, output_period = NULL, output_kin = NULL, + list_output = FALSE){ + + # global vars + .<-living<-dead<-age_kin<-age_focal<-cohort<-year<-total<-mean_age<-count_living<-sd_age<-count_dead<-mean_age_lost<-indicator<-value<-NULL + + # same input length + if(!all(dim(pf) == dim(pm), dim(pf) == dim(ff), dim(pf) == dim(fm))) stop("Dimension of P's and F's should be the same") + + # data should be from same interval years + years_data <- as.integer(colnames(pf)) + if(stats::var(diff(years_data))!=0) stop("Data should be for same interval length years. Fill the gaps and run again") + + # utils + age <- 0:(nrow(pf)-1) + n_years_data <- length(years_data) + ages <- length(age) + agess <- ages*2 + om <- max(age) + zeros <- matrix(0, nrow=ages, ncol=ages) + + # age distribution at child born + Pif <- pif; no_Pif <- FALSE + Pim <- pim; no_Pim <- FALSE + if(is.null(pif)){ + if(!is.null(nf)){ + Pif <- t(t(nf * ff)/colSums(nf * ff)) + }else{ + Pif <- matrix(0, nrow=ages, ncol=n_years_data) + no_Pif <- TRUE + } + } + if(is.null(pim)){ + if(!is.null(nm)){ + Pim <- t(t(nm * fm)/colSums(nm * fm)) + }else{ + Pim <- matrix(0, nrow=ages, ncol=n_years_data) + no_Pim <- TRUE + } + } + + # get lists of matrix + Ul = Fl = Fl_star = list() + kin_all <- list() + pb <- progress::progress_bar$new( + format = "Running over input years [:bar] :percent", + total = n_years_data + 1, clear = FALSE, width = 60) + for(t in 1:n_years_data){ + # t = 1 + Uf = Um = Fft = Fmt = Mm = Mf = Gt = zeros = matrix(0, nrow=ages, ncol=ages) + Uf[row(Uf)-1 == col(Uf)] <- pf[-ages,t] + Uf[ages, ages] = pf[ages,t] + Um[row(Um)-1 == col(Um)] <- pm[-ages,t] + Um[ages, ages] = pm[ages,t] + Mm <- diag(1-pm[,t]) + Mf <- diag(1-pf[,t]) + Ut <- as.matrix(rbind( + cbind(Matrix::bdiag(Uf, Um), Matrix::bdiag(zeros, zeros)), + cbind(Matrix::bdiag(Mf, Mm), Matrix::bdiag(zeros, zeros)))) + Ul[[as.character(years_data[t])]] <- Ut + Fft[1,] = ff[,t] + Fmt[1,] = fm[,t] + Ft <- Ft_star <- matrix(0, agess*2, agess*2) + Ft[1:agess,1:agess] <- rbind(cbind(birth_female * Fft, birth_female * Fmt), + cbind((1-birth_female) * Fft, (1-birth_female) * Fmt)) + Ft_star[1:agess,1:ages] <- rbind(birth_female * Fft, (1-birth_female) * Fft) + Fl[[as.character(years_data[t])]] <- Ft + Fl_star[[as.character(years_data[t])]] <- Ft_star + # parents age distribution under stable assumption in case no input + if(no_Pim | no_Pif){ + A = Matrix::bdiag(Uf, Um) + Ft_star[1:agess,1:agess] + A_decomp = eigen(A) + lambda = as.double(A_decomp$values[1]) + w = as.double(A_decomp$vectors[,1])/sum(as.double(A_decomp$vectors[,1])) + wf = w[1:ages] + wm = w[(ages+1):(2*ages)] + Pif[,t] = wf * ff[,t] / sum(wf * ff[,t]) + Pim[,t] = wm * fm[,t] / sum(wm * fm[,t]) + } + + # project + Ut <- as.matrix(Ul[[t]]) + Ft <- as.matrix(Fl[[t]]) + Ft_star <- as.matrix(Fl_star[[t]]) + pitf <- Pif[,t] + pitm <- Pim[,t] + pit <- c(pitf, pitm) + if (t==1){ + p1f <- pf[,1] + p1m <- pm[,1] + f1f <- ff[,1] + f1m <- fm[,1] + pif1 <- Pif[,1] + pim1 <- Pim[,1] + kin_all[[1]] <- kin_time_invariant_2sex(pf = p1f, pm = p1m, + ff = f1f, fm = f1m, + sex_focal = sex_focal, + pif = pif1, pim = pim1, + birth_female = birth_female, list_output = TRUE) + } + kin_all[[t+1]] <- timevarying_kin_2sex(Ut=Ut, Ft=Ft, Ft_star=Ft_star, pit=pit, sex_focal, ages, pkin=kin_all[[t]]) + pb$tick() + } + + # filter years and kin that were selected + names(kin_all) <- as.character(years_data) + + # combinations to return + out_selected <- output_period_cohort_combination(output_cohort, output_period, age = age, years_data = years_data) + + possible_kin <- c("d","gd","ggd","m","gm","ggm","os","ys","nos","nys","oa","ya","coa","cya") + if(is.null(output_kin)){ + selected_kin_position <- 1:length(possible_kin) + }else{ + selected_kin_position <- which(possible_kin %in% output_kin) + } + + # first filter + kin_list <- kin_all %>% + purrr::keep(names(.) %in% as.character(unique(out_selected$year))) %>% + purrr::map(~ .[selected_kin_position]) + # long format + message(" Preparing output...") + kin <- lapply(names(kin_list), FUN = function(Y){ + X <- kin_list[[Y]] + X <- purrr::map2(X, names(X), function(x,y){ + # reassign deaths to Focal experienced age + x[(agess+1):(agess*2),1:(ages-1)] <- x[(agess+1):(agess*2),2:ages] + x[(agess+1):(agess*2),ages] <- 0 + x <- data.table::as.data.table(x) + x$year <- Y + x$kin <- y + x$sex_kin <- rep(c(rep("f",ages), rep("m",ages)),2) + x$age_kin <- rep(age, 4) + x$alive <- c(rep("living",agess), rep("dead",agess)) + return(x) + }) %>% + data.table::rbindlist() %>% + stats::setNames(c(as.character(age), "year","kin","sex_kin","age_kin","alive")) %>% + data.table::melt(id.vars = c("year","kin","sex_kin","age_kin","alive"), variable.name = "age_focal", value.name = "count") + X$age_focal = as.integer(as.character(X$age_focal)) + X$year = as.integer(X$year) + X$cohort = X$year - X$age_focal + X <- X[X$age_focal %in% out_selected$age[out_selected$year==as.integer(Y)],] + X <- data.table::dcast(X, year + kin + sex_kin + age_kin + age_focal + cohort ~ alive, value.var = "count", fun.aggregate = sum) + }) %>% data.table::rbindlist() + + # results as list? + if(list_output) { + out <- kin_list + }else{ + out <- kin + } + return(out) +} + +#' one time projection kin + +#' @description one time projection kin. internal function. +#' +#' @param Ut numeric. A matrix of survival probabilities (or ratios). +#' @param Ft numeric. A matrix of age-specific fertility rates. +#' @param Ft_star numeric. Ft but for female fertility. +#' @param pit numeric. A matrix with distribution of childbearing. +#' @param sex_focal character. "f" for female or "m" for male. +#' @param ages numeric. +#' @param pkin numeric. A list with kin count distribution in previous year. +#' @return A list of 14 types of kin matrices (kin age by Focal age, blocked for two sex) projected one time interval. +#' @export +timevarying_kin_2sex<- function(Ut, Ft, Ft_star, pit, sex_focal, ages, pkin){ + + agess <- ages*2 + om <- ages-1 + pif <- pit[1:ages] + pim <- pit[(ages+1):agess] + phi = d = gd = ggd = m = gm = ggm = os = ys = nos = nys = oa = ya = coa = cya = matrix(0,agess*2,ages) + kin_list <- list(d=d,gd=gd,ggd=ggd,m=m,gm=gm,ggm=ggm,os=os,ys=ys, + nos=nos,nys=nys,oa=oa,ya=ya,coa=coa,cya=cya) + + # G matrix moves focal by age + G <- matrix(0, nrow=ages, ncol=ages) + G[row(G)-1 == col(G)] <- 1 + Gt <- matrix(0, agess*2, agess*2) + Gt[1:(agess), 1:(agess)] <- as.matrix(Matrix::bdiag(G, G)) + + # locate focal at age 0 depending sex + sex_index <- ifelse(sex_focal == "f", 1, ages+1) + phi[sex_index, 1] <- 1 + + # initial distribution + m[1:agess,1] = pit + gm[1:agess,1] = pkin[["m"]][1:agess,] %*% (pif + pim) + ggm[1:agess,1] = pkin[["gm"]][1:agess,] %*% (pif + pim) + oa[1:agess,1] = pkin[["os"]][1:agess,] %*% (pif + pim) + ya[1:agess,1] = pkin[["ys"]][1:agess,] %*% (pif + pim) + coa[1:agess,1] = pkin[["nos"]][1:agess,] %*% (pif + pim) + cya[1:agess,1] = pkin[["nys"]][1:agess,] %*% (pif + pim) + # atribuible to focal sex + pios = if(sex_focal == "f") pif else pim + os[1:agess,1] = pkin[["d"]][1:agess,] %*% pios + nos[1:agess,1] = pkin[["gd"]][1:ages,] %*% pios + + for (ix in 1:om){ + phi[,ix+1] = Gt %*% phi[, ix] + d[,ix+1] = Ut %*% pkin[["d"]][,ix] + Ft %*% phi[,ix] + gd[,ix+1] = Ut %*% pkin[["gd"]][,ix] + Ft %*% pkin[["d"]][,ix] + ggd[,ix+1] = Ut %*% pkin[["ggd"]][,ix] + Ft %*% pkin[["gd"]][,ix] + m[,ix+1] = Ut %*% pkin[["m"]][,ix] + gm[,ix+1] = Ut %*% pkin[["gm"]][,ix] + ggm[,ix+1] = Ut %*% pkin[["ggm"]][,ix] + os[,ix+1] = Ut %*% pkin[["os"]][,ix] + ys[,ix+1] = Ut %*% pkin[["ys"]][,ix] + Ft_star %*% pkin[["m"]][,ix] + nos[,ix+1] = Ut %*% pkin[["nos"]][,ix] + Ft %*% pkin[["os"]][,ix] + nys[,ix+1] = Ut %*% pkin[["nys"]][,ix] + Ft %*% pkin[["ys"]][,ix] + oa[,ix+1] = Ut %*% pkin[["oa"]][,ix] + ya[,ix+1] = Ut %*% pkin[["ya"]][,ix] + Ft_star %*% pkin[["gm"]][,ix] + coa[,ix+1] = Ut %*% pkin[["coa"]][,ix] + Ft %*% pkin[["oa"]][,ix] + cya[,ix+1] = Ut %*% pkin[["cya"]][,ix] + Ft %*% pkin[["ya"]][,ix] + } + + kin_list <- list(d=d,gd=gd,ggd=ggd,m=m,gm=gm,ggm=ggm,os=os,ys=ys, + nos=nos,nys=nys,oa=oa,ya=ya,coa=coa,cya=cya) + + return(kin_list) +} diff --git a/R/kin_time_variant_2sex_cod.R b/R/kin_time_variant_2sex_cod.R new file mode 100644 index 0000000..562be20 --- /dev/null +++ b/R/kin_time_variant_2sex_cod.R @@ -0,0 +1,316 @@ +#' Estimate kin counts in a time variant framework (dynamic rates) in a two-sex framework (Caswell, 2022) + +#' @description Two-sex matrix framework for kin count estimates with varying rates. +#' This produces kin counts grouped by kin, age and sex of each relatives at each Focal´s age. +#' For example, male cousins from aunts and uncles from different sibling's parents are grouped in one male count of cousins. This also produces kin deaths grouped by kin, age, sex of +#' each relatives at each Focal´s age, and cause of death. +#' @details See Caswell (2022) for details on formulas. +#' @param pf numeric. A vector (atomic) or matrix with probabilities (or survival ratios, or transition between age class in a more general perspective) with rows as ages (and columns as years in case of matrix, being the name of each col the year). +#' @param pm numeric. A vector (atomic) or matrix with probabilities (or survival ratios, or transition between age class in a more general perspective) with rows as ages (and columns as years in case of matrix, being the name of each col the year). +#' @param ff numeric. Same as pf but for fertility rates. +#' @param fm numeric. Same as pm but for fertility rates. +#' @param Hf numeric. A list where each list element (being the name of each list element the year) contains a matrix with cause-specific hazards for females with rows as causes and columns as ages, being the name of each col the age. +#' @param Hm numeric. A list where each list element (being the name of each list element the year) contains a matrix with cause-specific hazards for males with rows as causes and columns as ages, being the name of each col the age. +#' @param sex_focal character. "f" for female or "m" for male. +#' @param pif numeric. For using some specific age distribution of childbearing for mothers (same length as ages). Default `NULL`. +#' @param pim numeric. For using some specific age distribution of childbearing for fathers (same length as ages). Default `NULL`. +#' @param nf numeric. Same as pf but for population distribution (counts or `%`). Optional. +#' @param nm numeric. Same as pm but for population distribution (counts or `%`). Optional. +#' @param output_cohort integer. Vector of year cohorts for returning results. Should be within input data years range. +#' @param output_period integer. Vector of period years for returning results. Should be within input data years range. +#' @param output_kin character. kin types to return: "m" for mother, "d" for daughter,... +#' @param birth_female numeric. Female portion at birth. This multiplies `f` argument. If `f` is already for female offspring, this needs to be set as 1. +#' @param list_output logical. Results as a list with years elements (as a result of `output_cohort` and `output_period` combination), with a second list of `output_kin` elements, with focal´s age in columns and kin ages in rows (2 * ages, last chunk of ages for death experience). Default `FALSE` +#' @return A data.frame with year, cohort, Focal´s age, related ages, sex and type of kin (for example `d` is daughter, `oa` is older aunts, etc.), including living and dead kin at that age and sex. +#' @export + +# BEN: Added hazard matrices as inputs. +# Assume that input of cause-specific mortality will be in terms of +# matrices of cause-specific hazards for the two sexes (causes * ages). +# Alternative: a matrix (causes * ages) containing the ratio mxi/mx. +kin_time_variant_2sex_cod <- function(pf = NULL, pm = NULL, + ff = NULL, fm = NULL, + Hf = NULL, Hm = NULL, + sex_focal = "f", + birth_female = 1/2.04, + pif = NULL, pim = NULL, + nf = NULL, nm = NULL, + output_cohort = NULL, output_period = NULL, output_kin = NULL, + list_output = FALSE){ + + # global vars + .<-living<-dead<-age_kin<-age_focal<-cohort<-year<-total<-mean_age<-count_living<-sd_age<-count_dead<-mean_age_lost<-indicator<-value<-NULL + + # same input length + + # BEN: Now we should also check the dimensions of the cause-specific hazard + # matrices. + if(!all(dim(pf) == dim(pm), dim(pf) == dim(ff), dim(pf) == dim(fm), + nrow(Hf)==nrow(Hm), ncol(Hf)==ncol(Hm), ncol(Hf)==nrow(pf), + length(Hf)==length(Hm), length(Hm)==ncol(pf))) stop("Dimension of P's, F's, and H's should match") + + # data should be from same interval years + years_data <- as.integer(colnames(pf)) + if(stats::var(diff(years_data))!=0) stop("Data should be for same interval length years. Fill the gaps and run again") + + # utils + age <- 0:(nrow(pf)-1) + n_years_data <- length(years_data) + ages <- length(age) + agess <- ages*2 + om <- max(age) + + # BEN: The zero matrix was deleted from line above and has + # to be made specific according to living/dead kin + # part of the block matrix Ut. + causes <- nrow(Hf[[1]]) # number of causes of death + zeros_l <- matrix(0, nrow = ages, ncol = (causes*ages)) # zero matrix for living kin part + zeros_d = matrix(0, nrow = (causes*ages), ncol = (causes*ages)) # zero matrix for death kin part + + # age distribution at child born + Pif <- pif; no_Pif <- FALSE + Pim <- pim; no_Pim <- FALSE + if(is.null(pif)){ + if(!is.null(nf)){ + Pif <- t(t(nf * ff)/colSums(nf * ff)) + }else{ + Pif <- matrix(0, nrow=ages, ncol=n_years_data) + no_Pif <- TRUE + } + } + if(is.null(pim)){ + if(!is.null(nm)){ + Pim <- t(t(nm * fm)/colSums(nm * fm)) + }else{ + Pim <- matrix(0, nrow=ages, ncol=n_years_data) + no_Pim <- TRUE + } + } + + # get lists of matrix + Ul = Fl = Fl_star = list() + kin_all <- list() + pb <- progress::progress_bar$new( + format = "Running over input years [:bar] :percent", + total = n_years_data + 1, clear = FALSE, width = 60) + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # BEN: First load function at the end of script + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + for(t in 1:n_years_data){ + # t = 1 + Uf = Um = Fft = Fmt = Mm = Mf = Gt = matrix(0, nrow=ages, ncol=ages) + Uf[row(Uf)-1 == col(Uf)] <- pf[-ages,t] + Uf[ages, ages] = pf[ages,t] + Um[row(Um)-1 == col(Um)] <- pm[-ages,t] + Um[ages, ages] = pm[ages,t] + + # BEN: Building of M, matrix of cause-specific prob. of dying. + # Hence, M = H D(h_tilde)^{-1} D(q) + # where h_tilde are the summed hazards for each age, and + # q = 1 - p + sum_hf <- t(rep(1, causes)) %*% Hf[[t]] # h_tilde female + sum_hm <- t(rep(1, causes)) %*% Hm[[t]] # h_tilde male + Mf <- Hf[[t]] %*% solve(diag(c(sum_hf))) %*% diag(1-pf[,t]) + Mm <- Hm[[t]] %*% solve(diag(c(sum_hm))) %*% diag(1-pm[,t]) + # Mm <- diag(1-pm[,t]) + # Mf <- diag(1-pf[,t]) + + # BEN: In order to classify kin death by both cause and age at death, + # we need a mortality matrices M_hat of dimension + # ((causes*ages) * ages). See eq.12 in Caswell et al. (2024). + # Store columns of M as a list of vectors + Mf.cols <- lapply(1:ncol(Mf), function(j) return(Mf[,j])) + Mm.cols <- lapply(1:ncol(Mm), function(j) return(Mm[,j])) + # Create M_hat using the vectors as elements of the block diagonal + Ut <- as.matrix(rbind( + cbind(Matrix::bdiag(Uf, Um), Matrix::bdiag(zeros_l, zeros_l)), + cbind(Matrix::bdiag(Matrix::bdiag(Mf.cols), Matrix::bdiag(Mm.cols)), Matrix::bdiag(zeros_d, zeros_d)))) + + Ul[[as.character(years_data[t])]] <- Ut + Fft[1,] = ff[,t] + Fmt[1,] = fm[,t] + + # BEN: Accounting for causes of death leads to have different dimensions + # in Ft and Ft_star. + Ft <- Ft_star <- matrix(0, (agess + agess*causes), (agess + agess*causes)) + + Ft[1:agess,1:agess] <- rbind(cbind(birth_female * Fft, birth_female * Fmt), + cbind((1-birth_female) * Fft, (1-birth_female) * Fmt)) + Ft_star[1:agess,1:ages] <- rbind(birth_female * Fft, (1-birth_female) * Fft) + Fl[[as.character(years_data[t])]] <- Ft + Fl_star[[as.character(years_data[t])]] <- Ft_star + # parents age distribution under stable assumption in case no input + if(no_Pim | no_Pif){ + A = Matrix::bdiag(Uf, Um) + Ft_star[1:agess,1:agess] + A_decomp = eigen(A) + lambda = as.double(A_decomp$values[1]) + w = as.double(A_decomp$vectors[,1])/sum(as.double(A_decomp$vectors[,1])) + wf = w[1:ages] + wm = w[(ages+1):(2*ages)] + Pif[,t] = wf * ff[,t] / sum(wf * ff[,t]) + Pim[,t] = wm * fm[,t] / sum(wm * fm[,t]) + } + + # project + Ut <- as.matrix(Ul[[t]]) + Ft <- as.matrix(Fl[[t]]) + Ft_star <- as.matrix(Fl_star[[t]]) + pitf <- Pif[,t] + pitm <- Pim[,t] + pit <- c(pitf, pitm) + if (t==1){ + p1f <- pf[,1] + p1m <- pm[,1] + f1f <- ff[,1] + f1m <- fm[,1] + pif1 <- Pif[,1] + pim1 <- Pim[,1] + + # BEN: Add Hf and Hm + H1f <- Hf[[1]] + H1m <- Hm[[1]] + + # BEN: cod version !!! + kin_all[[1]] <- kin_time_invariant_2sex_cod(pf = p1f, pm = p1m, + ff = f1f, fm = f1m, + pif = pif1, pim = pim1, + Hf = H1f, Hm = H1m, + birth_female = birth_female, list_output = TRUE) + } + kin_all[[t+1]] <- timevarying_kin_2sex_cod(Ut=Ut, Ft=Ft, Ft_star=Ft_star, pit=pit, sex_focal, ages, pkin=kin_all[[t]]) + pb$tick() + } + + # filter years and kin that were selected + names(kin_all) <- as.character(years_data) + + # combinations to return + out_selected <- output_period_cohort_combination(output_cohort, output_period, age = age, years_data = years_data) + + possible_kin <- c("d","gd","ggd","m","gm","ggm","os","ys","nos","nys","oa","ya","coa","cya") + if(is.null(output_kin)){ + selected_kin_position <- 1:length(possible_kin) + }else{ + selected_kin_position <- which(possible_kin %in% output_kin) + } + + # first filter + kin_list <- kin_all %>% + purrr::keep(names(.) %in% as.character(unique(out_selected$year))) %>% + purrr::map(~ .[selected_kin_position]) + # long format + message("Preparing output...") + kin <- lapply(names(kin_list), FUN = function(Y){ + X <- kin_list[[Y]] + X <- purrr::map2(X, names(X), function(x,y){ + # reassign deaths to Focal experienced age + x[(agess+1):(agess + agess*causes),1:(ages-1)] <- x[(agess+1):(agess + agess*causes),2:ages] + x[(agess+1):(agess + agess*causes),ages] <- 0 + x <- data.table::as.data.table(x) + x$year <- Y + x$kin <- y + x$sex_kin <- c(rep(c("f", "m"),each=ages), rep(c("f", "m"),each=ages*causes)) + x$age_kin <- c(rep(age,2), rep(rep(age,each=causes),2)) + x$alive <- c(rep("living",2*ages), rep(paste0("deadcause",1:causes),2*ages)) + return(x) + }) %>% + data.table::rbindlist() %>% + stats::setNames(c(as.character(age), "year","kin","sex_kin","age_kin","alive")) %>% + data.table::melt(id.vars = c("year","kin","sex_kin","age_kin","alive"), variable.name = "age_focal", value.name = "count") + X$age_focal = as.integer(as.character(X$age_focal)) + X$year = as.integer(X$year) + X$cohort = X$year - X$age_focal + X <- X[X$age_focal %in% out_selected$age[out_selected$year==as.integer(Y)],] + X <- data.table::dcast(X, year + kin + sex_kin + age_kin + age_focal + cohort ~ alive, value.var = "count", fun.aggregate = sum) + }) %>% data.table::rbindlist() + + # results as list? + if(list_output) { + out <- kin_list + }else{ + out <- kin + } + return(out) +} + +#' one time projection kin + +#' @description one time projection kin. internal function. +#' +#' @param Ut numeric. A matrix of survival probabilities (or ratios). +#' @param Ft numeric. A matrix of age-specific fertility rates. +#' @param Ft_star numeric. Ft but for female fertility. +#' @param pit numeric. A matrix with distribution of childbearing. +#' @param sex_focal character. "f" for female or "m" for male. +#' @param ages numeric. +#' @param pkin numeric. A list with kin count distribution in previous year. +#' @return A list of 14 types of kin matrices (kin age by Focal age, blocked for two sex) projected one time interval. +#' @export +timevarying_kin_2sex_cod<- function(Ut, Ft, Ft_star, pit, sex_focal, ages, pkin){ + + agess <- ages*2 + om <- ages-1 + pif <- pit[1:ages] + pim <- pit[(ages+1):agess] + + # BEN : Add the number of CoD + causes <- nrow(Hf[[1]]) + + # BEN: Changed dimensions of lower part (dead kin) to account for death from causes. + phi = d = gd = ggd = m = gm = ggm = os = ys = nos = nys = oa = ya = coa = cya = matrix(0, (agess + agess*causes), ages) + + kin_list <- list(d=d,gd=gd,ggd=ggd,m=m,gm=gm,ggm=ggm,os=os,ys=ys, + nos=nos,nys=nys,oa=oa,ya=ya,coa=coa,cya=cya) + + # G matrix moves focal by age + G <- matrix(0, nrow=ages, ncol=ages) + G[row(G)-1 == col(G)] <- 1 + + # BEN: Changed dimensions + Gt <- matrix(0, (agess + agess*causes), (agess + agess*causes)) + + Gt[1:(agess), 1:(agess)] <- as.matrix(Matrix::bdiag(G, G)) + + # locate focal at age 0 depending sex + sex_index <- ifelse(sex_focal == "f", 1, ages+1) + phi[sex_index, 1] <- 1 + + # BEN: NOT SURE ABOUT WHAT IS HAPPENING BELOW + # Rows are multiplied by the sum of the pi? + + # initial distribution + m[1:agess,1] = pit + gm[1:agess,1] = pkin[["m"]][1:agess,] %*% (pif + pim) + ggm[1:agess,1] = pkin[["gm"]][1:agess,] %*% (pif + pim) + os[1:agess,1] = pkin[["d"]][1:agess,] %*% pif + nos[1:agess,1] = pkin[["gd"]][1:ages,] %*% pif + oa[1:agess,1] = pkin[["os"]][1:agess,] %*% (pif + pim) + ya[1:agess,1] = pkin[["ys"]][1:agess,] %*% (pif + pim) + coa[1:agess,1] = pkin[["nos"]][1:agess,] %*% (pif + pim) + cya[1:agess,1] = pkin[["nys"]][1:agess,] %*% (pif + pim) + + for (ix in 1:om){ + phi[,ix+1] = Gt %*% phi[, ix] + d[,ix+1] = Ut %*% pkin[["d"]][,ix] + Ft %*% phi[,ix] + gd[,ix+1] = Ut %*% pkin[["gd"]][,ix] + Ft %*% pkin[["d"]][,ix] + ggd[,ix+1] = Ut %*% pkin[["ggd"]][,ix] + Ft %*% pkin[["gd"]][,ix] + m[,ix+1] = Ut %*% pkin[["m"]][,ix] + gm[,ix+1] = Ut %*% pkin[["gm"]][,ix] + ggm[,ix+1] = Ut %*% pkin[["ggm"]][,ix] + os[,ix+1] = Ut %*% pkin[["os"]][,ix] + ys[,ix+1] = Ut %*% pkin[["ys"]][,ix] + Ft_star %*% pkin[["m"]][,ix] + nos[,ix+1] = Ut %*% pkin[["nos"]][,ix] + Ft %*% pkin[["os"]][,ix] + nys[,ix+1] = Ut %*% pkin[["nys"]][,ix] + Ft %*% pkin[["ys"]][,ix] + oa[,ix+1] = Ut %*% pkin[["oa"]][,ix] + ya[,ix+1] = Ut %*% pkin[["ya"]][,ix] + Ft_star %*% pkin[["gm"]][,ix] + coa[,ix+1] = Ut %*% pkin[["coa"]][,ix] + Ft %*% pkin[["oa"]][,ix] + cya[,ix+1] = Ut %*% pkin[["cya"]][,ix] + Ft %*% pkin[["ya"]][,ix] + } + + kin_list <- list(d=d,gd=gd,ggd=ggd,m=m,gm=gm,ggm=ggm,os=os,ys=ys, + nos=nos,nys=nys,oa=oa,ya=ya,coa=coa,cya=cya) + + return(kin_list) +} diff --git a/R/plot_diagramm.R b/R/plot_diagramm.R index 2497186..6bffa8c 100644 --- a/R/plot_diagramm.R +++ b/R/plot_diagramm.R @@ -1,90 +1,77 @@ #' plot a Kin diagram (network) -#' @description Given estimation of kin counts from `kins` function, draw a network diagramm. -#' @param kin_total data.frame. With columns `kin` with type and `count` with some measeure. -#' @param rounding numeric. Estimation could have a lot of decimals. Rounding will make looks more clear the diagramm. -#' @return A plot +#' @description Draws a Keyfitz-style kinship diagram given a kinship object created by the `kin` function. Displays expected kin counts for a Focal aged 'a'. +#' @param kin_total data.frame. values in column `kin` define the relative type - see `demokin_codes()`. Values in column `count` are the expected number of relatives. +#' @param rounding numeric. Number of decimals to show in diagram. +#' @return A Keyfitz-style kinship plot. #' @export -plot_diagram <- function(kin_total, rounding = 3){ - - vertices <- data.frame( - nodes = c("ggd", "gd", "d", "Focal", "m", "gm", "ggm", "oa", "coa", "os", "nos", "ya", "cya", "ys", "nys") - , x = c(1, 1, 1, 1, 1, 1, 1, 0, -1, 0, -1, 2, 3, 2, 3) - , y = c(0, 1, 2, 3, 4, 5, 6, 4, 3, 3, 2, 4, 3, 3, 2) - ) - - d <- data.frame( - from = c("ggd", "gd", "d", "Focal", "m", "gm", "gm", "oa", "m", "os", "gm", "ya", "m", "ys") - , to = c("gd", "d", "Focal", "m", "gm", "ggm", "oa", "coa", "os", "nos", "ya", "cya", "ys", "nys") - ) - - # Add values - lookup <- c(with(kin_total, paste0(kin, " \n", round(count, rounding))), "Focal") - names(lookup) <- c(kin_total$kin, "Focal") - - vertices$nodes <- lookup[vertices$nodes] - d$from <- lookup[d$from] - d$to <- lookup[d$to] - - # Plot - - b <- igraph::graph_from_data_frame(vertices = vertices, d= d, directed = FALSE) - - plot( - b - , vertex.size = 30 - , curved = 1 - , vertex.color = "#FFF1E2" - , vertex.shape = "circle" - , vertex.label.cex = 0.8 - , vertex.label.color = "black" - , label.degree = -pi/2 - , edge.width = 2 - , edge.color = "black" - ) - -} - -# old function - -# plot_diagram <- function(kin_total, rounding = 3){ -# # https://cran.r-project.org/web/packages/DiagrammeR/vignettes/graphviz-mermaid.html -# # https://color.hailpixel.com/#D9E9BE,BF62CB,94C2DB,79D297,CDA76A,C8695B -# -# kin_total <- kin_total %>% mutate(count = round(count,digits = rounding)) -# -# DiagrammeR::mermaid( -# paste0("graph TD -# -# GGM(ggm:
", kin_total$count[kin_total$kin=="ggm"] ,") -# GGM ==> GM(gm:
", kin_total$count[kin_total$kin=="gm"] ,") -# GM --> AOM(oa:
", kin_total$count[kin_total$kin=="oa"] ,") -# GM ==> M(m:
", kin_total$count[kin_total$kin=="m"] ,") -# GM --> AYM(ya:
", kin_total$count[kin_total$kin=="ya"] ,") -# AOM --> CAOM(coa:
", kin_total$count[kin_total$kin=="coa"] ,") -# M --> OS(os:
", kin_total$count[kin_total$kin=="os"] ,") -# M ==> E((Ego)) -# M --> YS(ys:
", kin_total$count[kin_total$kin=="ys"] ,") -# AYM --> CAYM(cya:
", kin_total$count[kin_total$kin=="cya"] ,") -# OS --> NOS(nos:
", kin_total$count[kin_total$kin=="nos"] ,") -# E ==> D(d:
", kin_total$count[kin_total$kin=="d"] ,") -# YS --> NYS(nys:
", kin_total$count[kin_total$kin=="nys"] ,") -# D ==> GD(gd:
", kin_total$count[kin_total$kin=="gd"] ,") -# style GGM fill:#a1f590, stroke:#333, stroke-width:2px; -# style GM fill:#a1f590, stroke:#333, stroke-width:2px, text-align: center; -# style M fill:#a1f590, stroke:#333, stroke-width:2px, text-align: center -# style D fill:#a1f590, stroke:#333, stroke-width:2px, text-align: center -# style YS fill:#a1f590, stroke:#333, stroke-width:2px, text-align: center -# style OS fill:#a1f590, stroke:#333, stroke-width:2px, text-align: center -# style CAOM fill:#f1f0f5, stroke:#333, stroke-width:2px, text-align: center -# style AYM fill:#f1f0f5, stroke:#333, stroke-width:2px, text-align: center -# style AOM fill:#f1f0f5, stroke:#333, stroke-width:2px, text-align: center -# style CAYM fill:#f1f0f5, stroke:#333, stroke-width:2px, text-align: center -# style NOS fill:#f1f0f5, stroke:#333, stroke-width:2px, text-align: center -# style NYS fill:#f1f0f5, stroke:#333, stroke-width:2px, text-align: center -# style E fill:#FFF, stroke:#333, stroke-width:4px, text-align: center -# style D fill:#a1f590, stroke:#333, stroke-width:2px, text-align: center -# style GD fill:#a1f590, stroke:#333, stroke-width:2px, text-align: center")) -# } - +plot_diagram <- + function (kin_total, rounding = 3) { + rels <- c("ggd", "gd", "d", "Focal", "m", "gm", "ggm", "oa", "coa", "os", "nos", "ya", "cya", "ys", "nys") + # check all types are in + if(!any(unique(kin_total$kin) %in% rels) | any(c("s", "c", "a", "n") %in% unique(kin_total$kin))) stop("You need all specific types. If some are missed or grouped, for example old and younger sisters in 's', this will fail.") + vertices <- data.frame( + nodes = rels + , x = c(1, 1, 1, 1, 1, 1, 1, 0, -1, 0, -1, 2, 3, 2, 3) + , y = c(0, 1, 2, 3, 4, 5, 6, 4, 3, 3, 2, 4, 3, 3, 2) + ) + d <- data.frame(from = c("ggd", "gd", "d", "Focal", "m", + "gm", "gm", "oa", "m", "os", "gm", "ya", "m", "ys"), + to = c("gd", "d", "Focal", "m", "gm", "ggm", "oa", "coa", + "os", "nos", "ya", "cya", "ys", "nys")) + lookup <- c(with(kin_total, paste0(kin, " \n", round(count, rounding))), "Focal") + names(lookup) <- c(kin_total$kin, "Focal") + vertices$nodes <- lookup[vertices$nodes] + d$from <- lookup[d$from] + d$to <- lookup[d$to] + # to show full relative names + relatives <- c("Cousins from older aunt", "Cousins from younger aunt", + "Daughter", "Grand-daughter", "Great-grand-daughter", + "Great-grandmother", "Grandmother", "Mother", "Nieces from older sister", + "Nieces from younger sister", "Aunt older than mother", + "Aunt younger than mother", "Older sister", "Younger sister", "") + names(relatives) <- c("coa", "cya", "d", "gd", "ggd", + "ggm", "gm", "m", "nos", "nys", "oa", "ya", "os", + "ys", "Focal") + labs <- relatives[rels] + # Plot + b <- igraph::graph_from_data_frame(vertices = vertices, d= d, directed = FALSE) + b_auto_layout <- igraph::layout.auto(b) + b_auto_layout_scaled <- igraph::norm_coords(b_auto_layout, ymin=-1, ymax=1, xmin=-1, xmax=1) + plot( + b + , vertex.size = 70 + , curved = 1 + , vertex.color = "#FFF1E2" + , vertex.shape = "circle" + , vertex.label.cex = 0.8 + , vertex.label.color = "black" + , edge.width = 2 + , layout = b_auto_layout_scaled * 3 + , rescale = FALSE + , xlim = c(-3.3,3.3) + , ylim = c(-3.1,3.1) + ) + # Add relative names + # Thanks to Egor Kotov for this tip! + plot( + b + , vertex.size = 70 + , curved = 1 + , vertex.color = NA + , vertex.shape = "none" + , vertex.label = labs + , vertex.label.dist = -6.5 + , vertex.label.cex = 0.8 + , vertex.label.color = "black" + , vertex.label.degree = -pi/2 + , edge.width = 2 + , edge.color = NA + , layout = b_auto_layout_scaled * 3 + , rescale = FALSE + , xlim = c(-3.3,3.3) + , ylim = c(-3.1,3.1) + , add = T + ) + } diff --git a/README.Rmd b/README.Rmd index 5eb0592..a3f462f 100644 --- a/README.Rmd +++ b/README.Rmd @@ -1,9 +1,8 @@ --- output: github_document +bibliography: vignettes\\references.bib --- - - ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, @@ -23,19 +22,25 @@ library(knitr) ::: {.column width="60%"} -`DemoKin` uses matrix demographic methods to compute expected (average) kin counts from demographic rates under a range of scenarios and assumptions. The package is an R-language implementation of Caswell (2019), Caswell (2020), and Caswell and Song (2021). It draws on previous theoretical development by Goodman, Keyfitz and Pullum (1974). +`DemoKin` uses matrix demographic methods to compute expected (average) kin counts from demographic rates under a range of scenarios and assumptions. The package is an R-language implementation of Caswell [-@caswell_formal_2019; -@caswell_formal_2020; -@caswell_formal_2022], and Caswell and Song [-@caswell_formal_2021]. It draws on previous theoretical development by Goodman, Keyfitz and Pullum [-@goodman_family_1974]. ::: ::: {.column width="40%"} - + ::: :::::::::::::: ## Installation -You can install the development version from GitHub with: +Download the stable version [from CRAN](https://cran.r-project.org/web/packages/DemoKin/): + +``` {r, eval=FALSE, include = T} +install.packages("DemoKin") +``` + +Or you can install the development version from GitHub: ``` {r, eval=FALSE} # install.packages("devtools") @@ -44,22 +49,22 @@ devtools::install_github("IvanWilli/DemoKin") ## Usage -Consider an average Swedish woman called 'Focal'. For this exercise, we assume a female closed population in which everyone experiences the Swedish 2015 mortality and fertility rates at each age throughout their life (the 'time-invariant' assumption in Caswell [2019]). +Consider an average Swedish woman called 'Focal.' For this exercise, we assume a female closed population in which everyone experiences the Swedish 2015 mortality and fertility rates at each age throughout their life; i.e., the 'time-invariant' assumption in Caswell [-@caswell_formal_2019]. We then ask: -> How many living relatives does Focal have at each age? +> What is the expected number of relatives of Focal over her life course? Let's explore this using the Swedish data already included with `DemoKin`. -```{r, fig.height=6, fig.width=8} +```{r, fig.height=6, fig.width=8, message=FALSE, warning=FALSE} library(DemoKin) swe_surv_2015 <- swe_px[,"2015"] swe_asfr_2015 <- swe_asfr[,"2015"] -swe_2015 <- kin(U = swe_surv_2015, f = swe_asfr_2015, time_invariant = TRUE) +swe_2015 <- kin(p = swe_surv_2015, f = swe_asfr_2015, time_invariant = TRUE) ``` -*px* is the survival probability by age from a life table and *f* are the age specific fertility raties by age (see `?kin` for details). +*p* is the survival probability by age from a life table and *f* are the age specific fertility ratios by age (see `?kin` for details). Now, we can visualize the implied kin counts (i.e., the average number of living kin) of Focal at age 35 using a network or 'Keyfitz' kinship diagram with the function `plot_diagram`: @@ -75,12 +80,13 @@ plot_diagram(kin_total, rounding = 2) Relatives are identified by a unique code: ```{r, fig.height=6, fig.width=8, echo=FALSE} -kable(DemoKin::demokin_codes()[-2]) +# kable(DemoKin::demokin_codes[,c("DemoKin", "Labels_2sex")]) +kable(DemoKin::demokin_codes[,-c(2)]) ``` ## Vignette -For more details, including an extension to time varying-populations rates, deceased kin, and multi-state models, see `vignette("Reference", package = "DemoKin")`. +For more details, including an extension to time-variant rates, deceased kin, and multi-state models in a one-sex framework, see the [Reference_OneSex](https://cran.r-project.org/web/packages/DemoKin/vignettes/Reference_OneSex.html) vignette; also accessible from DemoKin: `vignette("Reference_OneSex", package = "DemoKin")`. For two-sex models, see the [Reference_TwoSex](https://cran.r-project.org/web/packages/DemoKin/vignettes/Reference_TwoSex.html) vignette; also accessible from DemoKin: `vignette("Reference_TwoSex", package = "DemoKin")`. If the vignette does not load, you may need to install the package as `devtools::install_github("IvanWilli/DemoKin", build_vignettes = T)`. ## Citation @@ -98,18 +104,3 @@ If you're interested in contributing, please get in touch, create an issue, or s We look forward to hearing from you! ## References - -Caswell, H. 2019. The formal demography of kinship: A matrix formulation. Demographic Research 41:679–712. doi:10.4054/DemRes.2019.41.24. - -Caswell, H. 2020. The formal demography of kinship II: Multistate models, parity, and sibship. Demographic Research 42: 1097-1144. doi:10.4054/DemRes.2020.42.38. - -Caswell, Hal and Xi Song. 2021. “The Formal Demography of Kinship. III. Kinship Dynamics with Time-Varying Demographic Rates.” Demographic Research 45: 517–46. doi:10.4054/DemRes.2021.45.16. - -Goodman, L.A., Keyfitz, N., and Pullum, T.W. (1974). Family formation and the frequency of various kinship relationships. Theoretical Population Biology 5(1):1–27. doi:10.1016/0040-5809(74)90049-5. - - - - - - - diff --git a/README.md b/README.md index 0142d8b..9255f10 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,4 @@ - - # DemoKin
@@ -10,14 +8,14 @@ `DemoKin` uses matrix demographic methods to compute expected (average) kin counts from demographic rates under a range of scenarios and assumptions. The package is an R-language implementation of Caswell -(2019), Caswell (2020), and Caswell and Song (2021). It draws on -previous theoretical development by Goodman, Keyfitz and Pullum (1974). +(2019, 2020, 2022), and Caswell and Song (2021). It draws on previous +theoretical development by Goodman, Keyfitz and Pullum (1974).
- +
@@ -25,7 +23,14 @@ previous theoretical development by Goodman, Keyfitz and Pullum (1974). ## Installation -You can install the development version from GitHub with: +Download the stable version [from +CRAN](https://cran.r-project.org/web/packages/DemoKin/): + +``` r +install.packages("DemoKin") +``` + +Or you can install the development version from GitHub: ``` r # install.packages("devtools") @@ -34,14 +39,15 @@ devtools::install_github("IvanWilli/DemoKin") ## Usage -Consider an average Swedish woman called ‘Focal’. For this exercise, we +Consider an average Swedish woman called ‘Focal.’ For this exercise, we assume a female closed population in which everyone experiences the Swedish 2015 mortality and fertility rates at each age throughout their -life (the ‘time-invariant’ assumption in Caswell \[2019\]). +life; i.e., the ‘time-invariant’ assumption in Caswell (2019). We then ask: -> How many living relatives does Focal have at each age? +> What is the expected number of relatives of Focal over her life +> course? Let’s explore this using the Swedish data already included with `DemoKin`. @@ -50,11 +56,11 @@ Let’s explore this using the Swedish data already included with library(DemoKin) swe_surv_2015 <- swe_px[,"2015"] swe_asfr_2015 <- swe_asfr[,"2015"] -swe_2015 <- kin(U = swe_surv_2015, f = swe_asfr_2015, time_invariant = TRUE) +swe_2015 <- kin(p = swe_surv_2015, f = swe_asfr_2015, time_invariant = TRUE) ``` -*px* is the survival probability by age from a life table and *f* are -the age specific fertility raties by age (see `?kin` for details). +*p* is the survival probability by age from a life table and *f* are the +age specific fertility ratios by age (see `?kin` for details). Now, we can visualize the implied kin counts (i.e., the average number of living kin) of Focal at age 35 using a network or ‘Keyfitz’ kinship @@ -69,38 +75,48 @@ names(kin_total) <- c("kin", "count") plot_diagram(kin_total, rounding = 2) ``` - + Relatives are identified by a unique code: -| DemoKin | Label | -|:--------|:---------------------------| -| coa | Cousins from older aunt | -| cya | Cousins from younger aunt | -| d | Daughter | -| gd | Grand-daughter | -| ggd | Great-grand-daughter | -| ggm | Great-grandmother | -| gm | Grandmother | -| m | Mother | -| nos | Nieces from older sister | -| nys | Nieces from younger sister | -| oa | Aunt older than mother | -| ya | Aunt younger than mother | -| os | Older sister | -| ys | Younger sister | +| DemoKin | Labels_female | Labels_male | Labels_2sex | +|:--------|:----------------------------|:------------------------------|:----------------------------------| +| coa | Cousins from older aunts | Cousins from older uncles | Cousins from older aunts/uncles | +| cya | Cousins from younger aunts | Cousins from younger uncles | Cousins from younger aunts/uncles | +| c | Cousins | Cousins | Cousins | +| d | Daughters | Brothers | Siblings | +| gd | Grand-daughters | Grand-sons | Grand-childrens | +| ggd | Great-grand-daughters | Great-grand-sons | Great-grand-childrens | +| ggm | Great-grandmothers | Great-grandfathers | Great-grandfparents | +| gm | Grandmothers | Grandfathers | Grandparents | +| m | Mother | Father | Parents | +| nos | Nieces from older sisters | Nephews from older brothers | Niblings from older siblings | +| nys | Nieces from younger sisters | Nephews from younger brothers | Niblings from younger siblings | +| n | Nieces | Nephews | Niblings | +| oa | Aunts older than mother | Uncles older than fathers | Aunts/Uncles older than parents | +| ya | Aunts younger than mother | Uncles younger than father | Aunts/Uncles younger than parents | +| a | Aunts | Uncles | Aunts/Uncles | +| os | Older sisters | Older brothers | Older siblings | +| ys | Younger sisters | Younger brothers | Younger siblings | +| s | Sisters | Brothers | Siblings | ## Vignette -For more details, including an extension to time varying-populations -rates, deceased kin, and multi-state models, see -`vignette("Reference", package = "DemoKin")`. If the vignette does not -load, you may need to install the package as +For more details, including an extension to time-variant rates, deceased +kin, and multi-state models in a one-sex framework, see the +[Reference_OneSex](https://cran.r-project.org/web/packages/DemoKin/vignettes/Reference_OneSex.html) +vignette; also accessible from DemoKin: +`vignette("Reference_OneSex", package = "DemoKin")`. For two-sex models, +see the +[Reference_TwoSex](https://cran.r-project.org/web/packages/DemoKin/vignettes/Reference_TwoSex.html) +vignette; also accessible from DemoKin: +`vignette("Reference_TwoSex", package = "DemoKin")`. If the vignette +does not load, you may need to install the package as `devtools::install_github("IvanWilli/DemoKin", build_vignettes = T)`. ## Citation -Williams, Iván; Alburez-Gutierrez, Diego; Song, Xi; and Hal Caswell. +Williams, Iván; Alburez-Gutierrez, Diego; and the DemoKin team. (2021) DemoKin: An R package to implement demographic matrix kinship models. URL: . @@ -122,22 +138,48 @@ request. We look forward to hearing from you! ## References -Caswell, H. 2019. The formal demography of kinship: A matrix -formulation. Demographic Research 41:679–712. -. +
+ +
-Caswell, H. 2020. The formal demography of kinship II: Multistate -models, parity, and sibship. Demographic Research 42: 1097-1144. -. +Caswell, Hal. 2019. “The Formal Demography of Kinship: A Matrix +Formulation.” *Demographic Research* 41 (September): 679–712. +. + +
+ +
+ +———. 2020. “The Formal Demography of Kinship II: Multistate Models, +Parity, and Sibship.” *Demographic Research* 42 (June): 1097–1146. +. + +
-Caswell, Hal and Xi Song. 2021. “The Formal Demography of Kinship. III. -Kinship Dynamics with Time-Varying Demographic Rates.” Demographic -Research 45: 517–46. . +
-Goodman, L.A., Keyfitz, N., and Pullum, T.W. (1974). Family formation -and the frequency of various kinship relationships. Theoretical -Population Biology 5(1):1–27. . +———. 2022. “The Formal Demography of Kinship IV: Two-Sex Models and +Their Approximations.” *Demographic Research* 47 (September): 359–96. +. - - - +
+ +
+ +Caswell, Hal, and Xi Song. 2021. “The Formal Demography of Kinship III: +Kinship Dynamics with Time-Varying Demographic Rates.” *Demographic +Research* 45 (August): 517–46. +. + +
+ +
+ +Goodman, Leo A, Nathan Keyfitz, and Thomas W. Pullum. 1974. “Family +Formation and the Frequency of Various Kinship Relationships.” +*Theoretical Population Biology*, 27. +. + +
+ +
diff --git a/cran-comments.md b/cran-comments.md new file mode 100644 index 0000000..998da63 --- /dev/null +++ b/cran-comments.md @@ -0,0 +1,5 @@ +## R CMD check results + +0 errors | 0 warnings | 1 note + +* This is a new release. I replaced the use of par(), which created some problems, with ggplot tools. Hope solves the issue. diff --git a/data/demokin_codes.rda b/data/demokin_codes.rda new file mode 100644 index 0000000..8ab6620 Binary files /dev/null and b/data/demokin_codes.rda differ diff --git a/data/fra_asfr_sex.rda b/data/fra_asfr_sex.rda new file mode 100644 index 0000000..5572499 Binary files /dev/null and b/data/fra_asfr_sex.rda differ diff --git a/data/fra_surv_sex.rda b/data/fra_surv_sex.rda new file mode 100644 index 0000000..abef45f Binary files /dev/null and b/data/fra_surv_sex.rda differ diff --git a/data/swe_surv.rda b/data/swe_surv.rda deleted file mode 100644 index f00af74..0000000 Binary files a/data/swe_surv.rda and /dev/null differ diff --git a/dev/.DS_Store b/dev/.DS_Store deleted file mode 100644 index 1e69428..0000000 Binary files a/dev/.DS_Store and /dev/null differ diff --git a/dev/PENDS.txt b/dev/PENDS.txt deleted file mode 100644 index ecbf143..0000000 --- a/dev/PENDS.txt +++ /dev/null @@ -1,7 +0,0 @@ -1) Set no specific argument for Pb: in the case the user wants to use it, that can be included by her/himself in the F matrix, implicitly. - ok -1.1) caswell´s assumption stable: ft[1,1:ages] = f * U * birth_female -2) Include a paragraph in the "using" vignette to show this option. -3) Non-stable without pi or N as argument: give user an output anyways and a message "A stable assumption was used for the age distribution of the mother in each input year". -4) Replicate Hal´s output for dinamycs. -5) Correct the appendix: survival/probabilities. -6) Finish Multi-stage. \ No newline at end of file diff --git a/dev/calling_kinship_SVK_4867.m b/dev/calling_kinship_SVK_4867.m deleted file mode 100644 index f51c641..0000000 --- a/dev/calling_kinship_SVK_4867.m +++ /dev/null @@ -1,86 +0,0 @@ -%script to calculate kinship results -%this script calls the function kinship_function_parity_4867 -%requires the function vecperm.m to create vec-permutation matrix -% -% Supplement to: -% Caswell, H. 2020. The formal demography of kinship II. Multistate models, -% parity, and sibship. Demographic Research 42:1097-1144 -% -% Has been successfully used under Matlab R2018b - -%specify range of years to analyze -years=1960:2014; - -%years=2002; - -numyears=length(years); %specific to SVK data -%add path to location of matrices -addpath('SVK_kinmats/') - -for iy=1:numyears - year=years(iy) - - %specify name of matrix file - fname=char(['SVKmats' num2str(1950+iy-1) '.mat']); - %load matrix file - load(fname) - - %create the block diagonal matrices - - %identity matrices that are useful - Iom=eye(om); - Is=eye(s); - - bbU=zeros(s*om); - bbF=zeros(s*om); - for i=1:om - bbU = bbU + kron(Iom(:,i)*Iom(i,:),U{i}); - bbF = bbF + kron(Iom(:,i)*Iom(i,:),F{i}); - end - bbD=zeros(s*om); - bbH=zeros(s*om); - for i=1:s - bbD = bbD+kron(Is(:,i)*Is(i,:),D{i}); - bbH = bbH+kron(Is(:,i)*Is(i,:),H{i}); - end - - %create the age-stage matrices using the vec permuation formula - K=vecperm(s,om); - Ut= K'*bbD*K*bbU; - Ft= K'*bbH*K*bbF; - - %conditional transition matrix, conditional on survival - Gt=Ut*pinv(diag(sum(Ut))); - - %calculate distributions of mothers - %projection matrix Atilde - At=Ut+Ft; - %eigenvalues and right eigenvectors - [wt,d]=eig(At); - d=diag(d); - %find maximum eigenvalue - pick=find(d==max(d)); - wt=wt(:,pick); - %stable age-parity distribution normalized to sum to 1 - wt=wt/sum(wt); - lambda=d(pick) - - %age-stage distribution of mothers - pit=Ft(1,:)'.*wt; - pit=pit/sum(pit); - %marginal age distribution of mothers - piage=kron(Iom,ones(s,1)')*pit; - - clear At - - %add path to call the kinship program - path('../',path) - - %call the kinship function - kinout=kinship_function_parity(Ut,Ft,Gt,wt,pit,piage); - - %save the kin output - %include path to output folder - myname=char(['SVK_kinout/SVKkinout' num2str(years(iy)) '.mat']) - save(myname,'kinout') -end diff --git a/dev/kinship_function_parity_4867.m b/dev/kinship_function_parity_4867.m deleted file mode 100644 index 897bcdb..0000000 --- a/dev/kinship_function_parity_4867.m +++ /dev/null @@ -1,199 +0,0 @@ -function out=kinship_function_parity(Ut,Ft,Gt,wt,pit,piage) -% -%function to compute kinship network for multistate age x parity model -% Supplement to: -% Caswell, H. 2020. The formal demography of kinship II. Multistate models, -% parity, and sibship. Demographic Research 42:1097-1144 -% -% Has been successfully used under Matlab R2018b -% -% -%inputs -% Ut=age-stage transition matrix -% Ft = age-stage fertility matrix -% Gt=age-stage transition matrix conditional on survival -% wt=stable age-stage distribution, normalized to sum to 1 -% pit=age-stage distribution of mothers -% piage = marginal age distribution of mothers - - -%number of age classes -om=length(piage); -%number of stages -s=length(pit)/om; - -%identity matrices useful in calculations -Iom=eye(om); -Is=eye(s); -Isom=eye(s*om); - -%frequently used zero vector for initial condition -zvec=zeros(s*om,1); - -%frequently used om-1 limit for iterations -omz=om-1; - -% the following code calculates age-stage distributions, -% for each type of kin, for each age x of Focal, -% and stores these as columns of an array -% e.g., a(x) = daughters at age x; A(:,x) contains a(x) - -% dynamics of Focal -% initial condition -phiz=zeros(s*om,1); -phiz(1)=1; -%age-stage vector of Focal, conditional on survival -Phi(:,1)=phiz; -for ix=1:omz - Phi(:,ix+1)=Gt*Phi(:,ix); -end - -% a: daughters of focal - -az=zvec; -A(:,1)=az; -for ix=1:omz - A(:,ix+1)=Ut*A(:,ix) + Ft*Phi(:,ix); -end % for ix - - -% b = granddaughters of Focal -b=zvec; -B(:,1)=b; -for ix=1:omz - B(:,ix+1)=Ut*B(:,ix) + Ft*A(:,ix); -end - - -% c = greatgranddaughters of Focal -c=zvec; -C(:,1)=c; -for ix=1:omz - C(:,ix+1)=Ut*C(:,ix) +Ft*B(:,ix); -end - - -% d = mothers of Focal -% conditional on mother having parity >0 - -%momarray is an array with pit in each column -momarray=pit*ones(1,om); - -Z=eye(s); -Z(1,1)=0; -for imom=1:om %go through all columns of momarray - E=Iom(:,imom)*Iom(imom,:); - momarray(:,imom)=kron(E,Z)*momarray(:,imom); - %selects age imom, and eliminates the zero parity row of momarray - -end -%rescale columns of momarray to sum to 1 -momarray=momarray*pinv(diag(sum(momarray))); - -%set dzero to the average of the momarray over the ages of moms at birth of -%children -dzero=momarray*piage; - -D(:,1)=dzero; -for ix=1:omz - D(:,ix+1)=Ut*D(:,ix); -end - - -% g = maternal grandmothers of Focal -gzero=D*piage; - -G(:,1)=gzero; -for ix=1:omz - G(:,ix+1)=Ut*G(:,ix); -end - - -% h = great-grandmothers of Focal -hzero=G*piage; -H(:,1)=hzero; -for ix=1:omz - H(:,ix+1)=Ut*H(:,ix) + 0; -end - -% m = older sisters of Focal -mzero=A*piage; -M(:,1)=mzero; -for ix=1:omz - M(:,ix+1)=Ut*M(:,ix) + 0; -end - -% n = younger sisters of Focal -nzero=zvec; -N(:,1)=nzero; -for ix=1:omz - N(:,ix+1)=Ut*N(:,ix) + Ft*D(:,ix); -end - - -% p = nieces through older sisters of Focal -pzero=B*piage; -P(:,1)=pzero; -for ix=1:omz - P(:,ix+1)=Ut*P(:,ix) + Ft*M(:,ix); -end - -% q = nieces through younger sisters of Focal -qzero=zvec; -Q(:,1)=qzero; -for ix=1:omz - Q(:,ix+1)=Ut*Q(:,ix) + Ft*N(:,ix); -end - -% r = aunts older than mother of Focal -rzero=M*piage; -R(:,1)=rzero; -for ix=1:omz - R(:,ix+1)=Ut*R(:,ix) + 0; -end - -% s = aunts younger than mother of Focal -szero=N*piage; -S(:,1)=szero; -for ix=1:omz - S(:,ix+1)=Ut*S(:,ix) + Ft*G(:,ix); -end - -% t = cousins from aunts older than mother of Focal -tzero=P*piage; -T(:,1)=tzero; -for ix=1:omz - T(:,ix+1)=Ut*T(:,ix) + Ft*R(:,ix); -end - - -% v = cousins from aunts younger than mother of Focal -vzero=Q*piage; -V(:,1)=vzero; -for ix=1:omz - V(:,ix+1)=Ut*V(:,ix) + Ft*S(:,ix); -end %for i - - -%overall kinship matrices, concatenating all kin -allkin=cat(3,A,B,C,D,G,H,M,N,P,Q,R,S,T,V); - -%combining older and younger categories -% for sisters, neices, aunts, and cousins -allkin2=cat(3,A,B,C,D,G,H,M+N,P+Q,R+S,T+V); - -%output structure -out.allkin=allkin; -out.allkin2=allkin2; -out.Phi=Phi; -out.pit=pit; -out.piage=piage; -out.om=om; -out.s=s; - out.Ut=Ut; -out.Ft=Ft; -out.Gt=Gt; - - - - diff --git a/dev/matrix_construction_4867.m b/dev/matrix_construction_4867.m deleted file mode 100644 index 1f6c75e..0000000 --- a/dev/matrix_construction_4867.m +++ /dev/null @@ -1,102 +0,0 @@ - -% script to prepare matrices for multistate age x parity model -% Supplement to: -% Caswell, H. 2020. The formal demography of kinship II. Multistate models, -% parity, and sibship. Demographic Research 42:1097-1144 -% -%requires Matlab Table files obtained from HMD (fltper) and HFD (mi) -% Has been successfully used under Matlab R2018b - -%add folder contraining the table files to path -addpath('SVK_tables/') - -% load the female lifetable file -load('SVKfltperTable.mat') -%columns in this Table: Year,Age,mx,qx,ax,lx,dx,Lx,Tx,ex -lt=ltable; - -%load the parity state transition file -load('SVKmiTable.mat') -%columns: Year,Age,mi1,mi2,mi3,mi4,mi5p - -%find year ranges -minfertyear=min(fert.Year); -maxfertyear=max(fert.Year); - -minltyear=min(lt.Year); -maxltyear=max(lt.Year); - -%pick a starting year and ending year -startyear=max([minfertyear minltyear]); -endyear=min([maxfertyear maxltyear]); - -%array of years and number of years -years=startyear:endyear; -numyears=endyear-startyear+1; - -for iy=1:numyears - years(iy); - - %find life table and qx array for year iy - pick=find(lt.Year==years(iy)); - qx=table2array(lt(pick,4)); - - %find fertility and create fertility array - pick=find(fert.Year==years(iy)); - fertarray=table2array(fert(pick,[2:7])); - - %number of age classes - %om=length(qx)-1; - om=length(qx)-1; - %number of parity classes - s=6; - - %extend the fertility array - startfert=fertarray(1,1); - endfert=fertarray(end,1); - %put zeros before age of first reproduction - fertarray=[zeros(startfert-1,6); fertarray]; - fertarray(1:startfert-1,1)=(1:startfert-1)'; - %put zeros after age of last reproduction - fertarray=[fertarray; zeros(om-endfert,6)]; - fertarray(endfert+1:om,1)=(endfert+1:om)'; - - %remove age column from fertarray - fertarray=fertarray(:,2:6); - - %construct the stage transition matrices using probabilities - for i=1:om - U{i} = diag(fertarray(i,:),-1); - %transform subdiagonals to probabilities - U{i}=U{i}./(1+0.5*U{i}); - %fill in diagonal entries - U{i}=U{i}+diag([1-diag(U{i},-1) ; 1]); - end - - %construct the age transition and survival matrices - for i=1:s - D{i}=diag(1-qx(1:om-1),-1); - end - - %construct fertility matrices - for i=1:om - F{i}=zeros(s,s); - F{i}(1,1:s-1)=diag(U{i},-1); - F{i}(1,s)=U{i}(s,s-1); - %divide fertility by 2 - F{i}=F{i}/2; - end - - %stage assignment matrices - for i=1:s - H{i}=zeros(om,om); - H{i}(1,:)=1; - end - - %include path to folder where matrix files are to be stored - myname=char(['SVK_kinmats/SVKmats' num2str(years(iy)) '.mat']) - %save the matrices into a .mat file - save(myname,'U','D','F','H','om','s') - -end - diff --git a/dev/readme.txt b/dev/readme.txt deleted file mode 100644 index e69de29..0000000 diff --git a/dev/tests/repl_caswell.R b/dev/tests/repl_caswell.R deleted file mode 100644 index b81c5ee..0000000 --- a/dev/tests/repl_caswell.R +++ /dev/null @@ -1,443 +0,0 @@ -# replicating Caswell´s figures: choose some kin - -library(devtools) -load_all() -library(DemoKin) -library(tidyverse) -library(progress) -library(R.matlab) -load("tests/test.RData") - -# basic -debugonce(kin_time_variant) -swe_kin_period_pack <- kin(U = swe_surv, - f = swe_asfr, - N = swe_pop, - time_invariant = F, - birth_female = 1, - output_period = c(1900, 1950, 2010), - output_kin = c("d","gd","m","gm","oa", "os")) - -swe_kin_period_pack$kin_full %>% - filter(alive == "yes") %>% - group_by(age_focal, kin, year) %>% - summarise(count = sum(count, na.rm=T)) %>% - ggplot(aes(age_focal, count, color=factor(year))) + - geom_line() + - facet_wrap(~kin, scales="free_y") - -# time variant ------------------------------------------------------------ - -# inputs -input_time_variant <- readMat("tests/SWEhist_matrices.mat") -input_time_variant_proj <- readMat("tests/SWEproj_matrices.mat") -# class(input_time_variant) -# names(input_time_variant) -# length(input_time_variant[["matrices"]]) # number of years -# input_time_variant[["matrices"]][[128]][[1]][[1]] # U -# input_time_variant[["matrices"]][[1]][[1]][[2]] # F -# input_time_variant[["matrices"]][[1]][[1]][[3]] # popsize -# input_time_variant[["matrices"]][[1]][[1]][[4]] # pi -# length(input_time_variant_proj[["matrices"]]) # number of years - -U_hal <- f_hal <-N_hal <- pi_hal <-matrix(rep(0,111)) -for(y in 1:128){ - # y = 1 - U <- input_time_variant[["matrices"]][[y]][[1]][[1]] %>% as.matrix() - f <- input_time_variant[["matrices"]][[y]][[1]][[2]] %>% as.matrix() - N <- input_time_variant[["matrices"]][[y]][[1]][[3]] %>% as.matrix() - pi <- input_time_variant[["matrices"]][[y]][[1]][[4]] %>% as.matrix() - U_hal <- cbind(U_hal, c(U[col(U)==row(U)-1], U[ncol(U),nrow(U)])) - f_hal <- cbind(f_hal ,f[1,]) - N_hal <- cbind(N_hal ,N) - pi_hal <-cbind(pi_hal, pi) -} -U_hal_end <- U_hal[,-1] -f_hal_end <- f_hal[,-1] -N_hal_end <- N_hal[,-1] -pi_hal_end <-pi_hal[,-1] -colnames(U_hal_end) <- colnames(f_hal_end) <- colnames(N_hal_end) <- colnames(pi_hal_end) <-1891:2018 -dim(U_hal_end);class(U_hal_end %>% as.matrix) - -# period -swe_kin_period <- kin(U = U_hal_end %>% as.matrix(), - f = f_hal_end %>% as.matrix(), - pi = pi_hal_end %>% as.matrix(), - time_invariant = F, - birth_female = 1, - output_period = c(1891,1921,1951,2010), - output_kin = c("d","gd","m","gm","oa", "os")) - -# check first-row plots from figures 5-A and 5-B from https://www.demographic-research.org/volumes/vol45/16/45-16.pdf -swe_kin_period$kin_full %>% - filter(alive == "yes") %>% - group_by(age_focal, kin, year) %>% - summarise(count = sum(count, na.rm=T)) %>% - ggplot(aes(age_focal, count, color=factor(year))) + - geom_line() + - facet_wrap(~kin, scales="free_y") - -# read from https://www.dropbox.com/t/3YiILmn7SpczN3oM -output_time_variant <- readMat("tests/time-varying_sweden.mat") - -# inspect the way the package reads -# class(output_time_variant) -# names(output_time_variant) -# length(output_time_variant[["allkin"]]) # number of years -# length(output_time_variant[["allkin"]][[1]]) -# length(output_time_variant[["allkin"]][[1]]) -# class(output_time_variant[["allkin"]][[1]][[1]]) # 1 array with kin matrix -# dim(output_time_variant[["allkin"]][[1]][[1]][,,14]) # the matrix of the nth kin, 111 ages - -# use own codes to interpret -codes <- c("coa", "cya", "d", "gd", "ggd", "ggm", "gm", "m", "nos", "nys", "oa", "ya", "os", "ys") -caswell_codes <- c("t", "v", "a", "b", "c", "h", "g", "d", "p", "q", "r", "s", "m", "n") - -# re arrange all data to a dataframe -output_time_variant_df <- map_df(1:128, function(i){ - array_branch(output_time_variant[["allkin"]][[i]][[1]], margin = 3) %>% - map_df(., as.data.frame)}) %>% - setNames(as.character(0:110)) %>% - bind_cols(crossing(year = 1891+(0:127), - kin_index = 1:14, - age_kin = 0:110)) %>% - inner_join(tibble(kin = codes, caswell_codes) %>% - arrange(caswell_codes) %>% mutate(kin_index = 1:14)) - -# check dimension: 128 years, 14 types of kin, 111 ages -nrow(output_time_variant_df); 128*14*111 - -# check first-row plots from figures 5-A and 5-B from https://www.demographic-research.org/volumes/vol45/16/45-16.pdf -output_time_variant_df %>% - filter(year %in% c(1891,1921,1951,2010), kin %in% c("d","gd", "m", "gm", "oa", "os")) %>% - pivot_longer(`0`:`110`, names_to = "age", values_to = "count") %>% - mutate(age = as.integer(age)) %>% - group_by(age, kin, year) %>% - summarise(count = sum(count)) %>% - ggplot(aes(age, count, color=factor(year))) + - geom_line() + - facet_wrap(~kin, scales="free_y") - -# differences - look d, gd, in 1891 and 1951 -swe_period_together <- swe_kin_period$kin_full %>% - filter(alive == "yes") %>% - filter(year %in% c(1891,1921,1951,2010), kin %in% c("d","gd","m","gm","oa", "os")) %>% - group_by(age_focal, kin, year) %>% summarise(count_demokin = sum(count, na.rm=T)) %>% - inner_join( - output_time_variant_df %>% - filter(year %in% c(1891,1921,1951,2010), kin %in% c("d","gd", "m", "gm","oa", "os")) %>% - pivot_longer(`0`:`110`, names_to = "age", values_to = "count") %>% - mutate(age = as.integer(age)) %>% - group_by(age_focal=age, kin, year) %>% - summarise(count_paper = sum(count))) - -swe_period_together %>% - filter(year == 1891) %>% - ggplot() + - geom_line(aes(age_focal, count_demokin, color=factor(year)), linetype=1) + - geom_line(aes(age_focal, count_paper, color=factor(year)), linetype=2) + - facet_wrap(~kin, scales="free_y") - -swe_period_rel_dif <- swe_period_together %>% - mutate(rel_dif = round(100*(count_paper/count_demokin-1),3)) %>% - arrange(year, kin) %>% - as.data.frame() %>% - group_by(year, kin) %>% summarise(sum(rel_dif, na.rm=T)) - - - - - - - - - - - - - - -# to bind projected -# U_hal <- U_hal[1:106,] -# f_hal <- f_hal[1:106,] -# N_hal <- N_hal[1:106,] -# pi_hal <-pi_hal[1:106,] -# for(y in 1:102){ -# # y = 1 -# U <- input_time_variant_proj[["matrices"]][[y]][[1]][[1]] -# f <- input_time_variant_proj[["matrices"]][[y]][[1]][[2]] -# N <- input_time_variant_proj[["matrices"]][[y]][[1]][[3]] -# pi <- input_time_variant_proj[["matrices"]][[y]][[1]][[4]] -# U_hal <- U_hal %>% bind_cols(c(U[col(U)==row(U)-1], U[ncol(U),nrow(U)])) -# f_hal <- f_hal %>% bind_cols(f[1,]) -# N_hal <- N_hal %>% bind_cols(N) -# pi_hal <-pi_hal%>% bind_cols(as.numeric(pi)) -# } -# dim(U_hal[,-1]) -# U_hal_end <- U_hal[,-1] %>% setNames(as.character(1891:2120)) -# f_hal_end <- f_hal[,-1] %>% setNames(as.character(1891:2120)) -# N_hal_end <- N_hal[,-1] %>% setNames(as.character(1891:2120)) -# pi_hal_end <-pi_hal[,-1] %>% setNames(as.character(1891:2120)) -# dim(U_hal_end);names(U_hal_end) - -# time invariant ---------------------------------------------------------- - -### data: survival probability and fertility by age for Japan -# available at https://www.demographic-research.org/volumes/vol41/24/default.htm - -p_1947 <- 1 - read.csv("tests/qx_years.csv", header = F, sep = " ")[[4]] -f_1947 <- read.csv("tests/fx_years.csv", header = F, sep = " ")[[4]] -p_2014 <- 1 - read.csv("tests/qx_years.csv", header = F, sep = " ")[[205]] -f_2014 <- read.csv("tests/fx_years.csv", header = F, sep = " ")[[205]] - -# Caswell assumption on first age -f_1947 <- f_1947 * p_1947 -f_2014 <- f_2014 * p_2014 - -kins_japan_1947 <- kin(p_1947, f_1947, living = F)$kin_full -kins_japan_1947 %>% - filter(alive=="yes", kin=="ggm") %>% - group_by(age_focal) %>% summarise(sum(count)) - - -### results -kins_japan <- rbind(tibble(Year = 1947, kin(p_1947, f_1947, living = F)$kin_full), - tibble(Year = 2014, kin(p_2014, f_2014, living = F)$kin_full)) - -# kins alive by age when ego is aged 30 or 70 -kins_japan %>% - filter(age_focal %in% c(30,70), alive=="yes") %>% - ggplot() + - geom_line(aes(x=age_kin, y=count, - color=factor(age_focal), linetype=factor(Year))) + - facet_wrap(~kin,scales = "free_y") + - theme_classic() + - facet_wrap(~kin,scales = "free_y") - -kins_japan %>% - filter(age_focal %in% 30, alive=="yes", kin == "m", Year==2014) - -### get paper results: done with https://plotdigitizer.com/app - -m_30_2014 <- c(48.124993716677295, 0.0068724848600042006, - 52.13541022398433, 0.022765097394085585, - 56.14582673129136, 0.056697985757917374, - 60.04166165822103, 0.07398657677157613, - 64.16665974590543, 0.054765100671140945, - 68.17707625321246, 0.02330201014576342, - 71.95832959976478, 0.0035436192454907658) %>% matrix(ncol=2, byrow = T) %>% - as.data.frame() %>% setNames(c("age_kin","count")) %>% - mutate(kin = "m", year = 2014, age = 30) -m_30_1947 <- c(47.89583055592257, 0.010630874121749168, - 51.791665482852245, 0.045100671140939595, - 56.37499863406029, 0.05111409232120386, - 59.92708007784368, 0.03908724586435613, - 63.82291500477335, 0.02577181208053692, - 68.17707625321246, 0.012671136024014266, - 71.84374801938742, 0.0025771828465813683) %>% matrix(ncol=2, byrow = T) %>% - as.data.frame() %>% setNames(c("age_kin","count")) %>% - mutate(kin = "m", year = 1947, age = 30) -s_30_1947 <- c(117.30467373388943, 0.028030307190039253, - 3.8671896129379024, 0.0011363654972982584, - 8.05663990468026, 0.005681817853417949, - 12.031249743886312, 0.01792929767285178, - 16.220700035628674, 0.036111111913867226, - 19.873045098211307, 0.05782828333912762, - 23.84765903523633, 0.07626262618964844, - 28.037105229159724, 0.08244949644554042, - 32.119139392452894, 0.06717171906914071, - 36.09374513383999, 0.044696973856705915, - 39.853510422690775, 0.024621210698144477, - 43.93554458598395, 0.010858592937435215, - 47.80273010110288, 0.00303030478177092) %>% matrix(ncol=2, byrow = T) %>% - as.data.frame() %>% setNames(c("age_kin","count")) %>% - mutate(kin = "s", year = 1947, age = 30) -s_70_1947 <- c(43.93554458598395, 0.0011363654972982584, - 47.80273010110288, 0.00441919166376952, - 52.20702494320051, 0.013383845316732092, - 56.074218653957374, 0.026388889290266948, - 59.94140416907631, 0.03952020358922534, - 64.02343013673156, 0.048358586916764375, - 68.10546430002474, 0.045959600046354354, - 71.97264981514367, 0.03143939886539736, - 76.05468397843684, 0.015404045293554923, - 80.02928971982392, 0.005429296468717607, - 84.00390365684893, 0.0011363654972982584) %>% matrix(ncol=2, byrow = T) %>% - as.data.frame() %>% setNames(c("age_kin","count")) %>% - mutate(kin = "s", year = 1947, age = 70) -s_30_2014 <- c(7.949219678412107, 0.0005050524024740438, - 12.031249743886312, 0.0032828357995446046, - 16.005859583092366, 0.009217175037662914, - 19.873045098211307, 0.020328289359798468, - 23.6328103870621, 0.03194444645133473, - 27.822264776623417, 0.03888889049440111, - 32.01171916618474, 0.0337121250434572, - 35.77148445503553, 0.0215909155494469, - 39.96093884459685, 0.010732322612011683, - 44.04296481225208, 0.0032828357995446046)%>% matrix(ncol=2, byrow = T) %>% - as.data.frame() %>% setNames(c("age_kin","count")) %>% - mutate(kin = "s", year = 2014, age = 30) -s_70_2014 <- c(51.88476426439605, 0.002904044089420749, - 56.18163888022552, 0.009343435730013084, - 59.83398394280815, 0.01944444524720057, - 64.13085855863763, 0.03068182026168629, - 68.21288452629288, 0.035479798819043, - 71.75780936260736, 0.03068182026168629, - 76.16210420470499, 0.019318184554850383, - 79.92186949355577, 0.008459601250488509, - 84.00390365684893, 0.00265152270472039) %>% matrix(ncol=2, byrow = T) %>% - as.data.frame() %>% setNames(c("age_kin","count")) %>% - mutate(kin = "s", year = 2014, age = 70) - -output_time_invariant <- m_30_2014 %>% - bind_rows(m_30_1947, s_30_1947, s_70_1947, s_30_2014, s_70_2014) %>% - mutate(age_kin = trunc(age_kin), count=round(count,7)) - -compare_time_invariant <- kins_japan %>% - filter(kin %in% c("os", "ys"), alive == "yes") %>% - group_by(Year, age_focal, age_kin, alive) %>% - summarise(count = sum(count)) %>% - mutate(kin = "s") %>% - bind_rows(kins_japan %>% filter(alive == "yes")) %>% - select(-year, -cohort, -alive) %>% - rename(count_demokin = count, year = Year) %>% - mutate(count_demokin = round(count_demokin,7)) %>% - right_join(output_time_invariant %>% - rename(age_focal=age, count_paper = count)) - -compare_time_invariant %>% - ggplot() + - geom_line(aes(age_kin, count_demokin, linetype=factor(year)), col=1)+ - geom_line(aes(age_kin, count_paper, linetype=factor(year)), col=2) + - facet_grid(~kin+age_focal)+ - theme_bw() - - -### compare values - - - - - - - - - - -# period -swe_kin_period <- kin(U = U_caswell_2021, f = f_caswell_2021, pi = pi_caswell_2021, stable = F, birth_female = 1, - focal_year = c(1891,1921,1951,2010,2050,2080,2120), - selected_kin = c("d","gd","ggd","m","gm","ggm","os","ys","oa","ya")) - -swe_kin_period$kin_summary %>% - ggplot(aes(age_focal,count,color=factor(year))) + - geom_line(size=1)+ - scale_y_continuous(name = "",labels = seq(0,3,.2),breaks = seq(0,3,.2))+ - facet_wrap(~kin, scales = "free")+ - theme_bw() - -# ADDITIONAL PLOTS cohrot and period -ggplot(swe_kin_cohorts$kin_summary %>% filter(cohort == 1911), - aes(year,mean_age)) + - geom_point(aes(size=count,color=kin)) + - geom_line(aes(color=kin)) + - scale_y_continuous(name = "Edad", breaks = seq(0,110,10), labels = seq(0,110,10), limits = c(0,110))+ - geom_segment(x = 1911, y = 0, xend = 2025, yend = 110, color = 1)+ - geom_vline(xintercept = 1911, linetype=2)+ - theme_light()+ coord_fixed()+ - labs(title = "Kin cohort 1911") - -swe_kin_period$kin_summary %>% - filter(age_focal==50) %>% - ggplot(aes(year, mean_age, color=kin)) + - geom_point(aes(size=count)) + - geom_line() + - geom_hline(yintercept = 50, color=1, linetype=1)+ - theme_light()+ - coord_fixed()+ - labs(title = "Kin period") - -### plots -# kins alive by age when ego is aged 30 or 70 -kins_japan %>% - filter(age_focal %in% c(30,70), alive=="yes") %>% - ggplot() + - geom_line(aes(x=age_kin, y=count, - color=factor(age_focal), linetype=factor(Year))) + - facet_wrap(~kin,scales = "free_y") + - theme_classic() + - facet_wrap(~kin,scales = "free_y") -# kins alive during ego´s life -kins_japan %>% - filter(alive=="yes") %>% - group_by(Year, kin, age_focal) %>% summarise(count = sum(count)) %>% - ggplot() + - geom_line(aes(age_focal, count, linetype=factor(Year))) + - theme_classic() + - facet_wrap(~kin, scales = "free_y") -# experienced deaths -kins_japan %>% - filter(alive=="no") %>% - group_by(Year, kin, age_focal) %>% summarise(count = sum(count)) %>% - ggplot() + - geom_line(aes(age_focal, count, linetype=factor(Year))) + - theme_classic() + - facet_wrap(~kin, scales = "free_y") -# variation coefficient of age by kin -kins_japan %>% - filter(alive=="yes") %>% - group_by(Year, kin, age_focal) %>% - summarise(mean_age = sum(count*age_kin)/sum(count), - var_age = sum(count*age_kin^2)/sum(count) - mean_age^2, - cv_age = round(sqrt(var_age)/mean_age*100,1)) %>% - ggplot() + - geom_line(aes(age_focal, cv_age, linetype=factor(Year))) + - theme_classic() + - facet_wrap(~kin, scales = "free_y") -# dependency ages -kins_japan %>% - filter(alive=="yes") %>% - mutate(age_kin_dep = ifelse(age_kin<15,"0-14", - ifelse(age_kin<65,"15-64","65+"))) %>% - group_by(Year, kin, age_focal, age_kin_dep) %>% - summarise(count = sum(count)) %>% - ggplot() + - geom_line(aes(age_focal, count, - color = age_kin_dep, linetype=factor(Year))) + - theme_classic() + - facet_wrap(~kin, scales = "free_y") - - - - - - - - - - - - - - - - - -swe_surv_2010 <- swe_surv %>% pull(`2011`) -swe_asfr_2010 <- swe_asfr %>% pull(`2011`) -debugonce(kin) -swe50_2015_stable <- kin(U = swe_surv_2010, f = swe_asfr_2010, output_cohort = c(1911,1930), - output_kin = c("d","m")) - -swe_kin_cohorts <- kin(U = U_caswell_2021, f = f_caswell_2021, time_invariant = F, - birth_female = 1, - output_cohort = c(1911), - output_kin = c("d")) - -U = U_caswell_2021; f = f_caswell_2021; pi = pi_caswell_2021; birth_female = 1; -output_cohort = c(1911);output_period = NULL; output_kin = c("d") - -# FIGURE 5 - - - diff --git a/dev/tests/repl_caswell_first_year.R b/dev/tests/repl_caswell_first_year.R deleted file mode 100644 index c88c121..0000000 --- a/dev/tests/repl_caswell_first_year.R +++ /dev/null @@ -1,106 +0,0 @@ -# replicating Caswell´s figures: choose some kin -library(DemoKin) -library(tidyverse) -library(R.matlab) -source("R/kin_time_invariant.R") - -# paper input from https://www.demographic-research.org/volumes/vol45/16/45-16.pdf -input_time_variant <- readMat("tests/SWEhist_matrices.mat") - -# check structure from reading mat -class(input_time_variant) -names(input_time_variant) -length(input_time_variant[["matrices"]]) # number of years -input_time_variant[["matrices"]][[128]][[1]][[1]] # U -input_time_variant[["matrices"]][[1]][[1]][[2]] # F -input_time_variant[["matrices"]][[1]][[1]][[3]] # popsize -input_time_variant[["matrices"]][[1]][[1]][[4]] # pi -length(input_time_variant_proj[["matrices"]]) # number of years - -# reshape -U_hal <- f_hal <-N_hal <- pi_hal <-matrix(rep(0,111)) -for(y in 1:128){ - U <- input_time_variant[["matrices"]][[y]][[1]][[1]] %>% as.matrix() - f <- input_time_variant[["matrices"]][[y]][[1]][[2]] %>% as.matrix() - N <- input_time_variant[["matrices"]][[y]][[1]][[3]] %>% as.matrix() - pi <- input_time_variant[["matrices"]][[y]][[1]][[4]] %>% as.matrix() - U_hal <- cbind(U_hal, c(U[col(U)==row(U)-1], U[ncol(U),nrow(U)])) - f_hal <- cbind(f_hal ,f[1,]) - N_hal <- cbind(N_hal ,N) - pi_hal <-cbind(pi_hal, pi) -} -U_hal <- U_hal[,-1] -f_hal <- f_hal[,-1] -N_hal <- N_hal[,-1] -pi_hal <-pi_hal[,-1] -colnames(U_hal) <- colnames(f_hal) <- colnames(N_hal) <- colnames(pi_hal) <-1891:2018 -dim(U_hal);class(U_hal %>% as.matrix) - -# output from Hal (dropbox link https://www.dropbox.com/t/3YiILmn7SpczN3oM) -output_time_variant <- readMat("tests/time-varying_sweden.mat") - -# inspect the way the package reads mat -class(output_time_variant) -names(output_time_variant) -length(output_time_variant[["allkin"]]) # number of years -length(output_time_variant[["allkin"]][[1]]) -length(output_time_variant[["allkin"]][[1]]) -class(output_time_variant[["allkin"]][[1]][[1]]) # 1 array with kin matrix -dim(output_time_variant[["allkin"]][[1]][[1]][,,14]) # the matrix of the nth kin, 111 ages - -# use own codes to interpret -codes <- c("coa", "cya", "d", "gd", "ggd", "ggm", "gm", "m", "nos", "nys", "oa", "ya", "os", "ys") -caswell_codes <- c("t", "v", "a", "b", "c", "h", "g", "d", "p", "q", "r", "s", "m", "n") - -# re shape data to tidy -output_time_variant_df <- map_df(1:128, function(i){ - array_branch(output_time_variant[["allkin"]][[i]][[1]], margin = 3) %>% - map_df(., as.data.frame)}) %>% - setNames(as.character(0:110)) %>% - bind_cols(crossing(year = 1891+(0:127), # years - kin_index = 1:14, # number of possible kin - age_kin = 0:110) # ages - ) %>% - inner_join(tibble(kin = codes, caswell_codes) %>% - arrange(caswell_codes) %>% mutate(kin_index = 1:14)) - -# check dimension: 128 years, 14 types of kin, 111 ages -nrow(output_time_variant_df); 128*14*111 - -# own calculation for first year -out_first_year <- kin_time_invariant( - U = U_hal[,"1891"], - f = f_hal[,"1891"], - pi = pi_hal[,"1891"], - birth_female = 1) - -# check first visually demokin -out_first_year %>% - filter(alive == "yes") %>% - group_by(age_focal, kin) %>% - summarise(count = sum(count, na.rm=T)) %>% - ggplot(aes(age_focal, count)) + - geom_line() + - facet_wrap(~kin, scales="free_y") - -# compare with paper results -comparison <- out_first_year %>% - filter(alive == "yes") %>% - group_by(age_focal, kin) %>% - summarise(count = sum(count, na.rm=T)) %>% - mutate(source = "demokin") %>% - bind_rows( - output_time_variant_df %>% - filter(year %in% 1891) %>% - pivot_longer(`0`:`110`, names_to = "age", values_to = "count") %>% - mutate(age = as.integer(age)) %>% - group_by(age_focal=age, kin) %>% - summarise(count = sum(count)) %>% - mutate(source = "paper")) - -# comparison visually -comparison %>% - ggplot() + - geom_line(aes(age_focal, count, color=source, linetype=source)) + - facet_wrap(~kin, scales="free_y") + - theme_bw() diff --git a/dev/tests/timevarying_kin.m b/dev/tests/timevarying_kin.m deleted file mode 100644 index c1176c8..0000000 --- a/dev/tests/timevarying_kin.m +++ /dev/null @@ -1,180 +0,0 @@ -function kout=timevarying_kin(U,F,pi,om,pkin); -% function to return kinship network -% calculated from the rates and the kinship at the previous time -% U=survival matrix -% F=fertility matrix -% pi = distribution of ages of mothers -% om=number of age classes -% pkin = the array of all kin from the previous time step -% model structure -% k(x+1,t+1)=U(t)*k(x,t) + F(t)*kstar(x,t) for some other kin kstar - -%set to full in case they arrive as sparse matrices -U=full(U); -F=full(F); -pi=full(pi); - -%frequently used zero vector for initial condition -zvec=zeros(om,1); -I=eye(om); -omz=om-1; - -% a: daughters of focal - -A(:,1)=zvec; -for ix=1:omz - ap=U*pkin.A(:,ix) + F*I(:,ix); - A(:,ix+1)=ap; - -end % for ix - -% b = granddaughters of Focal - -B(:,1)=zvec; -for ix=1:omz - bp=U*pkin.B(:,ix) + F*pkin.A(:,ix); - B(:,ix+1)=bp; - -end - - -% c = greatgranddaughters of Focal -C(:,1)=zvec; -for ix=1:omz - cp=U*pkin.C(:,ix) +F*pkin.B(:,ix); - C(:,ix+1)=cp; - -end - - -% d = mothers of Focal -D(:,1)=pi; -for ix=1:omz - dp=U*pkin.D(:,ix) + 0; - D(:,ix+1)=dp; - -end - - -% g = grandmothers of Focal -%only maternal grandmothers right now -G(:,1)=pkin.D*pi;; -for ix=1:omz - gp=U*pkin.G(:,ix) + 0; - G(:,ix+1)=gp; - -end - - -% h = greattrandmothers of Focal - -H(:,1)=pkin.G*pi; -for ix=1:omz - hp=U*pkin.H(:,ix) + 0; - H(:,ix+1)=hp; - -end - -% m = older sisters of Focal - -M(:,1)=pkin.A*pi; -for ix=1:omz - mp=U*pkin.M(:,ix) + 0; - M(:,ix+1)=mp; - -end - -% n = younger sisters - -N(:,1)=zvec; -for ix=1:omz - np=U*pkin.N(:,ix) + F*pkin.D(:,ix); - N(:,ix+1)=np; - -end - - -% p = nieces through older sisters - -P(:,1)=pkin.B*pi; -for ix=1:omz - pp=U*pkin.P(:,ix) + F*pkin.M(:,ix); - P(:,ix+1)=pp; -end - -% q = nieces through younger sisters - -Q(:,1)=zvec; -for ix=1:omz - qp=U*pkin.Q(:,ix) + F*pkin.N(:,ix); - Q(:,ix+1)=qp; - -end - -% r = aunts older than mother - -R(:,1)=pkin.M*pi; -for ix=1:omz - rp=U*pkin.R(:,ix) + 0; - R(:,ix+1)=rp; - -end - -% s = aunts younger than mother - -S(:,1)=pkin.N*pi; -for ix=1:omz - sp=U*pkin.S(:,ix) + F*pkin.G(:,ix); - S(:,ix+1)=sp; - -end - -% t = cousins from older aunts - -T(:,1)=pkin.P*pi; -for ix=1:omz - tp=U*pkin.T(:,ix) + F*pkin.R(:,ix); - T(:,ix+1)=tp; - -end - - -% v = cousins from aunts younger than mother - -V(:,1)=pkin.Q*pi; -for ix=1:omz - vp=U*pkin.V(:,ix) + F*pkin.S(:,ix); - V(:,ix+1)=vp; - -end - -%concatenate kin matrices -allkin=cat(3,A,B,C,D,G,H,M,N,P,Q,R,S,T,V); - -%concatenate, combining older and younger sisters, etc. -allkin2=cat(3,A,B,C,D,G,H,M+N,P+Q,R+S,T+V); - -%create output structures -kout.A=A; -kout.B=B; -kout.C=C; -kout.D=D; -kout.G=G; -kout.H=H; -kout.M=M; -kout.N=N; -kout.P=P; -kout.Q=Q; -kout.R=R; -kout.S=S; -kout.T=T; -kout.V=V; - -kout.allkin=allkin; -kout.allkin2=allkin2; - -kout.U=U; -kout.F=F; -kout.pi=pi; - - \ No newline at end of file diff --git a/man/demokin_codes.Rd b/man/demokin_codes.Rd index deb1b04..6f1e241 100644 --- a/man/demokin_codes.Rd +++ b/man/demokin_codes.Rd @@ -1,11 +1,16 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_funs.R +% Please edit documentation in R/data.R +\docType{data} \name{demokin_codes} \alias{demokin_codes} -\title{print kin codes} +\title{DemoKin codes, Caswell (2020) codes, and useful labels.} +\format{ +A data.frame with codes and labels for distinction between kin types. +} \usage{ -demokin_codes() +demokin_codes } \description{ -Print kin codes and labels +DemoKin codes, Caswell (2020) codes, and useful labels. } +\keyword{datasets} diff --git a/DemoKin-Logo.png b/man/figures/DemoKin-Logo.png similarity index 100% rename from DemoKin-Logo.png rename to man/figures/DemoKin-Logo.png diff --git a/man/figures/README-unnamed-chunk-4-1.png b/man/figures/README-unnamed-chunk-4-1.png index 118cd3c..681db7c 100644 Binary files a/man/figures/README-unnamed-chunk-4-1.png and b/man/figures/README-unnamed-chunk-4-1.png differ diff --git a/man/figures/README-unnamed-chunk-5-1.png b/man/figures/README-unnamed-chunk-5-1.png index 118cd3c..681db7c 100644 Binary files a/man/figures/README-unnamed-chunk-5-1.png and b/man/figures/README-unnamed-chunk-5-1.png differ diff --git a/man/fra_asfr_sex.Rd b/man/fra_asfr_sex.Rd new file mode 100644 index 0000000..dfda668 --- /dev/null +++ b/man/fra_asfr_sex.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{fra_asfr_sex} +\alias{fra_asfr_sex} +\title{Fertility for France (2012) by sex in Caswell (2022).} +\format{ +A data.frame with age specific fertility rates by age and sex. +} +\source{ +Caswell (2022) +} +\usage{ +fra_asfr_sex +} +\description{ +Fertility for France (2012) by sex in Caswell (2022). +} +\keyword{datasets} diff --git a/man/fra_surv_sex.Rd b/man/fra_surv_sex.Rd new file mode 100644 index 0000000..afa550f --- /dev/null +++ b/man/fra_surv_sex.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{fra_surv_sex} +\alias{fra_surv_sex} +\title{Survival probability for France (2012) by sex in Caswell (2022).} +\format{ +A data.frame with survival probabilities by age and sex. +} +\source{ +Caswell (2022) +} +\usage{ +fra_surv_sex +} +\description{ +Survival probability for France (2012) by sex in Caswell (2022). +} +\keyword{datasets} diff --git a/man/get_HMDHFD.Rd b/man/get_HMDHFD.Rd deleted file mode 100644 index 9bbf264..0000000 --- a/man/get_HMDHFD.Rd +++ /dev/null @@ -1,41 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_HMDHFD.R -\name{get_HMDHFD} -\alias{get_HMDHFD} -\title{Get time serie matrix data from HMD/HFD} -\usage{ -get_HMDHFD( - country = "SWE", - min_year = 1900, - max_year = 2018, - user_HMD = NULL, - pass_HMD = NULL, - user_HFD = NULL, - pass_HFD = NULL, - OAG = 100 -) -} -\arguments{ -\item{country}{numeric. Country code from rom HMD/HFD.} - -\item{min_year}{integer. Older year to get data.} - -\item{max_year}{numeric. Latest year to get data.} - -\item{user_HMD}{character. From HMD.} - -\item{pass_HMD}{character. From HMD.} - -\item{user_HFD}{character. From HFD.} - -\item{pass_HFD}{character. From HFD.} - -\item{OAG}{numeric. Open age group to standarize output.} -} -\value{ -A list wiith female survival probability, survival function, fertility and poopulation age specific matrixes, with calendar year as colnames. -} -\description{ -Wrapper function to get data of female survival, fertlity and population -of selected country on selected period. -} diff --git a/man/kin.Rd b/man/kin.Rd index 21c8115..5fbc92f 100644 --- a/man/kin.Rd +++ b/man/kin.Rd @@ -2,31 +2,33 @@ % Please edit documentation in R/kin.R \name{kin} \alias{kin} -\title{Estimate kin counts} +\title{Estimate kin counts in a one-sex framework.} \usage{ kin( - U = NULL, + p = NULL, f = NULL, time_invariant = TRUE, - N = NULL, pi = NULL, + n = NULL, output_cohort = NULL, output_period = NULL, output_kin = NULL, + output_age_focal = NULL, birth_female = 1/2.04, - stable = lifecycle::deprecated() + summary_kin = TRUE ) } \arguments{ -\item{U}{numeric. A vector (atomic) or matrix with probabilities (or survival ratios, or transition between age class in a more general perspective) with rows as ages (and columns as years in case of matrix, being the name of each col the year).} +\item{p}{numeric. A vector (atomic) or matrix with probabilities (or survival ratios, or transition between age class +in a more general perspective) with rows as ages (and columns as years in case of matrix, being the name of each col the year).} -\item{f}{numeric. Same as U but for fertility rates.} +\item{f}{numeric. Same as \code{p} but for fertility rates.} \item{time_invariant}{logical. Constant assumption for a given \code{year} rates. Default \code{TRUE}.} -\item{N}{numeric. Same as U but for population distribution (counts or \verb{\%}). Optional.} +\item{pi}{numeric. Same as \code{U} but for childbearing distribution (sum to 1). Optional.} -\item{pi}{numeric. Same as U but for childbearing distribution (sum to 1). Optional.} +\item{n}{numeric. Only for \code{time_invariant = FALSE}. Same as \code{p} but for population distribution (counts or \verb{\%}). Optional.} \item{output_cohort}{integer. Vector of year cohorts for returning results. Should be within input data years range.} @@ -34,13 +36,20 @@ kin( \item{output_kin}{character. kin types to return: "m" for mother, "d" for daughter,...} -\item{birth_female}{numeric. Female portion at birth. This multiplies \code{f} argument. If \code{f} is already for female offspring, this needs to be set as 1.} +\item{output_age_focal}{integer. Vector of ages to select (and make faster the run).} + +\item{birth_female}{numeric. Female portion at birth. This multiplies \code{f} argument. If \code{f} is already for female offspring,} + +\item{summary_kin}{logical. Whether or not include \code{kin_summary} table (see output details). Default \code{TRUE}. +this needs to be set as 1.} } \value{ A list with: \itemize{ -\item{kin_full}{ a data frame with year, cohort, Focal´s age, related ages and type of kin (for example \code{d} is daughter, \code{oa} is older aunts, etc.), including living and dead kin at that age.} -\item{kin_summary}{ a data frame with Focal´s age, related ages and type of kin, with indicators obtained processing \code{kin_full}, grouping by cohort or period (depending on the given arguments):} +\item{kin_full}{ a data frame with year, cohort, Focal´s age, related ages and type of kin (for example \code{d} is daughter, +\code{oa} is older aunts, etc.), including living and dead kin at that age.} +\item{kin_summary}{ a data frame with Focal´s age, related ages and type of kin, with indicators obtained processing \code{kin_full}, +grouping by cohort or period (depending on the given arguments):} {\itemize{ \item{\code{count_living}}{: count of living kin at actual age of Focal} \item{\code{mean_age}}{: mean age of each type of living kin.} @@ -53,8 +62,17 @@ A list with: } } \description{ -Implementation of Goodman-Keyfitz-Pullum equations in a matrix framework. +Implementation of Goodman-Keyfitz-Pullum equations in a matrix framework. This produce a matrilineal (or patrilineal) +kin count distribution by kin and age. } \details{ See Caswell (2019) and Caswell (2021) for details on formulas. One sex only (female by default). } +\examples{ +# Kin expected matrilineal count for a Swedish female based on 2015 rates. +swe_surv_2015 <- swe_px[,"2015"] +swe_asfr_2015 <- swe_asfr[,"2015"] +# Run kinship models +swe_2015 <- kin(p = swe_surv_2015, f = swe_asfr_2015) +head(swe_2015$kin_summary) +} diff --git a/man/kin2sex.Rd b/man/kin2sex.Rd new file mode 100644 index 0000000..fde5cd9 --- /dev/null +++ b/man/kin2sex.Rd @@ -0,0 +1,100 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/kin2sex.R +\name{kin2sex} +\alias{kin2sex} +\title{Estimate kin counts in a two-sex framework} +\usage{ +kin2sex( + pf = NULL, + pm = NULL, + ff = NULL, + fm = NULL, + time_invariant = TRUE, + sex_focal = "f", + birth_female = 1/2.04, + pif = NULL, + pim = NULL, + nf = NULL, + nm = NULL, + Hf = NULL, + Hm = NULL, + output_cohort = NULL, + output_period = NULL, + output_kin = NULL, + output_age_focal = NULL, + summary_kin = TRUE +) +} +\arguments{ +\item{pf}{numeric. A vector (atomic) or matrix with female probabilities (or survival ratios, or transition between age class in a more general perspective) with rows as ages (and columns as years in case of matrix, being the name of each col the year).} + +\item{pm}{numeric. A vector (atomic) or matrix with male probabilities (or survival ratios, or transition between age class in a more general perspective) with rows as ages (and columns as years in case of matrix, being the name of each col the year).} + +\item{ff}{numeric. Same as \code{pf} but for fertility rates.} + +\item{fm}{numeric. Same as \code{pm} but for fertility rates.} + +\item{time_invariant}{logical. Constant assumption for a given \code{year} rates. Default \code{TRUE}.} + +\item{sex_focal}{character. "f" for female or "m" for male.} + +\item{birth_female}{numeric. Female portion at birth. This multiplies \code{f} argument. If \code{f} is already for female offspring, this needs to be set as 1.} + +\item{pif}{numeric. For using some specific age distribution of childbearing for mothers (same length as ages). Default \code{NULL}.} + +\item{pim}{numeric. For using some specific age distribution of childbearing for fathers (same length as ages). Default \code{NULL}.} + +\item{nf}{numeric. Only for \code{time_invariant = FALSE}. Same as \code{pf} but for population distribution (counts or \verb{\%}). Optional.} + +\item{nm}{numeric. Only for \code{time_invariant = FALSE}. Same as \code{pm} but for population distribution (counts or \verb{\%}). Optional.} + +\item{Hf}{numeric. A list where each list element (being the name of each list element the year) contains a matrix with cause-specific hazards for females with rows as causes and columns as ages, being the name of each col the age.} + +\item{Hm}{numeric. A list where each list element (being the name of each list element the year) contains a matrix with cause-specific hazards for males with rows as causes and columns as ages, being the name of each col the age.} + +\item{output_cohort}{integer. Vector of year cohorts for returning results. Should be within input data years range.} + +\item{output_period}{integer. Vector of period years for returning results. Should be within input data years range.} + +\item{output_kin}{character. kin types to return: "m" for mother, "d" for daughter,...} + +\item{output_age_focal}{integer. Vector of ages to select (and make faster the run).} + +\item{summary_kin}{logical. Whether or not include \code{kin_summary} table (see output details). Default \code{TRUE}.} +} +\value{ +A list with: +\itemize{ +\item{kin_full}{ a data frame with year, cohort, Focal´s age, related ages and type of kin (for example \code{d} could be daughter or son depending \code{sex_kin}, +\code{oa} is older aunts or uncles also depending \code{sex_kin} value, etc.), including living and dead kin at that age.} +\item{kin_summary}{ a data frame with Focal´s age, related ages, sex and type of kin, with indicators obtained processing \code{kin_full}, grouping by cohort or period (depending on the given arguments):} +{\itemize{ +\item{\code{count_living}}{: count of living kin at actual age of Focal} +\item{\code{mean_age}}{: mean age of each type of living kin.} +\item{\code{sd_age}}{: standard deviation of age of each type of living kin.} +\item{\code{count_death}}{: count of dead kin at specific age of Focal.} +\item{\code{count_cum_death}}{: cumulated count of dead kin until specific age of Focal.} +\item{\code{mean_age_lost}}{: mean age where Focal lost her relative.} +} +} +} +} +\description{ +Implementation of two-sex matrix kinship model. This produces kin counts grouped by kin, age and sex of +each relatives at each Focal´s age. For example, male cousins from aunts and uncles from different sibling's parents +are grouped in one male count of cousins. Note that the output labels relative following female notation: the label \code{m} +refers to either mothers or fathers, and column \code{sex_kin} determine the sex of the relative. +} +\details{ +See Caswell (2022) for details on formulas. +} +\examples{ +# Kin expected count by relative sex for a French female based on 2012 rates. +fra_fert_f <- fra_asfr_sex[,"ff"] +fra_fert_m <- fra_asfr_sex[,"fm"] +fra_surv_f <- fra_surv_sex[,"pf"] +fra_surv_m <- fra_surv_sex[,"pm"] +fra_2012 <- kin2sex(fra_surv_f, fra_surv_m, fra_fert_f, fra_fert_m) +head(fra_2012$kin_summary) + +} diff --git a/man/kin_multi_stage.Rd b/man/kin_multi_stage.Rd index 12fa213..7a0830d 100644 --- a/man/kin_multi_stage.Rd +++ b/man/kin_multi_stage.Rd @@ -11,22 +11,25 @@ kin_multi_stage( H = NULL, birth_female = 1/2.04, output_kin = NULL, + parity = FALSE, list_output = FALSE ) } \arguments{ -\item{U}{list. age elemnts with column-stochastic transition matrix with dimension for the state space, conditional on survival.} +\item{U}{list. age elements with column-stochastic transition matrix with dimension for the state space, conditional on survival.} -\item{f}{matrix. state-specific fertility (age in rows and states in columns).} +\item{f}{matrix. state-specific fertility (age in rows and states in columns). Is accepted also a list with for each age-class.} -\item{D}{matrix. survival probabilities by state (age in rows and states in columns)} +\item{D}{matrix. survival probabilities by state (age in rows and states in columns). Is accepted also a list for each state with survival matrices.} -\item{H}{matrix. assigns the offspring of individuals in some stage to the appropriate age class with 1 (age in rows and states in columns).} +\item{H}{matrix. assigns the offspring of individuals in some stage to the appropriate age class (age in rows and states in columns). Is accepted also a list with a matrix for each state.} \item{birth_female}{numeric. Female portion at birth.} \item{output_kin}{character. kin to return. For example "m" for mother, "d" for daughter. See the \code{vignette} for all kin types.} +\item{parity}{logical. parity states imply age distribution of mothers re-scaled to not have parity 0 when Focal born. Default \code{TRUE}.} + \item{list_output}{logical. Results as a list. Default \code{FALSE}.} } \value{ diff --git a/man/kin_time_invariant.Rd b/man/kin_time_invariant.Rd index a470503..d04e243 100644 --- a/man/kin_time_invariant.Rd +++ b/man/kin_time_invariant.Rd @@ -2,10 +2,10 @@ % Please edit documentation in R/kin_time_invariant.R \name{kin_time_invariant} \alias{kin_time_invariant} -\title{Estimate kin counts in a time invariant framework} +\title{Estimate kin counts in a time invariant framework for one-sex model (matrilineal/patrilineal)} \usage{ kin_time_invariant( - U = NULL, + p = NULL, f = NULL, birth_female = 1/2.04, pi = NULL, @@ -14,7 +14,7 @@ kin_time_invariant( ) } \arguments{ -\item{U}{numeric. A vector of survival probabilities with same length as ages.} +\item{p}{numeric. A vector of survival probabilities with same length as ages.} \item{f}{numeric. A vector of age-specific fertility rates with same length as ages.} @@ -22,7 +22,7 @@ kin_time_invariant( \item{pi}{numeric. For using some specific non-stable age distribution of childbearing (same length as ages). Default \code{NULL}.} -\item{output_kin}{character. kin to return. For example "m" for mother, "d" for daughter. See the \code{vignette} for all kin types.} +\item{output_kin}{character. kin to return. For example "m" for mother, "d" for daughter. See \code{vignette} for all kin types.} \item{list_output}{logical. Results as a list with \code{output_kin} elements, with focal´s age in columns and kin ages in rows (2 * ages, last chunk of ages for death experience). Default \code{FALSE}} } @@ -31,5 +31,5 @@ A data frame with focal´s age, related ages and type of kin (for example \code{d} is daughter, \code{oa} is older aunts, etc.), alive and death. If \code{list_output = TRUE} then this is a list. } \description{ -Implementation of Goodman-Keyfitz-Pullum equations adapted by Caswell (2019). +Mtrix implementation of Goodman-Keyfitz-Pullum equations adapted by Caswell (2019). } diff --git a/man/kin_time_invariant_2sex.Rd b/man/kin_time_invariant_2sex.Rd new file mode 100644 index 0000000..550a331 --- /dev/null +++ b/man/kin_time_invariant_2sex.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/kin_time_invariant_2sex.R +\name{kin_time_invariant_2sex} +\alias{kin_time_invariant_2sex} +\title{Estimate kin counts in a time invariant framework for two-sex model.} +\usage{ +kin_time_invariant_2sex( + pf = NULL, + pm = NULL, + ff = NULL, + fm = NULL, + sex_focal = "f", + birth_female = 1/2.04, + pif = NULL, + pim = NULL, + output_kin = NULL, + list_output = FALSE +) +} +\arguments{ +\item{pf}{numeric. A vector of survival probabilities for females with same length as ages.} + +\item{pm}{numeric. A vector of survival probabilities for males with same length as ages.} + +\item{ff}{numeric. A vector of age-specific fertility rates for females with same length as ages.} + +\item{fm}{numeric. A vector of age-specific fertility rates for males with same length as ages.} + +\item{sex_focal}{character. "f" for female or "m" for male.} + +\item{birth_female}{numeric. Female portion at birth.} + +\item{pif}{numeric. For using some specific non-stable age distribution of childbearing for mothers (same length as ages). Default \code{NULL}.} + +\item{pim}{numeric. For using some specific non-stable age distribution of childbearing for fathers (same length as ages). Default \code{NULL}.} + +\item{output_kin}{character. kin to return, considering matrilineal names. For example "m" for parents, "d" for children, etc. See the \code{vignette} for all kin types.} + +\item{list_output}{logical. Results as a list with \code{output_kin} elements, with focal´s age in columns and kin ages in rows (2 * ages, last chunk of ages for death experience). Default \code{FALSE}} +} +\value{ +A data frame with focal´s age, related ages and type of kin +(for example \code{d} is children, \code{oa} is older aunts/uncles, etc.), sex, alive and death. If \code{list_output = TRUE} then this is a list. +} +\description{ +Two-sex matrix framework for kin count estimates.This produces kin counts grouped by kin, age and sex of +each relatives at each Focal´s age. For example, male cousins from aunts and uncles from different sibling's parents +are grouped in one male count of cousins. +} +\details{ +See Caswell (2022) for details on formulas. +} diff --git a/man/kin_time_invariant_2sex_cod.Rd b/man/kin_time_invariant_2sex_cod.Rd new file mode 100644 index 0000000..6645ca0 --- /dev/null +++ b/man/kin_time_invariant_2sex_cod.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/kin_time_invariant_2sex_cod.R +\name{kin_time_invariant_2sex_cod} +\alias{kin_time_invariant_2sex_cod} +\title{Estimate kin counts in a time invariant framework for two-sex model.} +\usage{ +kin_time_invariant_2sex_cod( + pf = NULL, + pm = NULL, + ff = NULL, + fm = NULL, + Hf = NULL, + Hm = NULL, + sex_focal = "f", + birth_female = 1/2.04, + pif = NULL, + pim = NULL, + output_kin = NULL, + list_output = FALSE +) +} +\arguments{ +\item{pf}{numeric. A vector of survival probabilities for females with same length as ages.} + +\item{pm}{numeric. A vector of survival probabilities for males with same length as ages.} + +\item{ff}{numeric. A vector of age-specific fertility rates for females with same length as ages.} + +\item{fm}{numeric. A vector of age-specific fertility rates for males with same length as ages.} + +\item{Hf}{numeric. A matrix with cause-specific hazards for females with rows as causes and columns as ages, being the name of each col the age.} + +\item{Hm}{numeric. A matrix with cause-specific hazards for males with rows as causes and columns as ages, being the name of each col the age.} + +\item{sex_focal}{character. "f" for female or "m" for male.} + +\item{birth_female}{numeric. Female portion at birth.} + +\item{pif}{numeric. For using some specific non-stable age distribution of childbearing for mothers (same length as ages). Default \code{NULL}.} + +\item{pim}{numeric. For using some specific non-stable age distribution of childbearing for fathers (same length as ages). Default \code{NULL}.} + +\item{output_kin}{character. kin to return, considering matrilineal names. For example "m" for parents, "d" for children, etc. See the \code{vignette} for all kin types.} + +\item{list_output}{logical. Results as a list with \code{output_kin} elements, with focal´s age in columns and kin ages in rows (2 * ages, last chunk of ages for death experience). Default \code{FALSE}} +} +\value{ +A data frame with focal´s age, related ages and type of kin +(for example \code{d} is children, \code{oa} is older aunts/uncles, etc.), sex, alive and death. If \code{list_output = TRUE} then this is a list. +} +\description{ +Two-sex matrix framework for kin count and death estimates.This produces kin counts grouped by kin, age and sex of +each relatives at each Focal´s age. For example, male cousins from aunts and uncles from different sibling's parents +are grouped in one male count of cousins. This also produces kin deaths grouped by kin, age, sex of +each relatives at each Focal´s age, and cause of death. +} +\details{ +See Caswell (2022) for details on formulas. +} diff --git a/man/kin_time_variant.Rd b/man/kin_time_variant.Rd index da16ad7..787052f 100644 --- a/man/kin_time_variant.Rd +++ b/man/kin_time_variant.Rd @@ -2,13 +2,13 @@ % Please edit documentation in R/kin_time_variant.R \name{kin_time_variant} \alias{kin_time_variant} -\title{Estimate kin counts in a time variant framework} +\title{Estimate kin counts in a time variant framework (dynamic rates) for one-sex model (matrilineal/patrilineal)} \usage{ kin_time_variant( - U = NULL, + p = NULL, f = NULL, - N = NULL, pi = NULL, + n = NULL, output_cohort = NULL, output_period = NULL, output_kin = NULL, @@ -17,14 +17,14 @@ kin_time_variant( ) } \arguments{ -\item{U}{numeric. A matrix of survival ratios with rows as ages and columns as years. Column names must be equal interval.} +\item{p}{numeric. A matrix of survival ratios with rows as ages and columns as years. Column names must be equal interval.} \item{f}{numeric. A matrix of age-specific fertility rates with rows as ages and columns as years. Coincident with \code{U}.} -\item{N}{numeric. A matrix of population with rows as ages and columns as years. Coincident with \code{U}.} - \item{pi}{numeric. A matrix with distribution of childbearing with rows as ages and columns as years. Coincident with \code{U}.} +\item{n}{numeric. A matrix of population with rows as ages and columns as years. Coincident with \code{U}.} + \item{output_cohort}{integer. Year of birth of focal to return as output. Could be a vector. Should be within input data years range.} \item{output_period}{integer. Year for which to return kinship structure. Could be a vector. Should be within input data years range.} @@ -36,9 +36,12 @@ kin_time_variant( \item{list_output}{logical. Results as a list with years elements (as a result of \code{output_cohort} and \code{output_period} combination), with a second list of \code{output_kin} elements, with focal´s age in columns and kin ages in rows (2 * ages, last chunk of ages for death experience). Default \code{FALSE}} } \value{ -A data frame of population kinship structure, with focal's cohort, focal´s age, period year, type of relatives +A data frame of population kinship structure, with Focal's cohort, focal´s age, period year, type of relatives (for example \code{d} is daughter, \code{oa} is older aunts, etc.), living and death kin counts, and age of (living or time deceased) relatives. If \code{list_output = TRUE} then this is a list. } \description{ -Implementation of time variant Goodman-Keyfitz-Pullum equations based on Caswell (2021). +Matrix implementation of time variant Goodman-Keyfitz-Pullum equations in a matrix framework. +} +\details{ +See Caswell (2021) for details on formulas. } diff --git a/man/kin_time_variant_2sex.Rd b/man/kin_time_variant_2sex.Rd new file mode 100644 index 0000000..a30624f --- /dev/null +++ b/man/kin_time_variant_2sex.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/kin_time_variant_2sex.R +\name{kin_time_variant_2sex} +\alias{kin_time_variant_2sex} +\title{Estimate kin counts in a time variant framework (dynamic rates) in a two-sex framework (Caswell, 2022)} +\usage{ +kin_time_variant_2sex( + pf = NULL, + pm = NULL, + ff = NULL, + fm = NULL, + sex_focal = "f", + birth_female = 1/2.04, + pif = NULL, + pim = NULL, + nf = NULL, + nm = NULL, + output_cohort = NULL, + output_period = NULL, + output_kin = NULL, + list_output = FALSE +) +} +\arguments{ +\item{pf}{numeric. A vector (atomic) or matrix with probabilities (or survival ratios, or transition between age class in a more general perspective) with rows as ages (and columns as years in case of matrix, being the name of each col the year).} + +\item{pm}{numeric. A vector (atomic) or matrix with probabilities (or survival ratios, or transition between age class in a more general perspective) with rows as ages (and columns as years in case of matrix, being the name of each col the year).} + +\item{ff}{numeric. Same as pf but for fertility rates.} + +\item{fm}{numeric. Same as pm but for fertility rates.} + +\item{sex_focal}{character. "f" for female or "m" for male.} + +\item{birth_female}{numeric. Female portion at birth. This multiplies \code{f} argument. If \code{f} is already for female offspring, this needs to be set as 1.} + +\item{pif}{numeric. For using some specific age distribution of childbearing for mothers (same length as ages). Default \code{NULL}.} + +\item{pim}{numeric. For using some specific age distribution of childbearing for fathers (same length as ages). Default \code{NULL}.} + +\item{nf}{numeric. Same as pf but for population distribution (counts or \verb{\%}). Optional.} + +\item{nm}{numeric. Same as pm but for population distribution (counts or \verb{\%}). Optional.} + +\item{output_cohort}{integer. Vector of year cohorts for returning results. Should be within input data years range.} + +\item{output_period}{integer. Vector of period years for returning results. Should be within input data years range.} + +\item{output_kin}{character. kin types to return: "m" for mother, "d" for daughter,...} + +\item{list_output}{logical. Results as a list with years elements (as a result of \code{output_cohort} and \code{output_period} combination), with a second list of \code{output_kin} elements, with focal´s age in columns and kin ages in rows (2 * ages, last chunk of ages for death experience). Default \code{FALSE}} +} +\value{ +A data.frame with year, cohort, Focal´s age, related ages, sex and type of kin (for example \code{d} is daughter, \code{oa} is older aunts, etc.), including living and dead kin at that age and sex. +} +\description{ +Two-sex matrix framework for kin count estimates with varying rates. +This produces kin counts grouped by kin, age and sex of each relatives at each Focal´s age. +For example, male cousins from aunts and uncles from different sibling's parents are grouped in one male count of cousins. +} +\details{ +See Caswell (2022) for details on formulas. +} diff --git a/man/kin_time_variant_2sex_cod.Rd b/man/kin_time_variant_2sex_cod.Rd new file mode 100644 index 0000000..c0db9a8 --- /dev/null +++ b/man/kin_time_variant_2sex_cod.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/kin_time_variant_2sex_cod.R +\name{kin_time_variant_2sex_cod} +\alias{kin_time_variant_2sex_cod} +\title{Estimate kin counts in a time variant framework (dynamic rates) in a two-sex framework (Caswell, 2022)} +\usage{ +kin_time_variant_2sex_cod( + pf = NULL, + pm = NULL, + ff = NULL, + fm = NULL, + Hf = NULL, + Hm = NULL, + sex_focal = "f", + birth_female = 1/2.04, + pif = NULL, + pim = NULL, + nf = NULL, + nm = NULL, + output_cohort = NULL, + output_period = NULL, + output_kin = NULL, + list_output = FALSE +) +} +\arguments{ +\item{pf}{numeric. A vector (atomic) or matrix with probabilities (or survival ratios, or transition between age class in a more general perspective) with rows as ages (and columns as years in case of matrix, being the name of each col the year).} + +\item{pm}{numeric. A vector (atomic) or matrix with probabilities (or survival ratios, or transition between age class in a more general perspective) with rows as ages (and columns as years in case of matrix, being the name of each col the year).} + +\item{ff}{numeric. Same as pf but for fertility rates.} + +\item{fm}{numeric. Same as pm but for fertility rates.} + +\item{Hf}{numeric. A list where each list element (being the name of each list element the year) contains a matrix with cause-specific hazards for females with rows as causes and columns as ages, being the name of each col the age.} + +\item{Hm}{numeric. A list where each list element (being the name of each list element the year) contains a matrix with cause-specific hazards for males with rows as causes and columns as ages, being the name of each col the age.} + +\item{sex_focal}{character. "f" for female or "m" for male.} + +\item{birth_female}{numeric. Female portion at birth. This multiplies \code{f} argument. If \code{f} is already for female offspring, this needs to be set as 1.} + +\item{pif}{numeric. For using some specific age distribution of childbearing for mothers (same length as ages). Default \code{NULL}.} + +\item{pim}{numeric. For using some specific age distribution of childbearing for fathers (same length as ages). Default \code{NULL}.} + +\item{nf}{numeric. Same as pf but for population distribution (counts or \verb{\%}). Optional.} + +\item{nm}{numeric. Same as pm but for population distribution (counts or \verb{\%}). Optional.} + +\item{output_cohort}{integer. Vector of year cohorts for returning results. Should be within input data years range.} + +\item{output_period}{integer. Vector of period years for returning results. Should be within input data years range.} + +\item{output_kin}{character. kin types to return: "m" for mother, "d" for daughter,...} + +\item{list_output}{logical. Results as a list with years elements (as a result of \code{output_cohort} and \code{output_period} combination), with a second list of \code{output_kin} elements, with focal´s age in columns and kin ages in rows (2 * ages, last chunk of ages for death experience). Default \code{FALSE}} +} +\value{ +A data.frame with year, cohort, Focal´s age, related ages, sex and type of kin (for example \code{d} is daughter, \code{oa} is older aunts, etc.), including living and dead kin at that age and sex. +} +\description{ +Two-sex matrix framework for kin count estimates with varying rates. +This produces kin counts grouped by kin, age and sex of each relatives at each Focal´s age. +For example, male cousins from aunts and uncles from different sibling's parents are grouped in one male count of cousins. This also produces kin deaths grouped by kin, age, sex of +each relatives at each Focal´s age, and cause of death. +} +\details{ +See Caswell (2022) for details on formulas. +} diff --git a/man/output_period_cohort_combination.Rd b/man/output_period_cohort_combination.Rd index 5b20baf..62fc02e 100644 --- a/man/output_period_cohort_combination.Rd +++ b/man/output_period_cohort_combination.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/kin_time_variant.R \name{output_period_cohort_combination} \alias{output_period_cohort_combination} -\title{defince apc combination to return} +\title{APC combination to return} \usage{ output_period_cohort_combination( output_cohort = NULL, @@ -11,6 +11,22 @@ output_period_cohort_combination( years_data = NULL ) } +\arguments{ +\item{output_cohort}{integer. A vector with selected calendar years.} + +\item{output_period}{integer. A vector with selected cohort years.} + +\item{age}{integer. A vector with ages from the kinship network to be filtered.} + +\item{years_data}{integer. A vector with years from the time-varying kinship network to be filtered.} +} +\value{ +data.frame with years and ages to filter in \code{kin} and \code{kin_2sex} functions. +} \description{ -defince apc to return. +define APC combination to return in \code{kin} and \code{kin2sex}. +} +\details{ +Because returning all period and cohort data from a huge time-series would be hard memory consuming, +this function is an auxiliary one to deal with selection from inputs \code{output_cohort} and \code{output_period}. } diff --git a/man/plot_diagram.Rd b/man/plot_diagram.Rd index 8d2448c..fdce2a5 100644 --- a/man/plot_diagram.Rd +++ b/man/plot_diagram.Rd @@ -7,13 +7,13 @@ plot_diagram(kin_total, rounding = 3) } \arguments{ -\item{kin_total}{data.frame. With columns \code{kin} with type and \code{count} with some measeure.} +\item{kin_total}{data.frame. values in column \code{kin} define the relative type - see \code{demokin_codes()}. Values in column \code{count} are the expected number of relatives.} -\item{rounding}{numeric. Estimation could have a lot of decimals. Rounding will make looks more clear the diagramm.} +\item{rounding}{numeric. Number of decimals to show in diagram.} } \value{ -A plot +A Keyfitz-style kinship plot. } \description{ -Given estimation of kin counts from \code{kins} function, draw a network diagramm. +Draws a Keyfitz-style kinship diagram given a kinship object created by the \code{kin} function. Displays expected kin counts for a Focal aged 'a'. } diff --git a/man/rename_kin.Rd b/man/rename_kin.Rd index 023cf99..b5d195b 100644 --- a/man/rename_kin.Rd +++ b/man/rename_kin.Rd @@ -4,8 +4,19 @@ \alias{rename_kin} \title{rename kin} \usage{ -rename_kin(df, consolidate_column = "no") +rename_kin(df, sex = "f") +} +\arguments{ +\item{df}{data.frame. A data frame with variable \code{kin} with \code{DemoKin} codes to be labelled.} + +\item{sex}{character. "f" for female, "m" for male or "2sex" for both sex naming.} +} +\value{ +Add a column with kin labels in the input data frame. } \description{ -Rename kin labels depending consolidate some types +Add kin labels depending the sex +} +\details{ +See table \code{demokin_codes} to know label options. } diff --git a/man/timevarying_kin.Rd b/man/timevarying_kin.Rd index 1826543..ac481c9 100644 --- a/man/timevarying_kin.Rd +++ b/man/timevarying_kin.Rd @@ -17,6 +17,9 @@ timevarying_kin(Ut, ft, pit, ages, pkin) \item{pkin}{numeric. A list with kin count distribution in previous year.} } +\value{ +A list of 14 types of kin matrices (kin age by Focal age) projected one time interval. +} \description{ one time projection kin. internal function. } diff --git a/man/timevarying_kin_2sex.Rd b/man/timevarying_kin_2sex.Rd new file mode 100644 index 0000000..abf1774 --- /dev/null +++ b/man/timevarying_kin_2sex.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/kin_time_variant_2sex.R +\name{timevarying_kin_2sex} +\alias{timevarying_kin_2sex} +\title{one time projection kin} +\usage{ +timevarying_kin_2sex(Ut, Ft, Ft_star, pit, sex_focal, ages, pkin) +} +\arguments{ +\item{Ut}{numeric. A matrix of survival probabilities (or ratios).} + +\item{Ft}{numeric. A matrix of age-specific fertility rates.} + +\item{Ft_star}{numeric. Ft but for female fertility.} + +\item{pit}{numeric. A matrix with distribution of childbearing.} + +\item{sex_focal}{character. "f" for female or "m" for male.} + +\item{ages}{numeric.} + +\item{pkin}{numeric. A list with kin count distribution in previous year.} +} +\value{ +A list of 14 types of kin matrices (kin age by Focal age, blocked for two sex) projected one time interval. +} +\description{ +one time projection kin. internal function. +} diff --git a/man/timevarying_kin_2sex_cod.Rd b/man/timevarying_kin_2sex_cod.Rd new file mode 100644 index 0000000..8bc0b87 --- /dev/null +++ b/man/timevarying_kin_2sex_cod.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/kin_time_variant_2sex_cod.R +\name{timevarying_kin_2sex_cod} +\alias{timevarying_kin_2sex_cod} +\title{one time projection kin} +\usage{ +timevarying_kin_2sex_cod(Ut, Ft, Ft_star, pit, sex_focal, ages, pkin) +} +\arguments{ +\item{Ut}{numeric. A matrix of survival probabilities (or ratios).} + +\item{Ft}{numeric. A matrix of age-specific fertility rates.} + +\item{Ft_star}{numeric. Ft but for female fertility.} + +\item{pit}{numeric. A matrix with distribution of childbearing.} + +\item{sex_focal}{character. "f" for female or "m" for male.} + +\item{ages}{numeric.} + +\item{pkin}{numeric. A list with kin count distribution in previous year.} +} +\value{ +A list of 14 types of kin matrices (kin age by Focal age, blocked for two sex) projected one time interval. +} +\description{ +one time projection kin. internal function. +} diff --git a/tests/testthat/test-kin_multi_stage.R b/tests/testthat/test-kin_multi_stage.R index 2e3d58d..d7a845b 100644 --- a/tests/testthat/test-kin_multi_stage.R +++ b/tests/testthat/test-kin_multi_stage.R @@ -4,7 +4,8 @@ test_that("same output in multi_stage (caswell 2020)", { demokin_svk1990_caswell2020 <- kin_multi_stage(U = svk_Uxs, f = svk_fxs, D = svk_pxs, - H = svk_Hxs, birth_female=1, list_output = TRUE) + H = svk_Hxs, birth_female=1, + list_output = TRUE, parity = TRUE) expect_equal(demokin_svk1990_caswell2020$d[1:(110*6),], kin_svk1990_caswell2020$d) expect_equal(demokin_svk1990_caswell2020$gd[1:(110*6),], kin_svk1990_caswell2020$gd) expect_equal(demokin_svk1990_caswell2020$ggd[1:(110*6),], kin_svk1990_caswell2020$ggd) diff --git a/vignettes/Reference.Rmd b/vignettes/Reference_OneSex.Rmd similarity index 76% rename from vignettes/Reference.Rmd rename to vignettes/Reference_OneSex.Rmd index 388a072..ced906e 100644 --- a/vignettes/Reference.Rmd +++ b/vignettes/Reference_OneSex.Rmd @@ -1,17 +1,19 @@ --- -title: "Expected kin counts by type of relative: A matrix implementation" +title: "Expected kin counts by type of relative in a one-sex framework" +bibliography: references.bib output: html_document: toc: true toc_depth: 1 vignette: > - %\VignetteIndexEntry{Use} + %\VignetteIndexEntry{Reference_OneSex} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- -```{r, include=FALSE} -devtools::load_all() +```{r, eval = F, include=FALSE} +knitr::opts_chunk$set(collapse = TRUE, comment = "#>") +library(devtools); load_all() ``` In this vignette, we'll demonstrate how `DemoKin` can be used to compute kinship networks for an average member of a given (female) population. Let us call her Focal: an average Swedish woman who has always lived in Sweden and whose family has never left the country. @@ -19,19 +21,21 @@ Here, we'll show how `DemoKin` can be used to compute the number and age distrib ## 1. Kin counts with time-invariant rates -First, we compute kin counts in a **time-invariant** framework. We assume that Focal and all of her relatives experience the 2015 mortality and fertility rates throughout their entire lives (Caswell, 2019). The `DemoKin` package includes data from Sweden as an example: age-by-year matrices of survival probabilities (*swe_px*), survival ratios (*swe_Sx*), fertility rates (*swe_asfr*), and population numbers (*swe_pop*). You can see the data contained in `DemoKin` with `data(package="DemoKin")`. This data comes from the [Human Mortality Database](https://www.mortality.org/) and [Human Fertility Database](https://www.humanfertility.org/) (see `?DemoKin::get_HMDHFD`). +First, we compute kin counts in a **time-invariant** framework. We assume that Focal and all of her relatives experience the 2015 mortality and fertility rates throughout their entire lives [@caswell_formal_2019]. The `DemoKin` package includes data from Sweden as an example: age-by-year matrices of survival probabilities (*swe_px*), survival ratios (*swe_Sx*), fertility rates (*swe_asfr*), and population numbers (*swe_pop*). You can see the data contained in `DemoKin` with `data(package="DemoKin")`. This data comes from the [Human Mortality Database](https://www.mortality.org/) and [Human Fertility Database](https://www.humanfertility.org/) (see `?DemoKin::get_HMDHFD`). -In order to implement the time-invariant models, the function `DemoKin::kin` expects a vector of sruvival ratios and another vector of fertility rates. In this example, we get the data for the year 2015, and run the matrix models: +In order to implement the time-invariant models, the function `DemoKin::kin` expects a vector of survival ratios and another vector of fertility rates. In this example, we get the data for the year 2015, and run the matrix models: ```{r, message=FALSE, warning=FALSE} library(DemoKin) -library(tidyverse) +library(tidyr) +library(dplyr) +library(ggplot2) library(knitr) # First, get vectors for a given year swe_surv_2015 <- swe_px[,"2015"] swe_asfr_2015 <- swe_asfr[,"2015"] # Run kinship models -swe_2015 <- kin(U = swe_surv_2015, f = swe_asfr_2015, time_invariant = TRUE) +swe_2015 <- kin(p = swe_surv_2015, f = swe_asfr_2015, time_invariant = TRUE) ``` ### 1.1. Value @@ -53,7 +57,7 @@ head(swe_2015$kin_summary) To produce it, we sum over all ages of kin to produce a data frame of expected kin counts by year or cohort and age of Focal (but not by age of kin). Consider this simplified example for living kin counts: -```{r} +```{r, message=FALSE, warning=FALSE} kin_summary_example <- swe_2015$kin_full %>% select(year, cohort, kin, age_focal, age_kin, living, dead) %>% @@ -76,12 +80,10 @@ swe_2015[["kin_summary"]] %>% facet_wrap(~kin) ``` -Here, each relative type is identified by a unique code. Note that `DemoKin` uses different codes than Caswell (2019); the equivalence between the two set of codes is given in the following table: +Here, each relative type is identified by a unique code. Note that `DemoKin` uses different codes than Caswell [-@caswell_formal_2019]; the equivalence between the two set of codes is given in the following table: ```{r, fig.height=6, fig.width=8, echo=FALSE} -library(knitr) - -DemoKin::demokin_codes() %>% +demokin_codes %>% kable ``` @@ -89,7 +91,6 @@ We can also visualize the age distribution of relatives when Focal is 35 years o ```{r, fig.height=6, fig.width=8} swe_2015[["kin_full"]] %>% - DemoKin::rename_kin() %>% filter(age_focal == 35) %>% ggplot() + geom_line(aes(age_kin, living)) + @@ -100,14 +101,12 @@ swe_2015[["kin_full"]] %>% ``` The one-sex model implemented in `DemoKin` assumes that the given fertility input applies to both sexes. - -Note that, if using survival rates ($S_x$) instead of probabilities ($p_x$), fertility vectors should account for female person-year exposure, using: $(\frac{f_x+f_{x+1}S_x}{2})\frac{L_0}{l_0}$ instead of only $fx$ (see Preston et.al, 2002). +Note that, if using survival rates ($S_x$) instead of probabilities ($p_x$), fertility vectors should account for female person-year exposure, using: $(\frac{f_x+f_{x+1}S_x}{2})\frac{L_0}{l_0}$ instead of only $fx$; see Preston et.al [-@preston_demography:_2001]). The `kin` function also includes a summary output with the count of living kin, mean and standard deviation of kin age, by type of kin, for each Focal's age: ```{r, fig.height=6, fig.width=8} swe_2015[["kin_summary"]] %>% - DemoKin::rename_kin() %>% filter(age_focal == 35) %>% select(kin, count_living, mean_age, sd_age) %>% mutate_if(is.numeric, round, 2) %>% @@ -117,32 +116,31 @@ swe_2015[["kin_summary"]] %>% Finally, we can visualize the estimated kin counts by type of kin using a network diagram. Following with the age 35: ```{r, fig.height=6, fig.width=8, dpi=900, message=FALSE, warning=FALSE} - swe_2015[["kin_summary"]] %>% - filter(age_focal == 35) %>% - select(kin, count = count_living) %>% - plot_diagram(rounding = 2) +swe_2015[["kin_summary"]] %>% + filter(age_focal == 35) %>% + select(kin, count = count_living) %>% + plot_diagram(rounding = 2) ``` ## 2. Kin counts with time-variant rates The demography of Sweden is, in reality, changing every year. This means that Focal and her relatives will have experienced changing mortality and fertility rates over time. -We account for this, by using the time-variant models introduced by Caswell and Song (2021). +We account for this, by using the time-variant models introduced by Caswell and Song [-@caswell_formal_2021]. Let's take a look at the resulting kin counts for a Focal born in 1960, limiting the output to the relative types given in the argument `output_kin`: ```{r, fig.height=6, fig.width=8} swe_time_varying <- kin( - U = swe_px, + p = swe_px, f = swe_asfr, - N = swe_pop, + n = swe_pop, time_invariant =FALSE, output_cohort = 1960, output_kin = c("d","gd","ggd","m","gm","ggm") ) swe_time_varying$kin_summary %>% - DemoKin::rename_kin() %>% ggplot(aes(age_focal,count_living,color=factor(cohort))) + scale_y_continuous(name = "",labels = seq(0,3,.2),breaks = seq(0,3,.2))+ geom_line(color = 1)+ @@ -162,7 +160,6 @@ The function `kin` also includes information on the number of relatives lost by ```{r, fig.height=6, fig.width=8, message=FALSE, warning=FALSE} swe_time_varying$kin_summary %>% - DemoKin::rename_kin() %>% ggplot() + geom_line(aes(age_focal, count_cum_dead)) + labs(y = "Expected number of deceased relatives") + @@ -174,7 +171,6 @@ Given these population-level measures, we can compute Focal's the mean age at th ```{r} swe_time_varying$kin_summary %>% - rename_kin() %>% filter(age_focal == 50) %>% select(kin,count_cum_dead,mean_age_lost) %>% mutate_if(is.numeric, round, 2) %>% @@ -183,7 +179,7 @@ swe_time_varying$kin_summary %>% ## 4. Prevalences -Given the distribution of kin by age, we can compute the expected portion of living kin in some stage given a set of prevalences by age (e.g., a disease, employment, etc.). This is known as the Sullivan Method in the life-table literature. A matrix formulation for same results can be found in Caswell (2019), which can also be extended to a time-variant framework. +Given the distribution of kin by age, we can compute the expected portion of living kin in some stage given a set of prevalences by age (e.g., a disease, employment, etc.). This is known as the Sullivan Method in the life-table literature. A matrix formulation for same results can be found in Caswell [-@caswell_formal_2019], which can also be extended to a time-variant framework. ```{r, message=FALSE, warning=FALSE, fig.height=6, fig.width=10} # let´s create some prevalence by age @@ -192,14 +188,10 @@ swe_2015_prevalence <- age_kin = unique(swe_2015$kin_full$age_kin), prev = .005 * exp(.05 * age_kin) ) - -# plot(swe_2015_prevalence) - # join to kin count estimates and plot swe_2015$kin_full %>% left_join(swe_2015_prevalence) %>% group_by(kin, age_focal) %>% - rename_kin() %>% summarise( prevalent = sum(living * prev), no_prevalent = sum(living * (1-prev)) @@ -216,7 +208,7 @@ swe_2015$kin_full %>% `DemoKin` allows the computation of kin structures in a multi-state framework, classifying individuals jointly by age and some other feature (e.g., stages of a disease). For this, we need mortality and fertility data for each possible stage and probabilities of changing state by age. -Let's consider the example of Slovakia given by Caswell (2021), where stages are parity states. +Let's consider the example of Slovakia given by Caswell [-@caswell_formal_2021], where stages are parity states. `DemoKin` includes the data to replicate this analysis for the year 1980: - The data.frame `svk_fxs` is the fertility rate by age (rows) for each parity stage (columns). The first stage represents $parity=0$; the second stage, $parity=1$; and so on, until finally the sixth stage represents $parity\geq5$. @@ -224,7 +216,7 @@ Let's consider the example of Slovakia given by Caswell (2021), where stages are - The data.frame `svk_pxs` has the same structure and represents survival probabilities. - The list `svk_Uxs` has the same number of elements and ages (in this case 110, where $omega$ is 109). For each age, it contains a column-stochastic transition matrix with dimension for the state space. The entries are transition probabilities conditional on survival. -Following Caswell (2020), we can obtain the joint age-parity kin structure: +Following Caswell [-@caswell_formal_2020], we can obtain the joint age-parity kin structure: ```{r} # use birth_female=1 because fertility is for female only @@ -234,18 +226,18 @@ demokin_svk1980_caswell2020 <- f = svk_fxs, D = svk_pxs, H = svk_Hxs, - birth_female=1 - ) + birth_female=1, + parity = TRUE) ``` -As an example, consider the age-parity distribution of aunts, when Focal is 20 and 60 yo (this is equivalent to Figure 4 in Caswell [2021]). +Note that the function ask for risks already in a certain matrix format. As an example, consider the age-parity distribution of aunts, when Focal is 20 and 60 yo (this is equivalent to Figure 4 in Caswell [-@caswell_formal_2021]). ```{r, message=FALSE, warning=FALSE, fig.height=6, fig.width=10} demokin_svk1980_caswell2020 %>% filter(kin %in% c("oa","ya"), age_focal %in% c(20,60)) %>% mutate(parity = as.integer(stage_kin)-1, - parity = case_when(parity == 5 ~ "5+", T ~ as.character(parity)), - parity = fct_rev(parity)) %>% + parity = case_when(parity == 5 ~ "5+", T ~ as.character(parity)) + ) %>% group_by(age_focal, age_kin, parity) %>% summarise(count= sum(living)) %>% ggplot() + @@ -256,14 +248,13 @@ demokin_svk1980_caswell2020 %>% facet_wrap(~age_focal, nrow = 2) ``` -We can also see the portion of living daughters and mothers at different parity stages over Focal's lie-course (this is equivalent to Figure 9 and 10 in Caswell [2021]). +We can also see the portion of living daughters and mothers at different parity stages over Focal's life-course (this is equivalent to Figure 9 and 10 in Caswell [-@caswell_formal_2021]). ```{r, message=FALSE, warning=FALSE, fig.height=6, fig.width=10} demokin_svk1980_caswell2020 %>% filter(kin %in% c("d","m")) %>% mutate(parity = as.integer(stage_kin)-1, - parity = case_when(parity == 5 ~ "5+", T ~ as.character(parity)), - parity = fct_rev(parity)) %>% + parity = case_when(parity == 5 ~ "5+", T ~ as.character(parity))) %>% group_by(age_focal, kin, parity) %>% summarise(count= sum(living)) %>% DemoKin::rename_kin() %>% @@ -274,12 +265,6 @@ demokin_svk1980_caswell2020 %>% facet_wrap(~kin, nrow = 2) ``` -## References +This function `kin_multi_stage` can be generalized to any kind of state (be sure to set parameter `parity = FALSE`, de default). -Caswell, H. (2019). The formal demography of kinhip: A matrix formulation. Demographic Research 41:679–712. doi:10.4054/DemRes.2019.41.24. - -Caswell, H. (2020). The formal demography of kinship II: Multistate models, parity, and sibship. Demographic Research, 42, 1097–1146. - -Caswell, H., & Song, X. (2021). The formal demography of kinhip III: kinhip dynamics with time-varying demographic rates. Demographic Research, 45, 517–546. - -Preston, S., Heuveline, P., & Guillot, M. (2000). Demography: Measuring and Modeling Population Processes. Wiley. +## References diff --git a/vignettes/Reference_TwoSex.Rmd b/vignettes/Reference_TwoSex.Rmd new file mode 100644 index 0000000..12289d9 --- /dev/null +++ b/vignettes/Reference_TwoSex.Rmd @@ -0,0 +1,325 @@ +--- +title: "Two-sex kinship model" +bibliography: references.bib +output: + html_document: + toc: true + toc_depth: 1 +vignette: > + %\VignetteIndexEntry{Reference_TwoSex} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, eval = T, include=FALSE} +knitr::opts_chunk$set(collapse = TRUE, comment = "#>") +library(devtools); load_all() +``` + +Human males generally live shorter and reproduce later than females. +These sex-specific processes affect kinship dynamics in a number of ways. +For example, the degree to which an average member of the population, call her Focal, has a living grandparent is affected by differential mortality affecting the parental generation at older ages. +We may also be interested in considering how kinship structures vary by Focal's sex: a male Focal may have a different number of grandchildren than a female Focal given differences in fertility by sex. +Documenting these differences matters since women often face greater expectations to provide support and informal care to relatives. +As they live longer, they may find themselves at greater risk of being having no living kin. +The function `kin2sex` implements two-sex kinship models as introduced by Caswell [-@caswell_formal_2022]. +This vignette show how to run two-sex models and highlights some of the advantages of this model over one-sex models in populations with time-invariant and time-variant rates. + +```{r, message=FALSE, warning=FALSE} +library(DemoKin) +library(tidyr) +library(dplyr) +library(ggplot2) +library(knitr) +``` + +### 1. Demographic rates by sex + +Data on female fertility by age is less common than female fertility. Schoumaker (2019) shows that male TFR is almost always higher than female Total Fertility Rates (TFR) using a sample of 160 countries. +For this example, we use data from 2012 France to exemplify the use of the two-sex function. +Data on female and male fertility and mortality are included in `DemoKin`. In this population, male and female TFR is almost identical (1.98 and 1.99) but the distributions of fertility by sex varies over age: + +```{r} +fra_fert_f <- fra_asfr_sex[,"ff"] +fra_fert_m <- fra_asfr_sex[,"fm"] +fra_surv_f <- fra_surv_sex[,"pf"] +fra_surv_m <- fra_surv_sex[,"pm"] + +sum(fra_fert_m)-sum(fra_fert_f) + +data.frame(value = c(fra_fert_f, fra_fert_m, fra_surv_f, fra_surv_m), + age = rep(0:100, 4), + sex = rep(c(rep("f", 101), rep("m", 101)), 2), + risk = c(rep("fertility rate", 101 * 2), rep("survival probability", 101 * 2))) %>% + ggplot(aes(age, value, col=sex)) + + geom_line() + + facet_wrap(~ risk, scales = "free_y") + + theme_bw() +``` + +### 2. Time-invariant two-sex kinship models + +We now introduce the functions `kin2sex`, which is similar to the one-sex function `kin` (see `?kin`) with two exceptions. +First, the user needs to specify mortality and fertility by sex. +Second, the user must indicate the sex of Focal (which was assumed to be female in the one-sex model). +Let us first consider the application for time-invariant populations: + +```{r} +kin_result <- kin2sex( + pf = fra_surv_f, + pm = fra_surv_m, + ff = fra_fert_f, + fm = fra_fert_m, + time_invariant = TRUE, + sex_focal = "f", + birth_female = .5 + ) +``` + +The output of `kin2sex` is equivalent to that of `kin`, except that it includes a column `sex_kin` to specify the sex of the given relatives. + +Let's group aunts and siblings to visualize the number of living kin by Focal's age. + +```{r, message=FALSE, warning=FALSE} +kin_out <- kin_result$kin_summary %>% + mutate(kin = case_when(kin %in% c("s", "s") ~ "s", + kin %in% c("ya", "oa") ~ "a", + T ~ kin)) %>% + filter(kin %in% c("d", "m", "gm", "ggm", "s", "a")) + +kin_out %>% + group_by(kin, age_focal, sex_kin) %>% + summarise(count=sum(count_living)) %>% + ggplot(aes(age_focal, count, fill=sex_kin))+ + geom_area()+ + theme_bw() + + facet_wrap(~kin) +``` + +**A note on terminology** + +The function `kin2sex` uses the same codes as `kin` to identify relatives (see `demokin_codes()`). +Note that when running a two-sex model, the code 'm' refers to either mothers or fathers! +Use the column `sex_kin` to determine the sex of a given relatives. +For example, in order to consider only sons and ignore daughters, use: + +```{r} +kin_result$kin_summary %>% + filter(kin == "d", sex_kin == "m") %>% + head() +``` + +Information on kin availability by sex allows us to consider sex ratios, a traditional measure in demography, with females often in denominator. The following figure, for example, shows that a 25yo French woman in our hypothetical population can expect to have 0.5 grandfathers for every grandmother: + +```{r, message=FALSE, warning=FALSE} +kin_out %>% + group_by(kin, age_focal) %>% + summarise(sex_ratio=sum(count_living[sex_kin=="m"], na.rm=T)/sum(count_living[sex_kin=="f"], na.rm=T)) %>% + ggplot(aes(age_focal, sex_ratio))+ + geom_line()+ + theme_bw() + + facet_wrap(~kin, scales = "free") +``` + +The experience of kin loss for Focal depends on differences in mortality between sexes. +A female Focal starts losing fathers earlier than mothers. +We see a slightly different pattern for grandparents since Focal's experience of grandparental loss is dependent on the initial availability of grandparents (i.e. if Focal's grandparent died before her birth, she will never experience his death). + +```{r, message=FALSE, warning=FALSE} +# sex ratio +kin_out %>% + group_by(kin, sex_kin, age_focal) %>% + summarise(count=sum(count_dead)) %>% + ggplot(aes(age_focal, count, col=sex_kin))+ + geom_line()+ + theme_bw() + + facet_wrap(~kin) +``` + + +### 3. Time-variant two-sex kinship models + +We look at populations where demographic rates are not static but change on a yearly basis. +For this, we consider the case of Sweden using data pre-loaded in `DemoKin`. +For this example, we will create 'pretend' male fertility rates by slightly perturbing the existing female rates. +This is a toy example, since a real two-sex model should use actual female and male rates as inputs. + +```{r} +years <- ncol(swe_px) +ages <- nrow(swe_px) +swe_surv_f_matrix <- swe_px +swe_surv_m_matrix <- swe_px ^ 1.5 # artificial perturbation for this example +swe_fert_f_matrix <- swe_asfr +swe_fert_m_matrix <- rbind(matrix(0, 5, years), + swe_asfr[-((ages-4):ages),]) * 1.05 # artificial perturbation for this example +``` + +This is how it looks for year 1900: +```{r} +bind_rows( + data.frame(age = 0:100, sex = "Female", component = "Fertility rate", value = swe_fert_f_matrix[,"1900"]), + data.frame(age = 0:100, sex = "Male", component = "Fertility rate", value = swe_fert_m_matrix[,"1900"]), + data.frame(age = 0:100, sex = "Female", component = "Survival probability", value = swe_surv_f_matrix[,"1900"]), + data.frame(age = 0:100, sex = "Male", component = "Survival probability", value = swe_surv_m_matrix[,"1900"])) %>% + ggplot(aes(age, value, col = sex)) + + geom_line() + + theme_bw() + + facet_wrap(~component, scales = "free") +``` + + +We now run the time-variant two-sex models (note the `time_invariant = FALSE` argument): + +```{r} +kin_out_time_variant <- kin2sex( + pf = swe_surv_f_matrix, + pm = swe_surv_m_matrix, + ff = swe_fert_f_matrix, + fm = swe_fert_m_matrix, + sex_focal = "f", + time_invariant = FALSE, + birth_female = .5, + output_cohort = 1900 + ) +``` + +We can plot data on kin availability alongside values coming from a time-invariant model to show how demographic change matters: the time-variant models take into account changes derived from the demographic transition, whereas the time-invariant models assume never-changing rates. + +```{r, message=FALSE, warning=FALSE} +kin_out_time_invariant <- kin2sex( + swe_surv_f_matrix[,"1900"], swe_surv_m_matrix[,"1900"], + swe_fert_f_matrix[,"1900"], swe_fert_m_matrix[,"1900"], + sex_focal = "f", birth_female = .5) + + +kin_out_time_variant$kin_summary %>% + filter(cohort == 1900) %>% mutate(type = "variant") %>% + bind_rows(kin_out_time_invariant$kin_summary %>% mutate(type = "invariant")) %>% + mutate(kin = case_when(kin %in% c("ys", "os") ~ "s", + kin %in% c("ya", "oa") ~ "a", + T ~ kin)) %>% + filter(kin %in% c("d", "m", "gm", "ggm", "s", "a")) %>% + group_by(type, kin, age_focal, sex_kin) %>% + summarise(count=sum(count_living)) %>% + ggplot(aes(age_focal, count, linetype=type))+ + geom_line()+ theme_bw() + + facet_grid(cols = vars(kin), rows=vars(sex_kin), scales = "free") +``` + +### 4. Approximations + +Caswell [-@caswell_formal_2022] introduced two approaches for approximating two-sex kinship structures for cases when male demographic rates are not available. +The first is the *androgynous* approximation, which assumes equal fertility and survival for males and females. +The second is the use of *GKP factors* apply to each type of relative (e.g., multiplying mothers by two to obtain the number of mothers and fathers). + +Here, we present a visual evaluation of the accuracy of these approximations by comparing to 'true' two-sex models using the French data included with `DemoKin` for time-invariant models [@caswell_formal_2022]. +We start by considering the androgynous approximation. +We compare expected kin counts by age and find high levels of consistency for all kin types, except for grandfathers and great-grandfathers: + +```{r, message=FALSE, warning=FALSE} +kin_out <- kin2sex(fra_surv_f, fra_surv_m, fra_fert_f, fra_fert_m, sex_focal = "f", birth_female = .5) + +kin_out_androgynous <- kin2sex(fra_surv_f, fra_surv_f, fra_fert_f, fra_fert_f, sex_focal = "f", birth_female = .5) + +bind_rows( + kin_out$kin_summary %>% mutate(type = "full"), + kin_out_androgynous$kin_summary %>% mutate(type = "androgynous")) %>% + group_by(kin, age_focal, sex_kin, type) %>% + summarise(count = sum(count_living)) %>% + ggplot(aes(age_focal, count, linetype = type)) + + geom_line() + + theme_bw() + + theme(legend.position = "bottom", axis.text.x = element_blank()) + + facet_grid(row = vars(sex_kin), col = vars(kin), scales = "free") +``` + +Next, we consider the use of GKP factors and find that it also approximates relatively accurately kin counts at different ages of Focal. +These are presented as examples only. +Users are invited to perform more rigorous comparisons of these approximations. + +```{r, message=FALSE, warning=FALSE} +# with gkp +kin_out_1sex <- kin(fra_surv_f, fra_fert_f, birth_female = .5) + +kin_out_GKP <- kin_out_1sex$kin_summary%>% + mutate(count_living = case_when(kin == "m" ~ count_living * 2, + kin == "gm" ~ count_living * 4, + kin == "ggm" ~ count_living * 8, + kin == "d" ~ count_living * 2, + kin == "gd" ~ count_living * 4, + kin == "ggd" ~ count_living * 4, + kin == "oa" ~ count_living * 4, + kin == "ya" ~ count_living * 4, + kin == "os" ~ count_living * 2, + kin == "ys" ~ count_living * 2, + kin == "coa" ~ count_living * 8, + kin == "cya" ~ count_living * 8, + kin == "nos" ~ count_living * 4, + kin == "nys" ~ count_living * 4)) + +bind_rows( + kin_out$kin_summary %>% mutate(type = "full"), + kin_out_androgynous$kin_summary %>% mutate(type = "androgynous"), + kin_out_GKP %>% mutate(type = "gkp")) %>% + mutate(kin = case_when(kin %in% c("ys", "os") ~ "s", + kin %in% c("ya", "oa") ~ "a", + kin %in% c("coa", "cya") ~ "c", + kin %in% c("nys", "nos") ~ "n", + T ~ kin)) %>% + filter(age_focal %in% c(5, 15, 30, 60, 80)) %>% + group_by(kin, age_focal, type) %>% + summarise(count = sum(count_living)) %>% + ggplot(aes(type, count)) + + geom_bar(aes(fill=type), stat = "identity") + + theme_bw()+theme(axis.text.x = element_text(angle = 90), legend.position = "bottom")+ + facet_grid(col = vars(kin), row = vars(age_focal), scales = "free") +``` + +### 2. Causes of death + +Now assume we have two causes of death (COD). For females, the risk of the first COD is half the risk of the second COD for ages greater than 50. For males, the risk of the first COD is 2/3 of the second COD for ages greater than 50. We operationalize this using two matrices with dimension 2 by 101 (number of causes by number of ages). + +```{r} +Hf <- matrix(c( .5, 1), nrow = 2, ncol = length(fra_surv_f)) +Hm <- matrix(c(.33, 1), nrow = 2, ncol = length(fra_surv_f)) +Hf[,1:50] <- Hm[,1:50] <- 1 +``` + +This is a generalization of the approach outlined by Caswell [-@Caswell2023]. In the original formulation, the inputs in matrix $H$ are the hazard rates. Here, we treat them like a relative risk factor related to the underlying probability of dying. For more details, see section 2.3 and formula 30 in section A.1 of Caswell [-@Caswell2023]. Now we run the time-invariant two-sex model by COD for France 2012, assuming a death count distribution based on the two competing causes; note that the `kin2sex` function now takes the arguments `Hf` and `Hm` but the other arguments remain unchanged: + +```{r} +kin_out_cod_invariant <- kin2sex( + pf = fra_surv_f, + pm = fra_surv_m, + ff = fra_fert_f, + fm = fra_fert_m, + Hf = Hf, + Hm = Hm, + time_invariant = TRUE) +``` + +The output of `kin2sex` is the the `kin_full` data frame that we have encountered before. The only differences is that `kin_full` now includes one column for each COD specified in the input. Therefore, the number of columns will vary depending on how many COD you are considering! + +```{r} +head(kin_out_cod_invariant) +``` + +We can now plot the death distribution by age and COD of Focal's parents when Focal is 30 yo. + +```{r} +kin_out_cod_invariant %>% + filter(kin == "m", age_focal == 30) %>% + summarise(deadcause1 = sum(deadcause1), + deadcause2 = sum(deadcause2), .by = c(age_kin, sex_kin)) %>% + pivot_longer(deadcause1:deadcause2) %>% + ggplot(aes(age_kin, value, col = sex_kin, linetype = name)) + + geom_line() + + labs(y = "Expected number of parental deaths") + + theme_bw() +``` + +In this simplified example, the parents of Focal only died after age 50. This helped highlight the relative difference between the COD for each sex. Note that the sum of the death counts by sex gives the same result as the total deaths by sex at that age in the less complex model (i.e., the one that does not consider COD, see section 2 of this guide). + +You can add as many COD as you want, but keep in mind that this can be computationally intensive. For time-variant kinship models that consider COD, you must provide a list of matrices by sex ($Hf$ and $Hm$). The elements of this list should be $H$ matrices for each year (following the same order than the mortality and fertility components). + +## References diff --git a/vignettes/references.bib b/vignettes/references.bib new file mode 100644 index 0000000..a2fc0ff --- /dev/null +++ b/vignettes/references.bib @@ -0,0 +1,104 @@ +@article{Caswell2023, + author = {Caswell, Hal and Margolis, Rachel and Verdery, Ashton}, + title = {{The formal demography of kinship V: Kin loss, bereavement, and causes of death}}, + journal = {Demographic Research}, + volume = {49}, + pages = {1163--1200}, + year = {2023}, + month = dec, + issn = {1435-9871}, + publisher = {Demographic Research}, + url = {https://www.demographic-research.org/articles/volume/49/41} +} + +@article{caswell_formal_2019, + title = {The formal demography of kinship: {A} matrix formulation}, + volume = {41}, + issn = {1435-9871}, + shorttitle = {The formal demography of kinship}, + url = {https://www.demographic-research.org/volumes/vol41/24/}, + doi = {10.4054/DemRes.2019.41.24}, + language = {en}, + urldate = {2019-09-17}, + journal = {Demographic Research}, + author = {Caswell, Hal}, + month = sep, + year = {2019}, + pages = {679--712}, + file = {Full Text:C\:\\Users\\alburezgutierrez\\Zotero\\storage\\C84MW6VX\\Caswell - 2019 - The formal demography of kinship A matrix formula.pdf:application/pdf}, +} + +@article{caswell_formal_2020, + title = {The formal demography of kinship {II}: {Multistate} models, parity, and sibship}, + volume = {42}, + issn = {1435-9871}, + shorttitle = {The formal demography of kinship {II}}, + url = {https://www.demographic-research.org/volumes/vol42/38/}, + doi = {10.4054/DemRes.2020.42.38}, + language = {en}, + urldate = {2021-03-05}, + journal = {Demographic Research}, + author = {Caswell, Hal}, + month = jun, + year = {2020}, + pages = {1097--1146}, + file = {Full Text:C\:\\Users\\alburezgutierrez\\Zotero\\storage\\LEHIM987\\Caswell - 2020 - The formal demography of kinship II Multistate mo.pdf:application/pdf}, +} + +@article{caswell_formal_2021, + title = {The formal demography of kinship {III}: {Kinship} dynamics with time-varying demographic rates}, + volume = {45}, + issn = {1435-9871}, + shorttitle = {The formal demography of kinship {III}}, + url = {https://www.demographic-research.org/volumes/vol45/16/}, + doi = {10.4054/DemRes.2021.45.16}, + language = {en}, + urldate = {2021-10-19}, + journal = {Demographic Research}, + author = {Caswell, Hal and Song, Xi}, + month = aug, + year = {2021}, + pages = {517--546}, + file = {Full Text:C\:\\Users\\alburezgutierrez\\Zotero\\storage\\W2JPMRH8\\Caswell and Song - 2021 - The formal demography of kinship III Kinship dyna.pdf:application/pdf}, +} + +@article{caswell_formal_2022, + title = {The formal demography of kinship {IV}: {Two}-sex models and their approximations}, + volume = {47}, + issn = {1435-9871}, + shorttitle = {The formal demography of kinship {IV}}, + url = {https://www.demographic-research.org/volumes/vol47/13/}, + doi = {10.4054/DemRes.2022.47.13}, + language = {en}, + urldate = {2022-09-27}, + journal = {Demographic Research}, + author = {Caswell, Hal}, + month = sep, + year = {2022}, + pages = {359--396}, + file = {Full Text:C\:\\Users\\alburezgutierrez\\Zotero\\storage\\CWGLWECI\\Caswell - 2022 - The formal demography of kinship IV Two-sex model.pdf:application/pdf}, +} + + +@article{goodman_family_1974, + title = {Family {Formation} and the {Frequency} of {Various} {Kinship} {Relationships}}, + doi = {10.1016/0040-5809(74)90049-5}, + language = {en}, + journal = {Theoretical Population Biology}, + author = {Goodman, Leo A and Keyfitz, Nathan and Pullum, Thomas W.}, + year = {1974}, + pages = {27}, + file = {Goodman - Family Formation and the Frequency of Various Kins.pdf:C\:\\Users\\alburezgutierrez\\Zotero\\storage\\8ICBKYIE\\Goodman - Family Formation and the Frequency of Various Kins.pdf:application/pdf}, +} + + +@book{preston_demography:_2001, + address = {Malden, MA}, + title = {Demography: measuring and modeling population processes}, + isbn = {978-1-55786-214-3 978-1-55786-451-2}, + shorttitle = {Demography}, + publisher = {Blackwell Publishers}, + author = {Preston, Samuel H. and Heuveline, Patrick and Guillot, Michel}, + year = {2001}, + keywords = {Demography, Population research}, +}