Skip to content

Commit

Permalink
add concept names
Browse files Browse the repository at this point in the history
  • Loading branch information
catalamarti committed Apr 28, 2024
1 parent f14daa8 commit 8090b7c
Show file tree
Hide file tree
Showing 4 changed files with 252 additions and 137 deletions.
302 changes: 166 additions & 136 deletions R/summariseOmopTable.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ summariseOmopTable <- function(omopTable,
assertLogical(domainId, length = 1)
assertLogical(typeConcept, length = 1)

if ("observation_period" == omopgenerics::tableName(omopTable)) {
standardConcept <- FALSE
sourceConcept <- FALSE
domainId <- FALSE
}

cdm <- omopgenerics::cdmReference(omopTable)
omopTable <- omopTable |> dplyr::ungroup()

Expand Down Expand Up @@ -75,18 +81,32 @@ summariseOmopTable <- function(omopTable,
grepl("number", .data$variable_name), "integer", "percentage"
)
)
den <- result |>
dplyr::filter(.data$variable_name == "number_records") |>
dplyr::pull("estimate_value") |>
as.integer()

# records per person
if (length(recordsPerPerson) > 0) {
cli::cli_inform("Summarising records per person")
suppressMessages(
result <- result |>
dplyr::union_all(
omopTable |>
dplyr::group_by(.data$person_id) |>
dplyr::summarise(
"records_per_person" = dplyr::n(), .groups = "drop"
cdm[["person"]] |>
dplyr::left_join(
omopTable |>
dplyr::group_by(.data$person_id) |>
dplyr::summarise(
"records_per_person" = as.integer(dplyr::n()),
.groups = "drop"
),
by = "person_id",
) |>
dplyr::mutate("records_per_person" = dplyr::if_else(
is.na(.data$records_per_person),
0L,
.data$records_per_person
)) |>
PatientProfiles::summariseResult(
variables = "records_per_person",
estimates = recordsPerPerson,
Expand Down Expand Up @@ -115,7 +135,7 @@ summariseOmopTable <- function(omopTable,
dplyr::tally() |>
dplyr::collect() |>
dplyr::mutate("n" = as.integer(.data$n)) |>
summaryData(variables, cdm)
summaryData(variables, cdm, den)
)
}

Expand Down Expand Up @@ -149,6 +169,7 @@ addVariables <- function(x,
"source" = sourceConcept(name),
"type" = typeConcept(name)
)
newNames <- newNames[!is.na(newNames)]
cdm <- omopgenerics::cdmReference(x)

x <- x |>
Expand Down Expand Up @@ -200,15 +221,15 @@ addVariables <- function(x,
cdm[["observation_period"]] |>
dplyr::select(
"person_id",
"obs_start" = "observation_start_date",
"obs_end" = "observation_end_date"
"obs_start" = "observation_period_start_date",
"obs_end" = "observation_period_end_date"
),
by = "person_id"
) |>
dplyr::filter(
.data$date >= .data$obs_start & .data$date <= .data$obs_end
) |>
dplyr::mutate("in_observation") |>
dplyr::mutate("in_observation" = 1L) |>
dplyr::select("id", "in_observation"),
by = "id"
)
Expand All @@ -227,44 +248,46 @@ columnsVariables <- function(inObservation,
inObservation, standardConcept, domainId, sourceConcept, typeConcept
)]
}
summaryData <- function(x, variables) {
summaryData <- function(x, variables, cdm, den) {
results <- list()

# in observation
if ("in_observation" %in% variables) {
results[["obs"]] <- x |>
dplyr::mutate("in_observation" = dplyr::if_else(
.data$in_observation == 1, "Yes", "No"
!is.na(.data$in_observation), "Yes", "No"
)) |>
formatResults("In observation", "in_observation")
formatResults("In observation", "in_observation", den)
}

# standard
if ("standard" %in% variables) {
results[["standard"]] <- x |> formatResults("Standard concept", "standard")
results[["standard"]] <- x |>
formatResults("Standard concept", "standard", den)
}

# source
if ("source" %in% variables) {
results[["source"]] <- x |> formatResults("Source concept", "source")
results[["source"]] <- x |> formatResults("Source concept", "source", den)
}

# domain
if ("domain_id" %in% variables) {
results[["domain"]] <- x |> formatResults("Domain", "domain_id")
results[["domain"]] <- x |> formatResults("Domain", "domain_id", den)
}

# type
if ("type" %in% variables) {
namesTypes <- cdm[["concept"]] |>
dplyr::filter(.data$type_concept_id == "concept_class_id") |>
dplyr::select(
"variable_level" = "concept_id", "new_variable_level" = "concept_name"
) |>
dplyr::collect()
results[["type"]] <- x |>
formatResults("Type concept id", "type") |>
dplyr::left_join(namesTypes, by = "variable_level") |>
formatResults("Type concept id", "type", den) |>
dplyr::left_join(
conceptTypes |>
dplyr::select(
"variable_level" = "type_concept_id",
"new_variable_level" = "type_name"
),
by = "variable_level"
) |>
dplyr::mutate("variable_level" = dplyr::if_else(
is.na(.data$new_variable_level),
.data$variable_level,
Expand All @@ -278,128 +301,135 @@ summaryData <- function(x, variables) {

return(results)
}
formatResults <- function(x, variableName, variableLevel) {
formatResults <- function(x, variableName, variableLevel, den) {
x |>
dplyr::group_by(dplyr::across(dplyr::all_of(variableLevel))) |>
dplyr::summarise("estimate_value" = sum(.data$n), .groups = "drop") |>
dplyr::summarise("count" = sum(.data$n), .groups = "drop") |>
dplyr::mutate("percentage" = 100 * .data$count / .env$den) |>
dplyr::mutate(dplyr::across(dplyr::everything(), as.character)) |>
tidyr::pivot_longer(
cols = c("count", "percentage"),
names_to = "estimate_name",
values_to = "estimate_value"
) |>
dplyr::mutate(
"variable_name" = .env$variableName,
"variable_level" = dplyr::all_of(variableLevel),
"estimate_name" = "count",
"estimate_type" = "integer",
"estimate_value" = as.character(.data$estimate_value)
"variable_level" = as.character(.data[[variableLevel]]),
"estimate_type" = dplyr::if_else(
.data$estimate_name == "count", "integer", "percentage"
)
) |>
dplyr::select(
"variable_name", "variable_level", "estimate_name", "estimate_type",
"estimate_value"
)
}

getFunctions <- function(date, concept) {
functions <- c(
rlang::parse_exprs("dplyr::n()") |>
rlang::set_names("count_number_records"),
rlang::parse_exprs("dplyr::n_distinct(.data$person_id)") |>
rlang::set_names("count_number_subjects"),
rlang::parse_exprs("dplyr::n_distinct(.data$concept_id)") |>
rlang::set_names("count_distinct_concept_id"),
rlang::parse_exprs("sum(.data$in_observation, na.rm = TRUE)") |>
rlang::set_names("count_records_in_observation")
)
functions <- functions[c(
TRUE, TRUE, date != "cohort_start_date", concept != "cohort_definition_id"
)]
return(functions)
}
prepareTable <- function(omopTable, date, concept) {
cdm <- omopgenerics::cdmReference(omopTable)

# domain_id
if (concept != "cohort_definition_id") {
omopTable <- omopTable |>
dplyr::rename("concept_id" = dplyr::all_of(concept)) |>
dplyr::left_join(
cdm$concept |> dplyr::select("concept_id", "domain_id"),
by = "concept_id"
)
}

# year and in_observation
if (date != "cohort_start_date") {
omopTable <- omopTable |>
PatientProfiles::addInObservation(indexDate = date) %>%
dplyr::mutate(
"year" = !!CDMConnector::datepart(date = date, interval = "year")
)
}

return(omopTable)
}
summaryData <- function(omopTable, functions, byYear){
result <- omopTable |>
dplyr::summarise(!!!functions) |>
dplyr::collect()
if ("domain_id" %in% colnames(omopTable)) {
result <- result |>
dplyr::bind_rows(
omopTable |>
dplyr::group_by(.data$domain_id) |>
dplyr::summarise(!!!functions, .groups = "drop") |>
dplyr::collect()
)
} else {
result <- result |> dplyr::mutate("domain_id" = NA_character_)
}

if (byYear & "year" %in% colnames(omopTable)) {
result <- result |>
dplyr::bind_rows(
omopTable |>
dplyr::group_by(.data$year) |>
dplyr::summarise(!!!functions, .groups = "drop") |>
dplyr::collect()
)
if ("domain_id" %in% colnames(omopTable)) {
result <- result |>
dplyr::bind_rows(
omopTable |>
dplyr::group_by(.data$domain_id, .data$year) |>
dplyr::summarise(!!!functions, .groups = "drop") |>
dplyr::collect()
)
}
} else {
result <- result |> dplyr::mutate("year" = NA_character_)
}
return(result)
}
formatResult <- function(result, cdm, name) {
result |>
tidyr::pivot_longer(
cols = !c("year", "domain_id"),
names_to = "name",
values_to = "estimate_value"
) |>
tidyr::separate_wider_delim(
cols = "name",
delim = "_",
names = c("estimate_name", "variable_name"),
too_many = "merge"
) |>
dplyr::mutate(
"estimate_value" = as.character(.data$estimate_value),
"cdm_name" = omopgenerics::cdmName(cdm = cdm),
"estimate_type" = "integer",
"variable_level" = NA_character_,
"package_name" = "OmopSketch",
"package_version" = as.character(utils::packageVersion("OmopSketch")),
"group_name" = "omop_table",
"group_level" = name,
"result_type" = "summarised_omop_table",
"additional_name" = "overall",
"additional_level" = "overlal"
) |>
dplyr::rename("domain" = "domain_id") |>
visOmopResults::uniteStrata(cols = c("year", "domain")) |>
omopgenerics::newSummarisedResult()
}
# getFunctions <- function(date, concept) {
# functions <- c(
# rlang::parse_exprs("dplyr::n()") |>
# rlang::set_names("count_number_records"),
# rlang::parse_exprs("dplyr::n_distinct(.data$person_id)") |>
# rlang::set_names("count_number_subjects"),
# rlang::parse_exprs("dplyr::n_distinct(.data$concept_id)") |>
# rlang::set_names("count_distinct_concept_id"),
# rlang::parse_exprs("sum(.data$in_observation, na.rm = TRUE)") |>
# rlang::set_names("count_records_in_observation")
# )
# functions <- functions[c(
# TRUE, TRUE, date != "cohort_start_date", concept != "cohort_definition_id"
# )]
# return(functions)
# }
# prepareTable <- function(omopTable, date, concept) {
# cdm <- omopgenerics::cdmReference(omopTable)
#
# # domain_id
# if (concept != "cohort_definition_id") {
# omopTable <- omopTable |>
# dplyr::rename("concept_id" = dplyr::all_of(concept)) |>
# dplyr::left_join(
# cdm$concept |> dplyr::select("concept_id", "domain_id"),
# by = "concept_id"
# )
# }
#
# # year and in_observation
# if (date != "cohort_start_date") {
# omopTable <- omopTable |>
# PatientProfiles::addInObservation(indexDate = date) %>%
# dplyr::mutate(
# "year" = !!CDMConnector::datepart(date = date, interval = "year")
# )
# }
#
# return(omopTable)
# }
# summaryData <- function(omopTable, functions, byYear){
# result <- omopTable |>
# dplyr::summarise(!!!functions) |>
# dplyr::collect()
# if ("domain_id" %in% colnames(omopTable)) {
# result <- result |>
# dplyr::bind_rows(
# omopTable |>
# dplyr::group_by(.data$domain_id) |>
# dplyr::summarise(!!!functions, .groups = "drop") |>
# dplyr::collect()
# )
# } else {
# result <- result |> dplyr::mutate("domain_id" = NA_character_)
# }
#
# if (byYear & "year" %in% colnames(omopTable)) {
# result <- result |>
# dplyr::bind_rows(
# omopTable |>
# dplyr::group_by(.data$year) |>
# dplyr::summarise(!!!functions, .groups = "drop") |>
# dplyr::collect()
# )
# if ("domain_id" %in% colnames(omopTable)) {
# result <- result |>
# dplyr::bind_rows(
# omopTable |>
# dplyr::group_by(.data$domain_id, .data$year) |>
# dplyr::summarise(!!!functions, .groups = "drop") |>
# dplyr::collect()
# )
# }
# } else {
# result <- result |> dplyr::mutate("year" = NA_character_)
# }
# return(result)
# }
# formatResult <- function(result, cdm, name) {
# result |>
# tidyr::pivot_longer(
# cols = !c("year", "domain_id"),
# names_to = "name",
# values_to = "estimate_value"
# ) |>
# tidyr::separate_wider_delim(
# cols = "name",
# delim = "_",
# names = c("estimate_name", "variable_name"),
# too_many = "merge"
# ) |>
# dplyr::mutate(
# "estimate_value" = as.character(.data$estimate_value),
# "cdm_name" = omopgenerics::cdmName(cdm = cdm),
# "estimate_type" = "integer",
# "variable_level" = NA_character_,
# "package_name" = "OmopSketch",
# "package_version" = as.character(utils::packageVersion("OmopSketch")),
# "group_name" = "omop_table",
# "group_level" = name,
# "result_type" = "summarised_omop_table",
# "additional_name" = "overall",
# "additional_level" = "overlal"
# ) |>
# dplyr::rename("domain" = "domain_id") |>
# visOmopResults::uniteStrata(cols = c("year", "domain")) |>
# omopgenerics::newSummarisedResult()
# }
Binary file modified R/sysdata.rda
Binary file not shown.
Loading

0 comments on commit 8090b7c

Please sign in to comment.