Skip to content

Commit

Permalink
main fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
gowthamrao committed Sep 13, 2024
1 parent e12010d commit e255247
Show file tree
Hide file tree
Showing 11 changed files with 91 additions and 72 deletions.
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@ Depends:
dplyr,
R (>= 4.0.0)
Imports:
CohortAlgebra,
checkmate,
CirceR,
ParallelLogger,
purrr,
RJSONIO,
rlang,
Expand Down
26 changes: 13 additions & 13 deletions R/ExtractConceptSetsInCohortDefinition.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ extractConceptSetsInCohortDefinition <-
)
conceptSetExpressionMetaData[[j]] <-
conceptSetExpression2[[j]][1, ] |>
dplyr::select(dplyr::all_of("conceptSetId")) |>
dplyr::select(dplyr::all_of(c("conceptSetId"))) |>
dplyr::mutate(
hasStandard = as.integer(
conceptSetDataFrame |>
Expand Down Expand Up @@ -156,7 +156,7 @@ extractConceptSetsInCohortDefinition <-
pattern = "condition"
)
) |>
dplyr::select(dplyr::all_of("conceptId")) |>
dplyr::select(dplyr::all_of(c("conceptId"))) |>
dplyr::distinct() |>
nrow(),
hasProcedure = as.integer(
Expand All @@ -177,7 +177,7 @@ extractConceptSetsInCohortDefinition <-
pattern = "procedure"
)
) |>
dplyr::select(dplyr::all_of("conceptId")) |>
dplyr::select(dplyr::all_of(c("conceptId"))) |>
dplyr::distinct() |>
nrow(),
hasDevice = as.integer(
Expand All @@ -194,7 +194,7 @@ extractConceptSetsInCohortDefinition <-
string = tolower(.data$domainId),
pattern = "device"
)) |>
dplyr::select(dplyr::all_of("conceptId")) |>
dplyr::select(dplyr::all_of(c("conceptId"))) |>
dplyr::distinct() |>
nrow(),
hasDrug = as.integer(
Expand All @@ -211,7 +211,7 @@ extractConceptSetsInCohortDefinition <-
string = tolower(.data$domainId),
pattern = "drug"
)) |>
dplyr::select(dplyr::all_of("conceptId")) |>
dplyr::select(dplyr::all_of(c("conceptId"))) |>
dplyr::distinct() |>
nrow(),
hasObservation = as.integer(
Expand All @@ -232,7 +232,7 @@ extractConceptSetsInCohortDefinition <-
pattern = "observation"
)
) |>
dplyr::select(dplyr::all_of("conceptId")) |>
dplyr::select(dplyr::all_of(c("conceptId"))) |>
dplyr::distinct() |>
nrow(),
hasVisit = as.integer(
Expand All @@ -249,7 +249,7 @@ extractConceptSetsInCohortDefinition <-
string = tolower(.data$domainId),
pattern = "visit"
)) |>
dplyr::select(dplyr::all_of("conceptId")) |>
dplyr::select(dplyr::all_of(c("conceptId"))) |>
dplyr::distinct() |>
nrow(),
hasType = as.integer(
Expand All @@ -266,7 +266,7 @@ extractConceptSetsInCohortDefinition <-
string = tolower(.data$domainId),
pattern = "type"
)) |>
dplyr::select(dplyr::all_of("conceptId")) |>
dplyr::select(dplyr::all_of(c("conceptId"))) |>
dplyr::distinct() |>
nrow(),
isSelectedIncludeMapped = max(as.integer(conceptSetDataFrame$includeMapped)),
Expand All @@ -284,13 +284,13 @@ extractConceptSetsInCohortDefinition <-
numberOfUniqueConceptIdsWithoutDescendants = length(
conceptSetDataFrame |>
dplyr::filter(.data$includeDescendants == FALSE) |>
dplyr::pull(dplyr::all_of("conceptId")) |>
dplyr::pull(dplyr::all_of(c("conceptId"))) |>
unique()
),
numberOfUniqueConceptIdsWitDescendants = length(
conceptSetDataFrame |>
dplyr::filter(.data$includeDescendants == TRUE) |>
dplyr::pull(dplyr::all_of("conceptId")) |>
dplyr::pull(dplyr::all_of(c("conceptId"))) |>
unique()
),
numberOfUniqueConceptIdIsStandard = length(
Expand All @@ -301,7 +301,7 @@ extractConceptSetsInCohortDefinition <-
pattern = "S"
)
) |>
dplyr::pull(dplyr::all_of("conceptId")) |>
dplyr::pull(dplyr::all_of(c("conceptId"))) |>
unique()
),
numberOfUniqueConceptIdIsNonStandard = length(
Expand All @@ -313,7 +313,7 @@ extractConceptSetsInCohortDefinition <-
negate = TRUE
)
) |>
dplyr::pull(dplyr::all_of("conceptId")) |>
dplyr::pull(dplyr::all_of(c("conceptId"))) |>
unique()
)
)
Expand All @@ -340,7 +340,7 @@ extractConceptSetsInCohortDefinition <-

if (length(codeSetsIdsInPrimaryCriteria) > 0) {
conceptSetExpression <- conceptSetExpression |>
dplyr::select(-dplyr::all_of("conceptSetUsedInEntryEvent")) |>
dplyr::select(-dplyr::all_of(c("conceptSetUsedInEntryEvent"))) |>
dplyr::left_join(
dplyr::tibble(conceptSetId = codeSetsIdsInPrimaryCriteria) |>
dplyr::distinct() |>
Expand Down
2 changes: 1 addition & 1 deletion R/ExtractConceptSetsInCohortDefinitionSet.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ extractConceptSetsInCohortDefinitionSet <-
!class(conceptSetsInCohortDefinition) == "try-error"
)) {
conceptSets[[i]] <- conceptSetsInCohortDefinition |>
dplyr::select(dplyr::all_of(uniqueConceptSetId)) |>
dplyr::select(dplyr::all_of(c("uniqueConceptSetId"))) |>
dplyr::mutate(cohortId = cohort$cohortId) |>
dplyr::relocate(dplyr::all_of(c("cohortId", "conceptSetId")))
}
Expand Down
36 changes: 27 additions & 9 deletions R/GetConceptRecordCount.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,23 @@
#'
#' @param cohortDefinitionId Optional
#'
#' @param domainTableName Vector of strings Domains to look for concept IDs. Supported domains
#' include "drug_exposure", "condition_occurrence", "procedure_occurrence", "measurement", "observation".
#'
#' @param stratifyByGender Logical Whether to stratify the counts by gender.
#'
#' @param stratifyByYear Logical Whether to stratify the counts by year.
#'
#' @param stratifyByYearQuarter Logical Whether to stratify the counts by quarter of the year.
#'
#' @param stratifyByYearMonth Logical Whether to stratify the counts by month of the year.
#'
#' @param stratifyByAgeGroup Logical Whether to stratify the counts by age group.
#'
#' @param stratifyByIncidence Logical Whether to limit the counts to first occurrences (incidence).
#'
#' @param getOverallCounts Logical Whether to include overall counts across all specified stratifications.
#'
#' @return
#' Returns a tibble data frame.
#'
Expand Down Expand Up @@ -114,15 +131,16 @@ getConceptRecordCount <- function(conceptIds = NULL,
{@concept_id_universe != ''} ? {
DROP TABLE IF EXISTS #concept_id_unv_2;
CREATE TABLE #concept_id_unv_2 as
SELECT DISTINCT u.concept_id
FROM @concept_id_universe u
INNER JOIN (
SELECT concept_id
FROM @vocabulary_database_schema.CONCEPT
WHERE concept_id > 0
) c
ON u.concept_id = c.concept_id
;
(
SELECT DISTINCT u.concept_id
FROM @concept_id_universe u
INNER JOIN (
SELECT concept_id
FROM @vocabulary_database_schema.CONCEPT
WHERE concept_id > 0
) c
ON u.concept_id = c.concept_id
) ;
}
--PERCENTILES are difficult and will need subqueries
Expand Down
2 changes: 2 additions & 0 deletions R/GetConceptSetOccurrenceDate.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
#'
#' @param conceptIds An array of concept ids
#'
#' @param subset options are "all", "first", "last"
#'
#' @param limitToPersonDate Do you want to limit to person dates
#'
#' @return
Expand Down
8 changes: 5 additions & 3 deletions R/InstantiateCohortFromConceptSetExpression.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
#'
#' @template CdmDatabaseSchema
#'
#' @template CohortDatabaseSchema
#'
#' @template TempEmulationSchema
#'
#' @param cohortId An integer value to identify the cohort.
Expand Down Expand Up @@ -66,10 +68,10 @@ instantiateCohortFromConceptSetExpression <-
connection = connection,
vocabularyDatabaseSchema = vocabularyDatabaseSchema
) |>
dplyr::select(dplyr::all_of("conceptId")) |>
dplyr::select(dplyr::all_of(c("conceptId"))) |>
dplyr::distinct() |>
dplyr::arrange(dplyr::all_of("conceptId")) |>
dplyr::pull(dplyr::all_of("conceptId"))
dplyr::arrange(dplyr::all_of(c("conceptId"))) |>
dplyr::pull(dplyr::all_of(c("conceptId")))

tempTableWithConceptDates <-
getConceptSetOccurrenceDate(
Expand Down
49 changes: 14 additions & 35 deletions R/MapMedraToSnomedViaVocabulary.R
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,7 @@ mapMedraToSnomedViaVocabulary <-
dplyr::inner_join(
medDraRelationship$pt |>
dplyr::select(
.data$givenConceptId,
.data$ptConceptName
dplyr::all_of(c("givenConceptId", "ptConceptName"))
) |>
rename(
"medDraConceptId" = .data$givenConceptId,
Expand All @@ -129,8 +128,7 @@ mapMedraToSnomedViaVocabulary <-
dplyr::inner_join(
medDraRelationship$llt |>
dplyr::select(
.data$givenConceptId,
.data$lltConceptName
dplyr::all_of(c("givenConceptId", "lltConceptName"))
) |>
rename(
"medDraConceptId" = .data$givenConceptId,
Expand Down Expand Up @@ -222,8 +220,7 @@ mapMedraToSnomedViaVocabulary <-
relatedToSnomed <-
medDraRelated |>
dplyr::select(
.data$conceptId1,
.data$conceptId2
dplyr::all_of(c("conceptId1", "conceptId2"))
) |>
dplyr::distinct() |>
dplyr::inner_join(
Expand Down Expand Up @@ -255,8 +252,7 @@ mapMedraToSnomedViaVocabulary <-
dplyr::inner_join(
conceptRelationship |>
dplyr::select(
.data$conceptId1,
.data$conceptId2
dplyr::all_of(c("conceptId1", "conceptId2"))
),
by = c("invalidSnomedConceptId" = "conceptId1")
) |>
Expand Down Expand Up @@ -307,8 +303,7 @@ mapMedraToSnomedViaVocabulary <-
dplyr::inner_join(
snomedSynonyms |>
dplyr::select(
.data$conceptId,
.data$conceptSynonymName
dplyr::all_of(c("conceptId", "conceptSynonymName"))
) |>
dplyr::distinct() |>
dplyr::rename("conceptName" = .data$conceptSynonymName),
Expand Down Expand Up @@ -414,9 +409,7 @@ mapMedraToSnomedViaVocabulary <-
)
) |>
dplyr::select(
.data$medDraConceptId,
.data$snomedConceptId,
.data$stringDistanceScore
dplyr::all_of(c("medDraConceptId", "snomedConceptId", "stringDistanceScore"))
) |>
dplyr::distinct()

Expand All @@ -430,16 +423,12 @@ mapMedraToSnomedViaVocabulary <-
tidyr::replace_na(list(stringDistanceScore = 999)) |>
dplyr::group_by(.data$medDraConceptId) |>
dplyr::arrange(
.data$minLevelsOfSeparation,
.data$maxLevelsOfSeparation,
.data$stringDistanceScore
dplyr::all_of(c("minLevelsOfSeparation", "maxLevelsOfSeparation", "stringDistanceScore"))
) |>
dplyr::mutate(rank = dplyr::row_number()) |>
dplyr::ungroup() |>
dplyr::select(
.data$medDraConceptId,
.data$snomedConceptId,
.data$rank
dplyr::all_of(c("medDraConceptId", "snomedConceptId", "rank"))
) |>
dplyr::distinct()

Expand Down Expand Up @@ -481,9 +470,7 @@ mapMedraToSnomedViaVocabulary <-
) |>
dplyr::mutate(ancestorRank = dplyr::row_number()) |>
dplyr::arrange(
.data$medDraConceptId,
.data$descendantConceptId,
.data$ancestorRank
dplyr::all_of(c("medDraConceptId", "descendantConceptId", "ancestorRank"))
)

canBeRolledUp <-
Expand All @@ -497,10 +484,7 @@ mapMedraToSnomedViaVocabulary <-
) |>
dplyr::rename("descendantConceptId" = .data$snomedConceptId) |>
dplyr::select(
.data$medDraConceptId,
.data$descendantConceptId,
.data$ancestorConceptId,
.data$ancestorRank
dplyr::all_of(c("medDraConceptId", "descendantConceptId", "ancestorConceptId", "ancestorRank"))
) |>
dplyr::distinct()

Expand All @@ -527,9 +511,7 @@ mapMedraToSnomedViaVocabulary <-
suppressWarnings(
canBeRolledUp |>
dplyr::select(
.data$medDraConceptId,
.data$descendantConceptId,
.data$ancestorRank
dplyr::all_of(c("medDraConceptId", "descendantConceptId", "ancestorRank"))
) |>
dplyr::group_by(
.data$medDraConceptId,
Expand Down Expand Up @@ -584,7 +566,7 @@ mapMedraToSnomedViaVocabulary <-
dplyr::group_by(.data$medDraConceptId) |>
dplyr::arrange(.data$rank) |>
dplyr::mutate(rn = dplyr::row_number()) |>
dplyr::select(-.data$rank) |>
dplyr::select(-dplyr::all_of(c("rank"))) |>
dplyr::rename(rank = .data$rn) |>
dplyr::ungroup() |>
dplyr::arrange(.data$medDraConceptId)
Expand Down Expand Up @@ -633,14 +615,11 @@ mapMedraToSnomedViaVocabulary <-
by = "medDraConceptId"
) |>
dplyr::arrange(
.data$medDraConceptId,
.data$medDraConceptName,
.data$medDraConceptClassId,
.data$rank
dplyr::all_of(c("medDraConceptId", "medDraConceptName", "medDraConceptName", "rank"))
) |>
dplyr::select(
dplyr::starts_with(c("medDra", "snomed")),
.data$rank
dplyr::all_of(c("rank"))
)

return(mappedUsingVocabaulary)
Expand Down
16 changes: 5 additions & 11 deletions R/Private.R
Original file line number Diff line number Diff line change
Expand Up @@ -200,21 +200,15 @@ getDomainInformation <- function() {
data$long <- dplyr::bind_rows(
data$wide |>
dplyr::select(
.data$domainTableShort,
.data$domainTable,
.data$domainConceptIdShort,
.data$domainConceptId
dplyr::all_of(c("domainTableShort", "domainTable", "domainConceptIdShort", "domainConceptId"))
) |>
dplyr::rename(
domainFieldShort = .data$domainConceptIdShort,
domainField = .data$domainConceptId
),
data$wide |>
dplyr::select(
.data$domainTableShort,
.data$domainSourceConceptIdShort,
.data$domainTable,
.data$domainSourceConceptId
dplyr::all_of(c("domainTableShort", "domainSourceConceptIdShort", "domainTable", "domainSourceConceptId"))
) |>
dplyr::rename(
domainFieldShort = .data$domainSourceConceptIdShort,
Expand All @@ -239,15 +233,15 @@ getDomainInformation <- function() {
data |>
dplyr::collect() |>
dplyr::mutate(dplyr::across(
tidyselect:::where(is.character),
tidyselect::where(is.character),
~ tidyr::replace_na(.x, as.character(""))
)) |>
dplyr::mutate(dplyr::across(
tidyselect:::where(is.logical),
tidyselect::where(is.logical),
~ tidyr::replace_na(.x, as.character(""))
)) |>
dplyr::mutate(dplyr::across(
tidyselect:::where(is.numeric),
tidyselect::where(is.numeric),
~ tidyr::replace_na(.x, as.numeric(""))
))
}
Expand Down
Loading

0 comments on commit e255247

Please sign in to comment.