From e2552478fa9d02c71aaa7b16f3fe11caaaf9db82 Mon Sep 17 00:00:00 2001
From: Gowtham Rao <rao@ohdsi.org>
Date: Fri, 13 Sep 2024 08:48:56 -0400
Subject: [PATCH] main fixes

---
 DESCRIPTION                                   |  2 +
 R/ExtractConceptSetsInCohortDefinition.R      | 26 +++++-----
 R/ExtractConceptSetsInCohortDefinitionSet.R   |  2 +-
 R/GetConceptRecordCount.R                     | 36 ++++++++++----
 R/GetConceptSetOccurrenceDate.R               |  2 +
 R/InstantiateCohortFromConceptSetExpression.R |  8 +--
 R/MapMedraToSnomedViaVocabulary.R             | 49 ++++++-------------
 R/Private.R                                   | 16 ++----
 man/getConceptRecordCount.Rd                  | 17 +++++++
 man/getConceptSetOccurrenceDate.Rd            |  2 +
 ...stantiateCohortFromConceptSetExpression.Rd |  3 ++
 11 files changed, 91 insertions(+), 72 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 6627fb1..f2f10c7 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -11,8 +11,10 @@ Depends:
     dplyr,
     R (>= 4.0.0)
 Imports:
+  CohortAlgebra,
 	checkmate,
 	CirceR,
+	ParallelLogger,
 	purrr,
 	RJSONIO,
 	rlang,
diff --git a/R/ExtractConceptSetsInCohortDefinition.R b/R/ExtractConceptSetsInCohortDefinition.R
index 9928852..d4ace61 100644
--- a/R/ExtractConceptSetsInCohortDefinition.R
+++ b/R/ExtractConceptSetsInCohortDefinition.R
@@ -96,7 +96,7 @@ extractConceptSetsInCohortDefinition <-
         )
       conceptSetExpressionMetaData[[j]] <-
         conceptSetExpression2[[j]][1, ] |>
-        dplyr::select(dplyr::all_of("conceptSetId")) |>
+        dplyr::select(dplyr::all_of(c("conceptSetId"))) |>
         dplyr::mutate(
           hasStandard = as.integer(
             conceptSetDataFrame |>
@@ -156,7 +156,7 @@ extractConceptSetsInCohortDefinition <-
                   pattern = "condition"
                 )
               ) |>
-              dplyr::select(dplyr::all_of("conceptId")) |>
+              dplyr::select(dplyr::all_of(c("conceptId"))) |>
               dplyr::distinct() |>
               nrow(),
           hasProcedure = as.integer(
@@ -177,7 +177,7 @@ extractConceptSetsInCohortDefinition <-
                   pattern = "procedure"
                 )
               ) |>
-              dplyr::select(dplyr::all_of("conceptId")) |>
+              dplyr::select(dplyr::all_of(c("conceptId"))) |>
               dplyr::distinct() |>
               nrow(),
           hasDevice = as.integer(
@@ -194,7 +194,7 @@ extractConceptSetsInCohortDefinition <-
                 string = tolower(.data$domainId),
                 pattern = "device"
               )) |>
-              dplyr::select(dplyr::all_of("conceptId")) |>
+              dplyr::select(dplyr::all_of(c("conceptId"))) |>
               dplyr::distinct() |>
               nrow(),
           hasDrug = as.integer(
@@ -211,7 +211,7 @@ extractConceptSetsInCohortDefinition <-
                 string = tolower(.data$domainId),
                 pattern = "drug"
               )) |>
-              dplyr::select(dplyr::all_of("conceptId")) |>
+              dplyr::select(dplyr::all_of(c("conceptId"))) |>
               dplyr::distinct() |>
               nrow(),
           hasObservation = as.integer(
@@ -232,7 +232,7 @@ extractConceptSetsInCohortDefinition <-
                   pattern = "observation"
                 )
               ) |>
-              dplyr::select(dplyr::all_of("conceptId")) |>
+              dplyr::select(dplyr::all_of(c("conceptId"))) |>
               dplyr::distinct() |>
               nrow(),
           hasVisit = as.integer(
@@ -249,7 +249,7 @@ extractConceptSetsInCohortDefinition <-
                 string = tolower(.data$domainId),
                 pattern = "visit"
               )) |>
-              dplyr::select(dplyr::all_of("conceptId")) |>
+              dplyr::select(dplyr::all_of(c("conceptId"))) |>
               dplyr::distinct() |>
               nrow(),
           hasType = as.integer(
@@ -266,7 +266,7 @@ extractConceptSetsInCohortDefinition <-
                 string = tolower(.data$domainId),
                 pattern = "type"
               )) |>
-              dplyr::select(dplyr::all_of("conceptId")) |>
+              dplyr::select(dplyr::all_of(c("conceptId"))) |>
               dplyr::distinct() |>
               nrow(),
           isSelectedIncludeMapped = max(as.integer(conceptSetDataFrame$includeMapped)),
@@ -284,13 +284,13 @@ extractConceptSetsInCohortDefinition <-
           numberOfUniqueConceptIdsWithoutDescendants = length(
             conceptSetDataFrame |>
               dplyr::filter(.data$includeDescendants == FALSE) |>
-              dplyr::pull(dplyr::all_of("conceptId")) |>
+              dplyr::pull(dplyr::all_of(c("conceptId"))) |>
               unique()
           ),
           numberOfUniqueConceptIdsWitDescendants = length(
             conceptSetDataFrame |>
               dplyr::filter(.data$includeDescendants == TRUE) |>
-              dplyr::pull(dplyr::all_of("conceptId")) |>
+              dplyr::pull(dplyr::all_of(c("conceptId"))) |>
               unique()
           ),
           numberOfUniqueConceptIdIsStandard = length(
@@ -301,7 +301,7 @@ extractConceptSetsInCohortDefinition <-
                   pattern = "S"
                 )
               ) |>
-              dplyr::pull(dplyr::all_of("conceptId")) |>
+              dplyr::pull(dplyr::all_of(c("conceptId"))) |>
               unique()
           ),
           numberOfUniqueConceptIdIsNonStandard = length(
@@ -313,7 +313,7 @@ extractConceptSetsInCohortDefinition <-
                   negate = TRUE
                 )
               ) |>
-              dplyr::pull(dplyr::all_of("conceptId")) |>
+              dplyr::pull(dplyr::all_of(c("conceptId"))) |>
               unique()
           )
         )
@@ -340,7 +340,7 @@ extractConceptSetsInCohortDefinition <-
 
     if (length(codeSetsIdsInPrimaryCriteria) > 0) {
       conceptSetExpression <- conceptSetExpression |>
-        dplyr::select(-dplyr::all_of("conceptSetUsedInEntryEvent")) |>
+        dplyr::select(-dplyr::all_of(c("conceptSetUsedInEntryEvent"))) |>
         dplyr::left_join(
           dplyr::tibble(conceptSetId = codeSetsIdsInPrimaryCriteria) |>
             dplyr::distinct() |>
diff --git a/R/ExtractConceptSetsInCohortDefinitionSet.R b/R/ExtractConceptSetsInCohortDefinitionSet.R
index 0d9c9d1..b88a4cf 100644
--- a/R/ExtractConceptSetsInCohortDefinitionSet.R
+++ b/R/ExtractConceptSetsInCohortDefinitionSet.R
@@ -52,7 +52,7 @@ extractConceptSetsInCohortDefinitionSet <-
         !class(conceptSetsInCohortDefinition) == "try-error"
       )) {
         conceptSets[[i]] <- conceptSetsInCohortDefinition |>
-          dplyr::select(dplyr::all_of(uniqueConceptSetId)) |>
+          dplyr::select(dplyr::all_of(c("uniqueConceptSetId"))) |>
           dplyr::mutate(cohortId = cohort$cohortId) |>
           dplyr::relocate(dplyr::all_of(c("cohortId", "conceptSetId")))
       }
diff --git a/R/GetConceptRecordCount.R b/R/GetConceptRecordCount.R
index 2f34988..b90a3b8 100644
--- a/R/GetConceptRecordCount.R
+++ b/R/GetConceptRecordCount.R
@@ -42,6 +42,23 @@
 #'
 #' @param cohortDefinitionId          Optional
 #'
+#' @param domainTableName Vector of strings Domains to look for concept IDs. Supported domains
+#'  include "drug_exposure", "condition_occurrence", "procedure_occurrence", "measurement", "observation".
+#'
+#' @param stratifyByGender Logical Whether to stratify the counts by gender.
+#'
+#' @param stratifyByYear Logical Whether to stratify the counts by year.
+#'
+#' @param stratifyByYearQuarter Logical Whether to stratify the counts by quarter of the year.
+#'
+#' @param stratifyByYearMonth Logical Whether to stratify the counts by month of the year.
+#'
+#' @param stratifyByAgeGroup Logical Whether to stratify the counts by age group.
+#'
+#' @param stratifyByIncidence Logical Whether to limit the counts to first occurrences (incidence).
+#'
+#' @param getOverallCounts Logical Whether to include overall counts across all specified stratifications.
+#'
 #' @return
 #' Returns a tibble data frame.
 #'
@@ -114,15 +131,16 @@ getConceptRecordCount <- function(conceptIds = NULL,
           {@concept_id_universe != ''} ? {
             DROP TABLE IF EXISTS #concept_id_unv_2;
             CREATE TABLE #concept_id_unv_2 as
-              SELECT DISTINCT u.concept_id
-              FROM @concept_id_universe u
-              INNER JOIN (
-                SELECT concept_id
-                FROM @vocabulary_database_schema.CONCEPT
-                WHERE concept_id > 0
-                ) c
-                ON u.concept_id = c.concept_id
-          ;
+              (
+                SELECT DISTINCT u.concept_id
+                FROM @concept_id_universe u
+                INNER JOIN (
+                  SELECT concept_id
+                  FROM @vocabulary_database_schema.CONCEPT
+                  WHERE concept_id > 0
+                            ) c
+               ON u.concept_id = c.concept_id
+              ) ;
           }
 
           --PERCENTILES are difficult and will need subqueries
diff --git a/R/GetConceptSetOccurrenceDate.R b/R/GetConceptSetOccurrenceDate.R
index 054f845..397551b 100644
--- a/R/GetConceptSetOccurrenceDate.R
+++ b/R/GetConceptSetOccurrenceDate.R
@@ -39,6 +39,8 @@
 #'
 #' @param conceptIds An array of concept ids
 #'
+#' @param subset options are "all", "first", "last"
+#'
 #' @param limitToPersonDate Do you want to limit to person dates
 #'
 #' @return
diff --git a/R/InstantiateCohortFromConceptSetExpression.R b/R/InstantiateCohortFromConceptSetExpression.R
index c6bbc2a..f838c7b 100644
--- a/R/InstantiateCohortFromConceptSetExpression.R
+++ b/R/InstantiateCohortFromConceptSetExpression.R
@@ -29,6 +29,8 @@
 #'
 #' @template CdmDatabaseSchema
 #'
+#' @template CohortDatabaseSchema
+#'
 #' @template TempEmulationSchema
 #'
 #' @param cohortId An integer value to identify the cohort.
@@ -66,10 +68,10 @@ instantiateCohortFromConceptSetExpression <-
         connection = connection,
         vocabularyDatabaseSchema = vocabularyDatabaseSchema
       ) |>
-      dplyr::select(dplyr::all_of("conceptId")) |>
+      dplyr::select(dplyr::all_of(c("conceptId"))) |>
       dplyr::distinct() |>
-      dplyr::arrange(dplyr::all_of("conceptId")) |>
-      dplyr::pull(dplyr::all_of("conceptId"))
+      dplyr::arrange(dplyr::all_of(c("conceptId"))) |>
+      dplyr::pull(dplyr::all_of(c("conceptId")))
 
     tempTableWithConceptDates <-
       getConceptSetOccurrenceDate(
diff --git a/R/MapMedraToSnomedViaVocabulary.R b/R/MapMedraToSnomedViaVocabulary.R
index 0d0ec11..f50a887 100644
--- a/R/MapMedraToSnomedViaVocabulary.R
+++ b/R/MapMedraToSnomedViaVocabulary.R
@@ -115,8 +115,7 @@ mapMedraToSnomedViaVocabulary <-
         dplyr::inner_join(
           medDraRelationship$pt |>
             dplyr::select(
-              .data$givenConceptId,
-              .data$ptConceptName
+              dplyr::all_of(c("givenConceptId", "ptConceptName"))
             ) |>
             rename(
               "medDraConceptId" = .data$givenConceptId,
@@ -129,8 +128,7 @@ mapMedraToSnomedViaVocabulary <-
         dplyr::inner_join(
           medDraRelationship$llt |>
             dplyr::select(
-              .data$givenConceptId,
-              .data$lltConceptName
+              dplyr::all_of(c("givenConceptId", "lltConceptName"))
             ) |>
             rename(
               "medDraConceptId" = .data$givenConceptId,
@@ -222,8 +220,7 @@ mapMedraToSnomedViaVocabulary <-
     relatedToSnomed <-
       medDraRelated |>
       dplyr::select(
-        .data$conceptId1,
-        .data$conceptId2
+        dplyr::all_of(c("conceptId1", "conceptId2"))
       ) |>
       dplyr::distinct() |>
       dplyr::inner_join(
@@ -255,8 +252,7 @@ mapMedraToSnomedViaVocabulary <-
       dplyr::inner_join(
         conceptRelationship |>
           dplyr::select(
-            .data$conceptId1,
-            .data$conceptId2
+            dplyr::all_of(c("conceptId1", "conceptId2"))
           ),
         by = c("invalidSnomedConceptId" = "conceptId1")
       ) |>
@@ -307,8 +303,7 @@ mapMedraToSnomedViaVocabulary <-
           dplyr::inner_join(
             snomedSynonyms |>
               dplyr::select(
-                .data$conceptId,
-                .data$conceptSynonymName
+                dplyr::all_of(c("conceptId", "conceptSynonymName"))
               ) |>
               dplyr::distinct() |>
               dplyr::rename("conceptName" = .data$conceptSynonymName),
@@ -414,9 +409,7 @@ mapMedraToSnomedViaVocabulary <-
         )
       ) |>
       dplyr::select(
-        .data$medDraConceptId,
-        .data$snomedConceptId,
-        .data$stringDistanceScore
+        dplyr::all_of(c("medDraConceptId", "snomedConceptId", "stringDistanceScore"))
       ) |>
       dplyr::distinct()
 
@@ -430,16 +423,12 @@ mapMedraToSnomedViaVocabulary <-
       tidyr::replace_na(list(stringDistanceScore = 999)) |>
       dplyr::group_by(.data$medDraConceptId) |>
       dplyr::arrange(
-        .data$minLevelsOfSeparation,
-        .data$maxLevelsOfSeparation,
-        .data$stringDistanceScore
+        dplyr::all_of(c("minLevelsOfSeparation", "maxLevelsOfSeparation", "stringDistanceScore"))
       ) |>
       dplyr::mutate(rank = dplyr::row_number()) |>
       dplyr::ungroup() |>
       dplyr::select(
-        .data$medDraConceptId,
-        .data$snomedConceptId,
-        .data$rank
+        dplyr::all_of(c("medDraConceptId", "snomedConceptId", "rank"))
       ) |>
       dplyr::distinct()
 
@@ -481,9 +470,7 @@ mapMedraToSnomedViaVocabulary <-
       ) |>
       dplyr::mutate(ancestorRank = dplyr::row_number()) |>
       dplyr::arrange(
-        .data$medDraConceptId,
-        .data$descendantConceptId,
-        .data$ancestorRank
+        dplyr::all_of(c("medDraConceptId", "descendantConceptId", "ancestorRank"))
       )
 
     canBeRolledUp <-
@@ -497,10 +484,7 @@ mapMedraToSnomedViaVocabulary <-
       ) |>
       dplyr::rename("descendantConceptId" = .data$snomedConceptId) |>
       dplyr::select(
-        .data$medDraConceptId,
-        .data$descendantConceptId,
-        .data$ancestorConceptId,
-        .data$ancestorRank
+        dplyr::all_of(c("medDraConceptId", "descendantConceptId", "ancestorConceptId", "ancestorRank"))
       ) |>
       dplyr::distinct()
 
@@ -527,9 +511,7 @@ mapMedraToSnomedViaVocabulary <-
         suppressWarnings(
           canBeRolledUp |>
             dplyr::select(
-              .data$medDraConceptId,
-              .data$descendantConceptId,
-              .data$ancestorRank
+              dplyr::all_of(c("medDraConceptId", "descendantConceptId", "ancestorRank"))
             ) |>
             dplyr::group_by(
               .data$medDraConceptId,
@@ -584,7 +566,7 @@ mapMedraToSnomedViaVocabulary <-
       dplyr::group_by(.data$medDraConceptId) |>
       dplyr::arrange(.data$rank) |>
       dplyr::mutate(rn = dplyr::row_number()) |>
-      dplyr::select(-.data$rank) |>
+      dplyr::select(-dplyr::all_of(c("rank"))) |>
       dplyr::rename(rank = .data$rn) |>
       dplyr::ungroup() |>
       dplyr::arrange(.data$medDraConceptId)
@@ -633,14 +615,11 @@ mapMedraToSnomedViaVocabulary <-
         by = "medDraConceptId"
       ) |>
       dplyr::arrange(
-        .data$medDraConceptId,
-        .data$medDraConceptName,
-        .data$medDraConceptClassId,
-        .data$rank
+        dplyr::all_of(c("medDraConceptId", "medDraConceptName", "medDraConceptName", "rank"))
       ) |>
       dplyr::select(
         dplyr::starts_with(c("medDra", "snomed")),
-        .data$rank
+        dplyr::all_of(c("rank"))
       )
 
     return(mappedUsingVocabaulary)
diff --git a/R/Private.R b/R/Private.R
index cf7b769..2bb7a03 100644
--- a/R/Private.R
+++ b/R/Private.R
@@ -200,10 +200,7 @@ getDomainInformation <- function() {
   data$long <- dplyr::bind_rows(
     data$wide |>
       dplyr::select(
-        .data$domainTableShort,
-        .data$domainTable,
-        .data$domainConceptIdShort,
-        .data$domainConceptId
+        dplyr::all_of(c("domainTableShort", "domainTable", "domainConceptIdShort", "domainConceptId"))
       ) |>
       dplyr::rename(
         domainFieldShort = .data$domainConceptIdShort,
@@ -211,10 +208,7 @@ getDomainInformation <- function() {
       ),
     data$wide |>
       dplyr::select(
-        .data$domainTableShort,
-        .data$domainSourceConceptIdShort,
-        .data$domainTable,
-        .data$domainSourceConceptId
+        dplyr::all_of(c("domainTableShort", "domainSourceConceptIdShort", "domainTable", "domainSourceConceptId"))
       ) |>
       dplyr::rename(
         domainFieldShort = .data$domainSourceConceptIdShort,
@@ -239,15 +233,15 @@ getDomainInformation <- function() {
   data |>
     dplyr::collect() |>
     dplyr::mutate(dplyr::across(
-      tidyselect:::where(is.character),
+      tidyselect::where(is.character),
       ~ tidyr::replace_na(.x, as.character(""))
     )) |>
     dplyr::mutate(dplyr::across(
-      tidyselect:::where(is.logical),
+      tidyselect::where(is.logical),
       ~ tidyr::replace_na(.x, as.character(""))
     )) |>
     dplyr::mutate(dplyr::across(
-      tidyselect:::where(is.numeric),
+      tidyselect::where(is.numeric),
       ~ tidyr::replace_na(.x, as.numeric(""))
     ))
 }
diff --git a/man/getConceptRecordCount.Rd b/man/getConceptRecordCount.Rd
index a5112be..d042636 100644
--- a/man/getConceptRecordCount.Rd
+++ b/man/getConceptRecordCount.Rd
@@ -57,6 +57,23 @@ tables, provide a schema with write privileges where temp tables can be created.
 
 \item{cohortDefinitionId}{Optional}
 
+\item{stratifyByGender}{Logical Whether to stratify the counts by gender.}
+
+\item{stratifyByYear}{Logical Whether to stratify the counts by year.}
+
+\item{stratifyByYearQuarter}{Logical Whether to stratify the counts by quarter of the year.}
+
+\item{stratifyByYearMonth}{Logical Whether to stratify the counts by month of the year.}
+
+\item{stratifyByAgeGroup}{Logical Whether to stratify the counts by age group.}
+
+\item{stratifyByIncidence}{Logical Whether to limit the counts to first occurrences (incidence).}
+
+\item{getOverallCounts}{Logical Whether to include overall counts across all specified stratifications.}
+
+\item{domainTableName}{Vector of strings Domains to look for concept IDs. Supported domains
+include "drug_exposure", "condition_occurrence", "procedure_occurrence", "measurement", "observation".}
+
 \item{domain}{domains to look for concept id}
 
 \item{limitToCohort}{Do you want to limit to a cohort_definition_id?}
diff --git a/man/getConceptSetOccurrenceDate.Rd b/man/getConceptSetOccurrenceDate.Rd
index 95d1a93..f830bd1 100644
--- a/man/getConceptSetOccurrenceDate.Rd
+++ b/man/getConceptSetOccurrenceDate.Rd
@@ -27,6 +27,8 @@ schema name, for example 'cdm_data.dbo'.}
 
 \item{conceptIds}{An array of concept ids}
 
+\item{subset}{options are "all", "first", "last"}
+
 \item{limitToPersonDate}{Do you want to limit to person dates}
 
 \item{restrictToObservationPeriod}{(Default = TRUE) Do you want to restrict to Observation period? i.e
diff --git a/man/instantiateCohortFromConceptSetExpression.Rd b/man/instantiateCohortFromConceptSetExpression.Rd
index 9531520..4c40aec 100644
--- a/man/instantiateCohortFromConceptSetExpression.Rd
+++ b/man/instantiateCohortFromConceptSetExpression.Rd
@@ -35,6 +35,9 @@ schema name, for example 'cdm_data.dbo'.}
 
 \item{vocabularyDatabaseSchema}{The schema name of containing the vocabulary tables.}
 
+\item{cohortDatabaseSchema}{Schema name where your user has write access (has CRUD privileges). This is the location,
+of the cohort tables.}
+
 \item{cohortId}{An integer value to identify the cohort.}
 
 \item{cohortTable}{the name of the cohort table}