@@ -51,9 +51,6 @@ runBreakdownIndexEvents <- function(connection,
51
51
vocabularyDatabaseSchema = cdmDatabaseSchema ,
52
52
databaseId ,
53
53
cohorts ,
54
- runIncludedSourceConcepts ,
55
- runOrphanConcepts ,
56
- runBreakdownIndexEvents ,
57
54
exportFolder ,
58
55
minCellCount ,
59
56
conceptCountsDatabaseSchema = NULL ,
@@ -526,151 +523,6 @@ runBreakdownIndexEvents <- function(connection,
526
523
}
527
524
}
528
525
529
- if (runOrphanConcepts ) {
530
- # Orphan concepts ---------------------------------------------------------
531
- ParallelLogger :: logInfo(" Finding orphan concepts" )
532
- if (incremental && (nrow(cohorts ) - nrow(subsetOrphans )) > 0 ) {
533
- ParallelLogger :: logInfo(sprintf(
534
- " Skipping %s cohorts in incremental mode." ,
535
- nrow(cohorts ) - nrow(subsetOrphans )
536
- ))
537
- }
538
- if (nrow(subsetOrphans > 0 )) {
539
- start <- Sys.time()
540
-
541
- if (! useExternalConceptCountsTable ) {
542
- ParallelLogger :: logTrace(" Using internal concept count table." )
543
- } else {
544
- stop(" Use of external concept count table is not supported" )
545
- }
546
-
547
- # [OPTIMIZATION idea] can we modify the sql to do this for all uniqueConceptSetId in one query using group by?
548
- data <- list ()
549
- for (i in (1 : nrow(uniqueConceptSets ))) {
550
- conceptSet <- uniqueConceptSets [i , ]
551
- ParallelLogger :: logInfo(
552
- " - Finding orphan concepts for concept set '" ,
553
- conceptSet $ conceptSetName ,
554
- " '"
555
- )
556
-
557
- timeExecution(
558
- exportFolder ,
559
- taskName = " orphanConcepts" ,
560
- parent = " runConceptSetDiagnostics" ,
561
- cohortIds = paste(" concept_set-" , conceptSet $ conceptSetName ),
562
- expr = {
563
- data [[i ]] <- .findOrphanConcepts(
564
- connection = connection ,
565
- cdmDatabaseSchema = cdmDatabaseSchema ,
566
- tempEmulationSchema = tempEmulationSchema ,
567
- useCodesetTable = TRUE ,
568
- codesetId = conceptSet $ uniqueConceptSetId ,
569
- conceptCountsDatabaseSchema = conceptCountsDatabaseSchema ,
570
- conceptCountsTable = conceptCountsTable ,
571
- conceptCountsTableIsTemp = conceptCountsTableIsTemp ,
572
- instantiatedCodeSets = " #inst_concept_sets" ,
573
- orphanConceptTable = " #orphan_concepts"
574
- )
575
-
576
- if (! is.null(conceptIdTable )) {
577
- sql <- " INSERT INTO @concept_id_table (concept_id)
578
- SELECT DISTINCT concept_id
579
- FROM @orphan_concept_table;"
580
- DatabaseConnector :: renderTranslateExecuteSql(
581
- connection = connection ,
582
- sql = sql ,
583
- tempEmulationSchema = tempEmulationSchema ,
584
- concept_id_table = conceptIdTable ,
585
- orphan_concept_table = " #orphan_concepts" ,
586
- progressBar = FALSE ,
587
- reportOverallTime = FALSE
588
- )
589
- }
590
- }
591
- )
592
- sql <-
593
- " TRUNCATE TABLE @orphan_concept_table;\n DROP TABLE @orphan_concept_table;"
594
- DatabaseConnector :: renderTranslateExecuteSql(
595
- connection = connection ,
596
- sql = sql ,
597
- tempEmulationSchema = tempEmulationSchema ,
598
- orphan_concept_table = " #orphan_concepts" ,
599
- progressBar = FALSE ,
600
- reportOverallTime = FALSE
601
- )
602
- }
603
-
604
- data <- dplyr :: bind_rows(data ) %> %
605
- dplyr :: distinct() %> %
606
- dplyr :: rename(" uniqueConceptSetId" = " codesetId" ) %> %
607
- dplyr :: inner_join(
608
- conceptSets %> %
609
- dplyr :: select(
610
- " uniqueConceptSetId" ,
611
- " cohortId" ,
612
- " conceptSetId"
613
- ) %> % dplyr :: distinct(),
614
- by = " uniqueConceptSetId" ,
615
- relationship = " many-to-many"
616
- ) %> %
617
- dplyr :: select(- " uniqueConceptSetId" ) %> %
618
- dplyr :: select(
619
- " cohortId" ,
620
- " conceptSetId" ,
621
- " conceptId" ,
622
- " conceptCount" ,
623
- " conceptSubjects"
624
- ) %> %
625
- dplyr :: group_by(
626
- .data $ cohortId ,
627
- .data $ conceptSetId ,
628
- .data $ conceptId
629
- ) %> %
630
- dplyr :: summarise(
631
- conceptCount = max(.data $ conceptCount ),
632
- conceptSubjects = max(.data $ conceptSubjects )
633
- ) %> %
634
- dplyr :: ungroup()
635
-
636
-
637
- exportDataToCsv(
638
- data = data ,
639
- tableName = " orphan_concept" ,
640
- fileName = file.path(exportFolder , " orphan_concept.csv" ),
641
- minCellCount = minCellCount ,
642
- databaseId = databaseId ,
643
- incremental = incremental ,
644
- cohortId = subsetOrphans $ cohortId
645
- )
646
-
647
- recordTasksDone(
648
- cohortId = subsetOrphans $ cohortId ,
649
- task = " runOrphanConcepts" ,
650
- checksum = subsetOrphans $ checksum ,
651
- recordKeepingFile = recordKeepingFile ,
652
- incremental = incremental
653
- )
654
-
655
- delta <- Sys.time() - start
656
-
657
- timeExecution(
658
- exportFolder ,
659
- taskName = " allOrphanConcepts" ,
660
- parent = " runConceptSetDiagnostics" ,
661
- start = start ,
662
- execTime = delta
663
- )
664
-
665
- ParallelLogger :: logInfo(
666
- " Finding orphan concepts took " ,
667
- signif(delta , 3 ),
668
- " " ,
669
- attr(delta , " units" )
670
- )
671
- }
672
- }
673
-
674
526
# put all instantiated concepts into #concept_ids table
675
527
# this is extracted with vocabulary tables
676
528
# this will have more codes than included source concepts
0 commit comments