From f248bb86497e9dca5bc951315da9aee4dd39f0cc Mon Sep 17 00:00:00 2001 From: brouillette Date: Fri, 25 Feb 2022 13:47:33 -0500 Subject: [PATCH 1/3] begin documentId --- src/api/models/dtos/main.go | 1 + src/api/mvc/variants.go | 37 +++++++++++++++++++++++-------------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/src/api/models/dtos/main.go b/src/api/models/dtos/main.go index 1c540dc5..ec829387 100644 --- a/src/api/models/dtos/main.go +++ b/src/api/models/dtos/main.go @@ -53,6 +53,7 @@ type VariantCall struct { // TODO: GenotypeProbability, PhredScaleLikelyhood ? AssemblyId constants.AssemblyId `json:"assemblyId,omitempty"` + DocumentId string `json:"documentId,omitempty"` } // -- Genes diff --git a/src/api/mvc/variants.go b/src/api/mvc/variants.go index 16a376ae..45e7f5eb 100644 --- a/src/api/mvc/variants.go +++ b/src/api/mvc/variants.go @@ -678,17 +678,22 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool) error mapstructure.Decode(docsHits, &allDocHits) // grab _source for each hit - var allSources []indexes.Variant + var allSources []interface{} + // var allSources []indexes.Variant for _, r := range allDocHits { source := r["_source"].(map[string]interface{}) + docId := r["_id"].(string) // cast map[string]interface{} to struct var resultingVariant indexes.Variant mapstructure.Decode(source, &resultingVariant) // accumulate structs - allSources = append(allSources, resultingVariant) + allSources = append(allSources, map[string]interface{}{ + "variant": resultingVariant, + "documentId": docId, + }) } fmt.Printf("Found %d docs!\n", len(allSources)) @@ -697,24 +702,28 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool) error // TEMP : re-capitalize sampleIds retrieved from elasticsearch at response time // TODO: touch up elasticsearch ingestion/parsing settings // to not automatically force all sampleIds to lowercase when indexing - sampleId := strings.ToUpper(source.Sample.Id) + variant := source.(map[string]interface{})["variant"].(indexes.Variant) + docId := source.(map[string]interface{})["documentId"].(string) + + sampleId := strings.ToUpper(variant.Sample.Id) variantResult.Calls = append(variantResult.Calls, dtos.VariantCall{ - Chrom: source.Chrom, - Pos: source.Pos, - Id: source.Id, - Ref: source.Ref, - Alt: source.Alt, - Format: source.Format, - Qual: source.Qual, - Filter: source.Filter, + Chrom: variant.Chrom, + Pos: variant.Pos, + Id: variant.Id, + Ref: variant.Ref, + Alt: variant.Alt, + Format: variant.Format, + Qual: variant.Qual, + Filter: variant.Filter, - Info: source.Info, + Info: variant.Info, SampleId: sampleId, - GenotypeType: zygosity.ZygosityToString(source.Sample.Variation.Genotype.Zygosity), + GenotypeType: zygosity.ZygosityToString(variant.Sample.Variation.Genotype.Zygosity), - AssemblyId: source.AssemblyId, + AssemblyId: variant.AssemblyId, + DocumentId: docId, }) } } From 56b30a0b3b1809f6204b0f5893dbaf9fef02184c Mon Sep 17 00:00:00 2001 From: brouillette Date: Fri, 25 Feb 2022 14:14:05 -0500 Subject: [PATCH 2/3] simple variant query by document id --- src/api/main.go | 1 + src/api/mvc/variants.go | 57 +++++++++----- .../repositories/elasticsearch/variants.go | 75 +++++++++++++++++++ 3 files changed, 114 insertions(+), 19 deletions(-) diff --git a/src/api/main.go b/src/api/main.go index 53da4216..3ad884b0 100644 --- a/src/api/main.go +++ b/src/api/main.go @@ -153,6 +153,7 @@ func main() { gam.MandateAssemblyIdAttribute, gam.MandateSampleIdsPluralAttribute, gam.ValidatePotentialGenotypeQueryParameter) + e.GET("/variants/get/by/documentId", mvc.VariantsGetByDocumentId) e.GET("/variants/count/by/variantId", mvc.VariantsCountByVariantId, // middleware diff --git a/src/api/mvc/variants.go b/src/api/mvc/variants.go index 45e7f5eb..aadecd79 100644 --- a/src/api/mvc/variants.go +++ b/src/api/mvc/variants.go @@ -51,9 +51,8 @@ func VariantsGetByVariantId(c echo.Context) error { variantIds = []string{"*"} } - return executeGetByIds(c, variantIds, true) + return executeGetByIds(c, variantIds, true, false) } - func VariantsGetBySampleId(c echo.Context) error { fmt.Printf("[%s] - VariantsGetBySampleId hit!\n", time.Now()) // retrieve sample Ids from query parameter (comma separated) @@ -63,7 +62,18 @@ func VariantsGetBySampleId(c echo.Context) error { sampleIds = []string{"*"} } - return executeGetByIds(c, sampleIds, false) + return executeGetByIds(c, sampleIds, false, false) +} +func VariantsGetByDocumentId(c echo.Context) error { + fmt.Printf("[%s] - VariantsGetByDocumentId hit!\n", time.Now()) + // retrieve document Ids from query parameter (comma separated) + docIds := strings.Split(c.QueryParam("ids"), ",") + if len(docIds[0]) == 0 { + // if no ids were provided, assume "wildcard" search + docIds = []string{"*"} + } + + return executeGetByIds(c, docIds, false, true) } func VariantsCountByVariantId(c echo.Context) error { @@ -78,7 +88,6 @@ func VariantsCountByVariantId(c echo.Context) error { return executeCountByIds(c, singleVariantIdSlice, true) } - func VariantsCountBySampleId(c echo.Context) error { fmt.Printf("[%s] - VariantsCountBySampleId hit!\n", time.Now()) // retrieve single sample id from query parameter and map to a list @@ -528,7 +537,7 @@ func GetAllVariantIngestionRequests(c echo.Context) error { return c.JSON(http.StatusOK, m) } -func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool) error { +func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool, isDocumentIdQuery bool) error { cfg := c.(*contexts.GohanContext).Config var es, chromosome, lowerBound, upperBound, reference, alternative, genotype, assemblyId = retrieveCommonElements(c) @@ -620,24 +629,34 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool) error includeInfoInResultSet, genotype, assemblyId, getSampleIdsOnly) } else { - // implied sampleId query - fmt.Printf("Executing Get-Samples for SampleId %s\n", _id) - // only set query string if - // 'getSampleIdsOnly' is false - // (current support for bentoV2 + bento_federation_service integration) - if !getSampleIdsOnly { - variantResult.Query = fmt.Sprintf("variantId:%s", _id) // TODO: Refactor + if isDocumentIdQuery { + variantResult.Query = fmt.Sprintf("documentId:%s", _id) // TODO: Refactor + + fmt.Printf("Executing Get-Samples for DocumentId %s\n", _id) + docs, searchErr = esRepo.GetDocumentsByDocumentId(cfg, es, _id) + } else { + // implied sampleId query + fmt.Printf("Executing Get-Samples for SampleId %s\n", _id) + + // only set query string if + // 'getSampleIdsOnly' is false + // (current support for bentoV2 + bento_federation_service integration) + if !getSampleIdsOnly { + variantResult.Query = fmt.Sprintf("variantId:%s", _id) // TODO: Refactor + } + + docs, searchErr = esRepo.GetDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, + chromosome, lowerBound, upperBound, + "", _id, // note : "" is for variantId + reference, alternative, + size, sortByPosition, + includeInfoInResultSet, genotype, assemblyId, + false) } - docs, searchErr = esRepo.GetDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, - chromosome, lowerBound, upperBound, - "", _id, // note : "" is for variantId - reference, alternative, - size, sortByPosition, - includeInfoInResultSet, genotype, assemblyId, - false) } + if searchErr != nil { errorMux.Lock() errors = append(errors, searchErr) diff --git a/src/api/repositories/elasticsearch/variants.go b/src/api/repositories/elasticsearch/variants.go index f5b108c9..1893ac2b 100644 --- a/src/api/repositories/elasticsearch/variants.go +++ b/src/api/repositories/elasticsearch/variants.go @@ -21,6 +21,81 @@ import ( const variantsIndex = "variants" +func GetDocumentsByDocumentId(cfg *models.Config, es *elasticsearch.Client, id string) (map[string]interface{}, error) { + + // overall query structure + var buf bytes.Buffer + query := map[string]interface{}{ + "query": map[string]interface{}{ + "bool": map[string]interface{}{ + "filter": []map[string]interface{}{{ + "bool": map[string]interface{}{ + "must": []map[string]interface{}{ + { + "query_string": map[string]string{ + "query": fmt.Sprintf("_id:%s", id), + }, + }, + }, + }}, + }, + }, + }, + } + + // encode the query + if err := json.NewEncoder(&buf).Encode(query); err != nil { + log.Fatalf("Error encoding query: %s\n", err) + return nil, err + } + + if cfg.Debug { + // view the outbound elasticsearch query + myString := string(buf.Bytes()[:]) + fmt.Println(myString) + } + + fmt.Printf("Query Start: %s\n", time.Now()) + + // TEMP: SECURITY RISK + http.DefaultTransport.(*http.Transport).TLSClientConfig = &tls.Config{InsecureSkipVerify: true} + // + // Perform the search request. + res, searchErr := es.Search( + es.Search.WithContext(context.Background()), + es.Search.WithIndex(variantsIndex), + es.Search.WithBody(&buf), + es.Search.WithTrackTotalHits(true), + es.Search.WithPretty(), + ) + if searchErr != nil { + fmt.Printf("Error getting response: %s\n", searchErr) + return nil, searchErr + } + + defer res.Body.Close() + + resultString := res.String() + if cfg.Debug { + fmt.Println(resultString) + } + + // Declared an empty interface + result := make(map[string]interface{}) + + // Unmarshal or Decode the JSON to the interface. + // Known bug: response comes back with a preceding '[200 OK] ' which needs trimming (hence the [9:]) + umErr := json.Unmarshal([]byte(resultString[9:]), &result) + if umErr != nil { + fmt.Printf("Error unmarshalling response: %s\n", umErr) + return nil, umErr + } + + fmt.Printf("Query End: %s\n", time.Now()) + + return result, nil +} + func GetDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, es *elasticsearch.Client, chromosome string, lowerBound int, upperBound int, variantId string, sampleId string, From 9de4d5104ac965757f6c2d4237c907374a4cfb70 Mon Sep 17 00:00:00 2001 From: brouillette Date: Wed, 16 Mar 2022 15:56:27 -0400 Subject: [PATCH 3/3] - api container build git patch - small refactoring --- src/api/Dockerfile | 7 +++++++ src/api/contexts/contexts.go | 2 +- src/api/main.go | 7 ++++++- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/api/Dockerfile b/src/api/Dockerfile index c3ac8615..e67ef8e2 100644 --- a/src/api/Dockerfile +++ b/src/api/Dockerfile @@ -20,16 +20,23 @@ RUN apk update && \ # Install & build samtools from github repos (htslib needed first) RUN apk --update add ncurses-dev && \ apk add git build-base xz-dev zlib-dev bzip2-dev curl-dev && \ + \ + git config --global url."https://".insteadOf git:// && \ git clone git://github.com/samtools/htslib.git && \ + \ cd htslib && \ git submodule update --init --recursive && \ + \ make && \ make install && \ + \ cd .. && \ git clone git://github.com/samtools/samtools.git && \ + \ cd samtools && \ make && \ make install && \ + \ cd .. && \ rm -rf htslib samtools diff --git a/src/api/contexts/contexts.go b/src/api/contexts/contexts.go index c004d1cc..790fa8f3 100644 --- a/src/api/contexts/contexts.go +++ b/src/api/contexts/contexts.go @@ -15,6 +15,6 @@ type ( echo.Context Es7Client *es7.Client Config *models.Config - IngestionService services.IngestionService + IngestionService *services.IngestionService } ) diff --git a/src/api/main.go b/src/api/main.go index 3ad884b0..950ab131 100644 --- a/src/api/main.go +++ b/src/api/main.go @@ -94,7 +94,12 @@ func main() { // to be able to provide variables and global singletons e.Use(func(h echo.HandlerFunc) echo.HandlerFunc { return func(c echo.Context) error { - cc := &contexts.GohanContext{c, es, &cfg, *iz} + cc := &contexts.GohanContext{ + Context: c, + Es7Client: es, + Config: &cfg, + IngestionService: iz, + } return h(cc) } })