diff --git a/README.md b/README.md index b608a2e9..ffc18ce0 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ ## Prerequisites - Golang >= 1.19 - installation: https://golang.org/doc/install - - other references + - other references - https://linguinecode.com/post/install-golang-linux-terminal - Elasticsearch - getting started: https://www.elastic.co/guide/en/elasticsearch/reference/current/getting-started.html @@ -39,55 +39,56 @@ ## TL;DR ### Typical use-case walkthrough + ``` - # environment - cp ./etc/example.env .env # modify to your needs +# environment +cp ./etc/example.env .env # modify to your needs + +# kickstart dockerized gohan environment +make init + +# (optional): if you plan on modifying the api codebase before deploying +make init-dev + +# gateway & certificates +mkdir -p gateway/certs/dev + +openssl req -newkey rsa:2048 -nodes -keyout gateway/certs/dev/gohan_privkey1.key -x509 -days 365 -out gateway/certs/dev/gohan_fullchain1.crt +openssl req -newkey rsa:2048 -nodes -keyout gateway/certs/dev/es_gohan_privkey1.key -x509 -days 365 -out gateway/certs/dev/es_gohan_fullchain1.crt - # kickstart dockerized gohan environment - make init - # (optional): if you plan on modifying the api codebase before deploying - make init-dev +# build services +make build-gateway +make build-api - # gateway & certificates - mkdir -p gateway/certs/dev +# run services +make run-gateway +make run-elasticsearch +make run-drs +make run-api - openssl req -newkey rsa:2048 -nodes -keyout gateway/certs/dev/gohan_privkey1.key -x509 -days 365 -out gateway/certs/dev/gohan_fullchain1.crt - openssl req -newkey rsa:2048 -nodes -keyout gateway/certs/dev/es_gohan_privkey1.key -x509 -days 365 -out gateway/certs/dev/es_gohan_fullchain1.crt +# initiate genes catlogue: +curl -k https://gohan.local/genes/ingestion/run - # build services - make build-gateway - make build-api +# monitor progress: +curl -k https://gohan.local/genes/ingestion/requests +curl -k 
https://gohan.local/genes/ingestion/stats - # run services - make run-gateway - make run-elasticsearch - make run-drs - make run-api - - - # initiate genes catlogue: - curl -k https://gohan.local/genes/ingestion/run - - # monitor progress: - curl -k https://gohan.local/genes/ingestion/requests - curl -k https://gohan.local/genes/ingestion/stats +# view catalogue +curl -k https://gohan.local/genes/overview - # view catalogue - curl -k https://gohan.local/genes/overview +# move vcf.gz files to `$GOHAN_API_VCF_PATH` - # move vcf.gz files to `$GOHAN_API_VCF_PATH` +# ingest vcf.gz +curl -k https://gohan.local/variants/ingestion/run\?fileNames=\&assemblyId=GRCh37\&filterOutReferences=true\&dataset=00000000-0000-0000-0000-000000000000 - # ingest vcf.gz - curl -k https://gohan.local/variants/ingestion/run\?fileNames=\&assemblyId=GRCh37\&filterOutReferences=true\&dataset=00000000-0000-0000-0000-000000000000 - - # monitor progress: - curl -k https://gohan.local/variants/ingestion/requests - curl -k https://gohan.local/variants/ingestion/stats +# monitor progress: +curl -k https://gohan.local/variants/ingestion/requests +curl -k https://gohan.local/variants/ingestion/stats - # view variants - curl -k https://gohan.local/variants/overview +# view variants +curl -k https://gohan.local/variants/overview ``` ## Getting started @@ -105,7 +106,7 @@ cp ./etc/example.env .env
### **Initialization** -Run +Run ``` make init ``` @@ -115,11 +116,14 @@ make init ### **Elasticsearch & Kibana :** -Run +Run + ``` -make run-elasticsearch +make run-elasticsearch ``` + and *(optionally)* + ``` make run-kibana ``` @@ -130,7 +134,7 @@ make run-kibana ### **DRS :** -Run +Run ``` make run-drs ``` @@ -140,7 +144,7 @@ make run-drs ### **Data Access Authorization with OPA (more on this to come..) :** -Run +Run ``` make build-authz make run-authz @@ -158,6 +162,7 @@ make run-authz ### **Gateway** To create and use development certs from the project root, run + ``` mkdir -p gateway/certs/dev @@ -179,7 +184,7 @@ make run-gateway ### **API** -Containerized : +**Containerized:**  To simply run a working instance of the api "out of the box", build the `docker image` and spawn the `container` with an fresh binary build by running @@ -193,21 +198,21 @@ make run-api
-Local Development : +**Local Development:**  This can be done multiple ways. - 1. `Terminal` : From the project root, run -``` -# load variables from local file -set -a -. ./.env -set +a + 1. `Terminal` : From the project root, run + ``` + # load variables from local file + set -a + . ./.env + set +a -cd src/api + cd src/api -go run . -``` + go run . + ``` 2. `IDE (preferably VSCode)` @@ -215,11 +220,11 @@ go run . - configure the `.vscode/launch.json` to inject the above mentioned variables as recommended by https://stackoverflow.com/questions/29971572/how-do-i-add-environment-variables-to-launch-json-in-vscode - - click 'Run & Debug' > "Play" + - click 'Run & Debug' > "Play" Local Release - To build / test from source; + To build / test from source; ``` make build-api-local-binaries @@ -243,7 +248,7 @@ cd bin/ -Endpoints : +**Endpoints:** **`/variants`** @@ -255,50 +260,49 @@ Request
Response ->```json -> { -> "chromosomes": { -> "": `number`, -> ... -> }, -> "sampleIDs": { -> "": `number`, -> ... -> }, -> "variantIDs": { -> "": `number`, -> ... -> } -> } -> ->``` +```js +{ + "chromosomes": { + "": `number`, + // ... + }, + "sampleIDs": { + "": `number`, + // ... + }, + "variantIDs": { + "": `number`, + // ... + } +} +```
-Example : ->```json -> { -> "chromosomes": { -> "21": 90548 -> }, -> "sampleIDs": { -> "hg00096": 33664, -> "hg00099": 31227, -> "hg00111": 25657 -> }, -> "variantIDs": { -> ".": 90548 -> } -> } -> ->``` +**Example:** +```json + { + "chromosomes": { + "21": 90548 + }, + "sampleIDs": { + "hg00096": 33664, + "hg00099": 31227, + "hg00111": 25657 + }, + "variantIDs": { + ".": 90548 + } + } + +```

Requests >   **GET** `/variants/get/by/variantId`
->    params: -> - chromosome : **string** `( 1-23, X, Y, MT )` +>    params: +> - chromosome : **string** > - lowerBound : **number** > - upperBound : **number** > - reference : **string** `an allele ( "A" | "C" | "G" | "T" | "N" or some combination thereof )` @@ -312,8 +316,8 @@ Requests > - getSampleIdsOnly : **bool** *`(optional) - default: false `* > >   **GET** `/variants/count/by/variantId`
->    params: -> - chromosome : **string** `( 1-23, X, Y, MT )` +>    params: +> - chromosome : **string** > - lowerBound : **number** > - upperBound : **number** > - reference : **string** `an allele` @@ -323,8 +327,8 @@ Requests > - genotype : **string** `( "HETEROZYGOUS" | "HOMOZYGOUS_REFERENCE" | "HOMOZYGOUS_ALTERNATE" )` >   **GET** `/variants/get/by/sampleId`
->    params: -> - chromosome : **string** `( 1-23, X, Y, MT )` +>    params: +> - chromosome : **string** > - lowerBound : **number** > - upperBound : **number** > - reference : **string** `an allele` @@ -337,8 +341,8 @@ Requests > - genotype : **string** `( "HETEROZYGOUS" | "HOMOZYGOUS_REFERENCE" | "HOMOZYGOUS_ALTERNATE" )` > >   **GET** `/variants/count/by/sampleId`
->    params: -> - chromosome : **string** `( 1-23, X, Y, MT )` +>    params: +> - chromosome : **string** > - lowerBound : **number** > - upperBound : **number** > - reference : **string** `an allele` @@ -352,46 +356,46 @@ Requests Generalized Response Body Structure ->```json ->{ -> "status": `number` (200 - 500), -> "message": `string` ("Success" | "Error"), -> "results": [ -> { -> "query": `string`, // reflective of the type of id queried for, i.e 'variantId:abc123', or 'sampleId:HG0001 -> "assemblyId": `string` ("GRCh38" | "GRCh37" | "NCBI36" | "Other"), // reflective of the assembly id queried for -> "count": `number`, // this field is only present when performing a COUNT query -> "start": `number`, // reflective of the provided lowerBound parameter, 0 if none -> "end": `number`, // reflective of the provided upperBound parameter, 0 if none -> "chromosome": `string`, // reflective of the chromosome queried for -> "calls": [ // this field is only present when performing a GET query -> { -> "id": `string`, // variantId -> "chrom": `string`, -> "pos": `number`, -> "ref": `[]string`, // list of alleles -> "alt": `[]string`, // list of alleles -> "alleles": `[]string`, // ordereed list of alleles -> "info": [ -> { -> "id": `string`, -> "value": `string`, -> }, -> ... -> ], -> "format":`string`, -> "qual": `number`, -> "filter": `string`, -> "sampleId": `string`, -> "genotype_type": `string ( "HETEROZYGOUS" | "HOMOZYGOUS_REFERENCE" | "HOMOZYGOUS_ALTERNATE" )`, -> "assemblyId": `string` ("GRCh38" | "GRCh37" | "NCBI36" | "Other"), -> }, -> ... 
-> ] -> }, -> ] -> } -> ``` +```js +{ + "status": `number` (200 - 500), + "message": `string` ("Success" | "Error"), + "results": [ + { + "query": `string`, // reflective of the type of id queried for, i.e. 'variantId:abc123', or 'sampleId:HG0001' + "assemblyId": `string` ("GRCh38" | "GRCh37" | "NCBI36" | "Other"), // reflective of the assembly id queried for + "count": `number`, // this field is only present when performing a COUNT query + "start": `number`, // reflective of the provided lowerBound parameter, 0 if none + "end": `number`, // reflective of the provided upperBound parameter, 0 if none + "chromosome": `string`, // reflective of the chromosome queried for - no `chr` prefix + "calls": [ // this field is only present when performing a GET query + { + "id": `string`, // variantId + "chrom": `string`, + "pos": `number`, + "ref": `[]string`, // list of alleles + "alt": `[]string`, // list of alleles + "alleles": `[]string`, // ordered list of alleles + "info": [ + { + "id": `string`, + "value": `string`, + }, + ... + ], + "format":`string`, + "qual": `number`, + "filter": `string`, + "sampleId": `string`, + "genotype_type": `string ( "HETEROZYGOUS" | "HOMOZYGOUS_REFERENCE" | "HOMOZYGOUS_ALTERNATE" )`, + "assemblyId": `string` ("GRCh38" | "GRCh37" | "NCBI36" | "Other"), + }, + ... + ] + }, + ] +} +```
@@ -404,7 +408,7 @@ Generalized Response Body Structure
- http://localhost:5000/variants/get/by/sampleId?ids=HG00097&size=1000&sortByPosition=asc - +
- http://localhost:5000/variants/count/by/variantId?chromosome=8 @@ -424,20 +428,20 @@ Generalized Response Body Structure Request >   **GET** `/variants/ingestion/run`
->    params: +>    params: > - filename : **string** `(required)`
Response ->```json -> { -> "state": `number` ("Queuing" | "Running" | "Done" | "Error"), -> "id": `string`, -> "filename": `string`, -> "message": `string`, -> } -> ``` +```js + { + "state": `number`, // ("Queuing" | "Running" | "Done" | "Error") + "id": `string`, + "filename": `string`, + "message": `string`, + } + ```

@@ -449,19 +453,19 @@ Request
Response ->```json -> [ -> { -> "state": `number` ("Queuing" | "Running" | "Done" | "Error"), -> "id": `string`, -> "filename": `string`, -> "message": `string`, -> "createdAt": `timestamp string`, -> "updatedAt": `timestamp string` -> }, -> ... -> ] -> ``` +```js +[ + { + "state": `number`, // ("Queuing" | "Running" | "Done" | "Error") + "id": `string`, + "filename": `string`, + "message": `string`, + "createdAt": `timestamp string`, + "updatedAt": `timestamp string` + }, + ... +] +```
@@ -471,11 +475,11 @@ Response ## Deployments : -All in all, run +All in all, run ``` -make run-elasticsearch +make run-elasticsearch make run-drs -make build-gateway && make run-gateway +make build-gateway && make run-gateway make build-api && make run-api # and optionally @@ -490,7 +494,7 @@ For other handy tools, see the Makefile. Among those already mentionned here, yo ## Tests : -Once `elasticsearch`, `drs`, the `api`, and the `gateway` are up, run +Once `elasticsearch`, `drs`, the `api`, and the `gateway` are up, run ``` make test-api-dev ``` diff --git a/src/api/middleware/chromosomeMiddleware.go b/src/api/middleware/chromosomeMiddleware.go index 1f800bf7..6eb5cbcf 100644 --- a/src/api/middleware/chromosomeMiddleware.go +++ b/src/api/middleware/chromosomeMiddleware.go @@ -2,8 +2,6 @@ package middleware import ( "gohan/api/contexts" - "gohan/api/models/constants/chromosome" - "net/http" "github.com/labstack/echo" ) @@ -18,13 +16,6 @@ func ValidateOptionalChromosomeAttribute(next echo.HandlerFunc) echo.HandlerFunc // check for chromosome query parameter chromQP := c.QueryParam("chromosome") - // verify: - if len(chromQP) > 0 && !chromosome.IsValidHumanChromosome(chromQP) { - // if chromosome less than 1 or greater than 23 - // and not 'x', 'y' or 'm' - return echo.NewHTTPError(http.StatusBadRequest, "Please provide a valid 'chromosome' (either 1-23, X, Y, or M)") - } - if len(chromQP) == 0 { // if no chromosome is provided, assume "wildcard" search gc.Chromosome = "*" diff --git a/src/api/models/constants/chromosome/main.go b/src/api/models/constants/chromosome/main.go deleted file mode 100644 index 81f20ecb..00000000 --- a/src/api/models/constants/chromosome/main.go +++ /dev/null @@ -1,37 +0,0 @@ -package chromosome - -import ( - "strconv" - "strings" -) - -func IsValidHumanChromosome(text string) bool { - - // Check if number can be represented as an int as is non-zero - chromNumber, _ := strconv.Atoi(text) - if chromNumber > 0 { - // It can.. 
- // Check if it in range 1-23 - if chromNumber < 24 { - return true - } - } else { - // No it can't.. - // Check if it is an X, Y.. - loweredText := strings.ToLower(text) - switch loweredText { - case "x": - return true - case "y": - return true - } - - // ..or M (MT) - switch strings.Contains(loweredText, "m") { - case true: - return true - } - } - - return false -} diff --git a/src/api/mvc/genes/main.go b/src/api/mvc/genes/main.go index e3efbb57..a82ceb8a 100644 --- a/src/api/mvc/genes/main.go +++ b/src/api/mvc/genes/main.go @@ -7,7 +7,6 @@ import ( "fmt" "gohan/api/contexts" assemblyId "gohan/api/models/constants/assembly-id" - "gohan/api/models/constants/chromosome" "gohan/api/models/dtos" "gohan/api/models/ingest" "gohan/api/models/ingest/structs" @@ -243,11 +242,6 @@ func GenesIngest(c echo.Context) error { //clean chromosome chromosomeClean := strings.ReplaceAll(rowSplits[_chromHeaderKey], "chr", "") - if !chromosome.IsValidHumanChromosome(chromosomeClean) { - defer _gwg.Done() - return - } - // clean start/end chromStartClean := strings.ReplaceAll(strings.ReplaceAll(rowSplits[_startKey], ",", ""), " ", "") start, _ = strconv.Atoi(chromStartClean) diff --git a/src/api/services/ingestion.go b/src/api/services/ingestion.go index fbd4c3f9..21b707d6 100644 --- a/src/api/services/ingestion.go +++ b/src/api/services/ingestion.go @@ -10,7 +10,6 @@ import ( "fmt" "gohan/api/models" "gohan/api/models/constants" - "gohan/api/models/constants/chromosome" p "gohan/api/models/constants/ploidy" z "gohan/api/models/constants/zygosity" "gohan/api/models/ingest" @@ -447,7 +446,7 @@ func (i *IngestionService) ProcessVcf( // filter field type by column name if key == "chrom" { - // Strip out chr prefix + // Strip out chr prefix for some normalization with human/model-organism contigs value = strings.ReplaceAll(value, "chr", "") // We're making contig indices on the fly - check if we haven't created the contig yet. 
@@ -460,20 +459,9 @@ func (i *IngestionService) ProcessVcf( } contigMutex.Unlock() - // ems if value is valid chromosome - if chromosome.IsValidHumanChromosome(value) { - tmpVariantMapMutex.Lock() - tmpVariant[key] = value - tmpVariantMapMutex.Unlock() - } else { - // skip this call - skipThisCall = true - - // redundant? - tmpVariantMapMutex.Lock() - tmpVariant[key] = "err" - tmpVariantMapMutex.Unlock() - } + tmpVariantMapMutex.Lock() + tmpVariant[key] = value + tmpVariantMapMutex.Unlock() } else if key == "pos" || key == "qual" { // // Convert string's to int's, if possible diff --git a/src/api/tests/build/api/genes_test.go b/src/api/tests/build/api/genes_test.go index 2ed150dc..119f172e 100644 --- a/src/api/tests/build/api/genes_test.go +++ b/src/api/tests/build/api/genes_test.go @@ -8,7 +8,6 @@ import ( "gohan/api/models" ingest "gohan/api/models/ingest" - "gohan/api/models/constants/chromosome" "gohan/api/models/dtos" "gohan/api/models/indexes" @@ -130,7 +129,6 @@ func TestGenesIngestion(t *testing.T) { // ensure the gene is legit assert.NotNil(t, gene.Name) assert.NotNil(t, gene.AssemblyId) - assert.True(t, chromosome.IsValidHumanChromosome(gene.Chrom)) assert.Greater(t, gene.End, gene.Start) }) })