
Commit

Merge pull request #42 from bento-platform/qa/v3.7
v3.7.4-rc1
brouillette authored Mar 27, 2023
2 parents 0d12297 + 571dc2e commit dcef2db
Showing 11 changed files with 111 additions and 124 deletions.
7 changes: 3 additions & 4 deletions .github/workflows/api.build.yml
@@ -7,6 +7,7 @@ on:
branches:
- master
- "releases/**" # temp
- "qa/**" # Build PRs which will be eventually merged into patches
push:
branches:
- master
@@ -35,17 +36,15 @@ jobs:
uses: xom9ikk/dotenv@v2

- name: Run Bento build action
uses: bento-platform/bento_build_action@v0.9.3
uses: bento-platform/bento_build_action@v0.11.0
with:
context: "{{defaultContext}}:src/api"
build-args: |
BUILDER_BASE_IMAGE=${{ env.GOHAN_API_BUILDER_BASE_IMAGE }}
BASE_IMAGE=${{ env.GOHAN_API_BASE_IMAGE }}
HOST_USER_UID=1000
HOST_USER_GID=1000
registry: ghcr.io
registry-username: ${{ github.actor }}
registry-password: ${{ secrets.GITHUB_TOKEN }}
image-name: ghcr.io/bento-platform/gohan-api
development-dockerfile: Dockerfile
dockerfile: Dockerfile
dockerfile: Dockerfile
6 changes: 3 additions & 3 deletions .github/workflows/elasticsearch.build.yml
@@ -6,6 +6,7 @@ on:
pull_request:
branches:
- master
- "qa/**" # Build PRs which will be eventually merged into patches
push:
branches:
- master
@@ -34,16 +35,15 @@ jobs:
uses: xom9ikk/dotenv@v2

- name: Run Bento build action
uses: bento-platform/bento_build_action@v0.9.3
uses: bento-platform/bento_build_action@v0.11.0
with:
context: "{{defaultContext}}:elasticsearch"
build-args: |
BASE_IMAGE=${{ env.GOHAN_ES_BASE_IMAGE }}
BASE_IMAGE_VERSION=${{ env.GOHAN_ES_BASE_VERSION }}
HOST_USER_GID=1000
registry: ghcr.io
registry-username: ${{ github.actor }}
registry-password: ${{ secrets.GITHUB_TOKEN }}
image-name: ghcr.io/bento-platform/gohan-elasticsearch
development-dockerfile: Dockerfile
dockerfile: Dockerfile
dockerfile: Dockerfile
1 change: 1 addition & 0 deletions .gitignore
@@ -1,5 +1,6 @@
.vscode
.DS_store
.idea

*/bin
*/obj
15 changes: 9 additions & 6 deletions docker-compose.yaml
@@ -36,9 +36,6 @@ services:
BASE_IMAGE: ${GOHAN_API_BASE_IMAGE}
# also passed in as an ENV from within Dockerfile :
GOHAN_API_INTERNAL_PORT: ${GOHAN_API_INTERNAL_PORT}
HOST_USER_UID: ${HOST_USER_UID}
HOST_USER_GID: ${HOST_USER_GID}
OS_NAME: ${OS_NAME}
networks:
- ${GOHAN_DOCKER_NET}
mem_limit: ${GOHAN_API_MEM_LIM} # for mem_limit to work, make sure docker-compose is v2.4
@@ -47,6 +44,10 @@ services:
image: ${GOHAN_API_IMAGE}:${GOHAN_API_VERSION}
container_name: ${GOHAN_API_CONTAINER_NAME}
environment:
# Image
- BENTO_UID=${UID}
- BENTO_GID=${GID}

# API
- GOHAN_DEBUG=${GOHAN_DEBUG}
- GOHAN_SERVICE_CONTACT=${GOHAN_SERVICE_CONTACT}
@@ -92,16 +93,18 @@ services:
args:
BASE_IMAGE: ${GOHAN_ES_BASE_IMAGE}
BASE_IMAGE_VERSION: ${GOHAN_ES_BASE_VERSION}
HOST_USER_UID: ${HOST_USER_UID}
HOST_USER_GID: ${HOST_USER_GID}
OS_NAME: ${OS_NAME}
mem_limit: ${GOHAN_ES_MEM_LIM} # for mem_limit to work, make sure docker-compose is v2.4
cpus: ${GOHAN_ES_CPUS}
cpu_shares: 2048
container_name: ${GOHAN_ES_CONTAINER_NAME}
networks:
- ${GOHAN_DOCKER_NET}
environment:
# Image
- BENTO_UID=${UID}
- BENTO_GID=${GID}

# ES
- ELASTIC_USERNAME=${GOHAN_ES_USERNAME}
- ELASTIC_PASSWORD=${GOHAN_ES_PASSWORD}
- ${GOHAN_ES_JAVA_OPTS}
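Note (illustration, not part of this commit): the new BENTO_UID=${UID} / BENTO_GID=${GID} entries only resolve if UID and GID are defined in the environment that docker-compose reads for substitution. A minimal, hypothetical fragment for the .env file, with values that would normally match id -u / id -g on the host:

# hypothetical .env fragment — not included in this diff
UID=1000
GID=1000
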
20 changes: 9 additions & 11 deletions elasticsearch/Dockerfile
@@ -1,16 +1,14 @@
ARG BASE_IMAGE
ARG BASE_IMAGE_VERSION

FROM "${BASE_IMAGE}:${BASE_IMAGE_VERSION}"
FROM ${BASE_IMAGE}:${BASE_IMAGE_VERSION}

ARG HOST_USER_UID
ARG HOST_USER_GID
ARG OS_NAME
RUN if [ $OS_NAME != darwin ]; \
then groupadd --system elasticsearchgroup -g $HOST_USER_GID || true; \
else groupadd --system elasticsearchgroup || true; \
fi && \
usermod -a -G elasticsearchgroup elasticsearch || true
# note: ' || true' ignores possible minor errors
RUN apt-get update -y && \
apt-get install -y bash gosu && \
rm -rf /var/lib/apt/lists/*

USER elasticsearch
COPY gohan_create_service_user.bash /gohan_create_service_user.bash
COPY gohan_entrypoint.bash /gohan_entrypoint.bash

ENTRYPOINT ["/bin/bash", "/gohan_entrypoint.bash"]
CMD ["/usr/local/bin/docker-entrypoint.sh"]
11 changes: 11 additions & 0 deletions elasticsearch/gohan_create_service_user.bash
@@ -0,0 +1,11 @@
#!/bin/bash

# If set, use the local UID from outside the container (or default to 1001; 1000 is already created by the
# Elasticsearch container)
USER_ID=${BENTO_UID:-1001}

echo "[gohan_elasticsearch] [/gohan_create_service_user.bash] using USER_ID=${USER_ID}"

# Add the user
useradd --shell /bin/bash -u "${USER_ID}" --non-unique -c "Bento container user" -m gohan_user
export HOME=/home/gohan_user
13 changes: 13 additions & 0 deletions elasticsearch/gohan_entrypoint.bash
@@ -0,0 +1,13 @@
#!/bin/bash

source /gohan_create_service_user.bash

# Fix permissions on Elasticsearch directories
# See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/docker.html#_configuration_files_must_be_readable_by_the_elasticsearch_user
# - except we use a different user!
chown -R gohan_user:gohan_user /usr/share/elasticsearch/config
chown -R gohan_user:gohan_user /usr/share/elasticsearch/data
chown -R gohan_user:gohan_user /usr/share/elasticsearch/logs

# Drop into gohan_user from root and execute the CMD specified for the image
exec gosu gohan_user "$@"
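
Taken together (an illustrative sketch, not part of the diff): Docker passes the image CMD to the ENTRYPOINT as arguments, so the effective start-up of the new Elasticsearch image is roughly:

/bin/bash /gohan_entrypoint.bash /usr/local/bin/docker-entrypoint.sh
# 1. source /gohan_create_service_user.bash  -> useradd gohan_user with UID ${BENTO_UID:-1001}
# 2. chown -R gohan_user:gohan_user the config, data and logs directories under /usr/share/elasticsearch
# 3. exec gosu gohan_user /usr/local/bin/docker-entrypoint.sh   ("$@" expands to the CMD)
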
8 changes: 4 additions & 4 deletions etc/example.env
@@ -38,8 +38,8 @@ GOHAN_GATEWAY_CPUS=2
GOHAN_API_IMAGE=gohan-api
GOHAN_API_VERSION=latest

GOHAN_API_BUILDER_BASE_IMAGE=golang:1.19-alpine
GOHAN_API_BASE_IMAGE=alpine
GOHAN_API_BUILDER_BASE_IMAGE=golang:1.20-bullseye
GOHAN_API_BASE_IMAGE=ghcr.io/bento-platform/bento_base_image:plain-debian-2023.03.06

GOHAN_API_CONTAINER_NAME=gohan-api
GOHAN_API_SERVICE_HOST=0.0.0.0
@@ -77,7 +77,7 @@ GOHAN_ES_IMAGE=gohan-elasticsearch
GOHAN_ES_VERSION=latest

GOHAN_ES_BASE_IMAGE=elasticsearch
GOHAN_ES_BASE_VERSION=7.10.1
GOHAN_ES_BASE_VERSION=7.17.9
GOHAN_ES_CONTAINER_NAME=elasticsearch
GOHAN_ES_EXTERNAL_PORT_1=9200
GOHAN_ES_INTERNAL_PORT_1=9200
@@ -97,7 +97,7 @@ GOHAN_ES_DATA_DIR=${GOHAN_DATA_ROOT}/elasticsearch

# KIBANA
GOHAN_KB_BASE_IMAGE=kibana
GOHAN_KB_BASE_VERSION=7.10.1
GOHAN_KB_BASE_VERSION=7.17.9
GOHAN_KB_CONTAINER_NAME=kibana
GOHAN_KB_EXTERNAL_PORT=5601
GOHAN_KB_INTERNAL_PORT=5601
79 changes: 14 additions & 65 deletions src/api/Dockerfile
@@ -9,83 +9,32 @@ LABEL maintainer="Brennan Brouillette <brennan.brouillette@computationalgenomics.

WORKDIR /build

COPY . /build/

# Alpine updates and executable dependencies
RUN apk update && \
apk upgrade && \
# For compressing the final binary :
apk add git
# Temporarily disabling:
# \
# upx

# Install & build samtools from github repos (htslib needed first)
RUN apk --update add ncurses-dev && \
apk add git build-base xz-dev zlib-dev bzip2-dev curl-dev && \
\
git clone https://github.com/samtools/htslib.git && \
\
cd htslib && \
git submodule update --init --recursive && \
\
make && \
make install && \
\
cd .. && \
git clone https://github.com/samtools/samtools.git && \
\
cd samtools && \
make && \
make install && \
\
cd .. && \
rm -rf htslib samtools
COPY . .

# Build gohan api
RUN go mod vendor && \
go build -ldflags="-s -w" -o gohan_api
# Temporarily disabling:
#&& \
# Compress the final binary :
#upx --brute gohan_api
go build -ldflags="-s -w" -o gohan_api


# Stage two - executioner
FROM $BASE_IMAGE

# Alpine updates
RUN apk update && \
apk upgrade && \
# Dependencies :
apk add \
# - for healthchecks
curl \
# - for tabix
xz-dev zlib-dev bzip2-dev

# Security :
ARG HOST_USER_UID
ARG HOST_USER_GID
ARG OS_NAME
RUN if [ $OS_NAME != darwin ]; \
then addgroup -S apigroup -g $HOST_USER_GID || true; \
else addgroup -S apigroup || true; \
fi && \
adduser -S apiuser -u $HOST_USER_UID -G apigroup || true
# note: ' || true' ignores possible minor errors
# Debian updates
# - tabix for indexing VCFs
# - other base dependencies provided by the base image
RUN apt-get update -y && \
apt-get upgrade -y && \
apt-get install -y tabix && \
rm -rf /var/lib/apt/lists/*

USER apiuser
WORKDIR /app

# Copy pre-built executables
COPY --from=builder /build/gohan_api /app/
COPY --from=builder /usr/local/bin/tabix /usr/local/bin/
# Copy pre-built executable from builder stage
COPY --from=builder /build/gohan_api .

# Copy static workflow files
COPY workflows/*.wdl /app/workflows/

# Use base image entrypoint to set up user & gosu exec the command below
# Run
ARG GOHAN_API_INTERNAL_PORT
ENV GOHAN_API_INTERNAL_PORT=$GOHAN_API_INTERNAL_PORT
EXPOSE $GOHAN_API_INTERNAL_PORT
ENTRYPOINT [ "/app/gohan_api" ]
CMD [ "/app/gohan_api" ]
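
Usage sketch (assumption — the port value and the exact behaviour of the base image's entrypoint are not shown in this diff): the user and group IDs are now supplied at run time rather than as build args, e.g.:

docker run --rm \
  -e BENTO_UID="$(id -u)" -e BENTO_GID="$(id -g)" \
  -e GOHAN_API_INTERNAL_PORT=5000 \
  ghcr.io/bento-platform/gohan-api:latest
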
74 changes: 43 additions & 31 deletions src/api/services/ingestion.go
@@ -374,27 +374,25 @@ func (i *IngestionService) ProcessVcf(
// Gather Header row by seeking the CHROM string
line := scanner.Text()
if !discoveredHeaders {
if line[0] == '#' {
if strings.Contains(line, "CHROM") {
// Split the string by tabs
headers = strings.Split(line, "\t")

for id, header := range headers {
// determine if header is a default VCF header.
// if it is not, assume it's a sampleId and keep
// track of it with an id
if !utils.StringInSlice(strings.ToLower(strings.TrimSpace(strings.ReplaceAll(header, "#", ""))), constants.VcfHeaders) {
headerSampleIds[len(constants.VcfHeaders)-id] = header
}
if line[0:6] == "#CHROM" {
// Split the string by tabs
headers = strings.Split(line, "\t")

for id, header := range headers {
// determine if header is a default VCF header.
// if it is not, assume it's a sampleId and keep
// track of it with an id
if !utils.StringInSlice(strings.ToLower(strings.TrimSpace(strings.ReplaceAll(header, "#", ""))), constants.VcfHeaders) {
headerSampleIds[len(constants.VcfHeaders)-id] = header
}
}

discoveredHeaders = true
discoveredHeaders = true

fmt.Println("Found the headers: ", headers)
continue
}
fmt.Println("Found the headers: ", headers)
continue
}
continue
}

// take a spot in the queue
@@ -607,8 +605,8 @@ func (i *IngestionService) ProcessVcf(

var (
alleleStringSplits []string
alleleLeft int
alleleRight int
alleleLeft int = -1
alleleRight int = -1
errLeft error
errRight error
)
@@ -618,22 +616,36 @@ func (i *IngestionService) ProcessVcf(
alleleStringSplits = strings.Split(gtString, "/")
}

// convert string to int
// - check and handle when 'gtString' contains '.'s
if alleleStringSplits[0] == "." && alleleStringSplits[1] == "." {
alleleLeft = 0
alleleRight = 0
} else {
// -- if error, probably an unknown character -- assign -1
alleleLeft, errLeft = strconv.Atoi(alleleStringSplits[0])
if errLeft != nil {
alleleLeft = -1
switch len(alleleStringSplits) {
case 1:
if alleleStringSplits[0] == "." {
alleleLeft = 0
} else {
// -- if error, probably an unknown character -- assign -1
alleleLeft, errLeft = strconv.Atoi(alleleStringSplits[0])
if errLeft != nil {
alleleLeft = -1
}
}
case 2:
// convert string to int
// - check and handle when 'gtString' contains '.'s
if alleleStringSplits[0] == "." && alleleStringSplits[1] == "." {
alleleLeft = 0
alleleRight = 0
} else {
// -- if error, probably an unknown character -- assign -1
alleleLeft, errLeft = strconv.Atoi(alleleStringSplits[0])
if errLeft != nil {
alleleLeft = -1
}

alleleRight, errRight = strconv.Atoi(alleleStringSplits[1])
if errRight != nil {
alleleRight = -1
alleleRight, errRight = strconv.Atoi(alleleStringSplits[1])
if errRight != nil {
alleleRight = -1
}
}
// default (0) : let default -1 and -1 be handled
}

// -- zygosity:
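For illustration only (not part of the commit), a minimal standalone sketch of the haploid/diploid genotype handling introduced above, assuming the same conventions: "." maps to 0, and an unparseable or missing allele stays at -1. Function and variable names here are hypothetical.

package main

import (
	"fmt"
	"strconv"
	"strings"
)

// parseGT mirrors the new switch on the number of allele fields in a VCF GT value.
func parseGT(gtString string) (alleleLeft, alleleRight int) {
	alleleLeft, alleleRight = -1, -1

	var splits []string
	if strings.Contains(gtString, "|") {
		splits = strings.Split(gtString, "|")
	} else {
		splits = strings.Split(gtString, "/")
	}

	switch len(splits) {
	case 1: // haploid call, e.g. "1" or "."
		if splits[0] == "." {
			alleleLeft = 0
		} else if v, err := strconv.Atoi(splits[0]); err == nil {
			alleleLeft = v
		}
	case 2: // diploid call, e.g. "0/1", "1|1", "./."
		if splits[0] == "." && splits[1] == "." {
			alleleLeft, alleleRight = 0, 0
		} else {
			if v, err := strconv.Atoi(splits[0]); err == nil {
				alleleLeft = v
			}
			if v, err := strconv.Atoi(splits[1]); err == nil {
				alleleRight = v
			}
		}
	}
	// any other shape leaves both alleles at -1, as in the original default case
	return
}

func main() {
	for _, gt := range []string{"0/1", "1|1", ".", "./.", "x/1"} {
		l, r := parseGT(gt)
		fmt.Printf("%-4s -> left=%d right=%d\n", gt, l, r)
	}
}
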
1 change: 1 addition & 0 deletions src/api/workflows/main.go
@@ -14,6 +14,7 @@ var WORKFLOW_VARIANT_SCHEMA WorkflowSchema = map[string]interface{}{
"description": "This ingestion workflow will validate and ingest a BGZip-Compressed-VCF into Elasticsearch.",
"data_type": "variant",
"file": "vcf_gz.wdl",
"purpose": "ingestion",
"inputs": []map[string]interface{}{
{
"id": "vcf_gz_file_names",
