From 88146556943133097c3c149183de122388b23aa8 Mon Sep 17 00:00:00 2001 From: Oliver Stolpe Date: Thu, 1 Feb 2024 12:53:04 +0100 Subject: [PATCH] fix: strip chr prefix for grch38 (#10) - Adapted docker build and github actions - Performed isort --- .github/workflows/docker-build.yml | 52 ++++++++++++ .github/workflows/docker-cleanup-pr.yml | 19 +++++ .github/workflows/docker-cleanup-untagged.yml | 18 ++++ .gitignore | 2 + Makefile | 16 +++- README.rst | 2 +- config/celery.py | 1 + config/settings/base.py | 2 +- config/settings/production.py | 2 +- config/urls.py | 3 +- docker/Dockerfile | 42 ---------- docker/build-docker.sh | 14 ---- environment.yaml | 0 restapi/migrations/0001_initial.py | 3 +- restapi/models.py | 3 +- restapi/tasks.py | 39 +++++++-- restapi/tests.py | 8 +- restapi/urls.py | 1 + restapi/views.py | 14 +--- setup.py | 6 +- utils/docker/Dockerfile | 82 +++++++++++++++++++ utils/docker/build-docker.sh | 24 ++++++ {docker => utils/docker}/docker-entrypoint.sh | 0 23 files changed, 261 insertions(+), 92 deletions(-) create mode 100644 .github/workflows/docker-build.yml create mode 100644 .github/workflows/docker-cleanup-pr.yml create mode 100644 .github/workflows/docker-cleanup-untagged.yml delete mode 100644 docker/Dockerfile delete mode 100644 docker/build-docker.sh delete mode 100644 environment.yaml create mode 100644 utils/docker/Dockerfile create mode 100644 utils/docker/build-docker.sh rename {docker => utils/docker}/docker-entrypoint.sh (100%) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml new file mode 100644 index 0000000..2d58e4a --- /dev/null +++ b/.github/workflows/docker-build.yml @@ -0,0 +1,52 @@ +# This workflow is run as part of CI to test that they run through. +# +# The images are pushed to `ghcr.io` for each PR and branch. The ones for +# the releases are pushed in `release-please.yml`. 
+name: Docker Build + +on: + - push + - pull_request + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +jobs: + build-and-push-image: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Log in to the Container registry + uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Write VERSION file for Python package + run: | + git describe --all | tee VERSION + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + + - name: Build and push Docker image + uses: docker/build-push-action@v4 + with: + context: . + file: utils/docker/Dockerfile + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} diff --git a/.github/workflows/docker-cleanup-pr.yml b/.github/workflows/docker-cleanup-pr.yml new file mode 100644 index 0000000..e78e374 --- /dev/null +++ b/.github/workflows/docker-cleanup-pr.yml @@ -0,0 +1,19 @@ +name: Cleanup PR Images + +on: + pull_request: + types: + - closed + +jobs: + purge-image: + name: Delete PR images + runs-on: ubuntu-latest + steps: + - uses: bots-house/ghcr-delete-image-action@v1.1.0 + with: + owner: varfish-org + name: cadd-rest-api + token: ${{ secrets.GITHUB_TOKEN }} + tag: pr-${{github.event.pull_request.number}} + continue-on-error: true diff --git a/.github/workflows/docker-cleanup-untagged.yml b/.github/workflows/docker-cleanup-untagged.yml new file mode 100644 index 0000000..80a5548 --- /dev/null +++ b/.github/workflows/docker-cleanup-untagged.yml @@ -0,0 +1,18 @@ +name: Cleanup Untagged Images + +on: + workflow_dispatch: + schedule: + - cron: "0 0 * * SUN" + +jobs: + 
delete-untagged-images: + name: Delete untagged images + runs-on: ubuntu-latest + steps: + - uses: bots-house/ghcr-delete-image-action@v1.1.0 + with: + owner: varfish-org + name: cadd-rest-api + token: ${{ secrets.GITHUB_TOKEN }} + untagged-keep-latest: 3 diff --git a/.gitignore b/.gitignore index 28a7c1e..eb28a6f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +/VERSION + # SQLite database. /*.db diff --git a/Makefile b/Makefile index 0fe594d..92274ee 100644 --- a/Makefile +++ b/Makefile @@ -1,23 +1,35 @@ SHELL = /bin/bash MANAGE = time python manage.py -.PHONY: black serve _migrate migrate celery - +.PHONY: black black: black -l 100 --exclude '/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.?v?env|_build|buck-out|build|dist|src)/' . +.PHONY: isort +isort: + isort --force-sort-within-sections --profile=black . + +.PHONY: flake8 +flake8: + flake8 + +.PHONY: serve serve: $(MANAGE) runserver +.PHONY: _migrate _migrate: $(MANAGE) makemigrations $(MANAGE) migrate +.PHONY: migrate migrate: _migrate black +.PHONY: celery celery: celery worker -A config.celery_app -l info --concurrency=4 +.PHONY: test test: $(MANAGE) test --settings=config.settings.test -v2 diff --git a/README.rst b/README.rst index 9cbae52..d4e3d47 100644 --- a/README.rst +++ b/README.rst @@ -36,4 +36,4 @@ Or: .. code-block:: bash - $ docker build . --build-arg app_git_tag=v0.3.2 -t bihealth/cadd-rest-api:0.3.2-0 + $ docker build . --build-arg app_git_tag=v0.3.2 -t varfish-org/cadd-rest-api:0.3.2-0 diff --git a/config/celery.py b/config/celery.py index f407e2d..b4d6a01 100644 --- a/config/celery.py +++ b/config/celery.py @@ -1,4 +1,5 @@ import os + from celery import Celery # set the default Django settings module for the 'celery' program. diff --git a/config/settings/base.py b/config/settings/base.py index 55ba95f..f371bee 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -162,7 +162,7 @@ # Django Admin URL. 
ADMIN_URL = "admin/" # https://docs.djangoproject.com/en/dev/ref/settings/#admins -ADMINS = [("""Oliver Stolpe""", "oliver.stolpe@bihealth.de")] +ADMINS = [("""Oliver Stolpe""", "oliver.stolpe@bih-charite.de")] # https://docs.djangoproject.com/en/dev/ref/settings/#managers MANAGERS = ADMINS diff --git a/config/settings/production.py b/config/settings/production.py index 8d834cb..7f39296 100644 --- a/config/settings/production.py +++ b/config/settings/production.py @@ -73,7 +73,7 @@ # ------------------------------------------------------------------------------ # https://docs.djangoproject.com/en/dev/ref/settings/#default-from-email DEFAULT_FROM_EMAIL = env( - "DJANGO_DEFAULT_FROM_EMAIL", default="CADD REST API " + "DJANGO_DEFAULT_FROM_EMAIL", default="CADD REST API " ) # https://docs.djangoproject.com/en/dev/ref/settings/#server-email SERVER_EMAIL = env("DJANGO_SERVER_EMAIL", default=DEFAULT_FROM_EMAIL) diff --git a/config/urls.py b/config/urls.py index 76a9ccc..f1e8af4 100644 --- a/config/urls.py +++ b/config/urls.py @@ -1,6 +1,5 @@ from django.conf import settings -from django.conf.urls import url -from django.conf.urls import include +from django.conf.urls import include, url from django.conf.urls.static import static urlpatterns = [url(r"", include("restapi.urls"))] + static( diff --git a/docker/Dockerfile b/docker/Dockerfile deleted file mode 100644 index 092f537..0000000 --- a/docker/Dockerfile +++ /dev/null @@ -1,42 +0,0 @@ -FROM python:3.6-buster - -ARG app_git_url=https://github.com/bihealth/cadd-rest-api.git -ARG app_git_tag - -ENV DEBIAN_FRONTEND noninteractive - -# Copy source code into Docker image. -RUN mkdir -p /usr/src -RUN git clone --depth 1 --branch $app_git_tag $app_git_url /usr/src/app - -# Install system dependencies. 
-RUN apt-get update && \ - apt-get install -y \ - apt-utils -RUN apt-get install -y \ - gcc \ - make \ - postgresql-client - -# Install miniconda3 -RUN cd /tmp && \ - wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ - bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/miniconda3 && \ - rm Miniconda3-latest-Linux-x86_64.sh -ENV PATH="/opt/miniconda3/bin:${PATH}" -RUN conda install -c conda-forge -y mamba - -# Install packages with mamba -RUN mamba install -c conda-forge -c bioconda -y python=3.6 cadd-scripts - -# Install Python dependencies. -RUN cd /usr/src/app && \ - pip install --no-cache-dir -r requirements/production.txt - -# Define the entry point. -COPY docker-entrypoint.sh /usr/local/bin -RUN chmod +x /usr/local/bin/docker-entrypoint.sh && \ - ln -s /usr/local/bin/docker-entrypoint.sh / # backwards compat -ENTRYPOINT ["docker-entrypoint.sh"] -CMD ["wsgi"] -EXPOSE 8080/tcp diff --git a/docker/build-docker.sh b/docker/build-docker.sh deleted file mode 100644 index 250108b..0000000 --- a/docker/build-docker.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -BUILD_NO=0 - -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -cd $DIR - -GIT_DESCRIBE=$(git describe | cut -d - -f 1) -GIT_TAG=${GIT_TAG-$GIT_DESCRIBE} -DOCKER_VERSION=$(echo $GIT_TAG | sed -e 's/^v//')-$BUILD_NO - -docker build . 
\ - --build-arg app_git_tag=$GIT_TAG \ - -t bihealth/cadd-rest-api:$DOCKER_VERSION diff --git a/environment.yaml b/environment.yaml deleted file mode 100644 index e69de29..0000000 diff --git a/restapi/migrations/0001_initial.py b/restapi/migrations/0001_initial.py index 0f372ac..2f5b98d 100644 --- a/restapi/migrations/0001_initial.py +++ b/restapi/migrations/0001_initial.py @@ -2,9 +2,10 @@ # Generated by Django 1.11.24 on 2019-09-19 14:31 from __future__ import unicode_literals +import uuid + from django.db import migrations, models import jsonfield.fields -import uuid class Migration(migrations.Migration): diff --git a/restapi/models.py b/restapi/models.py index 2e9bd4a..5223a24 100644 --- a/restapi/models.py +++ b/restapi/models.py @@ -1,6 +1,7 @@ +import uuid + from django.db import models import jsonfield -import uuid STATUS_ACTIVE = "active" STATUS_FAILED = "failed" diff --git a/restapi/tasks.py b/restapi/tasks.py index c44d02c..58920b0 100644 --- a/restapi/tasks.py +++ b/restapi/tasks.py @@ -4,10 +4,11 @@ import subprocess # nosec import tempfile -from restapi.models import AnnotateBackgroundJob -from config.celery import app from celery.exceptions import SoftTimeLimitExceeded + +from config.celery import app from config.settings.base import CADD_CONDA, CADD_SH, CADD_TIMEOUT +from restapi.models import AnnotateBackgroundJob @app.task(bind=True) @@ -15,6 +16,7 @@ def annotate_background_job(_self, bgjob_uuid): """Task to execute a CADD scoring background job.""" bgjob = AnnotateBackgroundJob.objects.get(uuid=bgjob_uuid) args = bgjob.args + chrom_stripped = False if not args["variants"]: # no scores, nothing to do bgjob.scores = {} @@ -28,6 +30,9 @@ def annotate_background_job(_self, bgjob_uuid): # Write out the input file for CADD.sh with open(os.path.join(tmpdir, "in.vcf"), "wt") as vcff: for variant in args["variants"]: + if variant.startswith("chr"): + chrom_stripped = True + variant = variant[3:] print("%s\t%s\t.\t%s\t%s" % tuple(variant.split("-")), 
file=vcff) # Build command line to CADD.sh and execute. cmdline = [ @@ -61,12 +66,32 @@ def annotate_background_job(_self, bgjob_uuid): raise # Check bash return code for validity, and raise exception if it is invalid. if return_code != 0: + print("[processed variants]") + for variant in args["variants"]: + if args["genome_build"] == "GRCh38" and variant.startswith("chr"): + variant = variant[3:] + print("%s\t%s\t.\t%s\t%s" % tuple(variant.split("-"))) + try: + outs, errs = proc.communicate(timeout=15) + print("[stdout]") + print(outs.decode("utf-8")) + print("[stderr]") + print(errs.decode("utf-8")) + except subprocess.TimeoutExpired: + proc.kill() + outs, errs = proc.communicate() + print("[stdout]") + print(outs.decode("utf-8")) + print("[stderr]") + print(errs.decode("utf-8")) # Write job status to database before raising. bgjob.status = "failed" - bgjob.message = "Command line '{}' exited with error code {} and message: {}".format( - " ".join(cmdline), - return_code, - " ".join(map(lambda x: x.decode(), proc.communicate())), + bgjob.message = ( + "Command line '{}' exited with error code {} and message: {}".format( + " ".join(cmdline), + return_code, + " ".join(map(lambda x: x.decode(), proc.communicate())), + ) ) bgjob.save() raise subprocess.CalledProcessError(return_code, " ".join(cmdline)) @@ -83,6 +108,8 @@ def annotate_background_job(_self, bgjob_uuid): header = row elif header: data = dict(zip(header, row)) + if chrom_stripped and not data["#Chrom"].startswith("chr"): + data["#Chrom"] = f"chr{data['#Chrom']}" key = "-".join((data[k] for k in ("#Chrom", "Pos", "Ref", "Alt"))) val = list(map(float, (data["RawScore"], data["PHRED"]))) scores[key] = val diff --git a/restapi/tests.py b/restapi/tests.py index facad10..b25fb0c 100644 --- a/restapi/tests.py +++ b/restapi/tests.py @@ -1,11 +1,11 @@ import os from unittest.mock import patch +import uuid from django.test import TestCase from django.urls import reverse from restapi.models import 
AnnotateBackgroundJob -import uuid class TestBase(TestCase): @@ -15,8 +15,7 @@ def setUp(self): class TestAnnotateApiView(TestBase): - """Tests for AnnotateApiView. - """ + """Tests for AnnotateApiView.""" # Patching the CADD_SH variable to point to the CADD.sh mocking script. # This script will return a valid CADD file with made-up data. @@ -36,8 +35,7 @@ def test_post_annotate_request(self): class TestResultApiView(TestBase): - """Tests for ResultApiView. - """ + """Tests for ResultApiView.""" def test_bgjob_doesnt_exist(self): bgjob_uuid = str(uuid.uuid4()) diff --git a/restapi/urls.py b/restapi/urls.py index f65ad3e..8b08baf 100644 --- a/restapi/urls.py +++ b/restapi/urls.py @@ -1,5 +1,6 @@ from django.conf.urls import url from django.views.generic import TemplateView + from . import views app_name = "restapi" diff --git a/restapi/views.py b/restapi/views.py index 09a3290..7db681c 100644 --- a/restapi/views.py +++ b/restapi/views.py @@ -8,7 +8,6 @@ from restapi.models import AnnotateBackgroundJob from restapi.tasks import annotate_background_job - #: Regular expression to parse variants with. 
RE_VAR = ( r"^(?P[a-zA-Z0-9\._])+-(?P\d+)-" @@ -25,11 +24,7 @@ def post(self, *args, **kwargs): data = { "genome_build": genomebuild, "cadd_release": self.request.data.get("cadd_release"), - "variants": [ - _normalize_vars(var, genomebuild) - for var in self.request.data.get("variant") - if re.search(RE_VAR, var) - ], + "variants": [var for var in self.request.data.get("variant") if re.search(RE_VAR, var)], } try: bgjob = AnnotateBackgroundJob.objects.create( @@ -74,10 +69,3 @@ def post(self, *args, **kwargs): if bgjob.status in ("finished", "failed"): bgjob.delete() return JsonResponse(response) - - -def _normalize_vars(var, genomebuild): - """Normalize variants regarding the ``"chr"`` prefix.""" - if genomebuild == "GRCh37": - return var[3:] if var.startswith("chr") else var - return var if var.startswith("chr") else ("chr" + var) diff --git a/setup.py b/setup.py index 623eda4..8ec8fc2 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ import os.path -from setuptools import setup, find_packages +from setuptools import find_packages, setup import versioneer @@ -34,7 +34,7 @@ def parse_requirements(path): setup( author="Manuel Holtgrewe, Oliver Stolpe", - author_email=("manuel.holtgrewe@bihealth.de, oliver.stolpe@bihealth.de"), + author_email=("manuel.holtgrewe@bih-charite.de, oliver.stolpe@bih-charite.de"), classifiers=[ "Development Status :: 4 - Beta", "Intended Audience :: Developers", @@ -55,7 +55,7 @@ def parse_requirements(path): keywords="cadd varfish", name="cadd-rest-api", packages=find_packages(), - url="https://github.com/bihealth/cadd-rest-api", + url="https://github.com/varfish-org/cadd-rest-api", version=versioneer.get_version(), cmdclass=versioneer.get_cmdclass(), zip_safe=False, diff --git a/utils/docker/Dockerfile b/utils/docker/Dockerfile new file mode 100644 index 0000000..3862097 --- /dev/null +++ b/utils/docker/Dockerfile @@ -0,0 +1,82 @@ +# syntax=docker/dockerfile:1.3 + +# 
--------------------------------------------------------------------------- +# Base +# --------------------------------------------------------------------------- + +FROM python:3.8 AS base + +ENV LANG C.UTF-8 +ENV LC_ALL C.UTF-8 +ENV PYTHONDONTWRITEBYTECODE 1 +ENV PYTHONFAULTHANDLER 1 + +WORKDIR /usr/src/app + +LABEL org.opencontainers.image.authors="Oliver Stolpe " +LABEL org.opencontainers.image.source="https://github.com/varfish-org/cadd-rest-api" + +RUN apt-get update && apt-get install -y curl +RUN apt-get clean autoclean && \ + apt-get autoremove --yes && \ + rm -rf /var/lib/{apt,dpkg,cache,log}/ + +# --------------------------------------------------------------------------- +# Dependencies +# --------------------------------------------------------------------------- + +FROM base AS deps + +RUN pip install pipenv +RUN apt-get update && apt-get install -y --no-install-recommends \ + apt-utils \ + gcc \ + make \ + wget \ + postgresql-client + +# Install Python dependencies with pip into a virtualenv. +COPY requirements requirements +RUN virtualenv .venv && \ + . .venv/bin/activate && \ + pip install --no-cache-dir -r requirements/production.txt + +# --------------------------------------------------------------------------- +# Runtime +# --------------------------------------------------------------------------- + +FROM base AS runtime + +# Add the wait script to the image +ADD https://github.com/ufoscout/docker-compose-wait/releases/download/2.7.3/wait /usr/local/bin/wait +RUN chmod +x /usr/local/bin/wait + +# Install Mambaforge and the cadd-scripts conda package. 
+RUN ["/bin/bash","-c", "cd /tmp && \ + wget https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh && \ + bash Mambaforge-Linux-x86_64.sh -b -p /opt/miniconda3 && \ + source /opt/miniconda3/bin/activate && \ + conda install -c conda-forge -y mamba && \ + mamba install -c conda-forge -c bioconda -y \ + cadd-scripts==1.6.0 && \ + rm -f Mambaforge-Linux-x86_64.sh"] + +COPY --from=deps /usr/src/app/.venv /usr/src/app/.venv + +ENV PATH="/usr/src/app/.venv/bin:${PATH}" + +# Temporarily set up environment variables for ``manage.py`` commands below. +# We will clear them again later. +ENV DJANGO_SECRET_KEY=for-build-only \ + DJANGO_SETTINGS_MODULE=config.settings.production \ + DATABASE_URL=postgres://cadd-rest-api:cadd-rest-api@fake/cadd-rest-api + +# Copy source code +COPY . . + +# Define entrypoint +COPY utils/docker/docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh +RUN chmod +x /usr/local/bin/docker-entrypoint.sh +ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"] +CMD ["wsgi"] +EXPOSE 8080/tcp diff --git a/utils/docker/build-docker.sh b/utils/docker/build-docker.sh new file mode 100644 index 0000000..96a6e2e --- /dev/null +++ b/utils/docker/build-docker.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# Utility script to start the Docker build process. + +set -x +set -euo pipefail + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +# Write /VERSION file for server to know its version. +git describe --all >$DIR/../../VERSION + +# Obtain version for the Docker image. +IMAGE_TAG=${IMAGE_TAG:-adhoc} + +# Explicitly set organization and repository name for Docker image. +ORG=varfish-org +REPO=cadd-rest-api + +# Actually start the Docker build. +docker build . \ + --file $DIR/Dockerfile \ + -t ghcr.io/$ORG/$REPO:$IMAGE_TAG \ + "$@" diff --git a/docker/docker-entrypoint.sh b/utils/docker/docker-entrypoint.sh similarity index 100% rename from docker/docker-entrypoint.sh rename to utils/docker/docker-entrypoint.sh