From 9fe317187fbb7957c5a278b991e32da9f9e02062 Mon Sep 17 00:00:00 2001
From: Rodrigo Gallardo
Date: Fri, 28 Jun 2024 10:57:11 +0000
Subject: [PATCH] Create CD pipeline

---
Review notes (placed below the cut line, so not part of the commit message):

* cd.yml, step "Get the branch name": the shell script body started with a
  stray `run:` token, so the shell would have treated `run:` as the command
  name instead of running `echo`. The token is removed and `$GITHUB_OUTPUT`
  is quoted. The hunk still adds 52 lines.
* NOTE(review): the workflow triggers on `[push, pull_request]`, while
  docs/challenge.md states that the CD workflows only run on pushes to
  `main`, `develop` or `release`. The narrower trigger is still present as a
  commented-out block; confirm which one is intended before merging.
* NOTE(review): the blob id on the cd.yml `index` line predates the fix
  above; regenerate the patch with `git format-patch` if the id matters.

 .github/workflows/cd.yml | 52 ++++++++++++++++++++++++++++++++++++++++
 docs/challenge.md        |  2 ++
 train.py                 | 26 ++++++++++++++++++++
 3 files changed, 80 insertions(+)
 create mode 100644 .github/workflows/cd.yml
 create mode 100644 train.py

diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml
new file mode 100644
index 0000000..352ad56
--- /dev/null
+++ b/.github/workflows/cd.yml
@@ -0,0 +1,52 @@
+name: 'Continuous Deployment'
+
+#on:
+#  push:
+#    branches:
+#      - main
+#      - develop
+#      - release/*
+on: [push, pull_request]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Get the branch name
+        run: |
+          echo "branch=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> "$GITHUB_OUTPUT"
+        id: get_branch_name
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.9'
+
+      - name: Authenticate to GCP
+        uses: 'google-github-actions/auth@v2'
+        with:
+          credentials_json: '${{ secrets.CD_SA_KEYS }}'
+
+      - name: Install dependencies
+        run: |
+          pip install -r requirements.txt -r requirements-dev.txt -r requirements-test.txt
+
+      - name: Run training script
+        run: |
+          python train.py
+
+      - name: Authenticate Docker to GAR
+        uses: docker/login-action@v3
+        with:
+          registry: '${{ vars.GCP_REGION }}-docker.pkg.dev'
+          username: _json_key
+          password: ${{ secrets.CD_SA_KEYS }}
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v6
+        with:
+          push: true
+          tags: '${{ vars.GAR_REPOSITORY }}/${{ vars.GAR_IMAGE_NAME }}'
diff --git a/docs/challenge.md b/docs/challenge.md
index 9e021d8..4089a5c 100644
--- a/docs/challenge.md
+++ b/docs/challenge.md
@@ -133,3 +133,5 @@ The CI workflows focus on running the tests and assesing the quality of the code
 The CD workflows focus on training the model, deploying the API and running the stress testing against it. These workflows only run when there's a push to the `main`, `develop` or `release` branches on the repository.
 
 * Undesirable model tracking
+* env variables ci/cd
+* makefile argument
diff --git a/train.py b/train.py
new file mode 100644
index 0000000..8120c30
--- /dev/null
+++ b/train.py
@@ -0,0 +1,26 @@
+from challenge.model import DelayModel
+import pandas as pd
+
+print("Loading data...")
+# Read the data
+df = pd.read_csv("data/data.csv")
+print("-> Data loaded")
+
+# Create the model
+model = DelayModel()
+
+print("Preprocessing data...")
+# Preprocess the data
+X_train, y_train = model.preprocess(df, "delay")
+print("-> Preprocessed data")
+
+
+print("Training model...")
+# Train the model
+model.fit(X_train, y_train)
+print("-> Model trained")
+
+print("Saving model...")
+# Store the model
+model.save("challenge/tmp/model_checkpoint.pkl")
+print("-> Model saved")