Skip to content
This repository was archived by the owner on Dec 2, 2021. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,28 @@ SUBSCRIPTION_ID = ''
LOCATION = ''
BASE_NAME = ''
RESOURCE_GROUP = ''

# Observability related
APPLICATIONINSIGHTS_CONNECTION_STRING = ''
LOG_TO_CONSOLE = 'false'
# DEBUG, INFO, WARNING, ERROR, CRITICAL
LOG_LEVEL = 'DEBUG'
# Probability 0.0 -> 1.0
LOG_SAMPLING_RATE = '1.0'
# Probability 0.0 -> 1.0
TRACE_SAMPLING_RATE = '1.0'
# Seconds
METRICS_EXPORT_INTERVAL = '15'

# Azure ML Workspace Variables
WORKSPACE_NAME = ''
ACI_DEPLOYMENT_NAME = ''

####################################################
# Variables that are defined in variables-template.yml
# they determine _how_ the project runs
####################################################
SOURCES_DIR_TRAIN = 'ml_model'
SOURCES_DIR_TRAIN = '.'
EXPERIMENT_NAME = 'flower_classification'
DATASET_NAME = 'flower_dataset'
# Optional. Set it if you have configured a non-default datastore to point to your data
Expand Down
2 changes: 2 additions & 0 deletions .pipelines/02-processing-data.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ stages:
# Invoke the Python module that builds and publishes a data preprocessing pipeline
python -m ml_service.pipelines.build_data_processing_pipeline
displayName: 'Publish Data Preprocessing Pipeline'
env:
APPLICATIONINSIGHTS_CONNECTION_STRING: $(APPLICATIONINSIGHTS_CONNECTION_STRING)

# Trigger_Preprocessing_Pipeline
- template: trigger-preprocessing-pipeline.yml
Expand Down
6 changes: 6 additions & 0 deletions .pipelines/03-train-evaluate-register-model.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ stages:
# Invoke the Python module that builds and publishes a training pipeline
python -m ml_service.pipelines.build_training_pipeline
displayName: 'Publish Azure Machine Learning Pipeline'
env:
APPLICATIONINSIGHTS_CONNECTION_STRING: $(APPLICATIONINSIGHTS_CONNECTION_STRING)

- stage: 'Trigger_Training_Pipeline'
displayName: 'Train and evaluate model'
Expand All @@ -75,6 +77,7 @@ stages:
container: mlops
timeoutInMinutes: 0
steps:
- template: update-ci-dependencies.yml
- task: AzureCLI@1
inputs:
azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
Expand All @@ -89,6 +92,9 @@ stages:
echo "##vso[task.setvariable variable=AMLPIPELINEID;isOutput=true]$AMLPIPELINEID"
name: 'getpipelineid'
displayName: 'Get Pipeline ID'
env:
APPLICATIONINSIGHTS_CONNECTION_STRING: $(APPLICATIONINSIGHTS_CONNECTION_STRING)

- job: "Run_ML_Pipeline"
dependsOn: "Get_Pipeline_ID"
displayName: "Trigger ML Training Pipeline"
Expand Down
2 changes: 2 additions & 0 deletions .pipelines/04-deploy-model-aci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,5 @@ stages:
set -e # fail on error
export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
python -m ml_service.util.smoke_test_scoring_service --service "$(ACI_DEPLOYMENT_NAME)"
env:
APPLICATIONINSIGHTS_CONNECTION_STRING: $(APPLICATIONINSIGHTS_CONNECTION_STRING)
2 changes: 2 additions & 0 deletions .pipelines/07-processing-data-os-cmd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ stages:
# Invoke the Python module that builds and publishes a data preprocessing pipeline
python -m ml_service.pipelines.build_data_processing_os_cmd_pipeline
displayName: 'Publish Data Preprocessing OS cmd Pipeline'
env:
APPLICATIONINSIGHTS_CONNECTION_STRING: $(APPLICATIONINSIGHTS_CONNECTION_STRING)

# Trigger_Preprocessing_Pipeline
- template: trigger-preprocessing-pipeline.yml
Expand Down
2 changes: 2 additions & 0 deletions .pipelines/code-quality-template.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Pipeline template to run linting, unit tests with code coverage, and publish the results.
steps:
- template: update-ci-dependencies.yml

- script: |
flake8 --output-file=lint-testresults.xml --format junit-xml
displayName: 'Run lint tests'
Expand Down
4 changes: 4 additions & 0 deletions .pipelines/trigger-preprocessing-pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ stages:
container: mlops
timeoutInMinutes: 0
steps:
- template: update-ci-dependencies.yml
- task: AzureCLI@1
inputs:
azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
Expand All @@ -30,6 +31,9 @@ stages:
echo "##vso[task.setvariable variable=PREPROCESSPIPELINEID;isOutput=true]$PREPROCESSPIPELINEID"
name: 'getpreprocessingpipelineid'
displayName: 'Get Preprocessing Pipeline ID of ${{ parameters.aml_pipeline_name }}'
env:
APPLICATIONINSIGHTS_CONNECTION_STRING: $(APPLICATIONINSIGHTS_CONNECTION_STRING)

- job: "Run_Data_Processing_Pipeline"
dependsOn: "Get_Preprocessing_Pipeline_ID"
displayName: "Trigger Preprocessing Pipeline ${{ parameters.aml_pipeline_name }}"
Expand Down
5 changes: 5 additions & 0 deletions .pipelines/update-ci-dependencies.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
steps:
# This step updates the build agent's `ci` conda environment with the latest CI dependencies from ml_model/ci_dependencies.yml
- script: |
conda env update -f ml_model/ci_dependencies.yml -n ci
displayName: 'Update missing dependencies for current branch on build agent'
26 changes: 23 additions & 3 deletions .pipelines/variables-template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
variables:
# The directory containing the scripts for training, evaluating, and registering the model
- name: SOURCES_DIR_TRAIN
value: ml_model
value: '.'

# Azure ML Variables
- name: EXPERIMENT_NAME
Expand Down Expand Up @@ -32,8 +32,8 @@ variables:
- name: ALLOW_RUN_CANCEL
value: "false"
# Flag to allow rebuilding the AML Environment after it was built for the first time. This enables dependency updates from conda_dependencies.yml.
# - name: AML_REBUILD_ENVIRONMENT
# value: "false"
- name: AML_REBUILD_ENVIRONMENT
value: "true"
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We only want to rebuild the first time the dependencies are changed, right? Perhaps leave the default as false, and set the ADO variable to true?


# AML Environment Config
- name: AML_ENV_NAME
Expand All @@ -42,6 +42,8 @@ variables:
value: flower_custom_preprocess_env

# AML Compute Cluster Config
- name: AML_ENV_TRAIN_CONDA_DEP_FILE
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this var used?

value: "ml_model/conda_dependencies.yml"
- name: AML_COMPUTE_CLUSTER_CPU_SKU
value: STANDARD_DS2_V2
- name: AML_COMPUTE_CLUSTER_NAME
Expand All @@ -52,6 +54,24 @@ variables:
value: 0
- name: AML_CLUSTER_MAX_NODES
value: 4
- name: AML_CLUSTER_PRIORITY
value: lowpriority

# Observability related
- name: LOG_TO_CONSOLE
value: 'false'
- name: LOG_LEVEL
value: 'INFO' # DEBUG, INFO, WARNING, ERROR, CRITICAL
- name: LOG_SAMPLING_RATE
value: '1.0' # Probability 0.0 -> 1.0
- name: TRACE_SAMPLING_RATE
value: '1.0' # Probability 0.0 -> 1.0
- name: METRICS_EXPORT_INTERVAL
value: '15' # Seconds

# The name for the (docker/webapp) scoring image
- name: IMAGE_NAME
value: "flowerclassifier"

# AML pipelines can run outside of Azure DevOps, these parameters control AML pipeline behaviors
- name: PREPROCESSING_PARAM
Expand Down
7 changes: 7 additions & 0 deletions ml_model/ci_dependencies.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,11 @@ dependencies:
- tensorflow==2.3.*
- keras==2.4.*

# Observability
- dataclasses==0.6
- opencensus==0.7.11
- opencensus-ext-httplib==0.7.3
- opencensus-ext-logging==0.1.0
- opencensus-context==0.1.2
- opencensus-ext-azure==1.0.5

9 changes: 9 additions & 0 deletions ml_model/conda_dependencies.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,12 @@ dependencies:
# Training deps
- tensorflow==2.3.*
- keras==2.4.*

# Observability
- python-dotenv==0.12.*
- dataclasses==0.6
- opencensus==0.7.11
- opencensus-ext-httplib==0.7.3
- opencensus-ext-logging==0.1.0
- opencensus-context==0.1.2
- opencensus-ext-azure==1.0.5
8 changes: 7 additions & 1 deletion ml_model/dev_dependencies.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,10 @@ dependencies:
- keras==2.4.*
- debugpy


# Observability
- dataclasses==0.6
- opencensus==0.7.11
- opencensus-ext-httplib==0.7.3
- opencensus-ext-logging==0.1.0
- opencensus-context==0.1.2
- opencensus-ext-azure==1.0.5
21 changes: 15 additions & 6 deletions ml_model/evaluate/evaluate_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@
"""
from azureml.core import Run
import argparse
from util.model_helper import get_model
from ml_model.util.model_helper import get_model
from ml_service.util.logger.observability import observability


def evaluate_model_performs_better(model, run):
Expand All @@ -37,13 +38,14 @@ def evaluate_model_performs_better(model, run):
if (production_model_accuracy is None or new_model_accuracy is None):
raise Exception(f"Unable to find {metric_eval} metrics, exiting evaluation") # NOQA: E501
else:
print(f"Current model accuracy: {production_model_accuracy}, new model accuracy: {new_model_accuracy}") # NOQA: E501
observability.log(f"Current model accuracy: {production_model_accuracy}, new model accuracy: {new_model_accuracy}") # NOQA: E501

if (new_model_accuracy > production_model_accuracy):
print("New model performs better, register it")
observability.log("New model performs better, register it")
return True
else:
print("New model doesn't perform better, skip registration")
observability.log("New model doesn't perform better,"
" skip registration")
return False


Expand Down Expand Up @@ -91,8 +93,15 @@ def main():
if(not should_register and (allow_run_cancel).lower() == 'true'):
run.parent.cancel()
else:
print("This is the first model, register it")
observability.log("This is the first model, register it")


if __name__ == '__main__':
main()
observability.start_span('evaluate_model')
try:
main()
except Exception as exception:
observability.exception(exception)
raise exception
finally:
observability.end_span()
10 changes: 10 additions & 0 deletions ml_model/preprocessing/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,18 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}
~/miniconda/bin/conda clean -tipsy
ENV PATH="/home/dockeruser/miniconda/bin/:${PATH}"

USER root

RUN apt-get update --fix-missing && \
apt-get install -y build-essential

# Create conda environment for dockeruser user
USER dockeruser

RUN conda install -y conda=${CONDA_VERSION} python=${PYTHON_VERSION} && \
pip install azureml-defaults==${AZUREML_SDK_VERSION} inference-schema==${INFERENCE_SCHEMA_VERSION} &&\
pip install python-dotenv==0.12.* dataclasses==0.6 opencensus==0.7.11 opencensus-ext-httplib==0.7.3 \
opencensus-ext-azure==1.0.5 opencensus-ext-logging==0.1.0 opencensus-context==0.1.2 && \
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we use the conda dependencies yml, to be consistent, rather than running pip install directly?

conda clean -aqy && \
rm -rf ~/miniconda/pkgs && \
find ~/miniconda/ -type d -name __pycache__ -prune -exec rm -rf {} \;
35 changes: 22 additions & 13 deletions ml_model/preprocessing/preprocess_aml.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,13 @@
from azureml.core.run import Run
import argparse
import json
from preprocess_images import resize_images
from util.model_helper import get_or_register_dataset, get_aml_context
from ml_model.preprocessing.preprocess_images import resize_images
from ml_model.util.model_helper import get_or_register_dataset, get_aml_context
from ml_service.util.logger.observability import observability


def main():
print("Running preprocess.py")
observability.log("Running preprocess.py")

parser = argparse.ArgumentParser("preprocess")
parser.add_argument(
Expand Down Expand Up @@ -69,11 +70,12 @@ def main():

args = parser.parse_args()

print("Argument [dataset_name]: %s" % args.dataset_name)
print("Argument [datastore_name]: %s" % args.datastore_name)
print("Argument [data_file_path]: %s" % args.data_file_path)
print("Argument [output_dataset]: %s" % args.output_dataset)
print("Argument [preprocessing_param]: %s" % args.preprocessing_param)
observability.log("Argument [dataset_name]: %s" % args.dataset_name)
observability.log("Argument [datastore_name]: %s" % args.datastore_name)
observability.log("Argument [data_file_path]: %s" % args.data_file_path)
observability.log("Argument [output_dataset]: %s" % args.output_dataset)
observability.log("Argument [preprocessing_param]: %s"
% args.preprocessing_param)

data_file_path = args.data_file_path
dataset_name = args.dataset_name
Expand All @@ -85,12 +87,12 @@ def main():
aml_workspace, *_ = get_aml_context(run)

if preprocessing_param is None or preprocessing_param == "":
with open("parameters.json") as f:
with open("ml_model/parameters.json") as f:
pars = json.load(f)
preprocessing_args = pars["preprocessing"]
else:
preprocessing_args = json.loads(preprocessing_param)
print(f"preprocessing parameters {preprocessing_args}")
observability.log(f"preprocessing parameters {preprocessing_args}")
for (k, v) in preprocessing_args.items():
run.log(k, v)
run.parent.log(k, v)
Expand All @@ -107,15 +109,22 @@ def main():
# Process data
mount_context = dataset.mount()
mount_context.start()
print(f"mount_point is: {mount_context.mount_point}")
observability.log(f"mount_point is: {mount_context.mount_point}")
resize_images(mount_context.mount_point, output_dataset, preprocessing_args) # NOQA: E501
mount_context.stop()

run.tag("run_type", value="preprocess")
print(f"tags now present for run: {run.tags}")
observability.log(f"tags now present for run: {run.tags}")

run.complete()


if __name__ == '__main__':
main()
observability.start_span('preprocess_aml')
try:
main()
except Exception as exception:
observability.exception(exception)
raise exception
finally:
observability.end_span()
Loading