From e36368bb682d15925132a1eec9afc2eab3ed8551 Mon Sep 17 00:00:00 2001
From: Florian Wagner <flwagner@microsoft.com>
Date: Fri, 20 Nov 2020 08:28:08 +0000
Subject: [PATCH 01/23] Modified Observability added dependency tracing

Signed-off-by: Florian Wagner <flwagner@microsoft.com>
---
 ml_model/conda_dependencies.yml               |   7 +
 ml_model/dev_dependencies.yml                 |   7 +-
 ml_model/preprocessing/preprocess_aml.py      |  14 +-
 .../build_data_processing_pipeline.py         |  19 +-
 ml_service/tests/__init__.py                  |   0
 ml_service/tests/util/__init__.py             |   0
 .../tests/util/test_app_insights_logger.py    |  72 +++++++
 ml_service/tests/util/test_azure_ml_logger.py |  32 ++++
 ml_service/tests/util/test_observability.py   |  55 ++++++
 ml_service/util/env_variables.py              |   4 +-
 ml_service/util/logger/__init__.py            |   0
 ml_service/util/logger/app_insights_logger.py | 176 ++++++++++++++++++
 ml_service/util/logger/azure_ml_logger.py     |  49 +++++
 ml_service/util/logger/console_logger.py      |  61 ++++++
 ml_service/util/logger/logger_interface.py    |  83 +++++++++
 ml_service/util/logger/observability.py       | 133 +++++++++++++
 16 files changed, 700 insertions(+), 12 deletions(-)
 create mode 100644 ml_service/tests/__init__.py
 create mode 100644 ml_service/tests/util/__init__.py
 create mode 100644 ml_service/tests/util/test_app_insights_logger.py
 create mode 100644 ml_service/tests/util/test_azure_ml_logger.py
 create mode 100644 ml_service/tests/util/test_observability.py
 create mode 100644 ml_service/util/logger/__init__.py
 create mode 100644 ml_service/util/logger/app_insights_logger.py
 create mode 100644 ml_service/util/logger/azure_ml_logger.py
 create mode 100644 ml_service/util/logger/console_logger.py
 create mode 100644 ml_service/util/logger/logger_interface.py
 create mode 100644 ml_service/util/logger/observability.py

diff --git a/ml_model/conda_dependencies.yml b/ml_model/conda_dependencies.yml
index dd6a1eb0..e73f1c28 100644
--- a/ml_model/conda_dependencies.yml
+++ b/ml_model/conda_dependencies.yml
@@ -23,3 +23,10 @@ dependencies:
       # Training deps
       - tensorflow==2.3.*
       - keras==2.4.*
+
+      # Observability
+      - opencensus==0.7.11
+      - opencensus-ext-httplib==0.7.3
+      - opencensus-ext-logging==0.1.0
+      - opencensus-context==0.1.2
+      - opencensus-ext-azure==1.0.5
diff --git a/ml_model/dev_dependencies.yml b/ml_model/dev_dependencies.yml
index 43f42db9..c10a8b94 100644
--- a/ml_model/dev_dependencies.yml
+++ b/ml_model/dev_dependencies.yml
@@ -29,4 +29,9 @@ dependencies:
       - keras==2.4.*
       - debugpy
 
-
+      # Observability
+      - opencensus==0.7.11
+      - opencensus-ext-httplib==0.7.3
+      - opencensus-ext-logging==0.1.0
+      - opencensus-context==0.1.2
+      - opencensus-ext-azure==1.0.5
\ No newline at end of file
diff --git a/ml_model/preprocessing/preprocess_aml.py b/ml_model/preprocessing/preprocess_aml.py
index 45ec3bf6..474bdd3b 100644
--- a/ml_model/preprocessing/preprocess_aml.py
+++ b/ml_model/preprocessing/preprocess_aml.py
@@ -28,10 +28,14 @@
 import json
 from preprocess_images import resize_images
 from util.model_helper import get_or_register_dataset, get_aml_context
+from ml_service.util.logger.observability import Observability
+
+observability = Observability()
 
 
 def main():
-    print("Running preprocess.py")
+    observability.start_span()
+    observability.log("Running preprocess.py")
 
     parser = argparse.ArgumentParser("preprocess")
     parser.add_argument(
@@ -90,7 +94,7 @@ def main():
             preprocessing_args = pars["preprocessing"]
     else:
         preprocessing_args = json.loads(preprocessing_param)
-    print(f"preprocessing parameters {preprocessing_args}")
+    observability.log(f"preprocessing parameters {preprocessing_args}")
     for (k, v) in preprocessing_args.items():
         run.log(k, v)
         run.parent.log(k, v)
@@ -107,15 +111,17 @@ def main():
     # Process data
     mount_context = dataset.mount()
     mount_context.start()
-    print(f"mount_point is: {mount_context.mount_point}")
+    observability.log(f"mount_point is: {mount_context.mount_point}")
     resize_images(mount_context.mount_point, output_dataset, preprocessing_args)  # NOQA: E501
     mount_context.stop()
 
     run.tag("run_type", value="preprocess")
-    print(f"tags now present for run: {run.tags}")
+    observability.log(f"tags now present for run: {run.tags}")
 
     run.complete()
 
+    observability.end_span()
+
 
 if __name__ == '__main__':
     main()
diff --git a/ml_service/pipelines/build_data_processing_pipeline.py b/ml_service/pipelines/build_data_processing_pipeline.py
index c84af6d8..1d9b0a83 100644
--- a/ml_service/pipelines/build_data_processing_pipeline.py
+++ b/ml_service/pipelines/build_data_processing_pipeline.py
@@ -7,9 +7,15 @@
 from ml_service.util.attach_compute import get_compute
 from ml_service.util.env_variables import Env
 from ml_service.util.manage_environment import get_environment
+from ml_service.util.logger.logger_interface import Severity
+from ml_service.util.logger.observability import Observability
+
+observability = Observability()
 
 
 def main():
+    observability.start_span()
+
     e = Env()
     # Get Azure machine learning workspace
     aml_workspace = Workspace.get(
@@ -17,12 +23,12 @@ def main():
         subscription_id=e.subscription_id,
         resource_group=e.resource_group,
     )
-    print(f"get_workspace:{aml_workspace}")
-
+    observability.log(f"get_workspace:{aml_workspace}")
+    observability.log(f"ohoh", Severity.CRITICAL)
     # Get Azure machine learning cluster
     aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
     if aml_compute is not None:
-        print(f"aml_compute:{aml_compute}")
+        observability.log(f"aml_compute:{aml_compute}")
 
     # Create a reusable Azure ML environment
     environment = get_environment(
@@ -74,7 +80,7 @@ def main():
         runconfig=run_config,
         allow_reuse=False,
     )
-    print("Step Preprocess created")
+    observability.log("Step Preprocess created")
 
     steps = [preprocess_step]
     preprocess_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
@@ -85,8 +91,9 @@ def main():
         description="Data preprocessing pipeline",
         version=e.build_id,
     )
-    print(f"Published pipeline: {published_pipeline.name}")
-    print(f"for build {published_pipeline.version}")
+    observability.log(f"Published pipeline: {published_pipeline.name}")
+    observability.log(f"for build {published_pipeline.version}")
+    observability.end_span()
 
 
 if __name__ == "__main__":
diff --git a/ml_service/tests/__init__.py b/ml_service/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/ml_service/tests/util/__init__.py b/ml_service/tests/util/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/ml_service/tests/util/test_app_insights_logger.py b/ml_service/tests/util/test_app_insights_logger.py
new file mode 100644
index 00000000..e3693f14
--- /dev/null
+++ b/ml_service/tests/util/test_app_insights_logger.py
@@ -0,0 +1,72 @@
+import logging
+import unittest
+from unittest.mock import patch
+
+from ml_service.util.logger.app_insights_logger import AppInsightsLogger
+
+
+class RealAppInsightsLogger(AppInsightsLogger):
+    def __init__(self):
+        self.logger = logging.getLogger(__name__)
+        self.env = MockEnv("")
+
+
+class MockRun:
+    def __init__(self, run_id):
+        self.id = run_id
+
+
+class MockEnv:
+    def __init__(self, run_id):
+        self.build_id = run_id
+
+
+class TestObservability(unittest.TestCase):
+    @patch("ml_service.util.logger.app_insights_logger.AppInsightsLogger")
+    def setUp(cls, mock_app_insights_logger):
+        cls.concert_app_insights_logger = RealAppInsightsLogger()
+        cls.mock_app_insights_logger = mock_app_insights_logger
+
+    def test_get_run_id_having_online_context(self):
+        expected = "FOO"
+
+        response = self.concert_app_insights_logger.get_run_id(MockRun("FOO"))
+
+        self.assertEqual(expected, response)
+
+    def test_get_run_id_having_online_context_using_build_id(self):
+        self.concert_app_insights_logger.env.build_id = expected = "FOO"
+
+        response = self.concert_app_insights_logger.\
+            get_run_id(MockRun("OfflineRun"))
+
+        self.assertEqual(expected, response)
+
+    def test_get_run_id_having_online_context_using_uuid(self):
+        self.concert_app_insights_logger.env.build_id = ""
+
+        response = self.concert_app_insights_logger.\
+            get_run_id(MockRun("OfflineRun"))
+
+        self.assertIsNotNone(response)
+
+    def test_log_called_with_parameters(self):
+        self.mock_app_insights_logger.log("FOO", "BAZ")
+
+        self.mock_app_insights_logger.log.assert_called_with("FOO", "BAZ")
+
+    def test_log_metric_called_with_parameters(self):
+        self.mock_app_insights_logger.log_metric("FOO", "BAZ", "BAR", False)
+
+        self.mock_app_insights_logger.log_metric.assert_called_with(
+            "FOO", "BAZ", "BAR", False
+        )
+
+    def test_set_view_is_called_with_parameters(self):
+        self.mock_app_insights_logger.set_view("FOO", "BAR", "BAZ")
+        self.mock_app_insights_logger.set_view.\
+            assert_called_with("FOO", "BAR", "BAZ")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/ml_service/tests/util/test_azure_ml_logger.py b/ml_service/tests/util/test_azure_ml_logger.py
new file mode 100644
index 00000000..992e0743
--- /dev/null
+++ b/ml_service/tests/util/test_azure_ml_logger.py
@@ -0,0 +1,32 @@
+import unittest
+from unittest.mock import patch
+
+from ml_service.util.logger.azure_ml_logger import AzureMlLogger
+
+
+class TestObservability(unittest.TestCase):
+    @patch("ml_service.util.logger.azure_ml_logger.AzureMlLogger")
+    def setUp(cls, mock_azure_ml_logger):
+        cls.azure_ml_logger = mock_azure_ml_logger
+
+    def test_log_called_with_parameters(self):
+        self.azure_ml_logger.log("FOO", "BAZ")
+
+        self.azure_ml_logger.log.assert_called_with("FOO", "BAZ")
+
+    def test_log_metric_called_with_parameters(self):
+        self.azure_ml_logger.log_metric("FOO", "BAZ", "BAR")
+
+        self.azure_ml_logger.log_metric.assert_called_with("FOO", "BAZ", "BAR")
+
+    def test_get_callee_returns_callee_file_with_line_number(self):
+        azure_ml_logger = AzureMlLogger()
+        expected = "test_azure_ml_logger.py:26"
+
+        response = azure_ml_logger.get_callee(0)
+
+        self.assertEqual(expected, response)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/ml_service/tests/util/test_observability.py b/ml_service/tests/util/test_observability.py
new file mode 100644
index 00000000..cde11718
--- /dev/null
+++ b/ml_service/tests/util/test_observability.py
@@ -0,0 +1,55 @@
+import unittest
+from unittest.mock import patch
+
+from ml_service.util.logger.observability import Observability
+
+
+class ObservabilityMock(Observability):
+    @patch("ml_service.util.logger.app_insights_logger.AppInsightsLogger")
+    @patch("ml_service.util.logger.azure_ml_logger.AzureMlLogger")
+    @patch("ml_service.util.logger.observability.Loggers")
+    def __init__(self, mock_loggers, mock_aml_logger, mock_app_insight_logger):
+        mock_loggers.loggers = [mock_aml_logger, mock_app_insight_logger]
+        self._loggers = mock_loggers
+
+
+class TestObservability(unittest.TestCase):
+    @patch("ml_service.util.logger.observability.Observability")
+    def setUp(cls, mock_observability):
+        cls.observability = mock_observability
+
+    def test_log_metric_called_with_parameters(self):
+        self.observability.log_metric("FOO", "BAZ", "BAR")
+
+        self.observability.log_metric.assert_called_with("FOO", "BAZ", "BAR")
+
+    def test_log_called_with_parameters(self):
+        self.observability.log("FOO", "BAZ")
+
+        self.observability.log.assert_called_with("FOO", "BAZ")
+
+    def test_log_metric_is_being_called_by_all_loggers(self):
+        self.observability = ObservabilityMock()
+
+        self.observability.log_metric("FOO", "BAZ", "BAR")
+
+        self.observability._loggers.loggers[0].log_metric.assert_called_with(
+            "FOO", "BAZ", "BAR", False
+        )
+        self.observability._loggers.loggers[1].log_metric.assert_called_with(
+            "FOO", "BAZ", "BAR", False
+        )
+
+    def test_log_is_being_called_by_all_loggers(self):
+        self.observability = ObservabilityMock()
+
+        self.observability.log("FOO", "BAZ")
+
+        self.observability._loggers.loggers[0].\
+            log.assert_called_with("FOO", "BAZ")
+        self.observability._loggers.loggers[1].\
+            log.assert_called_with("FOO", "BAZ")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/ml_service/util/env_variables.py b/ml_service/util/env_variables.py
index 14ef086b..f18084f7 100644
--- a/ml_service/util/env_variables.py
+++ b/ml_service/util/env_variables.py
@@ -3,7 +3,6 @@
 from dataclasses import dataclass
 import os
 from typing import Optional
-
 from dotenv import load_dotenv
 
 
@@ -40,6 +39,9 @@ class Env:
     max_nodes: int = int(os.environ.get("AML_CLUSTER_MAX_NODES", 4))
     aml_preprocessing_custom_docker_env_name: Optional[str] = os.environ.get("AML_PREPROCESSING_CUSTOM_DOCKER_ENV_NAME")  # NOQA: E501
     preprocessing_os_cmd_pipeline_name: Optional[str] = os.environ.get("PREPROCESSING_OS_CMD_PIPELINE_NAME")  # NOQA: E501
+    app_insights_connection_string: Optional[str] = os.environ.get("APP_INSIGHTS_CONNECTION_STRING")  # NOQA: E501
+    log_to_console: Optional[bool] = os.environ.get("LOG_TO_CONSOLE", "false").lower().strip() == "true"  # NOQA: E501
+    log_level: Optional[str] = os.environ.get("LOG_LEVEL", "WARNING")  # NOQA: E501
 
     # derived variables
     processed_dataset_name: Optional[str] = f"{dataset_name}_processed"  # NOQA: E501
diff --git a/ml_service/util/logger/__init__.py b/ml_service/util/logger/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/ml_service/util/logger/app_insights_logger.py b/ml_service/util/logger/app_insights_logger.py
new file mode 100644
index 00000000..0a059496
--- /dev/null
+++ b/ml_service/util/logger/app_insights_logger.py
@@ -0,0 +1,176 @@
+import logging
+import uuid
+
+from opencensus.ext.azure import metrics_exporter
+from opencensus.ext.azure.log_exporter import AzureLogHandler
+from opencensus.ext.azure.trace_exporter import AzureExporter
+from opencensus.trace import config_integration
+from opencensus.trace.samplers import ProbabilitySampler
+from opencensus.trace.tracer import Tracer
+
+from opencensus.stats import aggregation as aggregation_module
+from opencensus.stats import measure as measure_module
+from opencensus.stats import stats as stats_module
+from opencensus.stats import view as view_module
+from opencensus.tags import tag_map as tag_map_module
+
+from ml_service.util.env_variables import Env
+from ml_service.util.logger.logger_interface import (
+    LoggerInterface,
+    ObservabilityAbstract,
+    Severity,
+)
+
+
+class AppInsightsLogger(LoggerInterface, ObservabilityAbstract):
+    def __init__(self, run):
+        self.env = Env()
+        self.run_id = self.get_run_id(run)
+
+        # Prepare integrations and log format
+        config_integration.trace_integrations(['httplib', 'logging'])
+        self.logger = logging.getLogger(__name__)
+        self.logger.level = getattr(logging, self.env.log_level.upper(), None)
+        # initializes log exporter
+        handler = AzureLogHandler(
+            connection_string=self.env.app_insights_connection_string,
+            logging_sampling_rate=1.0,
+        )
+        # handler.setFormatter(
+        #     logging.Formatter('%(asctime)s traceId=%(traceId)s '
+        #                       'spanId=%(spanId)s %(message)s'))
+        handler.add_telemetry_processor(self.callback_function)
+
+        self.logger.addHandler(handler)
+        # initializes tracer
+        self.tracer = Tracer(
+            exporter=AzureExporter(connection_string=self.
+                                   env.app_insights_connection_string),
+            sampler=ProbabilitySampler(1.0)
+        )
+        # initializes metric exporter
+        exporter = metrics_exporter.new_metrics_exporter(
+            enable_standard_metrics=False,
+            connection_string=self.env.app_insights_connection_string,
+        )
+        exporter.add_telemetry_processor(self.callback_function)
+        stats_module.stats.view_manager.register_exporter(exporter)
+
+    def log_metric(
+        self, name="", value="", description="", log_parent=False,
+    ):
+        """
+        Sends a custom metric to appInsights
+        :param name: name  of the metric
+        :param value: value of the metric
+        :param description: description of the metric
+        :param log_parent: not being used for this logger
+        :return:
+        """
+        measurement_map = \
+            stats_module.stats.stats_recorder.new_measurement_map()
+        tag_map = tag_map_module.TagMap()
+
+        measure = measure_module.MeasureFloat(name, description)
+        self.set_view(name, description, measure)
+        measurement_map.measure_float_put(measure, value)
+        measurement_map.record(tag_map)
+
+    def log(self, description="", severity=Severity.INFO):
+        """
+        Sends the logs to App Insights
+        :param description: log description
+        :param severity: log severity
+        :return:
+        """
+
+        if severity == self.severity.DEBUG:
+            self.logger.debug(description)
+        elif severity == self.severity.INFO:
+            self.logger.info(description)
+        elif severity == self.severity.WARNING:
+            self.logger.warning(description)
+        elif severity == self.severity.ERROR:
+            self.logger.error(description)
+        elif severity == self.severity.CRITICAL:
+            self.logger.critical(description)
+
+    def get_run_id(self, run):
+        """
+        gets the correlation ID by the in following order:
+        - If the script is running  in an Online run Context of AML --> run_id
+        - If the script is running where a build_id
+        environment variable  is set --> build_id
+        - Else --> generate a unique id
+        :param run:
+        :return: correlation_id
+        """
+        run_id = str(uuid.uuid1())
+        if not run.id.startswith(self.OFFLINE_RUN):
+            run_id = run.id
+        elif self.env.build_id:
+            run_id = self.env.build_id
+        return run_id
+
+    @staticmethod
+    def set_view(metric, description, measure):
+        """
+        Sets the view for the custom metric
+        """
+        prompt_view = view_module.View(
+            metric,
+            description,
+            [],
+            measure,
+            aggregation_module.LastValueAggregation()
+        )
+        stats_module.stats.view_manager.register_view(prompt_view)
+
+    def callback_function(self, envelope):
+        """
+        Attaches a correlation_id as a custom
+        dimension to the exporter just before
+        sending the logs/metrics
+        :param envelope:
+        :return: Always return True
+        (if False, it  does not export metrics/logs)
+        """
+        envelope.data.baseData.properties[self.CORRELATION_ID] = self.run_id
+        return True
+
+    def span(self, name='span'):
+        """Create a new span with the trace using the context information.
+        :type name: str
+        :param name: The name of the span.
+        :rtype: :class:`~opencensus.trace.span.Span`
+        :returns: The Span object.
+        """
+        return self.tracer.span(name)
+
+    def start_span(self, name='span'):
+        """Start a span.
+        :type name: str
+        :param name: The name of the span.
+        :rtype: :class:`~opencensus.trace.span.Span`
+        :returns: The Span object.
+        """
+        return self.tracer.start_span(name)
+
+    def end_span(self):
+        """End a span. Remove the span from the span stack, and update the
+        span_id in TraceContext as the current span_id which is the peek
+        element in the span stack.
+        """
+        self.tracer.end_span()
+
+    def current_span(self):
+        """Return the current span."""
+        return self.tracer.current_span()
+
+    def add_attribute_to_current_span(self, attribute_key, attribute_value):
+        self.tracer.add_attribute_to_current_span(attribute_key,
+                                                  attribute_value)
+
+    def list_collected_spans(self):
+        """List collected spans."""
+        self.tracer.list_collected_spans()
diff --git a/ml_service/util/logger/azure_ml_logger.py b/ml_service/util/logger/azure_ml_logger.py
new file mode 100644
index 00000000..fb9e2d10
--- /dev/null
+++ b/ml_service/util/logger/azure_ml_logger.py
@@ -0,0 +1,49 @@
+import datetime
+import time
+
+from ml_service.util.logger.logger_interface import (
+    LoggerInterface,
+    ObservabilityAbstract,
+    Severity,
+)
+
+
+class AzureMlLogger(LoggerInterface, ObservabilityAbstract):
+    def __init__(self, run=None):
+        self.run = run
+
+    def log_metric(self, name, value, description, log_parent):
+        """Log a metric value to the run with the given name.
+        :param log_parent: mark True  if you want to log to parent Run
+        :param name: The name of metric.
+        :type name: str
+        :param value: The value to be posted to the service.
+        :type value:
+        :param description: An optional metric description.
+        :type description: str
+        """
+        if name != "" and value != "":
+            self.run.log(
+                name, value, description
+            ) if log_parent is False \
+                else self.run.parent.log(name, value, description)
+
+    def log(self, description="", severity=Severity.INFO):
+        """
+        Sends the logs to AML (experiments -> logs/outputs)
+        :param description: log description
+        :param severity: log severity
+        :return:
+        """
+
+        time_stamp = datetime.datetime.fromtimestamp(time.time()).strftime(
+            "%Y-%m-%d %H:%M:%S"
+        )
+        callee = self.get_callee(
+            2
+        )  # to get the script who is calling Observability
+        print(
+            "{}, [{}], {}:{}".format(
+                time_stamp, self.severity_map[severity], callee, description
+            )
+        )
diff --git a/ml_service/util/logger/console_logger.py b/ml_service/util/logger/console_logger.py
new file mode 100644
index 00000000..984f12a2
--- /dev/null
+++ b/ml_service/util/logger/console_logger.py
@@ -0,0 +1,61 @@
+import logging
+import uuid
+
+from ml_service.util.env_variables import Env
+from ml_service.util.logger.logger_interface import (
+    LoggerInterface,
+    ObservabilityAbstract,
+    Severity,
+)
+
+
+class ConsoleLogger(LoggerInterface, ObservabilityAbstract):
+    def __init__(self, run):
+        self.env = Env()
+        # initializes log exporter
+        self.run_id = self.get_run_id(run)
+        self.logger = logging.getLogger(__name__)
+        self.logger.level = getattr(logging, self.env.log_level.upper(), None)
+
+    def log_metric(
+        self, name="", value="", description="", log_parent=False,
+    ):
+        self.logger.info(f"Logging Metric for runId={self.run_id}: "
+                         "name={name} value={value} "
+                         "description={description} log_parent={log_parent}")
+
+    def log(self, description="", severity=Severity.INFO):
+        """
+        Sends the logs to App Insights
+        :param description: log description
+        :param severity: log severity
+        :return:
+        """
+
+        if severity == self.severity.DEBUG:
+            self.logger.debug(description)
+        elif severity == self.severity.INFO:
+            self.logger.info(description)
+        elif severity == self.severity.WARNING:
+            self.logger.warning(description)
+        elif severity == self.severity.ERROR:
+            self.logger.error(description)
+        elif severity == self.severity.CRITICAL:
+            self.logger.critical(description)
+
+    def get_run_id(self, run):
+        """
+        gets the correlation ID by the in following order:
+        - If the script is running  in an Online run Context of AML --> run_id
+        - If the script is running where a build_id
+        environment variable  is set --> build_id
+        - Else --> generate a unique id
+        :param run:
+        :return: correlation_id
+        """
+        run_id = str(uuid.uuid1())
+        if not run.id.startswith(self.OFFLINE_RUN):
+            run_id = run.id
+        elif self.env.build_id:
+            run_id = self.env.build_id
+        return run_id
diff --git a/ml_service/util/logger/logger_interface.py b/ml_service/util/logger/logger_interface.py
new file mode 100644
index 00000000..cfbc00d6
--- /dev/null
+++ b/ml_service/util/logger/logger_interface.py
@@ -0,0 +1,83 @@
+import inspect
+from opencensus.trace.tracer import Tracer
+
+
+class Severity:
+    DEBUG = 10
+    INFO = 20
+    WARNING = 30
+    ERROR = 40
+    CRITICAL = 50
+
+
+class LoggerInterface(Tracer):
+
+    def log_metric(self, name, value, description, log_parent):
+        pass
+
+    def log(self, name, value, description, severity, log_parent):
+        pass
+
+    def finish(self):
+        """End the spans and send to reporters."""
+        pass
+
+    def span(self, name='span'):
+        """Create a new span with the trace using the context information.
+        :type name: str
+        :param name: The name of the span.
+        :rtype: :class:`~opencensus.trace.span.Span`
+        :returns: The Span object.
+        """
+        pass
+
+    def start_span(self, name='span'):
+        """Start a span.
+        :type name: str
+        :param name: The name of the span.
+        :rtype: :class:`~opencensus.trace.span.Span`
+        :returns: The Span object.
+        """
+        pass
+
+    def end_span(self):
+        """End a span. Remove the span from the span stack, and update the
+        span_id in TraceContext as the current span_id which is the peek
+        element in the span stack.
+        """
+        pass
+
+    def current_span(self):
+        """Return the current span."""
+        pass
+
+    def add_attribute_to_current_span(self, attribute_key, attribute_value):
+        pass
+
+    def list_collected_spans(self):
+        """List collected spans."""
+        pass
+
+
+class ObservabilityAbstract:
+    OFFLINE_RUN = "OfflineRun"
+    CORRELATION_ID = "correlation_id"
+    severity = Severity()
+    severity_map = {10: "DEBUG", 20: "INFO",
+                    30: "WARNING", 40: "ERROR", 50: "CRITICAL"}
+
+    @staticmethod
+    def get_callee(stack_level):
+        """
+        This method get the callee location in [file_name:line_number] format
+        :param stack_level:
+        :return: string of [file_name:line_number]
+        """
+        try:
+            stack = inspect.stack()
+            file_name = stack[stack_level + 1].filename.split("/")[-1]
+            line_number = stack[stack_level + 1].lineno
+            return "{}:{}".format(file_name, line_number)
+        except IndexError:
+            print("Index error, failed to log to AzureML")
+            return ""
diff --git a/ml_service/util/logger/observability.py b/ml_service/util/logger/observability.py
new file mode 100644
index 00000000..254aa113
--- /dev/null
+++ b/ml_service/util/logger/observability.py
@@ -0,0 +1,133 @@
+from azureml.core import Run
+
+from ml_service.util.env_variables import Env
+from ml_service.util.logger.app_insights_logger import AppInsightsLogger
+from ml_service.util.logger.azure_ml_logger import AzureMlLogger
+from ml_service.util.logger.console_logger import ConsoleLogger
+from ml_service.util.logger.logger_interface import (
+    ObservabilityAbstract,
+    LoggerInterface,
+    Severity,
+)
+
+
+class Loggers(ObservabilityAbstract):
+    def __init__(self) -> None:
+        self.loggers: LoggerInterface = []
+        self.register_loggers()
+
+    def add(self, logger) -> None:
+        self.loggers.append(logger)
+
+    def get_loggers_string(self) -> None:
+        return ", ".join([type(x).__name__ for x in self.loggers])
+
+    def register_loggers(self):
+        """
+        This method is responsible to create loggers/tracers
+        and add them to the list of loggers
+        Notes:
+        - If the context of the Run object is offline,
+        we do not create AzureMlLogger instance
+        - If APP_INSIGHTS_CONNECTION_STRING is notset
+        to ENV variable, we do not create AppInsightsLogger
+        instance
+        """
+        run = Run.get_context()
+        if not run.id.startswith(self.OFFLINE_RUN):
+            self.loggers.append(AzureMlLogger(run))
+        if Env().app_insights_connection_string:
+            self.loggers.append(AppInsightsLogger(run))
+        if Env().log_to_console:
+            self.loggers.append(ConsoleLogger(run))
+
+
+class Observability(LoggerInterface):
+    def __init__(self) -> None:
+        self._loggers = Loggers()
+
+    def log_metric(
+            self, name="", value="", description="", log_parent=False,
+    ):
+        """
+        this method sends the metrics to all registered loggers
+        :param name: metric name
+        :param value: metric value
+        :param description: description of the metric
+        :param log_parent: (only for AML), send the metric to the run.parent
+        :return:
+        """
+        for logger in self._loggers.loggers:
+            logger.log_metric(name, value, description, log_parent)
+
+    def log(self, description="", severity=Severity.INFO):
+        """
+        this method sends the logs to all registered loggers
+        :param description: Actual log description to be sent
+        :param severity: log Severity
+        :return:
+        """
+        for logger in self._loggers.loggers:
+            logger.log(description, severity)
+
+    def get_logger(self, logger_class):
+        """
+        This method iterate over the loggers and it
+        returns the logger with the same type as the provided one.
+        this is a reference that can be used in case
+        any of the built in functions of the loggers is required
+        :param logger_class:
+        :return: a logger class
+        """
+        for logger in self._loggers.loggers:
+            if type(logger) is type(logger_class):
+                return logger
+
+    def span(self, name='span'):
+        """Create a new span with the trace using the context information
+           for all registered loggers.
+        :type name: str
+        :param name: The name of the span.
+        :rtype: :class:`~opencensus.trace.span.Span`
+        :returns: The Span object.
+        """
+        for logger in self._loggers.loggers:
+            logger.span(name)
+        return self.current_span()
+
+    def start_span(self, name='span'):
+        """Start a span for all registered loggers.
+        :type name: str
+        :param name: The name of the span.
+        :rtype: :class:`~opencensus.trace.span.Span`
+        :returns: The Span object.
+        """
+        for logger in self._loggers.loggers:
+            logger.start_span(name)
+        return self.current_span()
+
+    def end_span(self):
+        """End a span for all registered loggers.
+        Remove the span from the span stack, and update the
+        span_id in TraceContext as the current span_id which is the peek
+        element in the span stack.
+        """
+        for logger in self._loggers.loggers:
+            logger.end_span()
+
+    def current_span(self):
+        """Return the current span from first logger"""
+        if len(self._loggers.loggers) > 0:
+            return self._loggers.loggers[0].current_span()
+
+    def add_attribute_to_current_span(self, attribute_key, attribute_value):
+        """Add attribute to current span for all registered loggers.
+        """
+        for logger in self._loggers.loggers:
+            logger.add_attribute_to_current_span(attribute_key,
+                                                 attribute_value)
+
+    def list_collected_spans(self):
+        """List collected spans from first logger."""
+        if len(self._loggers.loggers) > 0:
+            return self._loggers.loggers[0].list_collected_spans()

From df33fa7250c6a0918cfca7fca74a65bf35008624 Mon Sep 17 00:00:00 2001
From: Florian Wagner <flwagner@microsoft.com>
Date: Fri, 20 Nov 2020 08:29:50 +0000
Subject: [PATCH 02/23] Add add env vars to .env.example

Signed-off-by: Florian Wagner <flwagner@microsoft.com>
---
 .env.example | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/.env.example b/.env.example
index 127c3523..46744a51 100644
--- a/.env.example
+++ b/.env.example
@@ -14,6 +14,13 @@ SUBSCRIPTION_ID = ''
 LOCATION = ''
 BASE_NAME = ''
 RESOURCE_GROUP = ''
+APP_INSIGHTS_CONNECTION_STRING = ''
+
+# Development related
+LOG_TO_CONSOLE = 'true'
+LOG_LEVEL = 'DEBUG'
+
+# Azure ML Workspace Variables
 WORKSPACE_NAME = ''
 ACI_DEPLOYMENT_NAME = ''
 

From e2d38dd4f41b712ee9f2bc0f509197d85f697e19 Mon Sep 17 00:00:00 2001
From: Florian Wagner <flwagner@microsoft.com>
Date: Mon, 23 Nov 2020 03:48:38 +0000
Subject: [PATCH 03/23] Improvements for Observability Helper

Signed-off-by: Florian Wagner <flwagner@microsoft.com>
---
 .env.example                                  | 15 +++--
 .pipelines/variables-template.yml             | 22 +++++++-
 ml_model/conda_dependencies.yml               |  2 +
 ml_model/evaluate/evaluate_model.py           |  2 +-
 ml_model/preprocessing/preprocess_aml.py      | 10 +++-
 ml_model/register/register_model.py           |  2 +-
 ml_model/training/train_aml.py                |  4 +-
 .../build_data_processing_pipeline.py         | 11 +++-
 ml_service/util/env_variables.py              |  3 +
 ml_service/util/logger/app_insights_logger.py | 55 ++++++++-----------
 ml_service/util/logger/azure_ml_logger.py     |  8 +++
 ml_service/util/logger/console_logger.py      | 47 +++++-----------
 ml_service/util/logger/logger_interface.py    | 46 ++++++++++++++++
 ml_service/util/logger/observability.py       |  9 +++
 14 files changed, 157 insertions(+), 79 deletions(-)

diff --git a/.env.example b/.env.example
index 46744a51..0fb420e9 100644
--- a/.env.example
+++ b/.env.example
@@ -14,11 +14,18 @@ SUBSCRIPTION_ID = ''
 LOCATION = ''
 BASE_NAME = ''
 RESOURCE_GROUP = ''
-APP_INSIGHTS_CONNECTION_STRING = ''
 
-# Development related
+# Observability related
+APP_INSIGHTS_CONNECTION_STRING = ''
 LOG_TO_CONSOLE = 'true'
-LOG_LEVEL = 'DEBUG'
+# DEBUG, INFO, WARNING, ERROR, CRITICAL
+LOG_LEVEL = 'DEBUG' 
+# Probability 0.0 -> 1.0
+LOG_SAMPLING_RATE = '1.0'
+# Probability 0.0 -> 1.0
+TRACE_SAMPLING_RATE = '1.0'
+# Seconds
+METRICS_EXPORT_INTERVAL = '15'
 
 # Azure ML Workspace Variables
 WORKSPACE_NAME = ''
@@ -28,7 +35,7 @@ ACI_DEPLOYMENT_NAME = ''
 # Variables that are defined in variables-template.yml
 #   they determine _how_ the project runs 
 ####################################################
-SOURCES_DIR_TRAIN = 'ml_model'
+SOURCES_DIR_TRAIN = '.'
 EXPERIMENT_NAME = 'flower_classification'
 DATASET_NAME = 'flower_dataset'
 # Optional. Set it if you have configured non default datastore to point to your data
diff --git a/.pipelines/variables-template.yml b/.pipelines/variables-template.yml
index 3e0c2241..c944eaf6 100644
--- a/.pipelines/variables-template.yml
+++ b/.pipelines/variables-template.yml
@@ -4,7 +4,7 @@
 variables:
   # The directory containing the scripts for training, evaluating, and registering the model
   - name: SOURCES_DIR_TRAIN
-    value: ml_model
+    value: '.'
     
   # Azure ML Variables
   - name: EXPERIMENT_NAME
@@ -42,6 +42,8 @@ variables:
     value: flower_custom_preprocess_env
 
   # AML Compute Cluster Config
+  - name: AML_ENV_TRAIN_CONDA_DEP_FILE
+    value: "ml_model/conda_dependencies.yml"
   - name: AML_COMPUTE_CLUSTER_CPU_SKU
     value: STANDARD_DS2_V2
   - name: AML_COMPUTE_CLUSTER_NAME
@@ -52,6 +54,24 @@ variables:
     value: 0
   - name: AML_CLUSTER_MAX_NODES
     value: 4
+  - name: AML_CLUSTER_PRIORITY
+    value: lowpriority
+
+  # Observability related
+  - name: LOG_TO_CONSOLE
+    value: 'true'
+  - name: LOG_LEVEL
+    value: 'INFO' # DEBUG, INFO, WARNING, ERROR, CRITICAL
+  - name: LOG_SAMPLING_RATE
+    value: '1.0' # Probability 0.0 -> 1.0
+  - name: TRACE_SAMPLING_RATE
+    value: '1.0' # Probability 0.0 -> 1.0
+  - name: METRICS_EXPORT_INTERVAL
+    value: '15' # Seconds
+
+  # The name for the (docker/webapp) scoring image
+  - name: IMAGE_NAME
+    value: "flowerclassifier"
 
   # AML pipelines can run outside of Azure DevOps, these parameters control AML pipeline behaviors
   - name: PREPROCESSING_PARAM
diff --git a/ml_model/conda_dependencies.yml b/ml_model/conda_dependencies.yml
index e73f1c28..0c954d8b 100644
--- a/ml_model/conda_dependencies.yml
+++ b/ml_model/conda_dependencies.yml
@@ -25,6 +25,8 @@ dependencies:
       - keras==2.4.*
 
       # Observability
+      - python-dotenv==0.12.*
+      - dataclasses==0.6
       - opencensus==0.7.11
       - opencensus-ext-httplib==0.7.3
       - opencensus-ext-logging==0.1.0
diff --git a/ml_model/evaluate/evaluate_model.py b/ml_model/evaluate/evaluate_model.py
index 2776b20a..5ccc1dd4 100644
--- a/ml_model/evaluate/evaluate_model.py
+++ b/ml_model/evaluate/evaluate_model.py
@@ -25,7 +25,7 @@
 """
 from azureml.core import Run
 import argparse
-from util.model_helper import get_model
+from ml_model.util.model_helper import get_model
 
 
 def evaluate_model_performs_better(model, run):
diff --git a/ml_model/preprocessing/preprocess_aml.py b/ml_model/preprocessing/preprocess_aml.py
index 474bdd3b..22426bb5 100644
--- a/ml_model/preprocessing/preprocess_aml.py
+++ b/ml_model/preprocessing/preprocess_aml.py
@@ -26,8 +26,8 @@
 from azureml.core.run import Run
 import argparse
 import json
-from preprocess_images import resize_images
-from util.model_helper import get_or_register_dataset, get_aml_context
+from ml_model.preprocessing.preprocess_images import resize_images
+from ml_model.util.model_helper import get_or_register_dataset, get_aml_context
 from ml_service.util.logger.observability import Observability
 
 observability = Observability()
@@ -124,4 +124,8 @@ def main():
 
 
 if __name__ == '__main__':
-    main()
+    try:
+        main()
+    except Exception as exception:
+        observability.exception(exception)
+        raise exception
diff --git a/ml_model/register/register_model.py b/ml_model/register/register_model.py
index 3d550441..2a91830e 100644
--- a/ml_model/register/register_model.py
+++ b/ml_model/register/register_model.py
@@ -30,7 +30,7 @@
 import traceback
 from azureml.core import Run
 from azureml.core.model import Model as AMLModel
-from util.model_helper import get_aml_context
+from ml_model.util.model_helper import get_aml_context
 
 
 def find_child_run(parent_run, child_run_id):
diff --git a/ml_model/training/train_aml.py b/ml_model/training/train_aml.py
index 46f3bbac..3a342631 100644
--- a/ml_model/training/train_aml.py
+++ b/ml_model/training/train_aml.py
@@ -27,8 +27,8 @@
 import os
 import argparse
 import json
-from train import split_data, train_model, get_model_metrics
-from util.model_helper import get_or_register_dataset
+from ml_model.training.train import split_data, train_model, get_model_metrics
+from ml_model.util.model_helper import get_or_register_dataset
 
 
 def main():
diff --git a/ml_service/pipelines/build_data_processing_pipeline.py b/ml_service/pipelines/build_data_processing_pipeline.py
index 1d9b0a83..bae1c4e5 100644
--- a/ml_service/pipelines/build_data_processing_pipeline.py
+++ b/ml_service/pipelines/build_data_processing_pipeline.py
@@ -7,7 +7,6 @@
 from ml_service.util.attach_compute import get_compute
 from ml_service.util.env_variables import Env
 from ml_service.util.manage_environment import get_environment
-from ml_service.util.logger.logger_interface import Severity
 from ml_service.util.logger.observability import Observability
 
 observability = Observability()
@@ -24,7 +23,6 @@ def main():
         resource_group=e.resource_group,
     )
     observability.log(f"get_workspace:{aml_workspace}")
-    observability.log(f"ohoh", Severity.CRITICAL)
     # Get Azure machine learning cluster
     aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
     if aml_compute is not None:
@@ -40,6 +38,15 @@ def main():
     run_config = RunConfiguration()
     run_config.environment = environment
 
+    # Activate AppInsights in Pipeline run:
+    # https://docs.microsoft.com/en-us/azure/machine-learning/how-to-log-pipelines-application-insights
+    # Add environment variable with Application Insights Connection String
+    # Replace the value with your own connection string
+    run_config.environment.environment_variables = {
+        "APPLICATIONINSIGHTS_CONNECTION_STRING":
+        e.app_insights_connection_string
+    }
+
     if e.datastore_name:
         datastore_name = e.datastore_name
     else:
diff --git a/ml_service/util/env_variables.py b/ml_service/util/env_variables.py
index f18084f7..b365010f 100644
--- a/ml_service/util/env_variables.py
+++ b/ml_service/util/env_variables.py
@@ -42,6 +42,9 @@ class Env:
     app_insights_connection_string: Optional[str] = os.environ.get("APP_INSIGHTS_CONNECTION_STRING")  # NOQA: E501
     log_to_console: Optional[bool] = os.environ.get("LOG_TO_CONSOLE", "false").lower().strip() == "true"  # NOQA: E501
     log_level: Optional[str] = os.environ.get("LOG_LEVEL", "WARNING")  # NOQA: E501
+    log_sampling_rate: float = float(os.environ.get("LOG_SAMPLING_RATE", 1.0))  # NOQA: E501
+    trace_sampling_rate: float = float(os.environ.get("TRACE_SAMPLING_RATE", 1.0))  # NOQA: E501
+    metrics_export_interval: int = int(os.environ.get("METRICS_EXPORT_INTERVAL", 15))  # NOQA: E501
 
     # derived variables
     processed_dataset_name: Optional[str] = f"{dataset_name}_processed"  # NOQA: E501
diff --git a/ml_service/util/logger/app_insights_logger.py b/ml_service/util/logger/app_insights_logger.py
index 0a059496..89049fea 100644
--- a/ml_service/util/logger/app_insights_logger.py
+++ b/ml_service/util/logger/app_insights_logger.py
@@ -1,5 +1,4 @@
 import logging
-import uuid
 
 from opencensus.ext.azure import metrics_exporter
 from opencensus.ext.azure.log_exporter import AzureLogHandler
@@ -25,36 +24,37 @@
 class AppInsightsLogger(LoggerInterface, ObservabilityAbstract):
     def __init__(self, run):
         self.env = Env()
-        self.run_id = self.get_run_id(run)
+        self.run_id = self.get_run_id_and_set_context(run)
 
         # Prepare integrations and log format
         config_integration.trace_integrations(['httplib', 'logging'])
         self.logger = logging.getLogger(__name__)
-        self.logger.level = getattr(logging, self.env.log_level.upper(), None)
+        self.logger.setLevel(
+            getattr(logging, self.env.log_level.upper(), "WARNING"))
         # initializes log exporter
         handler = AzureLogHandler(
             connection_string=self.env.app_insights_connection_string,
-            logging_sampling_rate=1.0,
+            logging_sampling_rate=self.env.log_sampling_rate,
         )
-        # handler.setFormatter(
-        #     logging.Formatter('%(asctime)s traceId=%(traceId)s '
-        #                       'spanId=%(spanId)s %(message)s'))
         handler.add_telemetry_processor(self.callback_function)
 
         self.logger.addHandler(handler)
         # initializes tracer
+        texporter = AzureExporter(connection_string=self.
+                                  env.app_insights_connection_string)
+        texporter.add_telemetry_processor(self.callback_function)
         self.tracer = Tracer(
-            exporter=AzureExporter(connection_string=self.
-                                   env.app_insights_connection_string),
-            sampler=ProbabilitySampler(1.0)
+            exporter=texporter,
+            sampler=ProbabilitySampler(self.env.trace_sampling_rate)
         )
         # initializes metric exporter
-        exporter = metrics_exporter.new_metrics_exporter(
+        mexporter = metrics_exporter.new_metrics_exporter(
             enable_standard_metrics=False,
+            export_interval=self.env.metrics_export_interval,
             connection_string=self.env.app_insights_connection_string,
         )
-        exporter.add_telemetry_processor(self.callback_function)
-        stats_module.stats.view_manager.register_exporter(exporter)
+        mexporter.add_telemetry_processor(self.callback_function)
+        stats_module.stats.view_manager.register_exporter(mexporter)
 
     def log_metric(
         self, name="", value="", description="", log_parent=False,
@@ -85,32 +85,23 @@ def log(self, description="", severity=Severity.INFO):
         """
 
         if severity == self.severity.DEBUG:
-            self.logger.debug(description)
+            self.logger.debug(description, extra=self.custom_dimensions)
         elif severity == self.severity.INFO:
-            self.logger.info(description)
+            self.logger.info(description, extra=self.custom_dimensions)
         elif severity == self.severity.WARNING:
-            self.logger.warning(description)
+            self.logger.warning(description, extra=self.custom_dimensions)
         elif severity == self.severity.ERROR:
-            self.logger.error(description)
+            self.logger.error(description, extra=self.custom_dimensions)
         elif severity == self.severity.CRITICAL:
-            self.logger.critical(description)
+            self.logger.critical(description, extra=self.custom_dimensions)
 
-    def get_run_id(self, run):
+    def exception(self, exception: Exception):
         """
-        gets the correlation ID by the in following order:
-        - If the script is running  in an Online run Context of AML --> run_id
-        - If the script is running where a build_id
-        environment variable  is set --> build_id
-        - Else --> generate a unique id
-        :param run:
-        :return: correlation_id
+        Sends the exception to App Insights
+        :param exception: Actual exception to be sent
+        :return:
         """
-        run_id = str(uuid.uuid1())
-        if not run.id.startswith(self.OFFLINE_RUN):
-            run_id = run.id
-        elif self.env.build_id:
-            run_id = self.env.build_id
-        return run_id
+        self.logger.exception(exception, extra=self.custom_dimensions)
 
     @staticmethod
     def set_view(metric, description, measure):
diff --git a/ml_service/util/logger/azure_ml_logger.py b/ml_service/util/logger/azure_ml_logger.py
index fb9e2d10..10f9a207 100644
--- a/ml_service/util/logger/azure_ml_logger.py
+++ b/ml_service/util/logger/azure_ml_logger.py
@@ -47,3 +47,11 @@ def log(self, description="", severity=Severity.INFO):
                 time_stamp, self.severity_map[severity], callee, description
             )
         )
+
+    def exception(self, exception: Exception):
+        """
+        Prints the exception to console
+        :param exception: Actual exception to be sent
+        :return:
+        """
+        self.log(exception, Severity.CRITICAL)
diff --git a/ml_service/util/logger/console_logger.py b/ml_service/util/logger/console_logger.py
index 984f12a2..fc0ebdb8 100644
--- a/ml_service/util/logger/console_logger.py
+++ b/ml_service/util/logger/console_logger.py
@@ -1,5 +1,4 @@
 import logging
-import uuid
 
 from ml_service.util.env_variables import Env
 from ml_service.util.logger.logger_interface import (
@@ -13,49 +12,31 @@ class ConsoleLogger(LoggerInterface, ObservabilityAbstract):
     def __init__(self, run):
         self.env = Env()
         # initializes log exporter
-        self.run_id = self.get_run_id(run)
-        self.logger = logging.getLogger(__name__)
-        self.logger.level = getattr(logging, self.env.log_level.upper(), None)
+        self.run_id = self.get_run_id_and_set_context(run)
+        self.level = getattr(logging, self.env.log_level.upper(), "WARNING")
 
     def log_metric(
         self, name="", value="", description="", log_parent=False,
     ):
-        self.logger.info(f"Logging Metric for runId={self.run_id}: "
-                         "name={name} value={value} "
-                         "description={description} log_parent={log_parent}")
+        self.log(f"Logging Metric for runId={self.run_id}: "
+                 "name={name} value={value} "
+                 "description={description} log_parent={log_parent}")
 
     def log(self, description="", severity=Severity.INFO):
         """
-        Sends the logs to App Insights
+        Prints the logs to console
         :param description: log description
         :param severity: log severity
         :return:
         """
+        if self.level <= severity:
+            print(f"{description} - custom dimensions:"
+                  f" {self.custom_dimensions}")
 
-        if severity == self.severity.DEBUG:
-            self.logger.debug(description)
-        elif severity == self.severity.INFO:
-            self.logger.info(description)
-        elif severity == self.severity.WARNING:
-            self.logger.warning(description)
-        elif severity == self.severity.ERROR:
-            self.logger.error(description)
-        elif severity == self.severity.CRITICAL:
-            self.logger.critical(description)
-
-    def get_run_id(self, run):
+    def exception(self, exception: Exception):
         """
-        gets the correlation ID by the in following order:
-        - If the script is running  in an Online run Context of AML --> run_id
-        - If the script is running where a build_id
-        environment variable  is set --> build_id
-        - Else --> generate a unique id
-        :param run:
-        :return: correlation_id
+        Prints the exception to console
+        :param exception: Actual exception to be sent
+        :return:
         """
-        run_id = str(uuid.uuid1())
-        if not run.id.startswith(self.OFFLINE_RUN):
-            run_id = run.id
-        elif self.env.build_id:
-            run_id = self.env.build_id
-        return run_id
+        print(exception)
diff --git a/ml_service/util/logger/logger_interface.py b/ml_service/util/logger/logger_interface.py
index cfbc00d6..6a6b9deb 100644
--- a/ml_service/util/logger/logger_interface.py
+++ b/ml_service/util/logger/logger_interface.py
@@ -1,4 +1,5 @@
 import inspect
+import uuid
 from opencensus.trace.tracer import Tracer
 
 
@@ -18,6 +19,9 @@ def log_metric(self, name, value, description, log_parent):
     def log(self, name, value, description, severity, log_parent):
         pass
 
+    def exception(self, exception):
+        pass
+
     def finish(self):
         """End the spans and send to reporters."""
         pass
@@ -66,6 +70,48 @@ class ObservabilityAbstract:
     severity_map = {10: "DEBUG", 20: "INFO",
                     30: "WARNING", 40: "ERROR", 50: "CRITICAL"}
 
+    def get_run_id_and_set_context(self, run):
+        """
+        gets the correlation ID by the in following order:
+        - If the script is running  in an Online run Context of AML --> run_id
+        - If the script is running where a build_id
+        environment variable  is set --> build_id
+        - Else --> generate a unique id
+
+        Sets also the custom context dimensions based on On or Offline run
+        :param run:
+        :return: correlation_id
+        """
+        run_id = str(uuid.uuid1())
+        if not run.id.startswith(self.OFFLINE_RUN):
+            run_id = run.id
+            self.custom_dimensions = {
+                'custom_dimensions': {
+                    "parent_run_id": run.parent.id,
+                    "step_id": run.id,
+                    "step_name": run.name,
+                    "experiment_name": run.experiment.name,
+                    "run_url": run.parent.get_portal_url(),
+                    "offline_run": False
+                }
+            }
+        elif self.env.build_id:
+            run_id = self.env.build_id
+            self.custom_dimensions = {
+                'custom_dimensions': {
+                    "run_id": self.env.build_id,
+                    "offline_run": True
+                }
+            }
+        else:
+            self.custom_dimensions = {
+                'custom_dimensions': {
+                    "run_id": run_id,
+                    "offline_run": True
+                }
+            }
+        return run_id
+
     @staticmethod
     def get_callee(stack_level):
         """
diff --git a/ml_service/util/logger/observability.py b/ml_service/util/logger/observability.py
index 254aa113..1f6b6881 100644
--- a/ml_service/util/logger/observability.py
+++ b/ml_service/util/logger/observability.py
@@ -70,6 +70,15 @@ def log(self, description="", severity=Severity.INFO):
         for logger in self._loggers.loggers:
             logger.log(description, severity)
 
+    def exception(self, exception: Exception):
+        """
+        this method sends the exception to all registered loggers
+        :param exception: Actual exception to be sent
+        :return:
+        """
+        for logger in self._loggers.loggers:
+            logger.exception(exception)
+
     def get_logger(self, logger_class):
         """
         This method iterate over the loggers and it

From 5faab7e344dbdee8904629672a493db5bca8f86d Mon Sep 17 00:00:00 2001
From: Florian Wagner <flwagner@microsoft.com>
Date: Thu, 3 Dec 2020 05:19:16 +0000
Subject: [PATCH 04/23] Update script paths

Signed-off-by: Florian Wagner <flwagner@microsoft.com>
---
 ml_service/pipelines/build_data_processing_os_cmd_pipeline.py | 2 +-
 ml_service/pipelines/build_data_processing_pipeline.py        | 2 +-
 ml_service/pipelines/build_training_pipeline.py               | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ml_service/pipelines/build_data_processing_os_cmd_pipeline.py b/ml_service/pipelines/build_data_processing_os_cmd_pipeline.py
index b3e44900..98a4020b 100644
--- a/ml_service/pipelines/build_data_processing_os_cmd_pipeline.py
+++ b/ml_service/pipelines/build_data_processing_os_cmd_pipeline.py
@@ -61,7 +61,7 @@ def main():
 
     preprocess_step = PythonScriptStep(
         name="Preprocess Data with OS cmd",
-        script_name='preprocessing/preprocess_os_cmd.py',
+        script_name='ml_model/preprocessing/preprocess_os_cmd.py',
         compute_target=aml_compute,
         source_directory=e.sources_directory_train,
         arguments=[
diff --git a/ml_service/pipelines/build_data_processing_pipeline.py b/ml_service/pipelines/build_data_processing_pipeline.py
index bae1c4e5..b7437133 100644
--- a/ml_service/pipelines/build_data_processing_pipeline.py
+++ b/ml_service/pipelines/build_data_processing_pipeline.py
@@ -74,7 +74,7 @@ def main():
 
     preprocess_step = PythonScriptStep(
         name="Preprocess Data",
-        script_name="preprocessing/preprocess_aml.py",
+        script_name="ml_model/preprocessing/preprocess_aml.py",
         compute_target=aml_compute,
         source_directory=e.sources_directory_train,
         arguments=[
diff --git a/ml_service/pipelines/build_training_pipeline.py b/ml_service/pipelines/build_training_pipeline.py
index c7c2fe56..0a9ab617 100644
--- a/ml_service/pipelines/build_training_pipeline.py
+++ b/ml_service/pipelines/build_training_pipeline.py
@@ -51,7 +51,7 @@ def main():
 
     train_step = PythonScriptStep(
         name="Train Model",
-        script_name="training/train_aml.py",
+        script_name="ml_model/training/train_aml.py",
         compute_target=aml_compute,
         source_directory=e.sources_directory_train,
         outputs=[pipeline_data],

From 3f6f3170769d8d279a276b3bec1c5f24559d21cb Mon Sep 17 00:00:00 2001
From: Florian Wagner <flwagner@microsoft.com>
Date: Thu, 3 Dec 2020 07:38:59 +0000
Subject: [PATCH 05/23] Add observability module to scripts

Signed-off-by: Florian Wagner <flwagner@microsoft.com>
---
 ml_model/preprocessing/Dockerfile             | 10 +++++
 ml_model/preprocessing/preprocess_aml.py      | 11 ++---
 ml_model/preprocessing/preprocess_images.py   | 29 ++++++++----
 ml_model/preprocessing/preprocess_os_cmd.py   | 34 +++++++++-----
 ml_model/register/register_model.py           | 40 +++++++++++------
 ml_model/training/train.py                    | 24 +++++++---
 ml_model/training/train_aml.py                | 39 ++++++++++------
 ml_model/util/model_helper.py                 | 13 ++++--
 .../build_data_processing_os_cmd_pipeline.py  | 31 ++++++++++---
 .../build_data_processing_pipeline.py         |  6 ++-
 .../pipelines/build_training_pipeline.py      | 44 ++++++++++++++-----
 .../pipelines/run_data_processing_pipeline.py | 17 +++++--
 ml_service/pipelines/run_training_pipeline.py | 17 +++++--
 .../tests/util/test_app_insights_logger.py    | 12 +++--
 ml_service/util/logger/observability.py       | 11 +++++
 15 files changed, 244 insertions(+), 94 deletions(-)

diff --git a/ml_model/preprocessing/Dockerfile b/ml_model/preprocessing/Dockerfile
index e28cb0d7..4ea98a1c 100644
--- a/ml_model/preprocessing/Dockerfile
+++ b/ml_model/preprocessing/Dockerfile
@@ -25,8 +25,18 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}
     ~/miniconda/bin/conda clean -tipsy
 ENV PATH="/home/dockeruser/miniconda/bin/:${PATH}"
 
+USER root
+
+RUN apt-get update --fix-missing && \
+    apt-get install -y build-essential
+
+# Create conda environment for dockeruser user
+USER dockeruser
+
 RUN conda install -y conda=${CONDA_VERSION} python=${PYTHON_VERSION} && \
     pip install azureml-defaults==${AZUREML_SDK_VERSION} inference-schema==${INFERENCE_SCHEMA_VERSION} &&\
+    pip install python-dotenv==0.12.* dataclasses==0.6 opencensus==0.7.11 opencensus-ext-httplib==0.7.3 \
+    opencensus-ext-azure==1.0.5 opencensus-ext-logging==0.1.0 opencensus-context==0.1.2 && \
     conda clean -aqy && \
     rm -rf ~/miniconda/pkgs && \
     find ~/miniconda/ -type d -name __pycache__ -prune -exec rm -rf {} \;
diff --git a/ml_model/preprocessing/preprocess_aml.py b/ml_model/preprocessing/preprocess_aml.py
index 22426bb5..5abd2968 100644
--- a/ml_model/preprocessing/preprocess_aml.py
+++ b/ml_model/preprocessing/preprocess_aml.py
@@ -73,11 +73,12 @@ def main():
 
     args = parser.parse_args()
 
-    print("Argument [dataset_name]: %s" % args.dataset_name)
-    print("Argument [datastore_name]: %s" % args.datastore_name)
-    print("Argument [data_file_path]: %s" % args.data_file_path)
-    print("Argument [output_dataset]: %s" % args.output_dataset)
-    print("Argument [preprocessing_param]: %s" % args.preprocessing_param)
+    observability.log("Argument [dataset_name]: %s" % args.dataset_name)
+    observability.log("Argument [datastore_name]: %s" % args.datastore_name)
+    observability.log("Argument [data_file_path]: %s" % args.data_file_path)
+    observability.log("Argument [output_dataset]: %s" % args.output_dataset)
+    observability.log("Argument [preprocessing_param]: %s"
+                      % args.preprocessing_param)
 
     data_file_path = args.data_file_path
     dataset_name = args.dataset_name
diff --git a/ml_model/preprocessing/preprocess_images.py b/ml_model/preprocessing/preprocess_images.py
index 43608fd0..582aae7b 100644
--- a/ml_model/preprocessing/preprocess_images.py
+++ b/ml_model/preprocessing/preprocess_images.py
@@ -2,6 +2,9 @@
 import shutil
 import numpy as np
 from PIL import Image
+from ml_service.util.logger.observability import Observability
+
+observability = Observability()
 
 
 def resize_image(img, size):
@@ -33,15 +36,15 @@ def resize_image(img, size):
 def resize_images(indir, outdir, preprocessing_args):
     size = (preprocessing_args['image_size']['x'],
             preprocessing_args['image_size']['y'])
-    print(f"indir: {indir}")
-    print(f"outdir: {outdir}")
+    observability.log(f"indir: {indir}")
+    observability.log(f"outdir: {outdir}")
     if (os.path.exists(indir)):
-        print("indir exists")
+        observability.log("indir exists")
     else:
-        print("indir doesn't exit")
+        observability.log("indir doesn't exit")
 
     if os.path.exists(outdir):
-        print("outdir exists, delete all files")
+        observability.log("outdir exists, delete all files")
         for filename in os.listdir(outdir):
             file_path = os.path.join(outdir, filename)
             if os.path.isfile(file_path):
@@ -52,7 +55,7 @@ def resize_images(indir, outdir, preprocessing_args):
     # Loop through each subfolder in the input dir
     for root, dirs, filenames in os.walk(indir):
         for d in dirs:
-            print('processing folder ' + d)
+            observability.log('processing folder ' + d)
             # Create a matching subfolder in the output dir
             saveFolder = os.path.join(outdir, d)
             if not os.path.exists(saveFolder):
@@ -62,16 +65,18 @@ def resize_images(indir, outdir, preprocessing_args):
             for f in files:
                 # Open the file
                 imgFile = os.path.join(root, d, f)
-                print("reading " + imgFile)
+                observability.log("reading " + imgFile)
                 img = Image.open(imgFile)
                 # Create a resized version and save it
                 proc_img = resize_image(img, size)
                 saveAs = os.path.join(saveFolder, f)
-                print("writing " + saveAs)
+                observability.log("writing " + saveAs)
                 proc_img.save(saveAs)
 
 
 def main():
+    observability.start_span()
+
     in_dir = 'data/gear_images/raw'
     out_dir = 'data/processed'
     preprocessing_args = {
@@ -80,6 +85,12 @@ def main():
     }
     resize_images(in_dir, out_dir, preprocessing_args)  # NOQA: E501
 
+    observability.end_span()
+
 
 if __name__ == '__main__':
-    main()
+    try:
+        main()
+    except Exception as exception:
+        observability.exception(exception)
+        raise exception
diff --git a/ml_model/preprocessing/preprocess_os_cmd.py b/ml_model/preprocessing/preprocess_os_cmd.py
index 328eabf9..edf8d789 100644
--- a/ml_model/preprocessing/preprocess_os_cmd.py
+++ b/ml_model/preprocessing/preprocess_os_cmd.py
@@ -4,11 +4,15 @@
 from azureml.core.run import Run
 import argparse
 import subprocess
-from util.model_helper import get_or_register_dataset, get_aml_context
+from ml_model.util.model_helper import get_or_register_dataset, get_aml_context
+from ml_service.util.logger.observability import Observability
+
+observability = Observability()
 
 
 def main():
-    print("Running preprocess_os_cmd.py")
+    observability.start_span()
+    observability.log("Running preprocess_os_cmd.py")
 
     parser = argparse.ArgumentParser("preprocess_os_cmd")
     parser.add_argument(
@@ -40,10 +44,10 @@ def main():
 
     args = parser.parse_args()
 
-    print("Argument [dataset_name]: %s" % args.dataset_name)
-    print("Argument [datastore_name]: %s" % args.datastore_name)
-    print("Argument [data_file_path]: %s" % args.data_file_path)
-    print("Argument [output_dataset]: %s" % args.output_dataset)
+    observability.log("Argument [dataset_name]: %s" % args.dataset_name)
+    observability.log("Argument [datastore_name]: %s" % args.datastore_name)
+    observability.log("Argument [data_file_path]: %s" % args.data_file_path)
+    observability.log("Argument [output_dataset]: %s" % args.output_dataset)
 
     data_file_path = args.data_file_path
     dataset_name = args.dataset_name
@@ -68,7 +72,7 @@ def main():
     # Process data
     mount_context = dataset.mount()
     mount_context.start()
-    print(f"mount_point is: {mount_context.mount_point}")
+    observability.log(f"mount_point is: {mount_context.mount_point}")
 
     ####
     # Execute something here just 'cp' from input to output folder
@@ -85,23 +89,29 @@ def main():
     # Check output
     while True:
         output = process.stdout.readline()
-        print(output.strip())
+        observability.log(output.strip())
         # Do something else
         return_code = process.poll()
         if return_code is not None:
-            print('RETURN CODE', return_code)
+            observability.log(f'RETURN CODE {return_code}')
             # Process has finished, read rest of the output
             for output in process.stdout.readlines():
-                print(output.strip())
+                observability.log(output.strip())
             break
 
     mount_context.stop()
 
     run.tag("run_type", value="preprocess_os_cmd")
-    print(f"tags now present for run: {run.tags}")
+    observability.log(f"tags now present for run: {run.tags}")
 
     run.complete()
 
+    observability.end_span()
+
 
 if __name__ == '__main__':
-    main()
+    try:
+        main()
+    except Exception as exception:
+        observability.exception(exception)
+        raise exception
diff --git a/ml_model/register/register_model.py b/ml_model/register/register_model.py
index 2a91830e..8d9a1b51 100644
--- a/ml_model/register/register_model.py
+++ b/ml_model/register/register_model.py
@@ -31,6 +31,9 @@
 from azureml.core import Run
 from azureml.core.model import Model as AMLModel
 from ml_model.util.model_helper import get_aml_context
+from ml_service.util.logger.observability import Observability
+
+observability = Observability()
 
 
 def find_child_run(parent_run, child_run_id):
@@ -54,6 +57,8 @@ def find_run(experiment, run_id):
 
 
 def main():
+    observability.start_span()
+
     run = Run.get_context()
     ws, exp, run_id = get_aml_context(run)
 
@@ -84,11 +89,11 @@ def main():
     if (run_id == 'amlcompute'):
         run_id = run.parent.id
         run = run.parent
-    print(f"parent run_id is {run_id}")
+    observability.log(f"parent run_id is {run_id}")
     model_name = args.model_name
     model_path = args.step_input
 
-    print("Getting registration parameters")
+    observability.log("Getting registration parameters")
 
     # Load the registration parameters from the parameters file
     with open("parameters.json") as f:
@@ -96,7 +101,7 @@ def main():
     try:
         register_args = pars["registration"]
     except KeyError:
-        print("Could not load registration values from file")
+        observability.log("Could not load registration values from file")
         register_args = {"tags": []}
 
     model_tags = {}
@@ -105,23 +110,23 @@ def main():
             mtag = run.get_metrics()[tag]
             model_tags[tag] = mtag
         except KeyError:
-            print(f"Could not find {tag} metric on parent run.")
+            observability.log(f"Could not find {tag} metric on parent run.")
 
     parent_tags = run.get_tags()
     try:
         build_id = parent_tags["BuildId"]
     except KeyError:
         build_id = None
-        print("BuildId tag not found on parent run.")
-        print(f"Tags present: {parent_tags}")
+        observability.log("BuildId tag not found on parent run.")
+        observability.log(f"Tags present: {parent_tags}")
     try:
         build_uri = parent_tags["BuildUri"]
     except KeyError:
         build_uri = None
-        print("BuildUri tag not found on parent run.")
-        print(f"Tags present: {parent_tags}")
+        observability.log("BuildUri tag not found on parent run.")
+        observability.log(f"Tags present: {parent_tags}")
 
-    print(f"Loading training run_id from {model_path}")
+    observability.log(f"Loading training run_id from {model_path}")
     run_id_file = os.path.join(model_path, "run_id.txt")
     with open(run_id_file, "r") as text_file:
         training_run_id = text_file.read().replace('\n', '')
@@ -152,16 +157,19 @@ def main():
                 build_id,
                 build_uri)
     else:
-        print("Training run not found. Skipping model registration.")
+        observability.log("Training run not found."
+                          "Skipping model registration.")
         sys.exit(0)
 
+    observability.end_span()
+
 
 def model_already_registered(model_name, exp, run_id):
     model_list = AMLModel.list(exp.workspace, name=model_name, run_id=run_id)
     if len(model_list) >= 1:
         raise Exception(f"Model name: {model_name} in workspace {exp.workspace} with run_id {run_id} is already registered.")  # NOQA: E501
     else:
-        print("Model is not registered for this run.")
+        observability.log("Model is not registered for this run.")
 
 
 def register_aml_model(
@@ -185,7 +193,7 @@ def register_aml_model(
             model_name=model_name,
             model_path=os.path.join("outputs", model_name),
             tags=tagsValue)
-        print(
+        observability.log(
             "Model registered: {} \nModel Description: {} "
             "\nModel Version: {}".format(
                 model.name, model.description, model.version
@@ -193,9 +201,13 @@ def register_aml_model(
         )
     except Exception:
         traceback.print_exc(limit=None, file=None, chain=True)
-        print("Model registration failed")
+        observability.log("Model registration failed")
         raise
 
 
 if __name__ == '__main__':
-    main()
+    try:
+        main()
+    except Exception as exception:
+        observability.exception(exception)
+        raise exception
diff --git a/ml_model/training/train.py b/ml_model/training/train.py
index ab4bb3a1..51fd9e60 100644
--- a/ml_model/training/train.py
+++ b/ml_model/training/train.py
@@ -29,6 +29,9 @@
 from keras.layers import Dropout, Flatten, Dense
 from keras import optimizers
 from keras.preprocessing.image import ImageDataGenerator
+from ml_service.util.logger.observability import Observability
+
+observability = Observability()
 
 
 # Split the dataframe into test and train data
@@ -38,12 +41,12 @@ def split_data(data_folder, preprocessing_args):
         preprocessing_args['image_size']['y'])
     batch_size = preprocessing_args['batch_size']
 
-    print("Getting Data...")
+    observability.log("Getting Data...")
     datagen = ImageDataGenerator(
         rescale=1./255,  # normalize pixel values
         validation_split=0.3)  # hold back 30% of the images for validation
 
-    print("Preparing training dataset...")
+    observability.log("Preparing training dataset...")
     train_generator = datagen.flow_from_directory(
         data_folder,
         target_size=img_size,
@@ -51,7 +54,7 @@ def split_data(data_folder, preprocessing_args):
         class_mode='categorical',
         subset='training')  # set as training data
 
-    print("Preparing validation dataset...")
+    observability.log("Preparing validation dataset...")
     validation_generator = datagen.flow_from_directory(
         data_folder,
         target_size=img_size,
@@ -60,7 +63,7 @@ def split_data(data_folder, preprocessing_args):
         subset='validation')  # set as validation data
 
     classes = sorted(train_generator.class_indices.keys())
-    print("class names: ", classes)
+    observability.log("class names: ", classes)
 
     data = {"train": train_generator,
             "test": validation_generator,
@@ -138,7 +141,8 @@ def get_model_metrics(history):
 
 
 def main():
-    print("Running train.py")
+    observability.start_span()
+    observability.log("Running train.py")
 
     train_args = {"num_epochs": 10}
     preprocessing_args = {
@@ -151,8 +155,14 @@ def main():
 
     metrics = get_model_metrics(history)
     for (k, v) in metrics.items():
-        print(f"{k}: {v}")
+        observability.log(f"{k}: {v}")
+
+    observability.end_span()
 
 
 if __name__ == '__main__':
-    main()
+    try:
+        main()
+    except Exception as exception:
+        observability.exception(exception)
+        raise exception
diff --git a/ml_model/training/train_aml.py b/ml_model/training/train_aml.py
index 3a342631..2c1da7ff 100644
--- a/ml_model/training/train_aml.py
+++ b/ml_model/training/train_aml.py
@@ -29,10 +29,14 @@
 import json
 from ml_model.training.train import split_data, train_model, get_model_metrics
 from ml_model.util.model_helper import get_or_register_dataset
+from ml_service.util.logger.observability import Observability
+
+observability = Observability()
 
 
 def main():
-    print("Running train_aml.py")
+    observability.start_span()
+    observability.log("Running train_aml.py")
 
     parser = argparse.ArgumentParser("train")
     parser.add_argument(
@@ -69,11 +73,11 @@ def main():
     )
     args = parser.parse_args()
 
-    print("Argument [model_name]: %s" % args.model_name)
-    print("Argument [step_output]: %s" % args.step_output)
-    print("Argument [data_file_path]: %s" % args.data_file_path)
-    print("Argument [dataset_name]: %s" % args.dataset_name)
-    print("Argument [datastore_name]: %s" % args.datastore_name)
+    observability.log("Argument [model_name]: %s" % args.model_name)
+    observability.log("Argument [step_output]: %s" % args.step_output)
+    observability.log("Argument [data_file_path]: %s" % args.data_file_path)
+    observability.log("Argument [dataset_name]: %s" % args.dataset_name)
+    observability.log("Argument [datastore_name]: %s" % args.datastore_name)
 
     model_name = args.model_name
     step_output_path = args.step_output
@@ -83,7 +87,7 @@ def main():
 
     run = Run.get_context()
 
-    print("Getting training parameters")
+    observability.log("Getting training parameters")
 
     # Load the training parameters from the parameters file
     with open("parameters.json") as f:
@@ -92,16 +96,17 @@ def main():
         preprocessing_args = pars["preprocessing"]
         train_args = pars["training"]
     except KeyError:
-        print("Could not load preprocessing or training values from file")
+        observability.log("Could not load preprocessing or training values "
+                          "from file")
         train_args = {}
         preprocessing_args = {}
 
     # Log the training parameters
-    print(f"Parameters: {preprocessing_args}")
+    observability.log(f"Parameters: {preprocessing_args}")
     for (k, v) in preprocessing_args.items():
         run.log(k, v)
         run.parent.log(k, v)
-    print(f"Parameters: {train_args}")
+    observability.log(f"Parameters: {train_args}")
     for (k, v) in train_args.items():
         run.log(k, v)
         run.parent.log(k, v)
@@ -122,7 +127,7 @@ def main():
     # mount the dynamic version of the dataset, which can't be determined at pipeline publish time  # NOQA: E501
     mount_context = dataset.mount()
     mount_context.start()
-    print(f"mount_point is: {mount_context.mount_point}")
+    observability.log(f"mount_point is: {mount_context.mount_point}")
     data = split_data(mount_context.mount_point, preprocessing_args)
     model, history = train_model(data, train_args, preprocessing_args)
     mount_context.stop()
@@ -138,7 +143,7 @@ def main():
     model_output_path = os.path.join(step_output_path, model_name)
     model.save(model_output_path)
     with open(os.path.join(step_output_path, "run_id.txt"), "w") as text_file:
-        print(f"{run.id}", file=text_file)
+        observability.log(f"{run.id}", file=text_file)
 
     # Also upload model file to run outputs for history
     os.makedirs('outputs', exist_ok=True)
@@ -146,10 +151,16 @@ def main():
     model.save(output_path)
 
     run.tag("run_type", value="train")
-    print(f"tags now present for run: {run.tags}")
+    observability.log(f"tags now present for run: {run.tags}")
 
     run.complete()
 
+    observability.end_span()
+
 
 if __name__ == '__main__':
-    main()
+    try:
+        main()
+    except Exception as exception:
+        observability.exception(exception)
+        raise exception
diff --git a/ml_model/util/model_helper.py b/ml_model/util/model_helper.py
index 95154ffd..385828ab 100644
--- a/ml_model/util/model_helper.py
+++ b/ml_model/util/model_helper.py
@@ -4,6 +4,9 @@
 from azureml.core import Run
 from azureml.core import Workspace, Dataset, Datastore
 from azureml.core.model import Model as AMLModel
+from ml_service.util.logger.observability import Observability
+
+observability = Observability()
 
 
 def get_aml_context(run):
@@ -53,7 +56,8 @@ def get_model(
     None.
     """
     if aml_workspace is None:
-        print("No workspace defined - using current experiment workspace.")
+        observability.log("No workspace defined - "
+                          "using current experiment workspace.")
         aml_workspace, *_ = get_aml_context(Run.get_context(allow_offline=False))  # NOQA: E501
 
     tags = None
@@ -116,14 +120,15 @@ def get_or_register_dataset(
         raise Exception("Datset name can't be null")
 
     if aml_workspace is None:
-        print("No workspace defined - using current experiment workspace.")
+        observability.log("No workspace defined - "
+                          "using current experiment workspace.")
         aml_workspace, *_ = get_aml_context(Run.get_context())
 
     if data_file_path == "nopath":
-        print(f"get latest version of dataset: {dataset_name}")
+        observability.log(f"get latest version of dataset: {dataset_name}")
         dataset = Dataset.get_by_name(aml_workspace, dataset_name)
     else:
-        print(f"register a new dataset or new version: {dataset_name}, {datastore_name}, {data_file_path}")  # NOQA: E501
+        observability.log(f"register a new dataset or new version: {dataset_name}, {datastore_name}, {data_file_path}")  # NOQA: E501
         dataset = register_dataset(
             aml_workspace,
             dataset_name,
diff --git a/ml_service/pipelines/build_data_processing_os_cmd_pipeline.py b/ml_service/pipelines/build_data_processing_os_cmd_pipeline.py
index 98a4020b..0697ee04 100644
--- a/ml_service/pipelines/build_data_processing_os_cmd_pipeline.py
+++ b/ml_service/pipelines/build_data_processing_os_cmd_pipeline.py
@@ -7,9 +7,14 @@
 from ml_service.util.attach_compute import get_compute
 from ml_service.util.env_variables import Env
 from ml_service.util.manage_environment import get_environment
+from ml_service.util.logger.observability import Observability
+
+observability = Observability()
 
 
 def main():
+    observability.start_span()
+
     e = Env()
     # Get Azure machine learning workspace
     aml_workspace = Workspace.get(
@@ -17,12 +22,12 @@ def main():
         subscription_id=e.subscription_id,
         resource_group=e.resource_group,
     )
-    print(f"get_workspace:{aml_workspace}")
+    observability.log(f"get_workspace:{aml_workspace}")
 
     # Get Azure machine learning cluster
     aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
     if aml_compute is not None:
-        print(f"aml_compute:{aml_compute}")
+        observability.log(f"aml_compute:{aml_compute}")
 
     # Create a reusable Azure ML environment
     environment = get_environment(
@@ -35,6 +40,15 @@ def main():
     run_config = RunConfiguration()
     run_config.environment = environment
 
+    # Activate AppInsights in Pipeline run:
+    # https://docs.microsoft.com/en-us/azure/machine-learning/how-to-log-pipelines-application-insights
+    # Add environment variable with Application Insights Connection String
+    # Replace the value with your own connection string
+    run_config.environment.environment_variables = {
+        "APPLICATIONINSIGHTS_CONNECTION_STRING":
+        e.app_insights_connection_string
+    }
+
     if e.datastore_name:
         datastore_name = e.datastore_name
     else:
@@ -73,7 +87,7 @@ def main():
         runconfig=run_config,
         allow_reuse=False,
     )
-    print("Step Preprocess OS cmd created")
+    observability.log("Step Preprocess OS cmd created")
 
     steps = [preprocess_step]
     preprocess_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
@@ -84,9 +98,14 @@ def main():
         description="Data preprocessing OS cmd pipeline",
         version=e.build_id,
     )
-    print(f"Published pipeline: {published_pipeline.name}")
-    print(f"for build {published_pipeline.version}")
+    observability.log(f"Published pipeline: {published_pipeline.name}")
+    observability.log(f"for build {published_pipeline.version}")
+    observability.end_span()
 
 
 if __name__ == "__main__":
-    main()
+    try:
+        main()
+    except Exception as exception:
+        observability.exception(exception)
+        raise exception
diff --git a/ml_service/pipelines/build_data_processing_pipeline.py b/ml_service/pipelines/build_data_processing_pipeline.py
index b7437133..741347f1 100644
--- a/ml_service/pipelines/build_data_processing_pipeline.py
+++ b/ml_service/pipelines/build_data_processing_pipeline.py
@@ -104,4 +104,8 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
+    try:
+        main()
+    except Exception as exception:
+        observability.exception(exception)
+        raise exception
diff --git a/ml_service/pipelines/build_training_pipeline.py b/ml_service/pipelines/build_training_pipeline.py
index 0a9ab617..125cd3cf 100644
--- a/ml_service/pipelines/build_training_pipeline.py
+++ b/ml_service/pipelines/build_training_pipeline.py
@@ -6,9 +6,14 @@
 from ml_service.util.attach_compute import get_compute
 from ml_service.util.env_variables import Env
 from ml_service.util.manage_environment import get_environment
+from ml_service.util.logger.observability import Observability
+
+observability = Observability()
 
 
 def main():
+    observability.start_span()
+
     e = Env()
     # Get Azure machine learning workspace
     aml_workspace = Workspace.get(
@@ -16,12 +21,12 @@ def main():
         subscription_id=e.subscription_id,
         resource_group=e.resource_group,
     )
-    print(f"get_workspace:{aml_workspace}")
+    observability.log(f"get_workspace:{aml_workspace}")
 
     # Get Azure machine learning cluster
     aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
     if aml_compute is not None:
-        print(f"aml_compute:{aml_compute}")
+        observability.log(f"aml_compute:{aml_compute}")
 
     # Create a reusable Azure ML environment
     environment = get_environment(
@@ -33,6 +38,15 @@ def main():
     run_config = RunConfiguration()
     run_config.environment = environment
 
+    # Activate AppInsights in Pipeline run:
+    # https://docs.microsoft.com/en-us/azure/machine-learning/how-to-log-pipelines-application-insights
+    # Add environment variable with Application Insights Connection String
+    # Replace the value with your own connection string
+    run_config.environment.environment_variables = {
+        "APPLICATIONINSIGHTS_CONNECTION_STRING":
+        e.app_insights_connection_string
+    }
+
     if e.datastore_name:
         datastore_name = e.datastore_name
     else:
@@ -65,11 +79,11 @@ def main():
         runconfig=run_config,
         allow_reuse=True,
     )
-    print("Step Train created")
+    observability.log("Step Train created")
 
     evaluate_step = PythonScriptStep(
         name="Evaluate Model ",
-        script_name="evaluate/evaluate_model.py",
+        script_name="ml_model/evaluate/evaluate_model.py",
         compute_target=aml_compute,
         source_directory=e.sources_directory_train,
         arguments=[
@@ -79,11 +93,11 @@ def main():
         runconfig=run_config,
         allow_reuse=False,
     )
-    print("Step Evaluate created")
+    observability.log("Step Evaluate created")
 
     register_step = PythonScriptStep(
         name="Register Model ",
-        script_name="register/register_model.py",
+        script_name="ml_model/register/register_model.py",
         compute_target=aml_compute,
         source_directory=e.sources_directory_train,
         inputs=[pipeline_data],
@@ -94,15 +108,16 @@ def main():
         runconfig=run_config,
         allow_reuse=False,
     )
-    print("Step Register created")
+    observability.log("Step Register created")
     # Check run_evaluation flag to include or exclude evaluation step.
     if (e.run_evaluation).lower() == "true":
-        print("Include evaluation step before register step.")
+        observability.log("Include evaluation step before register step.")
         evaluate_step.run_after(train_step)
         register_step.run_after(evaluate_step)
         steps = [train_step, evaluate_step, register_step]
     else:
-        print("Exclude evaluation step and directly run register step.")
+        observability.log("Exclude evaluation step "
+                          "and directly run register step.")
         register_step.run_after(train_step)
         steps = [train_step, register_step]
 
@@ -114,9 +129,14 @@ def main():
         description="Model training/retraining pipeline",
         version=e.build_id,
     )
-    print(f"Published pipeline: {published_pipeline.name}")
-    print(f"for build {published_pipeline.version}")
+    observability.log(f"Published pipeline: {published_pipeline.name}")
+    observability.log(f"for build {published_pipeline.version}")
+    observability.end_span()
 
 
 if __name__ == "__main__":
-    main()
+    try:
+        main()
+    except Exception as exception:
+        observability.exception(exception)
+        raise exception
diff --git a/ml_service/pipelines/run_data_processing_pipeline.py b/ml_service/pipelines/run_data_processing_pipeline.py
index 7df06e2f..b3e1a57c 100644
--- a/ml_service/pipelines/run_data_processing_pipeline.py
+++ b/ml_service/pipelines/run_data_processing_pipeline.py
@@ -2,9 +2,14 @@
 from azureml.core import Experiment, Workspace
 from ml_service.util.env_variables import Env
 import argparse
+from ml_service.util.logger.observability import Observability
+
+observability = Observability()
 
 
 def main():
+    observability.start_span()
+
     parser = argparse.ArgumentParser("register")
     parser.add_argument(
         "--aml_pipeline_name",
@@ -59,7 +64,7 @@ def main():
         raise KeyError(f"Unable to find a published pipeline for this build {e.build_id}")  # NOQA: E501
     else:
         published_pipeline = matched_pipes[0]
-        print("published pipeline id is", published_pipeline.id)
+        observability.log("published pipeline id is", published_pipeline.id)
 
         # Save the Pipeline ID for other AzDO jobs after script is complete
         if args.output_pipeline_id_file is not None:
@@ -77,8 +82,14 @@ def main():
                 published_pipeline,
                 tags=tags)
 
-            print("Pipeline run initiated ", run.id)
+            observability.log("Pipeline run initiated ", run.id)
+
+    observability.end_span()
 
 
 if __name__ == "__main__":
-    main()
+    try:
+        main()
+    except Exception as exception:
+        observability.exception(exception)
+        raise exception
diff --git a/ml_service/pipelines/run_training_pipeline.py b/ml_service/pipelines/run_training_pipeline.py
index 83eb4472..cbe6b8d6 100644
--- a/ml_service/pipelines/run_training_pipeline.py
+++ b/ml_service/pipelines/run_training_pipeline.py
@@ -2,9 +2,14 @@
 from azureml.core import Experiment, Workspace
 import argparse
 from ml_service.util.env_variables import Env
+from ml_service.util.logger.observability import Observability
+
+observability = Observability()
 
 
 def main():
+    observability.start_span()
+
     parser = argparse.ArgumentParser("register")
     parser.add_argument(
         "--output_pipeline_id_file",
@@ -45,7 +50,7 @@ def main():
         raise KeyError(f"Unable to find a published pipeline for this build {e.build_id}")  # NOQA: E501
     else:
         published_pipeline = matched_pipes[0]
-        print("published pipeline id is", published_pipeline.id)
+        observability.log("published pipeline id is", published_pipeline.id)
 
         # Save the Pipeline ID for other AzDO jobs after script is complete
         if args.output_pipeline_id_file is not None:
@@ -65,8 +70,14 @@ def main():
                 tags=tags,
                 pipeline_parameters=pipeline_parameters)
 
-            print("Pipeline run initiated ", run.id)
+            observability.log("Pipeline run initiated ", run.id)
+
+    observability.end_span()
 
 
 if __name__ == "__main__":
-    main()
+    try:
+        main()
+    except Exception as exception:
+        observability.exception(exception)
+        raise exception
diff --git a/ml_service/tests/util/test_app_insights_logger.py b/ml_service/tests/util/test_app_insights_logger.py
index e3693f14..ec40891c 100644
--- a/ml_service/tests/util/test_app_insights_logger.py
+++ b/ml_service/tests/util/test_app_insights_logger.py
@@ -1,6 +1,6 @@
 import logging
 import unittest
-from unittest.mock import patch
+from unittest.mock import MagicMock, patch
 
 from ml_service.util.logger.app_insights_logger import AppInsightsLogger
 
@@ -14,6 +14,9 @@ def __init__(self):
 class MockRun:
     def __init__(self, run_id):
         self.id = run_id
+        self.parent = MagicMock()
+        self.name = run_id
+        self.experiment = MagicMock()
 
 
 class MockEnv:
@@ -30,7 +33,8 @@ def setUp(cls, mock_app_insights_logger):
     def test_get_run_id_having_online_context(self):
         expected = "FOO"
 
-        response = self.concert_app_insights_logger.get_run_id(MockRun("FOO"))
+        response = self.concert_app_insights_logger.\
+            get_run_id_and_set_context(MockRun("FOO"))
 
         self.assertEqual(expected, response)
 
@@ -38,7 +42,7 @@ def test_get_run_id_having_online_context_using_build_id(self):
         self.concert_app_insights_logger.env.build_id = expected = "FOO"
 
         response = self.concert_app_insights_logger.\
-            get_run_id(MockRun("OfflineRun"))
+            get_run_id_and_set_context(MockRun("OfflineRun"))
 
         self.assertEqual(expected, response)
 
@@ -46,7 +50,7 @@ def test_get_run_id_having_online_context_using_uuid(self):
         self.concert_app_insights_logger.env.build_id = ""
 
         response = self.concert_app_insights_logger.\
-            get_run_id(MockRun("OfflineRun"))
+            get_run_id_and_set_context(MockRun("OfflineRun"))
 
         self.assertIsNotNone(response)
 
diff --git a/ml_service/util/logger/observability.py b/ml_service/util/logger/observability.py
index 1f6b6881..c9bdfc1a 100644
--- a/ml_service/util/logger/observability.py
+++ b/ml_service/util/logger/observability.py
@@ -43,6 +43,17 @@ def register_loggers(self):
 
 
 class Observability(LoggerInterface):
+    _instance = None
+
+    # Straightforward Singleton Pattern from
+    # https://python-patterns.guide/gang-of-four/singleton/
+    def __new__(cls):
+        if cls._instance is None:
+            print('Creating the object')
+            cls._instance = super(Observability, cls).__new__(cls)
+            # Put any initialization here.
+        return cls._instance
+
     def __init__(self) -> None:
         self._loggers = Loggers()
 

From 7893e3242aecc0a1e06f3400dd48cf5da2839ddf Mon Sep 17 00:00:00 2001
From: Florian Wagner <flwagner@microsoft.com>
Date: Thu, 3 Dec 2020 07:39:14 +0000
Subject: [PATCH 06/23] Changes to CI for observability

Signed-off-by: Florian Wagner <flwagner@microsoft.com>
---
 .pipelines/code-quality-template.yml | 5 +++++
 ml_model/ci_dependencies.yml         | 7 +++++++
 ml_model/dev_dependencies.yml        | 1 +
 3 files changed, 13 insertions(+)

diff --git a/.pipelines/code-quality-template.yml b/.pipelines/code-quality-template.yml
index 7e769658..2e3e4731 100644
--- a/.pipelines/code-quality-template.yml
+++ b/.pipelines/code-quality-template.yml
@@ -1,5 +1,10 @@
 # Pipeline template to run linting, unit tests with code coverage, and publish the results.
 steps:
+# This step ensures that the latest ci dependencies are applied to the build agent
+- script: |   
+   conda env update -f ml_model/ci_dependencies.yml -n ci
+  displayName: 'Update missing dependencies for current branch on build agent'
+
 - script: |   
    flake8 --output-file=lint-testresults.xml --format junit-xml
   displayName: 'Run lint tests'
diff --git a/ml_model/ci_dependencies.yml b/ml_model/ci_dependencies.yml
index 987a0f34..38699966 100644
--- a/ml_model/ci_dependencies.yml
+++ b/ml_model/ci_dependencies.yml
@@ -26,4 +26,11 @@ dependencies:
       - tensorflow==2.3.*
       - keras==2.4.*
 
+      # Observability
+      - dataclasses==0.6
+      - opencensus==0.7.11
+      - opencensus-ext-httplib==0.7.3
+      - opencensus-ext-logging==0.1.0
+      - opencensus-context==0.1.2
+      - opencensus-ext-azure==1.0.5
 
diff --git a/ml_model/dev_dependencies.yml b/ml_model/dev_dependencies.yml
index c10a8b94..a8e4cac6 100644
--- a/ml_model/dev_dependencies.yml
+++ b/ml_model/dev_dependencies.yml
@@ -30,6 +30,7 @@ dependencies:
       - debugpy
 
       # Observability
+      - dataclasses==0.6
       - opencensus==0.7.11
       - opencensus-ext-httplib==0.7.3
       - opencensus-ext-logging==0.1.0

From fcba561adef74d160872e453fb1a989156b92662 Mon Sep 17 00:00:00 2001
From: Florian Wagner <flwagner@microsoft.com>
Date: Fri, 4 Dec 2020 02:24:27 +0000
Subject: [PATCH 07/23] Fix log_metrics, adapt test to singleton pattern

Signed-off-by: Florian Wagner <flwagner@microsoft.com>
---
 ml_service/tests/util/test_observability.py   | 28 +++++++++++++------
 ml_service/util/logger/app_insights_logger.py |  2 +-
 2 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/ml_service/tests/util/test_observability.py b/ml_service/tests/util/test_observability.py
index cde11718..c95db2e0 100644
--- a/ml_service/tests/util/test_observability.py
+++ b/ml_service/tests/util/test_observability.py
@@ -2,14 +2,18 @@
 from unittest.mock import patch
 
 from ml_service.util.logger.observability import Observability
+from ml_service.util.logger.logger_interface import Severity
 
 
 class ObservabilityMock(Observability):
     @patch("ml_service.util.logger.app_insights_logger.AppInsightsLogger")
     @patch("ml_service.util.logger.azure_ml_logger.AzureMlLogger")
+    @patch("ml_service.util.logger.console_logger.ConsoleLogger")
     @patch("ml_service.util.logger.observability.Loggers")
-    def __init__(self, mock_loggers, mock_aml_logger, mock_app_insight_logger):
-        mock_loggers.loggers = [mock_aml_logger, mock_app_insight_logger]
+    def __init__(self, mock_loggers, mock_console_logger, mock_aml_logger,
+                 mock_app_insight_logger):
+        mock_loggers.loggers = [mock_console_logger, mock_aml_logger,
+                                mock_app_insight_logger]
         self._loggers = mock_loggers
 
 
@@ -24,13 +28,14 @@ def test_log_metric_called_with_parameters(self):
         self.observability.log_metric.assert_called_with("FOO", "BAZ", "BAR")
 
     def test_log_called_with_parameters(self):
-        self.observability.log("FOO", "BAZ")
+        self.observability.log("FOO", Severity.CRITICAL)
 
-        self.observability.log.assert_called_with("FOO", "BAZ")
+        self.observability.log.assert_called_with("FOO", Severity.CRITICAL)
 
     def test_log_metric_is_being_called_by_all_loggers(self):
+        # Force creating a new singleton on base class
+        Observability._instance = None
         self.observability = ObservabilityMock()
-
         self.observability.log_metric("FOO", "BAZ", "BAR")
 
         self.observability._loggers.loggers[0].log_metric.assert_called_with(
@@ -39,16 +44,23 @@ def test_log_metric_is_being_called_by_all_loggers(self):
         self.observability._loggers.loggers[1].log_metric.assert_called_with(
             "FOO", "BAZ", "BAR", False
         )
+        self.observability._loggers.loggers[2].log_metric.assert_called_with(
+            "FOO", "BAZ", "BAR", False
+        )
 
     def test_log_is_being_called_by_all_loggers(self):
+        # Force creating a new singleton on base class
+        Observability._instance = None
         self.observability = ObservabilityMock()
 
-        self.observability.log("FOO", "BAZ")
+        self.observability.log("FOO", Severity.CRITICAL)
 
         self.observability._loggers.loggers[0].\
-            log.assert_called_with("FOO", "BAZ")
+            log.assert_called_with("FOO", Severity.CRITICAL)
         self.observability._loggers.loggers[1].\
-            log.assert_called_with("FOO", "BAZ")
+            log.assert_called_with("FOO", Severity.CRITICAL)
+        self.observability._loggers.loggers[2].\
+            log.assert_called_with("FOO", Severity.CRITICAL)
 
 
 if __name__ == "__main__":
diff --git a/ml_service/util/logger/app_insights_logger.py b/ml_service/util/logger/app_insights_logger.py
index 89049fea..e15b5589 100644
--- a/ml_service/util/logger/app_insights_logger.py
+++ b/ml_service/util/logger/app_insights_logger.py
@@ -73,7 +73,7 @@ def log_metric(
 
         measure = measure_module.MeasureFloat(name, description)
         self.set_view(name, description, measure)
-        measurement_map.measure_float_put(measure, value)
+        measurement_map.measure_put_attachment(name, value)
         measurement_map.record(tag_map)
 
     def log(self, description="", severity=Severity.INFO):

From 99edaa6bacb70e3247f2b78283b3a634af58846d Mon Sep 17 00:00:00 2001
From: Florian Wagner <flwagner@microsoft.com>
Date: Fri, 4 Dec 2020 03:51:27 +0000
Subject: [PATCH 08/23] Refactor CI dependency updates

Signed-off-by: Florian Wagner <flwagner@microsoft.com>
---
 .pipelines/code-quality-template.yml          | 5 +----
 .pipelines/trigger-preprocessing-pipeline.yml | 1 +
 .pipelines/update-ci-dependencies.yml         | 5 +++++
 3 files changed, 7 insertions(+), 4 deletions(-)
 create mode 100644 .pipelines/update-ci-dependencies.yml

diff --git a/.pipelines/code-quality-template.yml b/.pipelines/code-quality-template.yml
index 2e3e4731..c83de34b 100644
--- a/.pipelines/code-quality-template.yml
+++ b/.pipelines/code-quality-template.yml
@@ -1,9 +1,6 @@
 # Pipeline template to run linting, unit tests with code coverage, and publish the results.
 steps:
-# This step ensures that the latest ci dependencies are applied to the build agent
-- script: |   
-   conda env update -f ml_model/ci_dependencies.yml -n ci
-  displayName: 'Update missing dependencies for current branch on build agent'
+- template: update-ci-dependencies.yml
 
 - script: |   
    flake8 --output-file=lint-testresults.xml --format junit-xml
diff --git a/.pipelines/trigger-preprocessing-pipeline.yml b/.pipelines/trigger-preprocessing-pipeline.yml
index a7c8e80b..3c68f321 100644
--- a/.pipelines/trigger-preprocessing-pipeline.yml
+++ b/.pipelines/trigger-preprocessing-pipeline.yml
@@ -16,6 +16,7 @@ stages:
     container: mlops
     timeoutInMinutes: 0
     steps:
+    - template: update-ci-dependencies.yml
     - task: AzureCLI@1
       inputs:
         azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
diff --git a/.pipelines/update-ci-dependencies.yml b/.pipelines/update-ci-dependencies.yml
new file mode 100644
index 00000000..a3f4561d
--- /dev/null
+++ b/.pipelines/update-ci-dependencies.yml
@@ -0,0 +1,5 @@
+steps:
+# This step ensures that the latest ci dependencies are applied to the build agent
+- script: |   
+    conda env update -f ml_model/ci_dependencies.yml -n ci
+    displayName: 'Update missing dependencies for current branch on build agent'
\ No newline at end of file

From 89ecf719bdf5ec29acc918cd7d220518a8ce7699 Mon Sep 17 00:00:00 2001
From: Florian Wagner <flwagner@microsoft.com>
Date: Fri, 4 Dec 2020 03:57:15 +0000
Subject: [PATCH 09/23] CI dependency template fix add to train pipe

Signed-off-by: Florian Wagner <flwagner@microsoft.com>
---
 .pipelines/03-train-evaluate-register-model.yml | 1 +
 .pipelines/update-ci-dependencies.yml           | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.pipelines/03-train-evaluate-register-model.yml b/.pipelines/03-train-evaluate-register-model.yml
index daa85317..e58a1022 100644
--- a/.pipelines/03-train-evaluate-register-model.yml
+++ b/.pipelines/03-train-evaluate-register-model.yml
@@ -75,6 +75,7 @@ stages:
     container: mlops
     timeoutInMinutes: 0
     steps:
+    - template: update-ci-dependencies.yml
     - task: AzureCLI@1
       inputs:
         azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
diff --git a/.pipelines/update-ci-dependencies.yml b/.pipelines/update-ci-dependencies.yml
index a3f4561d..ff790ed6 100644
--- a/.pipelines/update-ci-dependencies.yml
+++ b/.pipelines/update-ci-dependencies.yml
@@ -2,4 +2,4 @@ steps:
 # This step ensures that the latest ci dependencies are applied to the build agent
 - script: |   
     conda env update -f ml_model/ci_dependencies.yml -n ci
-    displayName: 'Update missing dependencies for current branch on build agent'
\ No newline at end of file
+  displayName: 'Update missing dependencies for current branch on build agent'
\ No newline at end of file

From 1d076b3562e0091112c4ebab24b1441978ce66b6 Mon Sep 17 00:00:00 2001
From: Florian Wagner <flwagner@microsoft.com>
Date: Fri, 4 Dec 2020 04:18:00 +0000
Subject: [PATCH 10/23] Fix observability formatting

Signed-off-by: Florian Wagner <flwagner@microsoft.com>
---
 ml_service/pipelines/run_data_processing_pipeline.py | 4 ++--
 ml_service/pipelines/run_training_pipeline.py        | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/ml_service/pipelines/run_data_processing_pipeline.py b/ml_service/pipelines/run_data_processing_pipeline.py
index b3e1a57c..69853e47 100644
--- a/ml_service/pipelines/run_data_processing_pipeline.py
+++ b/ml_service/pipelines/run_data_processing_pipeline.py
@@ -64,7 +64,7 @@ def main():
         raise KeyError(f"Unable to find a published pipeline for this build {e.build_id}")  # NOQA: E501
     else:
         published_pipeline = matched_pipes[0]
-        observability.log("published pipeline id is", published_pipeline.id)
+        observability.log(f"published pipeline id is {published_pipeline.id}")
 
         # Save the Pipeline ID for other AzDO jobs after script is complete
         if args.output_pipeline_id_file is not None:
@@ -82,7 +82,7 @@ def main():
                 published_pipeline,
                 tags=tags)
 
-            observability.log("Pipeline run initiated ", run.id)
+            observability.log(f"Pipeline run initiated {run.id}")
 
     observability.end_span()
 
diff --git a/ml_service/pipelines/run_training_pipeline.py b/ml_service/pipelines/run_training_pipeline.py
index cbe6b8d6..781b4b2f 100644
--- a/ml_service/pipelines/run_training_pipeline.py
+++ b/ml_service/pipelines/run_training_pipeline.py
@@ -50,7 +50,7 @@ def main():
         raise KeyError(f"Unable to find a published pipeline for this build {e.build_id}")  # NOQA: E501
     else:
         published_pipeline = matched_pipes[0]
-        observability.log("published pipeline id is", published_pipeline.id)
+        observability.log(f"published pipeline id is {published_pipeline.id}")
 
         # Save the Pipeline ID for other AzDO jobs after script is complete
         if args.output_pipeline_id_file is not None:
@@ -70,7 +70,7 @@ def main():
                 tags=tags,
                 pipeline_parameters=pipeline_parameters)
 
-            observability.log("Pipeline run initiated ", run.id)
+            observability.log(f"Pipeline run initiated {run.id}")
 
     observability.end_span()
 

From 38d11b4993bbfe1dc9a6f1b36a120aa474ae0439 Mon Sep 17 00:00:00 2001
From: Florian Wagner <flwagner@microsoft.com>
Date: Fri, 4 Dec 2020 04:37:24 +0000
Subject: [PATCH 11/23] Activate env rebuild for aml pipeline tests

Signed-off-by: Florian Wagner <flwagner@microsoft.com>
---
 .pipelines/variables-template.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pipelines/variables-template.yml b/.pipelines/variables-template.yml
index c944eaf6..283ddaf2 100644
--- a/.pipelines/variables-template.yml
+++ b/.pipelines/variables-template.yml
@@ -32,8 +32,8 @@ variables:
   - name: ALLOW_RUN_CANCEL
     value: "false"
   # Flag to allow rebuilding the AML Environment after it was built for the first time. This enables dependency updates from conda_dependencies.yaml.
-  # - name: AML_REBUILD_ENVIRONMENT
-  #  value: "false"
+  - name: AML_REBUILD_ENVIRONMENT
+    value: "true"
 
   # AML Environment Config
   - name: AML_ENV_NAME

From 48d57eeec26a0948e7dd31bfd57d5707c9a2f9c2 Mon Sep 17 00:00:00 2001
From: Florian Wagner <flwagner@microsoft.com>
Date: Fri, 4 Dec 2020 06:13:38 +0000
Subject: [PATCH 12/23] Fix parameters.json path reference

Signed-off-by: Florian Wagner <flwagner@microsoft.com>
---
 ml_model/preprocessing/preprocess_aml.py | 2 +-
 ml_model/register/register_model.py      | 2 +-
 ml_model/scoring/score.py                | 2 +-
 ml_model/training/train_aml.py           | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/ml_model/preprocessing/preprocess_aml.py b/ml_model/preprocessing/preprocess_aml.py
index 5abd2968..f27f747f 100644
--- a/ml_model/preprocessing/preprocess_aml.py
+++ b/ml_model/preprocessing/preprocess_aml.py
@@ -90,7 +90,7 @@ def main():
     aml_workspace, *_ = get_aml_context(run)
 
     if preprocessing_param is None or preprocessing_param == "":
-        with open("parameters.json") as f:
+        with open("ml_model/parameters.json") as f:
             pars = json.load(f)
             preprocessing_args = pars["preprocessing"]
     else:
diff --git a/ml_model/register/register_model.py b/ml_model/register/register_model.py
index 8d9a1b51..52007702 100644
--- a/ml_model/register/register_model.py
+++ b/ml_model/register/register_model.py
@@ -96,7 +96,7 @@ def main():
     observability.log("Getting registration parameters")
 
     # Load the registration parameters from the parameters file
-    with open("parameters.json") as f:
+    with open("ml_model/parameters.json") as f:
         pars = json.load(f)
     try:
         register_args = pars["registration"]
diff --git a/ml_model/scoring/score.py b/ml_model/scoring/score.py
index a8904714..d8b916cc 100644
--- a/ml_model/scoring/score.py
+++ b/ml_model/scoring/score.py
@@ -93,7 +93,7 @@ def predict_image(classifier, image_array):
     if len(image_urls) != len(image_classes):
         raise "number of urls is not same as number of classes"
 
-    with open("parameters.json") as f:
+    with open("ml_model/parameters.json") as f:
         pars = json.load(f)
         image_size = pars["preprocessing"]["image_size"]
         size = (image_size["x"], image_size["y"])
diff --git a/ml_model/training/train_aml.py b/ml_model/training/train_aml.py
index 2c1da7ff..19a25ec9 100644
--- a/ml_model/training/train_aml.py
+++ b/ml_model/training/train_aml.py
@@ -90,7 +90,7 @@ def main():
     observability.log("Getting training parameters")
 
     # Load the training parameters from the parameters file
-    with open("parameters.json") as f:
+    with open("ml_model/parameters.json") as f:
         pars = json.load(f)
     try:
         preprocessing_args = pars["preprocessing"]

From 8390138267d6ae15ca22c1c0227407249c0bf3c8 Mon Sep 17 00:00:00 2001
From: Florian Wagner <flwagner@microsoft.com>
Date: Fri, 4 Dec 2020 06:57:57 +0000
Subject: [PATCH 13/23] Fix logger usage in train.py

Signed-off-by: Florian Wagner <flwagner@microsoft.com>
---
 ml_model/training/train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ml_model/training/train.py b/ml_model/training/train.py
index 51fd9e60..b589d9e0 100644
--- a/ml_model/training/train.py
+++ b/ml_model/training/train.py
@@ -63,7 +63,7 @@ def split_data(data_folder, preprocessing_args):
         subset='validation')  # set as validation data
 
     classes = sorted(train_generator.class_indices.keys())
-    observability.log("class names: ", classes)
+    observability.log(f"class names: {classes}")
 
     data = {"train": train_generator,
             "test": validation_generator,

From 284d2b0c5ed913774620e93d33f631c6035f9572 Mon Sep 17 00:00:00 2001
From: Florian Wagner <flwagner@microsoft.com>
Date: Fri, 4 Dec 2020 07:22:55 +0000
Subject: [PATCH 14/23] Fix copy/paste error - print to file

Signed-off-by: Florian Wagner <flwagner@microsoft.com>
---
 ml_model/training/train_aml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ml_model/training/train_aml.py b/ml_model/training/train_aml.py
index 19a25ec9..b784fd85 100644
--- a/ml_model/training/train_aml.py
+++ b/ml_model/training/train_aml.py
@@ -143,7 +143,7 @@ def main():
     model_output_path = os.path.join(step_output_path, model_name)
     model.save(model_output_path)
     with open(os.path.join(step_output_path, "run_id.txt"), "w") as text_file:
-        observability.log(f"{run.id}", file=text_file)
+        print(f"{run.id}", file=text_file)
 
     # Also upload model file to run outputs for history
     os.makedirs('outputs', exist_ok=True)

From 05284f3b063c49171de7613f13c58201a7776923 Mon Sep 17 00:00:00 2001
From: Florian Wagner <flwagner@microsoft.com>
Date: Mon, 7 Dec 2020 07:55:02 +0000
Subject: [PATCH 15/23] Fix duplicate log entries, enrich custom dimension

Signed-off-by: Florian Wagner <flwagner@microsoft.com>
---
 ml_model/evaluate/evaluate_model.py           | 18 +++++++---
 ml_model/preprocessing/preprocess_aml.py      |  9 ++---
 ml_model/preprocessing/preprocess_images.py   | 10 ++----
 ml_model/preprocessing/preprocess_os_cmd.py   |  9 ++---
 ml_model/register/register_model.py           |  9 ++---
 ml_model/training/train.py                    |  9 ++---
 ml_model/training/train_aml.py                |  9 ++---
 ml_model/util/model_helper.py                 |  4 +--
 .../build_data_processing_os_cmd_pipeline.py  |  9 ++---
 .../build_data_processing_pipeline.py         |  9 ++---
 .../pipelines/build_training_pipeline.py      |  9 ++---
 .../pipelines/run_data_processing_pipeline.py | 10 ++----
 ml_service/pipelines/run_training_pipeline.py | 10 ++----
 ml_service/util/attach_compute.py             |  5 +--
 ml_service/util/logger/app_insights_logger.py | 36 ++++++++++++-------
 ml_service/util/logger/logger_interface.py    | 23 ++++++++++++
 ml_service/util/logger/observability.py       | 13 +++++--
 ml_service/util/manage_environment.py         |  5 +--
 18 files changed, 110 insertions(+), 96 deletions(-)

diff --git a/ml_model/evaluate/evaluate_model.py b/ml_model/evaluate/evaluate_model.py
index 5ccc1dd4..27488d5c 100644
--- a/ml_model/evaluate/evaluate_model.py
+++ b/ml_model/evaluate/evaluate_model.py
@@ -26,6 +26,7 @@
 from azureml.core import Run
 import argparse
 from ml_model.util.model_helper import get_model
+from ml_service.util.logger.observability import observability
 
 
 def evaluate_model_performs_better(model, run):
@@ -37,13 +38,14 @@ def evaluate_model_performs_better(model, run):
     if (production_model_accuracy is None or new_model_accuracy is None):
         raise Exception(f"Unable to find {metric_eval} metrics, exiting evaluation")  # NOQA: E501
     else:
-        print(f"Current model accuracy: {production_model_accuracy}, new model accuracy: {new_model_accuracy}")  # NOQA: E501
+        observability.log(f"Current model accuracy: {production_model_accuracy}, new model accuracy: {new_model_accuracy}")  # NOQA: E501
 
     if (new_model_accuracy > production_model_accuracy):
-        print("New model performs better, register it")
+        observability.log("New model performs better, register it")
         return True
     else:
-        print("New model doesn't perform better, skip registration")
+        observability.log("New model doesn't perform better,"
+                          " skip registration")
         return False
 
 
@@ -91,8 +93,14 @@ def main():
         if(not should_register and (allow_run_cancel).lower() == 'true'):
             run.parent.cancel()
     else:
-        print("This is the first model, register it")
+        observability.log("This is the first model, register it")
 
 
 if __name__ == '__main__':
-    main()
+    observability.start_span('evaluate_model')
+    try:
+        main()
+    except Exception as exception:
+        observability.exception(exception)
+        raise exception
+    observability.end_span()
diff --git a/ml_model/preprocessing/preprocess_aml.py b/ml_model/preprocessing/preprocess_aml.py
index f27f747f..851f9e7a 100644
--- a/ml_model/preprocessing/preprocess_aml.py
+++ b/ml_model/preprocessing/preprocess_aml.py
@@ -28,13 +28,10 @@
 import json
 from ml_model.preprocessing.preprocess_images import resize_images
 from ml_model.util.model_helper import get_or_register_dataset, get_aml_context
-from ml_service.util.logger.observability import Observability
-
-observability = Observability()
+from ml_service.util.logger.observability import observability
 
 
 def main():
-    observability.start_span()
     observability.log("Running preprocess.py")
 
     parser = argparse.ArgumentParser("preprocess")
@@ -121,12 +118,12 @@ def main():
 
     run.complete()
 
-    observability.end_span()
-
 
 if __name__ == '__main__':
+    observability.start_span('preprocess_aml')
     try:
         main()
     except Exception as exception:
         observability.exception(exception)
         raise exception
+    observability.end_span()
diff --git a/ml_model/preprocessing/preprocess_images.py b/ml_model/preprocessing/preprocess_images.py
index 582aae7b..e3fa1717 100644
--- a/ml_model/preprocessing/preprocess_images.py
+++ b/ml_model/preprocessing/preprocess_images.py
@@ -2,9 +2,7 @@
 import shutil
 import numpy as np
 from PIL import Image
-from ml_service.util.logger.observability import Observability
-
-observability = Observability()
+from ml_service.util.logger.observability import observability
 
 
 def resize_image(img, size):
@@ -75,8 +73,6 @@ def resize_images(indir, outdir, preprocessing_args):
 
 
 def main():
-    observability.start_span()
-
     in_dir = 'data/gear_images/raw'
     out_dir = 'data/processed'
     preprocessing_args = {
@@ -85,12 +81,12 @@ def main():
     }
     resize_images(in_dir, out_dir, preprocessing_args)  # NOQA: E501
 
-    observability.end_span()
-
 
 if __name__ == '__main__':
+    observability.start_span('preprocess_images')
     try:
         main()
     except Exception as exception:
         observability.exception(exception)
         raise exception
+    observability.end_span()
diff --git a/ml_model/preprocessing/preprocess_os_cmd.py b/ml_model/preprocessing/preprocess_os_cmd.py
index edf8d789..5bd2d3d6 100644
--- a/ml_model/preprocessing/preprocess_os_cmd.py
+++ b/ml_model/preprocessing/preprocess_os_cmd.py
@@ -5,13 +5,10 @@
 import argparse
 import subprocess
 from ml_model.util.model_helper import get_or_register_dataset, get_aml_context
-from ml_service.util.logger.observability import Observability
-
-observability = Observability()
+from ml_service.util.logger.observability import observability
 
 
 def main():
-    observability.start_span()
     observability.log("Running preprocess_os_cmd.py")
 
     parser = argparse.ArgumentParser("preprocess_os_cmd")
@@ -106,12 +103,12 @@ def main():
 
     run.complete()
 
-    observability.end_span()
-
 
 if __name__ == '__main__':
+    observability.start_span('preprocess_os_cmd_main')
     try:
         main()
     except Exception as exception:
         observability.exception(exception)
         raise exception
+    observability.end_span()
diff --git a/ml_model/register/register_model.py b/ml_model/register/register_model.py
index 52007702..fab24cc6 100644
--- a/ml_model/register/register_model.py
+++ b/ml_model/register/register_model.py
@@ -31,9 +31,7 @@
 from azureml.core import Run
 from azureml.core.model import Model as AMLModel
 from ml_model.util.model_helper import get_aml_context
-from ml_service.util.logger.observability import Observability
-
-observability = Observability()
+from ml_service.util.logger.observability import observability
 
 
 def find_child_run(parent_run, child_run_id):
@@ -57,7 +55,6 @@ def find_run(experiment, run_id):
 
 
 def main():
-    observability.start_span()
 
     run = Run.get_context()
     ws, exp, run_id = get_aml_context(run)
@@ -161,8 +158,6 @@ def main():
                           "Skipping model registration.")
         sys.exit(0)
 
-    observability.end_span()
-
 
 def model_already_registered(model_name, exp, run_id):
     model_list = AMLModel.list(exp.workspace, name=model_name, run_id=run_id)
@@ -206,8 +201,10 @@ def register_aml_model(
 
 
 if __name__ == '__main__':
+    observability.start_span('register_model')
     try:
         main()
     except Exception as exception:
         observability.exception(exception)
         raise exception
+    observability.end_span()
diff --git a/ml_model/training/train.py b/ml_model/training/train.py
index b589d9e0..46258f17 100644
--- a/ml_model/training/train.py
+++ b/ml_model/training/train.py
@@ -29,9 +29,7 @@
 from keras.layers import Dropout, Flatten, Dense
 from keras import optimizers
 from keras.preprocessing.image import ImageDataGenerator
-from ml_service.util.logger.observability import Observability
-
-observability = Observability()
+from ml_service.util.logger.observability import observability
 
 
 # Split the dataframe into test and train data
@@ -141,7 +139,6 @@ def get_model_metrics(history):
 
 
 def main():
-    observability.start_span()
     observability.log("Running train.py")
 
     train_args = {"num_epochs": 10}
@@ -157,12 +154,12 @@ def main():
     for (k, v) in metrics.items():
         observability.log(f"{k}: {v}")
 
-    observability.end_span()
-
 
 if __name__ == '__main__':
+    observability.start_span('train')
     try:
         main()
     except Exception as exception:
         observability.exception(exception)
         raise exception
+    observability.end_span()
diff --git a/ml_model/training/train_aml.py b/ml_model/training/train_aml.py
index b784fd85..78b95417 100644
--- a/ml_model/training/train_aml.py
+++ b/ml_model/training/train_aml.py
@@ -29,13 +29,10 @@
 import json
 from ml_model.training.train import split_data, train_model, get_model_metrics
 from ml_model.util.model_helper import get_or_register_dataset
-from ml_service.util.logger.observability import Observability
-
-observability = Observability()
+from ml_service.util.logger.observability import observability
 
 
 def main():
-    observability.start_span()
     observability.log("Running train_aml.py")
 
     parser = argparse.ArgumentParser("train")
@@ -155,12 +152,12 @@ def main():
 
     run.complete()
 
-    observability.end_span()
-
 
 if __name__ == '__main__':
+    observability.start_span('train_aml')
     try:
         main()
     except Exception as exception:
         observability.exception(exception)
         raise exception
+    observability.end_span()
diff --git a/ml_model/util/model_helper.py b/ml_model/util/model_helper.py
index 385828ab..57bd15e8 100644
--- a/ml_model/util/model_helper.py
+++ b/ml_model/util/model_helper.py
@@ -1,12 +1,10 @@
 """
 model_helper.py
 """
+from ml_service.util.logger.observability import observability
 from azureml.core import Run
 from azureml.core import Workspace, Dataset, Datastore
 from azureml.core.model import Model as AMLModel
-from ml_service.util.logger.observability import Observability
-
-observability = Observability()
 
 
 def get_aml_context(run):
diff --git a/ml_service/pipelines/build_data_processing_os_cmd_pipeline.py b/ml_service/pipelines/build_data_processing_os_cmd_pipeline.py
index 0697ee04..e6267037 100644
--- a/ml_service/pipelines/build_data_processing_os_cmd_pipeline.py
+++ b/ml_service/pipelines/build_data_processing_os_cmd_pipeline.py
@@ -7,14 +7,10 @@
 from ml_service.util.attach_compute import get_compute
 from ml_service.util.env_variables import Env
 from ml_service.util.manage_environment import get_environment
-from ml_service.util.logger.observability import Observability
-
-observability = Observability()
+from ml_service.util.logger.observability import observability
 
 
 def main():
-    observability.start_span()
-
     e = Env()
     # Get Azure machine learning workspace
     aml_workspace = Workspace.get(
@@ -100,12 +96,13 @@ def main():
     )
     observability.log(f"Published pipeline: {published_pipeline.name}")
     observability.log(f"for build {published_pipeline.version}")
-    observability.end_span()
 
 
 if __name__ == "__main__":
+    observability.start_span('build_data_processing_os_cmd_pipeline_main')
     try:
         main()
     except Exception as exception:
         observability.exception(exception)
         raise exception
+    observability.end_span()
diff --git a/ml_service/pipelines/build_data_processing_pipeline.py b/ml_service/pipelines/build_data_processing_pipeline.py
index 741347f1..d0660699 100644
--- a/ml_service/pipelines/build_data_processing_pipeline.py
+++ b/ml_service/pipelines/build_data_processing_pipeline.py
@@ -7,14 +7,10 @@
 from ml_service.util.attach_compute import get_compute
 from ml_service.util.env_variables import Env
 from ml_service.util.manage_environment import get_environment
-from ml_service.util.logger.observability import Observability
-
-observability = Observability()
+from ml_service.util.logger.observability import observability
 
 
 def main():
-    observability.start_span()
-
     e = Env()
     # Get Azure machine learning workspace
     aml_workspace = Workspace.get(
@@ -100,12 +96,13 @@ def main():
     )
     observability.log(f"Published pipeline: {published_pipeline.name}")
     observability.log(f"for build {published_pipeline.version}")
-    observability.end_span()
 
 
 if __name__ == "__main__":
+    observability.start_span('build_data_processing_pipeline')
     try:
         main()
     except Exception as exception:
         observability.exception(exception)
         raise exception
+    observability.end_span()
diff --git a/ml_service/pipelines/build_training_pipeline.py b/ml_service/pipelines/build_training_pipeline.py
index 125cd3cf..16e06f04 100644
--- a/ml_service/pipelines/build_training_pipeline.py
+++ b/ml_service/pipelines/build_training_pipeline.py
@@ -6,14 +6,10 @@
 from ml_service.util.attach_compute import get_compute
 from ml_service.util.env_variables import Env
 from ml_service.util.manage_environment import get_environment
-from ml_service.util.logger.observability import Observability
-
-observability = Observability()
+from ml_service.util.logger.observability import observability
 
 
 def main():
-    observability.start_span()
-
     e = Env()
     # Get Azure machine learning workspace
     aml_workspace = Workspace.get(
@@ -131,12 +127,13 @@ def main():
     )
     observability.log(f"Published pipeline: {published_pipeline.name}")
     observability.log(f"for build {published_pipeline.version}")
-    observability.end_span()
 
 
 if __name__ == "__main__":
+    observability.start_span('build_training_pipeline')
     try:
         main()
     except Exception as exception:
         observability.exception(exception)
         raise exception
+    observability.end_span()
diff --git a/ml_service/pipelines/run_data_processing_pipeline.py b/ml_service/pipelines/run_data_processing_pipeline.py
index 69853e47..eddb53b6 100644
--- a/ml_service/pipelines/run_data_processing_pipeline.py
+++ b/ml_service/pipelines/run_data_processing_pipeline.py
@@ -2,14 +2,10 @@
 from azureml.core import Experiment, Workspace
 from ml_service.util.env_variables import Env
 import argparse
-from ml_service.util.logger.observability import Observability
-
-observability = Observability()
+from ml_service.util.logger.observability import observability
 
 
 def main():
-    observability.start_span()
-
     parser = argparse.ArgumentParser("register")
     parser.add_argument(
         "--aml_pipeline_name",
@@ -84,12 +80,12 @@ def main():
 
             observability.log(f"Pipeline run initiated {run.id}")
 
-    observability.end_span()
-
 
 if __name__ == "__main__":
+    observability.start_span('run_data_processing_pipeline')
     try:
         main()
     except Exception as exception:
         observability.exception(exception)
         raise exception
+    observability.end_span()
diff --git a/ml_service/pipelines/run_training_pipeline.py b/ml_service/pipelines/run_training_pipeline.py
index 781b4b2f..3081a4a7 100644
--- a/ml_service/pipelines/run_training_pipeline.py
+++ b/ml_service/pipelines/run_training_pipeline.py
@@ -2,14 +2,10 @@
 from azureml.core import Experiment, Workspace
 import argparse
 from ml_service.util.env_variables import Env
-from ml_service.util.logger.observability import Observability
-
-observability = Observability()
+from ml_service.util.logger.observability import observability
 
 
 def main():
-    observability.start_span()
-
     parser = argparse.ArgumentParser("register")
     parser.add_argument(
         "--output_pipeline_id_file",
@@ -72,12 +68,12 @@ def main():
 
             observability.log(f"Pipeline run initiated {run.id}")
 
-    observability.end_span()
-
 
 if __name__ == "__main__":
+    observability.start_span('run_training_pipeline')
     try:
         main()
     except Exception as exception:
         observability.exception(exception)
         raise exception
+    observability.end_span()
diff --git a/ml_service/util/attach_compute.py b/ml_service/util/attach_compute.py
index 8756610e..57b25163 100644
--- a/ml_service/util/attach_compute.py
+++ b/ml_service/util/attach_compute.py
@@ -4,6 +4,7 @@
 from azureml.core.compute import ComputeTarget
 from azureml.exceptions import ComputeTargetException
 from ml_service.util.env_variables import Env
+from ml_service.util.logger.observability import observability
 
 
 def get_compute(workspace: Workspace, compute_name: str, vm_size: str):  # NOQA E501
@@ -11,7 +12,7 @@ def get_compute(workspace: Workspace, compute_name: str, vm_size: str):  # NOQA
         if compute_name in workspace.compute_targets:
             compute_target = workspace.compute_targets[compute_name]
             if compute_target and type(compute_target) is AmlCompute:
-                print("Found existing compute target " + compute_name + " so using it.") # NOQA
+                observability.log("Found existing compute target " + compute_name + " so using it.") # NOQA
         else:
             e = Env()
             compute_config = AmlCompute.provisioning_configuration(
@@ -33,6 +34,6 @@ def get_compute(workspace: Workspace, compute_name: str, vm_size: str):  # NOQA
             )
         return compute_target
     except ComputeTargetException as ex:
-        print(ex)
+        observability.exception(ex)
         print("An error occurred trying to provision compute.")
         exit(1)
diff --git a/ml_service/util/logger/app_insights_logger.py b/ml_service/util/logger/app_insights_logger.py
index e15b5589..c0164b4c 100644
--- a/ml_service/util/logger/app_insights_logger.py
+++ b/ml_service/util/logger/app_insights_logger.py
@@ -23,33 +23,34 @@
 
 class AppInsightsLogger(LoggerInterface, ObservabilityAbstract):
     def __init__(self, run):
+        print('Initializing the AppInsightsLogger')
         self.env = Env()
         self.run_id = self.get_run_id_and_set_context(run)
 
-        # Prepare integrations and log format
+        # Prepare integrations and initialize tracer
         config_integration.trace_integrations(['httplib', 'logging'])
+        texporter = AzureExporter(connection_string=self.
+                                  env.app_insights_connection_string)
+        texporter.add_telemetry_processor(self.callback_function)
+        self.tracer = Tracer(
+            exporter=texporter,
+            sampler=ProbabilitySampler(self.env.trace_sampling_rate)
+        )
+
+        # Create AppInsights Handler and set log format
         self.logger = logging.getLogger(__name__)
         self.logger.setLevel(
             getattr(logging, self.env.log_level.upper(), "WARNING"))
-        # initializes log exporter
         handler = AzureLogHandler(
             connection_string=self.env.app_insights_connection_string,
             logging_sampling_rate=self.env.log_sampling_rate,
         )
         handler.add_telemetry_processor(self.callback_function)
-
         self.logger.addHandler(handler)
-        # initializes tracer
-        texporter = AzureExporter(connection_string=self.
-                                  env.app_insights_connection_string)
-        texporter.add_telemetry_processor(self.callback_function)
-        self.tracer = Tracer(
-            exporter=texporter,
-            sampler=ProbabilitySampler(self.env.trace_sampling_rate)
-        )
+
         # initializes metric exporter
         mexporter = metrics_exporter.new_metrics_exporter(
-            enable_standard_metrics=False,
+            enable_standard_metrics=True,
             export_interval=self.env.metrics_export_interval,
             connection_string=self.env.app_insights_connection_string,
         )
@@ -83,6 +84,17 @@ def log(self, description="", severity=Severity.INFO):
         :param severity: log severity
         :return:
         """
+        # Overwrite custom dimensions with caller data
+        modulename, filename, lineno = self.get_callee_details(2)
+        self.custom_dimensions[self.CUSTOM_DIMENSIONS][self.FILENAME] =\
+            filename
+        self.custom_dimensions[self.CUSTOM_DIMENSIONS][self.LINENO] =\
+            lineno
+        self.custom_dimensions[self.CUSTOM_DIMENSIONS][self.MODULE] =\
+            modulename
+        if self.current_span() is not None:
+            self.custom_dimensions[self.CUSTOM_DIMENSIONS][self.PROCESS] =\
+                self.current_span().name
 
         if severity == self.severity.DEBUG:
             self.logger.debug(description, extra=self.custom_dimensions)
diff --git a/ml_service/util/logger/logger_interface.py b/ml_service/util/logger/logger_interface.py
index 6a6b9deb..637d9047 100644
--- a/ml_service/util/logger/logger_interface.py
+++ b/ml_service/util/logger/logger_interface.py
@@ -65,7 +65,12 @@ def list_collected_spans(self):
 
 class ObservabilityAbstract:
     OFFLINE_RUN = "OfflineRun"
+    CUSTOM_DIMENSIONS = "custom_dimensions"
     CORRELATION_ID = "correlation_id"
+    FILENAME = "fileName"
+    MODULE = "module"
+    PROCESS = "process"
+    LINENO = "lineNumber"
     severity = Severity()
     severity_map = {10: "DEBUG", 20: "INFO",
                     30: "WARNING", 40: "ERROR", 50: "CRITICAL"}
@@ -127,3 +132,21 @@ def get_callee(stack_level):
         except IndexError:
             print("Index error, failed to log to AzureML")
             return ""
+
+    @staticmethod
+    def get_callee_details(stack_level):
+        """
+        This method returns the callee details as a tuple,
+        tuple values ar all strings.
+        :param stack_level:
+        :return: (module_name, file_name, line_number)
+        """
+        try:
+            stack = inspect.stack()
+            file_name = stack[stack_level + 1].filename
+            line_number = stack[stack_level + 1].lineno
+            module_name = inspect.getmodulename(file_name)
+            return module_name, file_name, line_number
+        except IndexError:
+            print("Index error, failed to log to AzureML")
+            return ""
diff --git a/ml_service/util/logger/observability.py b/ml_service/util/logger/observability.py
index c9bdfc1a..260b1495 100644
--- a/ml_service/util/logger/observability.py
+++ b/ml_service/util/logger/observability.py
@@ -13,6 +13,7 @@
 
 class Loggers(ObservabilityAbstract):
     def __init__(self) -> None:
+        print('Initializing the Loggers')
         self.loggers: LoggerInterface = []
         self.register_loggers()
 
@@ -49,13 +50,16 @@ class Observability(LoggerInterface):
     # https://python-patterns.guide/gang-of-four/singleton/
     def __new__(cls):
         if cls._instance is None:
-            print('Creating the object')
+            print('Creating the Observability Singleton')
             cls._instance = super(Observability, cls).__new__(cls)
-            # Put any initialization here.
+            cls._instance.__initialized = False
         return cls._instance
 
     def __init__(self) -> None:
-        self._loggers = Loggers()
+        if(not self.__initialized):
+            print('Initializing the Observability Singleton')
+            self.__initialized = True
+            self._loggers = Loggers()
 
     def log_metric(
             self, name="", value="", description="", log_parent=False,
@@ -151,3 +155,6 @@ def list_collected_spans(self):
         """List collected spans from first logger."""
         if len(self._loggers.loggers) > 0:
             return self._loggers.loggers[0].list_collected_spans()
+
+
+observability = Observability()
diff --git a/ml_service/util/manage_environment.py b/ml_service/util/manage_environment.py
index 0ff2c8de..fae6731d 100644
--- a/ml_service/util/manage_environment.py
+++ b/ml_service/util/manage_environment.py
@@ -3,6 +3,7 @@
 from azureml.core import Workspace, Environment
 from ml_service.util.env_variables import Env
 from azureml.core.runconfig import DEFAULT_CPU_IMAGE, DEFAULT_GPU_IMAGE
+from ml_service.util.logger.observability import observability
 
 
 def get_environment(
@@ -61,8 +62,8 @@ def get_environment(
             restored_environment.register(workspace)
 
         if restored_environment is not None:
-            print(restored_environment)
+            observability.log(restored_environment)
         return restored_environment
     except Exception as e:
-        print(e)
+        observability.exception(e)
         exit(1)

From 293f34433527bbb72c228cc96f4bad5443bfd162 Mon Sep 17 00:00:00 2001
From: Florian Wagner <flwagner@microsoft.com>
Date: Tue, 8 Dec 2020 06:02:17 +0000
Subject: [PATCH 16/23] Add appinsights cs (secret) to env-var

Signed-off-by: Florian Wagner <flwagner@microsoft.com>
---
 .pipelines/02-processing-data.yml               | 2 ++
 .pipelines/03-train-evaluate-register-model.yml | 3 +++
 .pipelines/04-deploy-model-aci.yml              | 2 ++
 .pipelines/07-processing-data-os-cmd.yml        | 2 ++
 .pipelines/trigger-preprocessing-pipeline.yml   | 3 +++
 .pipelines/variables-template.yml               | 2 +-
 6 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/.pipelines/02-processing-data.yml b/.pipelines/02-processing-data.yml
index 1b74c56b..9a639985 100644
--- a/.pipelines/02-processing-data.yml
+++ b/.pipelines/02-processing-data.yml
@@ -51,6 +51,8 @@ stages:
           # Invoke the Python building and publishing a data preprocessing pipeline
           python -m ml_service.pipelines.build_data_processing_pipeline
       displayName: 'Publish Data Preprocessing Pipeline'
+      env:
+        APP_INSIGHTS_CONNECTION_STRING: $(APP_INSIGHTS_CONNECTION_STRING)
 
 # Trigger_Preprocessing_Pipeline
 - template: trigger-preprocessing-pipeline.yml
diff --git a/.pipelines/03-train-evaluate-register-model.yml b/.pipelines/03-train-evaluate-register-model.yml
index e58a1022..b4cbaf30 100644
--- a/.pipelines/03-train-evaluate-register-model.yml
+++ b/.pipelines/03-train-evaluate-register-model.yml
@@ -90,6 +90,9 @@ stages:
           echo "##vso[task.setvariable variable=AMLPIPELINEID;isOutput=true]$AMLPIPELINEID"
       name: 'getpipelineid'
       displayName: 'Get Pipeline ID'
+      env:
+        APP_INSIGHTS_CONNECTION_STRING: $(APP_INSIGHTS_CONNECTION_STRING)
+
   - job: "Run_ML_Pipeline"
     dependsOn: "Get_Pipeline_ID"
     displayName: "Trigger ML Training Pipeline"
diff --git a/.pipelines/04-deploy-model-aci.yml b/.pipelines/04-deploy-model-aci.yml
index acbc1e5e..d7b77e93 100644
--- a/.pipelines/04-deploy-model-aci.yml
+++ b/.pipelines/04-deploy-model-aci.yml
@@ -67,3 +67,5 @@ stages:
           set -e # fail on error
           export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
           python -m ml_service.util.smoke_test_scoring_service --service "$(ACI_DEPLOYMENT_NAME)"
+      env:
+        APP_INSIGHTS_CONNECTION_STRING: $(APP_INSIGHTS_CONNECTION_STRING)
diff --git a/.pipelines/07-processing-data-os-cmd.yml b/.pipelines/07-processing-data-os-cmd.yml
index c7da1ded..7627e20c 100644
--- a/.pipelines/07-processing-data-os-cmd.yml
+++ b/.pipelines/07-processing-data-os-cmd.yml
@@ -51,6 +51,8 @@ stages:
           # Invoke the Python building and publishing a data preprocessing pipeline
           python -m ml_service.pipelines.build_data_processing_os_cmd_pipeline
       displayName: 'Publish Data Preprocessing OS cmd Pipeline'
+      env:
+        APP_INSIGHTS_CONNECTION_STRING: $(APP_INSIGHTS_CONNECTION_STRING)
 
 # Trigger_Preprocessing_Pipeline
 - template: trigger-preprocessing-pipeline.yml
diff --git a/.pipelines/trigger-preprocessing-pipeline.yml b/.pipelines/trigger-preprocessing-pipeline.yml
index 3c68f321..76b911e6 100644
--- a/.pipelines/trigger-preprocessing-pipeline.yml
+++ b/.pipelines/trigger-preprocessing-pipeline.yml
@@ -31,6 +31,9 @@ stages:
           echo "##vso[task.setvariable variable=PREPROCESSPIPELINEID;isOutput=true]$PREPROCESSPIPELINEID"
       name: 'getpreprocessingpipelineid'
       displayName: 'Get Preprocessing Pipeline ID of ${{ parameters.aml_pipeline_name }}'
+      env:
+        APP_INSIGHTS_CONNECTION_STRING: $(APP_INSIGHTS_CONNECTION_STRING)
+
   - job: "Run_Data_Processing_Pipeline"
     dependsOn: "Get_Preprocessing_Pipeline_ID"
     displayName: "Trigger Preprocessing Pipeline ${{ parameters.aml_pipeline_name }}"
diff --git a/.pipelines/variables-template.yml b/.pipelines/variables-template.yml
index 283ddaf2..d87679b2 100644
--- a/.pipelines/variables-template.yml
+++ b/.pipelines/variables-template.yml
@@ -59,7 +59,7 @@ variables:
 
   # Observability related
   - name: LOG_TO_CONSOLE
-    value: 'true'
+    value: 'false'
   - name: LOG_LEVEL
     value: 'INFO' # DEBUG, INFO, WARNING, ERROR, CRITICAL
   - name: LOG_SAMPLING_RATE

From 367de8fbb869b5cecb9d435e270bdb9060631c53 Mon Sep 17 00:00:00 2001
From: Florian Wagner <flwagner@microsoft.com>
Date: Tue, 8 Dec 2020 06:13:53 +0000
Subject: [PATCH 17/23] change appinsights env variable name

Signed-off-by: Florian Wagner <flwagner@microsoft.com>
---
 .env.example                                    | 4 ++--
 .pipelines/02-processing-data.yml               | 2 +-
 .pipelines/03-train-evaluate-register-model.yml | 2 +-
 .pipelines/04-deploy-model-aci.yml              | 2 +-
 .pipelines/07-processing-data-os-cmd.yml        | 2 +-
 .pipelines/trigger-preprocessing-pipeline.yml   | 2 +-
 ml_service/util/env_variables.py                | 2 +-
 ml_service/util/logger/observability.py         | 2 +-
 8 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/.env.example b/.env.example
index 0fb420e9..c8333fe7 100644
--- a/.env.example
+++ b/.env.example
@@ -16,8 +16,8 @@ BASE_NAME = ''
 RESOURCE_GROUP = ''
 
 # Observability related
-APP_INSIGHTS_CONNECTION_STRING = ''
-LOG_TO_CONSOLE = 'true'
+APPLICATIONINSIGHTS_CONNECTION_STRING = ''
+LOG_TO_CONSOLE = 'false'
 # DEBUG, INFO, WARNING, ERROR, CRITICAL
 LOG_LEVEL = 'DEBUG' 
 # Probability 0.0 -> 1.0
diff --git a/.pipelines/02-processing-data.yml b/.pipelines/02-processing-data.yml
index 9a639985..1d1ea91a 100644
--- a/.pipelines/02-processing-data.yml
+++ b/.pipelines/02-processing-data.yml
@@ -52,7 +52,7 @@ stages:
           python -m ml_service.pipelines.build_data_processing_pipeline
       displayName: 'Publish Data Preprocessing Pipeline'
       env:
-        APP_INSIGHTS_CONNECTION_STRING: $(APP_INSIGHTS_CONNECTION_STRING)
+        APPLICATIONINSIGHTS_CONNECTION_STRING: $(APPLICATIONINSIGHTS_CONNECTION_STRING)
 
 # Trigger_Preprocessing_Pipeline
 - template: trigger-preprocessing-pipeline.yml
diff --git a/.pipelines/03-train-evaluate-register-model.yml b/.pipelines/03-train-evaluate-register-model.yml
index b4cbaf30..1b96e0bc 100644
--- a/.pipelines/03-train-evaluate-register-model.yml
+++ b/.pipelines/03-train-evaluate-register-model.yml
@@ -91,7 +91,7 @@ stages:
       name: 'getpipelineid'
       displayName: 'Get Pipeline ID'
       env:
-        APP_INSIGHTS_CONNECTION_STRING: $(APP_INSIGHTS_CONNECTION_STRING)
+        APPLICATIONINSIGHTS_CONNECTION_STRING: $(APPLICATIONINSIGHTS_CONNECTION_STRING)
 
   - job: "Run_ML_Pipeline"
     dependsOn: "Get_Pipeline_ID"
diff --git a/.pipelines/04-deploy-model-aci.yml b/.pipelines/04-deploy-model-aci.yml
index d7b77e93..3c2aff6d 100644
--- a/.pipelines/04-deploy-model-aci.yml
+++ b/.pipelines/04-deploy-model-aci.yml
@@ -68,4 +68,4 @@ stages:
           export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
           python -m ml_service.util.smoke_test_scoring_service --service "$(ACI_DEPLOYMENT_NAME)"
       env:
-        APP_INSIGHTS_CONNECTION_STRING: $(APP_INSIGHTS_CONNECTION_STRING)
+        APPLICATIONINSIGHTS_CONNECTION_STRING: $(APPLICATIONINSIGHTS_CONNECTION_STRING)
diff --git a/.pipelines/07-processing-data-os-cmd.yml b/.pipelines/07-processing-data-os-cmd.yml
index 7627e20c..50c85ea7 100644
--- a/.pipelines/07-processing-data-os-cmd.yml
+++ b/.pipelines/07-processing-data-os-cmd.yml
@@ -52,7 +52,7 @@ stages:
           python -m ml_service.pipelines.build_data_processing_os_cmd_pipeline
       displayName: 'Publish Data Preprocessing OS cmd Pipeline'
       env:
-        APP_INSIGHTS_CONNECTION_STRING: $(APP_INSIGHTS_CONNECTION_STRING)
+        APPLICATIONINSIGHTS_CONNECTION_STRING: $(APPLICATIONINSIGHTS_CONNECTION_STRING)
 
 # Trigger_Preprocessing_Pipeline
 - template: trigger-preprocessing-pipeline.yml
diff --git a/.pipelines/trigger-preprocessing-pipeline.yml b/.pipelines/trigger-preprocessing-pipeline.yml
index 76b911e6..7ff5698c 100644
--- a/.pipelines/trigger-preprocessing-pipeline.yml
+++ b/.pipelines/trigger-preprocessing-pipeline.yml
@@ -32,7 +32,7 @@ stages:
       name: 'getpreprocessingpipelineid'
       displayName: 'Get Preprocessing Pipeline ID of ${{ parameters.aml_pipeline_name }}'
       env:
-        APP_INSIGHTS_CONNECTION_STRING: $(APP_INSIGHTS_CONNECTION_STRING)
+        APPLICATIONINSIGHTS_CONNECTION_STRING: $(APPLICATIONINSIGHTS_CONNECTION_STRING)
 
   - job: "Run_Data_Processing_Pipeline"
     dependsOn: "Get_Preprocessing_Pipeline_ID"
diff --git a/ml_service/util/env_variables.py b/ml_service/util/env_variables.py
index b365010f..c9eaae18 100644
--- a/ml_service/util/env_variables.py
+++ b/ml_service/util/env_variables.py
@@ -39,7 +39,7 @@ class Env:
     max_nodes: int = int(os.environ.get("AML_CLUSTER_MAX_NODES", 4))
     aml_preprocessing_custom_docker_env_name: Optional[str] = os.environ.get("AML_PREPROCESSING_CUSTOM_DOCKER_ENV_NAME")  # NOQA: E501
     preprocessing_os_cmd_pipeline_name: Optional[str] = os.environ.get("PREPROCESSING_OS_CMD_PIPELINE_NAME")  # NOQA: E501
-    app_insights_connection_string: Optional[str] = os.environ.get("APP_INSIGHTS_CONNECTION_STRING")  # NOQA: E501
+    app_insights_connection_string: Optional[str] = os.environ.get("APPLICATIONINSIGHTS_CONNECTION_STRING")  # NOQA: E501
     log_to_console: Optional[bool] = os.environ.get("LOG_TO_CONSOLE", "false").lower().strip() == "true"  # NOQA: E501
     log_level: Optional[str] = os.environ.get("LOG_LEVEL", "WARNING")  # NOQA: E501
     log_sampling_rate: float = float(os.environ.get("LOG_SAMPLING_RATE", 1.0))  # NOQA: E501
diff --git a/ml_service/util/logger/observability.py b/ml_service/util/logger/observability.py
index 260b1495..f4fbbd44 100644
--- a/ml_service/util/logger/observability.py
+++ b/ml_service/util/logger/observability.py
@@ -30,7 +30,7 @@ def register_loggers(self):
         Notes:
         - If the context of the Run object is offline,
         we do not create AzureMlLogger instance
-        - If APP_INSIGHTS_CONNECTION_STRING is notset
+        - If APPLICATIONINSIGHTS_CONNECTION_STRING is notset
         to ENV variable, we do not create AppInsightsLogger
         instance
         """

From f5e2e833ea3cf3127d2d7fefe1dafadbd43d8c65 Mon Sep 17 00:00:00 2001
From: Florian Wagner <flwagner@microsoft.com>
Date: Tue, 8 Dec 2020 06:51:59 +0000
Subject: [PATCH 18/23] Fix log init if appinsights cs key is absent in vg

Signed-off-by: Florian Wagner <flwagner@microsoft.com>
---
 ml_service/util/logger/observability.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/ml_service/util/logger/observability.py b/ml_service/util/logger/observability.py
index f4fbbd44..86d19d1a 100644
--- a/ml_service/util/logger/observability.py
+++ b/ml_service/util/logger/observability.py
@@ -35,11 +35,14 @@ def register_loggers(self):
         instance
         """
         run = Run.get_context()
+        e = Env()
         if not run.id.startswith(self.OFFLINE_RUN):
             self.loggers.append(AzureMlLogger(run))
-        if Env().app_insights_connection_string:
+        if e.app_insights_connection_string:
+            if "InstrumentationKey" in e.app_insights_connection_string:
+                self.loggers.append(AppInsightsLogger(run))
             self.loggers.append(AppInsightsLogger(run))
-        if Env().log_to_console:
+        if e.log_to_console:
             self.loggers.append(ConsoleLogger(run))
 
 

From 3fcbd3eccc715e4c03940a53a1c4110e2a15e607 Mon Sep 17 00:00:00 2001
From: Florian Wagner <flwagner@microsoft.com>
Date: Tue, 8 Dec 2020 07:05:02 +0000
Subject: [PATCH 19/23] Extend run env with log related variables

Signed-off-by: Florian Wagner <flwagner@microsoft.com>
---
 .../pipelines/build_data_processing_os_cmd_pipeline.py | 10 +++++++++-
 ml_service/pipelines/build_data_processing_pipeline.py | 10 +++++++++-
 ml_service/pipelines/build_training_pipeline.py        | 10 +++++++++-
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/ml_service/pipelines/build_data_processing_os_cmd_pipeline.py b/ml_service/pipelines/build_data_processing_os_cmd_pipeline.py
index e6267037..080e56ee 100644
--- a/ml_service/pipelines/build_data_processing_os_cmd_pipeline.py
+++ b/ml_service/pipelines/build_data_processing_os_cmd_pipeline.py
@@ -42,7 +42,15 @@ def main():
     # Replace the value with your own connection string
     run_config.environment.environment_variables = {
         "APPLICATIONINSIGHTS_CONNECTION_STRING":
-        e.app_insights_connection_string
+        e.app_insights_connection_string,
+        "LOG_LEVEL":
+        e.log_level,
+        "LOG_SAMPLING_RATE":
+        e.log_sampling_rate,
+        "TRACE_SAMPLING_RATE":
+        e.trace_sampling_rate,
+        "METRICS_EXPORT_INTERVAL":
+        e.metrics_export_interval
     }
 
     if e.datastore_name:
diff --git a/ml_service/pipelines/build_data_processing_pipeline.py b/ml_service/pipelines/build_data_processing_pipeline.py
index d0660699..08b0338f 100644
--- a/ml_service/pipelines/build_data_processing_pipeline.py
+++ b/ml_service/pipelines/build_data_processing_pipeline.py
@@ -40,7 +40,15 @@ def main():
     # Replace the value with your own connection string
     run_config.environment.environment_variables = {
         "APPLICATIONINSIGHTS_CONNECTION_STRING":
-        e.app_insights_connection_string
+        e.app_insights_connection_string,
+        "LOG_LEVEL":
+        e.log_level,
+        "LOG_SAMPLING_RATE":
+        e.log_sampling_rate,
+        "TRACE_SAMPLING_RATE":
+        e.trace_sampling_rate,
+        "METRICS_EXPORT_INTERVAL":
+        e.metrics_export_interval
     }
 
     if e.datastore_name:
diff --git a/ml_service/pipelines/build_training_pipeline.py b/ml_service/pipelines/build_training_pipeline.py
index 16e06f04..059209b7 100644
--- a/ml_service/pipelines/build_training_pipeline.py
+++ b/ml_service/pipelines/build_training_pipeline.py
@@ -40,7 +40,15 @@ def main():
     # Replace the value with your own connection string
     run_config.environment.environment_variables = {
         "APPLICATIONINSIGHTS_CONNECTION_STRING":
-        e.app_insights_connection_string
+        e.app_insights_connection_string,
+        "LOG_LEVEL":
+        e.log_level,
+        "LOG_SAMPLING_RATE":
+        e.log_sampling_rate,
+        "TRACE_SAMPLING_RATE":
+        e.trace_sampling_rate,
+        "METRICS_EXPORT_INTERVAL":
+        e.metrics_export_interval
     }
 
     if e.datastore_name:

From 943c956ed19079f41bea82660a05f96e2db4c302 Mon Sep 17 00:00:00 2001
From: Florian Wagner <flwagner@microsoft.com>
Date: Tue, 8 Dec 2020 07:29:48 +0000
Subject: [PATCH 20/23] Fix c&p error in register_loggers

Signed-off-by: Florian Wagner <flwagner@microsoft.com>
---
 ml_service/util/logger/observability.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ml_service/util/logger/observability.py b/ml_service/util/logger/observability.py
index 86d19d1a..8736c17a 100644
--- a/ml_service/util/logger/observability.py
+++ b/ml_service/util/logger/observability.py
@@ -41,7 +41,6 @@ def register_loggers(self):
         if e.app_insights_connection_string:
             if "InstrumentationKey" in e.app_insights_connection_string:
                 self.loggers.append(AppInsightsLogger(run))
-            self.loggers.append(AppInsightsLogger(run))
         if e.log_to_console:
             self.loggers.append(ConsoleLogger(run))
 

From 89a9d39a1147305d1b32278fe7f08c1e38a3ccbf Mon Sep 17 00:00:00 2001
From: Florian Wagner <flwagner@microsoft.com>
Date: Wed, 9 Dec 2020 01:02:08 +0000
Subject: [PATCH 21/23] Add missing env to build_train_pipeline step

Signed-off-by: Florian Wagner <flwagner@microsoft.com>
---
 .pipelines/03-train-evaluate-register-model.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.pipelines/03-train-evaluate-register-model.yml b/.pipelines/03-train-evaluate-register-model.yml
index 1b96e0bc..148751bc 100644
--- a/.pipelines/03-train-evaluate-register-model.yml
+++ b/.pipelines/03-train-evaluate-register-model.yml
@@ -62,6 +62,8 @@ stages:
           # Invoke the Python building and publishing a training pipeline
           python -m ml_service.pipelines.build_training_pipeline
       displayName: 'Publish Azure Machine Learning Pipeline'
+      env:
+        APPLICATIONINSIGHTS_CONNECTION_STRING: $(APPLICATIONINSIGHTS_CONNECTION_STRING)
 
 - stage: 'Trigger_Training_Pipeline'
   displayName: 'Train and evaluate model'

From 2348bd310a28afb26a1c972295bab7cd0451a890 Mon Sep 17 00:00:00 2001
From: Florian Wagner <flwagner@microsoft.com>
Date: Wed, 9 Dec 2020 05:54:51 +0000
Subject: [PATCH 22/23] Optimize for AppInsights Application Map

Signed-off-by: Florian Wagner <flwagner@microsoft.com>
---
 ml_model/evaluate/evaluate_model.py           | 14 ++++++-------
 ml_model/preprocessing/preprocess_aml.py      | 14 ++++++-------
 ml_model/preprocessing/preprocess_images.py   | 14 ++++++-------
 ml_model/preprocessing/preprocess_os_cmd.py   | 14 ++++++-------
 ml_model/register/register_model.py           | 14 ++++++-------
 ml_model/training/train.py                    | 14 ++++++-------
 ml_model/training/train_aml.py                | 14 ++++++-------
 .../build_data_processing_os_cmd_pipeline.py  | 14 ++++++-------
 .../build_data_processing_pipeline.py         | 14 ++++++-------
 .../pipelines/build_training_pipeline.py      | 14 ++++++-------
 .../pipelines/run_data_processing_pipeline.py | 14 ++++++-------
 ml_service/pipelines/run_training_pipeline.py | 14 ++++++-------
 ml_service/util/logger/app_insights_logger.py | 21 +++++++++++++++++--
 13 files changed, 103 insertions(+), 86 deletions(-)

diff --git a/ml_model/evaluate/evaluate_model.py b/ml_model/evaluate/evaluate_model.py
index 27488d5c..2e1e95bf 100644
--- a/ml_model/evaluate/evaluate_model.py
+++ b/ml_model/evaluate/evaluate_model.py
@@ -97,10 +97,10 @@ def main():
 
 
 if __name__ == '__main__':
-    observability.start_span('evaluate_model')
-    try:
-        main()
-    except Exception as exception:
-        observability.exception(exception)
-        raise exception
-    observability.end_span()
+    with observability.\
+         start_span('evaluate_model'):
+        try:
+            main()
+        except Exception as exception:
+            observability.exception(exception)
+            raise exception
diff --git a/ml_model/preprocessing/preprocess_aml.py b/ml_model/preprocessing/preprocess_aml.py
index 851f9e7a..976d9858 100644
--- a/ml_model/preprocessing/preprocess_aml.py
+++ b/ml_model/preprocessing/preprocess_aml.py
@@ -120,10 +120,10 @@ def main():
 
 
 if __name__ == '__main__':
-    observability.start_span('preprocess_aml')
-    try:
-        main()
-    except Exception as exception:
-        observability.exception(exception)
-        raise exception
-    observability.end_span()
+    with observability.\
+         start_span('preprocess_aml'):
+        try:
+            main()
+        except Exception as exception:
+            observability.exception(exception)
+            raise exception
diff --git a/ml_model/preprocessing/preprocess_images.py b/ml_model/preprocessing/preprocess_images.py
index e3fa1717..f57dbbc7 100644
--- a/ml_model/preprocessing/preprocess_images.py
+++ b/ml_model/preprocessing/preprocess_images.py
@@ -83,10 +83,10 @@ def main():
 
 
 if __name__ == '__main__':
-    observability.start_span('preprocess_images')
-    try:
-        main()
-    except Exception as exception:
-        observability.exception(exception)
-        raise exception
-    observability.end_span()
+    with observability.\
+         start_span('preprocess_images'):
+        try:
+            main()
+        except Exception as exception:
+            observability.exception(exception)
+            raise exception
diff --git a/ml_model/preprocessing/preprocess_os_cmd.py b/ml_model/preprocessing/preprocess_os_cmd.py
index 5bd2d3d6..081969e6 100644
--- a/ml_model/preprocessing/preprocess_os_cmd.py
+++ b/ml_model/preprocessing/preprocess_os_cmd.py
@@ -105,10 +105,10 @@ def main():
 
 
 if __name__ == '__main__':
-    observability.start_span('preprocess_os_cmd_main')
-    try:
-        main()
-    except Exception as exception:
-        observability.exception(exception)
-        raise exception
-    observability.end_span()
+    with observability.\
+         start_span('preprocess_os_cmd_main'):
+        try:
+            main()
+        except Exception as exception:
+            observability.exception(exception)
+            raise exception
diff --git a/ml_model/register/register_model.py b/ml_model/register/register_model.py
index fab24cc6..7695f292 100644
--- a/ml_model/register/register_model.py
+++ b/ml_model/register/register_model.py
@@ -201,10 +201,10 @@ def register_aml_model(
 
 
 if __name__ == '__main__':
-    observability.start_span('register_model')
-    try:
-        main()
-    except Exception as exception:
-        observability.exception(exception)
-        raise exception
-    observability.end_span()
+    with observability.\
+         start_span('register_model'):
+        try:
+            main()
+        except Exception as exception:
+            observability.exception(exception)
+            raise exception
diff --git a/ml_model/training/train.py b/ml_model/training/train.py
index 46258f17..e67d18d5 100644
--- a/ml_model/training/train.py
+++ b/ml_model/training/train.py
@@ -156,10 +156,10 @@ def main():
 
 
 if __name__ == '__main__':
-    observability.start_span('train')
-    try:
-        main()
-    except Exception as exception:
-        observability.exception(exception)
-        raise exception
-    observability.end_span()
+    with observability.\
+         start_span('train'):
+        try:
+            main()
+        except Exception as exception:
+            observability.exception(exception)
+            raise exception
diff --git a/ml_model/training/train_aml.py b/ml_model/training/train_aml.py
index 78b95417..aa130569 100644
--- a/ml_model/training/train_aml.py
+++ b/ml_model/training/train_aml.py
@@ -154,10 +154,10 @@ def main():
 
 
 if __name__ == '__main__':
-    observability.start_span('train_aml')
-    try:
-        main()
-    except Exception as exception:
-        observability.exception(exception)
-        raise exception
-    observability.end_span()
+    with observability.\
+         start_span('train_aml'):
+        try:
+            main()
+        except Exception as exception:
+            observability.exception(exception)
+            raise exception
diff --git a/ml_service/pipelines/build_data_processing_os_cmd_pipeline.py b/ml_service/pipelines/build_data_processing_os_cmd_pipeline.py
index 080e56ee..e2513b71 100644
--- a/ml_service/pipelines/build_data_processing_os_cmd_pipeline.py
+++ b/ml_service/pipelines/build_data_processing_os_cmd_pipeline.py
@@ -107,10 +107,10 @@ def main():
 
 
 if __name__ == "__main__":
-    observability.start_span('build_data_processing_os_cmd_pipeline_main')
-    try:
-        main()
-    except Exception as exception:
-        observability.exception(exception)
-        raise exception
-    observability.end_span()
+    with observability.\
+         start_span('build_data_processing_os_cmd_pipeline'):
+        try:
+            main()
+        except Exception as exception:
+            observability.exception(exception)
+            raise exception
diff --git a/ml_service/pipelines/build_data_processing_pipeline.py b/ml_service/pipelines/build_data_processing_pipeline.py
index 08b0338f..6bb7be93 100644
--- a/ml_service/pipelines/build_data_processing_pipeline.py
+++ b/ml_service/pipelines/build_data_processing_pipeline.py
@@ -107,10 +107,10 @@ def main():
 
 
 if __name__ == "__main__":
-    observability.start_span('build_data_processing_pipeline')
-    try:
-        main()
-    except Exception as exception:
-        observability.exception(exception)
-        raise exception
-    observability.end_span()
+    with observability.\
+         start_span('build_data_processing_pipeline'):
+        try:
+            main()
+        except Exception as exception:
+            observability.exception(exception)
+            raise exception
diff --git a/ml_service/pipelines/build_training_pipeline.py b/ml_service/pipelines/build_training_pipeline.py
index 059209b7..df46c0f2 100644
--- a/ml_service/pipelines/build_training_pipeline.py
+++ b/ml_service/pipelines/build_training_pipeline.py
@@ -138,10 +138,10 @@ def main():
 
 
 if __name__ == "__main__":
-    observability.start_span('build_training_pipeline')
-    try:
-        main()
-    except Exception as exception:
-        observability.exception(exception)
-        raise exception
-    observability.end_span()
+    with observability.\
+         start_span('build_training_pipeline'):
+        try:
+            main()
+        except Exception as exception:
+            observability.exception(exception)
+            raise exception
diff --git a/ml_service/pipelines/run_data_processing_pipeline.py b/ml_service/pipelines/run_data_processing_pipeline.py
index eddb53b6..7c3f4cde 100644
--- a/ml_service/pipelines/run_data_processing_pipeline.py
+++ b/ml_service/pipelines/run_data_processing_pipeline.py
@@ -82,10 +82,10 @@ def main():
 
 
 if __name__ == "__main__":
-    observability.start_span('run_data_processing_pipeline')
-    try:
-        main()
-    except Exception as exception:
-        observability.exception(exception)
-        raise exception
-    observability.end_span()
+    with observability.\
+         start_span('run_data_processing_pipeline'):
+        try:
+            main()
+        except Exception as exception:
+            observability.exception(exception)
+            raise exception
diff --git a/ml_service/pipelines/run_training_pipeline.py b/ml_service/pipelines/run_training_pipeline.py
index 3081a4a7..945ffe84 100644
--- a/ml_service/pipelines/run_training_pipeline.py
+++ b/ml_service/pipelines/run_training_pipeline.py
@@ -70,10 +70,10 @@ def main():
 
 
 if __name__ == "__main__":
-    observability.start_span('run_training_pipeline')
-    try:
-        main()
-    except Exception as exception:
-        observability.exception(exception)
-        raise exception
-    observability.end_span()
+    with observability.\
+         start_span('run_training_pipeline'):
+        try:
+            main()
+        except Exception as exception:
+            observability.exception(exception)
+            raise exception
diff --git a/ml_service/util/logger/app_insights_logger.py b/ml_service/util/logger/app_insights_logger.py
index c0164b4c..4a7dbb7f 100644
--- a/ml_service/util/logger/app_insights_logger.py
+++ b/ml_service/util/logger/app_insights_logger.py
@@ -7,6 +7,8 @@
 from opencensus.trace.samplers import ProbabilitySampler
 from opencensus.trace.tracer import Tracer
 
+from opencensus.trace.span import SpanKind
+from opencensus.trace.status import Status
 from opencensus.stats import aggregation as aggregation_module
 from opencensus.stats import measure as measure_module
 from opencensus.stats import stats as stats_module
@@ -57,6 +59,14 @@ def __init__(self, run):
         mexporter.add_telemetry_processor(self.callback_function)
         stats_module.stats.view_manager.register_exporter(mexporter)
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, exc_traceback):
+        # Cleanup span if forgotten in logic (e.g. due to exception)
+        if self.current_span() is not None:
+            self.end_span()
+
     def log_metric(
         self, name="", value="", description="", log_parent=False,
     ):
@@ -114,6 +124,9 @@ def exception(self, exception: Exception):
         :return:
         """
         self.logger.exception(exception, extra=self.custom_dimensions)
+        # Mark current span/operation with internal error
+        self.current_span().status = Status(2, exception)
+        self.current_span().attributes['http.status_code'] = 500
 
     @staticmethod
     def set_view(metric, description, measure):
@@ -148,7 +161,7 @@ def span(self, name='span'):
         :rtype: :class:`~opencensus.trace.span.Span`
         :returns: The Span object.
         """
-        return self.tracer.span(name)
+        return self.start_span(name)
 
     def start_span(self, name='span'):
         """Start a span.
@@ -157,7 +170,11 @@ def start_span(self, name='span'):
         :rtype: :class:`~opencensus.trace.span.Span`
         :returns: The Span object.
         """
-        return self.tracer.start_span(name)
+        span = self.tracer.start_span(name)
+        span.span_kind = SpanKind.SERVER
+        span.attributes['http.method'] = 'START'
+        span.attributes['http.route'] = name
+        return span
 
     def end_span(self):
         """End a span. Remove the span from the span stack, and update the

From 46b39f111479a7b22bd32bf6349c9c60fdf4d2d4 Mon Sep 17 00:00:00 2001
From: Florian Wagner <flwagner@microsoft.com>
Date: Wed, 9 Dec 2020 10:52:48 +0000
Subject: [PATCH 23/23] Move end_span to finally block

Signed-off-by: Florian Wagner <flwagner@microsoft.com>
---
 ml_model/evaluate/evaluate_model.py               | 15 ++++++++-------
 ml_model/preprocessing/preprocess_aml.py          | 15 ++++++++-------
 ml_model/preprocessing/preprocess_images.py       | 15 ++++++++-------
 ml_model/preprocessing/preprocess_os_cmd.py       | 15 ++++++++-------
 ml_model/register/register_model.py               | 15 ++++++++-------
 ml_model/training/train.py                        | 15 ++++++++-------
 ml_model/training/train_aml.py                    | 15 ++++++++-------
 .../build_data_processing_os_cmd_pipeline.py      | 15 ++++++++-------
 .../pipelines/build_data_processing_pipeline.py   | 15 ++++++++-------
 ml_service/pipelines/build_training_pipeline.py   | 15 ++++++++-------
 .../pipelines/run_data_processing_pipeline.py     | 15 ++++++++-------
 ml_service/pipelines/run_training_pipeline.py     | 15 ++++++++-------
 ml_service/util/logger/app_insights_logger.py     |  8 --------
 13 files changed, 96 insertions(+), 92 deletions(-)

diff --git a/ml_model/evaluate/evaluate_model.py b/ml_model/evaluate/evaluate_model.py
index 2e1e95bf..03884e69 100644
--- a/ml_model/evaluate/evaluate_model.py
+++ b/ml_model/evaluate/evaluate_model.py
@@ -97,10 +97,11 @@ def main():
 
 
 if __name__ == '__main__':
-    with observability.\
-         start_span('evaluate_model'):
-        try:
-            main()
-        except Exception as exception:
-            observability.exception(exception)
-            raise exception
+    observability.start_span('evaluate_model')
+    try:
+        main()
+    except Exception as exception:
+        observability.exception(exception)
+        raise exception
+    finally:
+        observability.end_span()
diff --git a/ml_model/preprocessing/preprocess_aml.py b/ml_model/preprocessing/preprocess_aml.py
index 976d9858..909631b2 100644
--- a/ml_model/preprocessing/preprocess_aml.py
+++ b/ml_model/preprocessing/preprocess_aml.py
@@ -120,10 +120,11 @@ def main():
 
 
 if __name__ == '__main__':
-    with observability.\
-         start_span('preprocess_aml'):
-        try:
-            main()
-        except Exception as exception:
-            observability.exception(exception)
-            raise exception
+    observability.start_span('preprocess_aml')
+    try:
+        main()
+    except Exception as exception:
+        observability.exception(exception)
+        raise exception
+    finally:
+        observability.end_span()
diff --git a/ml_model/preprocessing/preprocess_images.py b/ml_model/preprocessing/preprocess_images.py
index f57dbbc7..87799153 100644
--- a/ml_model/preprocessing/preprocess_images.py
+++ b/ml_model/preprocessing/preprocess_images.py
@@ -83,10 +83,11 @@ def main():
 
 
 if __name__ == '__main__':
-    with observability.\
-         start_span('preprocess_images'):
-        try:
-            main()
-        except Exception as exception:
-            observability.exception(exception)
-            raise exception
+    observability.start_span('preprocess_images')
+    try:
+        main()
+    except Exception as exception:
+        observability.exception(exception)
+        raise exception
+    finally:
+        observability.end_span()
diff --git a/ml_model/preprocessing/preprocess_os_cmd.py b/ml_model/preprocessing/preprocess_os_cmd.py
index 081969e6..f09daba4 100644
--- a/ml_model/preprocessing/preprocess_os_cmd.py
+++ b/ml_model/preprocessing/preprocess_os_cmd.py
@@ -105,10 +105,11 @@ def main():
 
 
 if __name__ == '__main__':
-    with observability.\
-         start_span('preprocess_os_cmd_main'):
-        try:
-            main()
-        except Exception as exception:
-            observability.exception(exception)
-            raise exception
+    observability.start_span('preprocess_os_cmd')
+    try:
+        main()
+    except Exception as exception:
+        observability.exception(exception)
+        raise exception
+    finally:
+        observability.end_span()
diff --git a/ml_model/register/register_model.py b/ml_model/register/register_model.py
index 7695f292..7cfe23bb 100644
--- a/ml_model/register/register_model.py
+++ b/ml_model/register/register_model.py
@@ -201,10 +201,11 @@ def register_aml_model(
 
 
 if __name__ == '__main__':
-    with observability.\
-         start_span('register_model'):
-        try:
-            main()
-        except Exception as exception:
-            observability.exception(exception)
-            raise exception
+    observability.start_span('register_model')
+    try:
+        main()
+    except Exception as exception:
+        observability.exception(exception)
+        raise exception
+    finally:
+        observability.end_span()
diff --git a/ml_model/training/train.py b/ml_model/training/train.py
index e67d18d5..8172c139 100644
--- a/ml_model/training/train.py
+++ b/ml_model/training/train.py
@@ -156,10 +156,11 @@ def main():
 
 
 if __name__ == '__main__':
-    with observability.\
-         start_span('train'):
-        try:
-            main()
-        except Exception as exception:
-            observability.exception(exception)
-            raise exception
+    observability.start_span('train')
+    try:
+        main()
+    except Exception as exception:
+        observability.exception(exception)
+        raise exception
+    finally:
+        observability.end_span()
diff --git a/ml_model/training/train_aml.py b/ml_model/training/train_aml.py
index aa130569..b9b32283 100644
--- a/ml_model/training/train_aml.py
+++ b/ml_model/training/train_aml.py
@@ -154,10 +154,11 @@ def main():
 
 
 if __name__ == '__main__':
-    with observability.\
-         start_span('train_aml'):
-        try:
-            main()
-        except Exception as exception:
-            observability.exception(exception)
-            raise exception
+    observability.start_span('train_aml')
+    try:
+        main()
+    except Exception as exception:
+        observability.exception(exception)
+        raise exception
+    finally:
+        observability.end_span()
diff --git a/ml_service/pipelines/build_data_processing_os_cmd_pipeline.py b/ml_service/pipelines/build_data_processing_os_cmd_pipeline.py
index e2513b71..60310d96 100644
--- a/ml_service/pipelines/build_data_processing_os_cmd_pipeline.py
+++ b/ml_service/pipelines/build_data_processing_os_cmd_pipeline.py
@@ -107,10 +107,11 @@ def main():
 
 
 if __name__ == "__main__":
-    with observability.\
-         start_span('build_data_processing_os_cmd_pipeline'):
-        try:
-            main()
-        except Exception as exception:
-            observability.exception(exception)
-            raise exception
+    observability.start_span('build_data_processing_os_cmd_pipeline')
+    try:
+        main()
+    except Exception as exception:
+        observability.exception(exception)
+        raise exception
+    finally:
+        observability.end_span()
diff --git a/ml_service/pipelines/build_data_processing_pipeline.py b/ml_service/pipelines/build_data_processing_pipeline.py
index 6bb7be93..a34843e8 100644
--- a/ml_service/pipelines/build_data_processing_pipeline.py
+++ b/ml_service/pipelines/build_data_processing_pipeline.py
@@ -107,10 +107,11 @@ def main():
 
 
 if __name__ == "__main__":
-    with observability.\
-         start_span('build_data_processing_pipeline'):
-        try:
-            main()
-        except Exception as exception:
-            observability.exception(exception)
-            raise exception
+    observability.start_span('build_data_processing_pipeline')
+    try:
+        main()
+    except Exception as exception:
+        observability.exception(exception)
+        raise exception
+    finally:
+        observability.end_span()
diff --git a/ml_service/pipelines/build_training_pipeline.py b/ml_service/pipelines/build_training_pipeline.py
index df46c0f2..a9afed5a 100644
--- a/ml_service/pipelines/build_training_pipeline.py
+++ b/ml_service/pipelines/build_training_pipeline.py
@@ -138,10 +138,11 @@ def main():
 
 
 if __name__ == "__main__":
-    with observability.\
-         start_span('build_training_pipeline'):
-        try:
-            main()
-        except Exception as exception:
-            observability.exception(exception)
-            raise exception
+    observability.start_span('build_training_pipeline')
+    try:
+        main()
+    except Exception as exception:
+        observability.exception(exception)
+        raise exception
+    finally:
+        observability.end_span()
diff --git a/ml_service/pipelines/run_data_processing_pipeline.py b/ml_service/pipelines/run_data_processing_pipeline.py
index 7c3f4cde..c526be80 100644
--- a/ml_service/pipelines/run_data_processing_pipeline.py
+++ b/ml_service/pipelines/run_data_processing_pipeline.py
@@ -82,10 +82,11 @@ def main():
 
 
 if __name__ == "__main__":
-    with observability.\
-         start_span('run_data_processing_pipeline'):
-        try:
-            main()
-        except Exception as exception:
-            observability.exception(exception)
-            raise exception
+    observability.start_span('run_data_processing_pipeline')
+    try:
+        main()
+    except Exception as exception:
+        observability.exception(exception)
+        raise exception
+    finally:
+        observability.end_span()
diff --git a/ml_service/pipelines/run_training_pipeline.py b/ml_service/pipelines/run_training_pipeline.py
index 945ffe84..e3a4e7a0 100644
--- a/ml_service/pipelines/run_training_pipeline.py
+++ b/ml_service/pipelines/run_training_pipeline.py
@@ -70,10 +70,11 @@ def main():
 
 
 if __name__ == "__main__":
-    with observability.\
-         start_span('run_training_pipeline'):
-        try:
-            main()
-        except Exception as exception:
-            observability.exception(exception)
-            raise exception
+    observability.start_span('run_training_pipeline')
+    try:
+        main()
+    except Exception as exception:
+        observability.exception(exception)
+        raise exception
+    finally:
+        observability.end_span()
diff --git a/ml_service/util/logger/app_insights_logger.py b/ml_service/util/logger/app_insights_logger.py
index 4a7dbb7f..2d49f0c5 100644
--- a/ml_service/util/logger/app_insights_logger.py
+++ b/ml_service/util/logger/app_insights_logger.py
@@ -59,14 +59,6 @@ def __init__(self, run):
         mexporter.add_telemetry_processor(self.callback_function)
         stats_module.stats.view_manager.register_exporter(mexporter)
 
-    def __enter__(self):
-        return self
-
-    def __exit__(self, exc_type, exc_value, exc_traceback):
-        # Cleanup span if forgotten in logic (e.g. due to exception)
-        if self.current_span() is not None:
-            self.end_span()
-
     def log_metric(
         self, name="", value="", description="", log_parent=False,
     ):