diff --git a/dashboards/azure-machine-learning/Azure-Machine-Learning.png b/dashboards/azure-machine-learning/Azure-Machine-Learning.png index 6cec96b9c7..2f8fbe27de 100644 Binary files a/dashboards/azure-machine-learning/Azure-Machine-Learning.png and b/dashboards/azure-machine-learning/Azure-Machine-Learning.png differ diff --git a/dashboards/azure-machine-learning/Azure-Machine-Learning01.png b/dashboards/azure-machine-learning/Azure-Machine-Learning01.png index 2f8fbe27de..6cec96b9c7 100644 Binary files a/dashboards/azure-machine-learning/Azure-Machine-Learning01.png and b/dashboards/azure-machine-learning/Azure-Machine-Learning01.png differ diff --git a/dashboards/azure-machine-learning/Azure-Machine-Learning05.png b/dashboards/azure-machine-learning/Azure-Machine-Learning05.png new file mode 100644 index 0000000000..f2b1b3cc88 Binary files /dev/null and b/dashboards/azure-machine-learning/Azure-Machine-Learning05.png differ diff --git a/dashboards/azure-machine-learning/Azure-Machine-Learning06.png b/dashboards/azure-machine-learning/Azure-Machine-Learning06.png new file mode 100644 index 0000000000..b9f4cf4ec6 Binary files /dev/null and b/dashboards/azure-machine-learning/Azure-Machine-Learning06.png differ diff --git a/dashboards/azure-machine-learning/azure-machine-learning.json b/dashboards/azure-machine-learning/azure-machine-learning.json index 5673359f42..466d59e9d7 100644 --- a/dashboards/azure-machine-learning/azure-machine-learning.json +++ b/dashboards/azure-machine-learning/azure-machine-learning.json @@ -38,7 +38,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT sum(azure.machinelearningservices.workspaces.CancelRequestedRuns) AS 'CancelRequestedRuns'" } ], @@ -70,7 +70,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT sum(azure.machinelearningservices.workspaces.FailedRuns) AS 'FailedRuns'" } ], @@ -80,7 +80,7 @@ "thresholds": [ { "alertSeverity": "CRITICAL", - 
"value": -1 + "value": 0 } ] } @@ -102,7 +102,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT sum(azure.machinelearningservices.workspaces.CancelledRuns) AS 'CancelledRuns'" } ], @@ -134,7 +134,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT sum(azure.machinelearningservices.workspaces.CompletedRuns) AS 'CompletedRuns'" } ], @@ -166,7 +166,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT sum(azure.machinelearningservices.workspaces.QueuedRuns) AS 'QueuedRuns'" } ], @@ -201,7 +201,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT sum(azure.machinelearningservices.workspaces.NotRespondingRuns) AS 'NotRespondingRuns', sum(azure.machinelearningservices.workspaces.NotStartedRuns) AS 'NotStartedRuns' TIMESERIES AUTO" } ], @@ -227,7 +227,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT sum(azure.machinelearningservices.workspaces.Errors) AS 'Errors'" } ], @@ -237,7 +237,7 @@ "thresholds": [ { "alertSeverity": "CRITICAL", - "value": -1 + "value": 0 } ] } @@ -259,7 +259,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT sum(azure.machinelearningservices.workspaces.Warnings) AS 'Warnings'" } ], @@ -294,7 +294,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT sum(azure.machinelearningservices.workspaces.ProvisioningRuns) AS 'ProvisioningRuns', average(azure.machinelearningservices.workspaces.PreparingRuns) AS 'PreparingRuns' TIMESERIES AUTO" } ], @@ -326,7 +326,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT sum(azure.machinelearningservices.workspaces.StartedRuns) AS 'StartedRuns', sum(azure.machinelearningservices.workspaces.StartingRuns) AS 'StartingRuns' TIMESERIES AUTO" } ], @@ -355,7 +355,7 @@ }, "nrqlQueries": [ { - "accountId": 0, 
+ "accountId": 0, "query": "FROM Metric SELECT sum(azure.machinelearningservices.workspaces.FinalizingRuns) AS 'FinalizingRuns' TIMESERIES AUTO" } ], @@ -405,7 +405,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT sum(azure.machinelearningservices.workspaces.ModelDeployFailed) AS 'ModelDeployFailed'" } ], @@ -415,7 +415,7 @@ "thresholds": [ { "alertSeverity": "CRITICAL", - "value": -1 + "value": 0 } ] } @@ -437,7 +437,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT sum(azure.machinelearningservices.workspaces.ModelRegisterFailed) AS 'ModelRegisterFailed'" } ], @@ -447,7 +447,7 @@ "thresholds": [ { "alertSeverity": "CRITICAL", - "value": -1 + "value": 0 } ] } @@ -469,7 +469,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT sum(azure.machinelearningservices.workspaces.ModelDeploySucceeded) AS 'ModelDeploySucceeded'" } ], @@ -501,7 +501,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT sum(azure.machinelearningservices.workspaces.ModelRegisterSucceeded) AS 'ModelRegisterSucceeded'" } ], @@ -536,7 +536,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT sum(azure.machinelearningservices.workspaces.ModelDeployStarted) AS 'ModelDeployStarted' TIMESERIES AUTO " } ], @@ -589,7 +589,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": " FROM Metric SELECT average(azure.machinelearningservices.workspaces.QuotaUtilizationPercentage) AS 'QuotaUtilizationPercentage' TIMESERIES AUTO " } ], @@ -621,7 +621,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.TotalCores) AS 'TotalCores',average(azure.machinelearningservices.workspaces.IdleCores) AS 'IdleCores',average(azure.machinelearningservices.workspaces.ActiveCores) AS 
'ActiveCores',average(azure.machinelearningservices.workspaces.LeavingCores) AS 'LeavingCores',average(azure.machinelearningservices.workspaces.UnusableCores) AS 'UnusableCores',average(azure.machinelearningservices.workspaces.PreemptedCores) AS 'PreemptedCores' TIMESERIES AUTO " } ], @@ -650,7 +650,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.TotalNodes) AS 'TotalNodes',average(azure.machinelearningservices.workspaces.IdleNodes) AS 'IdleNodes',average(azure.machinelearningservices.workspaces.ActiveNodes) AS 'ActiveNodes',average(azure.machinelearningservices.workspaces.LeavingNodes) AS 'LeavingNodes',average(azure.machinelearningservices.workspaces.UnusableNodes) AS 'UnusableNodes',average(azure.machinelearningservices.workspaces.PreemptedNodes) AS 'PreemptedNodes' TIMESERIES AUTO" } ], @@ -662,7 +662,7 @@ ] }, { - "name": "CPU Usage", + "name": "Workspace CPU Usage", "description": null, "widgets": [ { @@ -682,7 +682,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.CpuMemoryCapacityMegabytes)/1e+6 AS 'CpuMemoryCapacityMegabytes'" } ], @@ -714,7 +714,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.CpuCapacityMillicores) AS 'CpuCapacityMillicores'" } ], @@ -749,7 +749,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.CpuUtilization) AS 'CpuUtilization' TIMESERIES AUTO" } ], @@ -784,7 +784,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.DiskAvailMegabytes) AS 'DiskAvailMegabytes', average(azure.machinelearningservices.workspaces.DiskUsedMegabytes) AS 'DiskUsedMegabytes' TIMESERIES AUTO" } ], @@ -819,7 +819,7 
@@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.DiskReadMegabytes) AS 'DiskReadMegabytes',average(azure.machinelearningservices.workspaces.DiskWriteMegabytes) AS 'DiskWriteMegabytes' TIMESERIES AUTO" } ], @@ -854,7 +854,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.CpuMemoryUtilizationMegabytes) AS 'CpuMemoryUtilizationMegabytes' TIMESERIES AUTO" } ], @@ -872,7 +872,7 @@ ] }, { - "name": "GPU Usage", + "name": "Workspace GPU Usage", "description": null, "widgets": [ { @@ -892,7 +892,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.IBTransmitMegabytes) AS 'IBTransmitMegabytes'" } ], @@ -924,7 +924,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.IBReceiveMegabytes) AS 'IBReceiveMegabytes'" } ], @@ -959,7 +959,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.GpuMemoryCapacityMegabytes) AS 'GpuMemoryCapacityMegabytes', average(azure.machinelearningservices.workspaces.GpuMemoryUtilizationMegabytes) AS 'GpuMemoryUtilizationMegabytes' TIMESERIES AUTO" } ], @@ -994,7 +994,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.GpuCapacityMilliGPUs) AS 'GpuCapacityMilliGPUs' TIMESERIES AUTO" } ], @@ -1026,7 +1026,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT sum(azure.machinelearningservices.workspaces.GpuEnergyJoules) AS 'GpuEnergyJoules' TIMESERIES AUTO" } ], @@ -1058,7 +1058,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT 
average(azure.machinelearningservices.workspaces.GpuMemoryUtilizationPercentage) AS 'GpuMemoryUtilizationPercentage', average(azure.machinelearningservices.workspaces.GpuUtilizationPercentage) AS 'GpuUtilizationPercentage' TIMESERIES AUTO" } ], @@ -1093,7 +1093,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.GpuUtilization) AS 'GpuUtilization',average(azure.machinelearningservices.workspaces.GpuUtilizationMilliGPUs) AS 'GpuUtilizationMilliGPUs' TIMESERIES AUTO" } ], @@ -1125,7 +1125,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT sum(azure.machinelearningservices.workspaces.StorageAPIFailureCount) AS 'StorageAPIFailureCount',sum(azure.machinelearningservices.workspaces.StorageAPISuccessCount) AS 'StorageAPISuccessCount' TIMESERIES AUTO" } ], @@ -1157,7 +1157,7 @@ }, "nrqlQueries": [ { - "accountId": 0, + "accountId": 0, "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.NetworkInputMegabytes) AS 'NetworkInputMegabytes',average(azure.machinelearningservices.workspaces.NetworkOutputMegabytes) AS 'NetworkOutputMegabytes' TIMESERIES AUTO" } ], @@ -1173,6 +1173,546 @@ } } ] + }, + { + "name": "Online Endpoints", + "description": null, + "widgets": [ + { + "title": "", + "layout": { + "column": 1, + "row": 1, + "width": 3, + "height": 2 + }, + "visualization": { + "id": "viz.markdown" + }, + "rawConfiguration": { + "text": "## Azure Machine Learning Online Endpoints\nThese endpoints are used for online (real-time) inferencing. They deploy models under a web server that can return predictions under the HTTP protocol." 
+ } + }, + { + "title": "Connections active", + "layout": { + "column": 4, + "row": 1, + "width": 2, + "height": 2 + }, + "visualization": { + "id": "viz.billboard" + }, + "rawConfiguration": { + "facet": { + "showOtherSeries": false + }, + "nrqlQueries": [ + { + "accountId": 0, + "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.onlineendpoints.ConnectionsActive) as 'ConnectionsActive'" + } + ], + "platformOptions": { + "ignoreTimeRange": false + }, + "thresholds": [ + { + "alertSeverity": "WARNING", + "value": -1 + } + ] + } + }, + { + "title": "New connections per second", + "layout": { + "column": 6, + "row": 1, + "width": 2, + "height": 2 + }, + "visualization": { + "id": "viz.billboard" + }, + "rawConfiguration": { + "facet": { + "showOtherSeries": false + }, + "nrqlQueries": [ + { + "accountId": 0, + "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.onlineendpoints.NewConnectionsPerSecond) AS 'NewConnectionsPerSecond'" + } + ], + "platformOptions": { + "ignoreTimeRange": false + }, + "thresholds": [ + { + "alertSeverity": "WARNING", + "value": -1 + } + ] + } + }, + { + "title": "Network bytes", + "layout": { + "column": 8, + "row": 1, + "width": 5, + "height": 2 + }, + "visualization": { + "id": "viz.line" + }, + "rawConfiguration": { + "facet": { + "showOtherSeries": false + }, + "legend": { + "enabled": true + }, + "nrqlQueries": [ + { + "accountId": 0, + "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.onlineendpoints.NetworkBytes) AS 'NetworkBytes' TIMESERIES AUTO " + } + ], + "platformOptions": { + "ignoreTimeRange": false + }, + "units": { + "unit": "BYTES" + }, + "yAxisLeft": { + "zero": true + } + } + }, + { + "title": "Request latency", + "layout": { + "column": 1, + "row": 3, + "width": 12, + "height": 3 + }, + "visualization": { + "id": "viz.line" + }, + "rawConfiguration": { + "facet": { + "showOtherSeries": false + }, + "legend": { + "enabled": true + }, + 
"nrqlQueries": [ + { + "accountId": 0, + "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.onlineendpoints.RequestLatency) AS 'RequestLatency', \naverage(azure.machinelearningservices.workspaces.onlineendpoints.RequestLatency_P50) AS 'RequestLatency_P50',\naverage(azure.machinelearningservices.workspaces.onlineendpoints.RequestLatency_P90) AS 'RequestLatency_P90',\naverage(azure.machinelearningservices.workspaces.onlineendpoints.RequestLatency_P95) AS 'RequestLatency_P95',\naverage(azure.machinelearningservices.workspaces.onlineendpoints.RequestLatency_P99) AS 'RequestLatency_P99' TIMESERIES AUTO" + } + ], + "platformOptions": { + "ignoreTimeRange": false + }, + "units": { + "unit": "SECONDS" + }, + "yAxisLeft": { + "zero": true + } + } + }, + { + "title": "Requests per minute", + "layout": { + "column": 1, + "row": 6, + "width": 4, + "height": 3 + }, + "visualization": { + "id": "viz.line" + }, + "rawConfiguration": { + "facet": { + "showOtherSeries": false + }, + "legend": { + "enabled": true + }, + "nrqlQueries": [ + { + "accountId": 0, + "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.onlineendpoints.RequestsPerMinute) AS 'RequestsPerMinute' FACET azure.machinelearningservices.workspaces.onlineendpoints.deployment TIMESERIES AUTO " + } + ], + "platformOptions": { + "ignoreTimeRange": false + }, + "units": { + "unit": "REQUESTS_PER_MINUTE" + }, + "yAxisLeft": { + "zero": true + } + } + }, + { + "title": "New connections per second", + "layout": { + "column": 5, + "row": 6, + "width": 4, + "height": 3 + }, + "visualization": { + "id": "viz.line" + }, + "rawConfiguration": { + "facet": { + "showOtherSeries": false + }, + "legend": { + "enabled": true + }, + "nrqlQueries": [ + { + "accountId": 0, + "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.onlineendpoints.NewConnectionsPerSecond) AS 'NewConnectionsPerSecond' TIMESERIES AUTO " + } + ], + "platformOptions": { + 
"ignoreTimeRange": false + }, + "units": { + "unit": "REQUESTS_PER_SECOND" + }, + "yAxisLeft": { + "zero": true + } + } + }, + { + "title": "Data collection Events & Errors per minute", + "layout": { + "column": 9, + "row": 6, + "width": 4, + "height": 3 + }, + "visualization": { + "id": "viz.line" + }, + "rawConfiguration": { + "facet": { + "showOtherSeries": false + }, + "legend": { + "enabled": true + }, + "nrqlQueries": [ + { + "accountId": 0, + "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.onlineendpoints.DataCollectionErrorsPerMinute) AS 'DataCollectionErrorsPerMinute',average(azure.machinelearningservices.workspaces.onlineendpoints.DataCollectionEventsPerMinute) AS 'DataCollectionEventsPerMinute' TIMESERIES AUTO " + } + ], + "platformOptions": { + "ignoreTimeRange": false + }, + "units": { + "unit": "REQUESTS_PER_MINUTE" + }, + "yAxisLeft": { + "zero": true + } + } + } + ] + }, + { + "name": "Online Endpoint Deployments", + "description": null, + "widgets": [ + { + "title": "Request latency", + "layout": { + "column": 1, + "row": 1, + "width": 6, + "height": 3 + }, + "visualization": { + "id": "viz.area" + }, + "rawConfiguration": { + "facet": { + "showOtherSeries": false + }, + "legend": { + "enabled": true + }, + "nrqlQueries": [ + { + "accountId": 0, + "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.onlineendpoints.deployments.RequestLatency_P50) AS 'RequestLatency_P50',\naverage(azure.machinelearningservices.workspaces.onlineendpoints.deployments.RequestLatency_P90) AS 'RequestLatency_P90',\naverage(azure.machinelearningservices.workspaces.onlineendpoints.deployments.RequestLatency_P95) AS 'RequestLatency_P95',\naverage(azure.machinelearningservices.workspaces.onlineendpoints.deployments.RequestLatency_P99) AS 'RequestLatency_P99' TIMESERIES AUTO" + } + ], + "platformOptions": { + "ignoreTimeRange": false + }, + "units": { + "unit": "MS" + } + } + }, + { + "title": "Cpu utilization 
percentage", + "layout": { + "column": 7, + "row": 1, + "width": 6, + "height": 3 + }, + "visualization": { + "id": "viz.line" + }, + "rawConfiguration": { + "facet": { + "showOtherSeries": false + }, + "legend": { + "enabled": true + }, + "nrqlQueries": [ + { + "accountId": 0, + "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.onlineendpoints.deployments.CpuUtilizationPercentage) AS 'CpuUtilizationPercentage' TIMESERIES AUTO " + } + ], + "platformOptions": { + "ignoreTimeRange": false + }, + "units": { + "unit": "PERCENTAGE" + }, + "yAxisLeft": { + "zero": true + } + } + }, + { + "title": "Requests per minute", + "layout": { + "column": 1, + "row": 4, + "width": 4, + "height": 3 + }, + "visualization": { + "id": "viz.line" + }, + "rawConfiguration": { + "facet": { + "showOtherSeries": false + }, + "legend": { + "enabled": true + }, + "nrqlQueries": [ + { + "accountId": 0, + "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.onlineendpoints.deployments.RequestsPerMinute) AS 'RequestsPerMinute' TIMESERIES AUTO " + } + ], + "platformOptions": { + "ignoreTimeRange": false + }, + "units": { + "unit": "REQUESTS_PER_MINUTE" + }, + "yAxisLeft": { + "zero": true + } + } + }, + { + "title": "Disk utilization", + "layout": { + "column": 5, + "row": 4, + "width": 4, + "height": 3 + }, + "visualization": { + "id": "viz.line" + }, + "rawConfiguration": { + "facet": { + "showOtherSeries": false + }, + "legend": { + "enabled": true + }, + "nrqlQueries": [ + { + "accountId": 0, + "query": "FROM Metric SELECT max(azure.machinelearningservices.workspaces.onlineendpoints.deployments.DiskUtilization)/60 AS 'DiskUtilization' FACET azure.resourceId TIMESERIES AUTO" + } + ], + "platformOptions": { + "ignoreTimeRange": false + }, + "units": { + "unit": "PERCENTAGE" + }, + "yAxisLeft": { + "zero": true + } + } + }, + { + "title": "Deployment capacity", + "layout": { + "column": 9, + "row": 4, + "width": 4, + "height": 3 + }, + 
"visualization": { + "id": "viz.line" + }, + "rawConfiguration": { + "facet": { + "showOtherSeries": false + }, + "legend": { + "enabled": true + }, + "nrqlQueries": [ + { + "accountId": 0, + "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.onlineendpoints.deployments.DeploymentCapacity) AS 'DeploymentCapacity' FACET azure.resourceId TIMESERIES AUTO" + } + ], + "platformOptions": { + "ignoreTimeRange": false + }, + "yAxisLeft": { + "zero": true + } + } + }, + { + "title": "Gpu utilization percentage", + "layout": { + "column": 1, + "row": 7, + "width": 4, + "height": 3 + }, + "visualization": { + "id": "viz.line" + }, + "rawConfiguration": { + "facet": { + "showOtherSeries": false + }, + "legend": { + "enabled": true + }, + "nrqlQueries": [ + { + "accountId": 0, + "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.onlineendpoints.deployments.GpuUtilizationPercentage) AS 'GpuUtilizationPercentage' TIMESERIES AUTO" + } + ], + "platformOptions": { + "ignoreTimeRange": false + }, + "units": { + "unit": "PERCENTAGE" + }, + "yAxisLeft": { + "zero": true + } + } + }, + { + "title": "Gpu energy joules", + "layout": { + "column": 5, + "row": 7, + "width": 4, + "height": 3 + }, + "visualization": { + "id": "viz.line" + }, + "rawConfiguration": { + "facet": { + "showOtherSeries": false + }, + "legend": { + "enabled": true + }, + "nrqlQueries": [ + { + "accountId": 0, + "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.onlineendpoints.deployments.GpuEnergyJoules) AS 'GpuEnergyJoules' TIMESERIES AUTO" + } + ], + "platformOptions": { + "ignoreTimeRange": false + }, + "units": { + "unit": "COUNT" + }, + "yAxisLeft": { + "zero": true + } + } + }, + { + "title": "Data collection Events & Errors per minute", + "layout": { + "column": 9, + "row": 7, + "width": 4, + "height": 3 + }, + "visualization": { + "id": "viz.line" + }, + "rawConfiguration": { + "facet": { + "showOtherSeries": false + }, + 
"legend": { + "enabled": true + }, + "nrqlQueries": [ + { + "accountId": 0, + "query": "FROM Metric SELECT average(azure.machinelearningservices.workspaces.onlineendpoints.deployments.DataCollectionErrorsPerMinute) AS 'DataCollectionErrorsPerMinute',average(azure.machinelearningservices.workspaces.onlineendpoints.deployments.DataCollectionEventsPerMinute) AS 'DataCollectionEventsPerMinute' TIMESERIES AUTO " + } + ], + "platformOptions": { + "ignoreTimeRange": false + }, + "units": { + "unit": "REQUESTS_PER_MINUTE" + }, + "yAxisLeft": { + "zero": true + } + } + } + ] } ] } \ No newline at end of file diff --git a/quickstarts/azure/azure-machine-learning/config.yml b/quickstarts/azure/azure-machine-learning/config.yml index c43e026f5c..1055214a7f 100644 --- a/quickstarts/azure/azure-machine-learning/config.yml +++ b/quickstarts/azure/azure-machine-learning/config.yml @@ -12,7 +12,7 @@ description: |- ### Why monitor Azure Machine Learning with New Relic? - [New Relic Azure Machine Learning](https://docs.newrelic.com/docs/infrastructure/microsoft-azure-integrations/azure-integrations-list/azure-machine-learning-monitoring-integration/) monitoring quickstart empowers you to track the performance of Azure Machine Learning via different metrics including Job Execution, ActiveCores, CpuUtilization, FinalizingRuns, Model Deployment and more. + [New Relic Azure Machine Learning](https://docs.newrelic.com/docs/infrastructure/microsoft-azure-integrations/azure-integrations-list/azure-machine-learning-monitor-integration/) monitoring quickstart empowers you to track the performance of Azure Machine Learning via different metrics including Job Execution, ActiveCores, CpuUtilization, FinalizingRuns, Model Deployment and more. Our integration features a standard dashboard that provides interactive visualizations to explore your data, understand context, and get valuable insights. 
@@ -29,7 +29,7 @@ documentation: description: | Monitor Azure Machine Learning by connecting Azure to New Relic. url: >- - https://docs.newrelic.com/docs/infrastructure/microsoft-azure-integrations/azure-integrations-list/azure-machine-learning-monitoring-integration/ + https://docs.newrelic.com/docs/infrastructure/microsoft-azure-integrations/azure-integrations-list/azure-machine-learning-monitor-integration/ keywords: - azure - azure machine learning @@ -39,3 +39,5 @@ dashboards: - azure-machine-learning dataSourceIds: - azure-monitor +alertPolicies: + - azure-machine-learning \ No newline at end of file