Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.TimeSeries;

namespace Samples.Dynamic
{
Expand Down Expand Up @@ -29,77 +30,52 @@ public static void Example()
// Convert data to IDataView.
var dataView = ml.Data.LoadFromEnumerable(data);

// Setup the estimator arguments

// Setup the detection arguments
string outputColumnName = nameof(SrCnnAnomalyDetection.Prediction);
string inputColumnName = nameof(TimeSeriesData.Value);

// The transformed data.
var transformedData = ml.Transforms.DetectAnomalyBySrCnn(
outputColumnName, inputColumnName, 16, 5, 5, 3, 8, 0.35).Fit(
dataView).Transform(dataView);
// Do batch anomaly detection
var outputDataView = ml.Data.BatchDetectAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, batchSize: 512, sensitivity: 70, detectMode: SrCnnDetectMode.AnomalyAndMargin);

// Getting the data of the newly created column as an IEnumerable of
// SrCnnAnomalyDetection.
var predictionColumn = ml.Data.CreateEnumerable<SrCnnAnomalyDetection>(
transformedData, reuseRowObject: false);
outputDataView, reuseRowObject: false);

Console.WriteLine($"{outputColumnName} column obtained post-" +
$"transformation.");

Console.WriteLine("Data\tAlert\tScore\tMag");
Console.WriteLine("Data\tAlert\tScore\tMag\tExpectedValue\tBoundaryUnit\tUpperBoundary\tLowerBoundary");

int k = 0;
foreach (var prediction in predictionColumn)
PrintPrediction(data[k++].Value, prediction);

//Prediction column obtained post-transformation.
//Data Alert Score Mag
//5 0 0.00 0.00
//5 0 0.00 0.00
//5 0 0.00 0.00
//5 0 0.00 0.00
//5 0 0.00 0.00
//5 0 0.00 0.00
//5 0 0.00 0.00
//5 0 0.00 0.00
//5 0 0.00 0.00
//5 0 0.00 0.00
//5 0 0.00 0.00
//5 0 0.00 0.00
//5 0 0.00 0.00
//5 0 0.00 0.00
//5 0 0.00 0.00
//5 0 0.03 0.18
//5 0 0.03 0.18
//5 0 0.03 0.18
//5 0 0.03 0.18
//5 0 0.03 0.18
//10 1 0.47 0.93
//5 0 0.31 0.50
//5 0 0.05 0.30
//5 0 0.01 0.23
//5 0 0.00 0.21
//5 0 0.01 0.25
//Data Alert Score Mag ExpectedValue BoundaryUnit UpperBoundary LowerBoundary
// TODO: update with actual output from SrCnn
}

private static void PrintPrediction(float value, SrCnnAnomalyDetection
private static void PrintPrediction(double value, SrCnnAnomalyDetection
prediction) =>
Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, prediction
.Prediction[0], prediction.Prediction[1], prediction.Prediction[2]);
Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}\t{5:0.00}\t{6:0.00}", value,
prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2],
prediction.Prediction[3], prediction.Prediction[4], prediction.Prediction[5]);

private class TimeSeriesData
{
public float Value;
public double Value;

public TimeSeriesData(float value)
public TimeSeriesData(double value)
{
Value = value;
}
}

private class SrCnnAnomalyDetection
{
[VectorType(3)]
[VectorType(6)]
public double[] Prediction { get; set; }
}
}
Expand Down
172 changes: 172 additions & 0 deletions src/Microsoft.ML.Data/DataView/BatchDataViewMapperBase.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.Runtime;

namespace Microsoft.ML.Data.DataView
{
internal abstract class BatchDataViewMapperBase<TInput, TBatch> : IDataView
{
public bool CanShuffle => false;

public DataViewSchema Schema => SchemaBindings.AsSchema;

private readonly IDataView _source;
private readonly IHost _host;

protected BatchDataViewMapperBase(IHostEnvironment env, string registrationName, IDataView input)
{
_host = env.Register(registrationName);
_source = input;
}

public long? GetRowCount() => _source.GetRowCount();

public DataViewRowCursor GetRowCursor(IEnumerable<DataViewSchema.Column> columnsNeeded, Random rand = null)
{
_host.CheckValue(columnsNeeded, nameof(columnsNeeded));
_host.CheckValueOrNull(rand);

var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, SchemaBindings.AsSchema);

// If we aren't selecting any of the output columns, don't construct our cursor.
// Note that because we cannot support random due to the inherently
// stratified nature, neither can we allow the base data to be shuffled,
// even if it supports shuffling.
if (!SchemaBindings.AnyNewColumnsActive(predicate))
{
var activeInput = SchemaBindings.GetActiveInput(predicate);
var inputCursor = _source.GetRowCursor(_source.Schema.Where(c => activeInput[c.Index]), null);
return new BindingsWrappedRowCursor(_host, inputCursor, SchemaBindings);
}
var active = SchemaBindings.GetActive(predicate);
Contracts.Assert(active.Length == SchemaBindings.ColumnCount);

// REVIEW: We can get a different input predicate for the input cursor and for the lookahead cursor. The lookahead
// cursor is only used for getting the values from the input column, so it only needs that column activated. The
// other cursor is used to get source columns, so it needs the rest of them activated.
var predInput = GetSchemaBindingDependencies(predicate);
var inputCols = _source.Schema.Where(c => predInput(c.Index));
return new Cursor(this, _source.GetRowCursor(inputCols), _source.GetRowCursor(inputCols), active);
}

public DataViewRowCursor[] GetRowCursorSet(IEnumerable<DataViewSchema.Column> columnsNeeded, int n, Random rand = null)
{
return new[] { GetRowCursor(columnsNeeded, rand) };
}

protected abstract ColumnBindingsBase SchemaBindings { get; }
protected abstract TBatch InitializeBatch(DataViewRowCursor input);
protected abstract void ProcessBatch(TBatch currentBatch);
protected abstract void ProcessExample(TBatch currentBatch, TInput currentInput);
protected abstract Func<bool> GetLastInBatchDelegate(DataViewRowCursor lookAheadCursor);
protected abstract Func<bool> GetIsNewBatchDelegate(DataViewRowCursor lookAheadCursor);
protected abstract ValueGetter<TInput> GetLookAheadGetter(DataViewRowCursor lookAheadCursor);
protected abstract Delegate[] CreateGetters(DataViewRowCursor input, TBatch currentBatch, bool[] active);
protected abstract Func<int, bool> GetSchemaBindingDependencies(Func<int, bool> predicate);

private sealed class Cursor : RootCursorBase
{
private readonly BatchDataViewMapperBase<TInput, TBatch> _parent;
private readonly DataViewRowCursor _lookAheadCursor;
private readonly DataViewRowCursor _input;

private readonly bool[] _active;
private readonly Delegate[] _getters;

private readonly TBatch _currentBatch;
private readonly Func<bool> _lastInBatchInLookAheadCursorDel;
private readonly Func<bool> _firstInBatchInInputCursorDel;
private readonly ValueGetter<TInput> _inputGetterInLookAheadCursor;
private TInput _currentInput;

public override long Batch => 0;

public override DataViewSchema Schema => _parent.Schema;

public Cursor(BatchDataViewMapperBase<TInput, TBatch> parent, DataViewRowCursor input, DataViewRowCursor lookAheadCursor, bool[] active)
: base(parent._host)
{
_parent = parent;
_input = input;
_lookAheadCursor = lookAheadCursor;
_active = active;

_currentBatch = _parent.InitializeBatch(_input);

_getters = _parent.CreateGetters(_input, _currentBatch, _active);

_lastInBatchInLookAheadCursorDel = _parent.GetLastInBatchDelegate(_lookAheadCursor);
_firstInBatchInInputCursorDel = _parent.GetIsNewBatchDelegate(_input);
_inputGetterInLookAheadCursor = _parent.GetLookAheadGetter(_lookAheadCursor);
}

public override ValueGetter<TValue> GetGetter<TValue>(DataViewSchema.Column column)
{
Contracts.CheckParam(IsColumnActive(column), nameof(column), "requested column is not active");

var col = _parent.SchemaBindings.MapColumnIndex(out bool isSrc, column.Index);
if (isSrc)
{
Contracts.AssertValue(_input);
return _input.GetGetter<TValue>(_input.Schema[col]);
}

Ch.AssertValue(_getters);
var getter = _getters[col];
Ch.Assert(getter != null);
var fn = getter as ValueGetter<TValue>;
if (fn == null)
throw Ch.Except("Invalid TValue in GetGetter: '{0}'", typeof(TValue));
return fn;
}

public override ValueGetter<DataViewRowId> GetIdGetter()
{
return
(ref DataViewRowId val) =>
{
Ch.Check(IsGood, "Cannot call ID getter in current state");
val = new DataViewRowId((ulong)Position, 0);
};
}

public override bool IsColumnActive(DataViewSchema.Column column)
{
Ch.Check(column.Index < _parent.SchemaBindings.AsSchema.Count);
return _active[column.Index];
}

protected override bool MoveNextCore()
{
if (!_input.MoveNext())
return false;
if (!_firstInBatchInInputCursorDel())
return true;

// If we are here, this means that _input.MoveNext() has gotten us to the beginning of the next batch,
// so now we need to look ahead at the entire next batch in the _lookAheadCursor.
// The _lookAheadCursor's position should be on the last row of the previous batch (or -1).
Ch.Assert(_lastInBatchInLookAheadCursorDel());

var good = _lookAheadCursor.MoveNext();
// The two cursors should have the same number of elements, so if _input.MoveNext() returned true,
// then it must return true here too.
Ch.Assert(good);

do
{
_inputGetterInLookAheadCursor(ref _currentInput);
_parent.ProcessExample(_currentBatch, _currentInput);
} while (!_lastInBatchInLookAheadCursorDel() && _lookAheadCursor.MoveNext());

_parent.ProcessBatch(_currentBatch);
return true;
}
}
}
}
25 changes: 23 additions & 2 deletions src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Reflection;
using Microsoft.ML.Data;
using Microsoft.ML.Runtime;
using Microsoft.ML.TimeSeries;
Expand Down Expand Up @@ -150,6 +148,29 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog
int windowSize = 64, int backAddWindowSize = 5, int lookaheadWindowSize = 5, int averageingWindowSize = 3, int judgementWindowSize = 21, double threshold = 0.3)
=> new SrCnnAnomalyEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, windowSize, backAddWindowSize, lookaheadWindowSize, averageingWindowSize, judgementWindowSize, threshold, inputColumnName);

/// <summary>
/// Create <see cref="SrCnnAnomalyEstimator"/>, which detects timeseries anomalies using SRCNN algorithm.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="input">...</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.
/// The column data is a vector of <see cref="System.Double"/>. The vector contains 3 elements: alert (1 means anomaly while 0 means normal), raw score, and magnitude of spectual residual.</param>
/// <param name="inputColumnName">Name of column to transform. The column data must be <see cref="System.Single"/>.</param>
/// <param name="threshold">The threshold to determine anomaly, score larger than the threshold is considered as anomaly. Should be in (0,1)</param>
/// <param name="batchSize">.Divide the input data into batches to fit SrCnn model. Must be -1 or a positive integer no less than 12. Default value is 1024.</param>
/// <param name="sensitivity">The sensitivity of boundaries. Must be in the interval (0, 100).</param>
/// <param name="detectMode">The detect mode of the SrCnn model.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[DetectAnomalyBySrCnn](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs)]
/// ]]>
/// </format>
/// </example>
public static IDataView BatchDetectAnomalyBySrCnn(this DataOperationsCatalog catalog, IDataView input, string outputColumnName, string inputColumnName,
double threshold = 0.3, int batchSize = 1024, double sensitivity = 99, SrCnnDetectMode detectMode = SrCnnDetectMode.AnomalyAndMargin)
=> new SrCnnBatchAnomalyDetector(CatalogUtils.GetEnvironment(catalog), input, inputColumnName, outputColumnName, threshold, batchSize, sensitivity, detectMode);

/// <summary>
/// Create <see cref="RootCause"/>, which localizes root causes using decision tree algorithm.
/// </summary>
Expand Down
Loading